Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYury Melnichek <melnichek@gmail.com>2011-04-26 20:46:37 +0400
committerAlex Zolotarev <alex@maps.me>2015-09-23 01:17:28 +0300
commit45bb508bad5df2c7e8effe06cd7b72915e0a0a3c (patch)
tree1dd5aa4945fd23c6eb75d5a5651ecf1aa49b2f35 /indexer
parent9179f04bfec92ad07c4c70bd232f7dc657718a3e (diff)
New smaller index format.
Diffstat (limited to 'indexer')
-rw-r--r--indexer/indexer_tests/interval_index_test.cpp53
-rw-r--r--indexer/interval_index.hpp151
-rw-r--r--indexer/interval_index_builder.hpp212
-rw-r--r--indexer/scale_index.hpp2
-rw-r--r--indexer/scale_index_builder.hpp3
5 files changed, 255 insertions, 166 deletions
diff --git a/indexer/indexer_tests/interval_index_test.cpp b/indexer/indexer_tests/interval_index_test.cpp
index 3f5c2bb184..d8e2a3c0f0 100644
--- a/indexer/indexer_tests/interval_index_test.cpp
+++ b/indexer/indexer_tests/interval_index_test.cpp
@@ -20,6 +20,35 @@ struct CellIdFeaturePairForTest
};
}
+UNIT_TEST(IntervalIndex_Serialized)
+{
+ vector<CellIdFeaturePairForTest> data;
+ data.push_back(CellIdFeaturePairForTest(0x21U, 0));
+ data.push_back(CellIdFeaturePairForTest(0x22U, 1));
+ data.push_back(CellIdFeaturePairForTest(0x41U, 2));
+ vector<char> serializedIndex;
+ MemWriter<vector<char> > writer(serializedIndex);
+ BuildIntervalIndex(data.begin(), data.end(), writer, 10);
+
+ char const expSerial [] =
+ "\x02\x05" // Header
+ "\x00\x00\x00\x00" "\x06\x00\x00\x00" // Root
+ "\x10\x00\x00\x00" "\x06\x00\x00\x00" // 0x21 and 0x22
+ "\x0A\x00\x00\x00" "\x02\x00\x00\x00" // 0x41
+ "\x03\x00\x00\x00" "\x00\x00\x00\x00" // Dummy last internal node
+ "\x01" "\x05" "\x09" // (0x21, 0), (0x22, 1), (0x31, 2)
+ "";
+
+ TEST_EQUAL(serializedIndex, vector<char>(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1), ());
+
+ MemReader reader(&serializedIndex[0], serializedIndex.size());
+ IntervalIndex<MemReader> index(reader);
+ uint32_t expected [] = {0, 1, 2};
+ vector<uint32_t> values;
+ index.ForEach(MakeBackInsertFunctor(values), 0, 0xFF);
+ TEST_EQUAL(values, vector<uint32_t>(expected, expected + ARRAY_SIZE(expected)), ());
+}
+
UNIT_TEST(IntervalIndex_Simple)
{
vector<CellIdFeaturePairForTest> data;
@@ -28,9 +57,9 @@ UNIT_TEST(IntervalIndex_Simple)
data.push_back(CellIdFeaturePairForTest(0xA0B2C2D100ULL, 2));
vector<char> serializedIndex;
MemWriter<vector<char> > writer(serializedIndex);
- BuildIntervalIndex<5>(data.begin(), data.end(), writer, 2);
+ BuildIntervalIndex(data.begin(), data.end(), writer, 40);
MemReader reader(&serializedIndex[0], serializedIndex.size());
- IntervalIndex<uint32_t, MemReader> index(reader, 5);
+ IntervalIndex<MemReader> index(reader);
{
uint32_t expected [] = {0, 1, 2};
vector<uint32_t> values;
@@ -78,9 +107,9 @@ UNIT_TEST(IntervalIndex_Empty)
vector<CellIdFeaturePairForTest> data;
vector<char> serializedIndex;
MemWriter<vector<char> > writer(serializedIndex);
- BuildIntervalIndex<5>(data.begin(), data.end(), writer, 2);
+ BuildIntervalIndex(data.begin(), data.end(), writer, 40);
MemReader reader(&serializedIndex[0], serializedIndex.size());
- IntervalIndex<uint32_t, MemReader> index(reader, 5);
+ IntervalIndex<MemReader> index(reader);
{
vector<uint32_t> values;
index.ForEach(MakeBackInsertFunctor(values), 0ULL, 0xFFFFFFFFFFULL);
@@ -97,9 +126,9 @@ UNIT_TEST(IntervalIndex_Simple2)
data.push_back(CellIdFeaturePairForTest(0xA0B2C2D200ULL, 2));
vector<char> serializedIndex;
MemWriter<vector<char> > writer(serializedIndex);
- BuildIntervalIndex<5>(data.begin(), data.end(), writer, 2);
+ BuildIntervalIndex(data.begin(), data.end(), writer, 40);
MemReader reader(&serializedIndex[0], serializedIndex.size());
- IntervalIndex<uint32_t, MemReader> index(reader, 5);
+ IntervalIndex<MemReader> index(reader);
{
uint32_t expected [] = {0, 1, 2, 3};
vector<uint32_t> values;
@@ -116,9 +145,9 @@ UNIT_TEST(IntervalIndex_Simple3)
data.push_back(CellIdFeaturePairForTest(0x0200ULL, 1));
vector<char> serializedIndex;
MemWriter<vector<char> > writer(serializedIndex);
- BuildIntervalIndex<2>(data.begin(), data.end(), writer, 1);
+ BuildIntervalIndex(data.begin(), data.end(), writer, 40);
MemReader reader(&serializedIndex[0], serializedIndex.size());
- IntervalIndex<uint32_t, MemReader> index(reader, 2);
+ IntervalIndex<MemReader> index(reader);
{
uint32_t expected [] = {0, 1};
vector<uint32_t> values;
@@ -135,9 +164,9 @@ UNIT_TEST(IntervalIndex_Simple4)
data.push_back(CellIdFeaturePairForTest(0x02030400ULL, 1));
vector<char> serializedIndex;
MemWriter<vector<char> > writer(serializedIndex);
- BuildIntervalIndex<4>(data.begin(), data.end(), writer, 1);
+ BuildIntervalIndex(data.begin(), data.end(), writer, 40);
MemReader reader(&serializedIndex[0], serializedIndex.size());
- IntervalIndex<uint32_t, MemReader> index(reader, 4);
+ IntervalIndex<MemReader> index(reader);
{
uint32_t expected [] = {0, 1};
vector<uint32_t> values;
@@ -156,9 +185,9 @@ UNIT_TEST(IntervalIndex_Simple5)
data.push_back(CellIdFeaturePairForTest(0xA0B2C2D200ULL, 2));
vector<char> serializedIndex;
MemWriter<vector<char> > writer(serializedIndex);
- BuildIntervalIndex<5>(data.begin(), data.end(), writer, 1);
+ BuildIntervalIndex(data.begin(), data.end(), writer, 40);
MemReader reader(&serializedIndex[0], serializedIndex.size());
- IntervalIndex<uint32_t, MemReader> index(reader, 5);
+ IntervalIndex<MemReader> index(reader);
{
uint32_t expected [] = {0, 1, 2, 3};
vector<uint32_t> values;
diff --git a/indexer/interval_index.hpp b/indexer/interval_index.hpp
index 49ffa06847..2894aae208 100644
--- a/indexer/interval_index.hpp
+++ b/indexer/interval_index.hpp
@@ -1,10 +1,14 @@
#pragma once
#include "../coding/endianness.hpp"
+#include "../coding/byte_stream.hpp"
+#include "../coding/varint.hpp"
#include "../base/assert.hpp"
#include "../base/base.hpp"
+#include "../base/bits.hpp"
+#include "../base/buffer_vector.hpp"
#include "../base/macros.hpp"
-
#include "../std/memcpy.hpp"
+#include "../std/static_assert.hpp"
class IntervalIndexBase
{
@@ -12,31 +16,24 @@ public:
#pragma pack(push, 1)
struct Header
{
- uint8_t m_CellIdLeafBytes;
+ uint8_t m_Levels;
+ uint8_t m_BitsPerLevel;
};
#pragma pack(pop)
+ STATIC_ASSERT(sizeof(Header) == 2);
- class Index
+ struct Index
{
- public:
- uint32_t GetBaseOffset() const { return UINT32_FROM_UINT16(m_BaseOffsetHi, m_BaseOffsetLo); }
- void SetBaseOffset(uint32_t baseOffset)
- {
- m_BaseOffsetLo = UINT32_LO(baseOffset);
- m_BaseOffsetHi = UINT32_HI(baseOffset);
- }
+ inline uint32_t GetOffset() const { return m_Offset; }
+ inline uint32_t Bit(uint32_t i) const { return (m_BitMask >> i) & 1; }
- private:
- uint16_t m_BaseOffsetLo;
- uint16_t m_BaseOffsetHi;
- public:
- uint16_t m_Count[256];
+ uint32_t m_Offset;
+ uint32_t m_BitMask;
};
- STATIC_ASSERT(sizeof(Index) == 2 * 258);
+ STATIC_ASSERT(sizeof(Index) == 8);
};
-// TODO: IntervalIndex shouldn't do SwapIfBigEndian for ValueT.
-template <typename ValueT, class ReaderT>
+template <class ReaderT>
class IntervalIndex : public IntervalIndexBase
{
public:
@@ -50,21 +47,26 @@ public:
vector<char> m_IntervalIndexCache;
};
- IntervalIndex(ReaderT const & reader, int cellIdBytes = 5)
- : m_Reader(reader), m_CellIdBytes(cellIdBytes)
+ explicit IntervalIndex(ReaderT const & reader)
+ : m_Reader(reader)
{
m_Reader.Read(0, &m_Header, sizeof(m_Header));
+ ASSERT_EQUAL(m_Header.m_BitsPerLevel, 5, ());
ReadIndex(sizeof(m_Header), m_Level0Index);
}
template <typename F>
void ForEach(F const & f, uint64_t beg, uint64_t end, Query & query) const
{
- ASSERT_LESS(beg, 1ULL << 8 * m_CellIdBytes, (beg, end));
- ASSERT_LESS_OR_EQUAL(end, 1ULL << 8 * m_CellIdBytes, (beg, end));
- // end is inclusive in ForEachImpl().
- --end;
- ForEachImpl(f, beg, end, m_Level0Index, m_CellIdBytes - 1, query);
+ ASSERT_LESS(beg, 1ULL << m_Header.m_Levels * m_Header.m_BitsPerLevel, (beg, end));
+ ASSERT_LESS_OR_EQUAL(end, 1ULL << m_Header.m_Levels * m_Header.m_BitsPerLevel, (beg, end));
+ if (beg != end)
+ {
+ // end is inclusive in ForEachImpl().
+ --end;
+ ForEachImpl(f, beg, end, m_Level0Index, sizeof(m_Header) + sizeof(m_Level0Index),
+ (m_Header.m_Levels - 1) * m_Header.m_BitsPerLevel, query);
+ }
}
template <typename F>
@@ -76,52 +78,64 @@ public:
private:
template <typename F>
- void ForEachImpl(F const & f, uint64_t beg, uint64_t end, Index const & index, int level,
- Query & query) const
+ void ForEachImpl(F const & f, uint64_t beg, uint64_t end, Index const & index,
+ uint32_t baseOffset, int skipBits, Query & query) const
{
- uint32_t const beg0 = static_cast<uint32_t>(beg >> (8 * level));
- uint32_t const end0 = static_cast<uint32_t>(end >> (8 * level));
- uint32_t cumCount = 0;
- for (uint32_t i = 0; i < beg0; ++i)
- cumCount += index.m_Count[i];
- for (uint32_t i = beg0; i <= end0; ++i)
+ uint32_t const beg0 = static_cast<uint32_t>(beg >> skipBits);
+ uint32_t const end0 = static_cast<uint32_t>(end >> skipBits);
+ ASSERT_GREATER_OR_EQUAL(skipBits, 0, (beg, end, baseOffset));
+ ASSERT_LESS_OR_EQUAL(beg, end, (baseOffset, skipBits));
+ ASSERT_LESS_OR_EQUAL(beg0, end0, (beg, end, baseOffset, skipBits));
+ ASSERT_LESS(end0, (1 << m_Header.m_BitsPerLevel), (beg0, beg, end, baseOffset, skipBits));
+
+ if (skipBits > 0)
{
- ASSERT_LESS(i, 256, ());
- if (index.m_Count[i] != 0)
+ uint32_t cumCount = 0;
+ for (uint32_t i = 0; i < beg0; ++i)
+ cumCount += index.Bit(i);
+ for (uint32_t i = beg0; i <= end0; ++i)
{
- uint64_t const levelBytesFF = (1ULL << 8 * level) - 1;
- uint64_t const b1 = (i == beg0) ? (beg & levelBytesFF) : 0;
- uint64_t const e1 = (i == end0) ? (end & levelBytesFF) : levelBytesFF;
- if (level > m_Header.m_CellIdLeafBytes)
+ if (index.Bit(i))
{
+ uint64_t const levelBytesFF = (1ULL << skipBits) - 1;
+ uint64_t const b1 = (i == beg0) ? (beg & levelBytesFF) : 0;
+ uint64_t const e1 = (i == end0) ? (end & levelBytesFF) : levelBytesFF;
+
Index index1;
- ReadIndex(index.GetBaseOffset() + (cumCount * sizeof(Index)), index1);
- ForEachImpl(f, b1, e1, index1, level - 1, query);
- }
- else
- {
- // TODO: Use binary search here if count is very large.
- uint32_t const step = sizeof(ValueT) + m_Header.m_CellIdLeafBytes;
- uint32_t const count = index.m_Count[i];
- uint32_t pos = index.GetBaseOffset() + (cumCount * step);
- size_t const readSize = step * count;
- query.m_IntervalIndexCache.assign(readSize, 0);
- char * pData = &query.m_IntervalIndexCache[0];
- m_Reader.Read(pos, pData, readSize);
- for (uint32_t j = 0; j < count; ++j, pData += step)
- // for (uint32_t j = 0; j < count; ++j, pos += step)
- {
- ValueT value;
- uint32_t cellIdOnDisk = 0;
- memcpy(&value, pData, sizeof(ValueT));
- memcpy(&cellIdOnDisk, pData + sizeof(ValueT), m_Header.m_CellIdLeafBytes);
- // m_Reader.Read(pos, &value, step);
- uint32_t const cellId = SwapIfBigEndian(cellIdOnDisk);
- if (b1 <= cellId && cellId <= e1)
- f(SwapIfBigEndian(value));
- }
+ uint32_t const offset = baseOffset + index.GetOffset() + (cumCount * sizeof(Index));
+ ReadIndex(offset, index1);
+ ForEachImpl(f, b1, e1, index1, offset + sizeof(Index),
+ skipBits - m_Header.m_BitsPerLevel, query);
+ ++cumCount;
}
- cumCount += index.m_Count[i];
+ }
+ }
+ else
+ {
+ Index nextIndex;
+ ReadIndex(baseOffset, nextIndex);
+ uint32_t const begOffset = baseOffset + index.GetOffset();
+ uint32_t const endOffset = baseOffset + sizeof(Index) + nextIndex.GetOffset();
+ ASSERT_LESS(begOffset, endOffset, (beg, end, baseOffset, skipBits));
+ buffer_vector<uint8_t, 256> data(endOffset - begOffset);
+ m_Reader.Read(begOffset, &data[0], data.size());
+ ArrayByteSource src(&data[0]);
+ void const * const pEnd = &data[0] + data.size();
+ uint32_t key = -1;
+ uint32_t value = 0;
+ while (src.Ptr() < pEnd)
+ {
+ uint32_t const x = ReadVarUint<uint32_t>(src);
+ int32_t const delta = bits::ZigZagDecode(x >> 1);
+ ASSERT_GREATER_OR_EQUAL(static_cast<int32_t>(value) + delta, 0, ());
+ value += delta;
+ if (x & 1)
+ for (++key; key < (1 << m_Header.m_BitsPerLevel) && !index.Bit(key); ++key) ;
+ ASSERT_LESS(key, 1 << m_Header.m_BitsPerLevel, (key));
+ if (key > end0)
+ break;
+ if (key >= beg0)
+ f(value);
}
}
}
@@ -129,11 +143,8 @@ private:
void ReadIndex(uint64_t pos, Index & index) const
{
m_Reader.Read(pos, &index, sizeof(Index));
- if (IsBigEndian())
- {
- for (uint32_t i = 0; i < 256; ++i)
- index.m_Count[i] = SwapIfBigEndian(index.m_Count[i]);
- }
+ index.m_Offset = SwapIfBigEndian(index.m_Offset);
+ index.m_BitMask = SwapIfBigEndian(index.m_BitMask);
}
ReaderT m_Reader;
diff --git a/indexer/interval_index_builder.hpp b/indexer/interval_index_builder.hpp
index 08dc1e870f..1802cbff35 100644
--- a/indexer/interval_index_builder.hpp
+++ b/indexer/interval_index_builder.hpp
@@ -1,19 +1,57 @@
#pragma once
#include "interval_index.hpp"
+#include "../coding/byte_stream.hpp"
#include "../coding/endianness.hpp"
+#include "../coding/varint.hpp"
#include "../coding/write_to_sink.hpp"
#include "../base/assert.hpp"
#include "../base/base.hpp"
+#include "../base/bits.hpp"
#include "../base/logging.hpp"
#include "../std/vector.hpp"
#include "../std/memcpy.hpp"
-// TODO: BuildIntervalIndex() shouldn't rely on indexing cellid-feature pairs.
-template <int kCellIdBytes, typename CellIdValueIterT, class SinkT>
-void BuildIntervalIndex(CellIdValueIterT const & beg, CellIdValueIterT const & end,
- SinkT & writer, int const leafBytes = 1)
+namespace impl
+{
+
+template <class WriterT>
+void WriteIntervalIndexNode(WriterT & writer, uint64_t offset, uint64_t bitMask)
+{
+ // At the moment, uint32_t is used as a bitMask, but this can change in the future.
+ CHECK_EQUAL(static_cast<uint32_t>(bitMask), bitMask, (offset));
+ CHECK_GREATER_OR_EQUAL(offset, writer.Pos() + 8, ());
+ WriteToSink(writer, static_cast<uint32_t>(offset - writer.Pos() - 8));
+ WriteToSink(writer, static_cast<uint32_t>(bitMask));
+}
+
+template <class SinkT> void WriteIntervalIndexLeaf(SinkT & sink, int bitsPerLevel,
+ uint64_t prevKey, uint64_t prevValue,
+ uint64_t key, uint64_t value)
+{
+ uint64_t const lastBitsZeroMask = (1ULL << bitsPerLevel) - 1;
+ if ((key & ~lastBitsZeroMask) != (prevKey & ~lastBitsZeroMask))
+ prevValue = 0;
+
+ int64_t const delta = static_cast<int64_t>(value) - static_cast<int64_t>(prevValue);
+ uint64_t const encodedDelta = bits::ZigZagEncode(delta);
+ uint64_t const code = (encodedDelta << 1) + (key == prevKey ? 0 : 1);
+ WriteVarUint(sink, code);
+}
+
+inline uint32_t IntervalIndexLeafSize(int bitsPerLevel,
+ uint64_t prevKey, uint64_t prevValue,
+ uint64_t key, uint64_t value)
+{
+ CountingSink sink;
+ WriteIntervalIndexLeaf(sink, bitsPerLevel, prevKey, prevValue, key, value);
+ return sink.GetCount();
+}
+
+template <typename CellIdValueIterT>
+bool CheckIntervalIndexInputSequence(CellIdValueIterT const & beg,
+ CellIdValueIterT const & end,
+ uint32_t keyBits)
{
-#ifdef DEBUG
// Check that [beg, end) is sorted and log most populous cell.
if (beg != end)
{
@@ -24,8 +62,8 @@ void BuildIntervalIndex(CellIdValueIterT const & beg, CellIdValueIterT const & e
uint64_t prev = it->GetCell();
for (++it; it != end; ++it)
{
- ASSERT_GREATER(it->GetCell(), 0, ());
- ASSERT_LESS_OR_EQUAL(prev, it->GetCell(), ());
+ CHECK_GREATER(it->GetCell(), 0, ());
+ CHECK_LESS_OR_EQUAL(prev, it->GetCell(), ());
count = (prev == it->GetCell() ? count + 1 : 0);
if (count > maxCount)
{
@@ -41,98 +79,108 @@ void BuildIntervalIndex(CellIdValueIterT const & beg, CellIdValueIterT const & e
}
}
for (CellIdValueIterT it = beg; it != end; ++it)
- ASSERT_LESS(it->GetCell(), 1ULL << 8 * kCellIdBytes, ());
-#endif
+ CHECK_LESS(it->GetCell(), 1ULL << keyBits, ());
+ return true;
+}
+
+}
+
+// TODO: BuildIntervalIndex() shouldn't rely on indexing cellid-feature pairs.
+template <typename CellIdValueIterT, class SinkT>
+void BuildIntervalIndex(CellIdValueIterT const & beg, CellIdValueIterT const & end,
+ SinkT & writer, uint8_t const keyBits)
+{
+ CHECK_LESS(keyBits, 63, ());
+ CHECK(impl::CheckIntervalIndexInputSequence(beg, end, keyBits), ());
+
+ uint32_t const bitsPerLevel = 5;
+ uint32_t const lastBitsMask = (1 << bitsPerLevel) - 1;
+ int const levelCount = (keyBits + bitsPerLevel - 1) / bitsPerLevel;
+
// Write header.
{
IntervalIndexBase::Header header;
- header.m_CellIdLeafBytes = leafBytes;
+ header.m_BitsPerLevel = bitsPerLevel;
+ header.m_Levels = levelCount;
writer.Write(&header, sizeof(header));
}
-#ifdef DEBUG
- vector<uint32_t> childOffsets;
- childOffsets.push_back(static_cast<uint32_t>(writer.Pos()));
-#endif
- uint32_t childOffset = static_cast<uint32_t>(writer.Pos()) + sizeof(IntervalIndexBase::Index);
- uint32_t thisLevelCount = 1;
- for (int level = kCellIdBytes - 1; level >= leafBytes; --level)
+ if (beg == end)
+ {
+ // Write empty index.
+ CHECK_GREATER(levelCount, 1, ());
+ impl::WriteIntervalIndexNode(writer, writer.Pos() + sizeof(IntervalIndexBase::Index), 0);
+ LOG(LWARNING, ("Written empty index."));
+ return;
+ }
+
+ // Write internal nodes.
+ uint64_t childOffset = writer.Pos() + sizeof(IntervalIndexBase::Index);
+ uint64_t nextChildOffset = childOffset;
+ for (int level = levelCount - 1; level >= 0; --level)
{
- LOG(LINFO, ("Building interval index, level", level));
-#ifdef DEBUG
- ASSERT_EQUAL(childOffsets.back(), writer.Pos(), ());
- childOffsets.push_back(childOffset);
-#endif
- uint64_t const initialWriterPos = writer.Pos();
- uint32_t childCount = 0, totalChildCount = 0;
- IntervalIndexBase::Index index;
- memset(&index, 0, sizeof(index));
- uint64_t prevParentBytes = 0;
- uint8_t prevByte = 0;
+ // LOG(LINFO, ("Building interval index, level", level));
+ uint64_t const initialLevelWriterPos = writer.Pos();
+
+ uint64_t bitMask = 0;
+ uint64_t prevKey = 0;
+ uint64_t prevValue = 0;
for (CellIdValueIterT it = beg; it != end; ++it)
{
- uint64_t id = it->GetCell();
- uint64_t const thisParentBytes = id >> 8 * (level + 1);
- uint8_t const thisByte = static_cast<uint8_t>(0xFF & (id >> 8 * level));
- if (it != beg && prevParentBytes != thisParentBytes)
- {
- // Writing index for previous parent.
- index.SetBaseOffset(childOffset);
- for (uint32_t i = 0; i < 256; ++i)
- index.m_Count[i] = SwapIfBigEndian(index.m_Count[i]);
- writer.Write(&index, sizeof(index));
- memset(&index, 0, sizeof(index));
- if (level != leafBytes)
- childOffset += childCount * sizeof(index);
- else
- childOffset += (sizeof(beg->GetFeature()) + leafBytes) * childCount;
- childCount = 0;
- --thisLevelCount;
- }
+ uint64_t const key = it->GetCell() >> (level * bitsPerLevel);
+ uint32_t const value = it->GetFeature();
- if (level == leafBytes || it == beg ||
- prevByte != thisByte || prevParentBytes != thisParentBytes)
+ if (it != beg && (prevKey & ~lastBitsMask) != (key & ~lastBitsMask))
{
- ++childCount;
- ++totalChildCount;
- ++index.m_Count[thisByte];
- CHECK_LESS(index.m_Count[thisByte], 65535, (level, leafBytes, prevByte, thisByte, \
- prevParentBytes, thisParentBytes, \
- it->GetCell()));
+ // Write node for the previous parent.
+ impl::WriteIntervalIndexNode(writer, childOffset, bitMask);
+ childOffset = nextChildOffset;
+ bitMask = 0;
}
- prevParentBytes = thisParentBytes;
- prevByte = thisByte;
+ bitMask |= (1ULL << (key & lastBitsMask));
+
+ if (level == 0)
+ nextChildOffset += impl::IntervalIndexLeafSize(bitsPerLevel,
+ prevKey, prevValue, key, value);
+ else if (it == beg || prevKey != key)
+ nextChildOffset += sizeof(IntervalIndexBase::Index);
+
+ prevKey = key;
+ prevValue = value;
}
- index.SetBaseOffset(childOffset);
- for (uint32_t i = 0; i < 256; ++i)
- index.m_Count[i] = SwapIfBigEndian(index.m_Count[i]);
- writer.Write(&index, sizeof(index));
- memset(&index, 0, sizeof(index));
- // if level == leafBytes, this is wrong, but childOffset is not needed any more.
- childOffset += childCount * sizeof(IntervalIndexBase::Index);
- --thisLevelCount;
- CHECK_EQUAL(thisLevelCount, 0, (kCellIdBytes, leafBytes));
- thisLevelCount = totalChildCount;
-
- LOG(LINFO, ("Level size:", writer.Pos() - initialWriterPos));
-
- if (beg == end)
- break;
+
+ // Write the last node.
+ impl::WriteIntervalIndexNode(writer, childOffset, bitMask);
+
+ if (level == 1)
+ nextChildOffset += sizeof(IntervalIndexBase::Index);
+
+ childOffset = nextChildOffset;
+
+ LOG(LINFO, ("Level:", level, "size:", writer.Pos() - initialLevelWriterPos));
}
- // Writing values.
- ASSERT_EQUAL(childOffsets.back(), writer.Pos(), ());
- LOG(LINFO, ("Building interval, leaves."));
- uint64_t const initialWriterPos = writer.Pos();
- uint32_t const mask = (1ULL << 8 * leafBytes) - 1;
- for (CellIdValueIterT it = beg; it != end; ++it)
+ // Write the dummy one-after-last node.
+ impl::WriteIntervalIndexNode(writer, nextChildOffset, 0);
+
+ // Write leaves.
{
- WriteToSink(writer, it->GetFeature());
- uint32_t cellId = static_cast<uint32_t>(it->GetCell() & mask);
- cellId = SwapIfBigEndian(cellId);
- writer.Write(&cellId, leafBytes);
+ uint64_t const initialLevelWriterPos = writer.Pos();
+
+ uint64_t prevKey = -1;
+ uint32_t prevValue = 0;
+ for (CellIdValueIterT it = beg; it != end; ++it)
+ {
+ uint64_t const key = it->GetCell();
+ uint32_t const value = it->GetFeature();
+ impl::WriteIntervalIndexLeaf(writer, bitsPerLevel, prevKey, prevValue, key, value);
+ prevKey = key;
+ prevValue = value;
+ }
+
+ LOG(LINFO, ("Leaves size:", writer.Pos() - initialLevelWriterPos));
}
- LOG(LINFO, ("Level size:", writer.Pos() - initialWriterPos));
+
LOG(LINFO, ("Interval index building done."));
}
diff --git a/indexer/scale_index.hpp b/indexer/scale_index.hpp
index 2f8ac3b0fd..b51c844836 100644
--- a/indexer/scale_index.hpp
+++ b/indexer/scale_index.hpp
@@ -55,7 +55,7 @@ class ScaleIndex : public ScaleIndexBase
{
public:
typedef ReaderT ReaderType;
- typedef IntervalIndex<uint32_t, ReaderT> IntervalIndexType;
+ typedef IntervalIndex<ReaderT> IntervalIndexType;
typedef typename IntervalIndexType::Query Query;
ScaleIndex() {}
diff --git a/indexer/scale_index_builder.hpp b/indexer/scale_index_builder.hpp
index 0eed5590b9..c611688a10 100644
--- a/indexer/scale_index_builder.hpp
+++ b/indexer/scale_index_builder.hpp
@@ -145,7 +145,8 @@ inline void IndexScales(uint32_t bucketsCount,
LOG(LINFO, ("Being indexed", "features:", numFeatures, "cells:", numCells,
"cells per feature:", (numCells + 1.0) / (numFeatures + 1.0)));
SubWriter<WriterT> subWriter(writer);
- BuildIntervalIndex<5>(cellsToFeatures.begin(), cellsToFeatures.end(), subWriter);
+ BuildIntervalIndex(cellsToFeatures.begin(), cellsToFeatures.end(), subWriter,
+ RectId::DEPTH_LEVELS * 2 + 1);
}
FileWriter::DeleteFileX(tmpFilePrefix + ".c2f.sorted");
// LOG(LINFO, ("Indexing done."));