Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMaxim Pimenov <m@maps.me>2015-06-03 17:35:18 +0300
committerAlex Zolotarev <alex@maps.me>2015-09-23 02:52:07 +0300
commit58fa09b57ec14a0a474e8fa5d476c2a71aaa5874 (patch)
tree38fc26d81418c30ebd8b2f303bda26902e66f590 /indexer/scale_index_builder.hpp
parent3f37dc59dcf8277b55ac6da02440185bcfc68c5b (diff)
[omim] [indexer] Reorder indexing: all buckets for a feature instead of all features for a bucket.
Diffstat (limited to 'indexer/scale_index_builder.hpp')
-rw-r--r--indexer/scale_index_builder.hpp204
1 files changed, 144 insertions, 60 deletions
diff --git a/indexer/scale_index_builder.hpp b/indexer/scale_index_builder.hpp
index fe04d8f069..17c37c0734 100644
--- a/indexer/scale_index_builder.hpp
+++ b/indexer/scale_index_builder.hpp
@@ -1,10 +1,10 @@
#pragma once
-#include "indexer/scale_index.hpp"
+#include "indexer/cell_id.hpp"
+#include "indexer/data_header.hpp"
+#include "indexer/feature.hpp"
#include "indexer/feature_covering.hpp"
#include "indexer/feature_visibility.hpp"
-#include "indexer/feature.hpp"
#include "indexer/interval_index_builder.hpp"
-#include "indexer/cell_id.hpp"
#include "defines.hpp"
@@ -16,13 +16,16 @@
#include "base/base.hpp"
#include "base/logging.hpp"
#include "base/macros.hpp"
+#include "base/scope_guard.hpp"
#include "std/string.hpp"
-#include "std/vector.hpp"
#include "std/utility.hpp"
-#include "std/unordered_set.hpp"
+#include "std/vector.hpp"
+namespace covering
+{
+
class CellFeaturePair
{
public:
@@ -49,68 +52,131 @@ private:
};
STATIC_ASSERT(sizeof(CellFeaturePair) == 12);
+class CellFeatureBucketTuple
+{
+public:
+ CellFeatureBucketTuple() {}
+ CellFeatureBucketTuple(CellFeaturePair p, uint32_t bucket) : m_pair(p), m_bucket(bucket) {}
+
+ bool operator<(CellFeatureBucketTuple const & rhs) const
+ {
+ if (m_bucket != rhs.m_bucket)
+ return m_bucket < rhs.m_bucket;
+ return m_pair < rhs.m_pair;
+ }
+
+ CellFeaturePair GetCellFeaturePair() const { return m_pair; }
+ uint32_t GetBucket() const { return m_bucket; }
+
+private:
+ CellFeaturePair m_pair;
+ uint32_t m_bucket;
+};
+STATIC_ASSERT(sizeof(CellFeatureBucketTuple) == 16);
+
template <class SorterT>
class FeatureCoverer
{
- unordered_set<uint32_t> & m_skipped;
-
public:
- FeatureCoverer(unordered_set<uint32_t> & skipped,
- uint32_t bucket,
- int codingScale,
- SorterT & sorter,
- uint32_t & numFeatures)
- : m_skipped(skipped), m_Sorter(sorter),
- m_codingDepth(covering::GetCodingDepth(codingScale)),
- m_ScaleRange(ScaleIndexBase::ScaleRangeForBucket(bucket)),
- m_NumFeatures(numFeatures)
+ FeatureCoverer(feature::DataHeader const & header, SorterT & sorter, vector<uint32_t> & featuresInBucket, vector<uint32_t> & cellsInBucket)
+ : m_header(header),
+ m_bucketsCount(header.GetLastScale() + 1),
+ m_Sorter(sorter),
+ m_codingDepth(covering::GetCodingDepth(header.GetLastScale())),
+ m_featuresInBucket(featuresInBucket),
+ m_cellsInBucket(cellsInBucket)
{
- ASSERT_LESS(m_ScaleRange.first, m_ScaleRange.second, ());
+ m_featuresInBucket.resize(m_bucketsCount);
+ m_cellsInBucket.resize(m_bucketsCount);
}
-
+
template <class TFeature>
void operator() (TFeature const & f, uint32_t offset) const
{
- if (FeatureShouldBeIndexed(f, offset))
+ uint32_t minScale = 0;
+ bool skip = false;
+ m_scalesIdx = 0;
+ for (uint32_t bucket = 0; bucket < m_bucketsCount; ++bucket)
{
- vector<int64_t> const cells = covering::CoverFeature(f, m_codingDepth, 250);
- for (int64_t cell : cells)
- m_Sorter.Add(CellFeaturePair(cell, offset));
+ // There is a one-to-one correspondence between buckets and scales.
+ // This is not immediately obvious and in fact there was an idea to map
+ // a bucket to a contiguous range of scales.
+ // todo(@pimenov): We probably should remove scale_index.hpp altogether.
+ if (FeatureShouldBeIndexed(f, offset, bucket, skip, minScale))
+ {
+ vector<int64_t> const cells = covering::CoverFeature(f, m_codingDepth, 250);
+ for (int64_t cell : cells)
+ m_Sorter.Add(CellFeatureBucketTuple(CellFeaturePair(cell, offset), bucket));
- ++m_NumFeatures;
+ m_featuresInBucket[bucket] += 1;
+ m_cellsInBucket[bucket] += cells.size();
+ }
}
}
private:
template <class TFeature>
- bool FeatureShouldBeIndexed(TFeature const & f, uint32_t offset) const
+ bool FeatureShouldBeIndexed(TFeature const & f, uint32_t offset, uint32_t scale, bool & skip,
+ uint32_t & minScale) const
{
// Do index features for the first visible interval only once.
// If the feature was skipped as empty for the suitable interval,
// it should be indexed in the next interval where geometry is not empty.
+ bool needReset = (scale == 0);
+ while (m_scalesIdx < m_header.GetScalesCount() && m_header.GetScale(m_scalesIdx) < scale)
+ {
+ ++m_scalesIdx;
+ needReset = true;
+ }
+
+ if (needReset)
+ f.ResetGeometry();
+
// This function invokes geometry reading for the needed scale.
- if (f.IsEmptyGeometry(m_ScaleRange.second - 1))
+ if (f.IsEmptyGeometry(scale))
{
- m_skipped.insert(offset);
+ skip = true;
return false;
}
- // This function assumes that geometry rect for the needed scale is already initialized.
- uint32_t const minScale = feature::GetMinDrawableScale(f);
- if (m_ScaleRange.first <= minScale && minScale < m_ScaleRange.second)
+ if (needReset)
{
- (void) m_skipped.erase(offset);
+ // This function assumes that geometry rect for the needed scale is already initialized.
+ // Note: it works with FeatureBase so in fact it does not use the information about
+ // the feature's geometry except for the type and the LimitRect.
+ minScale = feature::GetMinDrawableScale(f);
+ }
+
+ if (minScale == scale)
+ {
+ skip = false;
return true;
}
-
- return (minScale < m_ScaleRange.first && m_skipped.erase(offset) == 1);
+
+ if (minScale < scale && skip)
+ {
+ skip = false;
+ return true;
+ }
+ return false;
}
+ // We do not need to parse a feature's geometry for every bucket.
+ // The scales at which geometry changes are encoded in the mwm header.
+ // We cannot know them beforehand because they are different for
+ // the common case of a country file and the special case of the world file.
+ // m_scalesIdx is a position in the scales array that should be reset for every feature
+ // and then only move forward. Its purpose is to detect the moments when we
+ // need to reread the feature's geometry.
+ feature::DataHeader const & m_header;
+ mutable size_t m_scalesIdx;
+
+ uint32_t m_bucketsCount;
SorterT & m_Sorter;
int m_codingDepth;
- pair<uint32_t, uint32_t> m_ScaleRange;
- uint32_t & m_NumFeatures;
+ vector<uint32_t> & m_featuresInBucket;
+ vector<uint32_t> & m_cellsInBucket;
};
template <class SinkT>
@@ -131,57 +197,75 @@ private:
};
template <class FeaturesVectorT, class WriterT>
-inline void IndexScales(uint32_t bucketsCount,
- int codingScale,
+inline void IndexScales(feature::DataHeader const & header,
FeaturesVectorT const & featuresVector,
WriterT & writer,
string const & tmpFilePrefix)
{
// TODO: Make scale bucketing dynamic.
- STATIC_ASSERT(sizeof(CellFeaturePair) == 12);
+ int bucketsCount = header.GetLastScale() + 1;
+
+ string const cells2featureAllBucketsFile =
+ tmpFilePrefix + CELL2FEATURE_SORTED_EXT + ".allbuckets";
+ MY_SCOPE_GUARD(cells2featureAllBucketsFileGuard,
+ bind(&FileWriter::DeleteFileX, cells2featureAllBucketsFile));
+ {
+ FileWriter cellsToFeaturesAllBucketsWriter(cells2featureAllBucketsFile);
+
+ typedef FileSorter<CellFeatureBucketTuple, WriterFunctor<FileWriter> > SorterType;
+ WriterFunctor<FileWriter> out(cellsToFeaturesAllBucketsWriter);
+ SorterType sorter(1024 * 1024 /* bufferBytes */, tmpFilePrefix + CELL2FEATURE_TMP_EXT, out);
+ vector<uint32_t> featuresInBucket(bucketsCount);
+ vector<uint32_t> cellsInBucket(bucketsCount);
+ featuresVector.ForEachOffset(FeatureCoverer<SorterType>(header, sorter, featuresInBucket, cellsInBucket));
+ sorter.SortAndFinish();
+
+ for (uint32_t bucket = 0; bucket < bucketsCount; ++bucket)
+ {
+ uint32_t numCells = cellsInBucket[bucket];
+ uint32_t numFeatures = featuresInBucket[bucket];
+ LOG(LINFO, ("Building scale index for bucket:", bucket));
+ double const cellsPerFeature = numFeatures == 0 ? 0.0 : static_cast<double>(numCells) / static_cast<double>(numFeatures);
+ LOG(LINFO, ("Features:", numFeatures, "cells:", numCells, "cells per feature:", cellsPerFeature));
+ }
+ }
- unordered_set<uint32_t> skipped;
+ FileReader reader(cells2featureAllBucketsFile);
+ DDVector<CellFeatureBucketTuple, FileReader, uint64_t> cellsToFeaturesAllBuckets(reader);
VarSerialVectorWriter<WriterT> recordWriter(writer, bucketsCount);
+ auto it = cellsToFeaturesAllBuckets.begin();
+
for (uint32_t bucket = 0; bucket < bucketsCount; ++bucket)
{
- LOG(LINFO, ("Building scale index for bucket:", bucket));
-
- uint32_t numFeatures = 0;
string const cells2featureFile = tmpFilePrefix + CELL2FEATURE_SORTED_EXT;
+ MY_SCOPE_GUARD(cells2featureFileGuard, bind(&FileWriter::DeleteFileX, cells2featureFile));
{
FileWriter cellsToFeaturesWriter(cells2featureFile);
-
- typedef FileSorter<CellFeaturePair, WriterFunctor<FileWriter> > SorterType;
WriterFunctor<FileWriter> out(cellsToFeaturesWriter);
- SorterType sorter(1024*1024, tmpFilePrefix + CELL2FEATURE_TMP_EXT, out);
- featuresVector.ForEachOffset(
- FeatureCoverer<SorterType>(skipped, bucket, codingScale, sorter, numFeatures));
- // LOG(LINFO, ("Sorting..."));
- sorter.SortAndFinish();
+ while (it < cellsToFeaturesAllBuckets.end() && it->GetBucket() == bucket)
+ {
+ out(it->GetCellFeaturePair());
+ ++it;
+ }
}
- // LOG(LINFO, ("Indexing..."));
{
FileReader reader(cells2featureFile);
- uint64_t const numCells = reader.Size() / sizeof(CellFeaturePair);
DDVector<CellFeaturePair, FileReader, uint64_t> cellsToFeatures(reader);
- LOG(LINFO, ("Being indexed", "features:", numFeatures, "cells:", numCells,
- "cells per feature:", (numCells + 1.0) / (numFeatures + 1.0)));
SubWriter<WriterT> subWriter(writer);
- BuildIntervalIndex(cellsToFeatures.begin(), cellsToFeatures.end(), subWriter,
- RectId::DEPTH_LEVELS * 2 + 1);
+ LOG(LINFO, ("Building interval index for bucket:", bucket));
+ BuildIntervalIndex(cellsToFeatures.begin(), cellsToFeatures.end(), subWriter, RectId::DEPTH_LEVELS * 2 + 1);
}
-
- FileWriter::DeleteFileX(cells2featureFile);
- // LOG(LINFO, ("Indexing done."));
recordWriter.FinishRecord();
}
- /// @todo Now we can't check this condition here.
- /// We have stored features that are invisible even on the last zoom for now (coastlines).
- //CHECK(skipped.empty(), ());
+ // todo(@pimenov). There was an old todo here that said there were
+ // features (coastlines) that have been indexed despite being invisible at the last scale.
+ // This should be impossible but it is better to re-check it.
LOG(LINFO, ("All scale indexes done."));
}
+
+} // namespace covering \ No newline at end of file