diff options
author | Maxim Pimenov <m@maps.me> | 2015-06-03 17:35:18 +0300 |
---|---|---|
committer | Alex Zolotarev <alex@maps.me> | 2015-09-23 02:52:07 +0300 |
commit | 58fa09b57ec14a0a474e8fa5d476c2a71aaa5874 (patch) | |
tree | 38fc26d81418c30ebd8b2f303bda26902e66f590 /indexer/scale_index_builder.hpp | |
parent | 3f37dc59dcf8277b55ac6da02440185bcfc68c5b (diff) |
[omim] [indexer] Reorder indexing: all buckets for a feature instead of all features for a bucket.
Diffstat (limited to 'indexer/scale_index_builder.hpp')
-rw-r--r-- | indexer/scale_index_builder.hpp | 204 |
1 files changed, 144 insertions, 60 deletions
diff --git a/indexer/scale_index_builder.hpp b/indexer/scale_index_builder.hpp index fe04d8f069..17c37c0734 100644 --- a/indexer/scale_index_builder.hpp +++ b/indexer/scale_index_builder.hpp @@ -1,10 +1,10 @@ #pragma once -#include "indexer/scale_index.hpp" +#include "indexer/cell_id.hpp" +#include "indexer/data_header.hpp" +#include "indexer/feature.hpp" #include "indexer/feature_covering.hpp" #include "indexer/feature_visibility.hpp" -#include "indexer/feature.hpp" #include "indexer/interval_index_builder.hpp" -#include "indexer/cell_id.hpp" #include "defines.hpp" @@ -16,13 +16,16 @@ #include "base/base.hpp" #include "base/logging.hpp" #include "base/macros.hpp" +#include "base/scope_guard.hpp" #include "std/string.hpp" -#include "std/vector.hpp" #include "std/utility.hpp" -#include "std/unordered_set.hpp" +#include "std/vector.hpp" +namespace covering +{ + class CellFeaturePair { public: @@ -49,68 +52,131 @@ private: }; STATIC_ASSERT(sizeof(CellFeaturePair) == 12); +class CellFeatureBucketTuple +{ +public: + CellFeatureBucketTuple() {} + CellFeatureBucketTuple(CellFeaturePair p, uint32_t bucket) : m_pair(p), m_bucket(bucket) {} + + bool operator<(CellFeatureBucketTuple const & rhs) const + { + if (m_bucket != rhs.m_bucket) + return m_bucket < rhs.m_bucket; + return m_pair < rhs.m_pair; + } + + CellFeaturePair GetCellFeaturePair() const { return m_pair; } + uint32_t GetBucket() const { return m_bucket; } + +private: + CellFeaturePair m_pair; + uint32_t m_bucket; +}; +STATIC_ASSERT(sizeof(CellFeatureBucketTuple) == 16); + template <class SorterT> class FeatureCoverer { - unordered_set<uint32_t> & m_skipped; - public: - FeatureCoverer(unordered_set<uint32_t> & skipped, - uint32_t bucket, - int codingScale, - SorterT & sorter, - uint32_t & numFeatures) - : m_skipped(skipped), m_Sorter(sorter), - m_codingDepth(covering::GetCodingDepth(codingScale)), - m_ScaleRange(ScaleIndexBase::ScaleRangeForBucket(bucket)), - m_NumFeatures(numFeatures) + FeatureCoverer(feature::DataHeader const & header, SorterT & sorter, vector<uint32_t> & featuresInBucket, vector<uint32_t> & cellsInBucket) + : m_header(header), + m_bucketsCount(header.GetLastScale() + 1), + m_Sorter(sorter), + m_codingDepth(covering::GetCodingDepth(header.GetLastScale())), + m_featuresInBucket(featuresInBucket), + m_cellsInBucket(cellsInBucket) { - ASSERT_LESS(m_ScaleRange.first, m_ScaleRange.second, ()); + m_featuresInBucket.resize(m_bucketsCount); + m_cellsInBucket.resize(m_bucketsCount); } - + template <class TFeature> void operator() (TFeature const & f, uint32_t offset) const { - if (FeatureShouldBeIndexed(f, offset)) + uint32_t minScale = 0; + bool skip = false; + m_scalesIdx = 0; + for (uint32_t bucket = 0; bucket < m_bucketsCount; ++bucket) { - vector<int64_t> const cells = covering::CoverFeature(f, m_codingDepth, 250); - for (int64_t cell : cells) - m_Sorter.Add(CellFeaturePair(cell, offset)); + // There is a one-to-one correspondence between buckets and scales. + // This is not immediately obvious and in fact there was an idea to map + // a bucket to a contiguous range of scales. + // todo(@pimenov): We probably should remove scale_index.hpp altogether. + if (FeatureShouldBeIndexed(f, offset, bucket, skip, minScale)) + { + vector<int64_t> const cells = covering::CoverFeature(f, m_codingDepth, 250); + for (int64_t cell : cells) + m_Sorter.Add(CellFeatureBucketTuple(CellFeaturePair(cell, offset), bucket)); - ++m_NumFeatures; + m_featuresInBucket[bucket] += 1; + m_cellsInBucket[bucket] += cells.size(); + } } } private: template <class TFeature> - bool FeatureShouldBeIndexed(TFeature const & f, uint32_t offset) const + bool FeatureShouldBeIndexed(TFeature const & f, uint32_t offset, uint32_t scale, bool & skip, + uint32_t & minScale) const { // Do index features for the first visible interval only once. // If the feature was skipped as empty for the suitable interval, // it should be indexed in the next interval where geometry is not empty. + bool needReset = (scale == 0); + while (m_scalesIdx < m_header.GetScalesCount() && m_header.GetScale(m_scalesIdx) < scale) + { + ++m_scalesIdx; + needReset = true; + } + + if (needReset) + f.ResetGeometry(); + // This function invokes geometry reading for the needed scale. - if (f.IsEmptyGeometry(m_ScaleRange.second - 1)) + if (f.IsEmptyGeometry(scale)) { - m_skipped.insert(offset); + skip = true; return false; } - // This function assumes that geometry rect for the needed scale is already initialized. - uint32_t const minScale = feature::GetMinDrawableScale(f); - if (m_ScaleRange.first <= minScale && minScale < m_ScaleRange.second) + if (needReset) { - (void) m_skipped.erase(offset); + // This function assumes that geometry rect for the needed scale is already initialized. + // Note: it works with FeatureBase so in fact it does not use the information about + // the feature's geometry except for the type and the LimitRect. + minScale = feature::GetMinDrawableScale(f); + } + + if (minScale == scale) + { + skip = false; return true; } - - return (minScale < m_ScaleRange.first && m_skipped.erase(offset) == 1); + + if (minScale < scale && skip) + { + skip = false; + return true; + } + return false; } + // We do not need to parse a feature's geometry for every bucket. + // The scales at which geometry changes are encoded in the mwm header. + // We cannot know them beforehand because they are different for + // the common case of a country file and the special case of the world file. + // m_scaleIdx is a position in the scales array that should be reset for every feature + // and then only move forward. Its purpose is to detect the moments when we + // need to reread the feature's geometry. + feature::DataHeader const & m_header; + mutable size_t m_scalesIdx; + + uint32_t m_bucketsCount; SorterT & m_Sorter; int m_codingDepth; - pair<uint32_t, uint32_t> m_ScaleRange; - uint32_t & m_NumFeatures; + vector<uint32_t> & m_featuresInBucket; + vector<uint32_t> & m_cellsInBucket; }; template <class SinkT> @@ -131,57 +197,75 @@ private: }; template <class FeaturesVectorT, class WriterT> -inline void IndexScales(uint32_t bucketsCount, - int codingScale, +inline void IndexScales(feature::DataHeader const & header, FeaturesVectorT const & featuresVector, WriterT & writer, string const & tmpFilePrefix) { // TODO: Make scale bucketing dynamic. - STATIC_ASSERT(sizeof(CellFeaturePair) == 12); + int bucketsCount = header.GetLastScale() + 1; + + string const cells2featureAllBucketsFile = + tmpFilePrefix + CELL2FEATURE_SORTED_EXT + ".allbuckets"; + MY_SCOPE_GUARD(cells2featureAllBucketsFileGuard, + bind(&FileWriter::DeleteFileX, cells2featureAllBucketsFile)); + { + FileWriter cellsToFeaturesAllBucketsWriter(cells2featureAllBucketsFile); + + typedef FileSorter<CellFeatureBucketTuple, WriterFunctor<FileWriter> > SorterType; + WriterFunctor<FileWriter> out(cellsToFeaturesAllBucketsWriter); + SorterType sorter(1024 * 1024 /* bufferBytes */, tmpFilePrefix + CELL2FEATURE_TMP_EXT, out); + vector<uint32_t> featuresInBucket(bucketsCount); + vector<uint32_t> cellsInBucket(bucketsCount); + featuresVector.ForEachOffset(FeatureCoverer<SorterType>(header, sorter, featuresInBucket, cellsInBucket)); + sorter.SortAndFinish(); + + for (uint32_t bucket = 0; bucket < bucketsCount; ++bucket) + { + uint32_t numCells = cellsInBucket[bucket]; + uint32_t numFeatures = featuresInBucket[bucket]; + LOG(LINFO, ("Building scale index for bucket:", bucket)); + double const cellsPerFeature = numFeatures == 0 ? 0.0 : static_cast<double>(numCells) / static_cast<double>(numFeatures); + LOG(LINFO, ("Features:", numFeatures, "cells:", numCells, "cells per feature:", cellsPerFeature)); + } + } - unordered_set<uint32_t> skipped; + FileReader reader(cells2featureAllBucketsFile); + DDVector<CellFeatureBucketTuple, FileReader, uint64_t> cellsToFeaturesAllBuckets(reader); VarSerialVectorWriter<WriterT> recordWriter(writer, bucketsCount); + auto it = cellsToFeaturesAllBuckets.begin(); + for (uint32_t bucket = 0; bucket < bucketsCount; ++bucket) { - LOG(LINFO, ("Building scale index for bucket:", bucket)); - - uint32_t numFeatures = 0; string const cells2featureFile = tmpFilePrefix + CELL2FEATURE_SORTED_EXT; + MY_SCOPE_GUARD(cells2featureFileGuard, bind(&FileWriter::DeleteFileX, cells2featureFile)); { FileWriter cellsToFeaturesWriter(cells2featureFile); - - typedef FileSorter<CellFeaturePair, WriterFunctor<FileWriter> > SorterType; WriterFunctor<FileWriter> out(cellsToFeaturesWriter); - SorterType sorter(1024*1024, tmpFilePrefix + CELL2FEATURE_TMP_EXT, out); - featuresVector.ForEachOffset( - FeatureCoverer<SorterType>(skipped, bucket, codingScale, sorter, numFeatures)); - // LOG(LINFO, ("Sorting...")); - sorter.SortAndFinish(); + while (it < cellsToFeaturesAllBuckets.end() && it->GetBucket() == bucket) + { + out(it->GetCellFeaturePair()); + ++it; + } } - // LOG(LINFO, ("Indexing...")); { FileReader reader(cells2featureFile); - uint64_t const numCells = reader.Size() / sizeof(CellFeaturePair); DDVector<CellFeaturePair, FileReader, uint64_t> cellsToFeatures(reader); - LOG(LINFO, ("Being indexed", "features:", numFeatures, "cells:", numCells, - "cells per feature:", (numCells + 1.0) / (numFeatures + 1.0))); SubWriter<WriterT> subWriter(writer); - BuildIntervalIndex(cellsToFeatures.begin(), cellsToFeatures.end(), subWriter, - RectId::DEPTH_LEVELS * 2 + 1); + LOG(LINFO, ("Building interval index for bucket:", bucket)); + BuildIntervalIndex(cellsToFeatures.begin(), cellsToFeatures.end(), subWriter, RectId::DEPTH_LEVELS * 2 + 1); } - - FileWriter::DeleteFileX(cells2featureFile); - // LOG(LINFO, ("Indexing done.")); recordWriter.FinishRecord(); } - /// @todo Now we can't check this condition here. - /// We have stored features that are invisible even on the last zoom for now (coastlines). - //CHECK(skipped.empty(), ()); + // todo(@pimenov). There was an old todo here that said there were + // features (coastlines) that have been indexed despite being invisible at the last scale. + // This should be impossible but it is better to re-check it. LOG(LINFO, ("All scale indexes done.")); } + +} // namespace covering
\ No newline at end of file |