// $Id$ // vim:tabstop=2 /*********************************************************************** Moses - factored phrase-based language decoder Copyright (C) 2006 University of Edinburgh This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ #ifndef moses_MonotonicVector_h #define moses_MonotonicVector_h // MonotonicVector - Represents a monotonic increasing function that maps // positive integers of any size onto a given number type. Each value has to be // equal or larger than the previous one. Depending on the stepSize it can save // up to 90% of memory compared to a std::vector. Time complexity is roughly // constant, in the worst case, however, stepSize times slower than a normal // std::vector. #include #include #include #include #include #include "ThrowingFwrite.h" #include "ListCoders.h" #include "MmapAllocator.h" namespace Moses { template class Allocator = std::allocator> class MonotonicVector { private: typedef std::vector > Anchors; typedef std::vector > Diffs; Anchors m_anchors; Diffs m_diffs; std::vector m_tempDiffs; size_t m_size; PosT m_last; bool m_final; public: typedef PosT value_type; MonotonicVector() : m_size(0), m_last(0), m_final(false) {} size_t size() const { return m_size + m_tempDiffs.size(); } PosT at(size_t i) const { PosT s = stepSize; PosT j = m_anchors[i / s]; PosT r = i % s; typename Diffs::const_iterator it = m_diffs.begin() + j; PosT k = 0; k += VarInt32::DecodeAndSum(it, m_diffs.end(), 1); if(i < m_size) k += Simple9::DecodeAndSum(it, m_diffs.end(), r); else if(i < m_size + m_tempDiffs.size()) for(size_t l = 0; l < r; l++) k += m_tempDiffs[l]; return k; } PosT operator[](PosT i) const { return at(i); } PosT back() const { return at(size()-1); } void push_back(PosT i) { assert(m_final != true); if(m_anchors.size() == 0 && m_tempDiffs.size() == 0) { m_anchors.push_back(0); VarInt32::Encode(&i, &i+1, std::back_inserter(m_diffs)); m_last = i; m_size++; return; } if(m_tempDiffs.size() == stepSize-1) { Simple9::Encode(m_tempDiffs.begin(), m_tempDiffs.end(), std::back_inserter(m_diffs)); m_anchors.push_back(m_diffs.size()); VarInt32::Encode(&i, &i+1, std::back_inserter(m_diffs)); m_size += m_tempDiffs.size() + 1; m_tempDiffs.clear(); } else { PosT last = m_last; PosT diff = i - last; m_tempDiffs.push_back(diff); } m_last = i; } void commit() { assert(m_final != true); Simple9::Encode(m_tempDiffs.begin(), m_tempDiffs.end(), std::back_inserter(m_diffs)); m_size += m_tempDiffs.size(); m_tempDiffs.clear(); m_final = true; } size_t usage() { return m_diffs.size() * sizeof(unsigned int) + m_anchors.size() * sizeof(NumT); } size_t load(std::FILE* in, bool map = false) { size_t byteSize = 0; byteSize += fread(&m_final, sizeof(bool), 1, in) * sizeof(bool); byteSize += fread(&m_size, sizeof(size_t), 1, in) * sizeof(size_t); byteSize += fread(&m_last, sizeof(PosT), 1, in) * sizeof(PosT); byteSize += loadVector(m_diffs, in, map); byteSize += loadVector(m_anchors, in, map); return byteSize; } template size_t loadVector(std::vector >& v, std::FILE* in, bool map = false) { // Can only be read into memory. Mapping not possible with std:allocator. assert(map == false); size_t byteSize = 0; size_t valSize; byteSize += std::fread(&valSize, sizeof(size_t), 1, in) * sizeof(size_t); v.resize(valSize, 0); byteSize += std::fread(&v[0], sizeof(ValueT), valSize, in) * sizeof(ValueT); return byteSize; } template size_t loadVector(std::vector >& v, std::FILE* in, bool map = false) { size_t byteSize = 0; size_t valSize; byteSize += std::fread(&valSize, sizeof(size_t), 1, in) * sizeof(size_t); if(map == false) { // Read data into temporary file (default constructor of MmapAllocator) // and map memory onto temporary file. Can be resized. v.resize(valSize, 0); byteSize += std::fread(&v[0], sizeof(ValueT), valSize, in) * sizeof(ValueT); } else { // Map it directly on specified region of file "in" starting at valPos // with length valSize * sizeof(ValueT). Mapped region cannot be resized. size_t valPos = std::ftell(in); Allocator alloc(in, valPos); std::vector > vTemp(alloc); vTemp.resize(valSize); v.swap(vTemp); std::fseek(in, valSize * sizeof(ValueT), SEEK_CUR); byteSize += valSize * sizeof(ValueT); } return byteSize; } size_t save(std::FILE* out) { if(!m_final) commit(); bool byteSize = 0; byteSize += ThrowingFwrite(&m_final, sizeof(bool), 1, out) * sizeof(bool); byteSize += ThrowingFwrite(&m_size, sizeof(size_t), 1, out) * sizeof(size_t); byteSize += ThrowingFwrite(&m_last, sizeof(PosT), 1, out) * sizeof(PosT); size_t size = m_diffs.size(); byteSize += ThrowingFwrite(&size, sizeof(size_t), 1, out) * sizeof(size_t); byteSize += ThrowingFwrite(&m_diffs[0], sizeof(unsigned int), size, out) * sizeof(unsigned int); size = m_anchors.size(); byteSize += ThrowingFwrite(&size, sizeof(size_t), 1, out) * sizeof(size_t); byteSize += ThrowingFwrite(&m_anchors[0], sizeof(NumT), size, out) * sizeof(NumT); return byteSize; } void swap(MonotonicVector &mv) { if(!m_final) commit(); m_diffs.swap(mv.m_diffs); m_anchors.swap(mv.m_anchors); } }; } #endif