// $Id$ // vim:tabstop=2 /*********************************************************************** Moses - factored phrase-based language decoder Copyright (C) 2006 University of Edinburgh This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ #ifndef moses_StringVector_h #define moses_StringVector_h #include #include #include #include #include #include #include #include "ThrowingFwrite.h" #include "MonotonicVector.h" #include "MmapAllocator.h" namespace Moses { // ********** ValueIteratorRange ********** template class ValueIteratorRange { private: ValueIteratorT m_begin; ValueIteratorT m_end; public: ValueIteratorRange(ValueIteratorT begin, ValueIteratorT end); const ValueIteratorT& begin() const; const ValueIteratorT& end() const; const std::string str() const; operator const std::string() { return str(); } size_t size() { return std::distance(m_begin, m_end); } template bool operator==(const StringT& o) const; bool operator==(const char* c) const; template bool operator<(const StringT& o) const; bool operator<(const char* c) const; }; // ********** StringVector ********** template class Allocator = std::allocator> class StringVector { protected: bool m_sorted; bool m_memoryMapped; std::vector >* m_charArray; MonotonicVector m_positions; virtual const ValueT* value_ptr(PosT i) const; public: typedef ValueIteratorRange >::const_iterator> range; // ********** RangeIterator ********** class RangeIterator : public boost::iterator_facade { private: PosT m_index; StringVector* m_container; public: RangeIterator(); RangeIterator(StringVector &sv, PosT index=0); PosT get_index(); private: friend class boost::iterator_core_access; range dereference() const; bool equal(RangeIterator const& other) const; void increment(); void decrement(); void advance(PosT n); PosT distance_to(RangeIterator const& other) const; }; // ********** StringIterator ********** class StringIterator : public boost::iterator_facade { private: PosT m_index; StringVector* m_container; public: StringIterator(); StringIterator(StringVector &sv, PosT index=0); PosT get_index(); private: friend class boost::iterator_core_access; const std::string dereference() const; bool equal(StringIterator const& other) const; void increment(); void decrement(); void advance(PosT n); PosT distance_to(StringIterator const& other) const; }; typedef RangeIterator iterator; typedef StringIterator string_iterator; StringVector(); StringVector(Allocator alloc); virtual ~StringVector() { delete m_charArray; } void swap(StringVector &c) { m_positions.commit(); m_positions.swap(c.m_positions); m_charArray->swap(*c.m_charArray); bool temp = m_sorted; m_sorted = c.m_sorted; c.m_sorted = temp; } bool is_sorted() const; PosT size() const; virtual PosT size2() const; template Iterator begin() const; template Iterator end() const; iterator begin() const; iterator end() const; PosT length(PosT i) const; typename std::vector >::const_iterator begin(PosT i) const; typename std::vector >::const_iterator end(PosT i) const; void clear() { m_charArray->clear(); m_sorted = true; m_positions = MonotonicVector(); } range at(PosT i) const; range operator[](PosT i) const; range back() const; template void push_back(StringT s); void push_back(const char* c); template PosT find(StringT &s) const; PosT find(const char* c) const; virtual size_t load(std::FILE* in, bool memoryMapped = false) { size_t size = 0; m_memoryMapped = memoryMapped; size += std::fread(&m_sorted, sizeof(bool), 1, in) * sizeof(bool); size += m_positions.load(in, m_memoryMapped); size += loadCharArray(*m_charArray, in, m_memoryMapped); return size; } size_t loadCharArray(std::vector >& c, std::FILE* in, bool map = false) { // Can only be read into memory. Mapping not possible with std:allocator. assert(map == false); size_t byteSize = 0; size_t valSize; byteSize += std::fread(&valSize, sizeof(size_t), 1, in) * sizeof(size_t); c.resize(valSize, 0); byteSize += std::fread(&c[0], sizeof(ValueT), valSize, in) * sizeof(ValueT); return byteSize; } size_t loadCharArray(std::vector >& c, std::FILE* in, bool map = false) { size_t byteSize = 0; size_t valSize; byteSize += std::fread(&valSize, sizeof(size_t), 1, in) * sizeof(size_t); if(map == false) { // Read data into temporary file (default constructor of MmapAllocator) // and map memory onto temporary file. Can be resized. c.resize(valSize, 0); byteSize += std::fread(&c[0], sizeof(ValueT), valSize, in) * sizeof(ValueT); } else { // Map it directly on specified region of file "in" starting at valPos // with length valSize * sizeof(ValueT). Mapped region cannot be resized. size_t valPos = std::ftell(in); Allocator alloc(in, valPos); std::vector > charArrayTemp(alloc); charArrayTemp.resize(valSize); c.swap(charArrayTemp); byteSize += valSize * sizeof(ValueT); } return byteSize; } size_t load(std::string filename, bool memoryMapped = false) { std::FILE* pFile = fopen(filename.c_str(), "r"); size_t byteSize = load(pFile, memoryMapped); fclose(pFile); return byteSize; } size_t save(std::FILE* out) { size_t byteSize = 0; byteSize += ThrowingFwrite(&m_sorted, sizeof(bool), 1, out) * sizeof(bool); byteSize += m_positions.save(out); size_t valSize = size2(); byteSize += ThrowingFwrite(&valSize, sizeof(size_t), 1, out) * sizeof(size_t); byteSize += ThrowingFwrite(&(*m_charArray)[0], sizeof(ValueT), valSize, out) * sizeof(ValueT); return byteSize; } size_t save(std::string filename) { std::FILE* pFile = fopen(filename.c_str(), "w"); size_t byteSize = save(pFile); fclose(pFile); return byteSize; } }; // ********** Implementation ********** // ValueIteratorRange template ValueIteratorRange::ValueIteratorRange(ValueIteratorT begin, ValueIteratorT end) : m_begin(begin), m_end(end) { } template const ValueIteratorT& ValueIteratorRange::begin() const { return m_begin; } template const ValueIteratorT& ValueIteratorRange::end() const { return m_end; } template const std::string ValueIteratorRange::str() const { std::string dummy; for(ValueIteratorT it = m_begin; it != m_end; it++) dummy.push_back(*it); return dummy; } template template bool ValueIteratorRange::operator==(const StringT& o) const { if(std::distance(m_begin, m_end) == std::distance(o.begin(), o.end())) return std::equal(m_begin, m_end, o.begin()); else return false; } template bool ValueIteratorRange::operator==(const char* c) const { return *this == std::string(c); } template template bool ValueIteratorRange::operator<(const StringT &s2) const { return std::lexicographical_compare(m_begin, m_end, s2.begin(), s2.end(), std::less::value_type>()); } template bool ValueIteratorRange::operator<(const char* c) const { return *this < std::string(c); } template bool operator<(const StringT &s1, const ValueIteratorRange &s2) { return std::lexicographical_compare(s1.begin(), s1.end(), s2.begin(), s2.end(), std::less::value_type>()); } template bool operator<(const char* c, const ValueIteratorRange &s2) { size_t len = std::char_traits::length(c); return std::lexicographical_compare(c, c + len, s2.begin(), s2.end(), std::less::value_type>()); } template OStream& operator<<(OStream &os, ValueIteratorRange cr) { ValueIteratorT it = cr.begin(); while(it != cr.end()) os << *(it++); return os; } // StringVector template class Allocator> StringVector::StringVector() : m_sorted(true), m_memoryMapped(false), m_charArray(new std::vector >()) { } template class Allocator> StringVector::StringVector(Allocator alloc) : m_sorted(true), m_memoryMapped(false), m_charArray(new std::vector >(alloc)) { } template class Allocator> template void StringVector::push_back(StringT s) { if(is_sorted() && size() && !(back() < s)) m_sorted = false; m_positions.push_back(size2()); std::copy(s.begin(), s.end(), std::back_inserter(*m_charArray)); } template class Allocator> void StringVector::push_back(const char* c) { std::string dummy(c); push_back(dummy); } template class Allocator> template Iterator StringVector::begin() const { return Iterator(const_cast&>(*this), 0); } template class Allocator> template Iterator StringVector::end() const { return Iterator(const_cast&>(*this), size()); } template class Allocator> typename StringVector::iterator StringVector::begin() const { return begin(); }; template class Allocator> typename StringVector::iterator StringVector::end() const { return end(); }; template class Allocator> bool StringVector::is_sorted() const { return m_sorted; } template class Allocator> PosT StringVector::size() const { return m_positions.size(); } template class Allocator> PosT StringVector::size2() const { return m_charArray->size(); } template class Allocator> typename StringVector::range StringVector::at(PosT i) const { return range(begin(i), end(i)); } template class Allocator> typename StringVector::range StringVector::operator[](PosT i) const { return at(i); } template class Allocator> typename StringVector::range StringVector::back() const { return at(size()-1); } template class Allocator> PosT StringVector::length(PosT i) const { if(i+1 < size()) return m_positions[i+1] - m_positions[i]; else return size2() - m_positions[i]; } template class Allocator> const ValueT* StringVector::value_ptr(PosT i) const { return &(*m_charArray)[m_positions[i]]; } template class Allocator> typename std::vector >::const_iterator StringVector::begin(PosT i) const { return typename std::vector >::const_iterator(value_ptr(i)); } template class Allocator> typename std::vector >::const_iterator StringVector::end(PosT i) const { return typename std::vector >::const_iterator(value_ptr(i) + length(i)); } template class Allocator> template PosT StringVector::find(StringT &s) const { if(m_sorted) return std::distance(begin(), std::lower_bound(begin(), end(), s)); return std::distance(begin(), std::find(begin(), end(), s)); } template class Allocator> PosT StringVector::find(const char* c) const { std::string s(c); return find(s); } // RangeIterator template class Allocator> StringVector::RangeIterator::RangeIterator() : m_index(0), m_container(0) { } template class Allocator> StringVector::RangeIterator::RangeIterator(StringVector &sv, PosT index) : m_index(index), m_container(&sv) { } template class Allocator> PosT StringVector::RangeIterator::get_index() { return m_index; } template class Allocator> typename StringVector::range StringVector::RangeIterator::dereference() const { return typename StringVector::range( m_container->begin(m_index), m_container->end(m_index) ); } template class Allocator> bool StringVector::RangeIterator::equal( StringVector::RangeIterator const& other) const { return m_index == other.m_index && m_container == other.m_container; } template class Allocator> void StringVector::RangeIterator::increment() { m_index++; } template class Allocator> void StringVector::RangeIterator::decrement() { m_index--; } template class Allocator> void StringVector::RangeIterator::advance(PosT n) { m_index += n; } template class Allocator> PosT StringVector::RangeIterator::distance_to( StringVector::RangeIterator const& other) const { return other.m_index - m_index; } // StringIterator template class Allocator> StringVector::StringIterator::StringIterator() : m_index(0), m_container(0) { } template class Allocator> StringVector::StringIterator::StringIterator( StringVector &sv, PosT index) : m_index(index), m_container(&sv) { } template class Allocator> PosT StringVector::StringIterator::get_index() { return m_index; } template class Allocator> const std::string StringVector::StringIterator::dereference() const { return StringVector::range(m_container->begin(m_index), m_container->end(m_index)).str(); } template class Allocator> bool StringVector::StringIterator::equal( StringVector::StringIterator const& other) const { return m_index == other.m_index && m_container == other.m_container; } template class Allocator> void StringVector::StringIterator::increment() { m_index++; } template class Allocator> void StringVector::StringIterator::decrement() { m_index--; } template class Allocator> void StringVector::StringIterator::advance(PosT n) { m_index += n; } template class Allocator> PosT StringVector::StringIterator::distance_to( StringVector::StringIterator const& other) const { return other.m_index - m_index; } // ********** Some typedefs ********** typedef StringVector MediumStringVector; typedef StringVector LongStringVector; } #endif