Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/training/eppex/IndexedPhrasesPair.h')
-rw-r--r-- scripts/training/eppex/IndexedPhrasesPair.h 316
1 files changed, 0 insertions, 316 deletions
diff --git a/scripts/training/eppex/IndexedPhrasesPair.h b/scripts/training/eppex/IndexedPhrasesPair.h
deleted file mode 100644
index 18e3a39fd..000000000
--- a/scripts/training/eppex/IndexedPhrasesPair.h
+++ /dev/null
@@ -1,316 +0,0 @@
-/**
- * IndexedPhrasesPair - implementation of a single phrase pair source and target
- * phrases represented by numeric indices.
- *
- * (C) Ceslav Przywara, UFAL MFF UK, 2011
- *
- * $Id$
- *
- * TODO:
- * - The current unordered_map implementation is terribly slow. A more
- * sophisticated hash function design should help.
- */
-
-#ifndef INDEXEDPHRASESPAIR_H
-#define INDEXEDPHRASESPAIR_H
-
-#include <vector>
-#include <new>
-#include <algorithm>
-#include <limits>
-#include <stdexcept>
-#include <string.h>
-#ifdef USE_UNORDERED_MAP
-#include <tr1/functional_hash.h>
-#endif
-
-// Forward declaration: the comparison operator templates declared below take IndexedPhrasesPair parameters before the class itself is defined.
-template<class OrientationIndexType, class TokenIndexType> class IndexedPhrasesPair;
-
-#ifdef USE_UNORDERED_MAP
-template<class OrientationIndexType, class TokenIndexType>
-class IndexedPhrasePairHasher; // Hashing functor; IndexedPhrasesPair declares it as a friend.
-
-// Equality operator for the USE_UNORDERED_MAP build (is going to be declared as friend).
-template<class OrientationIndexType, class TokenIndexType>
-bool operator== (const IndexedPhrasesPair<OrientationIndexType, TokenIndexType>& lhs, const IndexedPhrasesPair<OrientationIndexType, TokenIndexType>& rhs);
-#else
-// Ordering operator for the build without USE_UNORDERED_MAP (is going to be declared as friend).
-template<class OrientationIndexType, class TokenIndexType>
-bool operator< (const IndexedPhrasesPair<OrientationIndexType, TokenIndexType>& lhs, const IndexedPhrasesPair<OrientationIndexType, TokenIndexType>& rhs);
-#endif
-
-/**
- * Structure capable of holding a phrase pair consisting of:
- * a) source phrase
- * b) target phrase
- * c) word alignment
- * d) orientation info
- *
- * Both phrases are kept in one heap array of token indices (source tokens
- * first, target tokens right after them) and the alignment in a second heap
- * array of flattened points. Both arrays are owned by the object and are
- * released by its destructor.
- *
- * All three lengths are stored as alignment_point_t (unsigned char), so a
- * phrase or an alignment with more items than that type can count (typically
- * 255) cannot be represented.
- *
- * @param OrientationIndexType - datatype for the orientation info index.
- * @param TokenIndexType - datatype for token indices.
- */
-template<class OrientationIndexType = unsigned char, class TokenIndexType = unsigned int>
-class IndexedPhrasesPair {
-public:
-
- typedef TokenIndexType token_index_t; // Type of a single token index.
-
- typedef OrientationIndexType orientation_info_index_t; // Type of the orientation info index.
-
- typedef std::vector<TokenIndexType> phrase_t; // A phrase as a sequence of token indices.
-
- // A single alignment point; also the type in which all lengths are stored.
- typedef unsigned char alignment_point_t;
-
- // A single pair of alignment points.
- typedef std::pair<alignment_point_t, alignment_point_t> alignment_pair_t;
-
- // A single phrase alignment (e.g. 0-0 0-1 1-2).
- typedef std::vector<alignment_pair_t> alignment_t;
-
-#ifdef USE_UNORDERED_MAP
- // An unordered map requires a hashing functor object.
- typedef IndexedPhrasePairHasher<OrientationIndexType, TokenIndexType> Hash;
-#endif
-
-private:
-
- /** @var Source phrase tokens followed by target phrase tokens, as one array of token indices */
- token_index_t* _data;
-
- /** @var Phrase alignment flattened into one array: two consecutive points per alignment pair */
- alignment_point_t* _alignment;
-
- /** @var Index of orientation info string */
- orientation_info_index_t _orientationInfoIndex;
-
- alignment_point_t _srcPhraseLength; // Number of source tokens at the start of _data.
-
- alignment_point_t _tgtPhraseLength; // Number of target tokens following the source tokens in _data.
-
- alignment_point_t _alignmentLength; // Number of alignment pairs; _alignment holds twice as many points.
-
-public:
-
- IndexedPhrasesPair(void): _data(NULL), _alignment(NULL), _orientationInfoIndex(0), _srcPhraseLength(0), _tgtPhraseLength(0), _alignmentLength(0) {} // Empty pair: no arrays are allocated.
-
- IndexedPhrasesPair(const phrase_t& srcPhrase, const phrase_t& tgtPhrase, orientation_info_index_t orientationInfo, const alignment_t& alignment); // Copies the given phrases and alignment into newly allocated arrays.
-
- IndexedPhrasesPair(const IndexedPhrasesPair<OrientationIndexType, TokenIndexType>& copy); // Deep copy: the new object gets its own arrays.
-
- ~IndexedPhrasesPair(void); // Releases both arrays.
-
- IndexedPhrasesPair<OrientationIndexType, TokenIndexType>& operator=(const IndexedPhrasesPair<OrientationIndexType, TokenIndexType>& other); // Deep copy assignment.
-
- phrase_t srcPhrase(void) const { return phrase_t(_data, _data + _srcPhraseLength); } // Returns a copy of the source tokens.
-
- phrase_t tgtPhrase(void) const { return phrase_t(_data + _srcPhraseLength, _data + _srcPhraseLength + _tgtPhraseLength); } // Returns a copy of the target tokens.
-
- orientation_info_index_t orientationInfo(void) const { return _orientationInfoIndex; } // Returns the stored orientation info index.
-
- alignment_t alignment(void) const; // Rebuilds the alignment as a vector of pairs.
-
- const alignment_point_t * alignmentData(void) const { return _alignment; } // Flattened alignment points; the array stays owned by this object.
-
- alignment_point_t alignmentLength(void) const { return _alignmentLength; } // Number of alignment pairs.
-
-#ifdef USE_UNORDERED_MAP
- friend class IndexedPhrasePairHasher<OrientationIndexType, TokenIndexType>;
- friend bool operator== <> (const IndexedPhrasesPair<OrientationIndexType, TokenIndexType>& lhs, const IndexedPhrasesPair<OrientationIndexType, TokenIndexType>& rhs);
-#else
- friend bool operator< <> (const IndexedPhrasesPair<OrientationIndexType, TokenIndexType>& lhs, const IndexedPhrasesPair<OrientationIndexType, TokenIndexType>& rhs);
-#endif
-};
-
-
-////////////////////////////////////////////////////////////////////////////////
-//// IndexedPhrasesPair IMPLEMENTATION /////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////
-
-/**
- * Builds a phrase pair from its parts. Both phrases and the alignment are
- * copied into arrays owned by the new object.
- *
- * @param srcPhrase       token indices of the source phrase
- * @param tgtPhrase       token indices of the target phrase
- * @param orientationInfo index of the orientation info string
- * @param alignment       alignment pairs of the phrase pair
- * @throws std::length_error if a phrase or the alignment has more items than
- *         alignment_point_t can count (all lengths are stored in that type).
- * @throws std::bad_alloc    if one of the arrays cannot be allocated.
- */
-template<class OrientationIndexType, class TokenIndexType>
-IndexedPhrasesPair<OrientationIndexType, TokenIndexType>::IndexedPhrasesPair(const phrase_t& srcPhrase, const phrase_t& tgtPhrase, orientation_info_index_t orientationInfo, const alignment_t& alignment):
-    _data(NULL), _alignment(NULL), _orientationInfoIndex(orientationInfo), _srcPhraseLength(0), _tgtPhraseLength(0), _alignmentLength(0) {
-
-    // The lengths are stored as alignment_point_t. Silently truncating a larger
-    // size would make the arrays below shorter than the input copied into
-    // them, so oversized input is rejected up front.
-    const size_t maxLength = std::numeric_limits<alignment_point_t>::max();
-    if ( srcPhrase.size() > maxLength || tgtPhrase.size() > maxLength || alignment.size() > maxLength ) {
-        throw std::length_error("IndexedPhrasesPair: phrase or alignment too long");
-    }
-    _srcPhraseLength = static_cast<alignment_point_t>(srcPhrase.size());
-    _tgtPhraseLength = static_cast<alignment_point_t>(tgtPhrase.size());
-    _alignmentLength = static_cast<alignment_point_t>(alignment.size());
-
-    // Save alignment.
-    _alignment = new alignment_point_t[2 * _alignmentLength]; // Note: *2 for each pair.
-    for ( size_t i = 0; i < alignment.size(); ++i ) {
-        _alignment[i*2] = alignment[i].first;
-        _alignment[i*2 + 1] = alignment[i].second;
-    }
-
-    // Save data.
-    try {
-        _data = new token_index_t[_srcPhraseLength + _tgtPhraseLength];
-    } catch (...) {
-        // The destructor does not run for an object whose constructor throws,
-        // so the alignment array has to be released here.
-        delete[] _alignment;
-        throw;
-    }
-    std::copy(srcPhrase.begin(), srcPhrase.end(), _data);
-    std::copy(tgtPhrase.begin(), tgtPhrase.end(), _data + _srcPhraseLength);
-
-}
-
-/**
- * Copy constructor: the new object gets its own copies of both arrays.
- *
- * @param copy phrase pair to duplicate; it may be a default-constructed
- *             (empty) pair whose arrays are NULL.
- * @throws std::bad_alloc if one of the arrays cannot be allocated; nothing is
- *         leaked in that case.
- */
-template<class OrientationIndexType, class TokenIndexType>
-IndexedPhrasesPair<OrientationIndexType, TokenIndexType>::IndexedPhrasesPair(const IndexedPhrasesPair<OrientationIndexType, TokenIndexType>& copy):
-    _data(NULL), _alignment(NULL), _orientationInfoIndex(copy._orientationInfoIndex), _srcPhraseLength(copy._srcPhraseLength), _tgtPhraseLength(copy._tgtPhraseLength), _alignmentLength(copy._alignmentLength) {
-
-    const size_t alignmentItems = 2 * static_cast<size_t>(_alignmentLength); // Note: *2 for each pair.
-    const size_t dataItems = static_cast<size_t>(_srcPhraseLength) + _tgtPhraseLength;
-
-    // Allocate both arrays first.
-    _alignment = new alignment_point_t[alignmentItems];
-    try {
-        _data = new token_index_t[dataItems];
-    } catch (...) {
-        // The destructor does not run for an object whose constructor throws,
-        // so the alignment array has to be released here.
-        delete[] _alignment;
-        throw;
-    }
-
-    // std::copy instead of memcpy: the source pointers are NULL when an empty
-    // pair is copied, and memcpy must not be given a NULL pointer even for a
-    // zero-byte copy.
-    std::copy(copy._alignment, copy._alignment + alignmentItems, _alignment);
-    std::copy(copy._data, copy._data + dataItems, _data);
-}
-
-/**
- * Releases the token array and the alignment array owned by the phrase pair.
- */
-template<class OrientationIndexType, class TokenIndexType>
-IndexedPhrasesPair<OrientationIndexType, TokenIndexType>::~IndexedPhrasesPair(void) {
-    // delete[] on a NULL pointer does nothing, so an empty pair needs no check.
-    delete[] _data;
-    delete[] _alignment;
-}
-
-/**
- * Copy assignment: replaces the content of this pair with a deep copy of
- * another one.
- *
- * Both replacement arrays are allocated and filled before any member of this
- * object is modified, so a failed allocation leaves the object unchanged.
- *
- * @param other phrase pair to copy from; self-assignment is a no-op.
- * @return reference to this object.
- * @throws std::bad_alloc if one of the arrays cannot be allocated.
- */
-template<class OrientationIndexType, class TokenIndexType>
-IndexedPhrasesPair<OrientationIndexType, TokenIndexType>& IndexedPhrasesPair<OrientationIndexType, TokenIndexType>::operator=(const IndexedPhrasesPair<OrientationIndexType, TokenIndexType>& other) {
-
-    if ( this == &other ) {
-        return *this;
-    }
-
-    const size_t alignmentItems = 2 * static_cast<size_t>(other._alignmentLength); // Note: *2 for each pair.
-    const size_t dataItems = static_cast<size_t>(other._srcPhraseLength) + other._tgtPhraseLength;
-
-    // Acquire both replacement arrays before touching any member.
-    alignment_point_t * alignment = new alignment_point_t[alignmentItems];
-    token_index_t * data = NULL;
-    try {
-        data = new token_index_t[dataItems];
-    } catch (...) {
-        delete[] alignment;
-        throw;
-    }
-
-    // std::copy instead of memcpy: the source pointers are NULL when an empty
-    // pair is assigned, and memcpy must not be given a NULL pointer even for a
-    // zero-byte copy.
-    std::copy(other._alignment, other._alignment + alignmentItems, alignment);
-    std::copy(other._data, other._data + dataItems, data);
-
-    // Commit. delete[] accepts NULL, so no check is needed.
-    delete[] _alignment;
-    delete[] _data;
-    _alignment = alignment;
-    _data = data;
-    _alignmentLength = other._alignmentLength;
-    _srcPhraseLength = other._srcPhraseLength;
-    _tgtPhraseLength = other._tgtPhraseLength;
-    _orientationInfoIndex = other._orientationInfoIndex;
-
-    return *this;
-}
-
-/**
- * Rebuilds the phrase alignment from the flattened internal array.
- *
- * @return vector with one (first, second) pair per stored alignment pair, in
- *         stored order; empty when the pair holds no alignment.
- */
-template<class OrientationIndexType, class TokenIndexType>
-typename IndexedPhrasesPair<OrientationIndexType, TokenIndexType>::alignment_t IndexedPhrasesPair<OrientationIndexType, TokenIndexType>::alignment(void) const {
-    alignment_t pairs;
-
-    // Walk the flattened array two points at a time.
-    const alignment_point_t * point = _alignment;
-    for ( alignment_point_t remaining = _alignmentLength; remaining > 0; --remaining, point += 2 ) {
-        pairs.push_back(alignment_pair_t(point[0], point[1]));
-    }
-
-    return pairs;
-}
-
-#ifdef USE_UNORDERED_MAP
-/**
- * Hashing functor for IndexedPhrasesPair, used when phrase pairs are stored
- * in an unordered map.
- *
- * Only the token indices of both phrases are hashed. Alignment and
- * orientation info are ignored, which is consistent with operator== because
- * equal pairs always have equal tokens.
- */
-template<class OrientationIndexType, class TokenIndexType>
-class IndexedPhrasePairHasher: public std::unary_function<IndexedPhrasesPair<OrientationIndexType, TokenIndexType>, size_t> {
-
-    std::tr1::hash<TokenIndexType> _hash;
-
-public:
-    /**
-     * @param phrasePair phrase pair to hash
-     * @return hash value computed from the source and target tokens
-     */
-    size_t operator()(const IndexedPhrasesPair<OrientationIndexType, TokenIndexType>& phrasePair) const {
-        // The sum of two unsigned char lengths can exceed the range of an
-        // unsigned char, so both the bound and the index are size_t. An
-        // unsigned char index would wrap around and never reach the bound.
-        const size_t tokenCount = static_cast<size_t>(phrasePair._srcPhraseLength) + phrasePair._tgtPhraseLength;
-
-        size_t hash = 0;
-        for ( size_t i = 0; i < tokenCount; ++i ) {
-            // Order-sensitive combine. A plain XOR gives the same value for
-            // any permutation of the tokens and cancels repeated tokens out.
-            hash ^= _hash(phrasePair._data[i]) + 0x9e3779b9 + (hash << 6) + (hash >> 2);
-        }
-        return hash;
-    }
-};
-
-/**
- * Equality of two phrase pairs: same alignment, same source and target
- * phrases and same orientation info.
- *
- * The checks run in this order and stop at the first mismatch: alignment
- * length, alignment points, source length, target length, tokens,
- * orientation info.
- */
-template<class OrientationIndexType, class TokenIndexType>
-bool operator== (const IndexedPhrasesPair<OrientationIndexType, TokenIndexType>& lhs, const IndexedPhrasesPair<OrientationIndexType, TokenIndexType>& rhs) {
-
-    typedef IndexedPhrasesPair<OrientationIndexType, TokenIndexType> pair_t;
-    typedef typename pair_t::alignment_point_t point_t;
-    typedef typename pair_t::token_index_t token_t;
-
-    // Byte sizes are taken from lhs; they are only used once the matching
-    // lengths of rhs have been found equal.
-    const size_t alignmentBytes = lhs._alignmentLength * 2 * sizeof(point_t);
-    const size_t dataBytes = (lhs._srcPhraseLength + lhs._tgtPhraseLength) * sizeof(token_t);
-
-    return lhs._alignmentLength == rhs._alignmentLength
-        && memcmp(lhs._alignment, rhs._alignment, alignmentBytes) == 0
-        && lhs._srcPhraseLength == rhs._srcPhraseLength
-        && lhs._tgtPhraseLength == rhs._tgtPhraseLength
-        && memcmp(lhs._data, rhs._data, dataBytes) == 0
-        && lhs._orientationInfoIndex == rhs._orientationInfoIndex;
-}
-#else
-/**
- * Strict ordering of two phrase pairs. The keys are compared in this order
- * and the first one that differs decides: alignment length, alignment points
- * (bytewise), source length, target length, tokens (bytewise), orientation
- * info.
- */
-template<class OrientationIndexType, class StringIndexType>
-bool operator< (const IndexedPhrasesPair<OrientationIndexType, StringIndexType>& lhs, const IndexedPhrasesPair<OrientationIndexType, StringIndexType>& rhs) {
-
-    typedef IndexedPhrasesPair<OrientationIndexType, StringIndexType> pair_t;
-    typedef typename pair_t::alignment_point_t point_t;
-    typedef typename pair_t::token_index_t token_t;
-
-    // Shorter alignment => lesser item.
-    if ( lhs._alignmentLength < rhs._alignmentLength ) {
-        return true;
-    }
-    if ( rhs._alignmentLength < lhs._alignmentLength ) {
-        return false;
-    }
-
-    // Same number of alignment pairs: order by the raw alignment points.
-    const int alignmentOrder = memcmp(lhs._alignment, rhs._alignment, lhs._alignmentLength * 2 * sizeof(point_t));
-    if ( alignmentOrder != 0 ) {
-        return alignmentOrder < 0;
-    }
-
-    // Identical alignments: the shorter source phrase, then the shorter
-    // target phrase, comes first.
-    if ( lhs._srcPhraseLength < rhs._srcPhraseLength ) {
-        return true;
-    }
-    if ( rhs._srcPhraseLength < lhs._srcPhraseLength ) {
-        return false;
-    }
-    if ( lhs._tgtPhraseLength < rhs._tgtPhraseLength ) {
-        return true;
-    }
-    if ( rhs._tgtPhraseLength < lhs._tgtPhraseLength ) {
-        return false;
-    }
-
-    // Phrases have matching lengths: order by the raw token data.
-    const int dataOrder = memcmp(lhs._data, rhs._data, (lhs._srcPhraseLength + lhs._tgtPhraseLength) * sizeof(token_t));
-    if ( dataOrder != 0 ) {
-        return dataOrder < 0;
-    }
-
-    // Everything else is equal: the orientation info decides.
-    return lhs._orientationInfoIndex < rhs._orientationInfoIndex;
-}
-#endif
-
-#endif /* INDEXEDPHRASESPAIR_H */