diff options
author | Christophe SERVAN <cservan@trou-de-fer.grenoble.xrce.xerox.com> | 2015-02-16 21:02:46 +0300 |
---|---|---|
committer | Christophe SERVAN <cservan@trou-de-fer.grenoble.xrce.xerox.com> | 2015-02-16 21:02:46 +0300 |
commit | 87a4f1954619a4a7eec531349e0865b245df7a0b (patch) | |
tree | fcbf12e7bbbf32e866e127844224435277e31fbf /mert | |
parent | f6884c55a1095b484195acc8570b280e638a72bf (diff) |
Memory leak correction in TER algorithm
Diffstat (limited to 'mert')
-rw-r--r-- | mert/Jamfile | 1 | ||||
-rw-r--r-- | mert/TER/alignmentStruct.cpp | 25 | ||||
-rw-r--r-- | mert/TER/alignmentStruct.h | 31 | ||||
-rw-r--r-- | mert/TER/bestShiftStruct.cpp | 66 | ||||
-rw-r--r-- | mert/TER/bestShiftStruct.h | 48 | ||||
-rw-r--r-- | mert/TER/hashMap.cpp | 232 | ||||
-rw-r--r-- | mert/TER/hashMap.h | 44 | ||||
-rw-r--r-- | mert/TER/hashMapInfos.cpp | 239 | ||||
-rw-r--r-- | mert/TER/hashMapInfos.h | 46 | ||||
-rw-r--r-- | mert/TER/hashMapStringInfos.cpp | 313 | ||||
-rw-r--r-- | mert/TER/hashMapStringInfos.h | 46 | ||||
-rw-r--r-- | mert/TER/infosHasher.cpp | 58 | ||||
-rw-r--r-- | mert/TER/infosHasher.h | 40 | ||||
-rw-r--r-- | mert/TER/stringHasher.cpp | 46 | ||||
-rw-r--r-- | mert/TER/stringHasher.h | 32 | ||||
-rw-r--r-- | mert/TER/stringInfosHasher.cpp | 58 | ||||
-rw-r--r-- | mert/TER/stringInfosHasher.h | 40 | ||||
-rw-r--r-- | mert/TER/terAlignment.cpp | 339 | ||||
-rw-r--r-- | mert/TER/terAlignment.h | 79 | ||||
-rw-r--r-- | mert/TER/terShift.cpp | 164 | ||||
-rw-r--r-- | mert/TER/terShift.h | 53 | ||||
-rw-r--r-- | mert/TER/tercalc.cpp | 1832 | ||||
-rw-r--r-- | mert/TER/tercalc.h | 104 | ||||
-rw-r--r-- | mert/TER/tools.cpp | 1224 | ||||
-rw-r--r-- | mert/TER/tools.h | 116 |
25 files changed, 3108 insertions, 2168 deletions
diff --git a/mert/Jamfile b/mert/Jamfile index ee8a1fcc3..4dd2fb540 100644 --- a/mert/Jamfile +++ b/mert/Jamfile @@ -46,6 +46,7 @@ TER/infosHasher.cpp TER/stringInfosHasher.cpp TER/tercalc.cpp TER/tools.cpp +TER/bestShiftStruct.cpp TerScorer.cpp CderScorer.cpp MeteorScorer.cpp diff --git a/mert/TER/alignmentStruct.cpp b/mert/TER/alignmentStruct.cpp index e42ec4a14..e2a880396 100644 --- a/mert/TER/alignmentStruct.cpp +++ b/mert/TER/alignmentStruct.cpp @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -23,15 +23,24 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA using namespace std; namespace TERCpp { -string alignmentStruct::toString() -{ - stringstream s; + string alignmentStruct::toString() + { + stringstream s; // s << "nword : " << vectorToString(nwords)<<endl; // s << "alignment" << vectorToString(alignment)<<endl; // s << "afterShift" << vectorToString(alignment)<<endl; - s << "Nothing to be printed" <<endl; - return s.str(); -} + s << "Nothing to be printed" <<endl; + return s.str(); + } + void alignmentStruct::set(alignmentStruct l_alignmentStruct) + { + nwords=l_alignmentStruct.nwords; // The words we shifted + alignment=l_alignmentStruct.alignment ; // for pra_more output + aftershift=l_alignmentStruct.aftershift; // for pra_more output + cost=l_alignmentStruct.cost; + } + + // alignmentStruct::alignmentStruct() // { @@ -99,7 +108,7 @@ string alignmentStruct::toString() // return s.str(); // } -/* The distance of the shift. */ + /* The distance of the shift. 
*/ // int alignmentStruct::distance() // { // if (moveto < start) diff --git a/mert/TER/alignmentStruct.h b/mert/TER/alignmentStruct.h index c1459960b..0963fbe94 100644 --- a/mert/TER/alignmentStruct.h +++ b/mert/TER/alignmentStruct.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA **********************************/ -#ifndef MERT_TER_ALIGNMENTSTRUCT_H_ -#define MERT_TER_ALIGNMENTSTRUCT_H_ +#ifndef __TERCPPALIGNMENTSTRUCT_H__ +#define __TERCPPALIGNMENTSTRUCT_H__ #include <vector> @@ -34,10 +34,10 @@ using namespace Tools; namespace TERCpp { -class alignmentStruct -{ -private: -public: + class alignmentStruct + { + private: + public: // alignmentStruct(); // alignmentStruct (int _start, int _end, int _moveto, int _newloc); @@ -53,14 +53,15 @@ public: // int end; // int moveto; // int newloc; - vector<string> nwords; // The words we shifted - vector<char> alignment ; // for pra_more output - vector<vecInt> aftershift; // for pra_more output - // This is used to store the cost of a shift, so we don't have to - // calculate it multiple times. - double cost; - string toString(); -}; + vector<string> nwords; // The words we shifted + vector<char> alignment ; // for pra_more output + vector<vecInt> aftershift; // for pra_more output + // This is used to store the cost of a shift, so we don't have to + // calculate it multiple times. 
+ double cost; + string toString(); + void set(alignmentStruct l_alignmentStruct); + }; } #endif
\ No newline at end of file diff --git a/mert/TER/bestShiftStruct.cpp b/mert/TER/bestShiftStruct.cpp new file mode 100644 index 000000000..8c27f1ff8 --- /dev/null +++ b/mert/TER/bestShiftStruct.cpp @@ -0,0 +1,66 @@ +/********************************* +tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation. + +Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France +Contact: christophe.servan@lium.univ-lemans.fr + +The tercpp tool and library are free software: you can redistribute it and/or modify it +under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation, either version 3 of the licence, or +(at your option) any later version. + +This program and library are distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this library; if not, write to the Free Software Foundation, +Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +**********************************/ +#include "bestShiftStruct.h" + +using namespace std; + +namespace TERCpp +{ + bestShiftStruct::bestShiftStruct() + { + m_best_shift=new terShift(); + m_best_align=new terAlignment(); + m_empty=new bool(false); + } + bestShiftStruct::~bestShiftStruct() + { + delete(m_best_align); + delete(m_best_shift); + } + void bestShiftStruct::setEmpty(bool b) + { + m_empty=new bool(b); + } + void bestShiftStruct::setBestShift(terShift * l_terShift) + { + m_best_shift->set(l_terShift); + } + void bestShiftStruct::setBestAlign(terAlignment * l_terAlignment) + { + m_best_align->set(l_terAlignment); + } + string bestShiftStruct::toString() + { + stringstream s; + s << m_best_shift->toString() << endl; + s << m_best_align->toString() << endl; +// s << (*m_empty) << endl; 
+ } + bool bestShiftStruct::getEmpty() + { + return (*(m_empty)); + } + + + + + +} diff --git a/mert/TER/bestShiftStruct.h b/mert/TER/bestShiftStruct.h index d68f2319f..144787faa 100644 --- a/mert/TER/bestShiftStruct.h +++ b/mert/TER/bestShiftStruct.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA **********************************/ -#ifndef __BESTSHIFTSTRUCT_H_ -#define __BESTSHIFTSTRUCT_H_ +#ifndef __BESTSHIFTSTRUCT_H__ +#define __BESTSHIFTSTRUCT_H__ #include <vector> @@ -36,10 +36,10 @@ using namespace Tools; namespace TERCpp { -class bestShiftStruct -{ -private: -public: + class bestShiftStruct + { + private: + public: // alignmentStruct(); // alignmentStruct (int _start, int _end, int _moveto, int _newloc); @@ -55,16 +55,36 @@ public: // int end; // int moveto; // int newloc; - terShift m_best_shift; - terAlignment m_best_align; - bool m_empty; + terShift * m_best_shift; + terAlignment * m_best_align; + bool * m_empty; + bestShiftStruct(); + ~bestShiftStruct(); + inline void set(bestShiftStruct l_bestShiftStruct) + { + m_best_shift->set(l_bestShiftStruct.m_best_shift); + m_best_align->set(l_bestShiftStruct.m_best_align); + setEmpty(l_bestShiftStruct.getEmpty()); + } + inline void set(bestShiftStruct * l_bestShiftStruct) + { + m_best_shift->set(l_bestShiftStruct->m_best_shift); + 
m_best_align->set(l_bestShiftStruct->m_best_align); + setEmpty(l_bestShiftStruct->getEmpty()); + } + void setEmpty(bool b); + void setBestShift(terShift * l_terShift); + void setBestAlign(terAlignment * l_terAlignment); + string toString(); + bool getEmpty(); + // vector<string> nwords; // The words we shifted // char* alignment ; // for pra_more output // vector<vecInt> aftershift; // for pra_more output - // This is used to store the cost of a shift, so we don't have to - // calculate it multiple times. + // This is used to store the cost of a shift, so we don't have to + // calculate it multiple times. // double cost; -}; + }; } -#endif
\ No newline at end of file +#endif diff --git a/mert/TER/hashMap.cpp b/mert/TER/hashMap.cpp index 253fda715..de84ff796 100644 --- a/mert/TER/hashMap.cpp +++ b/mert/TER/hashMap.cpp @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -28,142 +28,156 @@ using namespace std; namespace HashMapSpace { // hashMap::hashMap(); -/* hashMap::~hashMap() + /* hashMap::~hashMap() + { + // vector<stringHasher>::const_iterator del = m_hasher.begin(); + for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ ) + { + delete(*del); + } + }*/ + /** + * int hashMap::trouve ( long searchKey ) + * @param searchKey + * @return + */ + int hashMap::trouve ( long searchKey ) { -// vector<stringHasher>::const_iterator del = m_hasher.begin(); - for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ ) - { - delete(*del); - } - }*/ -/** - * int hashMap::trouve ( long searchKey ) - * @param searchKey - * @return - */ -int hashMap::trouve ( long searchKey ) -{ - long foundKey; + long foundKey; // vector<stringHasher>::const_iterator l_hasher=m_hasher.begin(); - for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - foundKey= ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - return 1; + for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { + foundKey= ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + return 1; + } + } + return 0; } - } - return 0; -} -int 
hashMap::trouve ( string key ) -{ - long searchKey=hashValue ( key ); - long foundKey;; + int hashMap::trouve ( string key ) + { + long searchKey=hashValue ( key ); + long foundKey;; // vector<stringHasher>::const_iterator l_hasher=m_hasher.begin(); - for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - foundKey= ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - return 1; + for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { + foundKey= ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + return 1; + } + } + return 0; } - } - return 0; -} -/** - * long hashMap::hashValue ( string key ) - * @param key - * @return - */ -long hashMap::hashValue ( string key ) -{ - locale loc; // the "C" locale - const collate<char>& coll = use_facet<collate<char> >(loc); - return coll.hash(key.data(),key.data()+key.length()); + /** + * long hashMap::hashValue ( string key ) + * @param key + * @return + */ + long hashMap::hashValue ( string key ) + { + locale loc; // the "C" locale + const collate<char>& coll = use_facet<collate<char> >(loc); + return coll.hash(key.data(),key.data()+key.length()); // boost::hash<string> hasher; // return hasher ( key ); -} -/** - * void hashMap::addHasher ( string key, string value ) - * @param key - * @param value - */ -void hashMap::addHasher ( string key, string value ) -{ - if ( trouve ( hashValue ( key ) ) ==0 ) { + } + /** + * void hashMap::addHasher ( string key, string value ) + * @param key + * @param value + */ + void hashMap::addHasher ( string key, string value ) + { + if ( trouve ( hashValue ( key ) ) ==0 ) + { // cerr << "ICI1" <<endl; - stringHasher H ( hashValue ( key ),key,value ); + stringHasher H ( hashValue ( key ),key,value ); // cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl; // cerr << "ICI2" <<endl; - m_hasher.push_back ( H ); - } -} -stringHasher hashMap::getHasher ( string 
key ) -{ - long searchKey=hashValue ( key ); - long foundKey; - stringHasher defaut(0,"",""); + m_hasher.push_back ( H ); + } + } + stringHasher hashMap::getHasher ( string key ) + { + long searchKey=hashValue ( key ); + long foundKey; + stringHasher defaut(0,"",""); // vector<stringHasher>::const_iterator l_hasher=m_hasher.begin(); - for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - foundKey= ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - return ( *l_hasher ); + for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { + foundKey= ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + return ( *l_hasher ); + } + } + return defaut; } - } - return defaut; -} -string hashMap::getValue ( string key ) -{ - long searchKey=hashValue ( key ); - long foundKey; + string hashMap::getValue ( string key ) + { + long searchKey=hashValue ( key ); + long foundKey; // vector<stringHasher>::const_iterator l_hasher=m_hasher.begin(); - for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - foundKey= ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { + for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { + foundKey= ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { // cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl; - return ( *l_hasher ).getValue(); + return ( *l_hasher ).getValue(); + } + } + return ""; } - } - return ""; -} -string hashMap::searchValue ( string value ) -{ + string hashMap::searchValue ( string value ) + { // long searchKey=hashValue ( key ); // long foundKey; - string foundValue; + string foundValue; // vector<stringHasher>::const_iterator l_hasher=m_hasher.begin(); - for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - 
foundValue= ( *l_hasher ).getValue(); - if ( foundValue.compare ( value ) == 0 ) { - return ( *l_hasher ).getKey(); + for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { + foundValue= ( *l_hasher ).getValue(); + if ( foundValue.compare ( value ) == 0 ) + { + return ( *l_hasher ).getKey(); + } + } + return ""; } - } - return ""; -} -void hashMap::setValue ( string key , string value ) -{ - long searchKey=hashValue ( key ); - long foundKey; + void hashMap::setValue ( string key , string value ) + { + long searchKey=hashValue ( key ); + long foundKey; // vector<stringHasher>::const_iterator l_hasher=m_hasher.begin(); - for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - foundKey= ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - ( *l_hasher ).setValue ( value ); + for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { + foundKey= ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + ( *l_hasher ).setValue ( value ); // return ( *l_hasher ).getValue(); + } + } } - } -} -/** - * - */ -void hashMap::printHash() -{ - for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl; - } -} + /** + * + */ + void hashMap::printHash() + { + for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { + cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl; + } + } diff --git a/mert/TER/hashMap.h b/mert/TER/hashMap.h index c2708b360..017e6b831 100644 --- a/mert/TER/hashMap.h +++ b/mert/TER/hashMap.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: 
christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -21,8 +21,8 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA /* * Generic hashmap manipulation functions */ -#ifndef __HASHMAP_H_ -#define __HASHMAP_H_ +#ifndef __HASHMAP_H__ +#define __HASHMAP_H__ #include <boost/functional/hash.hpp> #include "stringHasher.h" #include <vector> @@ -35,27 +35,27 @@ using namespace std; namespace HashMapSpace { -class hashMap -{ -private: - vector<stringHasher> m_hasher; + class hashMap + { + private: + vector<stringHasher> m_hasher; -public: + public: // ~hashMap(); - long hashValue ( string key ); - int trouve ( long searchKey ); - int trouve ( string key ); - void addHasher ( string key, string value ); - stringHasher getHasher ( string key ); - string getValue ( string key ); - string searchValue ( string key ); - void setValue ( string key , string value ); - void printHash(); - vector<stringHasher> getHashMap(); - string printStringHash(); - string printStringHash2(); - string printStringHashForLexicon(); -}; + long hashValue ( string key ); + int trouve ( long searchKey ); + int trouve ( string key ); + void addHasher ( string key, string value ); + stringHasher getHasher ( string key ); + string getValue ( string key ); + string searchValue ( string key ); + void setValue ( string key , string value ); + void printHash(); + vector<stringHasher> getHashMap(); + string printStringHash(); + string printStringHash2(); + string printStringHashForLexicon(); + }; } diff --git a/mert/TER/hashMapInfos.cpp b/mert/TER/hashMapInfos.cpp index 0ab6d21b2..23f57d808 100644 --- a/mert/TER/hashMapInfos.cpp +++ b/mert/TER/hashMapInfos.cpp @@ -5,7 +5,7 @@ 
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -28,108 +28,117 @@ using namespace std; namespace HashMapSpace { // hashMapInfos::hashMap(); -/* hashMapInfos::~hashMap() + /* hashMapInfos::~hashMap() + { + // vector<infosHasher>::const_iterator del = m_hasher.begin(); + for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ ) + { + delete(*del); + } + }*/ + /** + * int hashMapInfos::trouve ( long searchKey ) + * @param searchKey + * @return + */ + int hashMapInfos::trouve ( long searchKey ) { -// vector<infosHasher>::const_iterator del = m_hasher.begin(); - for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ ) - { - delete(*del); - } - }*/ -/** - * int hashMapInfos::trouve ( long searchKey ) - * @param searchKey - * @return - */ -int hashMapInfos::trouve ( long searchKey ) -{ - long foundKey; + long foundKey; // vector<infosHasher>::const_iterator l_hasher=m_hasher.begin(); - for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - foundKey= ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - return 1; + for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { + foundKey= ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + return 1; + } + } + return 0; } - } - return 0; -} -int hashMapInfos::trouve ( string key ) -{ - long searchKey=hashValue ( key ); - long foundKey;; + int hashMapInfos::trouve ( string key ) + { + long searchKey=hashValue ( key ); + 
long foundKey;; // vector<infosHasher>::const_iterator l_hasher=m_hasher.begin(); - for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - foundKey= ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - return 1; + for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { + foundKey= ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + return 1; + } + } + return 0; } - } - return 0; -} -/** - * long hashMapInfos::hashValue ( string key ) - * @param key - * @return - */ -long hashMapInfos::hashValue ( string key ) -{ - locale loc; // the "C" locale - const collate<char>& coll = use_facet<collate<char> >(loc); - return coll.hash(key.data(),key.data()+key.length()); + /** + * long hashMapInfos::hashValue ( string key ) + * @param key + * @return + */ + long hashMapInfos::hashValue ( string key ) + { + locale loc; // the "C" locale + const collate<char>& coll = use_facet<collate<char> >(loc); + return coll.hash(key.data(),key.data()+key.length()); // boost::hash<string> hasher; // return hasher ( key ); -} -/** - * void hashMapInfos::addHasher ( string key, string value ) - * @param key - * @param value - */ -void hashMapInfos::addHasher ( string key, vector<int> value ) -{ - if ( trouve ( hashValue ( key ) ) ==0 ) { + } + /** + * void hashMapInfos::addHasher ( string key, string value ) + * @param key + * @param value + */ + void hashMapInfos::addHasher ( string key, vector<int> value ) + { + if ( trouve ( hashValue ( key ) ) ==0 ) + { // cerr << "ICI1" <<endl; - infosHasher H ( hashValue ( key ),key,value ); + infosHasher H ( hashValue ( key ),key,value ); // cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl; // cerr << "ICI2" <<endl; - m_hasher.push_back ( H ); - } -} -void hashMapInfos::addValue ( string key, vector<int> value ) -{ - addHasher ( key, value ); -} -infosHasher hashMapInfos::getHasher ( string key ) -{ - long 
searchKey=hashValue ( key ); - long foundKey; + m_hasher.push_back ( H ); + } + } + void hashMapInfos::addValue ( string key, vector<int> value ) + { + addHasher ( key, value ); + } + infosHasher hashMapInfos::getHasher ( string key ) + { + long searchKey=hashValue ( key ); + long foundKey; // vector<infosHasher>::const_iterator l_hasher=m_hasher.begin(); - for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - foundKey= ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - return ( *l_hasher ); + for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { + foundKey= ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + return ( *l_hasher ); + } + } + vector<int> temp; + infosHasher defaut(0,"",temp); + return defaut; } - } - vector<int> temp; - infosHasher defaut(0,"",temp); - return defaut; -} -vector<int> hashMapInfos::getValue ( string key ) -{ - long searchKey=hashValue ( key ); - long foundKey; - vector<int> retour; + vector<int> hashMapInfos::getValue ( string key ) + { + long searchKey=hashValue ( key ); + long foundKey; + vector<int> retour; // vector<infosHasher>::const_iterator l_hasher=m_hasher.begin(); - for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - foundKey= ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { + for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { + foundKey= ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { // cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl; - return ( *l_hasher ).getValue(); + return ( *l_hasher ).getValue(); + } + } + return retour; } - } - return retour; -} // string hashMapInfos::searchValue ( string value ) // { // // long searchKey=hashValue ( key ); @@ -149,38 +158,42 @@ vector<int> hashMapInfos::getValue ( string key ) // } // 
-void hashMapInfos::setValue ( string key , vector<int> value ) -{ - long searchKey=hashValue ( key ); - long foundKey; + void hashMapInfos::setValue ( string key , vector<int> value ) + { + long searchKey=hashValue ( key ); + long foundKey; // vector<infosHasher>::const_iterator l_hasher=m_hasher.begin(); - for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { - foundKey= ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - ( *l_hasher ).setValue ( value ); + for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { + foundKey= ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + ( *l_hasher ).setValue ( value ); // return ( *l_hasher ).getValue(); + } + } + } + string hashMapInfos::toString () + { + stringstream to_return; + for ( vector<infosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) + { + to_return << (*l_hasher).toString(); + // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl; + } + return to_return.str(); } - } -} -string hashMapInfos::toString () -{ - stringstream to_return; - for ( vector<infosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) { - to_return << (*l_hasher).toString(); - // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl; - } - return to_return.str(); -} -/** - * - */ -void hashMapInfos::printHash() -{ - for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { + /** + * + */ + void hashMapInfos::printHash() + { + for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + { // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl; - } -} + } + } diff 
--git a/mert/TER/hashMapInfos.h b/mert/TER/hashMapInfos.h index e975aa738..58cd50aef 100644 --- a/mert/TER/hashMapInfos.h +++ b/mert/TER/hashMapInfos.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -21,8 +21,8 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA /* * Generic hashmap manipulation functions */ -#ifndef __HASHMAPINFOS_H_ -#define __HASHMAPINFOS_H_ +#ifndef __HASHMAPINFOS_H__ +#define __HASHMAPINFOS_H__ #include <boost/functional/hash.hpp> #include "infosHasher.h" #include <vector> @@ -34,29 +34,29 @@ using namespace std; namespace HashMapSpace { -class hashMapInfos -{ -private: - vector<infosHasher> m_hasher; + class hashMapInfos + { + private: + vector<infosHasher> m_hasher; -public: + public: // ~hashMap(); - long hashValue ( string key ); - int trouve ( long searchKey ); - int trouve ( string key ); - void addHasher ( string key, vector<int> value ); - void addValue ( string key, vector<int> value ); - infosHasher getHasher ( string key ); - vector<int> getValue ( string key ); + long hashValue ( string key ); + int trouve ( long searchKey ); + int trouve ( string key ); + void addHasher ( string key, vector<int> value ); + void addValue ( string key, vector<int> value ); + infosHasher getHasher ( string key ); + vector<int> getValue ( string key ); // string searchValue ( string key ); - void setValue ( string key , vector<int> value ); - void printHash(); - string toString(); - vector<infosHasher> getHashMap(); - string printStringHash(); - string printStringHash2(); - string printStringHashForLexicon(); -}; + 
void setValue ( string key , vector<int> value ); + void printHash(); + string toString(); + vector<infosHasher> getHashMap(); + string printStringHash(); + string printStringHash2(); + string printStringHashForLexicon(); + }; } diff --git a/mert/TER/hashMapStringInfos.cpp b/mert/TER/hashMapStringInfos.cpp index d984bdadc..773c148d4 100644 --- a/mert/TER/hashMapStringInfos.cpp +++ b/mert/TER/hashMapStringInfos.cpp @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -27,166 +27,179 @@ using namespace std; namespace HashMapSpace { -// hashMapStringInfos::hashMap(); -/* hashMapStringInfos::~hashMap() -{ -// vector<stringInfosHasher>::const_iterator del = m_hasher.begin(); - for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ ) - { - delete(*del); - } -}*/ -/** -* int hashMapStringInfos::trouve ( long searchKey ) -* @param searchKey -* @return -*/ -int hashMapStringInfos::trouve ( long searchKey ) -{ - long foundKey; - // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin(); - for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) { - foundKey = ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - return 1; + // hashMapStringInfos::hashMap(); + /* hashMapStringInfos::~hashMap() + { + // vector<stringInfosHasher>::const_iterator del = m_hasher.begin(); + for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ ) + { + delete(*del); + } + }*/ + /** + * int 
hashMapStringInfos::trouve ( long searchKey ) + * @param searchKey + * @return + */ + int hashMapStringInfos::trouve ( long searchKey ) + { + long foundKey; + // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin(); + for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) + { + foundKey = ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + return 1; + } + } + return 0; } - } - return 0; -} -int hashMapStringInfos::trouve ( string key ) -{ - long searchKey = hashValue ( key ); - long foundKey;; - // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin(); - for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) { - foundKey = ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - return 1; + int hashMapStringInfos::trouve ( string key ) + { + long searchKey = hashValue ( key ); + long foundKey;; + // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin(); + for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) + { + foundKey = ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + return 1; + } + } + return 0; } - } - return 0; -} -/** -* long hashMapStringInfos::hashValue ( string key ) -* @param key -* @return -*/ -long hashMapStringInfos::hashValue ( string key ) -{ - locale loc; // the "C" locale - const collate<char>& coll = use_facet<collate<char> > ( loc ); - return coll.hash ( key.data(), key.data() + key.length() ); + /** + * long hashMapStringInfos::hashValue ( string key ) + * @param key + * @return + */ + long hashMapStringInfos::hashValue ( string key ) + { + locale loc; // the "C" locale + const collate<char>& coll = use_facet<collate<char> > ( loc ); + return coll.hash ( key.data(), key.data() + key.length() ); // boost::hash<string> hasher; // return hasher ( key ); -} -/** -* void 
hashMapStringInfos::addHasher ( string key, string value ) -* @param key -* @param value -*/ -void hashMapStringInfos::addHasher ( string key, vector<string> value ) -{ - if ( trouve ( hashValue ( key ) ) == 0 ) { - // cerr << "ICI1" <<endl; - stringInfosHasher H ( hashValue ( key ), key, value ); - // cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl; - // cerr << "ICI2" <<endl; - - m_hasher.push_back ( H ); - } -} -void hashMapStringInfos::addValue ( string key, vector<string> value ) -{ - addHasher ( key, value ); -} -stringInfosHasher hashMapStringInfos::getHasher ( string key ) -{ - long searchKey = hashValue ( key ); - long foundKey; - // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin(); - for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) { - foundKey = ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - return ( *l_hasher ); } - } - vector<string> tmp; - stringInfosHasher defaut ( 0, "", tmp ); - return defaut; -} -vector<string> hashMapStringInfos::getValue ( string key ) -{ - long searchKey = hashValue ( key ); - long foundKey; - vector<string> retour; - // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin(); - for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) { - foundKey = ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - // cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl; - return ( *l_hasher ).getValue(); + /** + * void hashMapStringInfos::addHasher ( string key, string value ) + * @param key + * @param value + */ + void hashMapStringInfos::addHasher ( string key, vector<string> value ) + { + if ( trouve ( hashValue ( key ) ) == 0 ) + { + // cerr << "ICI1" <<endl; + stringInfosHasher H ( hashValue ( key ), key, value ); + // cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl; + // cerr << "ICI2" <<endl; + + m_hasher.push_back 
( H ); + } } - } - return retour; -} -// string hashMapStringInfos::searchValue ( string value ) -// { -// // long searchKey=hashValue ( key ); -// // long foundKey; -// vector<int> foundValue; -// -// // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin(); -// for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) -// { -// foundValue= ( *l_hasher ).getValue(); -// /* if ( foundValue.compare ( value ) == 0 ) -// { -// return ( *l_hasher ).getKey(); -// }*/ -// } -// return ""; -// } -// - -void hashMapStringInfos::setValue ( string key , vector<string> value ) -{ - long searchKey = hashValue ( key ); - long foundKey; - // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin(); - for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) { - foundKey = ( *l_hasher ).getHashKey(); - if ( searchKey == foundKey ) { - ( *l_hasher ).setValue ( value ); - // return ( *l_hasher ).getValue(); + void hashMapStringInfos::addValue ( string key, vector<string> value ) + { + addHasher ( key, value ); + } + stringInfosHasher hashMapStringInfos::getHasher ( string key ) + { + long searchKey = hashValue ( key ); + long foundKey; + // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin(); + for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) + { + foundKey = ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + return ( *l_hasher ); + } + } + vector<string> tmp; + stringInfosHasher defaut ( 0, "", tmp ); + return defaut; + } + vector<string> hashMapStringInfos::getValue ( string key ) + { + long searchKey = hashValue ( key ); + long foundKey; + vector<string> retour; + // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin(); + for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) + 
{ + foundKey = ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + // cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl; + return ( *l_hasher ).getValue(); + } + } + return retour; + } + // string hashMapStringInfos::searchValue ( string value ) + // { + // // long searchKey=hashValue ( key ); + // // long foundKey; + // vector<int> foundValue; + // + // // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin(); + // for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) + // { + // foundValue= ( *l_hasher ).getValue(); + // /* if ( foundValue.compare ( value ) == 0 ) + // { + // return ( *l_hasher ).getKey(); + // }*/ + // } + // return ""; + // } + // + + void hashMapStringInfos::setValue ( string key , vector<string> value ) + { + long searchKey = hashValue ( key ); + long foundKey; + // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin(); + for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) + { + foundKey = ( *l_hasher ).getHashKey(); + if ( searchKey == foundKey ) + { + ( *l_hasher ).setValue ( value ); + // return ( *l_hasher ).getValue(); + } + } } - } -} -string hashMapStringInfos::toString () -{ - stringstream to_return; - for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) { - to_return << (*l_hasher).toString(); - // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl; - } - return to_return.str(); -} + string hashMapStringInfos::toString () + { + stringstream to_return; + for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) + { + to_return << (*l_hasher).toString(); + // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl; + } + 
return to_return.str(); + } -/** -* -*/ -void hashMapStringInfos::printHash() -{ - for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) { - // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl; - } -} -vector< stringInfosHasher > hashMapStringInfos::getHashMap() -{ - return m_hasher; -} + /** + * + */ + void hashMapStringInfos::printHash() + { + for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) + { + // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl; + } + } + vector< stringInfosHasher > hashMapStringInfos::getHashMap() + { + return m_hasher; + } diff --git a/mert/TER/hashMapStringInfos.h b/mert/TER/hashMapStringInfos.h index a0eae951d..3ea3794e5 100644 --- a/mert/TER/hashMapStringInfos.h +++ b/mert/TER/hashMapStringInfos.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. 
@@ -21,8 +21,8 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA /* * Generic hashmap manipulation functions */ -#ifndef __HASHMAPSTRINGINFOS_H_ -#define __HASHMAPSTRINGINFOS_H_ +#ifndef __HASHMAPSTRINGINFOS_H__ +#define __HASHMAPSTRINGINFOS_H__ #include <boost/functional/hash.hpp> #include "stringInfosHasher.h" #include <vector> @@ -34,29 +34,29 @@ using namespace std; namespace HashMapSpace { -class hashMapStringInfos -{ -private: - vector<stringInfosHasher> m_hasher; + class hashMapStringInfos + { + private: + vector<stringInfosHasher> m_hasher; -public: + public: // ~hashMap(); - long hashValue ( string key ); - int trouve ( long searchKey ); - int trouve ( string key ); - void addHasher ( string key, vector<string> value ); - void addValue ( string key, vector<string> value ); - stringInfosHasher getHasher ( string key ); - vector<string> getValue ( string key ); + long hashValue ( string key ); + int trouve ( long searchKey ); + int trouve ( string key ); + void addHasher ( string key, vector<string> value ); + void addValue ( string key, vector<string> value ); + stringInfosHasher getHasher ( string key ); + vector<string> getValue ( string key ); // string searchValue ( string key ); - void setValue ( string key , vector<string> value ); - void printHash(); - string toString(); - vector<stringInfosHasher> getHashMap(); - string printStringHash(); - string printStringHash2(); - string printStringHashForLexicon(); -}; + void setValue ( string key , vector<string> value ); + void printHash(); + string toString(); + vector<stringInfosHasher> getHashMap(); + string printStringHash(); + string printStringHash2(); + string printStringHashForLexicon(); + }; } diff --git a/mert/TER/infosHasher.cpp b/mert/TER/infosHasher.cpp index 450b70d94..8ce23ae44 100644 --- a/mert/TER/infosHasher.cpp +++ b/mert/TER/infosHasher.cpp @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: 
christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -27,35 +27,35 @@ using namespace Tools; namespace HashMapSpace { -infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt ) -{ - m_hashKey=cle; - m_key=cleTxt; - m_value=valueVecInt; -} + infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt ) + { + m_hashKey=cle; + m_key=cleTxt; + m_value=valueVecInt; + } // infosHasher::~infosHasher(){};*/ -long infosHasher::getHashKey() -{ - return m_hashKey; -} -string infosHasher::getKey() -{ - return m_key; -} -vector<int> infosHasher::getValue() -{ - return m_value; -} -void infosHasher::setValue ( vector<int> value ) -{ - m_value=value; -} -string infosHasher::toString() -{ - stringstream to_return; - to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl; - return to_return.str(); -} + long infosHasher::getHashKey() + { + return m_hashKey; + } + string infosHasher::getKey() + { + return m_key; + } + vector<int> infosHasher::getValue() + { + return m_value; + } + void infosHasher::setValue ( vector<int> value ) + { + m_value=value; + } + string infosHasher::toString() + { + stringstream to_return; + to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl; + return to_return.str(); + } // typedef stdext::hash_map<std::string,string, stringhasher> HASH_S_S; diff --git a/mert/TER/infosHasher.h b/mert/TER/infosHasher.h index ab9c7b5ed..692bde49d 100644 --- a/mert/TER/infosHasher.h +++ b/mert/TER/infosHasher.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The 
tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA **********************************/ -#ifndef __INFOSHASHER_H_ -#define __INFOSHASHER_H_ +#ifndef __INFOSHASHER_H__ +#define __INFOSHASHER_H__ #include <string> // #include <ext/hash_map> #include <stdio.h> @@ -31,23 +31,23 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA using namespace std; namespace HashMapSpace { -class infosHasher -{ -private: - long m_hashKey; - string m_key; - vector<int> m_value; - -public: - infosHasher ( long cle, string cleTxt, vector<int> valueVecInt ); - long getHashKey(); - string getKey(); - vector<int> getValue(); - void setValue ( vector<int> value ); - string toString(); - - -}; + class infosHasher + { + private: + long m_hashKey; + string m_key; + vector<int> m_value; + + public: + infosHasher ( long cle, string cleTxt, vector<int> valueVecInt ); + long getHashKey(); + string getKey(); + vector<int> getValue(); + void setValue ( vector<int> value ); + string toString(); + + + }; } diff --git a/mert/TER/stringHasher.cpp b/mert/TER/stringHasher.cpp index 729310352..f4d1526e8 100644 --- a/mert/TER/stringHasher.cpp +++ b/mert/TER/stringHasher.cpp @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU 
Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -26,29 +26,29 @@ using namespace std; namespace HashMapSpace { -stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt ) -{ - m_hashKey=cle; - m_key=cleTxt; - m_value=valueTxt; -} + stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt ) + { + m_hashKey=cle; + m_key=cleTxt; + m_value=valueTxt; + } // stringHasher::~stringHasher(){};*/ -long stringHasher::getHashKey() -{ - return m_hashKey; -} -string stringHasher::getKey() -{ - return m_key; -} -string stringHasher::getValue() -{ - return m_value; -} -void stringHasher::setValue ( string value ) -{ - m_value=value; -} + long stringHasher::getHashKey() + { + return m_hashKey; + } + string stringHasher::getKey() + { + return m_key; + } + string stringHasher::getValue() + { + return m_value; + } + void stringHasher::setValue ( string value ) + { + m_value=value; + } // typedef stdext::hash_map<string, string, stringhasher> HASH_S_S; diff --git a/mert/TER/stringHasher.h b/mert/TER/stringHasher.h index 5b0ccfc94..e2a79834c 100644 --- a/mert/TER/stringHasher.h +++ b/mert/TER/stringHasher.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. 
@@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA **********************************/ -#ifndef __STRINGHASHER_H_ -#define __STRINGHASHER_H_ +#ifndef __STRINGHASHER_H__ +#define __STRINGHASHER_H__ #include <string> //#include <ext/hash_map> #include <iostream> @@ -28,22 +28,22 @@ using namespace std; namespace HashMapSpace { -class stringHasher -{ -private: - long m_hashKey; - string m_key; - string m_value; + class stringHasher + { + private: + long m_hashKey; + string m_key; + string m_value; -public: - stringHasher ( long cle, string cleTxt, string valueTxt ); - long getHashKey(); - string getKey(); - string getValue(); - void setValue ( string value ); + public: + stringHasher ( long cle, string cleTxt, string valueTxt ); + long getHashKey(); + string getKey(); + string getValue(); + void setValue ( string value ); -}; + }; } diff --git a/mert/TER/stringInfosHasher.cpp b/mert/TER/stringInfosHasher.cpp index ecbc10fa5..007fd720f 100644 --- a/mert/TER/stringInfosHasher.cpp +++ b/mert/TER/stringInfosHasher.cpp @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. 
@@ -27,35 +27,35 @@ using namespace Tools; namespace HashMapSpace { -stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt ) -{ - m_hashKey=cle; - m_key=cleTxt; - m_value=valueVecInt; -} + stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt ) + { + m_hashKey=cle; + m_key=cleTxt; + m_value=valueVecInt; + } // stringInfosHasher::~stringInfosHasher(){};*/ -long stringInfosHasher::getHashKey() -{ - return m_hashKey; -} -string stringInfosHasher::getKey() -{ - return m_key; -} -vector<string> stringInfosHasher::getValue() -{ - return m_value; -} -void stringInfosHasher::setValue ( vector<string> value ) -{ - m_value=value; -} -string stringInfosHasher::toString() -{ - stringstream to_return; - to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl; - return to_return.str(); -} + long stringInfosHasher::getHashKey() + { + return m_hashKey; + } + string stringInfosHasher::getKey() + { + return m_key; + } + vector<string> stringInfosHasher::getValue() + { + return m_value; + } + void stringInfosHasher::setValue ( vector<string> value ) + { + m_value=value; + } + string stringInfosHasher::toString() + { + stringstream to_return; + to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl; + return to_return.str(); + } // typedef stdext::hash_map<string, string, stringhasher> HASH_S_S; diff --git a/mert/TER/stringInfosHasher.h b/mert/TER/stringInfosHasher.h index e4369f27a..f35e4596b 100644 --- a/mert/TER/stringInfosHasher.h +++ b/mert/TER/stringInfosHasher.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free 
Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA **********************************/ -#ifndef __STRINGINFOSHASHER_H_ -#define __STRINGINFOSHASHER_H_ +#ifndef __STRINGINFOSHASHER_H__ +#define __STRINGINFOSHASHER_H__ #include <string> // #include <ext/hash_map> #include <iostream> @@ -29,23 +29,23 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA using namespace std; namespace HashMapSpace { -class stringInfosHasher -{ -private: - long m_hashKey; - string m_key; - vector<string> m_value; - -public: - stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt ); - long getHashKey(); - string getKey(); - vector<string> getValue(); - void setValue ( vector<string> value ); - string toString(); - - -}; + class stringInfosHasher + { + private: + long m_hashKey; + string m_key; + vector<string> m_value; + + public: + stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt ); + long getHashKey(); + string getKey(); + vector<string> getValue(); + void setValue ( vector<string> value ); + string toString(); + + + }; } diff --git a/mert/TER/terAlignment.cpp b/mert/TER/terAlignment.cpp index ec7bcafb7..dda4a4239 100644 --- a/mert/TER/terAlignment.cpp +++ b/mert/TER/terAlignment.cpp @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. 
@@ -24,163 +24,244 @@ using namespace std; namespace TERCpp { -terAlignment::terAlignment() -{ + terAlignment::terAlignment() + { // vector<string> ref; // vector<string> hyp; // vector<string> aftershift; - // TERshift[] allshifts = null; + // TERshift[] allshifts = null; - numEdits=0; - numWords=0; - bestRef=""; + numEdits=0; + numWords=0; +// bestRef=""; - numIns=0; - numDel=0; - numSub=0; - numSft=0; - numWsf=0; -} -string terAlignment::toString() -{ - stringstream s; - s.str ( "" ); - s << "Original Ref: \t" << join ( " ", ref ) << endl; - s << "Original Hyp: \t" << join ( " ", hyp ) <<endl; - s << "Hyp After Shift:\t" << join ( " ", aftershift ); + numIns=0; + numDel=0; + numSub=0; + numSft=0; + numWsf=0; + averageWords=0; + + } + void terAlignment::set(terAlignment& l_terAlignment) + { + numEdits=l_terAlignment.numEdits; + numWords=l_terAlignment.numWords; + bestRef=l_terAlignment.bestRef; + numIns=l_terAlignment.numIns; + numDel=l_terAlignment.numDel; + numSub=l_terAlignment.numSub; + numSft=l_terAlignment.numSft; + numWsf=l_terAlignment.numWsf; + averageWords=l_terAlignment.averageWords; + ref=l_terAlignment.ref; + hyp=l_terAlignment.hyp; + aftershift=l_terAlignment.aftershift; +// allshifts=l_terAlignment.allshifts; + hyp_int=l_terAlignment.hyp_int; + aftershift_int=l_terAlignment.aftershift_int; + alignment=l_terAlignment.alignment; + allshifts=(*(new vector<terShift>((int)l_terAlignment.allshifts.size()))); + for (int l_i=0; l_i< (int)l_terAlignment.allshifts.size(); l_i++) + { + allshifts.at(l_i).set(l_terAlignment.allshifts.at(l_i)); + } + + } + void terAlignment::set(terAlignment* l_terAlignment) + { + numEdits=l_terAlignment->numEdits; + numWords=l_terAlignment->numWords; + bestRef=l_terAlignment->bestRef; + numIns=l_terAlignment->numIns; + numDel=l_terAlignment->numDel; + numSub=l_terAlignment->numSub; + numSft=l_terAlignment->numSft; + numWsf=l_terAlignment->numWsf; + averageWords=l_terAlignment->averageWords; + ref=l_terAlignment->ref; + 
hyp=l_terAlignment->hyp; + aftershift=l_terAlignment->aftershift; +// allshifts=l_terAlignment->allshifts; + hyp_int=l_terAlignment->hyp_int; + aftershift_int=l_terAlignment->aftershift_int; + alignment=l_terAlignment->alignment; + allshifts=(*(new vector<terShift>((int)l_terAlignment->allshifts.size()))); + for (int l_i=0; l_i< (int)l_terAlignment->allshifts.size(); l_i++) + { + allshifts.at(l_i).set(l_terAlignment->allshifts.at(l_i)); + } + + } + + string terAlignment::toString() + { + stringstream s; + s.str ( "" ); + s << "Original Ref: \t" << join ( " ", ref ) << endl; + s << "Original Hyp: \t" << join ( " ", hyp ) <<endl; + s << "Hyp After Shift:\t" << join ( " ", aftershift ); // s << "Hyp After Shift: " << join ( " ", aftershift ); - s << endl; + s << endl; // string s = "Original Ref: " + join(" ", ref) + "\nOriginal Hyp: " + join(" ", hyp) + "\nHyp After Shift: " + join(" ", aftershift); - if ( ( int ) sizeof ( alignment ) >0 ) { - s << "Alignment: ("; + if ( ( int ) sizeof ( alignment ) >0 ) + { + s << "Alignment: ("; // s += "\nAlignment: ("; - for ( int i = 0; i < ( int ) ( alignment.size() ); i++ ) { - s << alignment[i]; + for ( int i = 0; i < ( int ) ( alignment.size() ); i++ ) + { + s << alignment[i]; // s+=alignment[i]; - } + } // s += ")"; - s << ")"; - } - s << endl; - if ( ( int ) allshifts.size() == 0 ) { + s << ")"; + } + s << endl; + if ( ( int ) allshifts.size() == 0 ) + { // s += "\nNumShifts: 0"; - s << "NumShifts: 0"; - } else { + s << "NumShifts: 0"; + } + else + { // s += "\nNumShifts: " + (int)allshifts.size(); - s << "NumShifts: "<< ( int ) allshifts.size(); - for ( int i = 0; i < ( int ) allshifts.size(); i++ ) { - s << endl << " " ; - s << ( ( terShift ) allshifts[i] ).toString(); + s << "NumShifts: "<< ( int ) allshifts.size(); + for ( int i = 0; i < ( int ) allshifts.size(); i++ ) + { + s << endl << " " ; + s << ( ( terShift ) allshifts[i] ).toString(); // s += "\n " + allshifts[i]; - } - } - s << endl << "Score: " << scoreAv() << 
" (" << numEdits << "/" << averageWords << ")"; + } + } + s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")"; // s += "\nScore: " + score() + " (" + numEdits + "/" + numWords + ")"; - return s.str(); + return s.str(); -} -string terAlignment::join ( string delim, vector<string> arr ) -{ - if ( ( int ) arr.size() == 0 ) return ""; + } + string terAlignment::join ( string delim, vector<string> arr ) + { + if ( ( int ) arr.size() == 0 ) return ""; // if ((int)delim.compare("") == 0) delim = new String(""); // String s = new String(""); - stringstream s; - s.str ( "" ); - for ( int i = 0; i < ( int ) arr.size(); i++ ) { - if ( i == 0 ) { - s << arr.at ( i ); - } else { - s << delim << arr.at ( i ); - } - } - return s.str(); + stringstream s; + s.str ( "" ); + for ( int i = 0; i < ( int ) arr.size(); i++ ) + { + if ( i == 0 ) + { + s << arr.at ( i ); + } + else + { + s << delim << arr.at ( i ); + } + } + return s.str(); // return ""; -} -double terAlignment::score() -{ - if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) ) { - return 1.0; - } - if ( numWords <= 0.0 ) { - return 0.0; - } - return ( double ) numEdits / numWords; -} -double terAlignment::scoreAv() -{ - if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) ) { - return 1.0; - } - if ( averageWords <= 0.0 ) { - return 0.0; - } - return ( double ) numEdits / averageWords; -} - -void terAlignment::scoreDetails() -{ - numIns = numDel = numSub = numWsf = numSft = 0; - if((int)allshifts.size()>0) { - for(int i = 0; i < (int)allshifts.size(); ++i) { - numWsf += allshifts[i].size(); } - numSft = allshifts.size(); - } - - if((int)alignment.size()>0 ) { - for(int i = 0; i < (int)alignment.size(); ++i) { - switch (alignment[i]) { - case 'S': - case 'T': - numSub++; - break; - case 'D': - numDel++; - break; - case 'I': - numIns++; - break; - } + double terAlignment::score() + { + if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) ) + { + return 1.0; + } + if ( numWords <= 0.0 ) + { + return 0.0; 
+ } + return ( double ) numEdits / numWords; } - } - // if(numEdits != numSft + numDel + numIns + numSub) - // System.out.println("** Error, unmatch edit erros " + numEdits + - // " vs " + (numSft + numDel + numIns + numSub)); -} -string terAlignment::printAlignments() -{ - stringstream to_return; - for(int i = 0; i < (int)alignment.size(); ++i) { - char alignInfo=alignment.at(i); - if (alignInfo == 'A' ) { - alignInfo='A'; + double terAlignment::scoreAv() + { + if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) ) + { + return 1.0; + } + if ( averageWords <= 0.0 ) + { + return 0.0; + } + return ( double ) numEdits / averageWords; } - if (i==0) { - to_return << alignInfo; - } else { - to_return << " " << alignInfo; - } + void terAlignment::scoreDetails() + { + numIns = numDel = numSub = numWsf = numSft = 0; + if((int)allshifts.size()>0) + { + for(int i = 0; i < (int)allshifts.size(); ++i) + { + numWsf += allshifts[i].size(); + } + numSft = allshifts.size(); + } + + if((int)alignment.size()>0 ) + { + for(int i = 0; i < (int)alignment.size(); ++i) + { + switch (alignment[i]) + { + case 'S': + case 'T': + numSub++; + break; + case 'D': + numDel++; + break; + case 'I': + numIns++; + break; + } + } + } + // if(numEdits != numSft + numDel + numIns + numSub) + // System.out.println("** Error, unmatch edit erros " + numEdits + + // " vs " + (numSft + numDel + numIns + numSub)); + } + string terAlignment::printAlignments() + { + stringstream to_return; + for(int i = 0; i < (int)alignment.size(); ++i) + { + char alignInfo=alignment.at(i); + if (alignInfo == 'A' ) + { + alignInfo='A'; + } + + if (i==0) + { + to_return << alignInfo; + } + else + { + to_return << " " << alignInfo; + } + } + return to_return.str(); } - return to_return.str(); -} string terAlignment::printAllShifts() { - stringstream to_return; - if ( ( int ) allshifts.size() == 0 ) { + stringstream to_return; + if ( ( int ) allshifts.size() == 0 ) + { // s += "\nNumShifts: 0"; - to_return << "NbrShifts: 0"; - } 
else { + to_return << "NbrShifts: 0"; + } + else + { // s += "\nNumShifts: " + (int)allshifts.size(); - to_return << "NbrShifts: "<< ( int ) allshifts.size(); - for ( int i = 0; i < ( int ) allshifts.size(); i++ ) { - to_return << "\t" ; - to_return << ( ( terShift ) allshifts[i] ).toString(); + to_return << "NbrShifts: "<< ( int ) allshifts.size(); + for ( int i = 0; i < ( int ) allshifts.size(); i++ ) + { + to_return << "\t" ; + to_return << ( ( terShift ) allshifts[i] ).toString(); // s += "\n " + allshifts[i]; - } - } - return to_return.str(); + } + } + return to_return.str(); } }
\ No newline at end of file diff --git a/mert/TER/terAlignment.h b/mert/TER/terAlignment.h index 2af0b7490..e9524df7c 100644 --- a/mert/TER/terAlignment.h +++ b/mert/TER/terAlignment.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA **********************************/ -#ifndef MERT_TER_TERALIGNMENT_H_ -#define MERT_TER_TERALIGNMENT_H_ +#ifndef __TERCPPTERALIGNMENT_H__ +#define __TERCPPTERALIGNMENT_H__ #include <vector> @@ -34,41 +34,44 @@ using namespace std; namespace TERCpp { -class terAlignment -{ -private: -public: - - terAlignment(); - string toString(); - void scoreDetails(); - - vector<string> ref; - vector<string> hyp; - vector<string> aftershift; - vector<terShift> allshifts; - vector<int> hyp_int; - vector<int> aftershift_int; - - double numEdits; - double numWords; - double averageWords; - vector<char> alignment; - string bestRef; - - int numIns; - int numDel; - int numSub; - int numSft; - int numWsf; - - - string join ( string delim, vector<string> arr ); - double score(); - double scoreAv(); - string printAlignments(); - string printAllShifts(); -}; + class terAlignment + { + private: + public: + + vector<string> ref; + vector<string> hyp; + vector<string> aftershift; + vector<terShift> allshifts; + vector<int> hyp_int; + vector<int> aftershift_int; + + double numEdits; + double numWords; + double averageWords; + 
vector<char> alignment; + string bestRef; + + int numIns; + int numDel; + int numSub; + int numSft; + int numWsf; + + + terAlignment(); + string toString(); + void scoreDetails(); + + + string join ( string delim, vector<string> arr ); + double score(); + double scoreAv(); + string printAlignments(); + string printAllShifts(); + void set(terAlignment& l_terAlignment); + void set(terAlignment* l_terAlignment); + }; } #endif
\ No newline at end of file diff --git a/mert/TER/terShift.cpp b/mert/TER/terShift.cpp index 440b4d2ce..e271ad6a7 100644 --- a/mert/TER/terShift.cpp +++ b/mert/TER/terShift.cpp @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -42,32 +42,70 @@ namespace TERCpp // numSft=0; // numWsf=0; // } -terShift::terShift () -{ - start = 0; - end = 0; - moveto = 0; - newloc = 0; - cost=1.0; -} -terShift::terShift ( int _start, int _end, int _moveto, int _newloc ) -{ - start = _start; - end = _end; - moveto = _moveto; - newloc = _newloc; - cost=1.0; -} + terShift::terShift () + { + start = 0; + end = 0; + moveto = 0; + newloc = 0; + cost=1.0; + shifted.clear(); + alignment.clear(); + aftershift.clear(); + } + terShift::terShift ( int _start, int _end, int _moveto, int _newloc ) + { + start = _start; + end = _end; + moveto = _moveto; + newloc = _newloc; + cost=1.0; + } -terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted ) -{ - start = _start; - end = _end; - moveto = _moveto; - newloc = _newloc; - shifted = _shifted; - cost=1.0; -} + terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted ) + { + start = _start; + end = _end; + moveto = _moveto; + newloc = _newloc; + shifted = _shifted; + cost=1.0; + } + void terShift::set(terShift l_terShift) + { + start=l_terShift.start; + end=l_terShift.end; + moveto=l_terShift.moveto; + newloc=l_terShift.newloc; + shifted=l_terShift.shifted; +// alignment=l_terShift.alignment; +// aftershift=l_terShift.aftershift; + } + void 
terShift::set(terShift *l_terShift) + { + start=l_terShift->start; + end=l_terShift->end; + moveto=l_terShift->moveto; + newloc=l_terShift->newloc; + shifted=l_terShift->shifted; +// alignment=l_terShift->alignment; +// aftershift=l_terShift->aftershift; + } + + void terShift::erase() + { + start = 0; + end = 0; + moveto = 0; + newloc = 0; + cost=1.0; + shifted.clear(); + alignment.clear(); + aftershift.clear(); + } + + + // string terShift::vectorToString(vector<string> vec) // { // string retour(""); @@ -78,38 +116,54 @@ terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<stri // return retour; // } -string terShift::toString() -{ - stringstream s; - s.str ( "" ); - s << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]"; - if ( ( int ) shifted.size() > 0 ) { - s << " (" << vectorToString ( shifted ) << ")"; - } - return s.str(); -} + string terShift::toString() + { + stringstream s; + s.str ( "" ); + s << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]"; + if ( ( int ) shifted.size() > 0 ) + { + s << " (" << vectorToString ( shifted ) << ")"; + } +// s<< endl; +// if ( ( int ) shifted.size() > 0 ) +// { +// s << " (" << vectorToString ( alignment ) << ")"; +// } +// s<< endl; +// if ( ( int ) shifted.size() > 0 ) +// { +// s << " (" << vectorToString ( aftershift ) << ")"; +// } + return s.str(); + } -/* The distance of the shift. */ -int terShift::distance() -{ - if ( moveto < start ) { - return start - moveto; - } else if ( moveto > end ) { - return moveto - end; - } else { - return moveto - start; - } -} + /* The distance of the shift. 
*/ + int terShift::distance() + { + if ( moveto < start ) + { + return start - moveto; + } + else if ( moveto > end ) + { + return moveto - end; + } + else + { + return moveto - start; + } + } -bool terShift::leftShift() -{ - return ( moveto < start ); -} + bool terShift::leftShift() + { + return ( moveto < start ); + } -int terShift::size() -{ - return ( end - start ) + 1; -} + int terShift::size() + { + return ( end - start ) + 1; + } // terShift terShift::operator=(terShift t) // { // diff --git a/mert/TER/terShift.h b/mert/TER/terShift.h index 74545e0de..65a812d15 100644 --- a/mert/TER/terShift.h +++ b/mert/TER/terShift.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. 
@@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA **********************************/ -#ifndef MERT_TER_TERSHIFT_H_ -#define MERT_TER_TERSHIFT_H_ +#ifndef __TERCPPTERSHIFT_H__ +#define __TERCPPTERSHIFT_H__ #include <vector> @@ -34,32 +34,35 @@ using namespace Tools; namespace TERCpp { -class terShift -{ -private: -public: + class terShift + { + private: + public: - terShift(); - terShift ( int _start, int _end, int _moveto, int _newloc ); - terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted ); - string toString(); - int distance() ; - bool leftShift(); - int size(); + terShift(); + terShift ( int _start, int _end, int _moveto, int _newloc ); + terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted ); + string toString(); + int distance() ; + bool leftShift(); + int size(); // terShift operator=(terShift t); // string vectorToString(vector<string> vec); - int start; - int end; - int moveto; - int newloc; - vector<string> shifted; // The words we shifted - vector<char> alignment ; // for pra_more output - vector<string> aftershift; // for pra_more output - // This is used to store the cost of a shift, so we don't have to - // calculate it multiple times. - double cost; -}; + int start; + int end; + int moveto; + int newloc; + vector<string> shifted; // The words we shifted + vector<char> alignment ; // for pra_more output + vector<string> aftershift; // for pra_more output + // This is used to store the cost of a shift, so we don't have to + // calculate it multiple times. + double cost; + void set(terShift l_terShift); + void set(terShift *l_terShift); + void erase(); + }; } #endif
\ No newline at end of file diff --git a/mert/TER/tercalc.cpp b/mert/TER/tercalc.cpp index c4629c639..8a84b49b3 100644 --- a/mert/TER/tercalc.cpp +++ b/mert/TER/tercalc.cpp @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -35,724 +35,1238 @@ using namespace Tools; namespace TERCpp { -terCalc::terCalc() -{ - TAILLE_PERMUT_MAX = 50; - infinite = 999999.0; - shift_cost = 1.0; - insert_cost = 1.0; - delete_cost = 1.0; - substitute_cost = 1.0; - match_cost = 0.0; - NBR_SEGS_EVALUATED = 0; - NBR_PERMUTS_CONSID = 0; - NBR_BS_APPELS = 0; - TAILLE_BEAM = 20; - DIST_MAX_PERMUT = 50; - PRINT_DEBUG = false; - hypSpans.clear(); - refSpans.clear(); -} - - -terAlignment terCalc::WERCalculation ( vector< string > hyp , vector< string > ref ) -{ + terCalc::terCalc() + { + TAILLE_PERMUT_MAX = 10; + NBR_PERMUT_MAX = 10; + infinite = 99999.0; + shift_cost = 1.0; + insert_cost = 1.0; + delete_cost = 1.0; + substitute_cost = 1.0; + match_cost = 0.0; + NBR_SEGS_EVALUATED = 0; + NBR_PERMUTS_CONSID = 0; + NBR_BS_APPELS = 0; + TAILLE_BEAM = 10; + DIST_MAX_PERMUT = 25; + PRINT_DEBUG = false; + hypSpans.clear(); + refSpans.clear(); + CALL_TER_ALIGN=0; + CALL_CALC_PERMUT=0; + CALL_FIND_BSHIFT=0; + MAX_LENGTH_SENTENCE=10; + S = new vector < vector < double > >(MAX_LENGTH_SENTENCE, std::vector<double>(MAX_LENGTH_SENTENCE,0.0)); + P = new vector < vector < char > >(MAX_LENGTH_SENTENCE, std::vector<char>(MAX_LENGTH_SENTENCE,' ')); + } - return minimizeDistanceEdition ( hyp, ref, hypSpans ); + terCalc::~terCalc() + { + delete(S); + delete(P); + } -} -terAlignment terCalc::TER 
( std::vector< int > hyp, std::vector< int > ref ) -{ - stringstream s; - s.str ( "" ); - string stringRef ( "" ); - string stringHyp ( "" ); - for ( vector<int>::iterator l_it = ref.begin(); l_it != ref.end(); l_it++ ) { - if ( l_it == ref.begin() ) { - s << ( *l_it ); - } else { - s << " " << ( *l_it ); + terAlignment terCalc::WERCalculation ( vector< string >& hyp , vector< string >& ref ) + { + + return minimizeDistanceEdition ( hyp, ref, hypSpans ); + } - } - stringRef = s.str(); - s.str ( "" ); - for ( vector<int>::iterator l_itHyp = hyp.begin(); l_itHyp != hyp.end(); l_itHyp++ ) { - if ( l_itHyp == hyp.begin() ) { - s << ( *l_itHyp ); - } else { - s << " " << ( *l_itHyp ); + + terAlignment terCalc::TER ( vector< int >& hyp, vector< int >& ref ) + { + stringstream s; + s.str ( "" ); + string stringRef ( "" ); + string stringHyp ( "" ); + for ( vector<int>::iterator l_it = ref.begin(); l_it != ref.end(); l_it++ ) + { + if ( l_it == ref.begin() ) + { + s << ( *l_it ); + } + else + { + s << " " << ( *l_it ); + } + } + stringRef = s.str(); + s.str ( "" ); + for ( vector<int>::iterator l_itHyp = hyp.begin(); l_itHyp != hyp.end(); l_itHyp++ ) + { + if ( l_itHyp == hyp.begin() ) + { + s << ( *l_itHyp ); + } + else + { + s << " " << ( *l_itHyp ); + } + } + stringHyp = s.str(); + s.str ( "" ); + vector<string> l_vref=stringToVector ( stringRef , " " ); + vector<string> l_vhyp=stringToVector ( stringHyp , " " ); + return TER ( l_vhyp , l_vref); } - } - stringHyp = s.str(); - s.str ( "" ); - return TER ( stringToVector ( stringRef , " " ), stringToVector ( stringHyp , " " ) ); -} -hashMapInfos terCalc::createConcordMots ( vector<string> hyp, vector<string> ref ) -{ - hashMap tempHash; - hashMapInfos retour; - for ( int i = 0; i < ( int ) hyp.size(); i++ ) { - tempHash.addHasher ( hyp.at ( i ), "" ); - } - bool cor[ref.size() ]; - for ( int i = 0; i < ( int ) ref.size(); i++ ) { - if ( tempHash.trouve ( ( string ) ref.at ( i ) ) ) { - cor[i] = true; - } else { - cor[i] = 
false; - } - } - for ( int start = 0; start < ( int ) ref.size(); start++ ) { - if ( cor[start] ) { - for ( int end = start; ( ( end < ( int ) ref.size() ) && ( end - start <= TAILLE_PERMUT_MAX ) && ( cor[end] ) ); end++ ) { - vector<string> ajouter = subVector ( ref, start, end + 1 ); - string ajouterString = vectorToString ( ajouter ); - vector<int> values = retour.getValue ( ajouterString ); - values.push_back ( start ); - if ( values.size() > 1 ) { - retour.setValue ( ajouterString, values ); - } else { - retour.addValue ( ajouterString, values ); + hashMapInfos terCalc::createConcordMots ( vector< string >& hyp, vector< string >& ref ) + { + hashMap tempHash; + hashMapInfos retour; + for ( int i = 0; i < ( int ) hyp.size(); i++ ) + { + tempHash.addHasher ( hyp.at ( i ), "" ); + } + bool cor[ref.size() ]; + for ( int i = 0; i < ( int ) ref.size(); i++ ) + { + if ( tempHash.trouve ( ( string ) ref.at ( i ) ) ) + { + cor[i] = true; + } + else + { + cor[i] = false; + } + } + for ( int start = 0; start < ( int ) ref.size(); start++ ) + { + if ( cor[start] ) + { + for ( int end = start; ( ( end < ( int ) ref.size() ) && ( end - start <= TAILLE_PERMUT_MAX ) && ( cor[end] ) );end++ ) + { + vector<string> ajouter = subVector ( ref, start, end + 1 ); + string ajouterString = vectorToString ( ajouter ); + vector<int> values = retour.getValue ( ajouterString ); + values.push_back ( start ); + if ( values.size() > 1 ) + { + retour.setValue ( ajouterString, values ); + } + else + { + retour.addValue ( ajouterString, values ); + } + } + } } - } + return retour; } - } - return retour; -} - -bool terCalc::trouverIntersection ( vecInt refSpan, vecInt hypSpan ) -{ - if ( ( refSpan.at ( 1 ) >= hypSpan.at ( 0 ) ) && ( refSpan.at ( 0 ) <= hypSpan.at ( 1 ) ) ) { - return true; - } - return false; -} + bool terCalc::trouverIntersection ( vecInt& refSpan, vecInt& hypSpan ) + { + if ( ( refSpan.at ( 1 ) >= hypSpan.at ( 0 ) ) && ( refSpan.at ( 0 ) <= hypSpan.at ( 1 ) ) ) + { + return 
true; + } + return false; + } -terAlignment terCalc::minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans ) -{ - double current_best = infinite; - double last_best = infinite; - int first_good = 0; - int current_first_good = 0; - int last_good = -1; - int cur_last_good = 0; - int last_peak = 0; - int cur_last_peak = 0; - int i, j; - double cost, icost, dcost; - double score; + terAlignment terCalc::minimizeDistanceEdition ( vector< string >& hyp, vector< string >& ref, vector< vecInt >& curHypSpans ) + { + double current_best = infinite; + double last_best = infinite; + int first_good = 0; + int current_first_good = 0; + int last_good = -1; + int cur_last_good = 0; + int last_peak = 0; + int cur_last_peak = 0; + int i=0; + int j=0; + int ref_size=0 ; + ref_size=( int ) ref.size(); + int hyp_size=0; + hyp_size=( int ) hyp.size(); + double cost, icost, dcost; + double score; + delete(S); + delete(P); + S = new vector < vector < double > >(ref_size+1, std::vector<double>(hyp_size+1,-1.0)); + P = new vector < vector < char > >(ref_size+1, std::vector<char>(hyp_size+1,'0')); - NBR_BS_APPELS++; + + NBR_BS_APPELS++; +// cerr << "Appels : " << NBR_BS_APPELS << endl; + +// for ( i = 0; i <= ref_size; i++ ) +// { +// for ( j = 0; j <= hyp_size; j++ ) +// { +// S->at(i).at(j) = -1.0; +// P->at(i).at(j) = '0'; +// } +// } + S->at(0).at(0) = 0.0; + for ( j = 0; j <= hyp_size; j++ ) + { + last_best = current_best; + current_best = infinite; + first_good = current_first_good; + current_first_good = -1; + last_good = cur_last_good; + cur_last_good = -1; + last_peak = cur_last_peak; + cur_last_peak = 0; + for ( i = first_good; i <= ref_size; i++ ) + { + if ( i > last_good ) + { + break; + } + if ( S->at(i).at(j) < 0 ) + { + continue; + } + score = S->at(i).at(j); + if ( ( j < hyp_size ) && ( score > last_best + TAILLE_BEAM ) ) + { + continue; + } + if ( current_first_good == -1 ) + { + current_first_good = i ; + } + if ( ( i < ref_size ) && ( 
j < hyp_size ) ) + { + if ( ( int ) refSpans.size() == 0 || ( int ) hypSpans.size() == 0 || trouverIntersection ( refSpans.at ( i ), curHypSpans.at ( j ) ) ) + { + if ( ( int ) ( ref.at ( i ).compare ( hyp.at ( j ) ) ) == 0 ) + { + cost = match_cost + score; + if ( ( S->at(i+1).at(j+1) == -1 ) || ( cost < S->at(i+1).at(j+1) ) ) + { + S->at(i+1).at(j+1) = cost; + P->at(i+1).at(j+1) = 'A'; + } + if ( cost < current_best ) + { + current_best = cost; + } + if ( current_best == cost ) + { + cur_last_peak = i + 1; + } + } + else + { + cost = substitute_cost + score; + if ( ( S->at(i+1).at(j+1) < 0 ) || ( cost < S->at(i+1).at(j+1) ) ) + { + S->at(i+1).at(j+1) = cost; + P->at(i+1).at(j+1) = 'S'; + if ( cost < current_best ) + { + current_best = cost; + } + if ( current_best == cost ) + { + cur_last_peak = i + 1 ; + } + } + } + } + } + cur_last_good = i + 1; + if ( j < hyp_size ) + { + icost = score + insert_cost; + if ( ( S->at(i).at(j+1) < 0 ) || ( S->at(i).at(j+1) > icost ) ) + { + S->at(i).at(j+1) = icost; + P->at(i).at(j+1) = 'I'; + if ( ( cur_last_peak < i ) && ( current_best == icost ) ) + { + cur_last_peak = i; + } + } + } + if ( i < ref_size ) + { + dcost = score + delete_cost; + if ( ( S->at(i+1).at(j) < 0.0 ) || ( S->at(i+1).at(j) > dcost ) ) + { + S->at(i+1).at(j) = dcost; + P->at(i+1).at(j) = 'D'; + if ( i >= last_good ) + { + last_good = i + 1 ; + } + } + } + } + } - for ( i = 0; i <= ( int ) ref.size(); i++ ) { - for ( j = 0; j <= ( int ) hyp.size(); j++ ) { - S[i][j] = -1.0; - P[i][j] = '0'; - } - } - S[0][0] = 0.0; - for ( j = 0; j <= ( int ) hyp.size(); j++ ) { - last_best = current_best; - current_best = infinite; - first_good = current_first_good; - current_first_good = -1; - last_good = cur_last_good; - cur_last_good = -1; - last_peak = cur_last_peak; - cur_last_peak = 0; - for ( i = first_good; i <= ( int ) ref.size(); i++ ) { - if ( i > last_good ) { - break; - } - if ( S[i][j] < 0 ) { - continue; - } - score = S[i][j]; - if ( ( j < ( int ) hyp.size() 
) && ( score > last_best + TAILLE_BEAM ) ) { - continue; - } - if ( current_first_good == -1 ) { - current_first_good = i ; - } - if ( ( i < ( int ) ref.size() ) && ( j < ( int ) hyp.size() ) ) { - if ( ( int ) refSpans.size() == 0 || ( int ) hypSpans.size() == 0 || trouverIntersection ( refSpans.at ( i ), curHypSpans.at ( j ) ) ) { - if ( ( int ) ( ref.at ( i ).compare ( hyp.at ( j ) ) ) == 0 ) { - cost = match_cost + score; - if ( ( S[i+1][j+1] == -1 ) || ( cost < S[i+1][j+1] ) ) { - S[i+1][j+1] = cost; - P[i+1][j+1] = 'A'; + int tracelength = 0; + i = ref.size(); + j = hyp.size(); + while ( ( i > 0 ) || ( j > 0 ) ) + { + tracelength++; + if ( P->at(i).at(j) == 'A' ) + { + i--; + j--; } - if ( cost < current_best ) { - current_best = cost; + else + if ( P->at(i).at(j) == 'S' ) + { + i--; + j--; + } + else + if ( P->at(i).at(j) == 'D' ) + { + i--; + } + else + if ( P->at(i).at(j) == 'I' ) + { + j--; + } + else + { + cerr << "ERROR : terCalc::minimizeDistanceEdition : Invalid path : " << P->at(i).at(j) << endl; + exit ( -1 ); + } + } + vector<char> path ( tracelength ); + i = ref.size(); + j = hyp.size(); + while ( ( i > 0 ) || ( j > 0 ) ) + { + path[--tracelength] = P->at(i).at(j); + if ( P->at(i).at(j) == 'A' ) + { + i--; + j--; } - if ( current_best == cost ) { - cur_last_peak = i + 1; + else + if ( P->at(i).at(j) == 'S' ) + { + i--; + j--; + } + else + if ( P->at(i).at(j) == 'D' ) + { + i--; + } + else + if ( P->at(i).at(j) == 'I' ) + { + j--; + } + } + terAlignment to_return; + to_return.numWords = ref_size; + to_return.alignment = path; + to_return.numEdits = S->at(ref_size).at(hyp_size); + to_return.hyp = hyp; + to_return.ref = ref; + to_return.averageWords = ref_size; + if ( PRINT_DEBUG ) + { + cerr << "BEGIN DEBUG : terCalc::minimizeDistanceEdition : to_return :" << endl << to_return.toString() << endl << "END DEBUG" << endl; + } + return to_return; + + } + void terCalc::minimizeDistanceEdition ( vector< string >& hyp, vector< string >& ref, vector< vecInt 
>& curHypSpans, terAlignment* to_return ) + { + double current_best = infinite; + double last_best = infinite; + int first_good = 0; + int current_first_good = 0; + int last_good = -1; + int cur_last_good = 0; + int last_peak = 0; + int cur_last_peak = 0; + int i=0; + int j=0; + int ref_size=0 ; + ref_size=( int ) ref.size(); + int hyp_size=0; + hyp_size=( int ) hyp.size(); + double cost, icost, dcost; + double score; + delete(S); + delete(P); + S = new vector < vector < double > >(ref_size+1, std::vector<double>(hyp_size+1,-1.0)); + P = new vector < vector < char > >(ref_size+1, std::vector<char>(hyp_size+1,'0')); + + NBR_BS_APPELS++; +// cerr << "Appels : " << NBR_BS_APPELS << endl; + +// for ( i = 0; i <= ref_size; i++ ) +// { +// for ( j = 0; j <= hyp_size; j++ ) +// { +// S->at(i).at(j) = -1.0; +// P->at(i).at(j) = '0'; +// } +// } + S->at(0).at(0) = 0.0; + for ( j = 0; j <= hyp_size; j++ ) + { + last_best = current_best; + current_best = infinite; + first_good = current_first_good; + current_first_good = -1; + last_good = cur_last_good; + cur_last_good = -1; + last_peak = cur_last_peak; + cur_last_peak = 0; + for ( i = first_good; i <= ref_size; i++ ) + { + if ( i > last_good ) + { + break; + } + if (S->at(i).at(j) < 0 ) + { + continue; + } + score = S->at(i).at(j); + if ( ( j < hyp_size ) && ( score > last_best + TAILLE_BEAM ) ) + { + continue; + } + if ( current_first_good == -1 ) + { + current_first_good = i ; + } + if ( ( i < ref_size ) && ( j < hyp_size ) ) + { + if ( ( int ) refSpans.size() == 0 || ( int ) hypSpans.size() == 0 || trouverIntersection ( refSpans.at ( i ), curHypSpans.at ( j ) ) ) + { + if ( ( int ) ( ref.at ( i ).compare ( hyp.at ( j ) ) ) == 0 ) + { + cost = match_cost + score; + if ( ( S->at(i+1).at(j+1) == -1 ) || ( cost < S->at(i+1).at(j+1) ) ) + { + S->at(i+1).at(j+1) = cost; + P->at(i+1).at(j+1) = 'A'; + } + if ( cost < current_best ) + { + current_best = cost; + } + if ( current_best == cost ) + { + cur_last_peak = i + 1; + } + } + 
else + { + cost = substitute_cost + score; + if ( ( S->at(i+1).at(j+1) < 0 ) || ( cost < S->at(i+1).at(j+1) ) ) + { + S->at(i+1).at(j+1) = cost; + P->at(i+1).at(j+1) = 'S'; + if ( cost < current_best ) + { + current_best = cost; + } + if ( current_best == cost ) + { + cur_last_peak = i + 1 ; + } + } + } + } + } + cur_last_good = i + 1; + if ( j < hyp_size ) + { + icost = score + insert_cost; + if ( ( S->at(i).at(j+1) < 0 ) || ( S->at(i).at(j+1) > icost ) ) + { + S->at(i).at(j+1) = icost; + P->at(i).at(j+1) = 'I'; + if ( ( cur_last_peak < i ) && ( current_best == icost ) ) + { + cur_last_peak = i; + } + } + } + if ( i < ref_size ) + { + dcost = score + delete_cost; + if ( ( S->at(i+1).at(j) < 0.0 ) || ( S->at(i+1).at(j) > dcost ) ) + { + S->at(i+1).at(j) = dcost; + P->at(i+1).at(j) = 'D'; + if ( i >= last_good ) + { + last_good = i + 1 ; + } + } + } } - } else { - cost = substitute_cost + score; - if ( ( S[i+1][j+1] < 0 ) || ( cost < S[i+1][j+1] ) ) { - S[i+1][j+1] = cost; - P[i+1][j+1] = 'S'; - if ( cost < current_best ) { - current_best = cost; - } - if ( current_best == cost ) { - cur_last_peak = i + 1 ; - } + } + + + int tracelength = 0; + i = ref_size;; + j = hyp_size; + while ( ( i > 0 ) || ( j > 0 ) ) + { + tracelength++; + if (P->at(i).at(j) == 'A' ) + { + i--; + j--; } - } + else + if (P->at(i).at(j) == 'S' ) + { + i--; + j--; + } + else + if (P->at(i).at(j) == 'D' ) + { + i--; + } + else + if (P->at(i).at(j) == 'I' ) + { + j--; + } + else + { + cerr << "ERROR : terCalc::minimizeDistanceEdition : Invalid path : " <<P->at(i).at(j) << endl; + exit ( -1 ); + } } - } - cur_last_good = i + 1; - if ( j < ( int ) hyp.size() ) { - icost = score + insert_cost; - if ( ( S[i][j+1] < 0 ) || ( S[i][j+1] > icost ) ) { - S[i][j+1] = icost; - P[i][j+1] = 'I'; - if ( ( cur_last_peak < i ) && ( current_best == icost ) ) { - cur_last_peak = i; - } + vector<char> path ( tracelength ); + i = ref_size; + j = hyp_size; + while ( ( i > 0 ) || ( j > 0 ) ) + { + path[--tracelength] 
=P->at(i).at(j); + if (P->at(i).at(j) == 'A' ) + { + i--; + j--; + } + else + if (P->at(i).at(j) == 'S' ) + { + i--; + j--; + } + else + if (P->at(i).at(j) == 'D' ) + { + i--; + } + else + if (P->at(i).at(j) == 'I' ) + { + j--; + } } - } - if ( i < ( int ) ref.size() ) { - dcost = score + delete_cost; - if ( ( S[ i+1][ j] < 0.0 ) || ( S[i+1][j] > dcost ) ) { - S[i+1][j] = dcost; - P[i+1][j] = 'D'; - if ( i >= last_good ) { - last_good = i + 1 ; - } +// terAlignment to_return; + to_return->numWords = ref_size; + to_return->alignment = path; + to_return->numEdits = S->at(ref_size).at(hyp_size); + to_return->hyp = hyp; + to_return->ref = ref; + to_return->averageWords = ref_size; + if ( PRINT_DEBUG ) + { + cerr << "BEGIN DEBUG : terCalc::minimizeDistanceEdition : to_return :" << endl << to_return->toString() << endl << "END DEBUG" << endl; } - } - } - } - - - int tracelength = 0; - i = ref.size(); - j = hyp.size(); - while ( ( i > 0 ) || ( j > 0 ) ) { - tracelength++; - if ( P[i][j] == 'A' ) { - i--; - j--; - } else if ( P[i][j] == 'S' ) { - i--; - j--; - } else if ( P[i][j] == 'D' ) { - i--; - } else if ( P[i][j] == 'I' ) { - j--; - } else { - cerr << "ERROR : terCalc::minimizeDistanceEdition : Invalid path : " << P[i][j] << endl; - exit ( -1 ); - } - } - vector<char> path ( tracelength ); - i = ref.size(); - j = hyp.size(); - while ( ( i > 0 ) || ( j > 0 ) ) { - path[--tracelength] = P[i][j]; - if ( P[i][j] == 'A' ) { - i--; - j--; - } else if ( P[i][j] == 'S' ) { - i--; - j--; - } else if ( P[i][j] == 'D' ) { - i--; - } else if ( P[i][j] == 'I' ) { - j--; +// return to_return; + } - } - terAlignment to_return; - to_return.numWords = ref.size(); - to_return.alignment = path; - to_return.numEdits = S[ref.size() ][hyp.size() ]; - to_return.hyp = hyp; - to_return.ref = ref; - to_return.averageWords = (int)ref.size(); - if ( PRINT_DEBUG ) { - cerr << "BEGIN DEBUG : terCalc::minimizeDistanceEdition : to_return :" << endl << to_return.toString() << endl << "END DEBUG" << 
endl; - } - return to_return; -} -terAlignment terCalc::TER ( vector<string> hyp, vector<string> ref ) -{ - hashMapInfos rloc = createConcordMots ( hyp, ref ); - terAlignment cur_align = minimizeDistanceEdition ( hyp, ref, hypSpans ); - vector<string> cur = hyp; - cur_align.hyp = hyp; - cur_align.ref = ref; - cur_align.aftershift = hyp; - double edits = 0; + + terAlignment terCalc::TER ( vector<string>& hyp, vector<string>& ref ) + { + hashMapInfos rloc = createConcordMots ( hyp, ref ); + terAlignment cur_align = minimizeDistanceEdition ( hyp, ref, hypSpans ); + vector<string> cur = hyp; + cur_align.hyp = hyp; + cur_align.ref = ref; + cur_align.aftershift = hyp; + double edits = 0; // int numshifts = 0; - vector<terShift> allshifts; + vector<terShift> * allshifts=new vector<terShift>(0); + bestShiftStruct * returns=new bestShiftStruct(); // cerr << "Initial Alignment:" << endl << cur_align.toString() <<endl; - if ( PRINT_DEBUG ) { - cerr << "BEGIN DEBUG : terCalc::TER : cur_align :" << endl << cur_align.toString() << endl << "END DEBUG" << endl; - } - while ( true ) { - bestShiftStruct returns; - returns = findBestShift ( cur, hyp, ref, rloc, cur_align ); - if ( returns.m_empty ) { - break; - } - terShift bestShift = returns.m_best_shift; - cur_align = returns.m_best_align; - edits += bestShift.cost; - bestShift.alignment = cur_align.alignment; - bestShift.aftershift = cur_align.aftershift; - allshifts.push_back ( bestShift ); - cur = cur_align.aftershift; - } - terAlignment to_return; - to_return = cur_align; - to_return.allshifts = allshifts; - to_return.numEdits += edits; - NBR_SEGS_EVALUATED++; - return to_return; -} -bestShiftStruct terCalc::findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment med_align ) -{ - bestShiftStruct to_return; - bool anygain = false; - bool herr[ ( int ) hyp.size() ]; - bool rerr[ ( int ) ref.size() ]; - int ralign[ ( int ) ref.size() ]; - calculateTerAlignment ( med_align, herr, 
rerr, ralign ); - vector<vecTerShift> poss_shifts; - - if ( PRINT_DEBUG ) { - cerr << "BEGIN DEBUG : terCalc::findBestShift (after the calculateTerAlignment call) :" << endl; - cerr << "indices: "; - for (int l_i=0; l_i < ( int ) ref.size() ; l_i++) { - cerr << l_i << "\t"; - } - cerr << endl; - cerr << "hyp : \t"<<vectorToString(hyp ,"\t") << endl; - cerr << "cur : \t"<<vectorToString(cur ,"\t") << endl; - cerr << "ref : \t"<<vectorToString(ref ,"\t") << endl; - cerr << "herr : "<<vectorToString(herr,"\t",( int ) hyp.size()) << " | " << ( int ) hyp.size() <<endl; - cerr << "rerr : "<<vectorToString(rerr,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() <<endl; - cerr << "ralign : "<< vectorToString(ralign,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() << endl; - cerr << "END DEBUG " << endl; - } - poss_shifts = calculerPermutations ( cur, ref, rloc, med_align, herr, rerr, ralign ); - double curerr = med_align.numEdits; - if ( PRINT_DEBUG ) { - cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl; - cerr << "Possible Shifts:" << endl; - for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- ) { - for ( int j = 0; j < ( int ) ( poss_shifts.at ( i ) ).size(); j++ ) { - cerr << " [" << i << "] " << ( ( poss_shifts.at ( i ) ).at ( j ) ).toString() << endl; - } + if ( PRINT_DEBUG ) + { + cerr << "BEGIN DEBUG : terCalc::TER : cur_align :" << endl << cur_align.toString() << endl << "END DEBUG" << endl; + } + while ( true ) + { + + returns=findBestShift ( cur, hyp, ref, rloc, cur_align ); +// cerr << "****************************************************************** " << returns->getEmpty() << endl; + if ( returns->getEmpty()) + { + break; + } + terShift bestShift = (*(returns->m_best_shift)); + cur_align = (*(returns->m_best_align)); + edits += bestShift.cost; + bestShift.alignment = cur_align.alignment; + bestShift.aftershift = cur_align.aftershift; + allshifts->push_back ( bestShift ); + cur = cur_align.aftershift; + delete(returns); + } + if ( 
PRINT_DEBUG ) + { + cerr << "BEGIN DEBUG : terCalc::TER : Final to return :" << endl << cur_align.toString() << endl << "END DEBUG" << endl; + } + terAlignment to_return; + to_return = cur_align; + to_return.allshifts = (*(allshifts)); + to_return.numEdits += edits; + NBR_SEGS_EVALUATED++; + return to_return; } - cerr << endl; - cerr << "END DEBUG " << endl; - } + bestShiftStruct * terCalc::findBestShift ( vector<string>& cur, vector<string>& hyp, vector<string>& ref, hashMapInfos& rloc, terAlignment& med_align ) + { + CALL_FIND_BSHIFT++; +// cerr << "CALL_FIND_BSHIFT " << CALL_FIND_BSHIFT <<endl; +// to_return->m_empty = new bool(false); + bool anygain = false; + vector <bool> * herr = new vector<bool>(( int ) hyp.size() + 1 ); + vector <bool> * rerr = new vector<bool>( ( int ) ref.size() + 1 ); + vector <int> * ralign = new vector<int>( ( int ) ref.size() + 1 ); + int l_i,i,j,s; + for (i = 0 ; i< ( int ) hyp.size() + 1 ; i++) + { + herr->at(i)=false; + } + for (i = 0 ; i< ( int ) ref.size() + 1 ; i++) + { + rerr->at(i)=false; + ralign->at(i)=-1; + } + calculateTerAlignment ( med_align, herr, rerr, ralign ); + vector<vecTerShift> * poss_shifts = new vector< vector<terShift> >(0) ; + terAlignment * cur_best_align = new terAlignment(); + terShift * cur_best_shift = new terShift(); + double cur_best_shift_cost = 0.0; + vector<string> shiftarr; + vector<vecInt> curHypSpans; + terShift * curshift = new terShift(); + alignmentStruct shiftReturns; + terAlignment * curalign = new terAlignment() ; + + + if ( PRINT_DEBUG ) + { + cerr << "BEGIN DEBUG : terCalc::findBestShift (after the calculateTerAlignment call) :" << endl; + cerr << "indices: "; + for (l_i=0; l_i < ( int ) ref.size() ; l_i++) + { + cerr << l_i << "\t"; + } + cerr << endl; + cerr << "hyp : \t"<<vectorToString(hyp ,"\t") << endl; + cerr << "cur : \t"<<vectorToString(cur ,"\t") << endl; + cerr << "ref : \t"<<vectorToString(ref ,"\t") << endl; + cerr << "herr : "<<vectorToString(herr,"\t",( int ) hyp.size()) 
<< " | " << ( int ) hyp.size() <<endl; + cerr << "rerr : "<<vectorToString(rerr,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() <<endl; + cerr << "ralign : "<< vectorToString(ralign,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() << endl; + cerr << "END DEBUG " << endl; + } + poss_shifts = calculerPermutations ( cur, ref, rloc, med_align, herr, rerr, ralign ); + double curerr = med_align.numEdits; + if ( PRINT_DEBUG ) + { + cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl; + cerr << "Possible Shifts:" << endl; + for ( i = ( int ) poss_shifts->size() - 1; i >= 0; i-- ) + { + for ( j = 0; j < ( int ) ( poss_shifts->at ( i ) ).size(); j++ ) + { + cerr << " [" << i << "] " << ( ( poss_shifts->at ( i ) ).at ( j ) ).toString() << endl; + } + } + cerr << endl; + cerr << "END DEBUG " << endl; + } // exit(0); - double cur_best_shift_cost = 0.0; - terAlignment cur_best_align = med_align; - terShift cur_best_shift; - + cur_best_align->set(med_align); + for ( i = ( int ) poss_shifts->size() - 1; i >= 0; i-- ) + { + if ( PRINT_DEBUG ) + { + cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl; + cerr << "Considering shift of length " << i << " (" << ( poss_shifts->at ( i ) ).size() << ")" << endl; + cerr << "END DEBUG " << endl; + } + /* Consider shifts of length i+1 */ + double curfix = curerr - ( cur_best_shift_cost + cur_best_align->numEdits ); + double maxfix = ( 2 * ( 1 + i ) ); + if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) + { + break; + } + else + { + for ( s = 0; s < ( int ) ( poss_shifts->at ( i ) ).size(); s++ ) + { + curfix = curerr - ( cur_best_shift_cost + cur_best_align->numEdits ); + if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) + { + break; + } + else + { + curshift->set(( poss_shifts->at ( i ) ).at ( s )); + if ( PRINT_DEBUG ) + { + cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl; + cerr << "cur : "<< join(" ",cur) << endl; + cerr << "shift 
size : "<< i << endl; + cerr << "shift number : "<< s << endl; + cerr << "size of shift size : "<< ( int ) ( poss_shifts->at ( i ) ).size() << endl; + cerr << "curshift : "<< curshift->toString() << endl; + + } +// alignmentStruct shiftReturns; + shiftReturns.set(permuter ( cur, curshift )); + shiftarr = shiftReturns.nwords; + curHypSpans = shiftReturns.aftershift; + if ( PRINT_DEBUG ) + { + cerr << "shiftarr : "<< join(" ",shiftarr) << endl; + cerr << "curHypSpans size : "<< (int)curHypSpans.size() << endl; + cerr << "END DEBUG " << endl; + } +// terAlignment tmp=minimizeDistanceEdition ( shiftarr, ref, curHypSpans ); + minimizeDistanceEdition ( shiftarr, ref, curHypSpans, curalign ); +// curalign->set(tmp); - for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- ) { - if ( PRINT_DEBUG ) { - cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl; - cerr << "Considering shift of length " << i << " (" << ( poss_shifts.at ( i ) ).size() << ")" << endl; - cerr << "END DEBUG " << endl; - } - /* Consider shifts of length i+1 */ - double curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits ); - double maxfix = ( 2 * ( 1 + i ) ); - if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) { - break; - } + curalign->hyp = hyp; + curalign->ref = ref; + curalign->aftershift = shiftarr; - for ( int s = 0; s < ( int ) ( poss_shifts.at ( i ) ).size(); s++ ) { - curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits ); - if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) { - break; - } - terShift curshift = ( poss_shifts.at ( i ) ).at ( s ); - if ( PRINT_DEBUG ) { - cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl; - cerr << "cur : "<< join(" ",cur) << endl; - cerr << "curshift : "<< curshift.toString() << endl; - - } - alignmentStruct shiftReturns = permuter ( cur, curshift ); - vector<string> shiftarr = shiftReturns.nwords; - vector<vecInt> curHypSpans = 
shiftReturns.aftershift; - - if ( PRINT_DEBUG ) { - cerr << "shiftarr : "<< join(" ",shiftarr) << endl; -// cerr << "curHypSpans : "<< curHypSpans.toString() << endl; - cerr << "END DEBUG " << endl; - } - terAlignment curalign = minimizeDistanceEdition ( shiftarr, ref, curHypSpans ); - - curalign.hyp = hyp; - curalign.ref = ref; - curalign.aftershift = shiftarr; - - - double gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost ); - - // if (DEBUG) { - // string testeuh=terAlignment join(" ", shiftarr); - if ( PRINT_DEBUG ) { - cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl; - cerr << "Gain for " << curshift.toString() << " is " << gain << ". (result: [" << curalign.join ( " ", shiftarr ) << "]" << endl; - cerr << "Details of gains : gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost )"<<endl; - cerr << "Details of gains : gain = ("<<cur_best_align.numEdits << "+" << cur_best_shift_cost << ") - (" << curalign.numEdits << "+" << curshift.cost << ")"<<endl; - cerr << "" << curalign.toString() << "\n" << endl; - cerr << "END DEBUG " << endl; - } - // } - // - if ( ( gain > 0 ) || ( ( cur_best_shift_cost == 0 ) && ( gain == 0 ) ) ) { - anygain = true; - cur_best_shift = curshift; - cur_best_shift_cost = curshift.cost; - cur_best_align = curalign; - // if (DEBUG) - if ( PRINT_DEBUG ) { - cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl; - cerr << "Tmp Choosing shift: " << cur_best_shift.toString() << " gives:\n" << cur_best_align.toString() << "\n" << endl; - cerr << "END DEBUG " << endl; - } - } - } - } - if ( anygain ) { - to_return.m_best_shift = cur_best_shift; - to_return.m_best_align = cur_best_align; - to_return.m_empty = false; - } else { - to_return.m_empty = true; - } - return to_return; -} -void terCalc::calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign ) -{ - int hpos = -1; - int rpos = -1; - if ( PRINT_DEBUG ) { - - cerr << 
"BEGIN DEBUG : terCalc::calculateTerAlignment : " << endl << align.toString() << endl; - cerr << "END DEBUG " << endl; - } - for ( int i = 0; i < ( int ) align.alignment.size(); i++ ) { - herr[i] = false; - rerr[i] = false; - ralign[i] = -1; - } - for ( int i = 0; i < ( int ) align.alignment.size(); i++ ) { - char sym = align.alignment[i]; - if ( sym == 'A' ) { - hpos++; - rpos++; - herr[hpos] = false; - rerr[rpos] = false; - ralign[rpos] = hpos; - } else if ( sym == 'S' ) { - hpos++; - rpos++; - herr[hpos] = true; - rerr[rpos] = true; - ralign[rpos] = hpos; - } else if ( sym == 'I' ) { - hpos++; - herr[hpos] = true; - } else if ( sym == 'D' ) { - rpos++; - rerr[rpos] = true; - ralign[rpos] = hpos+1; - } else { - cerr << "ERROR : terCalc::calculateTerAlignment : Invalid mini align sequence " << sym << " at pos " << i << endl; - exit ( -1 ); - } - } -} + double gain = ( cur_best_align->numEdits + cur_best_shift_cost ) - ( curalign->numEdits + curshift->cost ); -vector<vecTerShift> terCalc::calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign ) -{ - vector<vecTerShift> to_return; - if ( ( TAILLE_PERMUT_MAX <= 0 ) || ( DIST_MAX_PERMUT <= 0 ) ) { - return to_return; - } - - vector<vecTerShift> allshifts ( TAILLE_PERMUT_MAX + 1 ); - for ( int start = 0; start < ( int ) hyp.size(); start++ ) { - string subVectorHypString = vectorToString ( subVector ( hyp, start, start + 1 ) ); - if ( ! rloc.trouve ( subVectorHypString ) ) { - continue; - } + if ( PRINT_DEBUG ) + { + cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl; + cerr << "Gain for " << curshift->toString() << " is " << gain << ". 
(result: [" << curalign->join ( " ", shiftarr ) << "]" << endl; + cerr << "Details of gains : gain = ( cur_best_align->numEdits + cur_best_shift_cost ) - ( curalign->numEdits + curshift->cost )"<<endl; + cerr << "Details of gains : gain = ("<<cur_best_align->numEdits << "+" << cur_best_shift_cost << ") - (" << curalign->numEdits << "+" << curshift->cost << ")"<<endl; + cerr << "" << curalign->toString() << "\n" << endl; + cerr << "END DEBUG " << endl; + } - bool ok = false; - vector<int> mtiVec = rloc.getValue ( subVectorHypString ); - vector<int>::iterator mti = mtiVec.begin(); - while ( mti != mtiVec.end() && ( ! ok ) ) { - int moveto = ( *mti ); - mti++; - if ( ( start != ralign[moveto] ) && ( ( ralign[moveto] - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign[moveto] - 1 ) <= DIST_MAX_PERMUT ) ) { - ok = true; - } - } - if ( ! ok ) { - continue; - } - ok = true; - for ( int end = start; ( ok && ( end < ( int ) hyp.size() ) && ( end < start + TAILLE_PERMUT_MAX ) ); end++ ) { - /* check if cand is good if so, add it */ - vector<string> cand = subVector ( hyp, start, end + 1 ); - ok = false; - if ( ! ( rloc.trouve ( vectorToString ( cand ) ) ) ) { - continue; - } - - bool any_herr = false; - - for ( int i = 0; ( ( i <= ( end - start ) ) && ( ! 
any_herr ) ); i++ ) { - if ( herr[start+i] ) { - any_herr = true; + if ( ( gain > 0 ) || ( ( cur_best_shift_cost == 0 ) && ( gain == 0 ) ) ) + { + anygain = true; + cur_best_shift->set(curshift); + cur_best_shift_cost = curshift->cost; + cur_best_align->set(curalign); + if ( PRINT_DEBUG ) + { + cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl; + cerr << "Tmp Choosing shift: " << cur_best_shift->toString() << " gives:\n" << cur_best_align->toString() << "\n" << endl; + cerr << "END DEBUG " << endl; + } + } + } + } + } } - } - if ( any_herr == false ) { - ok = true; - continue; - } - - vector<int> movetoitVec; - movetoitVec = rloc.getValue ( ( string ) vectorToString ( cand ) ); -// cerr << "CANDIDATE " << ( string ) vectorToString ( cand ) <<" PLACED : " << ( string ) vectorToString ( movetoitVec," ") << endl; - vector<int>::iterator movetoit = movetoitVec.begin(); - while ( movetoit != movetoitVec.end() ) { - int moveto = ( *movetoit ); - movetoit++; - if ( ! ( ( ralign[moveto] != start ) && ( ( ralign[moveto] < start ) || ( ralign[moveto] > end ) ) && ( ( ralign[moveto] - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign[moveto] ) <= DIST_MAX_PERMUT ) ) ) { - continue; + bestShiftStruct * to_return=new bestShiftStruct(); + if ( anygain ) + { + to_return->setEmpty(false); + if ( PRINT_DEBUG ) + { + cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl; + cerr << "Final shift chosen : " << cur_best_shift->toString() << " gives:\n" << cur_best_align->toString() << "\n" << endl; + cerr << "END DEBUG " << endl; + } + to_return->m_best_shift->set(cur_best_shift); +// terAlignment tmp=cur_best_align; +// cur_best_align->toString(); +// to_return.m_best_align.toString(); +// if ((int)cur_best_align->alignment.size() == 0) +// { +// to_return.m_best_align = cur_best_align; +// } +// else +// { +// cerr << "Warning: cur_best_align->alignment.size() = 0 !!!"<<endl; +// +// } + to_return->m_best_align->set(cur_best_align); +// to_return.m_best_align.toString(); } 
- ok = true; + else + { + to_return->setEmpty(true); + } +// // cerr << to_return->toString() << endl; + delete(poss_shifts); + delete(cur_best_align); + delete(cur_best_shift); + delete(curshift); + delete(curalign) ; + return to_return; + } - /* check to see if there are any errors in either string - (only move if this is the case!) - */ + void terCalc::calculateTerAlignment ( terAlignment& align, vector<bool>* herr, vector<bool>* rerr, vector<int>* ralign ) + { + int hpos = -1; + int rpos = -1; + CALL_TER_ALIGN++; +// cerr << "CALL_TER_ALIGN " << CALL_TER_ALIGN << endl; + if ( PRINT_DEBUG ) + { - bool any_rerr = false; - for ( int i = 0; ( i <= end - start ) && ( ! any_rerr ); i++ ) { - if ( rerr[moveto+i] ) { - any_rerr = true; - } + cerr << "BEGIN DEBUG : terCalc::calculateTerAlignment : " << endl << align.toString() << endl; + cerr << "END DEBUG " << endl; } - if ( ! any_rerr ) { - continue; +// cerr << (int)herr->size() <<endl; +// cerr << (int)rerr->size() <<endl; +// cerr << ( int ) align.alignment.size() <<endl; +// for ( int i = 0; i < ( int ) align.alignment.size(); i++ ) +// { +// herr->at(i) = false; +// rerr->at(i) = false; +// ralign->at(i) = -1; +// } + for ( int i = 0; i < ( int ) align.alignment.size(); i++ ) + { + char sym = align.alignment.at(i); + if ( sym == 'A' ) + { + hpos++; + rpos++; + herr->at(hpos) = false; + rerr->at(rpos) = false; + ralign->at(rpos) = hpos; + } + else + if ( sym == 'S' ) + { + hpos++; + rpos++; + herr->at(hpos) = true; + rerr->at(rpos) = true; + ralign->at(rpos) = hpos; + } + else + if ( sym == 'I' ) + { + hpos++; + herr->at(hpos) = true; + } + else + if ( sym == 'D' ) + { + rpos++; + rerr->at(rpos) = true; + ralign->at(rpos) = hpos+1; + } + else + { + cerr << "ERROR : terCalc::calculateTerAlignment : Invalid mini align sequence " << sym << " at pos " << i << endl; + exit ( -1 ); + } } - for ( int roff = -1; roff <= ( end - start ); roff++ ) { - terShift topush; - bool topushNull = true; - if ( ( roff == -1 ) && ( 
moveto == 0 ) ) { - if ( PRINT_DEBUG ) { + } - cerr << "BEGIN DEBUG : terCalc::calculerPermutations 01 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: -1" << endl << "END DEBUG" << endl; + vector<vecTerShift> * terCalc::calculerPermutations ( vector< string >& hyp, vector< string >& ref, hashMapInfos& rloc, TERCpp::terAlignment& align, vector<bool>* herr, vector<bool>* rerr, vector<int>* ralign ) + { + vector<vecTerShift> * allshifts = new vector<vecTerShift>(0); +// to_return.clear(); + CALL_CALC_PERMUT++; +// cerr << "CALL_CALC_PERMUT " << CALL_CALC_PERMUT << endl; + if ( ( TAILLE_PERMUT_MAX <= 0 ) || ( DIST_MAX_PERMUT <= 0 ) ) + { + return allshifts; + } + allshifts = new vector<vecTerShift>( TAILLE_PERMUT_MAX + 1 ); + int start=0; + int end=0; + bool ok = false; + vector<int> mtiVec(0); + vector<int>::iterator mti; + int moveto=0; + vector<string> cand(0); + bool any_herr = false; + bool any_rerr = false; + int i=0; + int l_nbr_permuts=0; +// for (i=0; i< (int)ref.size() +1 ; i++) {cerr << " " << ralign[i] ;} cerr <<endl; + vector<int> movetoitVec(0); + string subVectorHypString=""; + terShift * topush; + for ( start = 0; start < ( int ) hyp.size(); start++ ) + { + subVectorHypString = vectorToString ( subVector ( hyp, start, start + 1 ) ); + if ( ! rloc.trouve ( subVectorHypString ) ) + { + continue; } - terShift t01 ( start, end, -1, -1 ); - topush = t01; - topushNull = false; - } else if ( ( start != ralign[moveto+roff] ) && ( ( roff == 0 ) || ( ralign[moveto+roff] != ralign[moveto] ) ) ) { - int newloc = ralign[moveto+roff]; - if ( PRINT_DEBUG ) { - - cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl << "Consider making " << start << "..." 
<< end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: " << newloc << endl << "END DEBUG" << endl; + + ok = false; + mtiVec = rloc.getValue ( subVectorHypString ); + mti = mtiVec.begin(); + while ( mti != mtiVec.end() && ( ! ok ) ) + { + moveto = ( *mti ); + mti++; + if ( ( start != ralign->at(moveto) ) && ( ( ralign->at(moveto) - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign->at(moveto) - 1 ) <= DIST_MAX_PERMUT ) ) + { + ok = true; + } } - terShift t02 ( start, end, moveto + roff, newloc ); - topush = t02; - topushNull = false; - } - if ( !topushNull ) { - topush.shifted = cand; - topush.cost = shift_cost; - if ( PRINT_DEBUG ) { - - cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl; - cerr << "start : " << start << endl; - cerr << "end : " << end << endl; - cerr << "end - start : " << end - start << endl; - cerr << "END DEBUG " << endl; + if ( ! ok ) + { + continue; } - ( allshifts.at ( end - start ) ).push_back ( topush ); - } - } - } - } - } - to_return.clear(); - for ( int i = 0; i < TAILLE_PERMUT_MAX + 1; i++ ) { - to_return.push_back ( ( vecTerShift ) allshifts.at ( i ) ); - } - return to_return; -} + ok = true; + for ( end = start; ( ok && ( end < ( int ) hyp.size() ) && ( end < start + TAILLE_PERMUT_MAX ) ); end++ ) + { + /* check if cand is good if so, add it */ + cand = subVector ( hyp, start, end + 1 ); + ok = false; + if ( ! ( rloc.trouve ( vectorToString ( cand ) ) ) ) + { + continue; + } + any_herr = false; -alignmentStruct terCalc::permuter ( vector<string> words, terShift s ) -{ - return permuter ( words, s.start, s.end, s.newloc ); -} + for ( i = 0; ( ( i <= ( end - start ) ) && ( ! 
any_herr ) ); i++ ) + { + if ( herr->at(start+i) ) + { + any_herr = true; + } + } + if ( any_herr == false ) + { + ok = true; + continue; + } + movetoitVec = rloc.getValue ( ( string ) vectorToString ( cand ) ); +// cerr << "CANDIDATE " << ( string ) vectorToString ( cand ) <<" PLACED : " << ( string ) vectorToString ( movetoitVec," ") << endl; + vector<int>::iterator movetoit; + movetoit = movetoitVec.begin(); + while ( movetoit != movetoitVec.end() ) + { + moveto = ( *movetoit ); + movetoit++; + if ( ! ( ( ralign->at(moveto) != start ) && ( ( ralign->at(moveto) < start ) || ( ralign->at(moveto) > end ) ) && ( ( ralign->at(moveto) - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign->at(moveto) ) <= DIST_MAX_PERMUT ) ) ) + { + continue; + } + ok = true; -alignmentStruct terCalc::permuter ( vector<string> words, int start, int end, int newloc ) -{ - int c = 0; - vector<string> nwords ( words ); - vector<vecInt> spans ( ( int ) hypSpans.size() ); - alignmentStruct to_return; - if ( PRINT_DEBUG ) { - - if ( ( int ) hypSpans.size() > 0 ) { - cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: " << ( int ) hypSpans.size() << endl ; - } else { - cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: null" << endl ; - } - cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << join(" ",words) << " start: " << start << " end: " << end << " newloc "<< newloc << endl << "END DEBUG " << endl; - } - if (newloc >= ( int ) words.size()) { - if ( PRINT_DEBUG ) { - cerr << "WARNING: Relocation over the size of the hypothesis, replacing at the end of it."<<endl; - } - newloc = ( int ) words.size()-1; - } + /* check to see if there are any errors in either string + (only move if this is the case!) + */ -// } + any_rerr = false; + for ( int i = 0; ( i <= end - start ) && ( ! any_rerr ); i++ ) + { + if ( rerr->at(moveto+i) ) + { + any_rerr = true; + } + } + if ( ! 
any_rerr ) + { + continue; + } + for ( int roff = -1; roff <= ( end - start ); roff++ ) + { + topush = new terShift(); + bool topushNull = true; + if ( ( roff == -1 ) && ( moveto == 0 ) ) + { + if ( PRINT_DEBUG ) + { - if ( newloc == -1 ) { - for ( int i = start; i <= end; i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } + cerr << "BEGIN DEBUG : terCalc::calculerPermutations 01 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: -1" << endl << "END DEBUG" << endl; + } +// terShift t01 ( start, end, -1, -1 ); +// topush = t01; + topush->start=start; + topush->end=end; + topush->moveto=-1; + topush->newloc=-1; + topushNull = false; + } + else + if ( ( start != ralign->at(moveto+roff) ) && ( ( roff == 0 ) || ( ralign->at(moveto+roff) != ralign->at(moveto) ) ) ) + { + int newloc = ralign->at(moveto+roff); + if ( PRINT_DEBUG ) + { + + cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl << "Consider making " << start << "..." 
<< end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: " << newloc << endl << "END DEBUG" << endl; + } +// terShift t02 ( start, end, moveto + roff, newloc ); +// topush = t02; + topush->start=start; + topush->end=end; + topush->moveto=moveto + roff; + topush->newloc=newloc; + topushNull = false; + } + if ( !topushNull ) + { + topush->shifted = cand; + topush->cost = shift_cost; + l_nbr_permuts++; + if ( PRINT_DEBUG ) + { + + cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl; + cerr << "start : " << start << endl; + cerr << "end : " << end << endl; + cerr << "end - start : " << end - start << endl; + cerr << "nbr Permutations added: " << l_nbr_permuts << endl; + cerr << "END DEBUG " << endl; + } + if (l_nbr_permuts < NBR_PERMUT_MAX + 1) + { + ( allshifts->at ( end - start ) ).push_back ( (*(topush)) ); + } +// else +// { +// break; +// } + } + delete(topush); + } + } + } + } +// to_return.clear(); +// for ( int i = 0; i < TAILLE_PERMUT_MAX + 1; i++ ) +// { +// to_return.push_back ( ( vecTerShift ) allshifts.at ( i ) ); +// } + return allshifts; } - for ( int i = 0; i <= start - 1; i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } + + + alignmentStruct terCalc::permuter ( vector< string >& words, TERCpp::terShift& s ) + { + return permuter ( words, s.start, s.end, s.newloc ); } - for ( int i = end + 1; i < ( int ) words.size(); i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } + alignmentStruct terCalc::permuter ( vector< string >& words, TERCpp::terShift* s ) + { + return permuter ( words, s->start, s->end, s->newloc ); } - } else { - if ( newloc < start ) { - for ( int i = 0; i < newloc; i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } - } - for ( int 
i = start; i <= end; i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } - } - for ( int i = newloc ; i < start ; i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } - } - for ( int i = end + 1; i < ( int ) words.size(); i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } - } - } else { - if ( newloc > end ) { - for ( int i = 0; i <= start - 1; i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } - } - for ( int i = end + 1; i <= newloc; i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } - } - for ( int i = start; i <= end; i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } - } - for ( int i = newloc + 1; i < ( int ) words.size(); i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } - } - } else { - // we are moving inside of ourselves - for ( int i = 0; i <= start - 1; i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } - } - for ( int i = end + 1; ( i < ( int ) words.size() ) && ( i <= ( end + ( newloc - start ) ) ); i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } + + alignmentStruct terCalc::permuter ( vector< string >& words, int start, int end, int newloc ) + { + int c = 0; + vector<string> nwords ( words ); + vector<vecInt> spans ( ( int ) hypSpans.size() ); + alignmentStruct to_return; + if ( PRINT_DEBUG ) + { + + if ( ( int ) hypSpans.size() > 0 ) + { + 
cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: " << ( int ) hypSpans.size() << endl ; + } + else + { + cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: null" << endl ; + } + cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << join(" ",words) << " start: " << start << " end: " << end << " newloc "<< newloc << endl << "END DEBUG " << endl; } - for ( int i = start; i <= end; i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } + if (newloc >= ( int ) words.size()) + { + if ( PRINT_DEBUG ) + { + cerr << "WARNING: Relocation over the size of the hypothesis, replacing at the end of it."<<endl; + } + newloc = ( int ) words.size()-1; + } + +// } + + if ( newloc == -1 ) + { + for ( int i = start; i <= end;i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + for ( int i = 0; i <= start - 1;i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + for ( int i = end + 1; i < ( int ) words.size();i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } } - for ( int i = ( end + ( newloc - start ) + 1 ); i < ( int ) words.size(); i++ ) { - nwords.at ( c++ ) = words.at ( i ); - if ( ( int ) hypSpans.size() > 0 ) { - spans.at ( c - 1 ) = hypSpans.at ( i ); - } + else + { + if ( newloc < start ) + { + + for ( int i = 0; i < newloc; i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + for ( int i = start; i <= end;i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i 
); + } + } + for ( int i = newloc ; i < start ;i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + for ( int i = end + 1; i < ( int ) words.size();i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + } + else + { + if ( newloc > end ) + { + for ( int i = 0; i <= start - 1; i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + for ( int i = end + 1; i <= newloc;i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + for ( int i = start; i <= end;i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + for ( int i = newloc + 1; i < ( int ) words.size();i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + } + else + { + // we are moving inside of ourselves + for ( int i = 0; i <= start - 1; i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + for ( int i = end + 1; ( i < ( int ) words.size() ) && ( i <= ( end + ( newloc - start ) ) ); i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + for ( int i = start; i <= end;i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + for ( int i = ( end + ( newloc - start ) + 1 ); i < ( int ) words.size();i++ ) + { + nwords.at ( c++ ) = words.at ( i ); + if ( ( int ) hypSpans.size() > 0 ) + { + spans.at ( c - 1 ) = hypSpans.at ( i ); + } + } + } + } } - } - } - } 
- NBR_PERMUTS_CONSID++; - - if ( PRINT_DEBUG ) { - cerr << "nwords" << join(" ",nwords) << endl; + NBR_PERMUTS_CONSID++; + + if ( PRINT_DEBUG ) + { + cerr << "nwords" << join(" ",nwords) << endl; // cerr << "spans" << spans. << endl; - } - - to_return.nwords = nwords; - to_return.aftershift = spans; - return to_return; -} -void terCalc::setDebugMode ( bool b ) -{ - PRINT_DEBUG = b; -} + } + + to_return.nwords = nwords; + to_return.aftershift = spans; + return to_return; + } + void terCalc::setDebugMode ( bool b ) + { + PRINT_DEBUG = b; + } } diff --git a/mert/TER/tercalc.h b/mert/TER/tercalc.h index 778d83395..22b5e2c9d 100644 --- a/mert/TER/tercalc.h +++ b/mert/TER/tercalc.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. 
@@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA **********************************/ -#ifndef _TERCPPTERCALC_H__ -#define _TERCPPTERCALC_H__ +#ifndef _TERCPPTERCALC_H___ +#define _TERCPPTERCALC_H___ #include <vector> #include <stdio.h> @@ -41,62 +41,70 @@ namespace TERCpp { // typedef size_t WERelement[2]; // Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del) -typedef vector<terShift> vecTerShift; -/** - @author -*/ -class terCalc -{ -private : + typedef vector<terShift> vecTerShift; + /** + @author + */ + class terCalc + { + private : // Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del) - WERalignment l_WERalignment; + WERalignment l_WERalignment; // HashMap contenant les valeurs de hash de chaque mot - hashMap bagOfWords; - int TAILLE_PERMUT_MAX; - // Increments internes - int NBR_SEGS_EVALUATED; - int NBR_PERMUTS_CONSID; - int NBR_BS_APPELS; - int DIST_MAX_PERMUT; - bool PRINT_DEBUG; + hashMap bagOfWords; + int TAILLE_PERMUT_MAX; + int NBR_PERMUT_MAX; + // Increments internes + int NBR_SEGS_EVALUATED; + int NBR_PERMUTS_CONSID; + int NBR_BS_APPELS; + int DIST_MAX_PERMUT; + int CALL_TER_ALIGN; + int CALL_CALC_PERMUT; + int CALL_FIND_BSHIFT; + int MAX_LENGTH_SENTENCE; + bool PRINT_DEBUG; - // Utilisés dans minDistEdit et ils ne sont pas réajustés - double S[1000][1000]; - char P[1000][1000]; - vector<vecInt> refSpans; - vector<vecInt> hypSpans; - int TAILLE_BEAM; + // Utilisés dans minDistEdit et ils ne sont pas réajustés + vector < vector < double > > * S; + vector < vector < char > > * P; + vector<vecInt> refSpans; + vector<vecInt> hypSpans; + int TAILLE_BEAM; -public: - int shift_cost; - int insert_cost; - int delete_cost; - int substitute_cost; - int match_cost; - double infinite; - terCalc(); + public: + int 
shift_cost; + int insert_cost; + int delete_cost; + int substitute_cost; + int match_cost; + double infinite; + terCalc(); -// ~terCalc(); + ~terCalc(); // size_t* hashVec ( vector<string> s ); - void setDebugMode ( bool b ); + void setDebugMode ( bool b ); // int WERCalculation ( size_t * ref, size_t * hyp ); // int WERCalculation ( vector<string> ref, vector<string> hyp ); // int WERCalculation ( vector<int> ref, vector<int> hyp ); - terAlignment WERCalculation ( vector<string> hyp, vector<string> ref ); + terAlignment WERCalculation ( vector< string >& hyp, vector< string >& ref ); // string vectorToString(vector<string> vec); // vector<string> subVector(vector<string> vec, int start, int end); - hashMapInfos createConcordMots ( vector<string> hyp, vector<string> ref ); - terAlignment minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans ); - bool trouverIntersection ( vecInt refSpan, vecInt hypSpan ); - terAlignment TER ( vector<string> hyp, vector<string> ref , float avRefLength ); - terAlignment TER ( vector<string> hyp, vector<string> ref ); - terAlignment TER ( vector<int> hyp, vector<int> ref ); - bestShiftStruct findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment cur_align ); - void calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign ); - vector<vecTerShift> calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign ); - alignmentStruct permuter ( vector<string> words, terShift s ); - alignmentStruct permuter ( vector<string> words, int start, int end, int newloc ); -}; + hashMapInfos createConcordMots ( vector<string>& hyp, vector<string>& ref ); + terAlignment minimizeDistanceEdition ( vector<string>& hyp, vector<string>& ref, vector<vecInt>& curHypSpans ); + void minimizeDistanceEdition ( vector<string>& hyp, vector<string>& ref, vector<vecInt>& 
curHypSpans , terAlignment* l_terAlign); +// terAlignment minimizeDistanceEdition ( vector<string>& hyp, vector<string>& ref, vector<vecInt>& curHypSpans ); + bool trouverIntersection ( vecInt& refSpan, vecInt& hypSpan ); + terAlignment TER ( vector<string>& hyp, vector<string>& ref , float avRefLength ); + terAlignment TER ( vector<string>& hyp, vector<string>& ref ); + terAlignment TER ( vector<int>& hyp, vector<int>& ref ); + bestShiftStruct * findBestShift ( vector< string >& cur, vector< string >& hyp, vector< string >& ref, hashMapInfos& rloc, TERCpp::terAlignment& med_align ); + void calculateTerAlignment ( terAlignment& align, vector<bool>* herr, vector<bool>* rerr, vector<int>* ralign ); + vector<vecTerShift> * calculerPermutations ( vector< string >& hyp, vector< string >& ref, hashMapInfos& rloc, TERCpp::terAlignment& align, vector<bool>* herr, vector<bool>* rerr, vector<int>* ralign ); + alignmentStruct permuter ( vector<string>& words, terShift& s ); + alignmentStruct permuter ( vector<string>& words, terShift* s ); + alignmentStruct permuter ( vector<string>& words, int start, int end, int newloc ); + }; } diff --git a/mert/TER/tools.cpp b/mert/TER/tools.cpp index 8858a7119..22ee091a8 100644 --- a/mert/TER/tools.cpp +++ b/mert/TER/tools.cpp @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. 
@@ -25,677 +25,811 @@ using namespace boost::xpressive; namespace Tools { -string vectorToString ( vector<string> vec ) -{ - string retour ( "" ); - for ( vector<string>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) { - if ( vecIter == vec.begin() ) { - retour += ( *vecIter ); - } else { - retour += "\t" + ( *vecIter ); + string vectorToString ( vector<string> vec ) + { + string retour ( "" ); + for ( vector<string>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ ) + { + if ( vecIter == vec.begin() ) + { + retour += ( *vecIter ); + } + else + { + retour += "\t" + ( *vecIter ); + } + } + return retour; } - } - return retour; -} -string vectorToString ( vector<char> vec ) -{ - stringstream retour; - retour.str(""); - for ( vector<char>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) { - if ( vecIter == vec.begin() ) { - retour << ( *vecIter ); - } else { - retour << "\t" << ( *vecIter ); + string vectorToString ( vector<char> vec ) + { + stringstream retour; + retour.str(""); + for ( vector<char>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ ) + { + if ( vecIter == vec.begin() ) + { + retour << ( *vecIter ); + } + else + { + retour << "\t" << ( *vecIter ); + } + } + return retour.str(); } - } - return retour.str(); -} -string vectorToString ( vector<int> vec ) -{ - stringstream retour; - retour.str(""); - for ( vector<int>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) { - if ( vecIter == vec.begin() ) { - retour << ( *vecIter ); - } else { - retour << "\t" << ( *vecIter ); + string vectorToString ( vector<int> vec ) + { + stringstream retour; + retour.str(""); + for ( vector<int>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ ) + { + if ( vecIter == vec.begin() ) + { + retour << ( *vecIter ); + } + else + { + retour << "\t" << ( *vecIter ); + } + } + return retour.str(); + } + string vectorToString ( vector<int> * vec ) + { + stringstream retour; + 
retour.str(""); + for ( vector<int>::iterator vecIter = vec->begin();vecIter != vec->end(); vecIter++ ) + { + if ( vecIter == vec->begin() ) + { + retour << ( *vecIter ); + } + else + { + retour << "\t" << ( *vecIter ); + } + } + return retour.str(); } - } - return retour.str(); -} -string vectorToString ( vector< string > vec, string s ) -{ - string retour ( "" ); - for ( vector<string>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) { - if ( vecIter == vec.begin() ) { - retour += ( *vecIter ); - } else { - retour += s + ( *vecIter ); + string vectorToString ( vector< string > vec, string s ) + { + string retour ( "" ); + for ( vector<string>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ ) + { + if ( vecIter == vec.begin() ) + { + retour += ( *vecIter ); + } + else + { + retour += s + ( *vecIter ); + } + } + return retour; + } - } - return retour; -} + string vectorToString ( vector< char > vec, string s ) + { + stringstream retour; + retour.str(""); + for ( vector<char>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ ) + { + if ( vecIter == vec.begin() ) + { + retour << ( *vecIter ); + } + else + { + retour << s << ( *vecIter ); + } + } + return retour.str(); -string vectorToString ( vector< char > vec, string s ) -{ - stringstream retour; - retour.str(""); - for ( vector<char>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) { - if ( vecIter == vec.begin() ) { - retour << ( *vecIter ); - } else { - retour << s << ( *vecIter ); } - } - return retour.str(); -} + string vectorToString ( vector< int > vec, string s ) + { + stringstream retour; + retour.str(""); + for ( vector<int>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ ) + { + if ( vecIter == vec.begin() ) + { + retour << ( *vecIter ); + } + else + { + retour << s << ( *vecIter ); + } + } + return retour.str(); -string vectorToString ( vector< int > vec, string s ) -{ - stringstream retour; - retour.str(""); - for ( 
vector<int>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) { - if ( vecIter == vec.begin() ) { - retour << ( *vecIter ); - } else { - retour << s << ( *vecIter ); } - } - return retour.str(); -} + string vectorToString ( vector< bool > vec, string s ) + { + stringstream retour; + retour.str(""); + for ( vector<bool>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ ) + { + if ( vecIter == vec.begin() ) + { + retour << ( *vecIter ); + } + else + { + retour << s << ( *vecIter ); + } + } + return retour.str(); -string vectorToString ( vector< bool > vec, string s ) -{ - stringstream retour; - retour.str(""); - for ( vector<bool>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) { - if ( vecIter == vec.begin() ) { - retour << ( *vecIter ); - } else { - retour << s << ( *vecIter ); } - } - return retour.str(); + string vectorToString ( char* vec, string s , int taille) + { + stringstream retour; + retour.str(""); + int l_i; + for ( l_i=0; l_i < taille ; l_i++) + { + if ( l_i == 0 ) + { + retour << vec[l_i]; + } + else + { + retour << s << vec[l_i]; + } + } + return retour.str(); -} -string vectorToString ( char* vec, string s , int taille) -{ - stringstream retour; - retour.str(""); - int l_i; - for ( l_i=0; l_i < taille ; l_i++) { - if ( l_i == 0 ) { - retour << vec[l_i]; - } else { - retour << s << vec[l_i]; } - } - return retour.str(); -} + string vectorToString ( int* vec, string s , int taille) + { + stringstream retour; + retour.str(""); + int l_i; + for ( l_i=0; l_i < taille ; l_i++) + { + if ( l_i == 0 ) + { + retour << vec[l_i]; + } + else + { + retour << s << vec[l_i]; + } + } + return retour.str(); -string vectorToString ( int* vec, string s , int taille) -{ - stringstream retour; - retour.str(""); - int l_i; - for ( l_i=0; l_i < taille ; l_i++) { - if ( l_i == 0 ) { - retour << vec[l_i]; - } else { - retour << s << vec[l_i]; } - } - return retour.str(); -} + string vectorToString ( bool* vec, string s , int 
taille) + { + stringstream retour; + retour.str(""); + int l_i; + for ( l_i=0; l_i < taille ; l_i++) + { + if ( l_i == 0 ) + { + retour << vec[l_i]; + } + else + { + retour << s << vec[l_i]; + } + } + return retour.str(); -string vectorToString ( bool* vec, string s , int taille) -{ - stringstream retour; - retour.str(""); - int l_i; - for ( l_i=0; l_i < taille ; l_i++) { - if ( l_i == 0 ) { - retour << vec[l_i]; - } else { - retour << s << vec[l_i]; } - } - return retour.str(); - -} + + string vectorToString ( vector<bool>* vec, string s , int taille) + { + stringstream retour; + retour.str(""); + int l_i; + for ( l_i=0; l_i < taille ; l_i++) + { + if ( l_i == 0 ) + { + retour << vec->at(l_i); + } + else + { + retour << s << vec->at(l_i); + } + } + return retour.str(); -vector<string> subVector ( vector<string> vec, int start, int end ) -{ - vector<string> retour; - if ( start > end ) { - cerr << "ERREUR : TERcalc::subVector : end > start" << endl; - exit ( 0 ); - } - for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) { - retour.push_back ( vec.at ( i ) ); - } - return retour; -} + } -vector<int> subVector ( vector<int> vec, int start, int end ) -{ - vector<int> retour; - if ( start > end ) { - cerr << "ERREUR : TERcalc::subVector : end > start" << endl; - exit ( 0 ); - } - for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) { - retour.push_back ( vec.at ( i ) ); - } - return retour; -} + string vectorToString ( vector<int>* vec, string s , int taille) + { + stringstream retour; + retour.str(""); + int l_i; + for ( l_i=0; l_i < taille ; l_i++) + { + if ( l_i == 0 ) + { + retour << vec->at(l_i); + } + else + { + retour << s << vec->at(l_i); + } + } + return retour.str(); -vector<float> subVector ( vector<float> vec, int start, int end ) -{ - vector<float> retour; - if ( start > end ) { - cerr << "ERREUR : TERcalc::subVector : end > start" << endl; - exit ( 0 ); - } - for ( int i = start; ( ( i < end ) && ( i < ( int ) 
vec.size() ) ); i++ ) { - retour.push_back ( vec.at ( i ) ); - } - return retour; -} + } -vector<string> copyVector ( vector<string> vec ) -{ - vector<string> retour; - for ( int i = 0; i < ( int ) vec.size(); i++ ) { - retour.push_back ( vec.at ( i ) ); - } - return retour; -} -vector<int> copyVector ( vector<int> vec ) -{ - vector<int> retour; - for ( int i = 0; i < ( int ) vec.size(); i++ ) { - retour.push_back ( vec.at ( i ) ); - } - return retour; -} -vector<float> copyVector ( vector<float> vec ) -{ - vector<float> retour; - for ( int i = 0; i < ( int ) vec.size(); i++ ) { - retour.push_back ( vec.at ( i ) ); - } - return retour; -} -vector<string> stringToVector ( string s, string tok ) -{ - vector<string> to_return; - string to_push ( "" ); - bool pushed = false; - string::iterator sIt; - for ( sIt = s.begin(); sIt < s.end(); sIt++ ) { - pushed = false; - for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) { - if ( ( *sIt ) == ( *sTok ) ) { - to_return.push_back ( to_push ); - to_push = ""; - pushed = true; - } + + + vector<string> subVector ( vector<string> vec, int start, int end ) + { + vector<string> retour; + if ( start > end ) + { + cerr << "ERREUR : TERcalc::subVector : end > start" << endl; + exit ( 0 ); + } + for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) + { + retour.push_back ( vec.at ( i ) ); + } + return retour; } - if ( !pushed ) { - to_push.push_back ( ( *sIt ) ); + + vector<int> subVector ( vector<int> vec, int start, int end ) + { + vector<int> retour; + if ( start > end ) + { + cerr << "ERREUR : TERcalc::subVector : end > start" << endl; + exit ( 0 ); + } + for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) + { + retour.push_back ( vec.at ( i ) ); + } + return retour; } - } - to_return.push_back ( to_push ); - return to_return; -} -vector<int> stringToVectorInt ( string s, string tok ) -{ - vector<int> to_return; - string to_push ( "" ); - bool pushed = false; - 
string::iterator sIt; - for ( sIt = s.begin(); sIt < s.end(); sIt++ ) { - pushed = false; - for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) { - if ( ( *sIt ) == ( *sTok ) ) { - if ( ( int ) to_push.length() > 0 ) { - to_return.push_back ( atoi ( to_push.c_str() ) ); + + vector<float> subVector ( vector<float> vec, int start, int end ) + { + vector<float> retour; + if ( start > end ) + { + cerr << "ERREUR : TERcalc::subVector : end > start" << endl; + exit ( 0 ); + } + for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) + { + retour.push_back ( vec.at ( i ) ); } - to_push = ""; - pushed = true; - } + return retour; } - if ( !pushed ) { - to_push.push_back ( ( *sIt ) ); + + vector<string> copyVector ( vector<string> vec ) + { + vector<string> retour; + for ( int i = 0; i < ( int ) vec.size(); i++ ) + { + retour.push_back ( vec.at ( i ) ); + } + return retour; } - } - if ( ( int ) to_push.length() > 0 ) { - to_return.push_back ( atoi ( to_push.c_str() ) ); - } - return to_return; -} -vector<float> stringToVectorFloat ( string s, string tok ) -{ - vector<float> to_return; - string to_push ( "" ); - bool pushed = false; - string::iterator sIt; - for ( sIt = s.begin(); sIt < s.end(); sIt++ ) { - pushed = false; - for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) { - if ( ( *sIt ) == ( *sTok ) ) { - if ( ( int ) to_push.length() > 0 ) { - to_return.push_back ( atof ( to_push.c_str() ) ); + vector<int> copyVector ( vector<int> vec ) + { + vector<int> retour; + for ( int i = 0; i < ( int ) vec.size(); i++ ) + { + retour.push_back ( vec.at ( i ) ); } - to_push = ""; - pushed = true; - } + return retour; } - if ( !pushed ) { - to_push.push_back ( ( *sIt ) ); + vector<float> copyVector ( vector<float> vec ) + { + vector<float> retour; + for ( int i = 0; i < ( int ) vec.size(); i++ ) + { + retour.push_back ( vec.at ( i ) ); + } + return retour; + } + vector<string> stringToVector ( string s, string tok ) + { + 
vector<string> to_return; + string to_push ( "" ); + bool pushed = false; + string::iterator sIt; + for ( sIt = s.begin(); sIt < s.end(); sIt++ ) + { + pushed = false; + for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) + { + if ( ( *sIt ) == ( *sTok ) ) + { + to_return.push_back ( to_push ); + to_push = ""; + pushed = true; + } + } + if ( !pushed ) + { + to_push.push_back ( ( *sIt ) ); + } + } + to_return.push_back ( to_push ); + return to_return; + } + vector<int> stringToVectorInt ( string s, string tok ) + { + vector<int> to_return; + string to_push ( "" ); + bool pushed = false; + string::iterator sIt; + for ( sIt = s.begin(); sIt < s.end(); sIt++ ) + { + pushed = false; + for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) + { + if ( ( *sIt ) == ( *sTok ) ) + { + if ( ( int ) to_push.length() > 0 ) + { + to_return.push_back ( atoi ( to_push.c_str() ) ); + } + to_push = ""; + pushed = true; + } + } + if ( !pushed ) + { + to_push.push_back ( ( *sIt ) ); + } + } + if ( ( int ) to_push.length() > 0 ) + { + to_return.push_back ( atoi ( to_push.c_str() ) ); + } + return to_return; + } + vector<float> stringToVectorFloat ( string s, string tok ) + { + vector<float> to_return; + string to_push ( "" ); + bool pushed = false; + string::iterator sIt; + for ( sIt = s.begin(); sIt < s.end(); sIt++ ) + { + pushed = false; + for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) + { + if ( ( *sIt ) == ( *sTok ) ) + { + if ( ( int ) to_push.length() > 0 ) + { + to_return.push_back ( atof ( to_push.c_str() ) ); + } + to_push = ""; + pushed = true; + } + } + if ( !pushed ) + { + to_push.push_back ( ( *sIt ) ); + } + } + if ( ( int ) to_push.length() > 0 ) + { + to_return.push_back ( atoi ( to_push.c_str() ) ); + } + return to_return; } - } - if ( ( int ) to_push.length() > 0 ) { - to_return.push_back ( atoi ( to_push.c_str() ) ); - } - return to_return; -} -string lowerCase ( string str ) -{ - for ( int i = 0; i < ( int ) 
str.size(); i++ ) { - if ( ( str[i] >= 0x41 ) && ( str[i] <= 0x5A ) ) { - str[i] = str[i] + 0x20; + string lowerCase ( string str ) + { + for ( int i = 0;i < ( int ) str.size();i++ ) + { + if ( ( str[i] >= 0x41 ) && ( str[i] <= 0x5A ) ) + { + str[i] = str[i] + 0x20; + } + } + return str; } - } - return str; -} -string removePunctTercom ( string str ) -{ - string str_mod = str; - sregex rex; - string replace; + string removePunctTercom ( string str ) + { + string str_mod = str; + sregex rex; + string replace; - rex = sregex::compile ( "^[ ]+" ); - replace = ""; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "^[ ]+" ); + replace = ""; + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\"]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\"]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[,]" ); - replace = " "; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[,]" ); + replace = " "; + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); - replace = ( "$1 $3" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); + replace = ( "$1 $3" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); - replace = ( "$1 $3" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); + replace = ( "$1 $3" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); - replace = ( "$1 $3" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); + replace = ( "$1 $3" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "([\\.]$)" ); - 
replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "([\\.]$)" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\?]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\?]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\;]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\;]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\:]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\:]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\!]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\!]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\(]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\(]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\)]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\)]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[ ]+" ); - replace = " "; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[ ]+" ); + replace = " "; + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[ ]+$" ); - replace = ""; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[ ]+$" ); + replace = ""; + str_mod = regex_replace ( str_mod, rex, replace ); - return str_mod; -} -string removePunct ( string str ) 
-{ - string str_mod = str; - sregex rex; - string replace; + return str_mod; + } + string removePunct ( string str ) + { + string str_mod = str; + sregex rex; + string replace; - rex = sregex::compile ( "^[ ]+" ); - replace = ""; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "^[ ]+" ); + replace = ""; + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\"]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\"]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[,]" ); - replace = " "; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[,]" ); + replace = " "; + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); - replace = ( "$1 $3" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); + replace = ( "$1 $3" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); - replace = ( "$1 $3" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); + replace = ( "$1 $3" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); - replace = ( "$1 $3" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" ); + replace = ( "$1 $3" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "([\\.]$)" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "([\\.]$)" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\?]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\?]" ); + 
replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\;]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\;]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\:]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\:]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\!]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\!]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\(]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\(]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\)]" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\)]" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[ ]+" ); - replace = " "; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[ ]+" ); + replace = " "; + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[ ]+$" ); - replace = ""; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[ ]+$" ); + replace = ""; + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "^[ ]+" ); - replace = ""; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "^[ ]+" ); + replace = ""; + str_mod = regex_replace ( str_mod, rex, replace ); - return str_mod; -} -string tokenizePunct ( string str ) -{ - string str_mod = str; - sregex rex = sregex::compile ( "(([^0-9])([\\,])([^0-9]))" ); - string replace ( "$2 $3 $4" ); 
- str_mod = regex_replace ( str_mod, rex, replace ); + return str_mod; + } + string tokenizePunct ( string str ) + { + string str_mod = str; + sregex rex = sregex::compile ( "(([^0-9])([\\,])([^0-9]))" ); + string replace ( "$2 $3 $4" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(([^0-9])([\\.])([^0-9]))" ); - replace = ( "$2 $3 $4" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(([^0-9])([\\.])([^0-9]))" ); + replace = ( "$2 $3 $4" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.]) )" ); - replace = ( " $2. " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.]) )" ); + replace = ( " $2. " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.])$)" ); - replace = ( " $2. " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.])$)" ); + replace = ( " $2. " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(^([A-Z]|[a-z]) ([\\.]) )" ); - replace = ( " $2. " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(^([A-Z]|[a-z]) ([\\.]) )" ); + replace = ( " $2. " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(([A-Z]|[a-z])([\\.]) ([A-Z]|[a-z])([\\.]) )" ); - replace = ( "$2.$4. " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(([A-Z]|[a-z])([\\.]) ([A-Z]|[a-z])([\\.]) )" ); + replace = ( "$2.$4. " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\?]" ); - replace = ( " ? " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\?]" ); + replace = ( " ? 
" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\;]" ); - replace = ( " ; " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\;]" ); + replace = ( " ; " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(([^0-9])([\\:])([^0-9]))" ); - replace = ( "$2 $3 $4" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(([^0-9])([\\:])([^0-9]))" ); + replace = ( "$2 $3 $4" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\!]" ); - replace = ( " ! " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\!]" ); + replace = ( " ! " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\(]" ); - replace = ( " ( " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\(]" ); + replace = ( " ( " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\\)]" ); - replace = ( " ) " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\\)]" ); + replace = ( " ) " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[\"]" ); - replace = ( " \" " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[\"]" ); + replace = ( " \" " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(num_ \\( ([^\\)]+) \\))" ); - replace = ( "num_($2)" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(num_ \\( ([^\\)]+) \\))" ); + replace = ( "num_($2)" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(ordinal_ \\( ([^\\)]*) \\))" ); - replace = ( "ordinal_($2)" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(ordinal_ \\( ([^\\)]*) \\))" ); + replace = ( "ordinal_($2)" ); + str_mod = 
regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(^([Mm]) \\.)" ); - replace = ( "$2." ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(^([Mm]) \\.)" ); + replace = ( "$2." ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "( ([Mm]) \\.)" ); - replace = ( " $2." ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "( ([Mm]) \\.)" ); + replace = ( " $2." ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(^([Dd]r) \\.)" ); - replace = ( "$2." ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(^([Dd]r) \\.)" ); + replace = ( "$2." ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "( ([Dd]r) \\.)" ); - replace = ( " $2." ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "( ([Dd]r) \\.)" ); + replace = ( " $2." ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(^([Mm]r) \\.)" ); - replace = ( "$2." ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(^([Mm]r) \\.)" ); + replace = ( "$2." ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "( ([Mm]r) \\.)" ); - replace = ( " $2." ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "( ([Mm]r) \\.)" ); + replace = ( " $2." ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(^([Mm]rs) \\.)" ); - replace = ( "$2." ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(^([Mm]rs) \\.)" ); + replace = ( "$2." ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "( ([Mm]rs) \\.)" ); - replace = ( " $2." ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "( ([Mm]rs) \\.)" ); + replace = ( " $2." 
); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(^([Nn]o) \\.)" ); - replace = ( "$2." ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(^([Nn]o) \\.)" ); + replace = ( "$2." ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "( ([Nn]o) \\.)" ); - replace = ( " $2." ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "( ([Nn]o) \\.)" ); + replace = ( " $2." ); + str_mod = regex_replace ( str_mod, rex, replace ); // rex = sregex::compile ( "(^(([Jj]an)|([Ff]ev)|([Mm]ar)|([Aa]pr)|([Jj]un)|([Jj]ul)|([Aa]ug)|([Ss]ept)|([Oo]ct)|([Nn]ov)|([Dd]ec)) \\.)" ); // replace = ( "$2." ); // str_mod = regex_replace ( str_mod, rex, replace ); -// +// // rex = sregex::compile ( "( (([Jj]an)|([Ff]ev)|([Mm]ar)|([Aa]pr)|([Jj]un)|([Jj]ul)|([Aa]ug)|([Ss]ept)|([Oo]ct)|([Nn]ov)|([Dd]ec)) \\.)" ); // replace = ( " $2." ); // str_mod = regex_replace ( str_mod, rex, replace ); -// +// // rex = sregex::compile ( "(^(([Gg]en)|([Cc]ol)) \\.)" ); // replace = ( "$2." ); // str_mod = regex_replace ( str_mod, rex, replace ); -// +// // rex = sregex::compile ( "( (([Gg]en)|([Cc]ol)) \\.)" ); // replace = ( " $2." ); // str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(^(([A-Z][a-z])) \\. )" ); - replace = ( "$2. " ); - str_mod = regex_replace ( str_mod, rex, replace ); - - rex = sregex::compile ( "( (([A-Z][a-z])) \\. )" ); - replace = ( " $2. " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(^(([A-Z][a-z])) \\. )" ); + replace = ( "$2. " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "(^(([A-Z][a-z][a-z])) \\. )" ); - replace = ( "$2. " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "( (([A-Z][a-z])) \\. )" ); + replace = ( " $2. 
" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "( (([A-Z][a-z][a-z])) \\. )" ); - replace = ( " $2. " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "(^(([A-Z][a-z][a-z])) \\. )" ); + replace = ( "$2. " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[ ]+" ); - replace = " "; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "( (([A-Z][a-z][a-z])) \\. )" ); + replace = ( " $2. " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "^[ ]+" ); - replace = ""; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "[ ]+" ); + replace = " "; + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "[ ]+$" ); - replace = ""; - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "^[ ]+" ); + replace = ""; + str_mod = regex_replace ( str_mod, rex, replace ); - return str_mod; -} + rex = sregex::compile ( "[ ]+$" ); + replace = ""; + str_mod = regex_replace ( str_mod, rex, replace ); + + return str_mod; + } -string normalizeStd ( string str ) -{ - string str_mod = str; - sregex rex = sregex::compile ( "(<skipped>)" ); - string replace ( "" ); - str_mod = regex_replace ( str_mod, rex, replace ); + string normalizeStd ( string str ) + { + string str_mod = str; + sregex rex = sregex::compile ( "(<skipped>)" ); + string replace ( "" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "-\n" ); - replace = ( "" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "-\n" ); + replace = ( "" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "\n" ); - replace = ( " " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "\n" ); + replace = ( " " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "&quot;" ); -
replace = ( "\"" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "&quot;" ); + replace = ( "\"" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "&amp;" ); - replace = ( "& " ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "&amp;" ); + replace = ( "& " ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "&lt;" ); - replace = ( "<" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "&lt;" ); + replace = ( "<" ); + str_mod = regex_replace ( str_mod, rex, replace ); - rex = sregex::compile ( "&gt;" ); - replace = ( ">" ); - str_mod = regex_replace ( str_mod, rex, replace ); + rex = sregex::compile ( "&gt;" ); + replace = ( ">" ); + str_mod = regex_replace ( str_mod, rex, replace ); - return str_mod; -} + return str_mod; + } -param copyParam ( param p ) -{ - param to_return; - to_return.caseOn = p.caseOn; - to_return.noPunct = p.noPunct; - to_return.debugMode = p.debugMode; - to_return.debugLevel = p.debugLevel; - to_return.hypothesisFile = p.hypothesisFile; - to_return.referenceFile = p.referenceFile; - to_return.normalize = p.normalize; - to_return.noTxtIds = p.noTxtIds; - to_return.outputFileExtension = p.outputFileExtension; - to_return.outputFileName = p.outputFileName; - to_return.sgmlInputs = p.sgmlInputs; - to_return.tercomLike = p.tercomLike; - to_return.printAlignments = p.printAlignments; - to_return.WER=p.WER; - return to_return; -} -string printParams ( param p ) -{ - stringstream s; - s << "caseOn = " << p.caseOn << endl; - s << "noPunct = " << p.noPunct << endl; - s << "debugMode = " << p.debugMode << endl; - s << "debugLevel = " << p.debugLevel << endl; - s << "hypothesisFile = " << p.hypothesisFile << endl; - s << "referenceFile = " << p.referenceFile << endl; - s << "normalize = " << p.normalize << endl; - s << "noTxtIds = " << p.noTxtIds << endl; - s << "outputFileExtension = " << p.outputFileExtension << endl; - s
<< "outputFileName = " << p.outputFileName << endl; - s << "sgmlInputs = " << p.sgmlInputs << endl; - s << "tercomLike = " << p.tercomLike << endl; - return s.str(); + param copyParam ( param p ) + { + param to_return; + to_return.caseOn = p.caseOn; + to_return.noPunct = p.noPunct; + to_return.debugMode = p.debugMode; + to_return.debugLevel = p.debugLevel; + to_return.hypothesisFile = p.hypothesisFile; + to_return.referenceFile = p.referenceFile; + to_return.normalize = p.normalize; + to_return.noTxtIds = p.noTxtIds; + to_return.verbose = p.verbose; + to_return.count_verbose = p.count_verbose; + to_return.outputFileExtension = p.outputFileExtension; + to_return.outputFileName = p.outputFileName; + to_return.sgmlInputs = p.sgmlInputs; + to_return.tercomLike = p.tercomLike; + to_return.printAlignments = p.printAlignments; + to_return.WER=p.WER; + return to_return; + } + string printParams ( param p ) + { + stringstream s; + s << "caseOn = " << p.caseOn << endl; + s << "noPunct = " << p.noPunct << endl; + s << "debugMode = " << p.debugMode << endl; + s << "debugLevel = " << p.debugLevel << endl; + s << "hypothesisFile = " << p.hypothesisFile << endl; + s << "referenceFile = " << p.referenceFile << endl; + s << "normalize = " << p.normalize << endl; + s << "noTxtIds = " << p.noTxtIds << endl; + s << "outputFileExtension = " << p.outputFileExtension << endl; + s << "outputFileName = " << p.outputFileName << endl; + s << "sgmlInputs = " << p.sgmlInputs << endl; + s << "tercomLike = " << p.tercomLike << endl; + s << "verbose = " << p.verbose << endl; + s << "count_verbose = " << p.count_verbose << endl; + return s.str(); -} -string join ( string delim, vector<string> arr ) -{ - if ( ( int ) arr.size() == 0 ) return ""; + } + string join ( string delim, vector<string> arr ) + { + if ( ( int ) arr.size() == 0 ) return ""; // if ((int)delim.compare("") == 0) delim = new String(""); // String s = new String(""); - stringstream s; - s.str ( "" ); - for ( int i = 0; i < ( int ) 
arr.size(); i++ ) { - if ( i == 0 ) { - s << arr.at ( i ); - } else { - s << delim << arr.at ( i ); - } - } - return s.str(); + stringstream s; + s.str ( "" ); + for ( int i = 0; i < ( int ) arr.size(); i++ ) + { + if ( i == 0 ) + { + s << arr.at ( i ); + } + else + { + s << delim << arr.at ( i ); + } + } + return s.str(); // return ""; -} + } } diff --git a/mert/TER/tools.h b/mert/TER/tools.h index 157b739a5..4c3b108cd 100644 --- a/mert/TER/tools.h +++ b/mert/TER/tools.h @@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France Contact: christophe.servan@lium.univ-lemans.fr The tercpp tool and library are free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by +under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the licence, or (at your option) any later version. @@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA **********************************/ -#ifndef MERT_TER_TOOLS_H_ -#define MERT_TER_TOOLS_H_ +#ifndef __TERCPPTOOLS_H__ +#define __TERCPPTOOLS_H__ #include <vector> @@ -35,31 +35,34 @@ using namespace std; namespace Tools { -typedef vector<double> vecDouble; -typedef vector<char> vecChar; -typedef vector<int> vecInt; -typedef vector<float> vecFloat; -typedef vector<size_t> vecSize_t; -typedef vector<string> vecString; -typedef vector<string> alignmentElement; -typedef vector<alignmentElement> WERalignment; + typedef vector<double> vecDouble; + typedef vector<char> vecChar; + typedef vector<int> vecInt; + typedef vector<float> vecFloat; + typedef vector<size_t> vecSize_t; + typedef vector<string> vecString; + typedef vector<string> alignmentElement; + typedef vector<alignmentElement> WERalignment; -struct param { - 
bool debugMode; - string referenceFile; // path to the resources - string hypothesisFile; // path to the configuration files - string outputFileExtension; - string outputFileName; - bool noPunct; - bool caseOn; - bool normalize; - bool tercomLike; - bool sgmlInputs; - bool noTxtIds; - bool printAlignments; - bool WER; - int debugLevel; +struct param +{ + bool debugMode; + string referenceFile; // path to the resources + string hypothesisFile; // path to the configuration files + string outputFileExtension; + string outputFileName; + bool noPunct; + bool caseOn; + bool normalize; + bool tercomLike; + bool sgmlInputs; + bool verbose; + bool count_verbose; + bool noTxtIds; + bool printAlignments; + bool WER; + int debugLevel; }; // param = { false, "","","","" }; @@ -67,35 +70,38 @@ struct param { // private: // public: -string vectorToString ( vector<string> vec ); -string vectorToString ( vector<char> vec ); -string vectorToString ( vector<int> vec ); -string vectorToString ( vector<string> vec, string s ); -string vectorToString ( vector<char> vec, string s ); -string vectorToString ( vector<int> vec, string s ); -string vectorToString ( vector<bool> vec, string s ); -string vectorToString ( char* vec, string s, int taille ); -string vectorToString ( int* vec, string s , int taille ); -string vectorToString ( bool* vec, string s , int taille ); -vector<string> subVector ( vector<string> vec, int start, int end ); -vector<int> subVector ( vector<int> vec, int start, int end ); -vector<float> subVector ( vector<float> vec, int start, int end ); -vector<string> copyVector ( vector<string> vec ); -vector<int> copyVector ( vector<int> vec ); -vector<float> copyVector ( vector<float> vec ); -vector<string> stringToVector ( string s, string tok ); -vector<string> stringToVector ( char s, string tok ); -vector<string> stringToVector ( int s, string tok ); -vector<int> stringToVectorInt ( string s, string tok ); -vector<float> stringToVectorFloat ( string s, string tok ); 
-string lowerCase(string str); -string removePunct(string str); -string tokenizePunct(string str); -string removePunctTercom(string str); -string normalizeStd(string str); -string printParams(param p); -string join ( string delim, vector<string> arr ); + string vectorToString ( vector<string> vec ); + string vectorToString ( vector<char> vec ); + string vectorToString ( vector<int> vec ); + string vectorToString ( vector<string> vec, string s ); + string vectorToString ( vector<char> vec, string s ); + string vectorToString ( vector<int> vec, string s ); + string vectorToString ( vector<bool> vec, string s ); + string vectorToString ( char* vec, string s, int taille ); + string vectorToString ( int* vec, string s , int taille ); + string vectorToString ( bool* vec, string s , int taille ); + string vectorToString ( vector<char>* vec, string s, int taille ); + string vectorToString ( vector<int>* vec, string s , int taille ); + string vectorToString ( vector<bool>* vec, string s , int taille ); + vector<string> subVector ( vector<string> vec, int start, int end ); + vector<int> subVector ( vector<int> vec, int start, int end ); + vector<float> subVector ( vector<float> vec, int start, int end ); + vector<string> copyVector ( vector<string> vec ); + vector<int> copyVector ( vector<int> vec ); + vector<float> copyVector ( vector<float> vec ); + vector<string> stringToVector ( string s, string tok ); + vector<string> stringToVector ( char s, string tok ); + vector<string> stringToVector ( int s, string tok ); + vector<int> stringToVectorInt ( string s, string tok ); + vector<float> stringToVectorFloat ( string s, string tok ); + string lowerCase(string str); + string removePunct(string str); + string tokenizePunct(string str); + string removePunctTercom(string str); + string normalizeStd(string str); + string printParams(param p); + string join ( string delim, vector<string> arr ); // }; -param copyParam(param p); + param copyParam(param p); } #endif |