Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/mert/TER
diff options
context:
space:
mode:
author Hieu Hoang <hieuhoang@gmail.com> 2015-01-14 14:07:42 +0300
committer Hieu Hoang <hieuhoang@gmail.com> 2015-01-14 14:07:42 +0300
commit 05ead45e71916c5763c5c4b6375e2ca6838f3995 (patch)
tree c279bd4aacfb31758720ffbaf5aaf62022574a52 /mert/TER
parent 91cb549ccf09fc33122f3d531f47c38ad0e99b3d (diff)
beautify
Diffstat (limited to 'mert/TER')
-rw-r--r-- mert/TER/alignmentStruct.cpp 16
-rw-r--r-- mert/TER/alignmentStruct.h 26
-rw-r--r-- mert/TER/bestShiftStruct.h 22
-rw-r--r-- mert/TER/hashMap.cpp 232
-rw-r--r-- mert/TER/hashMap.h 40
-rw-r--r-- mert/TER/hashMapInfos.cpp 239
-rw-r--r-- mert/TER/hashMapInfos.h 42
-rw-r--r-- mert/TER/hashMapStringInfos.cpp 313
-rw-r--r-- mert/TER/hashMapStringInfos.h 42
-rw-r--r-- mert/TER/infosHasher.cpp 58
-rw-r--r-- mert/TER/infosHasher.h 36
-rw-r--r-- mert/TER/stringHasher.cpp 46
-rw-r--r-- mert/TER/stringHasher.h 28
-rw-r--r-- mert/TER/stringInfosHasher.cpp 58
-rw-r--r-- mert/TER/stringInfosHasher.h 36
-rw-r--r-- mert/TER/terAlignment.cpp 286
-rw-r--r-- mert/TER/terAlignment.h 72
-rw-r--r-- mert/TER/terShift.cpp 116
-rw-r--r-- mert/TER/terShift.h 46
-rw-r--r-- mert/TER/tercalc.cpp 1512
-rw-r--r-- mert/TER/tercalc.h 90
-rw-r--r-- mert/TER/tools.cpp 1167
-rw-r--r-- mert/TER/tools.h 107
23 files changed, 2153 insertions, 2477 deletions
diff --git a/mert/TER/alignmentStruct.cpp b/mert/TER/alignmentStruct.cpp
index 544ee61ac..e42ec4a14 100644
--- a/mert/TER/alignmentStruct.cpp
+++ b/mert/TER/alignmentStruct.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -23,15 +23,15 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using namespace std;
namespace TERCpp
{
- string alignmentStruct::toString()
- {
- stringstream s;
+string alignmentStruct::toString()
+{
+ stringstream s;
// s << "nword : " << vectorToString(nwords)<<endl;
// s << "alignment" << vectorToString(alignment)<<endl;
// s << "afterShift" << vectorToString(alignment)<<endl;
- s << "Nothing to be printed" <<endl;
- return s.str();
- }
+ s << "Nothing to be printed" <<endl;
+ return s.str();
+}
// alignmentStruct::alignmentStruct()
// {
@@ -99,7 +99,7 @@ namespace TERCpp
// return s.str();
// }
- /* The distance of the shift. */
+/* The distance of the shift. */
// int alignmentStruct::distance()
// {
// if (moveto < start)
diff --git a/mert/TER/alignmentStruct.h b/mert/TER/alignmentStruct.h
index adda2c345..c1459960b 100644
--- a/mert/TER/alignmentStruct.h
+++ b/mert/TER/alignmentStruct.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -34,10 +34,10 @@ using namespace Tools;
namespace TERCpp
{
- class alignmentStruct
- {
- private:
- public:
+class alignmentStruct
+{
+private:
+public:
// alignmentStruct();
// alignmentStruct (int _start, int _end, int _moveto, int _newloc);
@@ -53,14 +53,14 @@ namespace TERCpp
// int end;
// int moveto;
// int newloc;
- vector<string> nwords; // The words we shifted
- vector<char> alignment ; // for pra_more output
- vector<vecInt> aftershift; // for pra_more output
- // This is used to store the cost of a shift, so we don't have to
- // calculate it multiple times.
- double cost;
- string toString();
- };
+ vector<string> nwords; // The words we shifted
+ vector<char> alignment ; // for pra_more output
+ vector<vecInt> aftershift; // for pra_more output
+ // This is used to store the cost of a shift, so we don't have to
+ // calculate it multiple times.
+ double cost;
+ string toString();
+};
}
#endif
\ No newline at end of file
diff --git a/mert/TER/bestShiftStruct.h b/mert/TER/bestShiftStruct.h
index 9457fd1d8..d68f2319f 100644
--- a/mert/TER/bestShiftStruct.h
+++ b/mert/TER/bestShiftStruct.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -36,10 +36,10 @@ using namespace Tools;
namespace TERCpp
{
- class bestShiftStruct
- {
- private:
- public:
+class bestShiftStruct
+{
+private:
+public:
// alignmentStruct();
// alignmentStruct (int _start, int _end, int _moveto, int _newloc);
@@ -55,16 +55,16 @@ namespace TERCpp
// int end;
// int moveto;
// int newloc;
- terShift m_best_shift;
- terAlignment m_best_align;
- bool m_empty;
+ terShift m_best_shift;
+ terAlignment m_best_align;
+ bool m_empty;
// vector<string> nwords; // The words we shifted
// char* alignment ; // for pra_more output
// vector<vecInt> aftershift; // for pra_more output
- // This is used to store the cost of a shift, so we don't have to
- // calculate it multiple times.
+ // This is used to store the cost of a shift, so we don't have to
+ // calculate it multiple times.
// double cost;
- };
+};
}
#endif
\ No newline at end of file
diff --git a/mert/TER/hashMap.cpp b/mert/TER/hashMap.cpp
index de84ff796..253fda715 100644
--- a/mert/TER/hashMap.cpp
+++ b/mert/TER/hashMap.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -28,156 +28,142 @@ using namespace std;
namespace HashMapSpace
{
// hashMap::hashMap();
- /* hashMap::~hashMap()
- {
- // vector<stringHasher>::const_iterator del = m_hasher.begin();
- for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
- {
- delete(*del);
- }
- }*/
- /**
- * int hashMap::trouve ( long searchKey )
- * @param searchKey
- * @return
- */
- int hashMap::trouve ( long searchKey )
+/* hashMap::~hashMap()
{
- long foundKey;
+// vector<stringHasher>::const_iterator del = m_hasher.begin();
+ for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
+ {
+ delete(*del);
+ }
+ }*/
+/**
+ * int hashMap::trouve ( long searchKey )
+ * @param searchKey
+ * @return
+ */
+int hashMap::trouve ( long searchKey )
+{
+ long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return 1;
- }
- }
- return 0;
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return 1;
}
- int hashMap::trouve ( string key )
- {
- long searchKey=hashValue ( key );
- long foundKey;;
+ }
+ return 0;
+}
+int hashMap::trouve ( string key )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return 1;
- }
- }
- return 0;
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return 1;
}
- /**
- * long hashMap::hashValue ( string key )
- * @param key
- * @return
- */
- long hashMap::hashValue ( string key )
- {
- locale loc; // the "C" locale
- const collate<char>& coll = use_facet<collate<char> >(loc);
- return coll.hash(key.data(),key.data()+key.length());
+ }
+ return 0;
+}
+/**
+ * long hashMap::hashValue ( string key )
+ * @param key
+ * @return
+ */
+long hashMap::hashValue ( string key )
+{
+ locale loc; // the "C" locale
+ const collate<char>& coll = use_facet<collate<char> >(loc);
+ return coll.hash(key.data(),key.data()+key.length());
// boost::hash<string> hasher;
// return hasher ( key );
- }
- /**
- * void hashMap::addHasher ( string key, string value )
- * @param key
- * @param value
- */
- void hashMap::addHasher ( string key, string value )
- {
- if ( trouve ( hashValue ( key ) ) ==0 )
- {
+}
+/**
+ * void hashMap::addHasher ( string key, string value )
+ * @param key
+ * @param value
+ */
+void hashMap::addHasher ( string key, string value )
+{
+ if ( trouve ( hashValue ( key ) ) ==0 ) {
// cerr << "ICI1" <<endl;
- stringHasher H ( hashValue ( key ),key,value );
+ stringHasher H ( hashValue ( key ),key,value );
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
// cerr << "ICI2" <<endl;
- m_hasher.push_back ( H );
- }
- }
- stringHasher hashMap::getHasher ( string key )
- {
- long searchKey=hashValue ( key );
- long foundKey;
- stringHasher defaut(0,"","");
+ m_hasher.push_back ( H );
+ }
+}
+stringHasher hashMap::getHasher ( string key )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;
+ stringHasher defaut(0,"","");
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return ( *l_hasher );
- }
- }
- return defaut;
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return ( *l_hasher );
}
- string hashMap::getValue ( string key )
- {
- long searchKey=hashValue ( key );
- long foundKey;
+ }
+ return defaut;
+}
+string hashMap::getValue ( string key )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
- return ( *l_hasher ).getValue();
- }
- }
- return "";
+ return ( *l_hasher ).getValue();
}
- string hashMap::searchValue ( string value )
- {
+ }
+ return "";
+}
+string hashMap::searchValue ( string value )
+{
// long searchKey=hashValue ( key );
// long foundKey;
- string foundValue;
+ string foundValue;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundValue= ( *l_hasher ).getValue();
- if ( foundValue.compare ( value ) == 0 )
- {
- return ( *l_hasher ).getKey();
- }
- }
- return "";
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundValue= ( *l_hasher ).getValue();
+ if ( foundValue.compare ( value ) == 0 ) {
+ return ( *l_hasher ).getKey();
}
+ }
+ return "";
+}
- void hashMap::setValue ( string key , string value )
- {
- long searchKey=hashValue ( key );
- long foundKey;
+void hashMap::setValue ( string key , string value )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- ( *l_hasher ).setValue ( value );
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ ( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue();
- }
- }
}
+ }
+}
- /**
- *
- */
- void hashMap::printHash()
- {
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
- }
+/**
+ *
+ */
+void hashMap::printHash()
+{
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+}
diff --git a/mert/TER/hashMap.h b/mert/TER/hashMap.h
index 6cb721573..c2708b360 100644
--- a/mert/TER/hashMap.h
+++ b/mert/TER/hashMap.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -35,27 +35,27 @@ using namespace std;
namespace HashMapSpace
{
- class hashMap
- {
- private:
- vector<stringHasher> m_hasher;
+class hashMap
+{
+private:
+ vector<stringHasher> m_hasher;
- public:
+public:
// ~hashMap();
- long hashValue ( string key );
- int trouve ( long searchKey );
- int trouve ( string key );
- void addHasher ( string key, string value );
- stringHasher getHasher ( string key );
- string getValue ( string key );
- string searchValue ( string key );
- void setValue ( string key , string value );
- void printHash();
- vector<stringHasher> getHashMap();
- string printStringHash();
- string printStringHash2();
- string printStringHashForLexicon();
- };
+ long hashValue ( string key );
+ int trouve ( long searchKey );
+ int trouve ( string key );
+ void addHasher ( string key, string value );
+ stringHasher getHasher ( string key );
+ string getValue ( string key );
+ string searchValue ( string key );
+ void setValue ( string key , string value );
+ void printHash();
+ vector<stringHasher> getHashMap();
+ string printStringHash();
+ string printStringHash2();
+ string printStringHashForLexicon();
+};
}
diff --git a/mert/TER/hashMapInfos.cpp b/mert/TER/hashMapInfos.cpp
index 23f57d808..0ab6d21b2 100644
--- a/mert/TER/hashMapInfos.cpp
+++ b/mert/TER/hashMapInfos.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -28,117 +28,108 @@ using namespace std;
namespace HashMapSpace
{
// hashMapInfos::hashMap();
- /* hashMapInfos::~hashMap()
- {
- // vector<infosHasher>::const_iterator del = m_hasher.begin();
- for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
- {
- delete(*del);
- }
- }*/
- /**
- * int hashMapInfos::trouve ( long searchKey )
- * @param searchKey
- * @return
- */
- int hashMapInfos::trouve ( long searchKey )
+/* hashMapInfos::~hashMap()
{
- long foundKey;
+// vector<infosHasher>::const_iterator del = m_hasher.begin();
+ for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
+ {
+ delete(*del);
+ }
+ }*/
+/**
+ * int hashMapInfos::trouve ( long searchKey )
+ * @param searchKey
+ * @return
+ */
+int hashMapInfos::trouve ( long searchKey )
+{
+ long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return 1;
- }
- }
- return 0;
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return 1;
}
- int hashMapInfos::trouve ( string key )
- {
- long searchKey=hashValue ( key );
- long foundKey;;
+ }
+ return 0;
+}
+int hashMapInfos::trouve ( string key )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return 1;
- }
- }
- return 0;
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return 1;
}
+ }
+ return 0;
+}
- /**
- * long hashMapInfos::hashValue ( string key )
- * @param key
- * @return
- */
- long hashMapInfos::hashValue ( string key )
- {
- locale loc; // the "C" locale
- const collate<char>& coll = use_facet<collate<char> >(loc);
- return coll.hash(key.data(),key.data()+key.length());
+/**
+ * long hashMapInfos::hashValue ( string key )
+ * @param key
+ * @return
+ */
+long hashMapInfos::hashValue ( string key )
+{
+ locale loc; // the "C" locale
+ const collate<char>& coll = use_facet<collate<char> >(loc);
+ return coll.hash(key.data(),key.data()+key.length());
// boost::hash<string> hasher;
// return hasher ( key );
- }
- /**
- * void hashMapInfos::addHasher ( string key, string value )
- * @param key
- * @param value
- */
- void hashMapInfos::addHasher ( string key, vector<int> value )
- {
- if ( trouve ( hashValue ( key ) ) ==0 )
- {
+}
+/**
+ * void hashMapInfos::addHasher ( string key, string value )
+ * @param key
+ * @param value
+ */
+void hashMapInfos::addHasher ( string key, vector<int> value )
+{
+ if ( trouve ( hashValue ( key ) ) ==0 ) {
// cerr << "ICI1" <<endl;
- infosHasher H ( hashValue ( key ),key,value );
+ infosHasher H ( hashValue ( key ),key,value );
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
// cerr << "ICI2" <<endl;
- m_hasher.push_back ( H );
- }
- }
- void hashMapInfos::addValue ( string key, vector<int> value )
- {
- addHasher ( key, value );
- }
- infosHasher hashMapInfos::getHasher ( string key )
- {
- long searchKey=hashValue ( key );
- long foundKey;
+ m_hasher.push_back ( H );
+ }
+}
+void hashMapInfos::addValue ( string key, vector<int> value )
+{
+ addHasher ( key, value );
+}
+infosHasher hashMapInfos::getHasher ( string key )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return ( *l_hasher );
- }
- }
- vector<int> temp;
- infosHasher defaut(0,"",temp);
- return defaut;
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return ( *l_hasher );
}
- vector<int> hashMapInfos::getValue ( string key )
- {
- long searchKey=hashValue ( key );
- long foundKey;
- vector<int> retour;
+ }
+ vector<int> temp;
+ infosHasher defaut(0,"",temp);
+ return defaut;
+}
+vector<int> hashMapInfos::getValue ( string key )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;
+ vector<int> retour;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
- return ( *l_hasher ).getValue();
- }
- }
- return retour;
+ return ( *l_hasher ).getValue();
}
+ }
+ return retour;
+}
// string hashMapInfos::searchValue ( string value )
// {
// // long searchKey=hashValue ( key );
@@ -158,42 +149,38 @@ namespace HashMapSpace
// }
//
- void hashMapInfos::setValue ( string key , vector<int> value )
- {
- long searchKey=hashValue ( key );
- long foundKey;
+void hashMapInfos::setValue ( string key , vector<int> value )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- ( *l_hasher ).setValue ( value );
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ ( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue();
- }
- }
- }
- string hashMapInfos::toString ()
- {
- stringstream to_return;
- for ( vector<infosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- to_return << (*l_hasher).toString();
- // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
- return to_return.str();
}
+ }
+}
+string hashMapInfos::toString ()
+{
+ stringstream to_return;
+ for ( vector<infosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ to_return << (*l_hasher).toString();
+ // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+ return to_return.str();
+}
- /**
- *
- */
- void hashMapInfos::printHash()
- {
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
+/**
+ *
+ */
+void hashMapInfos::printHash()
+{
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
- }
+ }
+}
diff --git a/mert/TER/hashMapInfos.h b/mert/TER/hashMapInfos.h
index 5e7dbb6e7..e975aa738 100644
--- a/mert/TER/hashMapInfos.h
+++ b/mert/TER/hashMapInfos.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -34,29 +34,29 @@ using namespace std;
namespace HashMapSpace
{
- class hashMapInfos
- {
- private:
- vector<infosHasher> m_hasher;
+class hashMapInfos
+{
+private:
+ vector<infosHasher> m_hasher;
- public:
+public:
// ~hashMap();
- long hashValue ( string key );
- int trouve ( long searchKey );
- int trouve ( string key );
- void addHasher ( string key, vector<int> value );
- void addValue ( string key, vector<int> value );
- infosHasher getHasher ( string key );
- vector<int> getValue ( string key );
+ long hashValue ( string key );
+ int trouve ( long searchKey );
+ int trouve ( string key );
+ void addHasher ( string key, vector<int> value );
+ void addValue ( string key, vector<int> value );
+ infosHasher getHasher ( string key );
+ vector<int> getValue ( string key );
// string searchValue ( string key );
- void setValue ( string key , vector<int> value );
- void printHash();
- string toString();
- vector<infosHasher> getHashMap();
- string printStringHash();
- string printStringHash2();
- string printStringHashForLexicon();
- };
+ void setValue ( string key , vector<int> value );
+ void printHash();
+ string toString();
+ vector<infosHasher> getHashMap();
+ string printStringHash();
+ string printStringHash2();
+ string printStringHashForLexicon();
+};
}
diff --git a/mert/TER/hashMapStringInfos.cpp b/mert/TER/hashMapStringInfos.cpp
index 773c148d4..d984bdadc 100644
--- a/mert/TER/hashMapStringInfos.cpp
+++ b/mert/TER/hashMapStringInfos.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -27,179 +27,166 @@ using namespace std;
namespace HashMapSpace
{
- // hashMapStringInfos::hashMap();
- /* hashMapStringInfos::~hashMap()
- {
- // vector<stringInfosHasher>::const_iterator del = m_hasher.begin();
- for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
- {
- delete(*del);
- }
- }*/
- /**
- * int hashMapStringInfos::trouve ( long searchKey )
- * @param searchKey
- * @return
- */
- int hashMapStringInfos::trouve ( long searchKey )
- {
- long foundKey;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return 1;
- }
- }
- return 0;
+// hashMapStringInfos::hashMap();
+/* hashMapStringInfos::~hashMap()
+{
+// vector<stringInfosHasher>::const_iterator del = m_hasher.begin();
+ for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
+ {
+ delete(*del);
+ }
+}*/
+/**
+* int hashMapStringInfos::trouve ( long searchKey )
+* @param searchKey
+* @return
+*/
+int hashMapStringInfos::trouve ( long searchKey )
+{
+ long foundKey;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return 1;
}
+ }
+ return 0;
+}
- int hashMapStringInfos::trouve ( string key )
- {
- long searchKey = hashValue ( key );
- long foundKey;;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return 1;
- }
- }
- return 0;
+int hashMapStringInfos::trouve ( string key )
+{
+ long searchKey = hashValue ( key );
+ long foundKey;;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return 1;
}
+ }
+ return 0;
+}
- /**
- * long hashMapStringInfos::hashValue ( string key )
- * @param key
- * @return
- */
- long hashMapStringInfos::hashValue ( string key )
- {
- locale loc; // the "C" locale
- const collate<char>& coll = use_facet<collate<char> > ( loc );
- return coll.hash ( key.data(), key.data() + key.length() );
+/**
+* long hashMapStringInfos::hashValue ( string key )
+* @param key
+* @return
+*/
+long hashMapStringInfos::hashValue ( string key )
+{
+ locale loc; // the "C" locale
+ const collate<char>& coll = use_facet<collate<char> > ( loc );
+ return coll.hash ( key.data(), key.data() + key.length() );
// boost::hash<string> hasher;
// return hasher ( key );
+}
+/**
+* void hashMapStringInfos::addHasher ( string key, string value )
+* @param key
+* @param value
+*/
+void hashMapStringInfos::addHasher ( string key, vector<string> value )
+{
+ if ( trouve ( hashValue ( key ) ) == 0 ) {
+ // cerr << "ICI1" <<endl;
+ stringInfosHasher H ( hashValue ( key ), key, value );
+ // cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
+ // cerr << "ICI2" <<endl;
+
+ m_hasher.push_back ( H );
+ }
+}
+void hashMapStringInfos::addValue ( string key, vector<string> value )
+{
+ addHasher ( key, value );
+}
+stringInfosHasher hashMapStringInfos::getHasher ( string key )
+{
+ long searchKey = hashValue ( key );
+ long foundKey;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return ( *l_hasher );
}
- /**
- * void hashMapStringInfos::addHasher ( string key, string value )
- * @param key
- * @param value
- */
- void hashMapStringInfos::addHasher ( string key, vector<string> value )
- {
- if ( trouve ( hashValue ( key ) ) == 0 )
- {
- // cerr << "ICI1" <<endl;
- stringInfosHasher H ( hashValue ( key ), key, value );
- // cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
- // cerr << "ICI2" <<endl;
-
- m_hasher.push_back ( H );
- }
- }
- void hashMapStringInfos::addValue ( string key, vector<string> value )
- {
- addHasher ( key, value );
- }
- stringInfosHasher hashMapStringInfos::getHasher ( string key )
- {
- long searchKey = hashValue ( key );
- long foundKey;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return ( *l_hasher );
- }
- }
- vector<string> tmp;
- stringInfosHasher defaut ( 0, "", tmp );
- return defaut;
- }
- vector<string> hashMapStringInfos::getValue ( string key )
- {
- long searchKey = hashValue ( key );
- long foundKey;
- vector<string> retour;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- // cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
- return ( *l_hasher ).getValue();
- }
- }
- return retour;
+ }
+ vector<string> tmp;
+ stringInfosHasher defaut ( 0, "", tmp );
+ return defaut;
+}
+vector<string> hashMapStringInfos::getValue ( string key )
+{
+ long searchKey = hashValue ( key );
+ long foundKey;
+ vector<string> retour;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ // cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
+ return ( *l_hasher ).getValue();
}
- // string hashMapStringInfos::searchValue ( string value )
- // {
- // // long searchKey=hashValue ( key );
- // // long foundKey;
- // vector<int> foundValue;
- //
- // // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- // for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- // {
- // foundValue= ( *l_hasher ).getValue();
- // /* if ( foundValue.compare ( value ) == 0 )
- // {
- // return ( *l_hasher ).getKey();
- // }*/
- // }
- // return "";
- // }
- //
-
- void hashMapStringInfos::setValue ( string key , vector<string> value )
- {
- long searchKey = hashValue ( key );
- long foundKey;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- ( *l_hasher ).setValue ( value );
- // return ( *l_hasher ).getValue();
- }
- }
+ }
+ return retour;
+}
+// string hashMapStringInfos::searchValue ( string value )
+// {
+// // long searchKey=hashValue ( key );
+// // long foundKey;
+// vector<int> foundValue;
+//
+// // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+// for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+// {
+// foundValue= ( *l_hasher ).getValue();
+// /* if ( foundValue.compare ( value ) == 0 )
+// {
+// return ( *l_hasher ).getKey();
+// }*/
+// }
+// return "";
+// }
+//
+
+void hashMapStringInfos::setValue ( string key , vector<string> value )
+{
+ long searchKey = hashValue ( key );
+ long foundKey;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ ( *l_hasher ).setValue ( value );
+ // return ( *l_hasher ).getValue();
}
+ }
+}
- string hashMapStringInfos::toString ()
- {
- stringstream to_return;
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- to_return << (*l_hasher).toString();
- // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
- return to_return.str();
- }
+string hashMapStringInfos::toString ()
+{
+ stringstream to_return;
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ to_return << (*l_hasher).toString();
+ // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+ return to_return.str();
+}
- /**
- *
- */
- void hashMapStringInfos::printHash()
- {
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
- }
- vector< stringInfosHasher > hashMapStringInfos::getHashMap()
- {
- return m_hasher;
- }
+/**
+*
+*/
+void hashMapStringInfos::printHash()
+{
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+}
+vector< stringInfosHasher > hashMapStringInfos::getHashMap()
+{
+ return m_hasher;
+}
diff --git a/mert/TER/hashMapStringInfos.h b/mert/TER/hashMapStringInfos.h
index 5337d50f2..a0eae951d 100644
--- a/mert/TER/hashMapStringInfos.h
+++ b/mert/TER/hashMapStringInfos.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -34,29 +34,29 @@ using namespace std;
namespace HashMapSpace
{
- class hashMapStringInfos
- {
- private:
- vector<stringInfosHasher> m_hasher;
+class hashMapStringInfos
+{
+private:
+ vector<stringInfosHasher> m_hasher;
- public:
+public:
// ~hashMap();
- long hashValue ( string key );
- int trouve ( long searchKey );
- int trouve ( string key );
- void addHasher ( string key, vector<string> value );
- void addValue ( string key, vector<string> value );
- stringInfosHasher getHasher ( string key );
- vector<string> getValue ( string key );
+ long hashValue ( string key );
+ int trouve ( long searchKey );
+ int trouve ( string key );
+ void addHasher ( string key, vector<string> value );
+ void addValue ( string key, vector<string> value );
+ stringInfosHasher getHasher ( string key );
+ vector<string> getValue ( string key );
// string searchValue ( string key );
- void setValue ( string key , vector<string> value );
- void printHash();
- string toString();
- vector<stringInfosHasher> getHashMap();
- string printStringHash();
- string printStringHash2();
- string printStringHashForLexicon();
- };
+ void setValue ( string key , vector<string> value );
+ void printHash();
+ string toString();
+ vector<stringInfosHasher> getHashMap();
+ string printStringHash();
+ string printStringHash2();
+ string printStringHashForLexicon();
+};
}
diff --git a/mert/TER/infosHasher.cpp b/mert/TER/infosHasher.cpp
index 8ce23ae44..450b70d94 100644
--- a/mert/TER/infosHasher.cpp
+++ b/mert/TER/infosHasher.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -27,35 +27,35 @@ using namespace Tools;
namespace HashMapSpace
{
- infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt )
- {
- m_hashKey=cle;
- m_key=cleTxt;
- m_value=valueVecInt;
- }
+infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt )
+{
+ m_hashKey=cle;
+ m_key=cleTxt;
+ m_value=valueVecInt;
+}
// infosHasher::~infosHasher(){};*/
- long infosHasher::getHashKey()
- {
- return m_hashKey;
- }
- string infosHasher::getKey()
- {
- return m_key;
- }
- vector<int> infosHasher::getValue()
- {
- return m_value;
- }
- void infosHasher::setValue ( vector<int> value )
- {
- m_value=value;
- }
- string infosHasher::toString()
- {
- stringstream to_return;
- to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
- return to_return.str();
- }
+long infosHasher::getHashKey()
+{
+ return m_hashKey;
+}
+string infosHasher::getKey()
+{
+ return m_key;
+}
+vector<int> infosHasher::getValue()
+{
+ return m_value;
+}
+void infosHasher::setValue ( vector<int> value )
+{
+ m_value=value;
+}
+string infosHasher::toString()
+{
+ stringstream to_return;
+ to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
+ return to_return.str();
+}
// typedef stdext::hash_map<std::string,string, stringhasher> HASH_S_S;
diff --git a/mert/TER/infosHasher.h b/mert/TER/infosHasher.h
index d3d56317a..ab9c7b5ed 100644
--- a/mert/TER/infosHasher.h
+++ b/mert/TER/infosHasher.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -31,23 +31,23 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using namespace std;
namespace HashMapSpace
{
- class infosHasher
- {
- private:
- long m_hashKey;
- string m_key;
- vector<int> m_value;
-
- public:
- infosHasher ( long cle, string cleTxt, vector<int> valueVecInt );
- long getHashKey();
- string getKey();
- vector<int> getValue();
- void setValue ( vector<int> value );
- string toString();
-
-
- };
+class infosHasher
+{
+private:
+ long m_hashKey;
+ string m_key;
+ vector<int> m_value;
+
+public:
+ infosHasher ( long cle, string cleTxt, vector<int> valueVecInt );
+ long getHashKey();
+ string getKey();
+ vector<int> getValue();
+ void setValue ( vector<int> value );
+ string toString();
+
+
+};
}
diff --git a/mert/TER/stringHasher.cpp b/mert/TER/stringHasher.cpp
index f4d1526e8..729310352 100644
--- a/mert/TER/stringHasher.cpp
+++ b/mert/TER/stringHasher.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -26,29 +26,29 @@ using namespace std;
namespace HashMapSpace
{
- stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt )
- {
- m_hashKey=cle;
- m_key=cleTxt;
- m_value=valueTxt;
- }
+stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt )
+{
+ m_hashKey=cle;
+ m_key=cleTxt;
+ m_value=valueTxt;
+}
// stringHasher::~stringHasher(){};*/
- long stringHasher::getHashKey()
- {
- return m_hashKey;
- }
- string stringHasher::getKey()
- {
- return m_key;
- }
- string stringHasher::getValue()
- {
- return m_value;
- }
- void stringHasher::setValue ( string value )
- {
- m_value=value;
- }
+long stringHasher::getHashKey()
+{
+ return m_hashKey;
+}
+string stringHasher::getKey()
+{
+ return m_key;
+}
+string stringHasher::getValue()
+{
+ return m_value;
+}
+void stringHasher::setValue ( string value )
+{
+ m_value=value;
+}
// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;
diff --git a/mert/TER/stringHasher.h b/mert/TER/stringHasher.h
index d831f642c..5b0ccfc94 100644
--- a/mert/TER/stringHasher.h
+++ b/mert/TER/stringHasher.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -28,22 +28,22 @@ using namespace std;
namespace HashMapSpace
{
- class stringHasher
- {
- private:
- long m_hashKey;
- string m_key;
- string m_value;
+class stringHasher
+{
+private:
+ long m_hashKey;
+ string m_key;
+ string m_value;
- public:
- stringHasher ( long cle, string cleTxt, string valueTxt );
- long getHashKey();
- string getKey();
- string getValue();
- void setValue ( string value );
+public:
+ stringHasher ( long cle, string cleTxt, string valueTxt );
+ long getHashKey();
+ string getKey();
+ string getValue();
+ void setValue ( string value );
- };
+};
}
diff --git a/mert/TER/stringInfosHasher.cpp b/mert/TER/stringInfosHasher.cpp
index 007fd720f..ecbc10fa5 100644
--- a/mert/TER/stringInfosHasher.cpp
+++ b/mert/TER/stringInfosHasher.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -27,35 +27,35 @@ using namespace Tools;
namespace HashMapSpace
{
- stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt )
- {
- m_hashKey=cle;
- m_key=cleTxt;
- m_value=valueVecInt;
- }
+stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt )
+{
+ m_hashKey=cle;
+ m_key=cleTxt;
+ m_value=valueVecInt;
+}
// stringInfosHasher::~stringInfosHasher(){};*/
- long stringInfosHasher::getHashKey()
- {
- return m_hashKey;
- }
- string stringInfosHasher::getKey()
- {
- return m_key;
- }
- vector<string> stringInfosHasher::getValue()
- {
- return m_value;
- }
- void stringInfosHasher::setValue ( vector<string> value )
- {
- m_value=value;
- }
- string stringInfosHasher::toString()
- {
- stringstream to_return;
- to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
- return to_return.str();
- }
+long stringInfosHasher::getHashKey()
+{
+ return m_hashKey;
+}
+string stringInfosHasher::getKey()
+{
+ return m_key;
+}
+vector<string> stringInfosHasher::getValue()
+{
+ return m_value;
+}
+void stringInfosHasher::setValue ( vector<string> value )
+{
+ m_value=value;
+}
+string stringInfosHasher::toString()
+{
+ stringstream to_return;
+ to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
+ return to_return.str();
+}
// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;
diff --git a/mert/TER/stringInfosHasher.h b/mert/TER/stringInfosHasher.h
index 307b48da7..e4369f27a 100644
--- a/mert/TER/stringInfosHasher.h
+++ b/mert/TER/stringInfosHasher.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -29,23 +29,23 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using namespace std;
namespace HashMapSpace
{
- class stringInfosHasher
- {
- private:
- long m_hashKey;
- string m_key;
- vector<string> m_value;
-
- public:
- stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt );
- long getHashKey();
- string getKey();
- vector<string> getValue();
- void setValue ( vector<string> value );
- string toString();
-
-
- };
+class stringInfosHasher
+{
+private:
+ long m_hashKey;
+ string m_key;
+ vector<string> m_value;
+
+public:
+ stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt );
+ long getHashKey();
+ string getKey();
+ vector<string> getValue();
+ void setValue ( vector<string> value );
+ string toString();
+
+
+};
}
diff --git a/mert/TER/terAlignment.cpp b/mert/TER/terAlignment.cpp
index 6c5d35cc5..ec7bcafb7 100644
--- a/mert/TER/terAlignment.cpp
+++ b/mert/TER/terAlignment.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -24,191 +24,163 @@ using namespace std;
namespace TERCpp
{
- terAlignment::terAlignment()
- {
+terAlignment::terAlignment()
+{
// vector<string> ref;
// vector<string> hyp;
// vector<string> aftershift;
- // TERshift[] allshifts = null;
+ // TERshift[] allshifts = null;
- numEdits=0;
- numWords=0;
- bestRef="";
+ numEdits=0;
+ numWords=0;
+ bestRef="";
- numIns=0;
- numDel=0;
- numSub=0;
- numSft=0;
- numWsf=0;
- }
- string terAlignment::toString()
- {
- stringstream s;
- s.str ( "" );
- s << "Original Ref: \t" << join ( " ", ref ) << endl;
- s << "Original Hyp: \t" << join ( " ", hyp ) <<endl;
- s << "Hyp After Shift:\t" << join ( " ", aftershift );
+ numIns=0;
+ numDel=0;
+ numSub=0;
+ numSft=0;
+ numWsf=0;
+}
+string terAlignment::toString()
+{
+ stringstream s;
+ s.str ( "" );
+ s << "Original Ref: \t" << join ( " ", ref ) << endl;
+ s << "Original Hyp: \t" << join ( " ", hyp ) <<endl;
+ s << "Hyp After Shift:\t" << join ( " ", aftershift );
// s << "Hyp After Shift: " << join ( " ", aftershift );
- s << endl;
+ s << endl;
// string s = "Original Ref: " + join(" ", ref) + "\nOriginal Hyp: " + join(" ", hyp) + "\nHyp After Shift: " + join(" ", aftershift);
- if ( ( int ) sizeof ( alignment ) >0 )
- {
- s << "Alignment: (";
+ if ( ( int ) sizeof ( alignment ) >0 ) {
+ s << "Alignment: (";
// s += "\nAlignment: (";
- for ( int i = 0; i < ( int ) ( alignment.size() ); i++ )
- {
- s << alignment[i];
+ for ( int i = 0; i < ( int ) ( alignment.size() ); i++ ) {
+ s << alignment[i];
// s+=alignment[i];
- }
+ }
// s += ")";
- s << ")";
- }
- s << endl;
- if ( ( int ) allshifts.size() == 0 )
- {
+ s << ")";
+ }
+ s << endl;
+ if ( ( int ) allshifts.size() == 0 ) {
// s += "\nNumShifts: 0";
- s << "NumShifts: 0";
- }
- else
- {
+ s << "NumShifts: 0";
+ } else {
// s += "\nNumShifts: " + (int)allshifts.size();
- s << "NumShifts: "<< ( int ) allshifts.size();
- for ( int i = 0; i < ( int ) allshifts.size(); i++ )
- {
- s << endl << " " ;
- s << ( ( terShift ) allshifts[i] ).toString();
+ s << "NumShifts: "<< ( int ) allshifts.size();
+ for ( int i = 0; i < ( int ) allshifts.size(); i++ ) {
+ s << endl << " " ;
+ s << ( ( terShift ) allshifts[i] ).toString();
// s += "\n " + allshifts[i];
- }
- }
- s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")";
+ }
+ }
+ s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")";
// s += "\nScore: " + score() + " (" + numEdits + "/" + numWords + ")";
- return s.str();
+ return s.str();
- }
- string terAlignment::join ( string delim, vector<string> arr )
- {
- if ( ( int ) arr.size() == 0 ) return "";
+}
+string terAlignment::join ( string delim, vector<string> arr )
+{
+ if ( ( int ) arr.size() == 0 ) return "";
// if ((int)delim.compare("") == 0) delim = new String("");
// String s = new String("");
- stringstream s;
- s.str ( "" );
- for ( int i = 0; i < ( int ) arr.size(); i++ )
- {
- if ( i == 0 )
- {
- s << arr.at ( i );
- }
- else
- {
- s << delim << arr.at ( i );
- }
- }
- return s.str();
+ stringstream s;
+ s.str ( "" );
+ for ( int i = 0; i < ( int ) arr.size(); i++ ) {
+ if ( i == 0 ) {
+ s << arr.at ( i );
+ } else {
+ s << delim << arr.at ( i );
+ }
+ }
+ return s.str();
// return "";
+}
+double terAlignment::score()
+{
+ if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) ) {
+ return 1.0;
+ }
+ if ( numWords <= 0.0 ) {
+ return 0.0;
+ }
+ return ( double ) numEdits / numWords;
+}
+double terAlignment::scoreAv()
+{
+ if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) ) {
+ return 1.0;
+ }
+ if ( averageWords <= 0.0 ) {
+ return 0.0;
+ }
+ return ( double ) numEdits / averageWords;
+}
+
+void terAlignment::scoreDetails()
+{
+ numIns = numDel = numSub = numWsf = numSft = 0;
+ if((int)allshifts.size()>0) {
+ for(int i = 0; i < (int)allshifts.size(); ++i) {
+ numWsf += allshifts[i].size();
}
- double terAlignment::score()
- {
- if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) )
- {
- return 1.0;
- }
- if ( numWords <= 0.0 )
- {
- return 0.0;
- }
- return ( double ) numEdits / numWords;
+ numSft = allshifts.size();
+ }
+
+ if((int)alignment.size()>0 ) {
+ for(int i = 0; i < (int)alignment.size(); ++i) {
+ switch (alignment[i]) {
+ case 'S':
+ case 'T':
+ numSub++;
+ break;
+ case 'D':
+ numDel++;
+ break;
+ case 'I':
+ numIns++;
+ break;
+ }
}
- double terAlignment::scoreAv()
- {
- if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) )
- {
- return 1.0;
- }
- if ( averageWords <= 0.0 )
- {
- return 0.0;
- }
- return ( double ) numEdits / averageWords;
+ }
+ // if(numEdits != numSft + numDel + numIns + numSub)
+ // System.out.println("** Error, unmatch edit erros " + numEdits +
+ // " vs " + (numSft + numDel + numIns + numSub));
+}
+string terAlignment::printAlignments()
+{
+ stringstream to_return;
+ for(int i = 0; i < (int)alignment.size(); ++i) {
+ char alignInfo=alignment.at(i);
+ if (alignInfo == 'A' ) {
+ alignInfo='A';
}
- void terAlignment::scoreDetails()
- {
- numIns = numDel = numSub = numWsf = numSft = 0;
- if((int)allshifts.size()>0)
- {
- for(int i = 0; i < (int)allshifts.size(); ++i)
- {
- numWsf += allshifts[i].size();
- }
- numSft = allshifts.size();
- }
-
- if((int)alignment.size()>0 )
- {
- for(int i = 0; i < (int)alignment.size(); ++i)
- {
- switch (alignment[i])
- {
- case 'S':
- case 'T':
- numSub++;
- break;
- case 'D':
- numDel++;
- break;
- case 'I':
- numIns++;
- break;
- }
- }
- }
- // if(numEdits != numSft + numDel + numIns + numSub)
- // System.out.println("** Error, unmatch edit erros " + numEdits +
- // " vs " + (numSft + numDel + numIns + numSub));
- }
- string terAlignment::printAlignments()
- {
- stringstream to_return;
- for(int i = 0; i < (int)alignment.size(); ++i)
- {
- char alignInfo=alignment.at(i);
- if (alignInfo == 'A' )
- {
- alignInfo='A';
- }
-
- if (i==0)
- {
- to_return << alignInfo;
- }
- else
- {
- to_return << " " << alignInfo;
- }
- }
- return to_return.str();
+ if (i==0) {
+ to_return << alignInfo;
+ } else {
+ to_return << " " << alignInfo;
+ }
}
+ return to_return.str();
+}
string terAlignment::printAllShifts()
{
- stringstream to_return;
- if ( ( int ) allshifts.size() == 0 )
- {
+ stringstream to_return;
+ if ( ( int ) allshifts.size() == 0 ) {
// s += "\nNumShifts: 0";
- to_return << "NbrShifts: 0";
- }
- else
- {
+ to_return << "NbrShifts: 0";
+ } else {
// s += "\nNumShifts: " + (int)allshifts.size();
- to_return << "NbrShifts: "<< ( int ) allshifts.size();
- for ( int i = 0; i < ( int ) allshifts.size(); i++ )
- {
- to_return << "\t" ;
- to_return << ( ( terShift ) allshifts[i] ).toString();
+ to_return << "NbrShifts: "<< ( int ) allshifts.size();
+ for ( int i = 0; i < ( int ) allshifts.size(); i++ ) {
+ to_return << "\t" ;
+ to_return << ( ( terShift ) allshifts[i] ).toString();
// s += "\n " + allshifts[i];
- }
- }
- return to_return.str();
+ }
+ }
+ return to_return.str();
}
} \ No newline at end of file
diff --git a/mert/TER/terAlignment.h b/mert/TER/terAlignment.h
index 0af86f663..2af0b7490 100644
--- a/mert/TER/terAlignment.h
+++ b/mert/TER/terAlignment.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -34,41 +34,41 @@ using namespace std;
namespace TERCpp
{
- class terAlignment
- {
- private:
- public:
-
- terAlignment();
- string toString();
- void scoreDetails();
-
- vector<string> ref;
- vector<string> hyp;
- vector<string> aftershift;
- vector<terShift> allshifts;
- vector<int> hyp_int;
- vector<int> aftershift_int;
-
- double numEdits;
- double numWords;
- double averageWords;
- vector<char> alignment;
- string bestRef;
-
- int numIns;
- int numDel;
- int numSub;
- int numSft;
- int numWsf;
-
-
- string join ( string delim, vector<string> arr );
- double score();
- double scoreAv();
- string printAlignments();
- string printAllShifts();
- };
+class terAlignment
+{
+private:
+public:
+
+ terAlignment();
+ string toString();
+ void scoreDetails();
+
+ vector<string> ref;
+ vector<string> hyp;
+ vector<string> aftershift;
+ vector<terShift> allshifts;
+ vector<int> hyp_int;
+ vector<int> aftershift_int;
+
+ double numEdits;
+ double numWords;
+ double averageWords;
+ vector<char> alignment;
+ string bestRef;
+
+ int numIns;
+ int numDel;
+ int numSub;
+ int numSft;
+ int numWsf;
+
+
+ string join ( string delim, vector<string> arr );
+ double score();
+ double scoreAv();
+ string printAlignments();
+ string printAllShifts();
+};
}
#endif \ No newline at end of file
diff --git a/mert/TER/terShift.cpp b/mert/TER/terShift.cpp
index c1106db76..440b4d2ce 100644
--- a/mert/TER/terShift.cpp
+++ b/mert/TER/terShift.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -42,32 +42,32 @@ namespace TERCpp
// numSft=0;
// numWsf=0;
// }
- terShift::terShift ()
- {
- start = 0;
- end = 0;
- moveto = 0;
- newloc = 0;
- cost=1.0;
- }
- terShift::terShift ( int _start, int _end, int _moveto, int _newloc )
- {
- start = _start;
- end = _end;
- moveto = _moveto;
- newloc = _newloc;
- cost=1.0;
- }
+terShift::terShift ()
+{
+ start = 0;
+ end = 0;
+ moveto = 0;
+ newloc = 0;
+ cost=1.0;
+}
+terShift::terShift ( int _start, int _end, int _moveto, int _newloc )
+{
+ start = _start;
+ end = _end;
+ moveto = _moveto;
+ newloc = _newloc;
+ cost=1.0;
+}
- terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted )
- {
- start = _start;
- end = _end;
- moveto = _moveto;
- newloc = _newloc;
- shifted = _shifted;
- cost=1.0;
- }
+terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted )
+{
+ start = _start;
+ end = _end;
+ moveto = _moveto;
+ newloc = _newloc;
+ shifted = _shifted;
+ cost=1.0;
+}
// string terShift::vectorToString(vector<string> vec)
// {
// string retour("");
@@ -78,44 +78,38 @@ namespace TERCpp
// return retour;
// }
- string terShift::toString()
- {
- stringstream s;
- s.str ( "" );
- s << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
- if ( ( int ) shifted.size() > 0 )
- {
- s << " (" << vectorToString ( shifted ) << ")";
- }
- return s.str();
- }
+string terShift::toString()
+{
+ stringstream s;
+ s.str ( "" );
+ s << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
+ if ( ( int ) shifted.size() > 0 ) {
+ s << " (" << vectorToString ( shifted ) << ")";
+ }
+ return s.str();
+}
- /* The distance of the shift. */
- int terShift::distance()
- {
- if ( moveto < start )
- {
- return start - moveto;
- }
- else if ( moveto > end )
- {
- return moveto - end;
- }
- else
- {
- return moveto - start;
- }
- }
+/* The distance of the shift. */
+int terShift::distance()
+{
+ if ( moveto < start ) {
+ return start - moveto;
+ } else if ( moveto > end ) {
+ return moveto - end;
+ } else {
+ return moveto - start;
+ }
+}
- bool terShift::leftShift()
- {
- return ( moveto < start );
- }
+bool terShift::leftShift()
+{
+ return ( moveto < start );
+}
- int terShift::size()
- {
- return ( end - start ) + 1;
- }
+int terShift::size()
+{
+ return ( end - start ) + 1;
+}
// terShift terShift::operator=(terShift t)
// {
//
diff --git a/mert/TER/terShift.h b/mert/TER/terShift.h
index ba84a5947..74545e0de 100644
--- a/mert/TER/terShift.h
+++ b/mert/TER/terShift.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -34,32 +34,32 @@ using namespace Tools;
namespace TERCpp
{
- class terShift
- {
- private:
- public:
+class terShift
+{
+private:
+public:
- terShift();
- terShift ( int _start, int _end, int _moveto, int _newloc );
- terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted );
- string toString();
- int distance() ;
- bool leftShift();
- int size();
+ terShift();
+ terShift ( int _start, int _end, int _moveto, int _newloc );
+ terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted );
+ string toString();
+ int distance() ;
+ bool leftShift();
+ int size();
// terShift operator=(terShift t);
// string vectorToString(vector<string> vec);
- int start;
- int end;
- int moveto;
- int newloc;
- vector<string> shifted; // The words we shifted
- vector<char> alignment ; // for pra_more output
- vector<string> aftershift; // for pra_more output
- // This is used to store the cost of a shift, so we don't have to
- // calculate it multiple times.
- double cost;
- };
+ int start;
+ int end;
+ int moveto;
+ int newloc;
+ vector<string> shifted; // The words we shifted
+ vector<char> alignment ; // for pra_more output
+ vector<string> aftershift; // for pra_more output
+ // This is used to store the cost of a shift, so we don't have to
+ // calculate it multiple times.
+ double cost;
+};
}
#endif \ No newline at end of file
diff --git a/mert/TER/tercalc.cpp b/mert/TER/tercalc.cpp
index b7f63772c..c4629c639 100644
--- a/mert/TER/tercalc.cpp
+++ b/mert/TER/tercalc.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -35,902 +35,724 @@ using namespace Tools;
namespace TERCpp
{
- terCalc::terCalc()
- {
- TAILLE_PERMUT_MAX = 50;
- infinite = 999999.0;
- shift_cost = 1.0;
- insert_cost = 1.0;
- delete_cost = 1.0;
- substitute_cost = 1.0;
- match_cost = 0.0;
- NBR_SEGS_EVALUATED = 0;
- NBR_PERMUTS_CONSID = 0;
- NBR_BS_APPELS = 0;
- TAILLE_BEAM = 20;
- DIST_MAX_PERMUT = 50;
- PRINT_DEBUG = false;
- hypSpans.clear();
- refSpans.clear();
- }
+terCalc::terCalc()
+{
+ TAILLE_PERMUT_MAX = 50;
+ infinite = 999999.0;
+ shift_cost = 1.0;
+ insert_cost = 1.0;
+ delete_cost = 1.0;
+ substitute_cost = 1.0;
+ match_cost = 0.0;
+ NBR_SEGS_EVALUATED = 0;
+ NBR_PERMUTS_CONSID = 0;
+ NBR_BS_APPELS = 0;
+ TAILLE_BEAM = 20;
+ DIST_MAX_PERMUT = 50;
+ PRINT_DEBUG = false;
+ hypSpans.clear();
+ refSpans.clear();
+}
- terAlignment terCalc::WERCalculation ( vector< string > hyp , vector< string > ref )
- {
-
- return minimizeDistanceEdition ( hyp, ref, hypSpans );
-
- }
+terAlignment terCalc::WERCalculation ( vector< string > hyp , vector< string > ref )
+{
- terAlignment terCalc::TER ( std::vector< int > hyp, std::vector< int > ref )
- {
- stringstream s;
- s.str ( "" );
- string stringRef ( "" );
- string stringHyp ( "" );
- for ( vector<int>::iterator l_it = ref.begin(); l_it != ref.end(); l_it++ )
- {
- if ( l_it == ref.begin() )
- {
- s << ( *l_it );
- }
- else
- {
- s << " " << ( *l_it );
- }
- }
- stringRef = s.str();
- s.str ( "" );
- for ( vector<int>::iterator l_itHyp = hyp.begin(); l_itHyp != hyp.end(); l_itHyp++ )
- {
- if ( l_itHyp == hyp.begin() )
- {
- s << ( *l_itHyp );
- }
- else
- {
- s << " " << ( *l_itHyp );
- }
- }
- stringHyp = s.str();
- s.str ( "" );
- return TER ( stringToVector ( stringRef , " " ), stringToVector ( stringHyp , " " ) );
- }
+ return minimizeDistanceEdition ( hyp, ref, hypSpans );
+}
- hashMapInfos terCalc::createConcordMots ( vector<string> hyp, vector<string> ref )
- {
- hashMap tempHash;
- hashMapInfos retour;
- for ( int i = 0; i < ( int ) hyp.size(); i++ )
- {
- tempHash.addHasher ( hyp.at ( i ), "" );
- }
- bool cor[ref.size() ];
- for ( int i = 0; i < ( int ) ref.size(); i++ )
- {
- if ( tempHash.trouve ( ( string ) ref.at ( i ) ) )
- {
- cor[i] = true;
- }
- else
- {
- cor[i] = false;
- }
- }
- for ( int start = 0; start < ( int ) ref.size(); start++ )
- {
- if ( cor[start] )
- {
- for ( int end = start; ( ( end < ( int ) ref.size() ) && ( end - start <= TAILLE_PERMUT_MAX ) && ( cor[end] ) );end++ )
- {
- vector<string> ajouter = subVector ( ref, start, end + 1 );
- string ajouterString = vectorToString ( ajouter );
- vector<int> values = retour.getValue ( ajouterString );
- values.push_back ( start );
- if ( values.size() > 1 )
- {
- retour.setValue ( ajouterString, values );
- }
- else
- {
- retour.addValue ( ajouterString, values );
- }
- }
- }
- }
- return retour;
+terAlignment terCalc::TER ( std::vector< int > hyp, std::vector< int > ref )
+{
+ stringstream s;
+ s.str ( "" );
+ string stringRef ( "" );
+ string stringHyp ( "" );
+ for ( vector<int>::iterator l_it = ref.begin(); l_it != ref.end(); l_it++ ) {
+ if ( l_it == ref.begin() ) {
+ s << ( *l_it );
+ } else {
+ s << " " << ( *l_it );
+ }
+ }
+ stringRef = s.str();
+ s.str ( "" );
+ for ( vector<int>::iterator l_itHyp = hyp.begin(); l_itHyp != hyp.end(); l_itHyp++ ) {
+ if ( l_itHyp == hyp.begin() ) {
+ s << ( *l_itHyp );
+ } else {
+ s << " " << ( *l_itHyp );
}
+ }
+ stringHyp = s.str();
+ s.str ( "" );
+ return TER ( stringToVector ( stringRef , " " ), stringToVector ( stringHyp , " " ) );
+}
+
- bool terCalc::trouverIntersection ( vecInt refSpan, vecInt hypSpan )
- {
- if ( ( refSpan.at ( 1 ) >= hypSpan.at ( 0 ) ) && ( refSpan.at ( 0 ) <= hypSpan.at ( 1 ) ) )
- {
- return true;
+hashMapInfos terCalc::createConcordMots ( vector<string> hyp, vector<string> ref )
+{
+ hashMap tempHash;
+ hashMapInfos retour;
+ for ( int i = 0; i < ( int ) hyp.size(); i++ ) {
+ tempHash.addHasher ( hyp.at ( i ), "" );
+ }
+ bool cor[ref.size() ];
+ for ( int i = 0; i < ( int ) ref.size(); i++ ) {
+ if ( tempHash.trouve ( ( string ) ref.at ( i ) ) ) {
+ cor[i] = true;
+ } else {
+ cor[i] = false;
+ }
+ }
+ for ( int start = 0; start < ( int ) ref.size(); start++ ) {
+ if ( cor[start] ) {
+ for ( int end = start; ( ( end < ( int ) ref.size() ) && ( end - start <= TAILLE_PERMUT_MAX ) && ( cor[end] ) ); end++ ) {
+ vector<string> ajouter = subVector ( ref, start, end + 1 );
+ string ajouterString = vectorToString ( ajouter );
+ vector<int> values = retour.getValue ( ajouterString );
+ values.push_back ( start );
+ if ( values.size() > 1 ) {
+ retour.setValue ( ajouterString, values );
+ } else {
+ retour.addValue ( ajouterString, values );
}
- return false;
+ }
}
+ }
+ return retour;
+}
+bool terCalc::trouverIntersection ( vecInt refSpan, vecInt hypSpan )
+{
+ if ( ( refSpan.at ( 1 ) >= hypSpan.at ( 0 ) ) && ( refSpan.at ( 0 ) <= hypSpan.at ( 1 ) ) ) {
+ return true;
+ }
+ return false;
+}
- terAlignment terCalc::minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans )
- {
- double current_best = infinite;
- double last_best = infinite;
- int first_good = 0;
- int current_first_good = 0;
- int last_good = -1;
- int cur_last_good = 0;
- int last_peak = 0;
- int cur_last_peak = 0;
- int i, j;
- double cost, icost, dcost;
- double score;
-
-
-
- NBR_BS_APPELS++;
-
-
- for ( i = 0; i <= ( int ) ref.size(); i++ )
- {
- for ( j = 0; j <= ( int ) hyp.size(); j++ )
- {
- S[i][j] = -1.0;
- P[i][j] = '0';
- }
- }
- S[0][0] = 0.0;
- for ( j = 0; j <= ( int ) hyp.size(); j++ )
- {
- last_best = current_best;
- current_best = infinite;
- first_good = current_first_good;
- current_first_good = -1;
- last_good = cur_last_good;
- cur_last_good = -1;
- last_peak = cur_last_peak;
- cur_last_peak = 0;
- for ( i = first_good; i <= ( int ) ref.size(); i++ )
- {
- if ( i > last_good )
- {
- break;
- }
- if ( S[i][j] < 0 )
- {
- continue;
- }
- score = S[i][j];
- if ( ( j < ( int ) hyp.size() ) && ( score > last_best + TAILLE_BEAM ) )
- {
- continue;
- }
- if ( current_first_good == -1 )
- {
- current_first_good = i ;
- }
- if ( ( i < ( int ) ref.size() ) && ( j < ( int ) hyp.size() ) )
- {
- if ( ( int ) refSpans.size() == 0 || ( int ) hypSpans.size() == 0 || trouverIntersection ( refSpans.at ( i ), curHypSpans.at ( j ) ) )
- {
- if ( ( int ) ( ref.at ( i ).compare ( hyp.at ( j ) ) ) == 0 )
- {
- cost = match_cost + score;
- if ( ( S[i+1][j+1] == -1 ) || ( cost < S[i+1][j+1] ) )
- {
- S[i+1][j+1] = cost;
- P[i+1][j+1] = 'A';
- }
- if ( cost < current_best )
- {
- current_best = cost;
- }
- if ( current_best == cost )
- {
- cur_last_peak = i + 1;
- }
- }
- else
- {
- cost = substitute_cost + score;
- if ( ( S[i+1][j+1] < 0 ) || ( cost < S[i+1][j+1] ) )
- {
- S[i+1][j+1] = cost;
- P[i+1][j+1] = 'S';
- if ( cost < current_best )
- {
- current_best = cost;
- }
- if ( current_best == cost )
- {
- cur_last_peak = i + 1 ;
- }
- }
- }
- }
- }
- cur_last_good = i + 1;
- if ( j < ( int ) hyp.size() )
- {
- icost = score + insert_cost;
- if ( ( S[i][j+1] < 0 ) || ( S[i][j+1] > icost ) )
- {
- S[i][j+1] = icost;
- P[i][j+1] = 'I';
- if ( ( cur_last_peak < i ) && ( current_best == icost ) )
- {
- cur_last_peak = i;
- }
- }
- }
- if ( i < ( int ) ref.size() )
- {
- dcost = score + delete_cost;
- if ( ( S[ i+1][ j] < 0.0 ) || ( S[i+1][j] > dcost ) )
- {
- S[i+1][j] = dcost;
- P[i+1][j] = 'D';
- if ( i >= last_good )
- {
- last_good = i + 1 ;
- }
- }
- }
- }
- }
+
+terAlignment terCalc::minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans )
+{
+ double current_best = infinite;
+ double last_best = infinite;
+ int first_good = 0;
+ int current_first_good = 0;
+ int last_good = -1;
+ int cur_last_good = 0;
+ int last_peak = 0;
+ int cur_last_peak = 0;
+ int i, j;
+ double cost, icost, dcost;
+ double score;
- int tracelength = 0;
- i = ref.size();
- j = hyp.size();
- while ( ( i > 0 ) || ( j > 0 ) )
- {
- tracelength++;
- if ( P[i][j] == 'A' )
- {
- i--;
- j--;
+
+ NBR_BS_APPELS++;
+
+
+ for ( i = 0; i <= ( int ) ref.size(); i++ ) {
+ for ( j = 0; j <= ( int ) hyp.size(); j++ ) {
+ S[i][j] = -1.0;
+ P[i][j] = '0';
+ }
+ }
+ S[0][0] = 0.0;
+ for ( j = 0; j <= ( int ) hyp.size(); j++ ) {
+ last_best = current_best;
+ current_best = infinite;
+ first_good = current_first_good;
+ current_first_good = -1;
+ last_good = cur_last_good;
+ cur_last_good = -1;
+ last_peak = cur_last_peak;
+ cur_last_peak = 0;
+ for ( i = first_good; i <= ( int ) ref.size(); i++ ) {
+ if ( i > last_good ) {
+ break;
+ }
+ if ( S[i][j] < 0 ) {
+ continue;
+ }
+ score = S[i][j];
+ if ( ( j < ( int ) hyp.size() ) && ( score > last_best + TAILLE_BEAM ) ) {
+ continue;
+ }
+ if ( current_first_good == -1 ) {
+ current_first_good = i ;
+ }
+ if ( ( i < ( int ) ref.size() ) && ( j < ( int ) hyp.size() ) ) {
+ if ( ( int ) refSpans.size() == 0 || ( int ) hypSpans.size() == 0 || trouverIntersection ( refSpans.at ( i ), curHypSpans.at ( j ) ) ) {
+ if ( ( int ) ( ref.at ( i ).compare ( hyp.at ( j ) ) ) == 0 ) {
+ cost = match_cost + score;
+ if ( ( S[i+1][j+1] == -1 ) || ( cost < S[i+1][j+1] ) ) {
+ S[i+1][j+1] = cost;
+ P[i+1][j+1] = 'A';
}
- else
- if ( P[i][j] == 'S' )
- {
- i--;
- j--;
- }
- else
- if ( P[i][j] == 'D' )
- {
- i--;
- }
- else
- if ( P[i][j] == 'I' )
- {
- j--;
- }
- else
- {
- cerr << "ERROR : terCalc::minimizeDistanceEdition : Invalid path : " << P[i][j] << endl;
- exit ( -1 );
- }
- }
- vector<char> path ( tracelength );
- i = ref.size();
- j = hyp.size();
- while ( ( i > 0 ) || ( j > 0 ) )
- {
- path[--tracelength] = P[i][j];
- if ( P[i][j] == 'A' )
- {
- i--;
- j--;
+ if ( cost < current_best ) {
+ current_best = cost;
+ }
+ if ( current_best == cost ) {
+ cur_last_peak = i + 1;
}
- else
- if ( P[i][j] == 'S' )
- {
- i--;
- j--;
- }
- else
- if ( P[i][j] == 'D' )
- {
- i--;
- }
- else
- if ( P[i][j] == 'I' )
- {
- j--;
- }
+ } else {
+ cost = substitute_cost + score;
+ if ( ( S[i+1][j+1] < 0 ) || ( cost < S[i+1][j+1] ) ) {
+ S[i+1][j+1] = cost;
+ P[i+1][j+1] = 'S';
+ if ( cost < current_best ) {
+ current_best = cost;
+ }
+ if ( current_best == cost ) {
+ cur_last_peak = i + 1 ;
+ }
+ }
+ }
}
- terAlignment to_return;
- to_return.numWords = ref.size();
- to_return.alignment = path;
- to_return.numEdits = S[ref.size() ][hyp.size() ];
- to_return.hyp = hyp;
- to_return.ref = ref;
- to_return.averageWords = (int)ref.size();
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::minimizeDistanceEdition : to_return :" << endl << to_return.toString() << endl << "END DEBUG" << endl;
+ }
+ cur_last_good = i + 1;
+ if ( j < ( int ) hyp.size() ) {
+ icost = score + insert_cost;
+ if ( ( S[i][j+1] < 0 ) || ( S[i][j+1] > icost ) ) {
+ S[i][j+1] = icost;
+ P[i][j+1] = 'I';
+ if ( ( cur_last_peak < i ) && ( current_best == icost ) ) {
+ cur_last_peak = i;
+ }
}
- return to_return;
-
+ }
+ if ( i < ( int ) ref.size() ) {
+ dcost = score + delete_cost;
+ if ( ( S[ i+1][ j] < 0.0 ) || ( S[i+1][j] > dcost ) ) {
+ S[i+1][j] = dcost;
+ P[i+1][j] = 'D';
+ if ( i >= last_good ) {
+ last_good = i + 1 ;
+ }
+ }
+ }
+ }
+ }
+
+
+ int tracelength = 0;
+ i = ref.size();
+ j = hyp.size();
+ while ( ( i > 0 ) || ( j > 0 ) ) {
+ tracelength++;
+ if ( P[i][j] == 'A' ) {
+ i--;
+ j--;
+ } else if ( P[i][j] == 'S' ) {
+ i--;
+ j--;
+ } else if ( P[i][j] == 'D' ) {
+ i--;
+ } else if ( P[i][j] == 'I' ) {
+ j--;
+ } else {
+ cerr << "ERROR : terCalc::minimizeDistanceEdition : Invalid path : " << P[i][j] << endl;
+ exit ( -1 );
+ }
+ }
+ vector<char> path ( tracelength );
+ i = ref.size();
+ j = hyp.size();
+ while ( ( i > 0 ) || ( j > 0 ) ) {
+ path[--tracelength] = P[i][j];
+ if ( P[i][j] == 'A' ) {
+ i--;
+ j--;
+ } else if ( P[i][j] == 'S' ) {
+ i--;
+ j--;
+ } else if ( P[i][j] == 'D' ) {
+ i--;
+ } else if ( P[i][j] == 'I' ) {
+ j--;
}
- terAlignment terCalc::TER ( vector<string> hyp, vector<string> ref )
- {
- hashMapInfos rloc = createConcordMots ( hyp, ref );
- terAlignment cur_align = minimizeDistanceEdition ( hyp, ref, hypSpans );
- vector<string> cur = hyp;
- cur_align.hyp = hyp;
- cur_align.ref = ref;
- cur_align.aftershift = hyp;
- double edits = 0;
+ }
+ terAlignment to_return;
+ to_return.numWords = ref.size();
+ to_return.alignment = path;
+ to_return.numEdits = S[ref.size() ][hyp.size() ];
+ to_return.hyp = hyp;
+ to_return.ref = ref;
+ to_return.averageWords = (int)ref.size();
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::minimizeDistanceEdition : to_return :" << endl << to_return.toString() << endl << "END DEBUG" << endl;
+ }
+ return to_return;
+
+}
+terAlignment terCalc::TER ( vector<string> hyp, vector<string> ref )
+{
+ hashMapInfos rloc = createConcordMots ( hyp, ref );
+ terAlignment cur_align = minimizeDistanceEdition ( hyp, ref, hypSpans );
+ vector<string> cur = hyp;
+ cur_align.hyp = hyp;
+ cur_align.ref = ref;
+ cur_align.aftershift = hyp;
+ double edits = 0;
// int numshifts = 0;
- vector<terShift> allshifts;
+ vector<terShift> allshifts;
// cerr << "Initial Alignment:" << endl << cur_align.toString() <<endl;
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::TER : cur_align :" << endl << cur_align.toString() << endl << "END DEBUG" << endl;
- }
- while ( true )
- {
- bestShiftStruct returns;
- returns = findBestShift ( cur, hyp, ref, rloc, cur_align );
- if ( returns.m_empty )
- {
- break;
- }
- terShift bestShift = returns.m_best_shift;
- cur_align = returns.m_best_align;
- edits += bestShift.cost;
- bestShift.alignment = cur_align.alignment;
- bestShift.aftershift = cur_align.aftershift;
- allshifts.push_back ( bestShift );
- cur = cur_align.aftershift;
- }
- terAlignment to_return;
- to_return = cur_align;
- to_return.allshifts = allshifts;
- to_return.numEdits += edits;
- NBR_SEGS_EVALUATED++;
- return to_return;
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::TER : cur_align :" << endl << cur_align.toString() << endl << "END DEBUG" << endl;
+ }
+ while ( true ) {
+ bestShiftStruct returns;
+ returns = findBestShift ( cur, hyp, ref, rloc, cur_align );
+ if ( returns.m_empty ) {
+ break;
}
- bestShiftStruct terCalc::findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment med_align )
- {
- bestShiftStruct to_return;
- bool anygain = false;
- bool herr[ ( int ) hyp.size() ];
- bool rerr[ ( int ) ref.size() ];
- int ralign[ ( int ) ref.size() ];
- calculateTerAlignment ( med_align, herr, rerr, ralign );
- vector<vecTerShift> poss_shifts;
-
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::findBestShift (after the calculateTerAlignment call) :" << endl;
- cerr << "indices: ";
- for (int l_i=0; l_i < ( int ) ref.size() ; l_i++)
- {
- cerr << l_i << "\t";
- }
- cerr << endl;
- cerr << "hyp : \t"<<vectorToString(hyp ,"\t") << endl;
- cerr << "cur : \t"<<vectorToString(cur ,"\t") << endl;
- cerr << "ref : \t"<<vectorToString(ref ,"\t") << endl;
- cerr << "herr : "<<vectorToString(herr,"\t",( int ) hyp.size()) << " | " << ( int ) hyp.size() <<endl;
- cerr << "rerr : "<<vectorToString(rerr,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() <<endl;
- cerr << "ralign : "<< vectorToString(ralign,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() << endl;
- cerr << "END DEBUG " << endl;
- }
- poss_shifts = calculerPermutations ( cur, ref, rloc, med_align, herr, rerr, ralign );
- double curerr = med_align.numEdits;
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
- cerr << "Possible Shifts:" << endl;
- for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- )
- {
- for ( int j = 0; j < ( int ) ( poss_shifts.at ( i ) ).size(); j++ )
- {
- cerr << " [" << i << "] " << ( ( poss_shifts.at ( i ) ).at ( j ) ).toString() << endl;
- }
- }
- cerr << endl;
- cerr << "END DEBUG " << endl;
- }
+ terShift bestShift = returns.m_best_shift;
+ cur_align = returns.m_best_align;
+ edits += bestShift.cost;
+ bestShift.alignment = cur_align.alignment;
+ bestShift.aftershift = cur_align.aftershift;
+ allshifts.push_back ( bestShift );
+ cur = cur_align.aftershift;
+ }
+ terAlignment to_return;
+ to_return = cur_align;
+ to_return.allshifts = allshifts;
+ to_return.numEdits += edits;
+ NBR_SEGS_EVALUATED++;
+ return to_return;
+}
+bestShiftStruct terCalc::findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment med_align )
+{
+ bestShiftStruct to_return;
+ bool anygain = false;
+ bool herr[ ( int ) hyp.size() ];
+ bool rerr[ ( int ) ref.size() ];
+ int ralign[ ( int ) ref.size() ];
+ calculateTerAlignment ( med_align, herr, rerr, ralign );
+ vector<vecTerShift> poss_shifts;
+
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift (after the calculateTerAlignment call) :" << endl;
+ cerr << "indices: ";
+ for (int l_i=0; l_i < ( int ) ref.size() ; l_i++) {
+ cerr << l_i << "\t";
+ }
+ cerr << endl;
+ cerr << "hyp : \t"<<vectorToString(hyp ,"\t") << endl;
+ cerr << "cur : \t"<<vectorToString(cur ,"\t") << endl;
+ cerr << "ref : \t"<<vectorToString(ref ,"\t") << endl;
+ cerr << "herr : "<<vectorToString(herr,"\t",( int ) hyp.size()) << " | " << ( int ) hyp.size() <<endl;
+ cerr << "rerr : "<<vectorToString(rerr,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() <<endl;
+ cerr << "ralign : "<< vectorToString(ralign,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ poss_shifts = calculerPermutations ( cur, ref, rloc, med_align, herr, rerr, ralign );
+ double curerr = med_align.numEdits;
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Possible Shifts:" << endl;
+ for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- ) {
+ for ( int j = 0; j < ( int ) ( poss_shifts.at ( i ) ).size(); j++ ) {
+ cerr << " [" << i << "] " << ( ( poss_shifts.at ( i ) ).at ( j ) ).toString() << endl;
+ }
+ }
+ cerr << endl;
+ cerr << "END DEBUG " << endl;
+ }
// exit(0);
- double cur_best_shift_cost = 0.0;
- terAlignment cur_best_align = med_align;
- terShift cur_best_shift;
+ double cur_best_shift_cost = 0.0;
+ terAlignment cur_best_align = med_align;
+ terShift cur_best_shift;
- for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- )
- {
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
- cerr << "Considering shift of length " << i << " (" << ( poss_shifts.at ( i ) ).size() << ")" << endl;
- cerr << "END DEBUG " << endl;
- }
- /* Consider shifts of length i+1 */
- double curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
- double maxfix = ( 2 * ( 1 + i ) );
- if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) )
- {
- break;
- }
+ for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- ) {
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Considering shift of length " << i << " (" << ( poss_shifts.at ( i ) ).size() << ")" << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ /* Consider shifts of length i+1 */
+ double curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
+ double maxfix = ( 2 * ( 1 + i ) );
+ if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) {
+ break;
+ }
- for ( int s = 0; s < ( int ) ( poss_shifts.at ( i ) ).size(); s++ )
- {
- curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
- if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) )
- {
- break;
- }
- terShift curshift = ( poss_shifts.at ( i ) ).at ( s );
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
- cerr << "cur : "<< join(" ",cur) << endl;
- cerr << "curshift : "<< curshift.toString() << endl;
-
- }
- alignmentStruct shiftReturns = permuter ( cur, curshift );
- vector<string> shiftarr = shiftReturns.nwords;
- vector<vecInt> curHypSpans = shiftReturns.aftershift;
-
- if ( PRINT_DEBUG )
- {
- cerr << "shiftarr : "<< join(" ",shiftarr) << endl;
+ for ( int s = 0; s < ( int ) ( poss_shifts.at ( i ) ).size(); s++ ) {
+ curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
+ if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) {
+ break;
+ }
+ terShift curshift = ( poss_shifts.at ( i ) ).at ( s );
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "cur : "<< join(" ",cur) << endl;
+ cerr << "curshift : "<< curshift.toString() << endl;
+
+ }
+ alignmentStruct shiftReturns = permuter ( cur, curshift );
+ vector<string> shiftarr = shiftReturns.nwords;
+ vector<vecInt> curHypSpans = shiftReturns.aftershift;
+
+ if ( PRINT_DEBUG ) {
+ cerr << "shiftarr : "<< join(" ",shiftarr) << endl;
// cerr << "curHypSpans : "<< curHypSpans.toString() << endl;
- cerr << "END DEBUG " << endl;
- }
- terAlignment curalign = minimizeDistanceEdition ( shiftarr, ref, curHypSpans );
-
- curalign.hyp = hyp;
- curalign.ref = ref;
- curalign.aftershift = shiftarr;
-
-
- double gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost );
-
- // if (DEBUG) {
- // string testeuh=terAlignment join(" ", shiftarr);
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
- cerr << "Gain for " << curshift.toString() << " is " << gain << ". (result: [" << curalign.join ( " ", shiftarr ) << "]" << endl;
- cerr << "Details of gains : gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost )"<<endl;
- cerr << "Details of gains : gain = ("<<cur_best_align.numEdits << "+" << cur_best_shift_cost << ") - (" << curalign.numEdits << "+" << curshift.cost << ")"<<endl;
- cerr << "" << curalign.toString() << "\n" << endl;
- cerr << "END DEBUG " << endl;
- }
- // }
- //
- if ( ( gain > 0 ) || ( ( cur_best_shift_cost == 0 ) && ( gain == 0 ) ) )
- {
- anygain = true;
- cur_best_shift = curshift;
- cur_best_shift_cost = curshift.cost;
- cur_best_align = curalign;
- // if (DEBUG)
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
- cerr << "Tmp Choosing shift: " << cur_best_shift.toString() << " gives:\n" << cur_best_align.toString() << "\n" << endl;
- cerr << "END DEBUG " << endl;
- }
- }
- }
- }
- if ( anygain )
- {
- to_return.m_best_shift = cur_best_shift;
- to_return.m_best_align = cur_best_align;
- to_return.m_empty = false;
+ cerr << "END DEBUG " << endl;
+ }
+ terAlignment curalign = minimizeDistanceEdition ( shiftarr, ref, curHypSpans );
+
+ curalign.hyp = hyp;
+ curalign.ref = ref;
+ curalign.aftershift = shiftarr;
+
+
+ double gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost );
+
+ // if (DEBUG) {
+ // string testeuh=terAlignment join(" ", shiftarr);
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Gain for " << curshift.toString() << " is " << gain << ". (result: [" << curalign.join ( " ", shiftarr ) << "]" << endl;
+ cerr << "Details of gains : gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost )"<<endl;
+ cerr << "Details of gains : gain = ("<<cur_best_align.numEdits << "+" << cur_best_shift_cost << ") - (" << curalign.numEdits << "+" << curshift.cost << ")"<<endl;
+ cerr << "" << curalign.toString() << "\n" << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ // }
+ //
+ if ( ( gain > 0 ) || ( ( cur_best_shift_cost == 0 ) && ( gain == 0 ) ) ) {
+ anygain = true;
+ cur_best_shift = curshift;
+ cur_best_shift_cost = curshift.cost;
+ cur_best_align = curalign;
+ // if (DEBUG)
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Tmp Choosing shift: " << cur_best_shift.toString() << " gives:\n" << cur_best_align.toString() << "\n" << endl;
+ cerr << "END DEBUG " << endl;
}
- else
- {
- to_return.m_empty = true;
- }
- return to_return;
+ }
}
+ }
+ if ( anygain ) {
+ to_return.m_best_shift = cur_best_shift;
+ to_return.m_best_align = cur_best_align;
+ to_return.m_empty = false;
+ } else {
+ to_return.m_empty = true;
+ }
+ return to_return;
+}
- void terCalc::calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign )
- {
- int hpos = -1;
- int rpos = -1;
- if ( PRINT_DEBUG )
- {
+void terCalc::calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign )
+{
+ int hpos = -1;
+ int rpos = -1;
+ if ( PRINT_DEBUG ) {
+
+ cerr << "BEGIN DEBUG : terCalc::calculateTerAlignment : " << endl << align.toString() << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ for ( int i = 0; i < ( int ) align.alignment.size(); i++ ) {
+ herr[i] = false;
+ rerr[i] = false;
+ ralign[i] = -1;
+ }
+ for ( int i = 0; i < ( int ) align.alignment.size(); i++ ) {
+ char sym = align.alignment[i];
+ if ( sym == 'A' ) {
+ hpos++;
+ rpos++;
+ herr[hpos] = false;
+ rerr[rpos] = false;
+ ralign[rpos] = hpos;
+ } else if ( sym == 'S' ) {
+ hpos++;
+ rpos++;
+ herr[hpos] = true;
+ rerr[rpos] = true;
+ ralign[rpos] = hpos;
+ } else if ( sym == 'I' ) {
+ hpos++;
+ herr[hpos] = true;
+ } else if ( sym == 'D' ) {
+ rpos++;
+ rerr[rpos] = true;
+ ralign[rpos] = hpos+1;
+ } else {
+ cerr << "ERROR : terCalc::calculateTerAlignment : Invalid mini align sequence " << sym << " at pos " << i << endl;
+ exit ( -1 );
+ }
+ }
+}
- cerr << "BEGIN DEBUG : terCalc::calculateTerAlignment : " << endl << align.toString() << endl;
- cerr << "END DEBUG " << endl;
- }
- for ( int i = 0; i < ( int ) align.alignment.size(); i++ )
- {
- herr[i] = false;
- rerr[i] = false;
- ralign[i] = -1;
- }
- for ( int i = 0; i < ( int ) align.alignment.size(); i++ )
- {
- char sym = align.alignment[i];
- if ( sym == 'A' )
- {
- hpos++;
- rpos++;
- herr[hpos] = false;
- rerr[rpos] = false;
- ralign[rpos] = hpos;
- }
- else
- if ( sym == 'S' )
- {
- hpos++;
- rpos++;
- herr[hpos] = true;
- rerr[rpos] = true;
- ralign[rpos] = hpos;
- }
- else
- if ( sym == 'I' )
- {
- hpos++;
- herr[hpos] = true;
- }
- else
- if ( sym == 'D' )
- {
- rpos++;
- rerr[rpos] = true;
- ralign[rpos] = hpos+1;
- }
- else
- {
- cerr << "ERROR : terCalc::calculateTerAlignment : Invalid mini align sequence " << sym << " at pos " << i << endl;
- exit ( -1 );
- }
- }
+vector<vecTerShift> terCalc::calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign )
+{
+ vector<vecTerShift> to_return;
+ if ( ( TAILLE_PERMUT_MAX <= 0 ) || ( DIST_MAX_PERMUT <= 0 ) ) {
+ return to_return;
+ }
+
+ vector<vecTerShift> allshifts ( TAILLE_PERMUT_MAX + 1 );
+ for ( int start = 0; start < ( int ) hyp.size(); start++ ) {
+ string subVectorHypString = vectorToString ( subVector ( hyp, start, start + 1 ) );
+ if ( ! rloc.trouve ( subVectorHypString ) ) {
+ continue;
}
- vector<vecTerShift> terCalc::calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign )
- {
- vector<vecTerShift> to_return;
- if ( ( TAILLE_PERMUT_MAX <= 0 ) || ( DIST_MAX_PERMUT <= 0 ) )
- {
- return to_return;
+ bool ok = false;
+ vector<int> mtiVec = rloc.getValue ( subVectorHypString );
+ vector<int>::iterator mti = mtiVec.begin();
+ while ( mti != mtiVec.end() && ( ! ok ) ) {
+ int moveto = ( *mti );
+ mti++;
+ if ( ( start != ralign[moveto] ) && ( ( ralign[moveto] - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign[moveto] - 1 ) <= DIST_MAX_PERMUT ) ) {
+ ok = true;
+ }
+ }
+ if ( ! ok ) {
+ continue;
+ }
+ ok = true;
+ for ( int end = start; ( ok && ( end < ( int ) hyp.size() ) && ( end < start + TAILLE_PERMUT_MAX ) ); end++ ) {
+ /* check if cand is good if so, add it */
+ vector<string> cand = subVector ( hyp, start, end + 1 );
+ ok = false;
+ if ( ! ( rloc.trouve ( vectorToString ( cand ) ) ) ) {
+ continue;
+ }
+
+ bool any_herr = false;
+
+ for ( int i = 0; ( ( i <= ( end - start ) ) && ( ! any_herr ) ); i++ ) {
+ if ( herr[start+i] ) {
+ any_herr = true;
}
+ }
+ if ( any_herr == false ) {
+ ok = true;
+ continue;
+ }
+
+ vector<int> movetoitVec;
+ movetoitVec = rloc.getValue ( ( string ) vectorToString ( cand ) );
+// cerr << "CANDIDATE " << ( string ) vectorToString ( cand ) <<" PLACED : " << ( string ) vectorToString ( movetoitVec," ") << endl;
+ vector<int>::iterator movetoit = movetoitVec.begin();
+ while ( movetoit != movetoitVec.end() ) {
+ int moveto = ( *movetoit );
+ movetoit++;
+ if ( ! ( ( ralign[moveto] != start ) && ( ( ralign[moveto] < start ) || ( ralign[moveto] > end ) ) && ( ( ralign[moveto] - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign[moveto] ) <= DIST_MAX_PERMUT ) ) ) {
+ continue;
+ }
+ ok = true;
- vector<vecTerShift> allshifts ( TAILLE_PERMUT_MAX + 1 );
- for ( int start = 0; start < ( int ) hyp.size(); start++ )
- {
- string subVectorHypString = vectorToString ( subVector ( hyp, start, start + 1 ) );
- if ( ! rloc.trouve ( subVectorHypString ) )
- {
- continue;
- }
+ /* check to see if there are any errors in either string
+ (only move if this is the case!)
+ */
+
+ bool any_rerr = false;
+ for ( int i = 0; ( i <= end - start ) && ( ! any_rerr ); i++ ) {
+ if ( rerr[moveto+i] ) {
+ any_rerr = true;
+ }
+ }
+ if ( ! any_rerr ) {
+ continue;
+ }
+ for ( int roff = -1; roff <= ( end - start ); roff++ ) {
+ terShift topush;
+ bool topushNull = true;
+ if ( ( roff == -1 ) && ( moveto == 0 ) ) {
+ if ( PRINT_DEBUG ) {
- bool ok = false;
- vector<int> mtiVec = rloc.getValue ( subVectorHypString );
- vector<int>::iterator mti = mtiVec.begin();
- while ( mti != mtiVec.end() && ( ! ok ) )
- {
- int moveto = ( *mti );
- mti++;
- if ( ( start != ralign[moveto] ) && ( ( ralign[moveto] - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign[moveto] - 1 ) <= DIST_MAX_PERMUT ) )
- {
- ok = true;
- }
+ cerr << "BEGIN DEBUG : terCalc::calculerPermutations 01 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: -1" << endl << "END DEBUG" << endl;
}
- if ( ! ok )
- {
- continue;
+ terShift t01 ( start, end, -1, -1 );
+ topush = t01;
+ topushNull = false;
+ } else if ( ( start != ralign[moveto+roff] ) && ( ( roff == 0 ) || ( ralign[moveto+roff] != ralign[moveto] ) ) ) {
+ int newloc = ralign[moveto+roff];
+ if ( PRINT_DEBUG ) {
+
+ cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: " << newloc << endl << "END DEBUG" << endl;
}
- ok = true;
- for ( int end = start; ( ok && ( end < ( int ) hyp.size() ) && ( end < start + TAILLE_PERMUT_MAX ) ); end++ )
- {
- /* check if cand is good if so, add it */
- vector<string> cand = subVector ( hyp, start, end + 1 );
- ok = false;
- if ( ! ( rloc.trouve ( vectorToString ( cand ) ) ) )
- {
- continue;
- }
-
- bool any_herr = false;
-
- for ( int i = 0; ( ( i <= ( end - start ) ) && ( ! any_herr ) ); i++ )
- {
- if ( herr[start+i] )
- {
- any_herr = true;
- }
- }
- if ( any_herr == false )
- {
- ok = true;
- continue;
- }
-
- vector<int> movetoitVec;
- movetoitVec = rloc.getValue ( ( string ) vectorToString ( cand ) );
-// cerr << "CANDIDATE " << ( string ) vectorToString ( cand ) <<" PLACED : " << ( string ) vectorToString ( movetoitVec," ") << endl;
- vector<int>::iterator movetoit = movetoitVec.begin();
- while ( movetoit != movetoitVec.end() )
- {
- int moveto = ( *movetoit );
- movetoit++;
- if ( ! ( ( ralign[moveto] != start ) && ( ( ralign[moveto] < start ) || ( ralign[moveto] > end ) ) && ( ( ralign[moveto] - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign[moveto] ) <= DIST_MAX_PERMUT ) ) )
- {
- continue;
- }
- ok = true;
-
- /* check to see if there are any errors in either string
- (only move if this is the case!)
- */
-
- bool any_rerr = false;
- for ( int i = 0; ( i <= end - start ) && ( ! any_rerr ); i++ )
- {
- if ( rerr[moveto+i] )
- {
- any_rerr = true;
- }
- }
- if ( ! any_rerr )
- {
- continue;
- }
- for ( int roff = -1; roff <= ( end - start ); roff++ )
- {
- terShift topush;
- bool topushNull = true;
- if ( ( roff == -1 ) && ( moveto == 0 ) )
- {
- if ( PRINT_DEBUG )
- {
-
- cerr << "BEGIN DEBUG : terCalc::calculerPermutations 01 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: -1" << endl << "END DEBUG" << endl;
- }
- terShift t01 ( start, end, -1, -1 );
- topush = t01;
- topushNull = false;
- }
- else
- if ( ( start != ralign[moveto+roff] ) && ( ( roff == 0 ) || ( ralign[moveto+roff] != ralign[moveto] ) ) )
- {
- int newloc = ralign[moveto+roff];
- if ( PRINT_DEBUG )
- {
-
- cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: " << newloc << endl << "END DEBUG" << endl;
- }
- terShift t02 ( start, end, moveto + roff, newloc );
- topush = t02;
- topushNull = false;
- }
- if ( !topushNull )
- {
- topush.shifted = cand;
- topush.cost = shift_cost;
- if ( PRINT_DEBUG )
- {
-
- cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl;
- cerr << "start : " << start << endl;
- cerr << "end : " << end << endl;
- cerr << "end - start : " << end - start << endl;
- cerr << "END DEBUG " << endl;
- }
- ( allshifts.at ( end - start ) ).push_back ( topush );
- }
- }
- }
+ terShift t02 ( start, end, moveto + roff, newloc );
+ topush = t02;
+ topushNull = false;
+ }
+ if ( !topushNull ) {
+ topush.shifted = cand;
+ topush.cost = shift_cost;
+ if ( PRINT_DEBUG ) {
+
+ cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl;
+ cerr << "start : " << start << endl;
+ cerr << "end : " << end << endl;
+ cerr << "end - start : " << end - start << endl;
+ cerr << "END DEBUG " << endl;
}
+ ( allshifts.at ( end - start ) ).push_back ( topush );
+ }
}
- to_return.clear();
- for ( int i = 0; i < TAILLE_PERMUT_MAX + 1; i++ )
- {
- to_return.push_back ( ( vecTerShift ) allshifts.at ( i ) );
- }
- return to_return;
+ }
}
+ }
+ to_return.clear();
+ for ( int i = 0; i < TAILLE_PERMUT_MAX + 1; i++ ) {
+ to_return.push_back ( ( vecTerShift ) allshifts.at ( i ) );
+ }
+ return to_return;
+}
- alignmentStruct terCalc::permuter ( vector<string> words, terShift s )
- {
- return permuter ( words, s.start, s.end, s.newloc );
- }
+alignmentStruct terCalc::permuter ( vector<string> words, terShift s )
+{
+ return permuter ( words, s.start, s.end, s.newloc );
+}
- alignmentStruct terCalc::permuter ( vector<string> words, int start, int end, int newloc )
- {
- int c = 0;
- vector<string> nwords ( words );
- vector<vecInt> spans ( ( int ) hypSpans.size() );
- alignmentStruct to_return;
- if ( PRINT_DEBUG )
- {
+alignmentStruct terCalc::permuter ( vector<string> words, int start, int end, int newloc )
+{
+ int c = 0;
+ vector<string> nwords ( words );
+ vector<vecInt> spans ( ( int ) hypSpans.size() );
+ alignmentStruct to_return;
+ if ( PRINT_DEBUG ) {
+
+ if ( ( int ) hypSpans.size() > 0 ) {
+ cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: " << ( int ) hypSpans.size() << endl ;
+ } else {
+ cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: null" << endl ;
+ }
+ cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << join(" ",words) << " start: " << start << " end: " << end << " newloc "<< newloc << endl << "END DEBUG " << endl;
+ }
+ if (newloc >= ( int ) words.size()) {
+ if ( PRINT_DEBUG ) {
+ cerr << "WARNING: Relocation over the size of the hypothesis, replacing at the end of it."<<endl;
+ }
+ newloc = ( int ) words.size()-1;
+ }
- if ( ( int ) hypSpans.size() > 0 )
- {
- cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: " << ( int ) hypSpans.size() << endl ;
- }
- else
- {
- cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: null" << endl ;
- }
- cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << join(" ",words) << " start: " << start << " end: " << end << " newloc "<< newloc << endl << "END DEBUG " << endl;
- }
- if (newloc >= ( int ) words.size())
- {
- if ( PRINT_DEBUG )
- {
- cerr << "WARNING: Relocation over the size of the hypothesis, replacing at the end of it."<<endl;
- }
- newloc = ( int ) words.size()-1;
- }
-
// }
- if ( newloc == -1 )
- {
- for ( int i = start; i <= end;i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = 0; i <= start - 1;i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; i < ( int ) words.size();i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
+ if ( newloc == -1 ) {
+ for ( int i = start; i <= end; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = 0; i <= start - 1; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; i < ( int ) words.size(); i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ } else {
+ if ( newloc < start ) {
+
+ for ( int i = 0; i < newloc; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
}
- else
- {
- if ( newloc < start )
- {
-
- for ( int i = 0; i < newloc; i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = start; i <= end;i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = newloc ; i < start ;i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; i < ( int ) words.size();i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- }
- else
- {
- if ( newloc > end )
- {
- for ( int i = 0; i <= start - 1; i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; i <= newloc;i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = start; i <= end;i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = newloc + 1; i < ( int ) words.size();i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- }
- else
- {
- // we are moving inside of ourselves
- for ( int i = 0; i <= start - 1; i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; ( i < ( int ) words.size() ) && ( i <= ( end + ( newloc - start ) ) ); i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = start; i <= end;i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = ( end + ( newloc - start ) + 1 ); i < ( int ) words.size();i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- }
- }
+ }
+ for ( int i = start; i <= end; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
}
- NBR_PERMUTS_CONSID++;
-
- if ( PRINT_DEBUG )
- {
- cerr << "nwords" << join(" ",nwords) << endl;
-// cerr << "spans" << spans. << endl;
- }
-
- to_return.nwords = nwords;
- to_return.aftershift = spans;
- return to_return;
- }
- void terCalc::setDebugMode ( bool b )
- {
- PRINT_DEBUG = b;
+ }
+ for ( int i = newloc ; i < start ; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; i < ( int ) words.size(); i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ } else {
+ if ( newloc > end ) {
+ for ( int i = 0; i <= start - 1; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; i <= newloc; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = start; i <= end; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = newloc + 1; i < ( int ) words.size(); i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ } else {
+ // we are moving inside of ourselves
+ for ( int i = 0; i <= start - 1; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; ( i < ( int ) words.size() ) && ( i <= ( end + ( newloc - start ) ) ); i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = start; i <= end; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = ( end + ( newloc - start ) + 1 ); i < ( int ) words.size(); i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ }
}
+ }
+ NBR_PERMUTS_CONSID++;
+
+ if ( PRINT_DEBUG ) {
+ cerr << "nwords" << join(" ",nwords) << endl;
+// cerr << "spans" << spans. << endl;
+ }
+
+ to_return.nwords = nwords;
+ to_return.aftershift = spans;
+ return to_return;
+}
+void terCalc::setDebugMode ( bool b )
+{
+ PRINT_DEBUG = b;
+}
}
diff --git a/mert/TER/tercalc.h b/mert/TER/tercalc.h
index 92d9caf2b..778d83395 100644
--- a/mert/TER/tercalc.h
+++ b/mert/TER/tercalc.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -41,62 +41,62 @@ namespace TERCpp
{
// typedef size_t WERelement[2];
// Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del)
- typedef vector<terShift> vecTerShift;
- /**
- @author
- */
- class terCalc
- {
- private :
+typedef vector<terShift> vecTerShift;
+/**
+ @author
+*/
+class terCalc
+{
+private :
// Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del)
- WERalignment l_WERalignment;
+ WERalignment l_WERalignment;
// HashMap contenant les valeurs de hash de chaque mot
- hashMap bagOfWords;
- int TAILLE_PERMUT_MAX;
- // Increments internes
- int NBR_SEGS_EVALUATED;
- int NBR_PERMUTS_CONSID;
- int NBR_BS_APPELS;
- int DIST_MAX_PERMUT;
- bool PRINT_DEBUG;
+ hashMap bagOfWords;
+ int TAILLE_PERMUT_MAX;
+ // Increments internes
+ int NBR_SEGS_EVALUATED;
+ int NBR_PERMUTS_CONSID;
+ int NBR_BS_APPELS;
+ int DIST_MAX_PERMUT;
+ bool PRINT_DEBUG;
- // Utilisés dans minDistEdit et ils ne sont pas réajustés
- double S[1000][1000];
- char P[1000][1000];
- vector<vecInt> refSpans;
- vector<vecInt> hypSpans;
- int TAILLE_BEAM;
+ // Utilisés dans minDistEdit et ils ne sont pas réajustés
+ double S[1000][1000];
+ char P[1000][1000];
+ vector<vecInt> refSpans;
+ vector<vecInt> hypSpans;
+ int TAILLE_BEAM;
- public:
- int shift_cost;
- int insert_cost;
- int delete_cost;
- int substitute_cost;
- int match_cost;
- double infinite;
- terCalc();
+public:
+ int shift_cost;
+ int insert_cost;
+ int delete_cost;
+ int substitute_cost;
+ int match_cost;
+ double infinite;
+ terCalc();
// ~terCalc();
// size_t* hashVec ( vector<string> s );
- void setDebugMode ( bool b );
+ void setDebugMode ( bool b );
// int WERCalculation ( size_t * ref, size_t * hyp );
// int WERCalculation ( vector<string> ref, vector<string> hyp );
// int WERCalculation ( vector<int> ref, vector<int> hyp );
- terAlignment WERCalculation ( vector<string> hyp, vector<string> ref );
+ terAlignment WERCalculation ( vector<string> hyp, vector<string> ref );
// string vectorToString(vector<string> vec);
// vector<string> subVector(vector<string> vec, int start, int end);
- hashMapInfos createConcordMots ( vector<string> hyp, vector<string> ref );
- terAlignment minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans );
- bool trouverIntersection ( vecInt refSpan, vecInt hypSpan );
- terAlignment TER ( vector<string> hyp, vector<string> ref , float avRefLength );
- terAlignment TER ( vector<string> hyp, vector<string> ref );
- terAlignment TER ( vector<int> hyp, vector<int> ref );
- bestShiftStruct findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment cur_align );
- void calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign );
- vector<vecTerShift> calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign );
- alignmentStruct permuter ( vector<string> words, terShift s );
- alignmentStruct permuter ( vector<string> words, int start, int end, int newloc );
- };
+ hashMapInfos createConcordMots ( vector<string> hyp, vector<string> ref );
+ terAlignment minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans );
+ bool trouverIntersection ( vecInt refSpan, vecInt hypSpan );
+ terAlignment TER ( vector<string> hyp, vector<string> ref , float avRefLength );
+ terAlignment TER ( vector<string> hyp, vector<string> ref );
+ terAlignment TER ( vector<int> hyp, vector<int> ref );
+ bestShiftStruct findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment cur_align );
+ void calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign );
+ vector<vecTerShift> calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign );
+ alignmentStruct permuter ( vector<string> words, terShift s );
+ alignmentStruct permuter ( vector<string> words, int start, int end, int newloc );
+};
}
diff --git a/mert/TER/tools.cpp b/mert/TER/tools.cpp
index 64e1483b6..8858a7119 100644
--- a/mert/TER/tools.cpp
+++ b/mert/TER/tools.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -25,748 +25,677 @@ using namespace boost::xpressive;
namespace Tools
{
- string vectorToString ( vector<string> vec )
- {
- string retour ( "" );
- for ( vector<string>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
- {
- if ( vecIter == vec.begin() )
- {
- retour += ( *vecIter );
- }
- else
- {
- retour += "\t" + ( *vecIter );
- }
- }
- return retour;
+string vectorToString ( vector<string> vec )
+{
+ string retour ( "" );
+ for ( vector<string>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
+ if ( vecIter == vec.begin() ) {
+ retour += ( *vecIter );
+ } else {
+ retour += "\t" + ( *vecIter );
}
- string vectorToString ( vector<char> vec )
- {
- stringstream retour;
- retour.str("");
- for ( vector<char>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
- {
- if ( vecIter == vec.begin() )
- {
- retour << ( *vecIter );
- }
- else
- {
- retour << "\t" << ( *vecIter );
- }
- }
- return retour.str();
+ }
+ return retour;
+}
+string vectorToString ( vector<char> vec )
+{
+ stringstream retour;
+ retour.str("");
+ for ( vector<char>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
+ if ( vecIter == vec.begin() ) {
+ retour << ( *vecIter );
+ } else {
+ retour << "\t" << ( *vecIter );
}
- string vectorToString ( vector<int> vec )
- {
- stringstream retour;
- retour.str("");
- for ( vector<int>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
- {
- if ( vecIter == vec.begin() )
- {
- retour << ( *vecIter );
- }
- else
- {
- retour << "\t" << ( *vecIter );
- }
- }
- return retour.str();
+ }
+ return retour.str();
+}
+string vectorToString ( vector<int> vec )
+{
+ stringstream retour;
+ retour.str("");
+ for ( vector<int>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
+ if ( vecIter == vec.begin() ) {
+ retour << ( *vecIter );
+ } else {
+ retour << "\t" << ( *vecIter );
}
+ }
+ return retour.str();
+}
- string vectorToString ( vector< string > vec, string s )
- {
- string retour ( "" );
- for ( vector<string>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
- {
- if ( vecIter == vec.begin() )
- {
- retour += ( *vecIter );
- }
- else
- {
- retour += s + ( *vecIter );
- }
- }
- return retour;
-
+string vectorToString ( vector< string > vec, string s )
+{
+ string retour ( "" );
+ for ( vector<string>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
+ if ( vecIter == vec.begin() ) {
+ retour += ( *vecIter );
+ } else {
+ retour += s + ( *vecIter );
}
+ }
+ return retour;
- string vectorToString ( vector< char > vec, string s )
- {
- stringstream retour;
- retour.str("");
- for ( vector<char>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
- {
- if ( vecIter == vec.begin() )
- {
- retour << ( *vecIter );
- }
- else
- {
- retour << s << ( *vecIter );
- }
- }
- return retour.str();
+}
+string vectorToString ( vector< char > vec, string s )
+{
+ stringstream retour;
+ retour.str("");
+ for ( vector<char>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
+ if ( vecIter == vec.begin() ) {
+ retour << ( *vecIter );
+ } else {
+ retour << s << ( *vecIter );
}
+ }
+ return retour.str();
- string vectorToString ( vector< int > vec, string s )
- {
- stringstream retour;
- retour.str("");
- for ( vector<int>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
- {
- if ( vecIter == vec.begin() )
- {
- retour << ( *vecIter );
- }
- else
- {
- retour << s << ( *vecIter );
- }
- }
- return retour.str();
+}
+string vectorToString ( vector< int > vec, string s )
+{
+ stringstream retour;
+ retour.str("");
+ for ( vector<int>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
+ if ( vecIter == vec.begin() ) {
+ retour << ( *vecIter );
+ } else {
+ retour << s << ( *vecIter );
}
+ }
+ return retour.str();
- string vectorToString ( vector< bool > vec, string s )
- {
- stringstream retour;
- retour.str("");
- for ( vector<bool>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
- {
- if ( vecIter == vec.begin() )
- {
- retour << ( *vecIter );
- }
- else
- {
- retour << s << ( *vecIter );
- }
- }
- return retour.str();
+}
+string vectorToString ( vector< bool > vec, string s )
+{
+ stringstream retour;
+ retour.str("");
+ for ( vector<bool>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
+ if ( vecIter == vec.begin() ) {
+ retour << ( *vecIter );
+ } else {
+ retour << s << ( *vecIter );
}
- string vectorToString ( char* vec, string s , int taille)
- {
- stringstream retour;
- retour.str("");
- int l_i;
- for ( l_i=0; l_i < taille ; l_i++)
- {
- if ( l_i == 0 )
- {
- retour << vec[l_i];
- }
- else
- {
- retour << s << vec[l_i];
- }
- }
- return retour.str();
+ }
+ return retour.str();
+}
+string vectorToString ( char* vec, string s , int taille)
+{
+ stringstream retour;
+ retour.str("");
+ int l_i;
+ for ( l_i=0; l_i < taille ; l_i++) {
+ if ( l_i == 0 ) {
+ retour << vec[l_i];
+ } else {
+ retour << s << vec[l_i];
}
+ }
+ return retour.str();
- string vectorToString ( int* vec, string s , int taille)
- {
- stringstream retour;
- retour.str("");
- int l_i;
- for ( l_i=0; l_i < taille ; l_i++)
- {
- if ( l_i == 0 )
- {
- retour << vec[l_i];
- }
- else
- {
- retour << s << vec[l_i];
- }
- }
- return retour.str();
+}
+string vectorToString ( int* vec, string s , int taille)
+{
+ stringstream retour;
+ retour.str("");
+ int l_i;
+ for ( l_i=0; l_i < taille ; l_i++) {
+ if ( l_i == 0 ) {
+ retour << vec[l_i];
+ } else {
+ retour << s << vec[l_i];
}
+ }
+ return retour.str();
- string vectorToString ( bool* vec, string s , int taille)
- {
- stringstream retour;
- retour.str("");
- int l_i;
- for ( l_i=0; l_i < taille ; l_i++)
- {
- if ( l_i == 0 )
- {
- retour << vec[l_i];
- }
- else
- {
- retour << s << vec[l_i];
- }
- }
- return retour.str();
+}
+string vectorToString ( bool* vec, string s , int taille)
+{
+ stringstream retour;
+ retour.str("");
+ int l_i;
+ for ( l_i=0; l_i < taille ; l_i++) {
+ if ( l_i == 0 ) {
+ retour << vec[l_i];
+ } else {
+ retour << s << vec[l_i];
}
-
- vector<string> subVector ( vector<string> vec, int start, int end )
- {
- vector<string> retour;
- if ( start > end )
- {
- cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
- exit ( 0 );
- }
- for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ )
- {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
- }
-
- vector<int> subVector ( vector<int> vec, int start, int end )
- {
- vector<int> retour;
- if ( start > end )
- {
- cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
- exit ( 0 );
- }
- for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ )
- {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
- }
-
- vector<float> subVector ( vector<float> vec, int start, int end )
- {
- vector<float> retour;
- if ( start > end )
- {
- cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
- exit ( 0 );
- }
- for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ )
- {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
- }
-
- vector<string> copyVector ( vector<string> vec )
- {
- vector<string> retour;
- for ( int i = 0; i < ( int ) vec.size(); i++ )
- {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
+ }
+ return retour.str();
+
+}
+
+vector<string> subVector ( vector<string> vec, int start, int end )
+{
+ vector<string> retour;
+ if ( start > end ) {
+ cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
+ exit ( 0 );
+ }
+ for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+}
+
+vector<int> subVector ( vector<int> vec, int start, int end )
+{
+ vector<int> retour;
+ if ( start > end ) {
+ cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
+ exit ( 0 );
+ }
+ for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+}
+
+vector<float> subVector ( vector<float> vec, int start, int end )
+{
+ vector<float> retour;
+ if ( start > end ) {
+ cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
+ exit ( 0 );
+ }
+ for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+}
+
+vector<string> copyVector ( vector<string> vec )
+{
+ vector<string> retour;
+ for ( int i = 0; i < ( int ) vec.size(); i++ ) {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+}
+vector<int> copyVector ( vector<int> vec )
+{
+ vector<int> retour;
+ for ( int i = 0; i < ( int ) vec.size(); i++ ) {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+}
+vector<float> copyVector ( vector<float> vec )
+{
+ vector<float> retour;
+ for ( int i = 0; i < ( int ) vec.size(); i++ ) {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+}
+vector<string> stringToVector ( string s, string tok )
+{
+ vector<string> to_return;
+ string to_push ( "" );
+ bool pushed = false;
+ string::iterator sIt;
+ for ( sIt = s.begin(); sIt < s.end(); sIt++ ) {
+ pushed = false;
+ for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) {
+ if ( ( *sIt ) == ( *sTok ) ) {
+ to_return.push_back ( to_push );
+ to_push = "";
+ pushed = true;
+ }
}
- vector<int> copyVector ( vector<int> vec )
- {
- vector<int> retour;
- for ( int i = 0; i < ( int ) vec.size(); i++ )
- {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
+ if ( !pushed ) {
+ to_push.push_back ( ( *sIt ) );
}
- vector<float> copyVector ( vector<float> vec )
- {
- vector<float> retour;
- for ( int i = 0; i < ( int ) vec.size(); i++ )
- {
- retour.push_back ( vec.at ( i ) );
+ }
+ to_return.push_back ( to_push );
+ return to_return;
+}
+vector<int> stringToVectorInt ( string s, string tok )
+{
+ vector<int> to_return;
+ string to_push ( "" );
+ bool pushed = false;
+ string::iterator sIt;
+ for ( sIt = s.begin(); sIt < s.end(); sIt++ ) {
+ pushed = false;
+ for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) {
+ if ( ( *sIt ) == ( *sTok ) ) {
+ if ( ( int ) to_push.length() > 0 ) {
+ to_return.push_back ( atoi ( to_push.c_str() ) );
}
- return retour;
+ to_push = "";
+ pushed = true;
+ }
}
- vector<string> stringToVector ( string s, string tok )
- {
- vector<string> to_return;
- string to_push ( "" );
- bool pushed = false;
- string::iterator sIt;
- for ( sIt = s.begin(); sIt < s.end(); sIt++ )
- {
- pushed = false;
- for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ )
- {
- if ( ( *sIt ) == ( *sTok ) )
- {
- to_return.push_back ( to_push );
- to_push = "";
- pushed = true;
- }
- }
- if ( !pushed )
- {
- to_push.push_back ( ( *sIt ) );
- }
- }
- to_return.push_back ( to_push );
- return to_return;
+ if ( !pushed ) {
+ to_push.push_back ( ( *sIt ) );
}
- vector<int> stringToVectorInt ( string s, string tok )
- {
- vector<int> to_return;
- string to_push ( "" );
- bool pushed = false;
- string::iterator sIt;
- for ( sIt = s.begin(); sIt < s.end(); sIt++ )
- {
- pushed = false;
- for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ )
- {
- if ( ( *sIt ) == ( *sTok ) )
- {
- if ( ( int ) to_push.length() > 0 )
- {
- to_return.push_back ( atoi ( to_push.c_str() ) );
- }
- to_push = "";
- pushed = true;
- }
- }
- if ( !pushed )
- {
- to_push.push_back ( ( *sIt ) );
- }
- }
- if ( ( int ) to_push.length() > 0 )
- {
- to_return.push_back ( atoi ( to_push.c_str() ) );
+ }
+ if ( ( int ) to_push.length() > 0 ) {
+ to_return.push_back ( atoi ( to_push.c_str() ) );
+ }
+ return to_return;
+}
+vector<float> stringToVectorFloat ( string s, string tok )
+{
+ vector<float> to_return;
+ string to_push ( "" );
+ bool pushed = false;
+ string::iterator sIt;
+ for ( sIt = s.begin(); sIt < s.end(); sIt++ ) {
+ pushed = false;
+ for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) {
+ if ( ( *sIt ) == ( *sTok ) ) {
+ if ( ( int ) to_push.length() > 0 ) {
+ to_return.push_back ( atof ( to_push.c_str() ) );
}
- return to_return;
+ to_push = "";
+ pushed = true;
+ }
}
- vector<float> stringToVectorFloat ( string s, string tok )
- {
- vector<float> to_return;
- string to_push ( "" );
- bool pushed = false;
- string::iterator sIt;
- for ( sIt = s.begin(); sIt < s.end(); sIt++ )
- {
- pushed = false;
- for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ )
- {
- if ( ( *sIt ) == ( *sTok ) )
- {
- if ( ( int ) to_push.length() > 0 )
- {
- to_return.push_back ( atof ( to_push.c_str() ) );
- }
- to_push = "";
- pushed = true;
- }
- }
- if ( !pushed )
- {
- to_push.push_back ( ( *sIt ) );
- }
- }
- if ( ( int ) to_push.length() > 0 )
- {
- to_return.push_back ( atoi ( to_push.c_str() ) );
- }
- return to_return;
+ if ( !pushed ) {
+ to_push.push_back ( ( *sIt ) );
}
+ }
+ if ( ( int ) to_push.length() > 0 ) {
+ to_return.push_back ( atoi ( to_push.c_str() ) );
+ }
+ return to_return;
+}
- string lowerCase ( string str )
- {
- for ( int i = 0;i < ( int ) str.size();i++ )
- {
- if ( ( str[i] >= 0x41 ) && ( str[i] <= 0x5A ) )
- {
- str[i] = str[i] + 0x20;
- }
- }
- return str;
+string lowerCase ( string str )
+{
+ for ( int i = 0; i < ( int ) str.size(); i++ ) {
+ if ( ( str[i] >= 0x41 ) && ( str[i] <= 0x5A ) ) {
+ str[i] = str[i] + 0x20;
}
- string removePunctTercom ( string str )
- {
- string str_mod = str;
- sregex rex;
- string replace;
+ }
+ return str;
+}
+string removePunctTercom ( string str )
+{
+ string str_mod = str;
+ sregex rex;
+ string replace;
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\"]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\"]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[,]" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[,]" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([\\.]$)" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([\\.]$)" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\?]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\?]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\;]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\;]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\:]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\:]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\!]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\!]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\(]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\(]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\)]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\)]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+$" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+$" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
- }
- string removePunct ( string str )
- {
- string str_mod = str;
- sregex rex;
- string replace;
+ return str_mod;
+}
+string removePunct ( string str )
+{
+ string str_mod = str;
+ sregex rex;
+ string replace;
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\"]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\"]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[,]" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[,]" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([\\.]$)" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([\\.]$)" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\?]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\?]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\;]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\;]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\:]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\:]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\!]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\!]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\(]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\(]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\)]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\)]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+$" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+$" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
- }
- string tokenizePunct ( string str )
- {
- string str_mod = str;
- sregex rex = sregex::compile ( "(([^0-9])([\\,])([^0-9]))" );
- string replace ( "$2 $3 $4" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ return str_mod;
+}
+string tokenizePunct ( string str )
+{
+ string str_mod = str;
+ sregex rex = sregex::compile ( "(([^0-9])([\\,])([^0-9]))" );
+ string replace ( "$2 $3 $4" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(([^0-9])([\\.])([^0-9]))" );
- replace = ( "$2 $3 $4" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(([^0-9])([\\.])([^0-9]))" );
+ replace = ( "$2 $3 $4" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.]) )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.]) )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.])$)" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.])$)" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([A-Z]|[a-z]) ([\\.]) )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([A-Z]|[a-z]) ([\\.]) )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(([A-Z]|[a-z])([\\.]) ([A-Z]|[a-z])([\\.]) )" );
- replace = ( "$2.$4. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(([A-Z]|[a-z])([\\.]) ([A-Z]|[a-z])([\\.]) )" );
+ replace = ( "$2.$4. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\?]" );
- replace = ( " ? " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\?]" );
+ replace = ( " ? " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\;]" );
- replace = ( " ; " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\;]" );
+ replace = ( " ; " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(([^0-9])([\\:])([^0-9]))" );
- replace = ( "$2 $3 $4" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(([^0-9])([\\:])([^0-9]))" );
+ replace = ( "$2 $3 $4" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\!]" );
- replace = ( " ! " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\!]" );
+ replace = ( " ! " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\(]" );
- replace = ( " ( " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\(]" );
+ replace = ( " ( " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\)]" );
- replace = ( " ) " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\)]" );
+ replace = ( " ) " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\"]" );
- replace = ( " \" " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\"]" );
+ replace = ( " \" " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(num_ \\( ([^\\)]+) \\))" );
- replace = ( "num_($2)" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(num_ \\( ([^\\)]+) \\))" );
+ replace = ( "num_($2)" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(ordinal_ \\( ([^\\)]*) \\))" );
- replace = ( "ordinal_($2)" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(ordinal_ \\( ([^\\)]*) \\))" );
+ replace = ( "ordinal_($2)" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Mm]) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Mm]) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Mm]) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Mm]) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Dd]r) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Dd]r) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Dd]r) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Dd]r) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Mm]r) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Mm]r) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Mm]r) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Mm]r) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Mm]rs) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Mm]rs) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Mm]rs) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Mm]rs) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Nn]o) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Nn]o) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Nn]o) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Nn]o) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
// rex = sregex::compile ( "(^(([Jj]an)|([Ff]ev)|([Mm]ar)|([Aa]pr)|([Jj]un)|([Jj]ul)|([Aa]ug)|([Ss]ept)|([Oo]ct)|([Nn]ov)|([Dd]ec)) \\.)" );
// replace = ( "$2." );
// str_mod = regex_replace ( str_mod, rex, replace );
-//
+//
// rex = sregex::compile ( "( (([Jj]an)|([Ff]ev)|([Mm]ar)|([Aa]pr)|([Jj]un)|([Jj]ul)|([Aa]ug)|([Ss]ept)|([Oo]ct)|([Nn]ov)|([Dd]ec)) \\.)" );
// replace = ( " $2." );
// str_mod = regex_replace ( str_mod, rex, replace );
-//
+//
// rex = sregex::compile ( "(^(([Gg]en)|([Cc]ol)) \\.)" );
// replace = ( "$2." );
// str_mod = regex_replace ( str_mod, rex, replace );
-//
+//
// rex = sregex::compile ( "( (([Gg]en)|([Cc]ol)) \\.)" );
// replace = ( " $2." );
// str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^(([A-Z][a-z])) \\. )" );
- replace = ( "$2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^(([A-Z][a-z])) \\. )" );
+ replace = ( "$2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( (([A-Z][a-z])) \\. )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( (([A-Z][a-z])) \\. )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^(([A-Z][a-z][a-z])) \\. )" );
- replace = ( "$2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^(([A-Z][a-z][a-z])) \\. )" );
+ replace = ( "$2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( (([A-Z][a-z][a-z])) \\. )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( (([A-Z][a-z][a-z])) \\. )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+$" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+$" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
- }
+ return str_mod;
+}
- string normalizeStd ( string str )
- {
- string str_mod = str;
- sregex rex = sregex::compile ( "(<skipped>)" );
- string replace ( "" );
- str_mod = regex_replace ( str_mod, rex, replace );
+string normalizeStd ( string str )
+{
+ string str_mod = str;
+ sregex rex = sregex::compile ( "(<skipped>)" );
+ string replace ( "" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "-\n" );
- replace = ( "" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "-\n" );
+ replace = ( "" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "\n" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "\n" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&quot;" );
- replace = ( "\"" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "&quot;" );
+ replace = ( "\"" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&amp;" );
- replace = ( "& " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "&amp;" );
+ replace = ( "& " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&lt;" );
- replace = ( "<" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "&lt;" );
+ replace = ( "<" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&gt;" );
- replace = ( ">" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "&gt;" );
+ replace = ( ">" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
- }
+ return str_mod;
+}
- param copyParam ( param p )
- {
- param to_return;
- to_return.caseOn = p.caseOn;
- to_return.noPunct = p.noPunct;
- to_return.debugMode = p.debugMode;
- to_return.debugLevel = p.debugLevel;
- to_return.hypothesisFile = p.hypothesisFile;
- to_return.referenceFile = p.referenceFile;
- to_return.normalize = p.normalize;
- to_return.noTxtIds = p.noTxtIds;
- to_return.outputFileExtension = p.outputFileExtension;
- to_return.outputFileName = p.outputFileName;
- to_return.sgmlInputs = p.sgmlInputs;
- to_return.tercomLike = p.tercomLike;
- to_return.printAlignments = p.printAlignments;
- to_return.WER=p.WER;
- return to_return;
- }
- string printParams ( param p )
- {
- stringstream s;
- s << "caseOn = " << p.caseOn << endl;
- s << "noPunct = " << p.noPunct << endl;
- s << "debugMode = " << p.debugMode << endl;
- s << "debugLevel = " << p.debugLevel << endl;
- s << "hypothesisFile = " << p.hypothesisFile << endl;
- s << "referenceFile = " << p.referenceFile << endl;
- s << "normalize = " << p.normalize << endl;
- s << "noTxtIds = " << p.noTxtIds << endl;
- s << "outputFileExtension = " << p.outputFileExtension << endl;
- s << "outputFileName = " << p.outputFileName << endl;
- s << "sgmlInputs = " << p.sgmlInputs << endl;
- s << "tercomLike = " << p.tercomLike << endl;
- return s.str();
+param copyParam ( param p )
+{
+ param to_return;
+ to_return.caseOn = p.caseOn;
+ to_return.noPunct = p.noPunct;
+ to_return.debugMode = p.debugMode;
+ to_return.debugLevel = p.debugLevel;
+ to_return.hypothesisFile = p.hypothesisFile;
+ to_return.referenceFile = p.referenceFile;
+ to_return.normalize = p.normalize;
+ to_return.noTxtIds = p.noTxtIds;
+ to_return.outputFileExtension = p.outputFileExtension;
+ to_return.outputFileName = p.outputFileName;
+ to_return.sgmlInputs = p.sgmlInputs;
+ to_return.tercomLike = p.tercomLike;
+ to_return.printAlignments = p.printAlignments;
+ to_return.WER=p.WER;
+ return to_return;
+}
+string printParams ( param p )
+{
+ stringstream s;
+ s << "caseOn = " << p.caseOn << endl;
+ s << "noPunct = " << p.noPunct << endl;
+ s << "debugMode = " << p.debugMode << endl;
+ s << "debugLevel = " << p.debugLevel << endl;
+ s << "hypothesisFile = " << p.hypothesisFile << endl;
+ s << "referenceFile = " << p.referenceFile << endl;
+ s << "normalize = " << p.normalize << endl;
+ s << "noTxtIds = " << p.noTxtIds << endl;
+ s << "outputFileExtension = " << p.outputFileExtension << endl;
+ s << "outputFileName = " << p.outputFileName << endl;
+ s << "sgmlInputs = " << p.sgmlInputs << endl;
+ s << "tercomLike = " << p.tercomLike << endl;
+ return s.str();
- }
- string join ( string delim, vector<string> arr )
- {
- if ( ( int ) arr.size() == 0 ) return "";
+}
+string join ( string delim, vector<string> arr )
+{
+ if ( ( int ) arr.size() == 0 ) return "";
// if ((int)delim.compare("") == 0) delim = new String("");
// String s = new String("");
- stringstream s;
- s.str ( "" );
- for ( int i = 0; i < ( int ) arr.size(); i++ )
- {
- if ( i == 0 )
- {
- s << arr.at ( i );
- }
- else
- {
- s << delim << arr.at ( i );
- }
- }
- return s.str();
-// return "";
+ stringstream s;
+ s.str ( "" );
+ for ( int i = 0; i < ( int ) arr.size(); i++ ) {
+ if ( i == 0 ) {
+ s << arr.at ( i );
+ } else {
+ s << delim << arr.at ( i );
}
+ }
+ return s.str();
+// return "";
+}
}
diff --git a/mert/TER/tools.h b/mert/TER/tools.h
index 0a85e7b4b..157b739a5 100644
--- a/mert/TER/tools.h
+++ b/mert/TER/tools.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -35,32 +35,31 @@ using namespace std;
namespace Tools
{
- typedef vector<double> vecDouble;
- typedef vector<char> vecChar;
- typedef vector<int> vecInt;
- typedef vector<float> vecFloat;
- typedef vector<size_t> vecSize_t;
- typedef vector<string> vecString;
- typedef vector<string> alignmentElement;
- typedef vector<alignmentElement> WERalignment;
+typedef vector<double> vecDouble;
+typedef vector<char> vecChar;
+typedef vector<int> vecInt;
+typedef vector<float> vecFloat;
+typedef vector<size_t> vecSize_t;
+typedef vector<string> vecString;
+typedef vector<string> alignmentElement;
+typedef vector<alignmentElement> WERalignment;
-struct param
-{
- bool debugMode;
- string referenceFile; // path to the resources
- string hypothesisFile; // path to the configuration files
- string outputFileExtension;
- string outputFileName;
- bool noPunct;
- bool caseOn;
- bool normalize;
- bool tercomLike;
- bool sgmlInputs;
- bool noTxtIds;
- bool printAlignments;
- bool WER;
- int debugLevel;
+struct param {
+ bool debugMode;
+ string referenceFile; // path to the resources
+ string hypothesisFile; // path to the configuration files
+ string outputFileExtension;
+ string outputFileName;
+ bool noPunct;
+ bool caseOn;
+ bool normalize;
+ bool tercomLike;
+ bool sgmlInputs;
+ bool noTxtIds;
+ bool printAlignments;
+ bool WER;
+ int debugLevel;
};
// param = { false, "","","","" };
@@ -68,35 +67,35 @@ struct param
// private:
// public:
- string vectorToString ( vector<string> vec );
- string vectorToString ( vector<char> vec );
- string vectorToString ( vector<int> vec );
- string vectorToString ( vector<string> vec, string s );
- string vectorToString ( vector<char> vec, string s );
- string vectorToString ( vector<int> vec, string s );
- string vectorToString ( vector<bool> vec, string s );
- string vectorToString ( char* vec, string s, int taille );
- string vectorToString ( int* vec, string s , int taille );
- string vectorToString ( bool* vec, string s , int taille );
- vector<string> subVector ( vector<string> vec, int start, int end );
- vector<int> subVector ( vector<int> vec, int start, int end );
- vector<float> subVector ( vector<float> vec, int start, int end );
- vector<string> copyVector ( vector<string> vec );
- vector<int> copyVector ( vector<int> vec );
- vector<float> copyVector ( vector<float> vec );
- vector<string> stringToVector ( string s, string tok );
- vector<string> stringToVector ( char s, string tok );
- vector<string> stringToVector ( int s, string tok );
- vector<int> stringToVectorInt ( string s, string tok );
- vector<float> stringToVectorFloat ( string s, string tok );
- string lowerCase(string str);
- string removePunct(string str);
- string tokenizePunct(string str);
- string removePunctTercom(string str);
- string normalizeStd(string str);
- string printParams(param p);
- string join ( string delim, vector<string> arr );
+string vectorToString ( vector<string> vec );
+string vectorToString ( vector<char> vec );
+string vectorToString ( vector<int> vec );
+string vectorToString ( vector<string> vec, string s );
+string vectorToString ( vector<char> vec, string s );
+string vectorToString ( vector<int> vec, string s );
+string vectorToString ( vector<bool> vec, string s );
+string vectorToString ( char* vec, string s, int taille );
+string vectorToString ( int* vec, string s , int taille );
+string vectorToString ( bool* vec, string s , int taille );
+vector<string> subVector ( vector<string> vec, int start, int end );
+vector<int> subVector ( vector<int> vec, int start, int end );
+vector<float> subVector ( vector<float> vec, int start, int end );
+vector<string> copyVector ( vector<string> vec );
+vector<int> copyVector ( vector<int> vec );
+vector<float> copyVector ( vector<float> vec );
+vector<string> stringToVector ( string s, string tok );
+vector<string> stringToVector ( char s, string tok );
+vector<string> stringToVector ( int s, string tok );
+vector<int> stringToVectorInt ( string s, string tok );
+vector<float> stringToVectorFloat ( string s, string tok );
+string lowerCase(string str);
+string removePunct(string str);
+string tokenizePunct(string str);
+string removePunctTercom(string str);
+string normalizeStd(string str);
+string printParams(param p);
+string join ( string delim, vector<string> arr );
// };
- param copyParam(param p);
+param copyParam(param p);
}
#endif