Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/mert/TER
diff options
context:
space:
mode:
authorTetsuo Kiso <tetsuo-s@is.naist.jp>2012-02-20 04:46:08 +0400
committerTetsuo Kiso <tetsuo-s@is.naist.jp>2012-02-20 04:46:08 +0400
commit8c3b82e59636d54936563f1e9eae767fe2247242 (patch)
tree785d87fe7782013bc6ced3e08a0a61ce6247338d /mert/TER
parent94888b258dcfd4ca68f4fc5ecad8b0410829e63b (diff)
Change the naming conventions for the guard macros; Rename TER directory.
This change might be useful to avoid duplicating the names. The reason is that although MERT programs are standalone applications, some header files such as data.h and point.h have common guard macro names like "DATA_H" and "POINT_H", and this is not a good naming convention when you want to include external headers. Some files actually include headers from Moses and KenLM's util.
Diffstat (limited to 'mert/TER')
-rw-r--r--mert/TER/alignmentStruct.cpp115
-rw-r--r--mert/TER/alignmentStruct.h45
-rw-r--r--mert/TER/bestShiftStruct.h50
-rw-r--r--mert/TER/hashMap.cpp153
-rw-r--r--mert/TER/hashMap.h43
-rw-r--r--mert/TER/hashMapInfos.cpp162
-rw-r--r--mert/TER/hashMapInfos.h42
-rw-r--r--mert/TER/hashMapStringInfos.cpp167
-rw-r--r--mert/TER/hashMapStringInfos.h42
-rw-r--r--mert/TER/infosHasher.cpp35
-rw-r--r--mert/TER/infosHasher.h31
-rw-r--r--mert/TER/stringHasher.cpp35
-rw-r--r--mert/TER/stringHasher.h28
-rw-r--r--mert/TER/stringInfosHasher.cpp35
-rw-r--r--mert/TER/stringInfosHasher.h28
-rw-r--r--mert/TER/terAlignment.cpp131
-rw-r--r--mert/TER/terAlignment.h51
-rw-r--r--mert/TER/terShift.cpp100
-rw-r--r--mert/TER/terShift.h44
-rw-r--r--mert/TER/tercalc.cpp1035
-rw-r--r--mert/TER/tercalc.h82
-rw-r--r--mert/TER/tools.cpp545
-rw-r--r--mert/TER/tools.h65
23 files changed, 3064 insertions, 0 deletions
diff --git a/mert/TER/alignmentStruct.cpp b/mert/TER/alignmentStruct.cpp
new file mode 100644
index 000000000..15b4a8032
--- /dev/null
+++ b/mert/TER/alignmentStruct.cpp
@@ -0,0 +1,115 @@
+#include "alignmentStruct.h"
+
+using namespace std;
+namespace TERCpp
+{
+// Placeholder textual form: none of the members are serialised, the result
+// is always the same fixed line terminated by a newline.
+string alignmentStruct::toString()
+{
+  string text ( "Nothing to be printed" );
+  text += '\n';
+  return text;
+}
+
+// alignmentStruct::alignmentStruct()
+// {
+// // vector<string> ref;
+// // vector<string> hyp;
+// // vector<string> aftershift;
+//
+// // alignmentStruct[] allshifts = null;
+//
+// numEdits=0;
+// numWords=0;
+// bestRef="";
+//
+// numIns=0;
+// numDel=0;
+// numSub=0;
+// numSft=0;
+// numWsf=0;
+// }
+// alignmentStruct::alignmentStruct ()
+// {
+// start = 0;
+// end = 0;
+// moveto = 0;
+// newloc = 0;
+// cost=1.0;
+// }
+// alignmentStruct::alignmentStruct (int _start, int _end, int _moveto, int _newloc)
+// {
+// start = _start;
+// end = _end;
+// moveto = _moveto;
+// newloc = _newloc;
+// cost=1.0;
+// }
+
+// alignmentStruct::alignmentStruct (int _start, int _end, int _moveto, int _newloc, vector<string> _shifted)
+// {
+// start = _start;
+// end = _end;
+// moveto = _moveto;
+// newloc = _newloc;
+// shifted = _shifted;
+// cost=1.0;
+// }
+// string alignmentStruct::vectorToString(vector<string> vec)
+// {
+// string retour("");
+// for (vector<string>::iterator vecIter=vec.begin();vecIter!=vec.end(); vecIter++)
+// {
+// retour+=(*vecIter)+"\t";
+// }
+// return retour;
+// }
+
+// string alignmentStruct::toString()
+// {
+// stringstream s;
+// s.str("");
+// s << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
+// if ((int)shifted.size() > 0)
+// {
+// s << " (" << vectorToString(shifted) << ")";
+// }
+// return s.str();
+// }
+
+/* The distance of the shift. */
+// int alignmentStruct::distance()
+// {
+// if (moveto < start)
+// {
+// return start - moveto;
+// }
+// else if (moveto > end)
+// {
+// return moveto - end;
+// }
+// else
+// {
+// return moveto - start;
+// }
+// }
+//
+// bool alignmentStruct::leftShift()
+// {
+// return (moveto < start);
+// }
+//
+// int alignmentStruct::size()
+// {
+// return (end - start) + 1;
+// }
+// alignmentStruct alignmentStruct::operator=(alignmentStruct t)
+// {
+//
+// return t;
+// }
+
+
+} \ No newline at end of file
diff --git a/mert/TER/alignmentStruct.h b/mert/TER/alignmentStruct.h
new file mode 100644
index 000000000..9e9a75468
--- /dev/null
+++ b/mert/TER/alignmentStruct.h
@@ -0,0 +1,45 @@
+#ifndef MERT_TER_ALIGNMENT_STRUCT_H_
+#define MERT_TER_ALIGNMENT_STRUCT_H_
+
+#include <vector>
+#include <stdio.h>
+#include <string>
+#include <sstream>
+#include "tools.h"
+
+using namespace std;
+using namespace Tools;
+
+namespace TERCpp
+{
+class alignmentStruct // plain record of public fields; declares no constructor
+{
+private:
+public:
+
+// alignmentStruct();
+// alignmentStruct (int _start, int _end, int _moveto, int _newloc);
+// alignmentStruct (int _start, int _end, int _moveto, int _newloc, vector<string> _shifted);
+// string toString();
+// int distance() ;
+// bool leftShift();
+// int size();
+// alignmentStruct operator=(alignmentStruct t);
+// string vectorToString(vector<string> vec);
+
+// int start;
+// int end;
+// int moveto;
+// int newloc;
+ vector<string> nwords; // The words we shifted
+ vector<char> alignment ; // for pra_more output
+ vector<vecInt> aftershift; // for pra_more output (vecInt presumably comes from tools.h -- confirm)
+ // This is used to store the cost of a shift, so we don't have to
+ // calculate it multiple times. No constructor is declared, so the
+ // value is whatever the creator of the object assigns.
+ double cost;
+ string toString(); // textual form; defined in alignmentStruct.cpp
+};
+
+}
+
+#endif // MERT_TER_ALIGNMENT_STRUCT_H_
diff --git a/mert/TER/bestShiftStruct.h b/mert/TER/bestShiftStruct.h
new file mode 100644
index 000000000..bfebe3b1e
--- /dev/null
+++ b/mert/TER/bestShiftStruct.h
@@ -0,0 +1,50 @@
+#ifndef MERT_TER_BEST_SHIFT_STRUCT_H_
+#define MERT_TER_BEST_SHIFT_STRUCT_H_
+
+#include <vector>
+#include <stdio.h>
+#include <string>
+#include <sstream>
+#include "tools.h"
+#include "terShift.h"
+#include "terAlignment.h"
+
+
+using namespace std;
+using namespace Tools;
+
+namespace TERCpp
+{
+class bestShiftStruct // plain record pairing a terShift with a terAlignment; declares no constructor
+{
+private:
+public:
+
+// alignmentStruct();
+// alignmentStruct (int _start, int _end, int _moveto, int _newloc);
+// alignmentStruct (int _start, int _end, int _moveto, int _newloc, vector<string> _shifted);
+// string toString();
+// int distance() ;
+// bool leftShift();
+// int size();
+// alignmentStruct operator=(alignmentStruct t);
+// string vectorToString(vector<string> vec);
+
+// int start;
+// int end;
+// int moveto;
+// int newloc;
+ terShift m_best_shift; // the shift held by this record
+ terAlignment m_best_align; // the alignment held by this record
+ bool m_empty; // presumably true while no best shift has been stored -- confirm against the callers; not initialised here
+// vector<string> nwords; // The words we shifted
+// char* alignment ; // for pra_more output
+// vector<vecInt> aftershift; // for pra_more output
+ // This is used to store the cost of a shift, so we don't have to
+ // calculate it multiple times.
+// double cost;
+};
+
+}
+
+#endif // MERT_TER_BEST_SHIFT_STRUCT_H_
diff --git a/mert/TER/hashMap.cpp b/mert/TER/hashMap.cpp
new file mode 100644
index 000000000..469167aaa
--- /dev/null
+++ b/mert/TER/hashMap.cpp
@@ -0,0 +1,153 @@
+#include "hashMap.h"
+
+// The following class defines a hash function for strings
+
+
+using namespace std;
+
+namespace HashMapSpace
+{
+// hashMap::hashMap();
+/* hashMap::~hashMap()
+ {
+// vector<stringHasher>::const_iterator del = m_hasher.begin();
+ for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
+ {
+ delete(*del);
+ }
+ }*/
+/**
+ * Tells whether an entry with the given hash key is stored.
+ * @param searchKey hash key to look for
+ * @return 1 if some stored entry has this hash key, 0 otherwise
+ */
+int hashMap::trouve ( long searchKey )
+{
+  vector<stringHasher>::iterator entry = m_hasher.begin();
+  while ( entry != m_hasher.end() && entry->getHashKey() != searchKey ) {
+    ++entry;
+  }
+  return ( entry != m_hasher.end() ) ? 1 : 0;
+}
+// Overload taking the textual key: the key is hashed and the entries are
+// searched for that hash key (only hash keys are compared, not the text).
+// Returns 1 when an entry matches, 0 otherwise.
+int hashMap::trouve ( string key )
+{
+  return trouve ( hashValue ( key ) );
+}
+/**
+ * Computes the hash key of a string with the collate<char> facet.
+ * @param key text to hash
+ * @return result of collate<char>::hash over the characters of key
+ */
+long hashMap::hashValue ( string key )
+{
+  // A default-constructed locale is a copy of the current global locale
+  // (the "C" locale unless the program installed another one).
+  const locale currentLocale;
+  const char* const first = key.data();
+  const char* const last = first + key.length();
+  return use_facet<collate<char> > ( currentLocale ).hash ( first, last );
+}
+/**
+ * Appends a (key, value) entry unless an entry with the same hash key is
+ * already stored, in which case nothing changes.
+ * @param key textual key; its hash identifies the entry
+ * @param value text stored with the key
+ */
+void hashMap::addHasher ( string key, string value )
+{
+  const long hashKey = hashValue ( key );
+  if ( trouve ( hashKey ) != 0 ) {
+    return; // already present: the existing entry is kept as is
+  }
+  m_hasher.push_back ( stringHasher ( hashKey, key, value ) );
+}
+// Returns a copy of the first entry whose hash key equals the hash of `key`,
+// or an entry built from (0, "", "") when no entry matches.
+stringHasher hashMap::getHasher ( string key )
+{
+  const long wanted = hashValue ( key );
+  for ( vector<stringHasher>::iterator entry = m_hasher.begin(); entry != m_hasher.end(); ++entry ) {
+    if ( entry->getHashKey() == wanted ) {
+      return *entry;
+    }
+  }
+  return stringHasher ( 0, "", "" );
+}
+// Returns the value of the first entry whose hash key equals the hash of
+// `key`, or "" when no entry matches.
+string hashMap::getValue ( string key )
+{
+  const long wanted = hashValue ( key );
+  vector<stringHasher>::iterator entry = m_hasher.begin();
+  while ( entry != m_hasher.end() && entry->getHashKey() != wanted ) {
+    ++entry;
+  }
+  if ( entry == m_hasher.end() ) {
+    return "";
+  }
+  return entry->getValue();
+}
+// Reverse lookup: returns the textual key of the first entry whose value is
+// equal to `value`, or "" when no entry holds that value.
+string hashMap::searchValue ( string value )
+{
+  for ( vector<stringHasher>::iterator entry = m_hasher.begin(); entry != m_hasher.end(); ++entry ) {
+    if ( entry->getValue() == value ) {
+      return entry->getKey();
+    }
+  }
+  return "";
+}
+
+
+// Replaces the value of every entry whose hash key equals the hash of `key`.
+// Does nothing when no entry matches.
+void hashMap::setValue ( string key , string value )
+{
+  const long wanted = hashValue ( key );
+  vector<stringHasher>::iterator entry = m_hasher.begin();
+  while ( entry != m_hasher.end() ) {
+    if ( entry->getHashKey() == wanted ) {
+      entry->setValue ( value );
+    }
+    ++entry;
+  }
+}
+
+
+/**
+ * Writes every entry to cout, one per line, as
+ * "<hash key> | <key> | <value>".
+ */
+void hashMap::printHash()
+{
+  vector<stringHasher>::iterator entry = m_hasher.begin();
+  while ( entry != m_hasher.end() ) {
+    cout << entry->getHashKey() << " | " << entry->getKey() << " | " << entry->getValue() << endl;
+    ++entry;
+  }
+}
+
+
+
+// long hashValue(string key){}
+
+}
+
diff --git a/mert/TER/hashMap.h b/mert/TER/hashMap.h
new file mode 100644
index 000000000..85020d041
--- /dev/null
+++ b/mert/TER/hashMap.h
@@ -0,0 +1,43 @@
+/*
+ * Generic hashmap manipulation functions
+ */
+
+#ifndef MERT_TER_HASHMAP_H_
+#define MERT_TER_HASHMAP_H_
+
+#include "stringHasher.h"
+#include <vector>
+#include <string>
+#include <sstream>
+#include <fstream>
+#include <locale>
+
+using namespace std;
+
+namespace HashMapSpace
+{
+class hashMap // string -> string table kept as a vector of entries and searched linearly by hash key
+{
+private:
+ vector<stringHasher> m_hasher; // the stored entries, in insertion order
+
+public:
+// ~hashMap();
+ long hashValue ( string key ); // hash key of a string (collate<char>::hash)
+ int trouve ( long searchKey ); // 1 if an entry with this hash key exists, else 0
+ int trouve ( string key ); // same test, after hashing the textual key
+ void addHasher ( string key, string value ); // adds the entry unless its hash key is already stored
+ stringHasher getHasher ( string key ); // copy of the matching entry, or an entry built from (0, "", "")
+ string getValue ( string key ); // value of the matching entry, or ""
+ string searchValue ( string key ); // reverse lookup: the argument is the value; returns its key or ""
+ void setValue ( string key , string value ); // overwrites the value of the matching entry, if any
+ void printHash(); // prints "hash | key | value" lines to cout
+ vector<stringHasher> getHashMap(); // NOTE(review): no definition appears in hashMap.cpp -- confirm before calling
+ string printStringHash(); // NOTE(review): no definition appears in hashMap.cpp
+ string printStringHash2(); // NOTE(review): no definition appears in hashMap.cpp
+ string printStringHashForLexicon(); // NOTE(review): no definition appears in hashMap.cpp
+};
+
+}
+
+#endif // MERT_TER_HASHMAP_H_
diff --git a/mert/TER/hashMapInfos.cpp b/mert/TER/hashMapInfos.cpp
new file mode 100644
index 000000000..9cd431196
--- /dev/null
+++ b/mert/TER/hashMapInfos.cpp
@@ -0,0 +1,162 @@
+#include "hashMapInfos.h"
+
+// The following class defines a hash function for strings
+
+
+using namespace std;
+
+namespace HashMapSpace
+{
+// hashMapInfos::hashMap();
+/* hashMapInfos::~hashMap()
+ {
+// vector<infosHasher>::const_iterator del = m_hasher.begin();
+ for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
+ {
+ delete(*del);
+ }
+ }*/
+/**
+ * Tells whether an entry with the given hash key is stored.
+ * @param searchKey hash key to look for
+ * @return 1 if some stored entry has this hash key, 0 otherwise
+ */
+int hashMapInfos::trouve ( long searchKey )
+{
+  int found = 0;
+  for ( vector<infosHasher>::iterator entry = m_hasher.begin(); found == 0 && entry != m_hasher.end(); ++entry ) {
+    if ( entry->getHashKey() == searchKey ) {
+      found = 1;
+    }
+  }
+  return found;
+}
+// Overload taking the textual key: the key is hashed and the entries are
+// searched for that hash key (only hash keys are compared, not the text).
+// Returns 1 when an entry matches, 0 otherwise.
+int hashMapInfos::trouve ( string key )
+{
+  return trouve ( hashValue ( key ) );
+}
+
+/**
+ * Computes the hash key of a string with the collate<char> facet.
+ * @param key text to hash
+ * @return result of collate<char>::hash over the characters of key
+ */
+long hashMapInfos::hashValue ( string key )
+{
+  // A default-constructed locale is a copy of the current global locale
+  // (the "C" locale unless the program installed another one).
+  const locale currentLocale;
+  const collate<char>& collator = use_facet<collate<char> > ( currentLocale );
+  const char* const first = key.data();
+  return collator.hash ( first, first + key.length() );
+}
+/**
+ * Appends a (key, value) entry unless an entry with the same hash key is
+ * already stored, in which case nothing changes.
+ * @param key textual key; its hash identifies the entry
+ * @param value integers stored with the key
+ */
+void hashMapInfos::addHasher ( string key, vector<int> value )
+{
+  const long hashKey = hashValue ( key );
+  if ( trouve ( hashKey ) != 0 ) {
+    return; // already present: the existing entry is kept as is
+  }
+  m_hasher.push_back ( infosHasher ( hashKey, key, value ) );
+}
+void hashMapInfos::addValue ( string key, vector<int> value ) // alias of addHasher: forwards both arguments unchanged
+{
+ addHasher ( key, value );
+}
+// Returns a copy of the first entry whose hash key equals the hash of `key`,
+// or an entry built from (0, "", empty vector) when no entry matches.
+infosHasher hashMapInfos::getHasher ( string key )
+{
+  const long wanted = hashValue ( key );
+  vector<infosHasher>::iterator entry = m_hasher.begin();
+  while ( entry != m_hasher.end() && entry->getHashKey() != wanted ) {
+    ++entry;
+  }
+  if ( entry != m_hasher.end() ) {
+    return *entry;
+  }
+  return infosHasher ( 0, "", vector<int>() );
+}
+// Returns the value of the first entry whose hash key equals the hash of
+// `key`, or an empty vector when no entry matches.
+vector<int> hashMapInfos::getValue ( string key )
+{
+  const long wanted = hashValue ( key );
+  for ( vector<infosHasher>::iterator entry = m_hasher.begin(); entry != m_hasher.end(); ++entry ) {
+    if ( entry->getHashKey() == wanted ) {
+      return entry->getValue();
+    }
+  }
+  return vector<int>();
+}
+// string hashMapInfos::searchValue ( string value )
+// {
+// // long searchKey=hashValue ( key );
+// // long foundKey;
+// vector<int> foundValue;
+//
+// // vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
+// for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+// {
+// foundValue= ( *l_hasher ).getValue();
+// /* if ( foundValue.compare ( value ) == 0 )
+// {
+// return ( *l_hasher ).getKey();
+// }*/
+// }
+// return "";
+// }
+//
+
+// Replaces the value of every entry whose hash key equals the hash of `key`.
+// Does nothing when no entry matches.
+void hashMapInfos::setValue ( string key , vector<int> value )
+{
+  const long wanted = hashValue ( key );
+  vector<infosHasher>::iterator entry = m_hasher.begin();
+  while ( entry != m_hasher.end() ) {
+    if ( entry->getHashKey() == wanted ) {
+      entry->setValue ( value );
+    }
+    ++entry;
+  }
+}
+
+
+/**
+ * Currently prints nothing: the output statement is disabled, so calling
+ * this function has no visible effect.
+ */
+void hashMapInfos::printHash()
+{
+  // Disabled output, one line per entry:
+  // cout << hashKey << " | " << key << " | " << value << endl;
+}
+
+
+
+// long hashValue(string key){}
+
+}
+
diff --git a/mert/TER/hashMapInfos.h b/mert/TER/hashMapInfos.h
new file mode 100644
index 000000000..8b56e9d02
--- /dev/null
+++ b/mert/TER/hashMapInfos.h
@@ -0,0 +1,42 @@
+/*
+ * Generic hashmap manipulation functions
+ */
+#ifndef MERT_TER_HASHMAP_INFOS_H_
+#define MERT_TER_HASHMAP_INFOS_H_
+
+#include "infosHasher.h"
+#include <vector>
+#include <string>
+#include <sstream>
+#include <fstream>
+
+using namespace std;
+
+namespace HashMapSpace
+{
+class hashMapInfos // string -> vector<int> table kept as a vector of entries and searched linearly by hash key
+{
+private:
+ vector<infosHasher> m_hasher; // the stored entries, in insertion order
+
+public:
+// ~hashMap();
+ long hashValue ( string key ); // hash key of a string (collate<char>::hash)
+ int trouve ( long searchKey ); // 1 if an entry with this hash key exists, else 0
+ int trouve ( string key ); // same test, after hashing the textual key
+ void addHasher ( string key, vector<int> value ); // adds the entry unless its hash key is already stored
+ void addValue ( string key, vector<int> value ); // alias of addHasher
+ infosHasher getHasher ( string key ); // copy of the matching entry, or an entry built from (0, "", empty vector)
+ vector<int> getValue ( string key ); // value of the matching entry, or an empty vector
+// string searchValue ( string key );
+ void setValue ( string key , vector<int> value ); // overwrites the value of the matching entry, if any
+ void printHash(); // output statement is disabled in hashMapInfos.cpp, so nothing is printed
+ vector<infosHasher> getHashMap(); // NOTE(review): no definition appears in hashMapInfos.cpp -- confirm before calling
+ string printStringHash(); // NOTE(review): no definition appears in hashMapInfos.cpp
+ string printStringHash2(); // NOTE(review): no definition appears in hashMapInfos.cpp
+ string printStringHashForLexicon(); // NOTE(review): no definition appears in hashMapInfos.cpp
+};
+
+}
+
+#endif // MERT_TER_HASHMAP_INFOS_H_
diff --git a/mert/TER/hashMapStringInfos.cpp b/mert/TER/hashMapStringInfos.cpp
new file mode 100644
index 000000000..0fbb0a98a
--- /dev/null
+++ b/mert/TER/hashMapStringInfos.cpp
@@ -0,0 +1,167 @@
+#include "hashMapStringInfos.h"
+
+// The following class defines a hash function for strings
+
+
+using namespace std;
+
+namespace HashMapSpace
+{
+// hashMapStringInfos::hashMap();
+/* hashMapStringInfos::~hashMap()
+{
+// vector<stringInfosHasher>::const_iterator del = m_hasher.begin();
+ for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
+ {
+ delete(*del);
+ }
+}*/
+/**
+ * Tells whether an entry with the given hash key is stored.
+ * @param searchKey hash key to look for
+ * @return 1 if some stored entry has this hash key, 0 otherwise
+ */
+int hashMapStringInfos::trouve ( long searchKey )
+{
+  vector<stringInfosHasher>::iterator entry = m_hasher.begin();
+  for ( ; entry != m_hasher.end(); ++entry ) {
+    if ( entry->getHashKey() == searchKey ) {
+      break;
+    }
+  }
+  return ( entry == m_hasher.end() ) ? 0 : 1;
+}
+
+// Overload taking the textual key: the key is hashed and the entries are
+// searched for that hash key (only hash keys are compared, not the text).
+// Returns 1 when an entry matches, 0 otherwise.
+int hashMapStringInfos::trouve ( string key )
+{
+  return trouve ( hashValue ( key ) );
+}
+
+/**
+ * Computes the hash key of a string with the collate<char> facet.
+ * @param key text to hash
+ * @return result of collate<char>::hash over the characters of key
+ */
+long hashMapStringInfos::hashValue ( string key )
+{
+  // A default-constructed locale is a copy of the current global locale
+  // (the "C" locale unless the program installed another one).
+  const locale currentLocale;
+  const char* const first = key.data();
+  const char* const last = first + key.length();
+  return use_facet<collate<char> > ( currentLocale ).hash ( first, last );
+}
+/**
+ * Appends a (key, value) entry unless an entry with the same hash key is
+ * already stored, in which case nothing changes.
+ * @param key textual key; its hash identifies the entry
+ * @param value strings stored with the key
+ */
+void hashMapStringInfos::addHasher ( string key, vector<string> value )
+{
+  const long hashKey = hashValue ( key );
+  if ( trouve ( hashKey ) != 0 ) {
+    return; // already present: the existing entry is kept as is
+  }
+  m_hasher.push_back ( stringInfosHasher ( hashKey, key, value ) );
+}
+void hashMapStringInfos::addValue ( string key, vector<string> value ) // alias of addHasher: forwards both arguments unchanged
+{
+ addHasher ( key, value );
+}
+// Returns a copy of the first entry whose hash key equals the hash of `key`,
+// or an entry built from (0, "", empty vector) when no entry matches.
+stringInfosHasher hashMapStringInfos::getHasher ( string key )
+{
+  const long wanted = hashValue ( key );
+  vector<stringInfosHasher>::iterator entry = m_hasher.begin();
+  while ( entry != m_hasher.end() && entry->getHashKey() != wanted ) {
+    ++entry;
+  }
+  if ( entry != m_hasher.end() ) {
+    return *entry;
+  }
+  return stringInfosHasher ( 0, "", vector<string>() );
+}
+// Returns the value of the first entry whose hash key equals the hash of
+// `key`, or an empty vector when no entry matches.
+vector<string> hashMapStringInfos::getValue ( string key )
+{
+  const long wanted = hashValue ( key );
+  for ( vector<stringInfosHasher>::iterator entry = m_hasher.begin(); entry != m_hasher.end(); ++entry ) {
+    if ( entry->getHashKey() == wanted ) {
+      return entry->getValue();
+    }
+  }
+  return vector<string>();
+}
+// string hashMapStringInfos::searchValue ( string value )
+// {
+// // long searchKey=hashValue ( key );
+// // long foundKey;
+// vector<int> foundValue;
+//
+// // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+// for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+// {
+// foundValue= ( *l_hasher ).getValue();
+// /* if ( foundValue.compare ( value ) == 0 )
+// {
+// return ( *l_hasher ).getKey();
+// }*/
+// }
+// return "";
+// }
+//
+
+// Replaces the value of every entry whose hash key equals the hash of `key`.
+// Does nothing when no entry matches.
+void hashMapStringInfos::setValue ( string key , vector<string> value )
+{
+  const long wanted = hashValue ( key );
+  vector<stringInfosHasher>::iterator entry = m_hasher.begin();
+  while ( entry != m_hasher.end() ) {
+    if ( entry->getHashKey() == wanted ) {
+      entry->setValue ( value );
+    }
+    ++entry;
+  }
+}
+
+
+/**
+ * Currently prints nothing: the output statement is disabled, so calling
+ * this function has no visible effect.
+ */
+void hashMapStringInfos::printHash()
+{
+  // Disabled output, one line per entry:
+  // cout << hashKey << " | " << key << " | " << value << endl;
+}
+// Returns a copy of all stored entries, in insertion order.
+vector< stringInfosHasher > hashMapStringInfos::getHashMap()
+{
+  vector< stringInfosHasher > entries ( m_hasher );
+  return entries;
+}
+
+
+
+// long hashValue(string key){}
+
+}
+
diff --git a/mert/TER/hashMapStringInfos.h b/mert/TER/hashMapStringInfos.h
new file mode 100644
index 000000000..870274f3d
--- /dev/null
+++ b/mert/TER/hashMapStringInfos.h
@@ -0,0 +1,42 @@
+/*
+ * Generic hashmap manipulation functions
+ */
+#ifndef MERT_TER_HASHMAP_STRING_INFOS_H_
+#define MERT_TER_HASHMAP_STRING_INFOS_H_
+
+#include "stringInfosHasher.h"
+#include <vector>
+#include <string>
+#include <sstream>
+#include <fstream>
+
+using namespace std;
+
+namespace HashMapSpace
+{
+class hashMapStringInfos // string -> vector<string> table kept as a vector of entries and searched linearly by hash key
+{
+private:
+ vector<stringInfosHasher> m_hasher; // the stored entries, in insertion order
+
+public:
+// ~hashMap();
+ long hashValue ( string key ); // hash key of a string (collate<char>::hash)
+ int trouve ( long searchKey ); // 1 if an entry with this hash key exists, else 0
+ int trouve ( string key ); // same test, after hashing the textual key
+ void addHasher ( string key, vector<string> value ); // adds the entry unless its hash key is already stored
+ void addValue ( string key, vector<string> value ); // alias of addHasher
+ stringInfosHasher getHasher ( string key ); // copy of the matching entry, or an entry built from (0, "", empty vector)
+ vector<string> getValue ( string key ); // value of the matching entry, or an empty vector
+// string searchValue ( string key );
+ void setValue ( string key , vector<string> value ); // overwrites the value of the matching entry, if any
+ void printHash(); // output statement is disabled in hashMapStringInfos.cpp, so nothing is printed
+ vector<stringInfosHasher> getHashMap(); // copy of all stored entries
+ string printStringHash(); // NOTE(review): no definition appears in hashMapStringInfos.cpp -- confirm before calling
+ string printStringHash2(); // NOTE(review): no definition appears in hashMapStringInfos.cpp
+ string printStringHashForLexicon(); // NOTE(review): no definition appears in hashMapStringInfos.cpp
+};
+
+}
+
+#endif // MERT_TER_HASHMAP_STRING_INFOS_H_
diff --git a/mert/TER/infosHasher.cpp b/mert/TER/infosHasher.cpp
new file mode 100644
index 000000000..654b0b26f
--- /dev/null
+++ b/mert/TER/infosHasher.cpp
@@ -0,0 +1,35 @@
+#include "infosHasher.h"
+// The following class defines a hash function for strings
+
+
+using namespace std;
+
+namespace HashMapSpace
+{
+// Stores the hash key, the textual key and the value vector exactly as given.
+infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt )
+  : m_hashKey ( cle ), m_key ( cleTxt ), m_value ( valueVecInt )
+{
+}
+// infosHasher::~infosHasher(){};*/
+long infosHasher::getHashKey() // returns the stored hash key
+{
+ return m_hashKey;
+}
+string infosHasher::getKey() // returns a copy of the textual key
+{
+ return m_key;
+}
+vector<int> infosHasher::getValue() // returns a copy of the stored integers
+{
+ return m_value;
+}
+void infosHasher::setValue ( vector<int> value ) // replaces the stored integers; the keys are unchanged
+{
+ m_value=value;
+}
+
+
+// typedef stdext::hash_map<std::string,string, stringhasher> HASH_S_S;
+}
diff --git a/mert/TER/infosHasher.h b/mert/TER/infosHasher.h
new file mode 100644
index 000000000..02a32280b
--- /dev/null
+++ b/mert/TER/infosHasher.h
@@ -0,0 +1,31 @@
+#ifndef MERT_TER_INFOS_HASHER_H_
+#define MERT_TER_INFOS_HASHER_H_
+
+#include <string>
+#include <stdio.h>
+#include <iostream>
+#include <vector>
+
+using namespace std;
+namespace HashMapSpace
+{
+// One table entry: a hash key, the textual key it was built for, and the
+// vector<int> value stored with that key.
+class infosHasher
+{
+private:
+  long m_hashKey;      // hash key identifying the entry
+  string m_key;        // textual key
+  vector<int> m_value; // integers stored with the key
+
+public:
+  // Stores the three arguments as given.
+  infosHasher ( long cle, string cleTxt, vector<int> valueVecInt );
+  long getHashKey();                  // stored hash key
+  string getKey();                    // copy of the textual key
+  vector<int> getValue();             // copy of the stored integers
+  void setValue ( vector<int> value ); // replaces the stored integers
+
+
+};
+
+}
+
+#endif // MERT_TER_INFOS_HASHER_H_
diff --git a/mert/TER/stringHasher.cpp b/mert/TER/stringHasher.cpp
new file mode 100644
index 000000000..24fde0e32
--- /dev/null
+++ b/mert/TER/stringHasher.cpp
@@ -0,0 +1,35 @@
+#include "stringHasher.h"
+// The following class defines a hash function for strings
+
+
+using namespace std;
+
+namespace HashMapSpace
+{
+// Stores the hash key, the textual key and the value text exactly as given.
+stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt )
+  : m_hashKey ( cle ), m_key ( cleTxt ), m_value ( valueTxt )
+{
+}
+// stringHasher::~stringHasher(){};*/
+long stringHasher::getHashKey() // returns the stored hash key
+{
+ return m_hashKey;
+}
+string stringHasher::getKey() // returns a copy of the textual key
+{
+ return m_key;
+}
+string stringHasher::getValue() // returns a copy of the stored value
+{
+ return m_value;
+}
+void stringHasher::setValue ( string value ) // replaces the stored value; the keys are unchanged
+{
+ m_value=value;
+}
+
+
+// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;
+}
diff --git a/mert/TER/stringHasher.h b/mert/TER/stringHasher.h
new file mode 100644
index 000000000..897bd9ff5
--- /dev/null
+++ b/mert/TER/stringHasher.h
@@ -0,0 +1,28 @@
+#ifndef MERT_TER_STRING_HASHER_H_
+#define MERT_TER_STRING_HASHER_H_
+
+#include <string>
+#include <iostream>
+
+using namespace std;
+namespace HashMapSpace
+{
+
+class stringHasher // one table entry: hash key, textual key and string value
+{
+private:
+ long m_hashKey; // hash key identifying the entry
+ string m_key; // textual key
+ string m_value; // text stored with the key
+
+public:
+ stringHasher ( long cle, string cleTxt, string valueTxt ); // stores the three arguments as given
+ long getHashKey(); // stored hash key
+ string getKey(); // copy of the textual key
+ string getValue(); // copy of the stored value
+ void setValue ( string value ); // replaces the stored value
+};
+
+}
+
+#endif // MERT_TER_STRING_HASHER_H_
diff --git a/mert/TER/stringInfosHasher.cpp b/mert/TER/stringInfosHasher.cpp
new file mode 100644
index 000000000..3e02e7a20
--- /dev/null
+++ b/mert/TER/stringInfosHasher.cpp
@@ -0,0 +1,35 @@
+#include "stringInfosHasher.h"
+// The following class defines a hash function for strings
+
+
+using namespace std;
+
+namespace HashMapSpace
+{
+// Stores the hash key, the textual key and the value vector exactly as given.
+stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt )
+  : m_hashKey ( cle ), m_key ( cleTxt ), m_value ( valueVecInt )
+{
+}
+// stringInfosHasher::~stringInfosHasher(){};*/
+long stringInfosHasher::getHashKey() // returns the stored hash key
+{
+ return m_hashKey;
+}
+string stringInfosHasher::getKey() // returns a copy of the textual key
+{
+ return m_key;
+}
+vector<string> stringInfosHasher::getValue() // returns a copy of the stored strings
+{
+ return m_value;
+}
+void stringInfosHasher::setValue ( vector<string> value ) // replaces the stored strings; the keys are unchanged
+{
+ m_value=value;
+}
+
+
+// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;
+}
diff --git a/mert/TER/stringInfosHasher.h b/mert/TER/stringInfosHasher.h
new file mode 100644
index 000000000..c1b891662
--- /dev/null
+++ b/mert/TER/stringInfosHasher.h
@@ -0,0 +1,28 @@
+#ifndef MERT_TER_STRING_INFOS_HASHER_H_
+#define MERT_TER_STRING_INFOS_HASHER_H_
+
+#include <string>
+#include <iostream>
+#include <vector>
+
+using namespace std;
+namespace HashMapSpace
+{
+class stringInfosHasher // one table entry: hash key, textual key and vector<string> value
+{
+private:
+ long m_hashKey; // hash key identifying the entry
+ string m_key; // textual key
+ vector<string> m_value; // strings stored with the key
+
+public:
+ stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt ); // stores the three arguments as given
+ long getHashKey(); // stored hash key
+ string getKey(); // copy of the textual key
+ vector<string> getValue(); // copy of the stored strings
+ void setValue ( vector<string> value ); // replaces the stored strings
+};
+
+}
+
+#endif // MERT_TER_STRING_INFOS_HASHER_H_
diff --git a/mert/TER/terAlignment.cpp b/mert/TER/terAlignment.cpp
new file mode 100644
index 000000000..87be53b11
--- /dev/null
+++ b/mert/TER/terAlignment.cpp
@@ -0,0 +1,131 @@
+
+#include "terAlignment.h"
+using namespace std;
+namespace TERCpp
+{
+
+// Builds an empty alignment: all counters are zero, bestRef is empty and the
+// vectors (ref, hyp, aftershift, allshifts, alignment) start out empty.
+terAlignment::terAlignment()
+{
+  numEdits=0;
+  numWords=0;
+  // averageWords is read by scoreAv() and toString(); give it a defined
+  // value instead of leaving it uninitialised.
+  averageWords=0;
+  bestRef="";
+
+  numIns=0;
+  numDel=0;
+  numSub=0;
+  numSft=0;
+  numWsf=0;
+}
+// Renders the alignment as multi-line text: the reference, the hypothesis,
+// the hypothesis after shifting, the alignment characters (only when there
+// are any), the shifts, and the averaged score with its numerator and
+// denominator.
+string terAlignment::toString()
+{
+  stringstream s;
+  s << "Original Ref: " << join ( " ", ref ) << endl;
+  s << "Original Hyp: " << join ( " ", hyp ) << endl;
+  s << "Hyp After Shift: " << join ( " ", aftershift );
+  s << endl;
+  // sizeof ( alignment ) is the size of the vector object itself and is
+  // never zero; the element count decides whether there is anything to show.
+  if ( !alignment.empty() ) {
+    s << "Alignment: (";
+    for ( int i = 0; i < ( int ) alignment.size(); i++ ) {
+      s << alignment[i];
+    }
+    s << ")";
+  }
+  s << endl;
+  if ( allshifts.empty() ) {
+    s << "NumShifts: 0";
+  } else {
+    s << "NumShifts: " << ( int ) allshifts.size();
+    for ( int i = 0; i < ( int ) allshifts.size(); i++ ) {
+      // Call toString() on the stored shift directly; no copy is needed.
+      s << endl << " " ;
+      s << allshifts[i].toString();
+    }
+  }
+  s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")";
+  return s.str();
+
+}
+// Concatenates the elements of `arr`, inserting `delim` between consecutive
+// elements. An empty vector yields "".
+string terAlignment::join ( string delim, vector<string> arr )
+{
+  string joined;
+  for ( vector<string>::const_iterator word = arr.begin(); word != arr.end(); ++word ) {
+    if ( word != arr.begin() ) {
+      joined += delim;
+    }
+    joined += *word;
+  }
+  return joined;
+}
+// Edit rate numEdits / numWords. When numWords is not positive the result
+// is 1.0 if there is at least one edit and 0.0 otherwise.
+double terAlignment::score()
+{
+  if ( numWords <= 0.0 ) {
+    return ( numEdits > 0.0 ) ? 1.0 : 0.0;
+  }
+  return ( double ) numEdits / numWords;
+}
+// Edit rate numEdits / averageWords. When averageWords is not positive the
+// result is 1.0 if there is at least one edit and 0.0 otherwise.
+double terAlignment::scoreAv()
+{
+  if ( averageWords <= 0.0 ) {
+    return ( numEdits > 0.0 ) ? 1.0 : 0.0;
+  }
+  return ( double ) numEdits / averageWords;
+}
+
+// Recomputes the per-type counters from allshifts and alignment:
+// numSft = number of shifts, numWsf = sum of the shifts' size(),
+// numSub counts 'S' and 'T', numDel counts 'D', numIns counts 'I'.
+// Any other alignment character is ignored.
+void terAlignment::scoreDetails()
+{
+  numIns = 0;
+  numDel = 0;
+  numSub = 0;
+  numWsf = 0;
+  numSft = allshifts.size();
+  for ( vector<terShift>::iterator shift = allshifts.begin(); shift != allshifts.end(); ++shift ) {
+    numWsf += shift->size();
+  }
+
+  for ( vector<char>::const_iterator op = alignment.begin(); op != alignment.end(); ++op ) {
+    const char code = *op;
+    if ( code == 'S' || code == 'T' ) {
+      ++numSub;
+    } else if ( code == 'D' ) {
+      ++numDel;
+    } else if ( code == 'I' ) {
+      ++numIns;
+    }
+  }
+}
+
+} \ No newline at end of file
diff --git a/mert/TER/terAlignment.h b/mert/TER/terAlignment.h
new file mode 100644
index 000000000..c8c82eac8
--- /dev/null
+++ b/mert/TER/terAlignment.h
@@ -0,0 +1,51 @@
+#ifndef MERT_TER_TER_ALIGNMENT_H_
+#define MERT_TER_TER_ALIGNMENT_H_
+
+#include <vector>
+#include <stdio.h>
+#include <string.h>
+#include "tools.h"
+#include "terShift.h"
+
+
+using namespace std;
+// using namespace HashMapSpace;
+namespace TERCpp
+{
+
+// Result record of a TER computation: the sentences involved, the edit path
+// and the edit counts. Every member is public and is filled in directly by
+// terCalc.
+class terAlignment
+{
+private:
+public:
+
+ terAlignment();
+ // Textual rendering of the alignment; its output ends with every stored
+ // shift followed by "Score: scoreAv() (numEdits/averageWords)".
+ string toString();
+ // Recomputes numIns, numDel, numSub, numSft and numWsf from alignment and
+ // allshifts.
+ void scoreDetails();
+
+ // Reference sentence, hypothesis sentence and the hypothesis after the
+ // shifts have been applied (all assigned by terCalc).
+ vector<string> ref;
+ vector<string> hyp;
+ vector<string> aftershift;
+
+ // Shifts that were applied, in the order terCalc::TER selected them.
+ vector<terShift> allshifts;
+
+ // Total edit cost and the two normalisation lengths used by score() and
+ // scoreAv() respectively.
+ double numEdits;
+ double numWords;
+ double averageWords;
+ // Edit path, one symbol per step. terCalc::MinEditDist writes ' ' (match),
+ // 'S', 'I' and 'D'; scoreDetails() additionally counts 'T' as a substitution.
+ vector<char> alignment;
+ string bestRef;
+
+ // Counters filled by scoreDetails(): insertions, deletions, substitutions,
+ // number of shifts and number of shifted words.
+ int numIns;
+ int numDel;
+ int numSub;
+ int numSft;
+ int numWsf;
+
+
+ // Joins arr into one string with delim between the elements.
+ string join ( string delim, vector<string> arr );
+ // numEdits / numWords, with 1.0 or 0.0 when numWords is not positive.
+ double score();
+ // numEdits / averageWords, with 1.0 or 0.0 when averageWords is not positive.
+ double scoreAv();
+};
+
+}
+
+#endif // MERT_TER_TER_ALIGNMENT_H_
diff --git a/mert/TER/terShift.cpp b/mert/TER/terShift.cpp
new file mode 100644
index 000000000..428803849
--- /dev/null
+++ b/mert/TER/terShift.cpp
@@ -0,0 +1,100 @@
+#include "terShift.h"
+
+using namespace std;
+namespace TERCpp
+{
+
+// terShift::terShift()
+// {
+// // vector<string> ref;
+// // vector<string> hyp;
+// // vector<string> aftershift;
+//
+// // terShift[] allshifts = null;
+//
+// numEdits=0;
+// numWords=0;
+// bestRef="";
+//
+// numIns=0;
+// numDel=0;
+// numSub=0;
+// numSft=0;
+// numWsf=0;
+// }
+/** Builds a shift whose positions are all 0 and whose cost is 1.0. */
+terShift::terShift ()
+  : start ( 0 ), end ( 0 ), moveto ( 0 ), newloc ( 0 ), cost ( 1.0 )
+{
+}
+/**
+ * Builds a shift of the span [_start, _end] with the given moveto and newloc
+ * positions; the cost starts at 1.0 and no shifted words are recorded.
+ */
+terShift::terShift ( int _start, int _end, int _moveto, int _newloc )
+  : start ( _start ), end ( _end ), moveto ( _moveto ), newloc ( _newloc ), cost ( 1.0 )
+{
+}
+
+/**
+ * Builds a shift of the span [_start, _end] with the given moveto and newloc
+ * positions and stores a copy of the shifted words; the cost starts at 1.0.
+ */
+terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted )
+  : start ( _start ), end ( _end ), moveto ( _moveto ), newloc ( _newloc ), shifted ( _shifted ), cost ( 1.0 )
+{
+}
+// string terShift::vectorToString(vector<string> vec)
+// {
+// string retour("");
+// for (vector<string>::iterator vecIter=vec.begin();vecIter!=vec.end(); vecIter++)
+// {
+// retour+=(*vecIter)+"\t";
+// }
+// return retour;
+// }
+
+/**
+ * Formats the shift as "[start, end, moveto/newloc]", followed by the shifted
+ * words in parentheses when any were recorded.
+ */
+string terShift::toString()
+{
+  stringstream out;
+  out << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
+  if ( !shifted.empty() ) {
+    out << " (" << vectorToString ( shifted ) << ")";
+  }
+  return out.str();
+}
+
+/*
+ * The distance of the shift: measured from start when the target lies before
+ * the span, from end when it lies after it, and as moveto - start when the
+ * target falls inside the span.
+ */
+int terShift::distance()
+{
+  if ( moveto < start ) {
+    return start - moveto;
+  }
+  if ( moveto > end ) {
+    return moveto - end;
+  }
+  return moveto - start;
+}
+
+/** True when the target position lies before the start of the shifted span. */
+bool terShift::leftShift()
+{
+  const bool movesLeft = ( start > moveto );
+  return movesLeft;
+}
+
+/** Number of words in the shifted span; start and end are both inclusive. */
+int terShift::size()
+{
+  const int spanLength = end - start;
+  return spanLength + 1;
+}
+// terShift terShift::operator=(terShift t)
+// {
+//
+// return t;
+// }
+
+
+} \ No newline at end of file
diff --git a/mert/TER/terShift.h b/mert/TER/terShift.h
new file mode 100644
index 000000000..679a7c8bb
--- /dev/null
+++ b/mert/TER/terShift.h
@@ -0,0 +1,44 @@
+#ifndef MERT_TER_TER_SHIFT_H_
+#define MERT_TER_TER_SHIFT_H_
+
+#include <vector>
+#include <stdio.h>
+#include <string>
+#include <sstream>
+#include "tools.h"
+
+using namespace std;
+using namespace Tools;
+
+namespace TERCpp
+{
+// Describes one block move ("shift") of hypothesis words. All members are
+// public and are read and written directly by terCalc.
+class terShift
+{
+private:
+public:
+
+ // Positions 0, cost 1.0.
+ terShift();
+ // Same as above with explicit positions; the cost is still 1.0.
+ terShift ( int _start, int _end, int _moveto, int _newloc );
+ // Additionally records the words being moved.
+ terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted );
+ // "[start, end, moveto/newloc]" plus the shifted words when present.
+ string toString();
+ // Distance between the span and moveto (see the implementation for the
+ // three cases).
+ int distance() ;
+ // True when moveto < start.
+ bool leftShift();
+ // Number of words in the span: (end - start) + 1.
+ int size();
+// terShift operator=(terShift t);
+// string vectorToString(vector<string> vec);
+
+ // First and last index (both inclusive) of the span being moved.
+ int start;
+ int end;
+ // Target positions of the move. terCalc::GatherAllPossShifts stores a
+ // reference-side index in moveto and the matching hypothesis-side index in
+ // newloc; terCalc::PerformShift only uses newloc (-1 means "move to front").
+ int moveto;
+ int newloc;
+ vector<string> shifted; // The words we shifted
+ vector<char> alignment ; // for pra_more output
+ vector<string> aftershift; // for pra_more output
+ // This is used to store the cost of a shift, so we don't have to
+ // calculate it multiple times.
+ double cost;
+};
+
+}
+
+#endif // MERT_TER_TER_SHIFT_H_
diff --git a/mert/TER/tercalc.cpp b/mert/TER/tercalc.cpp
new file mode 100644
index 000000000..e16f692e8
--- /dev/null
+++ b/mert/TER/tercalc.cpp
@@ -0,0 +1,1035 @@
+//
+// C++ Implementation: tercalc
+//
+// Description:
+//
+//
+// Author: <>, (C) 2010
+//
+// Copyright: See COPYING file that comes with this distribution
+//
+//
+#include "tercalc.h"
+using namespace std;
+using namespace Tools;
+namespace TERCpp
+{
+
+/**
+ * Sets the default search limits, unit edit costs and zeroed statistics
+ * counters; debug output is off.
+ */
+terCalc::terCalc()
+{
+  // Edit operation costs: a match is free, everything else costs one edit.
+  match_cost = 0.0;
+  shift_cost = insert_cost = delete_cost = substitute_cost = 1.0;
+  INF = 999999.0;
+
+  // Limits of the shift search and of the beam used by MinEditDist.
+  MAX_SHIFT_SIZE = 50;
+  MAX_SHIFT_DIST = 50;
+  BEAM_WIDTH = 20;
+
+  // Internal statistics.
+  NUM_SEGMENTS_SCORED = 0;
+  NUM_SHIFTS_CONSIDERED = 0;
+  NUM_BEAM_SEARCH_CALLS = 0;
+
+  PRINT_DEBUG = false;
+}
+
+
+// terCalc::~terCalc()
+// {
+// }
+// size_t* terCalc::hashVec ( vector<string> s )
+// {
+// size_t retour[ ( int ) s.size() ];
+// int i=0;
+// for ( i=0; i< ( int ) s.size(); i++ )
+// {
+// boost::hash<std::string> hasher;
+// retour[i]=hasher ( s.at ( i ) );
+// }
+// return retour;
+// }
+
+
+/**
+ * Word-error-rate style dynamic programme over two arrays of word hashes.
+ * Prints the DP table to stderr while filling it and returns the value of the
+ * bottom-right cell.
+ *
+ * NOTE(review): the lengths are derived with sizeof on the POINTER arguments,
+ * which yields the pointer width (typically 4 or 8), not the number of
+ * elements the caller allocated. The function therefore always compares a
+ * fixed number of entries and may read past the end of shorter arrays. This
+ * cannot be repaired without adding length parameters to the signature; use
+ * the vector overloads instead.
+ */
+int terCalc::WERCalculation ( size_t * ref, size_t * hyp )
+{
+  const int REFSize = sizeof ( ref ) + 1;
+  const int HYPSize = sizeof ( hyp ) + 1;
+  // std::vector instead of a variable-length array, which is a compiler
+  // extension and not part of ISO C++.
+  vector<vector<int> > WER ( REFSize, vector<int> ( HYPSize, 0 ) );
+  for ( int i = 0; i < REFSize; i++ ) {
+    WER[i][0] = i;
+  }
+  for ( int j = 0; j < HYPSize; j++ ) {
+    WER[0][j] = j;
+  }
+  for ( int j = 1; j < HYPSize; j++ ) {
+    for ( int i = 1; i < REFSize; i++ ) {
+      if ( i == 1 ) {
+        cerr << endl;
+      }
+      const int viaDeletion = WER[i-1][j] + 1;
+      const int viaInsertion = WER[i][j-1] + 1;
+      const int viaSubstitution = WER[i-1][j-1] + 1;
+      if ( ref[i-1] == hyp[j-1] ) {
+        WER[i][j] = WER[i-1][j-1];
+        cerr << "- ";
+        cerr << WER[i][j] << "-\t";
+      } else if ( ( viaDeletion < viaInsertion ) && ( viaDeletion < viaSubstitution ) ) {
+        WER[i][j] = viaDeletion;
+        cerr << WER[i][j] << "D\t";
+      } else if ( ( viaInsertion < viaDeletion ) && ( viaInsertion < viaSubstitution ) ) {
+        WER[i][j] = viaInsertion;
+        cerr << WER[i][j] << "I\t";
+      } else {
+        // Ties fall through to a substitution.
+        WER[i][j] = viaSubstitution;
+        cerr << WER[i][j] << "S\t";
+      }
+    }
+  }
+  cerr << endl;
+  const int retour = WER[REFSize-1][HYPSize-1];
+  cerr << "i : " << REFSize - 1 << "\tj : " << HYPSize - 1 << endl;
+  return retour;
+}
+/**
+ * Integer-id overload: renders both id sequences as space-separated text,
+ * splits them again with stringToVector and delegates to the string overload.
+ */
+int terCalc::WERCalculation ( std::vector< int > ref, std::vector< int > hyp )
+{
+  stringstream buffer;
+  for ( size_t pos = 0; pos < ref.size(); ++pos ) {
+    if ( pos > 0 ) {
+      buffer << " ";
+    }
+    buffer << ref[pos];
+  }
+  string stringRef = buffer.str();
+
+  buffer.str ( "" );
+  for ( size_t pos = 0; pos < hyp.size(); ++pos ) {
+    if ( pos > 0 ) {
+      buffer << " ";
+    }
+    buffer << hyp[pos];
+  }
+  string stringHyp = buffer.str();
+
+  return WERCalculation ( stringToVector ( stringRef, " " ), stringToVector ( stringHyp , " " ) );
+}
+
+/**
+ * Integer-id overload of TER: renders both id sequences as space-separated
+ * text, splits them again with stringToVector and delegates to
+ * TER ( vector<string> hyp, vector<string> ref ).
+ *
+ * The hypothesis must be passed first. The previous code passed the reference
+ * in the hypothesis slot and vice versa, so the shifts were searched on the
+ * reference and numWords was taken from the hypothesis.
+ */
+terAlignment terCalc::TER ( std::vector< int > hyp, std::vector< int > ref )
+{
+  stringstream s;
+  for ( vector<int>::iterator l_it = ref.begin(); l_it != ref.end(); l_it++ ) {
+    if ( l_it != ref.begin() ) {
+      s << " ";
+    }
+    s << ( *l_it );
+  }
+  string stringRef = s.str();
+
+  s.str ( "" );
+  for ( vector<int>::iterator l_itHyp = hyp.begin(); l_itHyp != hyp.end(); l_itHyp++ ) {
+    if ( l_itHyp != hyp.begin() ) {
+      s << " ";
+    }
+    s << ( *l_itHyp );
+  }
+  string stringHyp = s.str();
+
+  // Argument order follows the string overload: hypothesis, then reference.
+  return TER ( stringToVector ( stringHyp , " " ), stringToVector ( stringRef , " " ) );
+}
+
+// Word-level edit-distance DP between ref and hyp with unit costs.
+// Returns the value of the bottom-right DP cell. As side effects it writes
+// both DP tables and the traced alignment to stderr and APPENDS the traced
+// alignment rows to the member l_WERalignment (which is not cleared here).
+// Each appended row holds: cell value, operation symbol, hypothesis word or
+// "***", reference word or "***".
+// NOTE(review): WER and WERchar are variable-length arrays, a compiler
+// extension rather than ISO C++.
+int terCalc::WERCalculation ( vector<string> ref, vector<string> hyp )
+{
+ int retour;
+ int REFSize = ( int ) ref.size() + 1;
+ int HYPSize = ( int ) hyp.size() + 1;
+ int WER[REFSize][HYPSize];
+ char WERchar[REFSize][HYPSize];
+ int i = 0;
+ int j = 0;
+ // Border cells: cost of turning a prefix into the empty sequence.
+ // Row 0 and column 0 of WERchar are never written.
+ for ( i = 0; i < REFSize; i++ ) {
+ WER[i][0] = ( int ) i;
+ }
+ for ( j = 0; j < HYPSize; j++ ) {
+ WER[0][j] = ( int ) j;
+ }
+ // Fill the tables; WERchar records which case produced each cell
+ // ('-' equal words, 'D', 'I', 'S'). Ties fall through to 'S'.
+ for ( j = 1; j < HYPSize; j++ ) {
+ for ( i = 1; i < REFSize; i++ ) {
+ if ( ref[i-1] == hyp[j-1] ) {
+ WER[i][j] = WER[i-1][j-1];
+ WERchar[i][j] = '-';
+ } else {
+ if ( ( ( WER[i-1][ j] + 1 ) < ( WER[i][j-1] + 1 ) ) && ( ( WER[i-1][j] + 1 ) < ( WER[i-1][j-1] + 1 ) ) ) {
+ WER[i][j] = ( WER[i-1][j] + 1 );
+ WERchar[i][j] = 'D';
+ } else {
+ if ( ( ( WER[i][j-1] + 1 ) < ( WER[i-1][j] + 1 ) ) && ( ( WER[i][j-1] + 1 ) < ( WER[i-1][j-1] + 1 ) ) ) {
+ WER[i][j] = ( WER[i][j-1] + 1 );
+ WERchar[i][j] = 'I';
+ } else {
+ WER[i][j] = ( WER[i-1][j-1] + 1 );
+ WERchar[i][j] = 'S';
+ }
+ }
+ }
+ }
+ }
+ cerr << endl;
+ retour = WER[REFSize-1][HYPSize-1];
+ cerr << "i : " << i - 1 << "\tj : " << j - 1 << endl;
+ j = HYPSize - 1;
+ i = REFSize - 1;
+ int k;
+ stringstream s;
+ if ( HYPSize > REFSize ) {
+ k = HYPSize;
+ } else {
+ k = REFSize;
+ }
+ // Trace back from the bottom-right cell, always stepping to the cheapest
+ // of the three neighbours (diagonal preferred, then left, then up) until a
+ // border is reached. Every visited cell is pushed onto local as
+ // (value, symbol).
+ WERalignment local;
+ while ( j > 0 && i > 0 ) {
+ cerr << "indice i : " << i << "\t";
+ cerr << "indice j : " << j << endl;
+ // The starting cell itself is recorded once, on the first iteration.
+ if ( ( j == HYPSize - 1 ) && ( i == REFSize - 1 ) ) {
+ alignmentElement localInfos;
+ s << WER[i][j];
+ localInfos.push_back ( s.str() );
+ s.str ( "" );
+ s << WERchar[i][j];
+ localInfos.push_back ( s.str() );
+ s.str ( "" );
+ local.push_back ( localInfos );
+ }
+ {
+ // NOTE(review): when i-1 or j-1 is 0 the WERchar cell read below was
+ // never initialised. That entry is the last one pushed, and the
+ // consuming loop further down never reads the last element of local.
+ if ( ( ( WER[i-1][j-1] ) <= ( WER[i-1][j] ) ) && ( ( WER[i-1][j-1] ) <= ( WER[i][j-1] ) ) ) {
+ alignmentElement localInfos;
+ s << WER[i-1][j-1];
+ localInfos.push_back ( s.str() );
+ s.str ( "" );
+ s << WERchar[i-1][j-1];
+ localInfos.push_back ( s.str() );
+ s.str ( "" );
+ local.push_back ( localInfos );
+ i--;
+ j--;
+ } else {
+ if ( ( ( WER[i][j-1] ) <= ( WER[i-1][j] ) ) && ( ( WER[i][j-1] ) <= ( WER[i-1][j-1] ) ) ) {
+ alignmentElement localInfos;
+ s << WER[i][j-1];
+ localInfos.push_back ( s.str() );
+ s.str ( "" );
+ s << WERchar[i][j-1];
+ localInfos.push_back ( s.str() );
+ s.str ( "" );
+ local.push_back ( localInfos );
+ j--;
+ } else {
+ alignmentElement localInfos;
+ s << WER[i-1][j];
+ localInfos.push_back ( s.str() );
+ s.str ( "" );
+ s << WERchar[i-1][j];
+ localInfos.push_back ( s.str() );
+ s.str ( "" );
+ local.push_back ( localInfos );
+ i--;
+ }
+ }
+ }
+ }
+
+ // Dump the operation table, then the cost table, one hypothesis word per
+ // output line.
+ for ( j = 1; j < HYPSize; j++ ) {
+ for ( i = 1; i < REFSize; i++ ) {
+ cerr << WERchar[i][j] << " ";
+ }
+ cerr << endl;
+ }
+ cerr << endl;
+ for ( j = 1; j < HYPSize; j++ ) {
+ for ( i = 1; i < REFSize; i++ ) {
+ cerr << WER[i][j] << " ";
+ }
+ cerr << endl;
+ }
+
+ cerr << "=================" << endl;
+ // Left over from a disabled loop; k is reassigned on the next statement.
+ k--;
+ // Walk local backwards (start of the sentences first), skipping its last
+ // element, and attach the hypothesis / reference word consumed by each step.
+ k = local.size() - 1;
+ int l = 0;
+ int m = 0;
+ while ( k > 0 ) {
+ alignmentElement localInfos;
+ localInfos = local.at ( k - 1 );
+ // NOTE(review): hyp has HYPSize - 1 elements, so this guard still lets
+ // l == HYPSize - 1 through to hyp.at ( l ), which would throw
+ // std::out_of_range; the same applies to m and ref below. Confirm whether
+ // the traceback can actually produce that many steps.
+ if ( ( int ) ( localInfos.at ( 1 ).compare ( "D" ) ) == 0 || l > HYPSize - 1 ) {
+ localInfos.push_back ( "***" );
+ } else {
+ localInfos.push_back ( hyp.at ( l ) );
+ l++;
+ }
+ if ( ( int ) ( localInfos.at ( 1 ).compare ( "I" ) ) == 0 || m > REFSize - 1 ) {
+ localInfos.push_back ( "***" );
+ } else {
+ localInfos.push_back ( ref.at ( m ) );
+ m++;
+ }
+ l_WERalignment.push_back ( localInfos );
+ k--;
+ }
+ cerr << endl;
+ // Print every row accumulated in l_WERalignment (including rows from
+ // earlier calls on this object).
+ k = 0;
+ m = 0;
+ while ( k < ( int ) l_WERalignment.size() ) {
+ alignmentElement localInfos;
+ localInfos = l_WERalignment.at ( k );
+ cerr << localInfos.at ( 0 ) << "\t" << localInfos.at ( 1 ) << "\t" << localInfos.at ( 2 ) << "\t" << localInfos.at ( 3 ) << endl;
+ k++;
+ }
+ cerr << endl;
+ cerr << "=================" << endl;
+ return retour;
+}
+
+// string terCalc::vectorToString(vector<string> vec)
+// {
+// string retour("");
+// for (vector<string>::iterator vecIter=vec.begin();vecIter!=vec.end(); vecIter++)
+// {
+// retour+=(*vecIter)+"\t";
+// }
+// return retour;
+// }
+
+// vector<string> terCalc::subVector(vector<string> vec, int start, int end)
+// {
+// if (start>end)
+// {
+// cerr << "ERREUR : terCalc::subVector : end > start"<<endl;
+// exit(0);
+// }
+// vector<string> retour;
+// for (int i=start; ((i<end) && (i< vec.size())); i++)
+// {
+// retour.push_back(vec.at(i));
+// }
+// return retour;
+// }
+
+/**
+ * Indexes every contiguous reference span that consists only of words also
+ * present in the hypothesis.
+ *
+ * For each such span, of at most MAX_SHIFT_SIZE + 1 words, the returned map
+ * stores the list of reference start positions under the key
+ * vectorToString ( span ).
+ */
+hashMapInfos terCalc::BuildWordMatches ( vector<string> hyp, vector<string> ref )
+{
+  hashMap tempHash;
+  hashMapInfos retour;
+  for ( int i = 0; i < ( int ) hyp.size(); i++ ) {
+    tempHash.addHasher ( hyp.at ( i ), "" );
+  }
+  // cor[i] tells whether ref[i] occurs anywhere in hyp. A std::vector is used
+  // instead of a variable-length array: VLAs are a compiler extension and an
+  // empty reference would have requested a zero-length one.
+  vector<bool> cor ( ref.size(), false );
+  for ( int i = 0; i < ( int ) ref.size(); i++ ) {
+    if ( tempHash.trouve ( ( string ) ref.at ( i ) ) ) {
+      cor[i] = true;
+    }
+  }
+  for ( int start = 0; start < ( int ) ref.size(); start++ ) {
+    if ( !cor[start] ) {
+      continue;
+    }
+    // Grow the span while every word is known and the length limit holds.
+    for ( int end = start; ( ( end < ( int ) ref.size() ) && ( end - start <= MAX_SHIFT_SIZE ) && ( cor[end] ) ); end++ ) {
+      vector<string> ajouter = subVector ( ref, start, end + 1 );
+      string ajouterString = vectorToString ( ajouter );
+      vector<int> values = retour.getValue ( ajouterString );
+      values.push_back ( start );
+      // More than one position means the key already existed: update it,
+      // otherwise create it.
+      if ( values.size() > 1 ) {
+        retour.setValue ( ajouterString, values );
+      } else {
+        retour.addValue ( ajouterString, values );
+      }
+    }
+  }
+  return retour;
+}
+
+/**
+ * Tells whether two closed intervals overlap. Each span stores its lower
+ * bound at index 0 and its upper bound at index 1.
+ */
+bool terCalc::spanIntersection ( vecInt refSpan, vecInt hypSpan )
+{
+  return ( refSpan.at ( 1 ) >= hypSpan.at ( 0 ) ) && ( refSpan.at ( 0 ) <= hypSpan.at ( 1 ) );
+}
+
+
+/**
+ * Beam-limited edit distance between hyp and ref (no shifts).
+ *
+ * Fills the member tables S (best cost of reaching cell [i][j], -1 = not
+ * reached) and P (operation that reached it: ' ' match, 'S' substitution,
+ * 'I' insertion, 'D' deletion), then traces the path back from the final
+ * cell.
+ *
+ * @param hyp          hypothesis words (column index j)
+ * @param ref          reference words (row index i)
+ * @param curHypSpans  spans of the hypothesis words; only consulted when both
+ *                     refSpans and hypSpans are non-empty
+ * @return alignment with numWords = ref.size(), numEdits = final cost and
+ *         alignment = the operation path
+ *
+ * Terminates the process with exit ( -1 ) when a sentence does not fit into
+ * S / P or when the traceback meets an unreached cell.
+ */
+terAlignment terCalc::MinEditDist ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans )
+{
+  double current_best = INF;
+  double last_best = INF;
+  int first_good = 0;
+  int current_first_good = 0;
+  int last_good = -1;
+  int cur_last_good = 0;
+  int last_peak = 0;
+  int cur_last_peak = 0;
+  int i, j;
+  double cost, icost, dcost;
+  double score;
+
+  NUM_BEAM_SEARCH_CALLS++;
+
+  // S and P are fixed-size members and are indexed up to [ref.size()][hyp.size()].
+  // Refuse longer sentences instead of writing past the end of the tables.
+  const size_t maxRows = sizeof ( S ) / sizeof ( S[0] );
+  const size_t maxCols = sizeof ( S[0] ) / sizeof ( S[0][0] );
+  if ( ( ref.size() + 1 > maxRows ) || ( hyp.size() + 1 > maxCols ) ) {
+    cerr << "ERROR : terCalc::MinEditDist : sentence too long : " << ref.size() << " reference words (max " << maxRows - 1 << "), " << hyp.size() << " hypothesis words (max " << maxCols - 1 << ")" << endl;
+    exit ( -1 );
+  }
+
+  for ( i = 0; i <= ( int ) ref.size(); i++ ) {
+    for ( j = 0; j <= ( int ) hyp.size(); j++ ) {
+      S[i][j] = -1.0;
+      P[i][j] = '0';
+    }
+  }
+  S[0][0] = 0.0;
+  // Column by column (one hypothesis position at a time); within a column only
+  // the rows between first_good and last_good of the previous column are expanded.
+  for ( j = 0; j <= ( int ) hyp.size(); j++ ) {
+    last_best = current_best;
+    current_best = INF;
+    first_good = current_first_good;
+    current_first_good = -1;
+    last_good = cur_last_good;
+    cur_last_good = -1;
+    // last_peak / cur_last_peak are maintained but not consulted by this function.
+    last_peak = cur_last_peak;
+    cur_last_peak = 0;
+    for ( i = first_good; i <= ( int ) ref.size(); i++ ) {
+      if ( i > last_good ) {
+        break;
+      }
+      if ( S[i][j] < 0 ) {
+        continue;
+      }
+      score = S[i][j];
+      // Beam pruning: drop cells much worse than the best of the previous
+      // column (never applied to the last column).
+      if ( ( j < ( int ) hyp.size() ) && ( score > last_best + BEAM_WIDTH ) ) {
+        continue;
+      }
+      if ( current_first_good == -1 ) {
+        current_first_good = i ;
+      }
+      // Diagonal step: match or substitution.
+      if ( ( i < ( int ) ref.size() ) && ( j < ( int ) hyp.size() ) ) {
+        if ( ( int ) refSpans.size() == 0 || ( int ) hypSpans.size() == 0 || spanIntersection ( refSpans.at ( i ), curHypSpans.at ( j ) ) ) {
+          if ( ( int ) ( ref.at ( i ).compare ( hyp.at ( j ) ) ) == 0 ) {
+            cost = match_cost + score;
+            if ( ( S[i+1][j+1] == -1 ) || ( cost < S[i+1][j+1] ) ) {
+              S[i+1][j+1] = cost;
+              P[i+1][j+1] = ' ';
+            }
+            if ( cost < current_best ) {
+              current_best = cost;
+            }
+            if ( current_best == cost ) {
+              cur_last_peak = i + 1;
+            }
+          } else {
+            cost = substitute_cost + score;
+            if ( ( S[i+1][j+1] < 0 ) || ( cost < S[i+1][j+1] ) ) {
+              S[i+1][j+1] = cost;
+              P[i+1][j+1] = 'S';
+              if ( cost < current_best ) {
+                current_best = cost;
+              }
+              if ( current_best == cost ) {
+                cur_last_peak = i + 1 ;
+              }
+            }
+          }
+        }
+      }
+      cur_last_good = i + 1;
+      // Horizontal step: the hypothesis word is an insertion.
+      if ( j < ( int ) hyp.size() ) {
+        icost = score + insert_cost;
+        if ( ( S[i][j+1] < 0 ) || ( S[i][j+1] > icost ) ) {
+          S[i][j+1] = icost;
+          P[i][j+1] = 'I';
+          if ( ( cur_last_peak < i ) && ( current_best == icost ) ) {
+            cur_last_peak = i;
+          }
+        }
+      }
+      // Vertical step: the reference word is a deletion. It stays in the
+      // current column, so the active range is extended to reach it.
+      if ( i < ( int ) ref.size() ) {
+        dcost = score + delete_cost;
+        if ( ( S[ i+1][ j] < 0.0 ) || ( S[i+1][j] > dcost ) ) {
+          S[i+1][j] = dcost;
+          P[i+1][j] = 'D';
+          if ( i >= last_good ) {
+            last_good = i + 1 ;
+          }
+        }
+      }
+    }
+  }
+
+  // First traceback pass: measure the path length and validate every cell.
+  int tracelength = 0;
+  i = ref.size();
+  j = hyp.size();
+  while ( ( i > 0 ) || ( j > 0 ) ) {
+    tracelength++;
+    if ( P[i][j] == ' ' ) {
+      i--;
+      j--;
+    } else if ( P[i][j] == 'S' ) {
+      i--;
+      j--;
+    } else if ( P[i][j] == 'D' ) {
+      i--;
+    } else if ( P[i][j] == 'I' ) {
+      j--;
+    } else {
+      cerr << "ERROR : terCalc::MinEditDist : Invalid path : " << P[i][j] << endl;
+      exit ( -1 );
+    }
+  }
+  // Second pass: store the operations in forward order.
+  vector<char> path ( tracelength );
+  i = ref.size();
+  j = hyp.size();
+  while ( ( i > 0 ) || ( j > 0 ) ) {
+    path[--tracelength] = P[i][j];
+    if ( P[i][j] == ' ' ) {
+      i--;
+      j--;
+    } else if ( P[i][j] == 'S' ) {
+      i--;
+      j--;
+    } else if ( P[i][j] == 'D' ) {
+      i--;
+    } else if ( P[i][j] == 'I' ) {
+      j--;
+    }
+  }
+  terAlignment to_return;
+  to_return.numWords = ref.size();
+  to_return.alignment = path;
+  to_return.numEdits = S[ref.size() ][hyp.size() ];
+  if ( PRINT_DEBUG ) {
+    cerr << "BEGIN DEBUG : terCalc::MinEditDist : to_return :" << endl << to_return.toString() << endl << "END DEBUG" << endl;
+  }
+  return to_return;
+
+}
+/**
+ * Computes the TER alignment of hyp against ref: starting from the plain
+ * edit distance, repeatedly applies the shift chosen by CalcBestShift until
+ * it reports none. The result carries the last alignment, all applied shifts
+ * and numEdits increased by the summed shift costs.
+ */
+terAlignment terCalc::TER ( vector<string> hyp, vector<string> ref )
+{
+  hashMapInfos refSpanPositions = BuildWordMatches ( hyp, ref );
+  terAlignment bestAlign = MinEditDist ( hyp, ref, hypSpans );
+  bestAlign.hyp = hyp;
+  bestAlign.ref = ref;
+  bestAlign.aftershift = hyp;
+
+  vector<string> shiftedHyp = hyp;
+  vector<terShift> appliedShifts;
+  double shiftCostTotal = 0;
+
+  if ( PRINT_DEBUG ) {
+    cerr << "BEGIN DEBUG : terCalc::TER : cur_align :" << endl << bestAlign.toString() << endl << "END DEBUG" << endl;
+  }
+  for ( ;; ) {
+    bestShiftStruct step;
+    step = CalcBestShift ( shiftedHyp, hyp, ref, refSpanPositions, bestAlign );
+    if ( step.m_empty ) {
+      break;
+    }
+    // Adopt the alignment produced by the chosen shift and remember the
+    // shift together with the state it led to.
+    terShift chosen = step.m_best_shift;
+    bestAlign = step.m_best_align;
+    shiftCostTotal += chosen.cost;
+    chosen.alignment = bestAlign.alignment;
+    chosen.aftershift = bestAlign.aftershift;
+    appliedShifts.push_back ( chosen );
+    shiftedHyp = bestAlign.aftershift;
+  }
+  terAlignment to_return;
+  to_return = bestAlign;
+  to_return.allshifts = appliedShifts;
+  to_return.numEdits += shiftCostTotal;
+  NUM_SEGMENTS_SCORED++;
+  return to_return;
+}
+// Searches for the single shift of cur that lowers the total cost
+// (edit distance + shift cost) the most.
+//   cur       - hypothesis after the shifts applied so far
+//   hyp, ref  - original hypothesis and reference
+//   rloc      - reference span index built by BuildWordMatches
+//   med_align - current alignment of cur against ref
+// Returns a bestShiftStruct with m_empty == true when no candidate qualified;
+// otherwise m_best_shift / m_best_align describe the chosen shift.
+bestShiftStruct terCalc::CalcBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment med_align )
+{
+ bestShiftStruct to_return;
+ bool anygain = false;
+ // Per-position error flags and the ref -> hyp position map, filled by
+ // FindAlignErr from med_align.
+ // NOTE(review): these are variable-length arrays (compiler extension) and
+ // are passed on as raw pointers.
+ bool herr[ ( int ) hyp.size() ];
+ bool rerr[ ( int ) ref.size() ];
+ int ralign[ ( int ) ref.size() ];
+ FindAlignErr ( med_align, herr, rerr, ralign );
+ // poss_shifts[i] holds the candidate shifts of i + 1 words.
+ vector<vecTerShift> poss_shifts;
+ poss_shifts = GatherAllPossShifts ( cur, ref, rloc, med_align, herr, rerr, ralign );
+ double curerr = med_align.numEdits;
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::CalcBestShift :" << endl;
+ cerr << "Possible Shifts:" << endl;
+ for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- ) {
+ for ( int j = 0; j < ( int ) ( poss_shifts.at ( i ) ).size(); j++ ) {
+ cerr << " [" << i << "] " << ( ( poss_shifts.at ( i ) ).at ( j ) ).toString() << endl;
+ }
+ }
+ cerr << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ double cur_best_shift_cost = 0.0;
+ terAlignment cur_best_align = med_align;
+ terShift cur_best_shift;
+
+
+
+ // Longest candidates first.
+ for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- ) {
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::CalcBestShift :" << endl;
+ cerr << "Considering shift of length " << i << " (" << ( poss_shifts.at ( i ) ).size() << ")" << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ /* Consider shifts of length i+1 */
+ // curfix is the improvement already achieved by the best shift so far;
+ // maxfix = 2 * (i + 1) is used as the bound on what a shift of this
+ // length could gain. Once the bound cannot beat the current best, the
+ // search stops.
+ double curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
+ double maxfix = ( 2 * ( 1 + i ) );
+ if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) {
+ break;
+ }
+
+ for ( int s = 0; s < ( int ) ( poss_shifts.at ( i ) ).size(); s++ ) {
+ // Same pruning test, re-evaluated because the best may have changed.
+ curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
+ if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) {
+ break;
+ }
+ terShift curshift = ( poss_shifts.at ( i ) ).at ( s );
+
+ // Apply the candidate and re-align the shifted hypothesis.
+ alignmentStruct shiftReturns = PerformShift ( cur, curshift );
+ vector<string> shiftarr = shiftReturns.nwords;
+ vector<vecInt> curHypSpans = shiftReturns.aftershift;
+
+ terAlignment curalign = MinEditDist ( shiftarr, ref, curHypSpans );
+
+ curalign.hyp = hyp;
+ curalign.ref = ref;
+ curalign.aftershift = shiftarr;
+
+ // Positive gain: this candidate is cheaper than the best seen so far.
+ double gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost );
+
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::CalcBestShift :" << endl;
+ cerr << "Gain for " << curshift.toString() << " is " << gain << ". (result: [" << curalign.join ( " ", shiftarr ) << "]" << endl;
+ cerr << "" << curalign.toString() << "\n" << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ // A zero gain is accepted only while no shift has been chosen yet
+ // (cur_best_shift_cost is still 0).
+ if ( ( gain > 0 ) || ( ( cur_best_shift_cost == 0 ) && ( gain == 0 ) ) ) {
+ anygain = true;
+ cur_best_shift = curshift;
+ cur_best_shift_cost = curshift.cost;
+ cur_best_align = curalign;
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::CalcBestShift :" << endl;
+ cerr << "Tmp Choosing shift: " << cur_best_shift.toString() << " gives:\n" << cur_best_align.toString() << "\n" << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ }
+ }
+ }
+ if ( anygain ) {
+ to_return.m_best_shift = cur_best_shift;
+ to_return.m_best_align = cur_best_align;
+ to_return.m_empty = false;
+ } else {
+ to_return.m_empty = true;
+ }
+ return to_return;
+}
+
+/**
+ * Replays the alignment path and fills the caller-provided arrays:
+ *   herr[h]   - true when hypothesis position h is part of an edit,
+ *   rerr[r]   - true when reference position r is part of an edit,
+ *   ralign[r] - hypothesis position reached when reference position r was
+ *               consumed (-1 if no hypothesis word had been consumed yet).
+ * An unknown path symbol terminates the process with exit ( -1 ).
+ */
+void terCalc::FindAlignErr ( terAlignment align, bool* herr, bool* rerr, int* ralign )
+{
+  if ( PRINT_DEBUG ) {
+    cerr << "BEGIN DEBUG : terCalc::FindAlignErr : " << endl << align.toString() << endl;
+    cerr << "END DEBUG " << endl;
+  }
+  int hypIdx = -1;
+  int refIdx = -1;
+  for ( int step = 0; step < ( int ) align.alignment.size(); ++step ) {
+    const char op = align.alignment[step];
+    switch ( op ) {
+    case ' ':
+    case 'S':
+      // Both sentences advance; only a substitution counts as an error.
+      ++hypIdx;
+      ++refIdx;
+      herr[hypIdx] = ( op == 'S' );
+      rerr[refIdx] = ( op == 'S' );
+      ralign[refIdx] = hypIdx;
+      break;
+    case 'I':
+      ++hypIdx;
+      herr[hypIdx] = true;
+      break;
+    case 'D':
+      ++refIdx;
+      rerr[refIdx] = true;
+      ralign[refIdx] = hypIdx;
+      break;
+    default:
+      cerr << "ERROR : terCalc::FindAlignErr : Invalid mini align sequence " << op << " at pos " << step << endl;
+      exit ( -1 );
+    }
+  }
+}
+
+// Enumerates every candidate shift of a hypothesis span that also occurs in
+// the reference.
+//   hyp, ref - current hypothesis and the reference
+//   rloc     - maps a span (as produced by vectorToString) to its start
+//              positions in the reference
+//   herr, rerr, ralign - outputs of FindAlignErr for the current alignment
+// Returns a vector with MAX_SHIFT_SIZE + 1 buckets; bucket n holds the shifts
+// of n + 1 words. When shifting is disabled (MAX_SHIFT_SIZE or MAX_SHIFT_DIST
+// not positive) an empty vector is returned instead.
+vector<vecTerShift> terCalc::GatherAllPossShifts ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign )
+{
+ vector<vecTerShift> to_return;
+ // Don't even bother to look if shifts can't be done
+ if ( ( MAX_SHIFT_SIZE <= 0 ) || ( MAX_SHIFT_DIST <= 0 ) ) {
+ return to_return;
+ }
+
+ vector<vecTerShift> allshifts ( MAX_SHIFT_SIZE + 1 );
+
+ for ( int start = 0; start < ( int ) hyp.size(); start++ ) {
+ // The first word of the span must occur in the reference at all.
+ string subVectorHypString = vectorToString ( subVector ( hyp, start, start + 1 ) );
+ if ( ! rloc.trouve ( subVectorHypString ) ) {
+ continue;
+ }
+
+ // ... and at least one of its reference occurrences must be aligned to a
+ // different hypothesis position within MAX_SHIFT_DIST.
+ bool ok = false;
+ vector<int> mtiVec = rloc.getValue ( subVectorHypString );
+ vector<int>::iterator mti = mtiVec.begin();
+ while ( mti != mtiVec.end() && ( ! ok ) ) {
+ int moveto = ( *mti );
+ mti++;
+ if ( ( start != ralign[moveto] ) && ( ( ralign[moveto] - start ) <= MAX_SHIFT_DIST ) && ( ( start - ralign[moveto] - 1 ) <= MAX_SHIFT_DIST ) ) {
+ ok = true;
+ }
+ }
+ if ( ! ok ) {
+ continue;
+ }
+ ok = true;
+ // Grow the span one word at a time; the loop stops as soon as an
+ // iteration leaves ok == false.
+ for ( int end = start; ( ok && ( end < ( int ) hyp.size() ) && ( end < start + MAX_SHIFT_SIZE ) ); end++ ) {
+ /* check if cand is good if so, add it */
+ vector<string> cand = subVector ( hyp, start, end + 1 );
+ ok = false;
+ if ( ! ( rloc.trouve ( vectorToString ( cand ) ) ) ) {
+ continue;
+ }
+
+ // A span without any hypothesis-side error is not worth moving, but a
+ // longer span starting here may still be.
+ bool any_herr = false;
+
+ for ( int i = 0; ( ( i <= ( end - start ) ) && ( ! any_herr ) ); i++ ) {
+ if ( herr[start+i] ) {
+ any_herr = true;
+ }
+ }
+ if ( any_herr == false ) {
+ ok = true;
+ continue;
+ }
+
+ // Try every reference occurrence of the span as a destination.
+ vector<int> movetoitVec;
+ movetoitVec = rloc.getValue ( ( string ) vectorToString ( cand ) );
+ vector<int>::iterator movetoit = movetoitVec.begin();
+ while ( movetoit != movetoitVec.end() ) {
+ int moveto = ( *movetoit );
+ movetoit++;
+ // The destination must map outside the span itself and lie within
+ // MAX_SHIFT_DIST of its start.
+ if ( ! ( ( ralign[moveto] != start ) && ( ( ralign[moveto] < start ) || ( ralign[moveto] > end ) ) && ( ( ralign[moveto] - start ) <= MAX_SHIFT_DIST ) && ( ( start - ralign[moveto] ) <= MAX_SHIFT_DIST ) ) ) {
+ continue;
+ }
+ ok = true;
+
+ /* check to see if there are any errors in either string
+ (only move if this is the case!)
+ */
+
+ bool any_rerr = false;
+ for ( int i = 0; ( i <= end - start ) && ( ! any_rerr ); i++ ) {
+ if ( rerr[moveto+i] ) {
+ any_rerr = true;
+ }
+ }
+ if ( ! any_rerr ) {
+ continue;
+ }
+ // roff selects the reference position, relative to moveto, whose
+ // aligned hypothesis position becomes the insertion point.
+ for ( int roff = -1; roff <= ( end - start ); roff++ ) {
+ terShift topush;
+ bool topushNull = true;
+ // Destination before the first reference word: newloc -1.
+ if ( ( roff == -1 ) && ( moveto == 0 ) ) {
+ if ( PRINT_DEBUG ) {
+
+ cerr << "BEGIN DEBUG : terCalc::GatherAllPossShifts 01 : " << endl << "Consider making " << start << "..." << end << " moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: -1" << endl << "END DEBUG" << endl;
+ }
+ terShift t01 ( start, end, -1, -1 );
+ topush = t01;
+ topushNull = false;
+ } else if ( ( start != ralign[moveto+roff] ) && ( ( roff == 0 ) || ( ralign[moveto+roff] != ralign[moveto] ) ) ) {
+ int newloc = ralign[moveto+roff];
+ if ( PRINT_DEBUG ) {
+
+ cerr << "BEGIN DEBUG : terCalc::GatherAllPossShifts 02 : " << endl << "Consider making " << start << "..." << end << " moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: " << newloc << endl << "END DEBUG" << endl;
+ }
+ terShift t02 ( start, end, moveto + roff, newloc );
+ topush = t02;
+ topushNull = false;
+ }
+ if ( !topushNull ) {
+ topush.shifted = cand;
+ topush.cost = shift_cost;
+ if ( PRINT_DEBUG ) {
+
+ cerr << "BEGIN DEBUG : terCalc::GatherAllPossShifts 02 : " << endl;
+ cerr << "start : " << start << endl;
+ cerr << "end : " << end << endl;
+ cerr << "end - start : " << end - start << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ // Bucket index is the span length minus one.
+ ( allshifts.at ( end - start ) ).push_back ( topush );
+ }
+ }
+ }
+ }
+ }
+ // Copy the buckets into the result vector.
+ to_return.clear();
+ for ( int i = 0; i < MAX_SHIFT_SIZE + 1; i++ ) {
+ to_return.push_back ( ( vecTerShift ) allshifts.at ( i ) );
+ }
+ return to_return;
+}
+
+
+/**
+ * Convenience overload: applies the shift described by s (its start, end and
+ * newloc fields) to words.
+ */
+alignmentStruct terCalc::PerformShift ( vector<string> words, terShift s )
+{
+  const int spanStart = s.start;
+  const int spanEnd = s.end;
+  const int target = s.newloc;
+  return PerformShift ( words, spanStart, spanEnd, target );
+}
+
+
+/**
+ * Moves the words [start, end] so that they follow position newloc
+ * (newloc == -1 moves them to the front) and returns the reordered words in
+ * nwords. When hypSpans is non-empty its entries are reordered the same way
+ * and returned in aftershift; otherwise aftershift is empty.
+ */
+alignmentStruct terCalc::PerformShift ( vector<string> words, int start, int end, int newloc )
+{
+  // Copies a run of source positions to the next free output positions,
+  // keeping words and spans in step.
+  struct RangeCopier {
+    const vector<string>& srcWords;
+    vector<string>& dstWords;
+    const vector<vecInt>& srcSpans;
+    vector<vecInt>& dstSpans;
+    int c;
+    void copy ( int first, int last ) {
+      for ( int i = first; i <= last; i++ ) {
+        dstWords.at ( c++ ) = srcWords.at ( i );
+        if ( ( int ) srcSpans.size() > 0 ) {
+          dstSpans.at ( c - 1 ) = srcSpans.at ( i );
+        }
+      }
+    }
+  };
+
+  vector<string> nwords ( words );
+  vector<vecInt> spans ( ( int ) hypSpans.size() );
+  alignmentStruct toreturn;
+  const int lastWord = ( int ) words.size() - 1;
+
+  if ( PRINT_DEBUG ) {
+    if ( ( int ) hypSpans.size() > 0 ) {
+      cerr << "BEGIN DEBUG : terCalc::PerformShift :" << endl << "word length: " << ( int ) words.size() << " span length: " << ( int ) hypSpans.size() << endl << "END DEBUG " << endl;
+    } else {
+      cerr << "BEGIN DEBUG : terCalc::PerformShift :" << endl << "word length: " << ( int ) words.size() << " span length: null" << endl << "END DEBUG " << endl;
+    }
+  }
+
+  RangeCopier out = { words, nwords, hypSpans, spans, 0 };
+  if ( newloc == -1 ) {
+    // Span first, then everything before it, then everything after it.
+    out.copy ( start, end );
+    out.copy ( 0, start - 1 );
+    out.copy ( end + 1, lastWord );
+  } else if ( newloc < start ) {
+    // Shift to the left: the span goes right after newloc.
+    out.copy ( 0, newloc );
+    out.copy ( start, end );
+    out.copy ( newloc + 1, start - 1 );
+    out.copy ( end + 1, lastWord );
+  } else if ( newloc > end ) {
+    // Shift to the right: the span goes right after newloc.
+    out.copy ( 0, start - 1 );
+    out.copy ( end + 1, newloc );
+    out.copy ( start, end );
+    out.copy ( newloc + 1, lastWord );
+  } else {
+    // we are moving inside of ourselves
+    const int pivot = end + ( newloc - start );
+    out.copy ( 0, start - 1 );
+    out.copy ( end + 1, ( pivot < lastWord ) ? pivot : lastWord );
+    out.copy ( start, end );
+    out.copy ( pivot + 1, lastWord );
+  }
+  NUM_SHIFTS_CONSIDERED++;
+
+  toreturn.nwords = nwords;
+  toreturn.aftershift = spans;
+  return toreturn;
+}
+/** Switches the diagnostic output written to stderr on (true) or off (false). */
+void terCalc::setDebugMode ( bool b )
+{
+  this->PRINT_DEBUG = b;
+}
+
+}
diff --git a/mert/TER/tercalc.h b/mert/TER/tercalc.h
new file mode 100644
index 000000000..9e1a01f65
--- /dev/null
+++ b/mert/TER/tercalc.h
@@ -0,0 +1,82 @@
+#ifndef MERT_TER_TER_CALC_H_
+#define MERT_TER_TER_CALC_H_
+
+#include <vector>
+#include <stdio.h>
+#include <string.h>
+#include <sstream>
+#include "hashMap.h"
+#include "hashMapInfos.h"
+#include "hashMapStringInfos.h"
+#include "terAlignment.h"
+#include "tools.h"
+#include "terShift.h"
+#include "alignmentStruct.h"
+#include "bestShiftStruct.h"
+
+using namespace std;
+using namespace Tools;
+using namespace HashMapSpace;
+namespace TERCpp
+{
+// typedef size_t WERelement[2];
+// Alignment vector holding each word's hash and its evaluation (0=ok, 1=sub, 2=ins, 3=del)
+typedef vector<terShift> vecTerShift;
+/**
+ * Declares the terCalc calculator: edit costs, search limits, scratch tables
+ * and the WER / minimum-edit-distance / TER / shift entry points.
+ * Only declarations live here; no member function is defined in this header.
+ */
+class terCalc
+{
+private :
+// Alignment vector holding each word's hash and its evaluation (0=ok, 1=sub, 2=ins, 3=del)
+ WERalignment l_WERalignment;
+// Hash map holding the hash values of each word
+ hashMap bagOfWords;
+ int MAX_SHIFT_SIZE;
+ /* Variables for some internal counting. */
+ int NUM_SEGMENTS_SCORED;
+ int NUM_SHIFTS_CONSIDERED;
+ int NUM_BEAM_SEARCH_CALLS;
+ int MAX_SHIFT_DIST;
+ // Flag written by setDebugMode().
+ bool PRINT_DEBUG;
+
+ /* These are resized by the MIN_EDIT_DIST code if they aren't big enough */
+ // NOTE(review): S and P are fixed-size 1000x1000 arrays, so they cannot
+ // actually be resized; they also make every terCalc object hold one million
+ // doubles plus one million chars inline. Confirm how instances are allocated
+ // and how inputs longer than 1000 words are handled.
+ double S[1000][1000];
+ char P[1000][1000];
+ vector<vecInt> refSpans;
+ vector<vecInt> hypSpans;
+ int BEAM_WIDTH;
+
+public:
+ // Cost fields and the INF value; no values are assigned to them in this header.
+ int shift_cost;
+ int insert_cost;
+ int delete_cost;
+ int substitute_cost;
+ int match_cost;
+ double INF;
+ terCalc();
+
+// ~terCalc();
+// size_t* hashVec ( vector<string> s );
+ // Stores b in PRINT_DEBUG.
+ void setDebugMode ( bool b );
+ // Three overloads taking the reference and hypothesis as raw size_t arrays,
+ // word vectors or int vectors.
+ int WERCalculation ( size_t * ref, size_t * hyp );
+ int WERCalculation ( vector<string> ref, vector<string> hyp );
+ int WERCalculation ( vector<int> ref, vector<int> hyp );
+// string vectorToString(vector<string> vec);
+// vector<string> subVector(vector<string> vec, int start, int end);
+ hashMapInfos BuildWordMatches ( vector<string> hyp, vector<string> ref );
+ terAlignment MinEditDist ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans );
+ bool spanIntersection ( vecInt refSpan, vecInt hypSpan );
+ // TER overloads: word vectors with or without an average reference length,
+ // or int vectors.
+ terAlignment TER ( vector<string> hyp, vector<string> ref , float avRefLength );
+ terAlignment TER ( vector<string> hyp, vector<string> ref );
+ terAlignment TER ( vector<int> hyp, vector<int> ref );
+ bestShiftStruct CalcBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment cur_align );
+ // herr, rerr and ralign are raw pointers supplied by the caller.
+ // NOTE(review): presumably output arrays sized by the caller - confirm against tercalc.cpp.
+ void FindAlignErr ( terAlignment align, bool* herr, bool* rerr, int* ralign );
+ vector<vecTerShift> GatherAllPossShifts ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign );
+ // Two overloads: the shift is given either as a terShift or as explicit
+ // start / end / newloc positions.
+ alignmentStruct PerformShift ( vector<string> words, terShift s );
+ alignmentStruct PerformShift ( vector<string> words, int start, int end, int newloc );
+};
+
+}
+
+#endif // MERT_TER_TER_CALC_H_
diff --git a/mert/TER/tools.cpp b/mert/TER/tools.cpp
new file mode 100644
index 000000000..2d910ec05
--- /dev/null
+++ b/mert/TER/tools.cpp
@@ -0,0 +1,545 @@
+#include "tools.h"
+
+using namespace std;
+namespace Tools
+{
+
+/**
+ * Joins the elements of a string vector into one string, placing a tab
+ * character between consecutive elements.
+ * @param vec strings to join
+ * @return the joined text; empty when vec is empty
+ */
+std::string vectorToString ( std::vector<std::string> vec )
+{
+  std::string joined;
+  for ( std::size_t idx = 0; idx < vec.size(); ++idx ) {
+    // Only elements after the first are preceded by the separator.
+    if ( idx > 0 ) {
+      joined += "\t";
+    }
+    joined += vec[idx];
+  }
+  return joined;
+}
+
+/**
+ * Joins the elements of a string vector into one string, placing the
+ * separator s between consecutive elements.
+ * @param vec strings to join
+ * @param s   separator inserted between two elements
+ * @return the joined text; empty when vec is empty
+ */
+std::string vectorToString ( std::vector<std::string> vec, std::string s )
+{
+  std::string joined;
+  for ( std::size_t idx = 0; idx < vec.size(); ++idx ) {
+    // Only elements after the first are preceded by the separator.
+    if ( idx > 0 ) {
+      joined += s;
+    }
+    joined += vec[idx];
+  }
+  return joined;
+}
+
+/**
+ * Copies the half-open index range [start, end) out of a vector of strings.
+ * The range is cut off at the end of vec, so end may exceed vec.size().
+ * A negative start that is below end makes vec.at() throw std::out_of_range.
+ * @param vec   source vector
+ * @param start index of the first element to copy
+ * @param end   index one past the last element to copy
+ * @return a new vector holding the selected elements
+ * When start > end a diagnostic is written to cerr and the process exits.
+ */
+vector<string> subVector ( vector<string> vec, int start, int end )
+{
+  vector<string> retour;
+  if ( start > end ) {
+    // The message previously read "end > start", the opposite of the tested condition.
+    cerr << "ERREUR : TERcalc::subVector : start > end" << endl;
+    // Exit with a failure status; exit ( 0 ) reported success to the caller's shell.
+    exit ( EXIT_FAILURE );
+  }
+  for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) {
+    retour.push_back ( vec.at ( i ) );
+  }
+  return retour;
+}
+
+/**
+ * Copies the half-open index range [start, end) out of a vector of ints.
+ * The range is cut off at the end of vec, so end may exceed vec.size().
+ * A negative start that is below end makes vec.at() throw std::out_of_range.
+ * @param vec   source vector
+ * @param start index of the first element to copy
+ * @param end   index one past the last element to copy
+ * @return a new vector holding the selected elements
+ * When start > end a diagnostic is written to cerr and the process exits.
+ */
+vector<int> subVector ( vector<int> vec, int start, int end )
+{
+  vector<int> retour;
+  if ( start > end ) {
+    // The message previously read "end > start", the opposite of the tested condition.
+    cerr << "ERREUR : TERcalc::subVector : start > end" << endl;
+    // Exit with a failure status; exit ( 0 ) reported success to the caller's shell.
+    exit ( EXIT_FAILURE );
+  }
+  for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) {
+    retour.push_back ( vec.at ( i ) );
+  }
+  return retour;
+}
+
+/**
+ * Copies the half-open index range [start, end) out of a vector of floats.
+ * The range is cut off at the end of vec, so end may exceed vec.size().
+ * A negative start that is below end makes vec.at() throw std::out_of_range.
+ * @param vec   source vector
+ * @param start index of the first element to copy
+ * @param end   index one past the last element to copy
+ * @return a new vector holding the selected elements
+ * When start > end a diagnostic is written to cerr and the process exits.
+ */
+vector<float> subVector ( vector<float> vec, int start, int end )
+{
+  vector<float> retour;
+  if ( start > end ) {
+    // The message previously read "end > start", the opposite of the tested condition.
+    cerr << "ERREUR : TERcalc::subVector : start > end" << endl;
+    // Exit with a failure status; exit ( 0 ) reported success to the caller's shell.
+    exit ( EXIT_FAILURE );
+  }
+  for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) {
+    retour.push_back ( vec.at ( i ) );
+  }
+  return retour;
+}
+
+/**
+ * Returns a new vector holding the same strings, in the same order, as vec.
+ * @param vec vector to duplicate
+ */
+std::vector<std::string> copyVector ( std::vector<std::string> vec )
+{
+  std::vector<std::string> duplicate ( vec.begin(), vec.end() );
+  return duplicate;
+}
+/**
+ * Returns a new vector holding the same ints, in the same order, as vec.
+ * @param vec vector to duplicate
+ */
+std::vector<int> copyVector ( std::vector<int> vec )
+{
+  std::vector<int> duplicate ( vec.begin(), vec.end() );
+  return duplicate;
+}
+/**
+ * Returns a new vector holding the same floats, in the same order, as vec.
+ * @param vec vector to duplicate
+ */
+std::vector<float> copyVector ( std::vector<float> vec )
+{
+  std::vector<float> duplicate ( vec.begin(), vec.end() );
+  return duplicate;
+}
+/**
+ * Splits s into fields at every character that occurs in tok.
+ * Empty fields are kept, and the text after the last delimiter (possibly
+ * empty) is always appended, so the result holds at least one element.
+ * @param s   text to split
+ * @param tok set of single-character delimiters
+ * @return the fields in order of appearance
+ */
+std::vector<std::string> stringToVector ( std::string s, std::string tok )
+{
+  std::vector<std::string> fields;
+  std::string current;
+  for ( std::size_t pos = 0; pos < s.size(); ++pos ) {
+    const char ch = s[pos];
+    bool isDelimiter = false;
+    // No early exit on purpose: each occurrence of ch inside tok closes a
+    // field, so a delimiter listed twice in tok yields an extra empty field.
+    for ( std::size_t d = 0; d < tok.size(); ++d ) {
+      if ( ch == tok[d] ) {
+        fields.push_back ( current );
+        current.clear();
+        isDelimiter = true;
+      }
+    }
+    if ( !isDelimiter ) {
+      current += ch;
+    }
+  }
+  fields.push_back ( current );
+  return fields;
+}
+/**
+ * Splits s at every character that occurs in tok and converts each non-empty
+ * field with atoi. Empty fields are dropped.
+ * @param s   text to split
+ * @param tok set of single-character delimiters
+ * @return the converted values in order of appearance
+ */
+std::vector<int> stringToVectorInt ( std::string s, std::string tok )
+{
+  std::vector<int> values;
+  std::string current;
+  for ( std::size_t pos = 0; pos < s.size(); ++pos ) {
+    const char ch = s[pos];
+    bool isDelimiter = false;
+    for ( std::size_t d = 0; d < tok.size(); ++d ) {
+      if ( ch != tok[d] ) {
+        continue;
+      }
+      if ( !current.empty() ) {
+        values.push_back ( atoi ( current.c_str() ) );
+      }
+      current.clear();
+      isDelimiter = true;
+    }
+    if ( !isDelimiter ) {
+      current += ch;
+    }
+  }
+  // Convert whatever follows the last delimiter.
+  if ( !current.empty() ) {
+    values.push_back ( atoi ( current.c_str() ) );
+  }
+  return values;
+}
+/**
+ * Splits s at every character that occurs in tok and converts each non-empty
+ * field with atof. Empty fields are dropped.
+ * @param s   text to split
+ * @param tok set of single-character delimiters
+ * @return the converted values in order of appearance
+ */
+vector<float> stringToVectorFloat ( string s, string tok )
+{
+  vector<float> to_return;
+  string to_push ( "" );
+  bool pushed = false;
+  string::iterator sIt;
+  for ( sIt = s.begin(); sIt < s.end(); sIt++ ) {
+    pushed = false;
+    for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) {
+      if ( ( *sIt ) == ( *sTok ) ) {
+        if ( ( int ) to_push.length() > 0 ) {
+          to_return.push_back ( atof ( to_push.c_str() ) );
+        }
+        to_push = "";
+        pushed = true;
+      }
+    }
+    if ( !pushed ) {
+      to_push.push_back ( ( *sIt ) );
+    }
+  }
+  if ( ( int ) to_push.length() > 0 ) {
+    // The trailing field must go through atof like every other field; it was
+    // previously parsed with atoi, which dropped its fractional part.
+    to_return.push_back ( atof ( to_push.c_str() ) );
+  }
+  return to_return;
+}
+
+/**
+ * Returns str with every byte in the range 0x41..0x5A (ASCII 'A'..'Z')
+ * shifted up by 0x20; all other bytes are left unchanged.
+ * @param str text to convert
+ * @return the converted copy
+ */
+std::string lowerCase ( std::string str )
+{
+  const int firstUpper = 0x41;  // ASCII 'A'
+  const int lastUpper = 0x5A;   // ASCII 'Z'
+  const int caseOffset = 0x20;  // distance between the two ASCII cases
+  for ( std::string::iterator it = str.begin(); it != str.end(); ++it ) {
+    const bool isUpper = ( *it >= firstUpper ) && ( *it <= lastUpper );
+    if ( isUpper ) {
+      *it = *it + caseOffset;
+    }
+  }
+  return str;
+}
+
+/*
+string removePunctTercom ( string str )
+{
+ string str_mod = str;
+ sregex rex;
+ string replace;
+
+
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[\"]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[,]" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "([\\.]$)" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[\\?]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[\\;]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[\\:]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[\\!]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[\\(]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[\\)]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[ ]+" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[ ]+$" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ return str_mod;
+}
+string removePunct ( string str )
+{
+ string str_mod = str;
+ sregex rex;
+ string replace;
+
+
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[\"]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[,]" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "([\\.]$)" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[\\?]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[\\;]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[\\:]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[\\!]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[\\(]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[\\)]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[ ]+" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[ ]+$" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ return str_mod;
+}
+string tokenizePunct ( string str )
+{
+ string str_mod = str;
+ sregex rex = sregex::compile ( "(([^0-9])([\\,])([^0-9]))" );
+ string replace ( "$2 $3 $4" );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "(([^0-9])([\\.])([^0-9]))" );
+ replace = ( "$2 $3 $4" );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.]) )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.])$)" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "(^([A-Z]|[a-z]) ([\\.]) )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "(([A-Z]|[a-z])([\\.]) ([A-Z]|[a-z])([\\.]) )" );
+ replace = ( "$2.$4. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[\\?]" );
+ replace = ( " ? " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[\\;]" );
+ replace = ( " ; " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "(([^0-9])([\\:])([^0-9]))" );
+ replace = ( "$2 $3 $4" );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[\\!]" );
+ replace = ( " ! " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[\\(]" );
+ replace = ( " ( " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[\\)]" );
+ replace = ( " ) " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[\"]" );
+ replace = ( " \" " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "(num_ \\( ([^\\)]+) \\))" );
+ replace = ( "num_($2)" );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "(ordinal_ \\( ([^\\)]*) \\))" );
+ replace = ( "ordinal_($2)" );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "(^([Mm]) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "( ([Mm]) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "(^([Dd]r) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "( ([Dd]r) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "(^([Mm]r) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "( ([Mm]r) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "(^([Mm]rs) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "( ([Mm]rs) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "(^([Nn]o) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "( ([Nn]o) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+// rex = sregex::compile ( "(^(([Jj]an)|([Ff]ev)|([Mm]ar)|([Aa]pr)|([Jj]un)|([Jj]ul)|([Aa]ug)|([Ss]ept)|([Oo]ct)|([Nn]ov)|([Dd]ec)) \\.)" );
+// replace = ( "$2." );
+// str_mod = regex_replace ( str_mod, rex, replace );
+//
+// rex = sregex::compile ( "( (([Jj]an)|([Ff]ev)|([Mm]ar)|([Aa]pr)|([Jj]un)|([Jj]ul)|([Aa]ug)|([Ss]ept)|([Oo]ct)|([Nn]ov)|([Dd]ec)) \\.)" );
+// replace = ( " $2." );
+// str_mod = regex_replace ( str_mod, rex, replace );
+//
+// rex = sregex::compile ( "(^(([Gg]en)|([Cc]ol)) \\.)" );
+// replace = ( "$2." );
+// str_mod = regex_replace ( str_mod, rex, replace );
+//
+// rex = sregex::compile ( "( (([Gg]en)|([Cc]ol)) \\.)" );
+// replace = ( " $2." );
+// str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "(^(([A-Z][a-z])) \\. )" );
+ replace = ( "$2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "( (([A-Z][a-z])) \\. )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "(^(([A-Z][a-z][a-z])) \\. )" );
+ replace = ( "$2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "( (([A-Z][a-z][a-z])) \\. )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[ ]+" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "[ ]+$" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ return str_mod;
+}
+
+string normalizeStd ( string str )
+{
+ string str_mod = str;
+ sregex rex = sregex::compile ( "(<skipped>)" );
+ string replace ( "" );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "-\n" );
+ replace = ( "" );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "\n" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "&quot;" );
+ replace = ( "\"" );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "&amp;" );
+ replace = ( "& " );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "&lt;" );
+ replace = ( "<" );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ rex = sregex::compile ( "&gt;" );
+ replace = ( ">" );
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ return str_mod;
+}
+*/
+
+/**
+ * Returns a copy of a param record.
+ * param is declared in tools.h with bool and string fields only; all of them
+ * are duplicated.
+ * @param p record to duplicate
+ */
+param copyParam ( param p )
+{
+  param duplicate ( p );
+  return duplicate;
+}
+/**
+ * Renders a param record as text, one "name = value" line per field.
+ * Boolean fields are written with the stream's default formatting.
+ * @param p record to render
+ * @return the rendered text
+ */
+std::string printParams ( param p )
+{
+  std::ostringstream report;
+  report << "caseOn = " << p.caseOn << "\n"
+         << "noPunct = " << p.noPunct << "\n"
+         << "debugMode = " << p.debugMode << "\n"
+         << "hypothesisFile = " << p.hypothesisFile << "\n"
+         << "referenceFile = " << p.referenceFile << "\n"
+         << "normalize = " << p.normalize << "\n"
+         << "noTxtIds = " << p.noTxtIds << "\n"
+         << "outputFileExtension = " << p.outputFileExtension << "\n"
+         << "outputFileName = " << p.outputFileName << "\n"
+         << "sgmlInputs = " << p.sgmlInputs << "\n"
+         << "tercomLike = " << p.tercomLike << "\n";
+  return report.str();
+}
+
+
+}
diff --git a/mert/TER/tools.h b/mert/TER/tools.h
new file mode 100644
index 000000000..6f78b9a6a
--- /dev/null
+++ b/mert/TER/tools.h
@@ -0,0 +1,65 @@
+#ifndef MERT_TER_TOOLS_H_
+#define MERT_TER_TOOLS_H_
+
+#include <vector>
+#include <iostream>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string>
+#include <sstream>
+
+using namespace std;
+
+namespace Tools
+{
+// Shorthand names for the standard vector instantiations used by this code.
+typedef vector<double> vecDouble;
+typedef vector<char> vecChar;
+typedef vector<int> vecInt;
+typedef vector<float> vecFloat;
+typedef vector<string> vecString;
+// An alignment element is a vector of strings; a WERalignment is a vector of
+// such elements.
+typedef vector<string> alignmentElement;
+typedef vector<alignmentElement> WERalignment;
+
+
+// Option record made of boolean switches and file-name strings.
+// copyParam() duplicates it and printParams() renders every field as a
+// "name = value" line.
+// NOTE(review): the trailing comments on referenceFile / hypothesisFile below
+// look generic ("resources", "configuration files"); confirm what the two
+// paths actually point to.
+struct param {
+ bool debugMode;
+ string referenceFile; // path to the resources
+ string hypothesisFile; // path to the configuration files
+ string outputFileExtension;
+ string outputFileName;
+ bool noPunct;
+ bool caseOn;
+ bool normalize;
+ bool tercomLike;
+ bool sgmlInputs;
+ bool noTxtIds;
+};
+// param = { false, "","","","" };
+
+// class tools{
+// private:
+// public:
+
+// Join the strings of vec, separated by a tab (one-argument form) or by s.
+string vectorToString ( vector<string> vec );
+string vectorToString ( vector<string> vec, string s );
+// Copy the index range [start, end) of vec, cut off at the end of vec.
+vector<string> subVector ( vector<string> vec, int start, int end );
+vector<int> subVector ( vector<int> vec, int start, int end );
+vector<float> subVector ( vector<float> vec, int start, int end );
+// Return an element-by-element copy of vec.
+vector<string> copyVector ( vector<string> vec );
+vector<int> copyVector ( vector<int> vec );
+vector<float> copyVector ( vector<float> vec );
+// Split s at any character contained in tok; the Int / Float forms convert
+// each non-empty field to a number.
+vector<string> stringToVector ( string s, string tok );
+vector<int> stringToVectorInt ( string s, string tok );
+vector<float> stringToVectorFloat ( string s, string tok );
+// Convert the bytes 'A'..'Z' of str to lower case.
+string lowerCase(string str);
+// NOTE(review): in tools.cpp the bodies of removePunct, tokenizePunct,
+// removePunctTercom and normalizeStd sit inside a commented-out region, so
+// calling them will presumably fail at link time unless they are defined
+// elsewhere - confirm before use.
+string removePunct(string str);
+string tokenizePunct(string str);
+string removePunctTercom(string str);
+string normalizeStd(string str);
+// Render every field of p as a "name = value" line.
+string printParams(param p);
+// };
+// Return a copy of p.
+param copyParam(param p);
+
+}
+
+#endif // MERT_TER_TOOLS_H_