Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/mert/TER
diff options
context:
space:
mode:
author: Christophe SERVAN <cservan@ontario.grenoble.xrce.xerox.com> 2014-08-29 16:46:56 +0400
committer: Christophe SERVAN <cservan@ontario.grenoble.xrce.xerox.com> 2014-08-29 16:46:56 +0400
commit: be9b3cb1c6875c26f0ef8d1240f4eb0b747fa86b (patch)
tree: 1a6f95af8333eae88dfe2ca41745c97e88be731e /mert/TER
parent: 049a9a9ea701e879d0fbc1a219b874679ccbe15a (diff)
Bug fix about the TER calculation
Diffstat (limited to 'mert/TER')
-rw-r--r-- mert/TER/alignmentStruct.cpp | 34
-rw-r--r-- mert/TER/alignmentStruct.h | 53
-rw-r--r-- mert/TER/bestShiftStruct.h | 48
-rw-r--r-- mert/TER/hashMap.cpp | 250
-rw-r--r-- mert/TER/hashMap.h | 69
-rw-r--r-- mert/TER/hashMapInfos.cpp | 249
-rw-r--r-- mert/TER/hashMapInfos.h | 69
-rw-r--r-- mert/TER/hashMapStringInfos.cpp | 322
-rw-r--r-- mert/TER/hashMapStringInfos.h | 69
-rw-r--r-- mert/TER/infosHasher.cpp | 71
-rw-r--r-- mert/TER/infosHasher.h | 57
-rw-r--r-- mert/TER/stringHasher.cpp | 64
-rw-r--r-- mert/TER/stringHasher.h | 58
-rw-r--r-- mert/TER/stringInfosHasher.cpp | 71
-rw-r--r-- mert/TER/stringInfosHasher.h | 60
-rw-r--r-- mert/TER/terAlignment.cpp | 285
-rw-r--r-- mert/TER/terAlignment.h | 83
-rw-r--r-- mert/TER/terShift.cpp | 134
-rw-r--r-- mert/TER/terShift.h | 75
-rw-r--r-- mert/TER/tercalc.cpp | 1823
-rw-r--r-- mert/TER/tercalc.h | 121
-rw-r--r-- mert/TER/tools.cpp | 1085
-rw-r--r-- mert/TER/tools.h | 117
23 files changed, 2997 insertions, 2270 deletions
diff --git a/mert/TER/alignmentStruct.cpp b/mert/TER/alignmentStruct.cpp
index 15b4a8032..544ee61ac 100644
--- a/mert/TER/alignmentStruct.cpp
+++ b/mert/TER/alignmentStruct.cpp
@@ -1,17 +1,37 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
#include "alignmentStruct.h"
using namespace std;
namespace TERCpp
{
-string alignmentStruct::toString()
-{
- stringstream s;
+ string alignmentStruct::toString()
+ {
+ stringstream s;
// s << "nword : " << vectorToString(nwords)<<endl;
// s << "alignment" << vectorToString(alignment)<<endl;
// s << "afterShift" << vectorToString(alignment)<<endl;
- s << "Nothing to be printed" <<endl;
- return s.str();
-}
+ s << "Nothing to be printed" <<endl;
+ return s.str();
+ }
// alignmentStruct::alignmentStruct()
// {
@@ -79,7 +99,7 @@ string alignmentStruct::toString()
// return s.str();
// }
-/* The distance of the shift. */
+ /* The distance of the shift. */
// int alignmentStruct::distance()
// {
// if (moveto < start)
diff --git a/mert/TER/alignmentStruct.h b/mert/TER/alignmentStruct.h
index 9e9a75468..adda2c345 100644
--- a/mert/TER/alignmentStruct.h
+++ b/mert/TER/alignmentStruct.h
@@ -1,5 +1,26 @@
-#ifndef MERT_TER_ALIGNMENT_STRUCT_H_
-#define MERT_TER_ALIGNMENT_STRUCT_H_
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
+#ifndef MERT_TER_ALIGNMENTSTRUCT_H_
+#define MERT_TER_ALIGNMENTSTRUCT_H_
+
#include <vector>
#include <stdio.h>
@@ -7,15 +28,16 @@
#include <sstream>
#include "tools.h"
+
using namespace std;
using namespace Tools;
namespace TERCpp
{
-class alignmentStruct
-{
-private:
-public:
+ class alignmentStruct
+ {
+ private:
+ public:
// alignmentStruct();
// alignmentStruct (int _start, int _end, int _moveto, int _newloc);
@@ -31,15 +53,14 @@ public:
// int end;
// int moveto;
// int newloc;
- vector<string> nwords; // The words we shifted
- vector<char> alignment ; // for pra_more output
- vector<vecInt> aftershift; // for pra_more output
- // This is used to store the cost of a shift, so we don't have to
- // calculate it multiple times.
- double cost;
- string toString();
-};
+ vector<string> nwords; // The words we shifted
+ vector<char> alignment ; // for pra_more output
+ vector<vecInt> aftershift; // for pra_more output
+ // This is used to store the cost of a shift, so we don't have to
+ // calculate it multiple times.
+ double cost;
+ string toString();
+ };
}
-
-#endif // MERT_TER_ALIGNMENT_STRUCT_H_
+#endif
\ No newline at end of file
diff --git a/mert/TER/bestShiftStruct.h b/mert/TER/bestShiftStruct.h
index bfebe3b1e..9457fd1d8 100644
--- a/mert/TER/bestShiftStruct.h
+++ b/mert/TER/bestShiftStruct.h
@@ -1,5 +1,26 @@
-#ifndef MERT_TER_BEST_SHIFT_STRUCT_H_
-#define MERT_TER_BEST_SHIFT_STRUCT_H_
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
+#ifndef __BESTSHIFTSTRUCT_H_
+#define __BESTSHIFTSTRUCT_H_
+
#include <vector>
#include <stdio.h>
@@ -15,10 +36,10 @@ using namespace Tools;
namespace TERCpp
{
-class bestShiftStruct
-{
-private:
-public:
+ class bestShiftStruct
+ {
+ private:
+ public:
// alignmentStruct();
// alignmentStruct (int _start, int _end, int _moveto, int _newloc);
@@ -34,17 +55,16 @@ public:
// int end;
// int moveto;
// int newloc;
- terShift m_best_shift;
- terAlignment m_best_align;
- bool m_empty;
+ terShift m_best_shift;
+ terAlignment m_best_align;
+ bool m_empty;
// vector<string> nwords; // The words we shifted
// char* alignment ; // for pra_more output
// vector<vecInt> aftershift; // for pra_more output
- // This is used to store the cost of a shift, so we don't have to
- // calculate it multiple times.
+ // This is used to store the cost of a shift, so we don't have to
+ // calculate it multiple times.
// double cost;
-};
+ };
}
-
-#endif // MERT_TER_BEST_SHIFT_STRUCT_H_
+#endif
\ No newline at end of file
diff --git a/mert/TER/hashMap.cpp b/mert/TER/hashMap.cpp
index 469167aaa..de84ff796 100644
--- a/mert/TER/hashMap.cpp
+++ b/mert/TER/hashMap.cpp
@@ -1,3 +1,23 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
#include "hashMap.h"
// The following class defines a hash function for strings
@@ -8,142 +28,156 @@ using namespace std;
namespace HashMapSpace
{
// hashMap::hashMap();
-/* hashMap::~hashMap()
+ /* hashMap::~hashMap()
+ {
+ // vector<stringHasher>::const_iterator del = m_hasher.begin();
+ for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
+ {
+ delete(*del);
+ }
+ }*/
+ /**
+ * int hashMap::trouve ( long searchKey )
+ * @param searchKey
+ * @return
+ */
+ int hashMap::trouve ( long searchKey )
{
-// vector<stringHasher>::const_iterator del = m_hasher.begin();
- for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
- {
- delete(*del);
- }
- }*/
-/**
- * int hashMap::trouve ( long searchKey )
- * @param searchKey
- * @return
- */
-int hashMap::trouve ( long searchKey )
-{
- long foundKey;
+ long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return 1;
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return 1;
+ }
+ }
+ return 0;
}
- }
- return 0;
-}
-int hashMap::trouve ( string key )
-{
- long searchKey=hashValue ( key );
- long foundKey;;
+ int hashMap::trouve ( string key )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return 1;
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return 1;
+ }
+ }
+ return 0;
}
- }
- return 0;
-}
-/**
- * long hashMap::hashValue ( string key )
- * @param key
- * @return
- */
-long hashMap::hashValue ( string key )
-{
- locale loc; // the "C" locale
- const collate<char>& coll = use_facet<collate<char> >(loc);
- return coll.hash(key.data(),key.data()+key.length());
+ /**
+ * long hashMap::hashValue ( string key )
+ * @param key
+ * @return
+ */
+ long hashMap::hashValue ( string key )
+ {
+ locale loc; // the "C" locale
+ const collate<char>& coll = use_facet<collate<char> >(loc);
+ return coll.hash(key.data(),key.data()+key.length());
// boost::hash<string> hasher;
// return hasher ( key );
-}
-/**
- * void hashMap::addHasher ( string key, string value )
- * @param key
- * @param value
- */
-void hashMap::addHasher ( string key, string value )
-{
- if ( trouve ( hashValue ( key ) ) ==0 ) {
+ }
+ /**
+ * void hashMap::addHasher ( string key, string value )
+ * @param key
+ * @param value
+ */
+ void hashMap::addHasher ( string key, string value )
+ {
+ if ( trouve ( hashValue ( key ) ) ==0 )
+ {
// cerr << "ICI1" <<endl;
- stringHasher H ( hashValue ( key ),key,value );
+ stringHasher H ( hashValue ( key ),key,value );
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
// cerr << "ICI2" <<endl;
- m_hasher.push_back ( H );
- }
-}
-stringHasher hashMap::getHasher ( string key )
-{
- long searchKey=hashValue ( key );
- long foundKey;
- stringHasher defaut(0,"","");
+ m_hasher.push_back ( H );
+ }
+ }
+ stringHasher hashMap::getHasher ( string key )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;
+ stringHasher defaut(0,"","");
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return ( *l_hasher );
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return ( *l_hasher );
+ }
+ }
+ return defaut;
}
- }
- return defaut;
-}
-string hashMap::getValue ( string key )
-{
- long searchKey=hashValue ( key );
- long foundKey;
+ string hashMap::getValue ( string key )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
- return ( *l_hasher ).getValue();
+ return ( *l_hasher ).getValue();
+ }
+ }
+ return "";
}
- }
- return "";
-}
-string hashMap::searchValue ( string value )
-{
+ string hashMap::searchValue ( string value )
+ {
// long searchKey=hashValue ( key );
// long foundKey;
- string foundValue;
+ string foundValue;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundValue= ( *l_hasher ).getValue();
- if ( foundValue.compare ( value ) == 0 ) {
- return ( *l_hasher ).getKey();
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundValue= ( *l_hasher ).getValue();
+ if ( foundValue.compare ( value ) == 0 )
+ {
+ return ( *l_hasher ).getKey();
+ }
+ }
+ return "";
}
- }
- return "";
-}
-void hashMap::setValue ( string key , string value )
-{
- long searchKey=hashValue ( key );
- long foundKey;
+ void hashMap::setValue ( string key , string value )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- ( *l_hasher ).setValue ( value );
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ ( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue();
+ }
+ }
}
- }
-}
-/**
- *
- */
-void hashMap::printHash()
-{
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
-}
+ /**
+ *
+ */
+ void hashMap::printHash()
+ {
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+ }
diff --git a/mert/TER/hashMap.h b/mert/TER/hashMap.h
index 85020d041..6cb721573 100644
--- a/mert/TER/hashMap.h
+++ b/mert/TER/hashMap.h
@@ -1,10 +1,29 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
/*
* Generic hashmap manipulation functions
*/
-
-#ifndef MERT_TER_HASHMAP_H_
-#define MERT_TER_HASHMAP_H_
-
+#ifndef __HASHMAP_H_
+#define __HASHMAP_H_
+#include <boost/functional/hash.hpp>
#include "stringHasher.h"
#include <vector>
#include <string>
@@ -16,28 +35,30 @@ using namespace std;
namespace HashMapSpace
{
-class hashMap
-{
-private:
- vector<stringHasher> m_hasher;
+ class hashMap
+ {
+ private:
+ vector<stringHasher> m_hasher;
-public:
+ public:
// ~hashMap();
- long hashValue ( string key );
- int trouve ( long searchKey );
- int trouve ( string key );
- void addHasher ( string key, string value );
- stringHasher getHasher ( string key );
- string getValue ( string key );
- string searchValue ( string key );
- void setValue ( string key , string value );
- void printHash();
- vector<stringHasher> getHashMap();
- string printStringHash();
- string printStringHash2();
- string printStringHashForLexicon();
-};
+ long hashValue ( string key );
+ int trouve ( long searchKey );
+ int trouve ( string key );
+ void addHasher ( string key, string value );
+ stringHasher getHasher ( string key );
+ string getValue ( string key );
+ string searchValue ( string key );
+ void setValue ( string key , string value );
+ void printHash();
+ vector<stringHasher> getHashMap();
+ string printStringHash();
+ string printStringHash2();
+ string printStringHashForLexicon();
+ };
+
}
-#endif // MERT_TER_HASHMAP_H_
+
+#endif
diff --git a/mert/TER/hashMapInfos.cpp b/mert/TER/hashMapInfos.cpp
index 9cd431196..23f57d808 100644
--- a/mert/TER/hashMapInfos.cpp
+++ b/mert/TER/hashMapInfos.cpp
@@ -1,3 +1,23 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
#include "hashMapInfos.h"
// The following class defines a hash function for strings
@@ -8,108 +28,117 @@ using namespace std;
namespace HashMapSpace
{
// hashMapInfos::hashMap();
-/* hashMapInfos::~hashMap()
+ /* hashMapInfos::~hashMap()
+ {
+ // vector<infosHasher>::const_iterator del = m_hasher.begin();
+ for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
+ {
+ delete(*del);
+ }
+ }*/
+ /**
+ * int hashMapInfos::trouve ( long searchKey )
+ * @param searchKey
+ * @return
+ */
+ int hashMapInfos::trouve ( long searchKey )
{
-// vector<infosHasher>::const_iterator del = m_hasher.begin();
- for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
- {
- delete(*del);
- }
- }*/
-/**
- * int hashMapInfos::trouve ( long searchKey )
- * @param searchKey
- * @return
- */
-int hashMapInfos::trouve ( long searchKey )
-{
- long foundKey;
+ long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return 1;
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return 1;
+ }
+ }
+ return 0;
}
- }
- return 0;
-}
-int hashMapInfos::trouve ( string key )
-{
- long searchKey=hashValue ( key );
- long foundKey;;
+ int hashMapInfos::trouve ( string key )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return 1;
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return 1;
+ }
+ }
+ return 0;
}
- }
- return 0;
-}
-/**
- * long hashMapInfos::hashValue ( string key )
- * @param key
- * @return
- */
-long hashMapInfos::hashValue ( string key )
-{
- locale loc; // the "C" locale
- const collate<char>& coll = use_facet<collate<char> >(loc);
- return coll.hash(key.data(),key.data()+key.length());
+ /**
+ * long hashMapInfos::hashValue ( string key )
+ * @param key
+ * @return
+ */
+ long hashMapInfos::hashValue ( string key )
+ {
+ locale loc; // the "C" locale
+ const collate<char>& coll = use_facet<collate<char> >(loc);
+ return coll.hash(key.data(),key.data()+key.length());
// boost::hash<string> hasher;
// return hasher ( key );
-}
-/**
- * void hashMapInfos::addHasher ( string key, string value )
- * @param key
- * @param value
- */
-void hashMapInfos::addHasher ( string key, vector<int> value )
-{
- if ( trouve ( hashValue ( key ) ) ==0 ) {
+ }
+ /**
+ * void hashMapInfos::addHasher ( string key, string value )
+ * @param key
+ * @param value
+ */
+ void hashMapInfos::addHasher ( string key, vector<int> value )
+ {
+ if ( trouve ( hashValue ( key ) ) ==0 )
+ {
// cerr << "ICI1" <<endl;
- infosHasher H ( hashValue ( key ),key,value );
+ infosHasher H ( hashValue ( key ),key,value );
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
// cerr << "ICI2" <<endl;
- m_hasher.push_back ( H );
- }
-}
-void hashMapInfos::addValue ( string key, vector<int> value )
-{
- addHasher ( key, value );
-}
-infosHasher hashMapInfos::getHasher ( string key )
-{
- long searchKey=hashValue ( key );
- long foundKey;
+ m_hasher.push_back ( H );
+ }
+ }
+ void hashMapInfos::addValue ( string key, vector<int> value )
+ {
+ addHasher ( key, value );
+ }
+ infosHasher hashMapInfos::getHasher ( string key )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return ( *l_hasher );
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return ( *l_hasher );
+ }
+ }
+ vector<int> temp;
+ infosHasher defaut(0,"",temp);
+ return defaut;
}
- }
- vector<int> temp;
- infosHasher defaut(0,"",temp);
- return defaut;
-}
-vector<int> hashMapInfos::getValue ( string key )
-{
- long searchKey=hashValue ( key );
- long foundKey;
- vector<int> retour;
+ vector<int> hashMapInfos::getValue ( string key )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;
+ vector<int> retour;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
- return ( *l_hasher ).getValue();
+ return ( *l_hasher ).getValue();
+ }
+ }
+ return retour;
}
- }
- return retour;
-}
// string hashMapInfos::searchValue ( string value )
// {
// // long searchKey=hashValue ( key );
@@ -129,30 +158,42 @@ vector<int> hashMapInfos::getValue ( string key )
// }
//
-void hashMapInfos::setValue ( string key , vector<int> value )
-{
- long searchKey=hashValue ( key );
- long foundKey;
+ void hashMapInfos::setValue ( string key , vector<int> value )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- ( *l_hasher ).setValue ( value );
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ ( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue();
+ }
+ }
+ }
+ string hashMapInfos::toString ()
+ {
+ stringstream to_return;
+ for ( vector<infosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ to_return << (*l_hasher).toString();
+ // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+ return to_return.str();
}
- }
-}
-
-/**
- *
- */
-void hashMapInfos::printHash()
-{
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ /**
+ *
+ */
+ void hashMapInfos::printHash()
+ {
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
-}
+ }
+ }
diff --git a/mert/TER/hashMapInfos.h b/mert/TER/hashMapInfos.h
index 8b56e9d02..5e7dbb6e7 100644
--- a/mert/TER/hashMapInfos.h
+++ b/mert/TER/hashMapInfos.h
@@ -1,9 +1,29 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
/*
* Generic hashmap manipulation functions
*/
-#ifndef MERT_TER_HASHMAP_INFOS_H_
-#define MERT_TER_HASHMAP_INFOS_H_
-
+#ifndef __HASHMAPINFOS_H_
+#define __HASHMAPINFOS_H_
+#include <boost/functional/hash.hpp>
#include "infosHasher.h"
#include <vector>
#include <string>
@@ -14,29 +34,32 @@ using namespace std;
namespace HashMapSpace
{
-class hashMapInfos
-{
-private:
- vector<infosHasher> m_hasher;
+ class hashMapInfos
+ {
+ private:
+ vector<infosHasher> m_hasher;
-public:
+ public:
// ~hashMap();
- long hashValue ( string key );
- int trouve ( long searchKey );
- int trouve ( string key );
- void addHasher ( string key, vector<int> value );
- void addValue ( string key, vector<int> value );
- infosHasher getHasher ( string key );
- vector<int> getValue ( string key );
+ long hashValue ( string key );
+ int trouve ( long searchKey );
+ int trouve ( string key );
+ void addHasher ( string key, vector<int> value );
+ void addValue ( string key, vector<int> value );
+ infosHasher getHasher ( string key );
+ vector<int> getValue ( string key );
// string searchValue ( string key );
- void setValue ( string key , vector<int> value );
- void printHash();
- vector<infosHasher> getHashMap();
- string printStringHash();
- string printStringHash2();
- string printStringHashForLexicon();
-};
+ void setValue ( string key , vector<int> value );
+ void printHash();
+ string toString();
+ vector<infosHasher> getHashMap();
+ string printStringHash();
+ string printStringHash2();
+ string printStringHashForLexicon();
+ };
+
}
-#endif // MERT_TER_HASHMAP_INFOS_H_
+
+#endif
diff --git a/mert/TER/hashMapStringInfos.cpp b/mert/TER/hashMapStringInfos.cpp
index 0fbb0a98a..773c148d4 100644
--- a/mert/TER/hashMapStringInfos.cpp
+++ b/mert/TER/hashMapStringInfos.cpp
@@ -1,3 +1,23 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
#include "hashMapStringInfos.h"
// The following class defines a hash function for strings
@@ -7,157 +27,179 @@ using namespace std;
namespace HashMapSpace
{
-// hashMapStringInfos::hashMap();
-/* hashMapStringInfos::~hashMap()
-{
-// vector<stringInfosHasher>::const_iterator del = m_hasher.begin();
- for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
- {
- delete(*del);
- }
-}*/
-/**
-* int hashMapStringInfos::trouve ( long searchKey )
-* @param searchKey
-* @return
-*/
-int hashMapStringInfos::trouve ( long searchKey )
-{
- long foundKey;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return 1;
+ // hashMapStringInfos::hashMap();
+ /* hashMapStringInfos::~hashMap()
+ {
+ // vector<stringInfosHasher>::const_iterator del = m_hasher.begin();
+ for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
+ {
+ delete(*del);
+ }
+ }*/
+ /**
+ * int hashMapStringInfos::trouve ( long searchKey )
+ * @param searchKey
+ * @return
+ */
+ int hashMapStringInfos::trouve ( long searchKey )
+ {
+ long foundKey;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return 1;
+ }
+ }
+ return 0;
}
- }
- return 0;
-}
-int hashMapStringInfos::trouve ( string key )
-{
- long searchKey = hashValue ( key );
- long foundKey;;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return 1;
+ int hashMapStringInfos::trouve ( string key )
+ {
+ long searchKey = hashValue ( key );
+ long foundKey;;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return 1;
+ }
+ }
+ return 0;
}
- }
- return 0;
-}
-/**
-* long hashMapStringInfos::hashValue ( string key )
-* @param key
-* @return
-*/
-long hashMapStringInfos::hashValue ( string key )
-{
- locale loc; // the "C" locale
- const collate<char>& coll = use_facet<collate<char> > ( loc );
- return coll.hash ( key.data(), key.data() + key.length() );
+ /**
+ * long hashMapStringInfos::hashValue ( string key )
+ * @param key
+ * @return
+ */
+ long hashMapStringInfos::hashValue ( string key )
+ {
+ locale loc; // the "C" locale
+ const collate<char>& coll = use_facet<collate<char> > ( loc );
+ return coll.hash ( key.data(), key.data() + key.length() );
// boost::hash<string> hasher;
// return hasher ( key );
-}
-/**
-* void hashMapStringInfos::addHasher ( string key, string value )
-* @param key
-* @param value
-*/
-void hashMapStringInfos::addHasher ( string key, vector<string> value )
-{
- if ( trouve ( hashValue ( key ) ) == 0 ) {
- // cerr << "ICI1" <<endl;
- stringInfosHasher H ( hashValue ( key ), key, value );
- // cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
- // cerr << "ICI2" <<endl;
-
- m_hasher.push_back ( H );
- }
-}
-void hashMapStringInfos::addValue ( string key, vector<string> value )
-{
- addHasher ( key, value );
-}
-stringInfosHasher hashMapStringInfos::getHasher ( string key )
-{
- long searchKey = hashValue ( key );
- long foundKey;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return ( *l_hasher );
}
- }
- vector<string> tmp;
- stringInfosHasher defaut ( 0, "", tmp );
- return defaut;
-}
-vector<string> hashMapStringInfos::getValue ( string key )
-{
- long searchKey = hashValue ( key );
- long foundKey;
- vector<string> retour;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- // cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
- return ( *l_hasher ).getValue();
+ /**
+ * void hashMapStringInfos::addHasher ( string key, string value )
+ * @param key
+ * @param value
+ */
+ void hashMapStringInfos::addHasher ( string key, vector<string> value )
+ {
+ if ( trouve ( hashValue ( key ) ) == 0 )
+ {
+ // cerr << "ICI1" <<endl;
+ stringInfosHasher H ( hashValue ( key ), key, value );
+ // cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
+ // cerr << "ICI2" <<endl;
+
+ m_hasher.push_back ( H );
+ }
}
- }
- return retour;
-}
-// string hashMapStringInfos::searchValue ( string value )
-// {
-// // long searchKey=hashValue ( key );
-// // long foundKey;
-// vector<int> foundValue;
-//
-// // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
-// for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
-// {
-// foundValue= ( *l_hasher ).getValue();
-// /* if ( foundValue.compare ( value ) == 0 )
-// {
-// return ( *l_hasher ).getKey();
-// }*/
-// }
-// return "";
-// }
-//
-
-void hashMapStringInfos::setValue ( string key , vector<string> value )
-{
- long searchKey = hashValue ( key );
- long foundKey;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- ( *l_hasher ).setValue ( value );
- // return ( *l_hasher ).getValue();
+ void hashMapStringInfos::addValue ( string key, vector<string> value )
+ {
+ addHasher ( key, value );
+ }
+ stringInfosHasher hashMapStringInfos::getHasher ( string key )
+ {
+ long searchKey = hashValue ( key );
+ long foundKey;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return ( *l_hasher );
+ }
+ }
+ vector<string> tmp;
+ stringInfosHasher defaut ( 0, "", tmp );
+ return defaut;
+ }
+ vector<string> hashMapStringInfos::getValue ( string key )
+ {
+ long searchKey = hashValue ( key );
+ long foundKey;
+ vector<string> retour;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ // cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
+ return ( *l_hasher ).getValue();
+ }
+ }
+ return retour;
+ }
+ // string hashMapStringInfos::searchValue ( string value )
+ // {
+ // // long searchKey=hashValue ( key );
+ // // long foundKey;
+ // vector<int> foundValue;
+ //
+ // // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ // for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ // {
+ // foundValue= ( *l_hasher ).getValue();
+ // /* if ( foundValue.compare ( value ) == 0 )
+ // {
+ // return ( *l_hasher ).getKey();
+ // }*/
+ // }
+ // return "";
+ // }
+ //
+
+ void hashMapStringInfos::setValue ( string key , vector<string> value )
+ {
+ long searchKey = hashValue ( key );
+ long foundKey;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ ( *l_hasher ).setValue ( value );
+ // return ( *l_hasher ).getValue();
+ }
+ }
}
- }
-}
+ string hashMapStringInfos::toString ()
+ {
+ stringstream to_return;
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ to_return << (*l_hasher).toString();
+ // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+ return to_return.str();
+ }
-/**
-*
-*/
-void hashMapStringInfos::printHash()
-{
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
- // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
-}
-vector< stringInfosHasher > hashMapStringInfos::getHashMap()
-{
- return m_hasher;
-}
+ /**
+ *
+ */
+ void hashMapStringInfos::printHash()
+ {
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+ }
+ vector< stringInfosHasher > hashMapStringInfos::getHashMap()
+ {
+ return m_hasher;
+ }
diff --git a/mert/TER/hashMapStringInfos.h b/mert/TER/hashMapStringInfos.h
index 870274f3d..5337d50f2 100644
--- a/mert/TER/hashMapStringInfos.h
+++ b/mert/TER/hashMapStringInfos.h
@@ -1,9 +1,29 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
/*
* Generic hashmap manipulation functions
*/
-#ifndef MERT_TER_HASHMAP_STRING_INFOS_H_
-#define MERT_TER_HASHMAP_STRING_INFOS_H_
-
+#ifndef __HASHMAPSTRINGINFOS_H_
+#define __HASHMAPSTRINGINFOS_H_
+#include <boost/functional/hash.hpp>
#include "stringInfosHasher.h"
#include <vector>
#include <string>
@@ -14,29 +34,32 @@ using namespace std;
namespace HashMapSpace
{
-class hashMapStringInfos
-{
-private:
- vector<stringInfosHasher> m_hasher;
+ class hashMapStringInfos
+ {
+ private:
+ vector<stringInfosHasher> m_hasher;
-public:
+ public:
// ~hashMap();
- long hashValue ( string key );
- int trouve ( long searchKey );
- int trouve ( string key );
- void addHasher ( string key, vector<string> value );
- void addValue ( string key, vector<string> value );
- stringInfosHasher getHasher ( string key );
- vector<string> getValue ( string key );
+ long hashValue ( string key );
+ int trouve ( long searchKey );
+ int trouve ( string key );
+ void addHasher ( string key, vector<string> value );
+ void addValue ( string key, vector<string> value );
+ stringInfosHasher getHasher ( string key );
+ vector<string> getValue ( string key );
// string searchValue ( string key );
- void setValue ( string key , vector<string> value );
- void printHash();
- vector<stringInfosHasher> getHashMap();
- string printStringHash();
- string printStringHash2();
- string printStringHashForLexicon();
-};
+ void setValue ( string key , vector<string> value );
+ void printHash();
+ string toString();
+ vector<stringInfosHasher> getHashMap();
+ string printStringHash();
+ string printStringHash2();
+ string printStringHashForLexicon();
+ };
+
}
-#endif // MERT_TER_HASHMAP_STRING_INFOS_H_
+
+#endif
diff --git a/mert/TER/infosHasher.cpp b/mert/TER/infosHasher.cpp
index 654b0b26f..8ce23ae44 100644
--- a/mert/TER/infosHasher.cpp
+++ b/mert/TER/infosHasher.cpp
@@ -1,34 +1,61 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
#include "infosHasher.h"
// The following class defines a hash function for strings
using namespace std;
+using namespace Tools;
namespace HashMapSpace
{
-infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt )
-{
- m_hashKey=cle;
- m_key=cleTxt;
- m_value=valueVecInt;
-}
+ infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt )
+ {
+ m_hashKey=cle;
+ m_key=cleTxt;
+ m_value=valueVecInt;
+ }
// infosHasher::~infosHasher(){};*/
-long infosHasher::getHashKey()
-{
- return m_hashKey;
-}
-string infosHasher::getKey()
-{
- return m_key;
-}
-vector<int> infosHasher::getValue()
-{
- return m_value;
-}
-void infosHasher::setValue ( vector<int> value )
-{
- m_value=value;
-}
+ long infosHasher::getHashKey()
+ {
+ return m_hashKey;
+ }
+ string infosHasher::getKey()
+ {
+ return m_key;
+ }
+ vector<int> infosHasher::getValue()
+ {
+ return m_value;
+ }
+ void infosHasher::setValue ( vector<int> value )
+ {
+ m_value=value;
+ }
+ string infosHasher::toString()
+ {
+ stringstream to_return;
+ to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
+ return to_return.str();
+ }
// typedef stdext::hash_map<std::string,string, stringhasher> HASH_S_S;
diff --git a/mert/TER/infosHasher.h b/mert/TER/infosHasher.h
index 02a32280b..d3d56317a 100644
--- a/mert/TER/infosHasher.h
+++ b/mert/TER/infosHasher.h
@@ -1,31 +1,54 @@
-#ifndef MERT_TER_INFO_SHASHER_H_
-#define MERT_TER_INFO_SHASHER_H_
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
+#ifndef __INFOSHASHER_H_
+#define __INFOSHASHER_H_
#include <string>
+// #include <ext/hash_map>
#include <stdio.h>
#include <iostream>
+#include <sstream>
#include <vector>
+#include "tools.h"
using namespace std;
namespace HashMapSpace
{
-class infosHasher
-{
-private:
- long m_hashKey;
- string m_key;
- vector<int> m_value;
+ class infosHasher
+ {
+ private:
+ long m_hashKey;
+ string m_key;
+ vector<int> m_value;
-public:
- infosHasher ( long cle, string cleTxt, vector<int> valueVecInt );
- long getHashKey();
- string getKey();
- vector<int> getValue();
- void setValue ( vector<int> value );
+ public:
+ infosHasher ( long cle, string cleTxt, vector<int> valueVecInt );
+ long getHashKey();
+ string getKey();
+ vector<int> getValue();
+ void setValue ( vector<int> value );
+ string toString();
-};
+ };
-}
-#endif // MERT_TER_INFO_SHASHER_H_
+}
+#endif \ No newline at end of file
diff --git a/mert/TER/stringHasher.cpp b/mert/TER/stringHasher.cpp
index 24fde0e32..f4d1526e8 100644
--- a/mert/TER/stringHasher.cpp
+++ b/mert/TER/stringHasher.cpp
@@ -1,3 +1,23 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
#include "stringHasher.h"
// The following class defines a hash function for strings
@@ -6,29 +26,29 @@ using namespace std;
namespace HashMapSpace
{
-stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt )
-{
- m_hashKey=cle;
- m_key=cleTxt;
- m_value=valueTxt;
-}
+ stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt )
+ {
+ m_hashKey=cle;
+ m_key=cleTxt;
+ m_value=valueTxt;
+ }
// stringHasher::~stringHasher(){};*/
-long stringHasher::getHashKey()
-{
- return m_hashKey;
-}
-string stringHasher::getKey()
-{
- return m_key;
-}
-string stringHasher::getValue()
-{
- return m_value;
-}
-void stringHasher::setValue ( string value )
-{
- m_value=value;
-}
+ long stringHasher::getHashKey()
+ {
+ return m_hashKey;
+ }
+ string stringHasher::getKey()
+ {
+ return m_key;
+ }
+ string stringHasher::getValue()
+ {
+ return m_value;
+ }
+ void stringHasher::setValue ( string value )
+ {
+ m_value=value;
+ }
// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;
diff --git a/mert/TER/stringHasher.h b/mert/TER/stringHasher.h
index 897bd9ff5..d831f642c 100644
--- a/mert/TER/stringHasher.h
+++ b/mert/TER/stringHasher.h
@@ -1,28 +1,50 @@
-#ifndef MERT_TER_STRING_HASHER_H_
-#define MERT_TER_STRING_HASHER_H_
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
+#ifndef __STRINGHASHER_H_
+#define __STRINGHASHER_H_
#include <string>
+//#include <ext/hash_map>
#include <iostream>
using namespace std;
namespace HashMapSpace
{
-class stringHasher
-{
-private:
- long m_hashKey;
- string m_key;
- string m_value;
-
-public:
- stringHasher ( long cle, string cleTxt, string valueTxt );
- long getHashKey();
- string getKey();
- string getValue();
- void setValue ( string value );
-};
+ class stringHasher
+ {
+ private:
+ long m_hashKey;
+ string m_key;
+ string m_value;
-}
+ public:
+ stringHasher ( long cle, string cleTxt, string valueTxt );
+ long getHashKey();
+ string getKey();
+ string getValue();
+ void setValue ( string value );
-#endif // MERT_TER_STRING_HASHER_H_
+
+ };
+
+
+}
+#endif
diff --git a/mert/TER/stringInfosHasher.cpp b/mert/TER/stringInfosHasher.cpp
index 3e02e7a20..007fd720f 100644
--- a/mert/TER/stringInfosHasher.cpp
+++ b/mert/TER/stringInfosHasher.cpp
@@ -1,34 +1,61 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
#include "stringInfosHasher.h"
// The following class defines a hash function for strings
using namespace std;
+using namespace Tools;
namespace HashMapSpace
{
-stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt )
-{
- m_hashKey=cle;
- m_key=cleTxt;
- m_value=valueVecInt;
-}
+ stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt )
+ {
+ m_hashKey=cle;
+ m_key=cleTxt;
+ m_value=valueVecInt;
+ }
// stringInfosHasher::~stringInfosHasher(){};*/
-long stringInfosHasher::getHashKey()
-{
- return m_hashKey;
-}
-string stringInfosHasher::getKey()
-{
- return m_key;
-}
-vector<string> stringInfosHasher::getValue()
-{
- return m_value;
-}
-void stringInfosHasher::setValue ( vector<string> value )
-{
- m_value=value;
-}
+ long stringInfosHasher::getHashKey()
+ {
+ return m_hashKey;
+ }
+ string stringInfosHasher::getKey()
+ {
+ return m_key;
+ }
+ vector<string> stringInfosHasher::getValue()
+ {
+ return m_value;
+ }
+ void stringInfosHasher::setValue ( vector<string> value )
+ {
+ m_value=value;
+ }
+ string stringInfosHasher::toString()
+ {
+ stringstream to_return;
+ to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
+ return to_return.str();
+ }
// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;
diff --git a/mert/TER/stringInfosHasher.h b/mert/TER/stringInfosHasher.h
index c1b891662..307b48da7 100644
--- a/mert/TER/stringInfosHasher.h
+++ b/mert/TER/stringInfosHasher.h
@@ -1,28 +1,52 @@
-#ifndef MERT_TER_STRING_INFOS_HASHER_H_
-#define MERT_TER_STRING_INFOS_HASHER_H_
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
+#ifndef __STRINGINFOSHASHER_H_
+#define __STRINGINFOSHASHER_H_
#include <string>
+// #include <ext/hash_map>
#include <iostream>
#include <vector>
+#include "tools.h"
using namespace std;
namespace HashMapSpace
{
-class stringInfosHasher
-{
-private:
- long m_hashKey;
- string m_key;
- vector<string> m_value;
-
-public:
- stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt );
- long getHashKey();
- string getKey();
- vector<string> getValue();
- void setValue ( vector<string> value );
-};
+ class stringInfosHasher
+ {
+ private:
+ long m_hashKey;
+ string m_key;
+ vector<string> m_value;
-}
+ public:
+ stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt );
+ long getHashKey();
+ string getKey();
+ vector<string> getValue();
+ void setValue ( vector<string> value );
+ string toString();
-#endif // MERT_TER_STRING_INFOS_HASHER_H_
+
+ };
+
+
+}
+#endif \ No newline at end of file
diff --git a/mert/TER/terAlignment.cpp b/mert/TER/terAlignment.cpp
index 87be53b11..6c5d35cc5 100644
--- a/mert/TER/terAlignment.cpp
+++ b/mert/TER/terAlignment.cpp
@@ -1,131 +1,214 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
#include "terAlignment.h"
using namespace std;
namespace TERCpp
{
-terAlignment::terAlignment()
-{
+ terAlignment::terAlignment()
+ {
// vector<string> ref;
// vector<string> hyp;
// vector<string> aftershift;
- // TERshift[] allshifts = null;
+ // TERshift[] allshifts = null;
- numEdits=0;
- numWords=0;
- bestRef="";
+ numEdits=0;
+ numWords=0;
+ bestRef="";
- numIns=0;
- numDel=0;
- numSub=0;
- numSft=0;
- numWsf=0;
-}
-string terAlignment::toString()
-{
- stringstream s;
- s.str ( "" );
- s << "Original Ref: " << join ( " ", ref ) << endl;
- s << "Original Hyp: " << join ( " ", hyp ) <<endl;
- s << "Hyp After Shift: " << join ( " ", aftershift );
- s << endl;
+ numIns=0;
+ numDel=0;
+ numSub=0;
+ numSft=0;
+ numWsf=0;
+ }
+ string terAlignment::toString()
+ {
+ stringstream s;
+ s.str ( "" );
+ s << "Original Ref: \t" << join ( " ", ref ) << endl;
+ s << "Original Hyp: \t" << join ( " ", hyp ) <<endl;
+ s << "Hyp After Shift:\t" << join ( " ", aftershift );
+// s << "Hyp After Shift: " << join ( " ", aftershift );
+ s << endl;
// string s = "Original Ref: " + join(" ", ref) + "\nOriginal Hyp: " + join(" ", hyp) + "\nHyp After Shift: " + join(" ", aftershift);
- if ( ( int ) sizeof ( alignment ) >0 ) {
- s << "Alignment: (";
+ if ( ( int ) sizeof ( alignment ) >0 )
+ {
+ s << "Alignment: (";
// s += "\nAlignment: (";
- for ( int i = 0; i < ( int ) ( alignment.size() ); i++ ) {
- s << alignment[i];
+ for ( int i = 0; i < ( int ) ( alignment.size() ); i++ )
+ {
+ s << alignment[i];
// s+=alignment[i];
- }
+ }
// s += ")";
- s << ")";
- }
- s << endl;
- if ( ( int ) allshifts.size() == 0 ) {
+ s << ")";
+ }
+ s << endl;
+ if ( ( int ) allshifts.size() == 0 )
+ {
// s += "\nNumShifts: 0";
- s << "NumShifts: 0";
- } else {
+ s << "NumShifts: 0";
+ }
+ else
+ {
// s += "\nNumShifts: " + (int)allshifts.size();
- s << "NumShifts: "<< ( int ) allshifts.size();
- for ( int i = 0; i < ( int ) allshifts.size(); i++ ) {
- s << endl << " " ;
- s << ( ( terShift ) allshifts[i] ).toString();
+ s << "NumShifts: "<< ( int ) allshifts.size();
+ for ( int i = 0; i < ( int ) allshifts.size(); i++ )
+ {
+ s << endl << " " ;
+ s << ( ( terShift ) allshifts[i] ).toString();
// s += "\n " + allshifts[i];
- }
- }
- s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")";
+ }
+ }
+ s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")";
// s += "\nScore: " + score() + " (" + numEdits + "/" + numWords + ")";
- return s.str();
+ return s.str();
-}
-string terAlignment::join ( string delim, vector<string> arr )
-{
- if ( ( int ) arr.size() == 0 ) return "";
+ }
+ string terAlignment::join ( string delim, vector<string> arr )
+ {
+ if ( ( int ) arr.size() == 0 ) return "";
// if ((int)delim.compare("") == 0) delim = new String("");
// String s = new String("");
- stringstream s;
- s.str ( "" );
- for ( int i = 0; i < ( int ) arr.size(); i++ ) {
- if ( i == 0 ) {
- s << arr.at ( i );
- } else {
- s << delim << arr.at ( i );
- }
- }
- return s.str();
+ stringstream s;
+ s.str ( "" );
+ for ( int i = 0; i < ( int ) arr.size(); i++ )
+ {
+ if ( i == 0 )
+ {
+ s << arr.at ( i );
+ }
+ else
+ {
+ s << delim << arr.at ( i );
+ }
+ }
+ return s.str();
// return "";
-}
-double terAlignment::score()
-{
- if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) ) {
- return 1.0;
- }
- if ( numWords <= 0.0 ) {
- return 0.0;
- }
- return ( double ) numEdits / numWords;
-}
-double terAlignment::scoreAv()
-{
- if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) ) {
- return 1.0;
- }
- if ( averageWords <= 0.0 ) {
- return 0.0;
- }
- return ( double ) numEdits / averageWords;
-}
-
-void terAlignment::scoreDetails()
-{
- numIns = numDel = numSub = numWsf = numSft = 0;
- if((int)allshifts.size()>0) {
- for(int i = 0; i < (int)allshifts.size(); ++i) {
- numWsf += allshifts[i].size();
}
- numSft = allshifts.size();
- }
-
- if((int)alignment.size()>0 ) {
- for(int i = 0; i < (int)alignment.size(); ++i) {
- switch (alignment[i]) {
- case 'S':
- case 'T':
- numSub++;
- break;
- case 'D':
- numDel++;
- break;
- case 'I':
- numIns++;
- break;
- }
+ double terAlignment::score()
+ {
+ if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) )
+ {
+ return 1.0;
+ }
+ if ( numWords <= 0.0 )
+ {
+ return 0.0;
+ }
+ return ( double ) numEdits / numWords;
+ }
+ double terAlignment::scoreAv()
+ {
+ if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) )
+ {
+ return 1.0;
+ }
+ if ( averageWords <= 0.0 )
+ {
+ return 0.0;
+ }
+ return ( double ) numEdits / averageWords;
}
+
+ void terAlignment::scoreDetails()
+ {
+ numIns = numDel = numSub = numWsf = numSft = 0;
+ if((int)allshifts.size()>0)
+ {
+ for(int i = 0; i < (int)allshifts.size(); ++i)
+ {
+ numWsf += allshifts[i].size();
+ }
+ numSft = allshifts.size();
+ }
+
+ if((int)alignment.size()>0 )
+ {
+ for(int i = 0; i < (int)alignment.size(); ++i)
+ {
+ switch (alignment[i])
+ {
+ case 'S':
+ case 'T':
+ numSub++;
+ break;
+ case 'D':
+ numDel++;
+ break;
+ case 'I':
+ numIns++;
+ break;
+ }
+ }
+ }
+ // if(numEdits != numSft + numDel + numIns + numSub)
+ // System.out.println("** Error, unmatch edit erros " + numEdits +
+ // " vs " + (numSft + numDel + numIns + numSub));
+ }
+ string terAlignment::printAlignments()
+ {
+ stringstream to_return;
+ for(int i = 0; i < (int)alignment.size(); ++i)
+ {
+ char alignInfo=alignment.at(i);
+ if (alignInfo == 'A' )
+ {
+ alignInfo='A';
+ }
+
+ if (i==0)
+ {
+ to_return << alignInfo;
+ }
+ else
+ {
+ to_return << " " << alignInfo;
+ }
+ }
+ return to_return.str();
}
- // if(numEdits != numSft + numDel + numIns + numSub)
- // System.out.println("** Error, unmatch edit erros " + numEdits +
- // " vs " + (numSft + numDel + numIns + numSub));
+string terAlignment::printAllShifts()
+{
+ stringstream to_return;
+ if ( ( int ) allshifts.size() == 0 )
+ {
+// s += "\nNumShifts: 0";
+ to_return << "NbrShifts: 0";
+ }
+ else
+ {
+// s += "\nNumShifts: " + (int)allshifts.size();
+ to_return << "NbrShifts: "<< ( int ) allshifts.size();
+ for ( int i = 0; i < ( int ) allshifts.size(); i++ )
+ {
+ to_return << "\t" ;
+ to_return << ( ( terShift ) allshifts[i] ).toString();
+// s += "\n " + allshifts[i];
+ }
+ }
+ return to_return.str();
}
} \ No newline at end of file
diff --git a/mert/TER/terAlignment.h b/mert/TER/terAlignment.h
index c8c82eac8..0af86f663 100644
--- a/mert/TER/terAlignment.h
+++ b/mert/TER/terAlignment.h
@@ -1,5 +1,26 @@
-#ifndef MERT_TER_TER_ALIGNMENT_H_
-#define MERT_TER_TER_ALIGNMENT_H_
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
+#ifndef MERT_TER_TERALIGNMENT_H_
+#define MERT_TER_TERALIGNMENT_H_
+
#include <vector>
#include <stdio.h>
@@ -13,39 +34,41 @@ using namespace std;
namespace TERCpp
{
-class terAlignment
-{
-private:
-public:
-
- terAlignment();
- string toString();
- void scoreDetails();
+ class terAlignment
+ {
+ private:
+ public:
- vector<string> ref;
- vector<string> hyp;
- vector<string> aftershift;
+ terAlignment();
+ string toString();
+ void scoreDetails();
- vector<terShift> allshifts;
+ vector<string> ref;
+ vector<string> hyp;
+ vector<string> aftershift;
+ vector<terShift> allshifts;
+ vector<int> hyp_int;
+ vector<int> aftershift_int;
- double numEdits;
- double numWords;
- double averageWords;
- vector<char> alignment;
- string bestRef;
+ double numEdits;
+ double numWords;
+ double averageWords;
+ vector<char> alignment;
+ string bestRef;
- int numIns;
- int numDel;
- int numSub;
- int numSft;
- int numWsf;
+ int numIns;
+ int numDel;
+ int numSub;
+ int numSft;
+ int numWsf;
- string join ( string delim, vector<string> arr );
- double score();
- double scoreAv();
-};
+ string join ( string delim, vector<string> arr );
+ double score();
+ double scoreAv();
+ string printAlignments();
+ string printAllShifts();
+ };
}
-
-#endif // MERT_TER_TER_ALIGNMENT_H__
+#endif \ No newline at end of file
diff --git a/mert/TER/terShift.cpp b/mert/TER/terShift.cpp
index 428803849..c1106db76 100644
--- a/mert/TER/terShift.cpp
+++ b/mert/TER/terShift.cpp
@@ -1,3 +1,23 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
#include "terShift.h"
using namespace std;
@@ -22,32 +42,32 @@ namespace TERCpp
// numSft=0;
// numWsf=0;
// }
-terShift::terShift ()
-{
- start = 0;
- end = 0;
- moveto = 0;
- newloc = 0;
- cost=1.0;
-}
-terShift::terShift ( int _start, int _end, int _moveto, int _newloc )
-{
- start = _start;
- end = _end;
- moveto = _moveto;
- newloc = _newloc;
- cost=1.0;
-}
+ terShift::terShift ()
+ {
+ start = 0;
+ end = 0;
+ moveto = 0;
+ newloc = 0;
+ cost=1.0;
+ }
+ terShift::terShift ( int _start, int _end, int _moveto, int _newloc )
+ {
+ start = _start;
+ end = _end;
+ moveto = _moveto;
+ newloc = _newloc;
+ cost=1.0;
+ }
-terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted )
-{
- start = _start;
- end = _end;
- moveto = _moveto;
- newloc = _newloc;
- shifted = _shifted;
- cost=1.0;
-}
+ terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted )
+ {
+ start = _start;
+ end = _end;
+ moveto = _moveto;
+ newloc = _newloc;
+ shifted = _shifted;
+ cost=1.0;
+ }
// string terShift::vectorToString(vector<string> vec)
// {
// string retour("");
@@ -58,38 +78,44 @@ terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<stri
// return retour;
// }
-string terShift::toString()
-{
- stringstream s;
- s.str ( "" );
- s << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
- if ( ( int ) shifted.size() > 0 ) {
- s << " (" << vectorToString ( shifted ) << ")";
- }
- return s.str();
-}
+ string terShift::toString()
+ {
+ stringstream s;
+ s.str ( "" );
+ s << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
+ if ( ( int ) shifted.size() > 0 )
+ {
+ s << " (" << vectorToString ( shifted ) << ")";
+ }
+ return s.str();
+ }
-/* The distance of the shift. */
-int terShift::distance()
-{
- if ( moveto < start ) {
- return start - moveto;
- } else if ( moveto > end ) {
- return moveto - end;
- } else {
- return moveto - start;
- }
-}
+ /* The distance of the shift. */
+ int terShift::distance()
+ {
+ if ( moveto < start )
+ {
+ return start - moveto;
+ }
+ else if ( moveto > end )
+ {
+ return moveto - end;
+ }
+ else
+ {
+ return moveto - start;
+ }
+ }
-bool terShift::leftShift()
-{
- return ( moveto < start );
-}
+ bool terShift::leftShift()
+ {
+ return ( moveto < start );
+ }
-int terShift::size()
-{
- return ( end - start ) + 1;
-}
+ int terShift::size()
+ {
+ return ( end - start ) + 1;
+ }
// terShift terShift::operator=(terShift t)
// {
//
diff --git a/mert/TER/terShift.h b/mert/TER/terShift.h
index 679a7c8bb..ba84a5947 100644
--- a/mert/TER/terShift.h
+++ b/mert/TER/terShift.h
@@ -1,5 +1,26 @@
-#ifndef MERT_TER_TER_SHIFT_H_
-#define MERT_TER_TER_SHIFT_H_
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
+#ifndef MERT_TER_TERSHIFT_H_
+#define MERT_TER_TERSHIFT_H_
+
#include <vector>
#include <stdio.h>
@@ -7,38 +28,38 @@
#include <sstream>
#include "tools.h"
+
using namespace std;
using namespace Tools;
namespace TERCpp
{
-class terShift
-{
-private:
-public:
-
- terShift();
- terShift ( int _start, int _end, int _moveto, int _newloc );
- terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted );
- string toString();
- int distance() ;
- bool leftShift();
- int size();
+ class terShift
+ {
+ private:
+ public:
+
+ terShift();
+ terShift ( int _start, int _end, int _moveto, int _newloc );
+ terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted );
+ string toString();
+ int distance() ;
+ bool leftShift();
+ int size();
// terShift operator=(terShift t);
// string vectorToString(vector<string> vec);
- int start;
- int end;
- int moveto;
- int newloc;
- vector<string> shifted; // The words we shifted
- vector<char> alignment ; // for pra_more output
- vector<string> aftershift; // for pra_more output
- // This is used to store the cost of a shift, so we don't have to
- // calculate it multiple times.
- double cost;
-};
+ int start;
+ int end;
+ int moveto;
+ int newloc;
+ vector<string> shifted; // The words we shifted
+ vector<char> alignment ; // for pra_more output
+ vector<string> aftershift; // for pra_more output
+ // This is used to store the cost of a shift, so we don't have to
+ // calculate it multiple times.
+ double cost;
+ };
}
-
-#endif // MERT_TER_TER_SHIFT_H_
+#endif \ No newline at end of file
diff --git a/mert/TER/tercalc.cpp b/mert/TER/tercalc.cpp
index e16f692e8..b7f63772c 100644
--- a/mert/TER/tercalc.cpp
+++ b/mert/TER/tercalc.cpp
@@ -1,3 +1,23 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
//
// C++ Implementation: tercalc
//
@@ -15,1021 +35,902 @@ using namespace Tools;
namespace TERCpp
{
-terCalc::terCalc()
-{
- MAX_SHIFT_SIZE = 50;
- INF = 999999.0;
- shift_cost = 1.0;
- insert_cost = 1.0;
- delete_cost = 1.0;
- substitute_cost = 1.0;
- match_cost = 0.0;
- NUM_SEGMENTS_SCORED = 0;
- NUM_SHIFTS_CONSIDERED = 0;
- NUM_BEAM_SEARCH_CALLS = 0;
- BEAM_WIDTH = 20;
- MAX_SHIFT_DIST = 50;
- PRINT_DEBUG = false;
-}
-
-
-// terCalc::~terCalc()
-// {
-// }
-// size_t* terCalc::hashVec ( vector<string> s )
-// {
-// size_t retour[ ( int ) s.size() ];
-// int i=0;
-// for ( i=0; i< ( int ) s.size(); i++ )
-// {
-// boost::hash<std::string> hasher;
-// retour[i]=hasher ( s.at ( i ) );
-// }
-// return retour;
-// }
-
-
-int terCalc::WERCalculation ( size_t * ref, size_t * hyp )
-{
- int retour;
- int REFSize = sizeof ( ref ) + 1;
- int HYPSize = sizeof ( hyp ) + 1;
- int WER[REFSize][HYPSize];
- int i = 0;
- int j = 0;
- for ( i = 0; i < REFSize; i++ ) {
- WER[i][0] = ( int ) i;
- }
- for ( j = 0; j < HYPSize; j++ ) {
- WER[0][j] = ( int ) j;
- }
- for ( j = 1; j < HYPSize; j++ ) {
- for ( i = 1; i < REFSize; i++ ) {
- if ( i == 1 ) {
- cerr << endl;
- }
- if ( ref[i-1] == hyp[j-1] ) {
- WER[i][j] = WER[i-1][j-1];
- cerr << "- ";
- cerr << WER[i][j] << "-\t";
- } else {
- if ( ( ( WER[i-1][ j] + 1 ) < ( WER[i][j-1] + 1 ) ) && ( ( WER[i-1][j] + 1 ) < ( WER[i-1][j-1] + 1 ) ) ) {
- WER[i][j] = ( WER[i-1][j] + 1 );
-// cerr << "D ";
- cerr << WER[i][j] << "D\t";
- } else {
- if ( ( ( WER[i][j-1] + 1 ) < ( WER[i-1][j] + 1 ) ) && ( ( WER[i][j-1] + 1 ) < ( WER[i-1][j-1] + 1 ) ) ) {
- WER[i][j] = ( WER[i][j-1] + 1 );
-// cerr << "I ";
- cerr << WER[i][j] << "I\t";
- } else {
- WER[i][j] = ( WER[i-1][j-1] + 1 );
-// cerr << "S ";
- cerr << WER[i][j] << "S\t";
- }
- }
- }
- }
- }
- cerr << endl;
- retour = WER[i-1][j-1];
- cerr << "i : " << i - 1 << "\tj : " << j - 1 << endl;
- return retour;
-}
-int terCalc::WERCalculation ( std::vector< int > ref, std::vector< int > hyp )
-{
- stringstream s;
- s.str ( "" );
- string stringRef ( "" );
- string stringHyp ( "" );
- for ( vector<int>::iterator l_it = ref.begin(); l_it != ref.end(); l_it++ ) {
- if ( l_it == ref.begin() ) {
- s << ( *l_it );
- } else {
- s << " " << ( *l_it );
- }
- }
- stringRef = s.str();
- s.str ( "" );
- for ( vector<int>::iterator l_itHyp = hyp.begin(); l_itHyp != hyp.end(); l_itHyp++ ) {
- if ( l_itHyp == hyp.begin() ) {
- s << ( *l_itHyp );
- } else {
- s << " " << ( *l_itHyp );
- }
- }
- stringHyp = s.str();
- s.str ( "" );
- return WERCalculation ( stringToVector ( stringRef, " " ), stringToVector ( stringHyp , " " ) );
-}
-
-terAlignment terCalc::TER ( std::vector< int > hyp, std::vector< int > ref )
-{
- stringstream s;
- s.str ( "" );
- string stringRef ( "" );
- string stringHyp ( "" );
- for ( vector<int>::iterator l_it = ref.begin(); l_it != ref.end(); l_it++ ) {
- if ( l_it == ref.begin() ) {
- s << ( *l_it );
- } else {
- s << " " << ( *l_it );
- }
- }
- stringRef = s.str();
- s.str ( "" );
- for ( vector<int>::iterator l_itHyp = hyp.begin(); l_itHyp != hyp.end(); l_itHyp++ ) {
- if ( l_itHyp == hyp.begin() ) {
- s << ( *l_itHyp );
- } else {
- s << " " << ( *l_itHyp );
+ terCalc::terCalc()
+ {
+ TAILLE_PERMUT_MAX = 50;
+ infinite = 999999.0;
+ shift_cost = 1.0;
+ insert_cost = 1.0;
+ delete_cost = 1.0;
+ substitute_cost = 1.0;
+ match_cost = 0.0;
+ NBR_SEGS_EVALUATED = 0;
+ NBR_PERMUTS_CONSID = 0;
+ NBR_BS_APPELS = 0;
+ TAILLE_BEAM = 20;
+ DIST_MAX_PERMUT = 50;
+ PRINT_DEBUG = false;
+ hypSpans.clear();
+ refSpans.clear();
+ }
+
+
+ terAlignment terCalc::WERCalculation ( vector< string > hyp , vector< string > ref )
+ {
+
+ return minimizeDistanceEdition ( hyp, ref, hypSpans );
+
}
- }
- stringHyp = s.str();
- s.str ( "" );
- return TER ( stringToVector ( stringRef , " " ), stringToVector ( stringHyp , " " ) );
-}
-int terCalc::WERCalculation ( vector<string> ref, vector<string> hyp )
-{
- int retour;
- int REFSize = ( int ) ref.size() + 1;
- int HYPSize = ( int ) hyp.size() + 1;
- int WER[REFSize][HYPSize];
- char WERchar[REFSize][HYPSize];
- int i = 0;
- int j = 0;
- for ( i = 0; i < REFSize; i++ ) {
- WER[i][0] = ( int ) i;
- }
- for ( j = 0; j < HYPSize; j++ ) {
- WER[0][j] = ( int ) j;
- }
- for ( j = 1; j < HYPSize; j++ ) {
- for ( i = 1; i < REFSize; i++ ) {
-// if (i==1)
-// {
-// cerr << endl;
-// }
- if ( ref[i-1] == hyp[j-1] ) {
- WER[i][j] = WER[i-1][j-1];
-// cerr << "- ";
-// cerr << WER[i][j]<< "-\t";
- WERchar[i][j] = '-';
- } else {
- if ( ( ( WER[i-1][ j] + 1 ) < ( WER[i][j-1] + 1 ) ) && ( ( WER[i-1][j] + 1 ) < ( WER[i-1][j-1] + 1 ) ) ) {
- WER[i][j] = ( WER[i-1][j] + 1 );
-// cerr << "D ";
-// cerr << WER[i][j]<< "D\t";
- WERchar[i][j] = 'D';
- } else {
- if ( ( ( WER[i][j-1] + 1 ) < ( WER[i-1][j] + 1 ) ) && ( ( WER[i][j-1] + 1 ) < ( WER[i-1][j-1] + 1 ) ) ) {
- WER[i][j] = ( WER[i][j-1] + 1 );
-// cerr << "I ";
-// cerr << WER[i][j]<< "I\t";
- WERchar[i][j] = 'I';
- } else {
- WER[i][j] = ( WER[i-1][j-1] + 1 );
-// cerr << "S ";
-// cerr << WER[i][j]<< "S\t";
- WERchar[i][j] = 'S';
- }
- }
- }
- }
- }
- cerr << endl;
- retour = WER[REFSize-1][HYPSize-1];
- cerr << "i : " << i - 1 << "\tj : " << j - 1 << endl;
- j = HYPSize - 1;
- i = REFSize - 1;
- int k;
- stringstream s;
-// WERalignment local[HYPSize];
- if ( HYPSize > REFSize ) {
- k = HYPSize;
- } else {
- k = REFSize;
- }
- WERalignment local;
- while ( j > 0 && i > 0 ) {
- cerr << "indice i : " << i << "\t";
- cerr << "indice j : " << j << endl;
- if ( ( j == HYPSize - 1 ) && ( i == REFSize - 1 ) ) {
- alignmentElement localInfos;
- s << WER[i][j];
- localInfos.push_back ( s.str() );
- s.str ( "" );
- s << WERchar[i][j];
- localInfos.push_back ( s.str() );
- s.str ( "" );
- local.push_back ( localInfos );
-// // i--;
-// j--;
- }
-// else
+ terAlignment terCalc::TER ( std::vector< int > hyp, std::vector< int > ref )
{
- if ( ( ( WER[i-1][j-1] ) <= ( WER[i-1][j] ) ) && ( ( WER[i-1][j-1] ) <= ( WER[i][j-1] ) ) ) {
- alignmentElement localInfos;
- s << WER[i-1][j-1];
- localInfos.push_back ( s.str() );
+ stringstream s;
s.str ( "" );
- s << WERchar[i-1][j-1];
- localInfos.push_back ( s.str() );
+ string stringRef ( "" );
+ string stringHyp ( "" );
+ for ( vector<int>::iterator l_it = ref.begin(); l_it != ref.end(); l_it++ )
+ {
+ if ( l_it == ref.begin() )
+ {
+ s << ( *l_it );
+ }
+ else
+ {
+ s << " " << ( *l_it );
+ }
+ }
+ stringRef = s.str();
s.str ( "" );
- local.push_back ( localInfos );
- i--;
- j--;
- } else {
- if ( ( ( WER[i][j-1] ) <= ( WER[i-1][j] ) ) && ( ( WER[i][j-1] ) <= ( WER[i-1][j-1] ) ) ) {
- alignmentElement localInfos;
- s << WER[i][j-1];
- localInfos.push_back ( s.str() );
- s.str ( "" );
- s << WERchar[i][j-1];
- localInfos.push_back ( s.str() );
- s.str ( "" );
- local.push_back ( localInfos );
- j--;
- } else {
- alignmentElement localInfos;
- s << WER[i-1][j];
- localInfos.push_back ( s.str() );
- s.str ( "" );
- s << WERchar[i-1][j];
- localInfos.push_back ( s.str() );
- s.str ( "" );
- local.push_back ( localInfos );
- i--;
+ for ( vector<int>::iterator l_itHyp = hyp.begin(); l_itHyp != hyp.end(); l_itHyp++ )
+ {
+ if ( l_itHyp == hyp.begin() )
+ {
+ s << ( *l_itHyp );
+ }
+ else
+ {
+ s << " " << ( *l_itHyp );
+ }
}
- }
- }
- }
-
- for ( j = 1; j < HYPSize; j++ ) {
- for ( i = 1; i < REFSize; i++ ) {
- cerr << WERchar[i][j] << " ";
- }
- cerr << endl;
- }
- cerr << endl;
- for ( j = 1; j < HYPSize; j++ ) {
- for ( i = 1; i < REFSize; i++ ) {
- cerr << WER[i][j] << " ";
+ stringHyp = s.str();
+ s.str ( "" );
+ return TER ( stringToVector ( stringRef , " " ), stringToVector ( stringHyp , " " ) );
}
- cerr << endl;
- }
- cerr << "=================" << endl;
-// k=local.size()-1;
-// while (k>0)
-// {
-// alignmentElement localInfos;
-// localInfos=local.at(k-1);
-// l_WERalignment.push_back(localInfos);
-// cerr << (string)localInfos.at(1)+"\t";
- k--;
-// }
-// cerr<<endl;
- k = local.size() - 1;
- int l = 0;
- int m = 0;
- while ( k > 0 ) {
- alignmentElement localInfos;
- localInfos = local.at ( k - 1 );
- if ( ( int ) ( localInfos.at ( 1 ).compare ( "D" ) ) == 0 || l > HYPSize - 1 ) {
- localInfos.push_back ( "***" );
- } else {
- localInfos.push_back ( hyp.at ( l ) );
- l++;
- }
- if ( ( int ) ( localInfos.at ( 1 ).compare ( "I" ) ) == 0 || m > REFSize - 1 ) {
- localInfos.push_back ( "***" );
- } else {
- localInfos.push_back ( ref.at ( m ) );
- m++;
+ hashMapInfos terCalc::createConcordMots ( vector<string> hyp, vector<string> ref )
+ {
+ hashMap tempHash;
+ hashMapInfos retour;
+ for ( int i = 0; i < ( int ) hyp.size(); i++ )
+ {
+ tempHash.addHasher ( hyp.at ( i ), "" );
+ }
+ bool cor[ref.size() ];
+ for ( int i = 0; i < ( int ) ref.size(); i++ )
+ {
+ if ( tempHash.trouve ( ( string ) ref.at ( i ) ) )
+ {
+ cor[i] = true;
+ }
+ else
+ {
+ cor[i] = false;
+ }
+ }
+ for ( int start = 0; start < ( int ) ref.size(); start++ )
+ {
+ if ( cor[start] )
+ {
+ for ( int end = start; ( ( end < ( int ) ref.size() ) && ( end - start <= TAILLE_PERMUT_MAX ) && ( cor[end] ) );end++ )
+ {
+ vector<string> ajouter = subVector ( ref, start, end + 1 );
+ string ajouterString = vectorToString ( ajouter );
+ vector<int> values = retour.getValue ( ajouterString );
+ values.push_back ( start );
+ if ( values.size() > 1 )
+ {
+ retour.setValue ( ajouterString, values );
+ }
+ else
+ {
+ retour.addValue ( ajouterString, values );
+ }
+ }
+ }
+ }
+ return retour;
}
-// cerr << vectorToString(localInfos)<<endl;
-// cerr <<localInfos.at(0)<<"\t"<<localInfos.at(1)<<"\t"<<localInfos.at(2)<<"\t"<<localInfos.at(3)<<endl;
- l_WERalignment.push_back ( localInfos );
-// cerr << (string)localInfos.at(1)+"\t";
- k--;
- }
- cerr << endl;
- /* k=local.size()-1;
- while (k>0)
- {
- alignmentElement localInfos;
- localInfos=local.at(k-1);
- // l_WERalignment.push_back(localInfos);
- cerr << (string)localInfos.at(0)+"\t";
- k--;
- }
- cerr<<endl;*/
- k = 0;
-// k=l_WERalignment.size()-1;
- m = 0;
- while ( k < ( int ) l_WERalignment.size() ) {
- alignmentElement localInfos;
- localInfos = l_WERalignment.at ( k );
- cerr << localInfos.at ( 0 ) << "\t" << localInfos.at ( 1 ) << "\t" << localInfos.at ( 2 ) << "\t" << localInfos.at ( 3 ) << endl;
- /* if ((int)(localInfos.at(1).compare("I"))==0)
- {
- cerr << "***\t";
- }
- else
- {
- // if (m<ref.size())
- {
- cerr << ref.at(m) << "\t";
- }
- m++;
- }
- */
- k++;
- }
- cerr << endl;
- /* k=local.size()-1;
- l=0;
- while (k>0)
- {
- alignmentElement localInfos;
- localInfos=local.at(k-1);
- // l_WERalignment.push_back(localInfos);
- if ((int)(localInfos.at(1).compare("D"))==0)
- {
- cerr << "***\t";
- }
- else
- {
- cerr << hyp.at(l) << "\t";
- l++;
- }
- k--;
- }
- cerr<<endl;*/
- cerr << "=================" << endl;
- return retour;
-}
-
-// string terCalc::vectorToString(vector<string> vec)
-// {
-// string retour("");
-// for (vector<string>::iterator vecIter=vec.begin();vecIter!=vec.end(); vecIter++)
-// {
-// retour+=(*vecIter)+"\t";
-// }
-// return retour;
-// }
-// vector<string> terCalc::subVector(vector<string> vec, int start, int end)
-// {
-// if (start>end)
-// {
-// cerr << "ERREUR : terCalc::subVector : end > start"<<endl;
-// exit(0);
-// }
-// vector<string> retour;
-// for (int i=start; ((i<end) && (i< vec.size())); i++)
-// {
-// retour.push_back(vec.at(i));
-// }
-// return retour;
-// }
-
-hashMapInfos terCalc::BuildWordMatches ( vector<string> hyp, vector<string> ref )
-{
- hashMap tempHash;
- hashMapInfos retour;
- for ( int i = 0; i < ( int ) hyp.size(); i++ ) {
- tempHash.addHasher ( hyp.at ( i ), "" );
- }
- bool cor[ref.size() ];
- for ( int i = 0; i < ( int ) ref.size(); i++ ) {
- if ( tempHash.trouve ( ( string ) ref.at ( i ) ) ) {
- cor[i] = true;
- } else {
- cor[i] = false;
- }
- }
- for ( int start = 0; start < ( int ) ref.size(); start++ ) {
- if ( cor[start] ) {
- for ( int end = start; ( ( end < ( int ) ref.size() ) && ( end - start <= MAX_SHIFT_SIZE ) && ( cor[end] ) ); end++ ) {
- vector<string> ajouter = subVector ( ref, start, end + 1 );
- string ajouterString = vectorToString ( ajouter );
- vector<int> values = retour.getValue ( ajouterString );
- values.push_back ( start );
- if ( values.size() > 1 ) {
- retour.setValue ( ajouterString, values );
- } else {
- retour.addValue ( ajouterString, values );
+ bool terCalc::trouverIntersection ( vecInt refSpan, vecInt hypSpan )
+ {
+ if ( ( refSpan.at ( 1 ) >= hypSpan.at ( 0 ) ) && ( refSpan.at ( 0 ) <= hypSpan.at ( 1 ) ) )
+ {
+ return true;
}
- }
+ return false;
}
- }
- return retour;
-}
-
-bool terCalc::spanIntersection ( vecInt refSpan, vecInt hypSpan )
-{
- if ( ( refSpan.at ( 1 ) >= hypSpan.at ( 0 ) ) && ( refSpan.at ( 0 ) <= hypSpan.at ( 1 ) ) ) {
- return true;
- }
- return false;
-}
-terAlignment terCalc::MinEditDist ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans )
-{
- double current_best = INF;
- double last_best = INF;
- int first_good = 0;
- int current_first_good = 0;
- int last_good = -1;
- int cur_last_good = 0;
- int last_peak = 0;
- int cur_last_peak = 0;
- int i, j;
- double cost, icost, dcost;
- double score;
-
-// int hwsize = hyp.size()-1;
-// int rwsize = ref.size()-1;
- NUM_BEAM_SEARCH_CALLS++;
-// if ((ref.size()+1 > sizeof(S)) || (hyp.size()+1 > sizeof(S)))
-// {
-// int max = ref.size();
-// if (hyp.size() > ref.size()) max = hyp.size();
-// max += 26; // we only need a +1 here, but let's pad for future use
-// S = new double[max][max];
-// P = new char[max][max];
-// }
- for ( i = 0; i <= ( int ) ref.size(); i++ ) {
- for ( j = 0; j <= ( int ) hyp.size(); j++ ) {
- S[i][j] = -1.0;
- P[i][j] = '0';
- }
- }
- S[0][0] = 0.0;
- for ( j = 0; j <= ( int ) hyp.size(); j++ ) {
- last_best = current_best;
- current_best = INF;
- first_good = current_first_good;
- current_first_good = -1;
- last_good = cur_last_good;
- cur_last_good = -1;
- last_peak = cur_last_peak;
- cur_last_peak = 0;
- for ( i = first_good; i <= ( int ) ref.size(); i++ ) {
- if ( i > last_good ) {
- break;
- }
- if ( S[i][j] < 0 ) {
- continue;
- }
- score = S[i][j];
- if ( ( j < ( int ) hyp.size() ) && ( score > last_best + BEAM_WIDTH ) ) {
- continue;
- }
- if ( current_first_good == -1 ) {
- current_first_good = i ;
- }
- if ( ( i < ( int ) ref.size() ) && ( j < ( int ) hyp.size() ) ) {
- if ( ( int ) refSpans.size() == 0 || ( int ) hypSpans.size() == 0 || spanIntersection ( refSpans.at ( i ), curHypSpans.at ( j ) ) ) {
- if ( ( int ) ( ref.at ( i ).compare ( hyp.at ( j ) ) ) == 0 ) {
- cost = match_cost + score;
- if ( ( S[i+1][j+1] == -1 ) || ( cost < S[i+1][j+1] ) ) {
- S[i+1][j+1] = cost;
- P[i+1][j+1] = ' ';
- }
- if ( cost < current_best ) {
- current_best = cost;
+ terAlignment terCalc::minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans )
+ {
+ double current_best = infinite;
+ double last_best = infinite;
+ int first_good = 0;
+ int current_first_good = 0;
+ int last_good = -1;
+ int cur_last_good = 0;
+ int last_peak = 0;
+ int cur_last_peak = 0;
+ int i, j;
+ double cost, icost, dcost;
+ double score;
+
+
+
+ NBR_BS_APPELS++;
+
+
+ for ( i = 0; i <= ( int ) ref.size(); i++ )
+ {
+ for ( j = 0; j <= ( int ) hyp.size(); j++ )
+ {
+ S[i][j] = -1.0;
+ P[i][j] = '0';
}
- if ( current_best == cost ) {
- cur_last_peak = i + 1;
+ }
+ S[0][0] = 0.0;
+ for ( j = 0; j <= ( int ) hyp.size(); j++ )
+ {
+ last_best = current_best;
+ current_best = infinite;
+ first_good = current_first_good;
+ current_first_good = -1;
+ last_good = cur_last_good;
+ cur_last_good = -1;
+ last_peak = cur_last_peak;
+ cur_last_peak = 0;
+ for ( i = first_good; i <= ( int ) ref.size(); i++ )
+ {
+ if ( i > last_good )
+ {
+ break;
+ }
+ if ( S[i][j] < 0 )
+ {
+ continue;
+ }
+ score = S[i][j];
+ if ( ( j < ( int ) hyp.size() ) && ( score > last_best + TAILLE_BEAM ) )
+ {
+ continue;
+ }
+ if ( current_first_good == -1 )
+ {
+ current_first_good = i ;
+ }
+ if ( ( i < ( int ) ref.size() ) && ( j < ( int ) hyp.size() ) )
+ {
+ if ( ( int ) refSpans.size() == 0 || ( int ) hypSpans.size() == 0 || trouverIntersection ( refSpans.at ( i ), curHypSpans.at ( j ) ) )
+ {
+ if ( ( int ) ( ref.at ( i ).compare ( hyp.at ( j ) ) ) == 0 )
+ {
+ cost = match_cost + score;
+ if ( ( S[i+1][j+1] == -1 ) || ( cost < S[i+1][j+1] ) )
+ {
+ S[i+1][j+1] = cost;
+ P[i+1][j+1] = 'A';
+ }
+ if ( cost < current_best )
+ {
+ current_best = cost;
+ }
+ if ( current_best == cost )
+ {
+ cur_last_peak = i + 1;
+ }
+ }
+ else
+ {
+ cost = substitute_cost + score;
+ if ( ( S[i+1][j+1] < 0 ) || ( cost < S[i+1][j+1] ) )
+ {
+ S[i+1][j+1] = cost;
+ P[i+1][j+1] = 'S';
+ if ( cost < current_best )
+ {
+ current_best = cost;
+ }
+ if ( current_best == cost )
+ {
+ cur_last_peak = i + 1 ;
+ }
+ }
+ }
+ }
+ }
+ cur_last_good = i + 1;
+ if ( j < ( int ) hyp.size() )
+ {
+ icost = score + insert_cost;
+ if ( ( S[i][j+1] < 0 ) || ( S[i][j+1] > icost ) )
+ {
+ S[i][j+1] = icost;
+ P[i][j+1] = 'I';
+ if ( ( cur_last_peak < i ) && ( current_best == icost ) )
+ {
+ cur_last_peak = i;
+ }
+ }
+ }
+ if ( i < ( int ) ref.size() )
+ {
+ dcost = score + delete_cost;
+ if ( ( S[ i+1][ j] < 0.0 ) || ( S[i+1][j] > dcost ) )
+ {
+ S[i+1][j] = dcost;
+ P[i+1][j] = 'D';
+ if ( i >= last_good )
+ {
+ last_good = i + 1 ;
+ }
+ }
+ }
}
- } else {
- cost = substitute_cost + score;
- if ( ( S[i+1][j+1] < 0 ) || ( cost < S[i+1][j+1] ) ) {
- S[i+1][j+1] = cost;
- P[i+1][j+1] = 'S';
- if ( cost < current_best ) {
- current_best = cost;
- }
- if ( current_best == cost ) {
- cur_last_peak = i + 1 ;
- }
+ }
+
+
+ int tracelength = 0;
+ i = ref.size();
+ j = hyp.size();
+ while ( ( i > 0 ) || ( j > 0 ) )
+ {
+ tracelength++;
+ if ( P[i][j] == 'A' )
+ {
+ i--;
+ j--;
}
- }
+ else
+ if ( P[i][j] == 'S' )
+ {
+ i--;
+ j--;
+ }
+ else
+ if ( P[i][j] == 'D' )
+ {
+ i--;
+ }
+ else
+ if ( P[i][j] == 'I' )
+ {
+ j--;
+ }
+ else
+ {
+ cerr << "ERROR : terCalc::minimizeDistanceEdition : Invalid path : " << P[i][j] << endl;
+ exit ( -1 );
+ }
}
- }
- cur_last_good = i + 1;
- if ( j < ( int ) hyp.size() ) {
- icost = score + insert_cost;
- if ( ( S[i][j+1] < 0 ) || ( S[i][j+1] > icost ) ) {
- S[i][j+1] = icost;
- P[i][j+1] = 'I';
- if ( ( cur_last_peak < i ) && ( current_best == icost ) ) {
- cur_last_peak = i;
- }
+ vector<char> path ( tracelength );
+ i = ref.size();
+ j = hyp.size();
+ while ( ( i > 0 ) || ( j > 0 ) )
+ {
+ path[--tracelength] = P[i][j];
+ if ( P[i][j] == 'A' )
+ {
+ i--;
+ j--;
+ }
+ else
+ if ( P[i][j] == 'S' )
+ {
+ i--;
+ j--;
+ }
+ else
+ if ( P[i][j] == 'D' )
+ {
+ i--;
+ }
+ else
+ if ( P[i][j] == 'I' )
+ {
+ j--;
+ }
}
- }
- if ( i < ( int ) ref.size() ) {
- dcost = score + delete_cost;
- if ( ( S[ i+1][ j] < 0.0 ) || ( S[i+1][j] > dcost ) ) {
- S[i+1][j] = dcost;
- P[i+1][j] = 'D';
- if ( i >= last_good ) {
- last_good = i + 1 ;
- }
+ terAlignment to_return;
+ to_return.numWords = ref.size();
+ to_return.alignment = path;
+ to_return.numEdits = S[ref.size() ][hyp.size() ];
+ to_return.hyp = hyp;
+ to_return.ref = ref;
+ to_return.averageWords = (int)ref.size();
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::minimizeDistanceEdition : to_return :" << endl << to_return.toString() << endl << "END DEBUG" << endl;
}
- }
- }
- }
-
+ return to_return;
- int tracelength = 0;
- i = ref.size();
- j = hyp.size();
- while ( ( i > 0 ) || ( j > 0 ) ) {
- tracelength++;
- if ( P[i][j] == ' ' ) {
- i--;
- j--;
- } else if ( P[i][j] == 'S' ) {
- i--;
- j--;
- } else if ( P[i][j] == 'D' ) {
- i--;
- } else if ( P[i][j] == 'I' ) {
- j--;
- } else {
- cerr << "ERROR : terCalc::MinEditDist : Invalid path : " << P[i][j] << endl;
- exit ( -1 );
}
- }
- vector<char> path ( tracelength );
- i = ref.size();
- j = hyp.size();
- while ( ( i > 0 ) || ( j > 0 ) ) {
- path[--tracelength] = P[i][j];
- if ( P[i][j] == ' ' ) {
- i--;
- j--;
- } else if ( P[i][j] == 'S' ) {
- i--;
- j--;
- } else if ( P[i][j] == 'D' ) {
- i--;
- } else if ( P[i][j] == 'I' ) {
- j--;
- }
- }
- terAlignment to_return;
- to_return.numWords = ref.size();
- to_return.alignment = path;
- to_return.numEdits = S[ref.size() ][hyp.size() ];
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::MinEditDist : to_return :" << endl << to_return.toString() << endl << "END DEBUG" << endl;
- }
- return to_return;
-
-}
-terAlignment terCalc::TER ( vector<string> hyp, vector<string> ref )
-{
- hashMapInfos rloc = BuildWordMatches ( hyp, ref );
- terAlignment cur_align = MinEditDist ( hyp, ref, hypSpans );
- vector<string> cur = hyp;
- cur_align.hyp = hyp;
- cur_align.ref = ref;
- cur_align.aftershift = hyp;
- double edits = 0;
+ terAlignment terCalc::TER ( vector<string> hyp, vector<string> ref )
+ {
+ hashMapInfos rloc = createConcordMots ( hyp, ref );
+ terAlignment cur_align = minimizeDistanceEdition ( hyp, ref, hypSpans );
+ vector<string> cur = hyp;
+ cur_align.hyp = hyp;
+ cur_align.ref = ref;
+ cur_align.aftershift = hyp;
+ double edits = 0;
// int numshifts = 0;
- vector<terShift> allshifts;
+ vector<terShift> allshifts;
// cerr << "Initial Alignment:" << endl << cur_align.toString() <<endl;
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::TER : cur_align :" << endl << cur_align.toString() << endl << "END DEBUG" << endl;
- }
- while ( true ) {
- bestShiftStruct returns;
- returns = CalcBestShift ( cur, hyp, ref, rloc, cur_align );
- if ( returns.m_empty ) {
- break;
- }
- terShift bestShift = returns.m_best_shift;
- cur_align = returns.m_best_align;
- edits += bestShift.cost;
- bestShift.alignment = cur_align.alignment;
- bestShift.aftershift = cur_align.aftershift;
- allshifts.push_back ( bestShift );
- cur = cur_align.aftershift;
- }
- terAlignment to_return;
- to_return = cur_align;
- to_return.allshifts = allshifts;
- to_return.numEdits += edits;
- NUM_SEGMENTS_SCORED++;
- return to_return;
-}
-bestShiftStruct terCalc::CalcBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment med_align )
-{
- bestShiftStruct to_return;
- bool anygain = false;
- bool herr[ ( int ) hyp.size() ];
- bool rerr[ ( int ) ref.size() ];
- int ralign[ ( int ) ref.size() ];
- FindAlignErr ( med_align, herr, rerr, ralign );
- vector<vecTerShift> poss_shifts;
- poss_shifts = GatherAllPossShifts ( cur, ref, rloc, med_align, herr, rerr, ralign );
- double curerr = med_align.numEdits;
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::CalcBestShift :" << endl;
- cerr << "Possible Shifts:" << endl;
- for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- ) {
- for ( int j = 0; j < ( int ) ( poss_shifts.at ( i ) ).size(); j++ ) {
- cerr << " [" << i << "] " << ( ( poss_shifts.at ( i ) ).at ( j ) ).toString() << endl;
- }
- }
- cerr << endl;
- cerr << "END DEBUG " << endl;
- }
- double cur_best_shift_cost = 0.0;
- terAlignment cur_best_align = med_align;
- terShift cur_best_shift;
-
-
-
- for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- ) {
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::CalcBestShift :" << endl;
- cerr << "Considering shift of length " << i << " (" << ( poss_shifts.at ( i ) ).size() << ")" << endl;
- cerr << "END DEBUG " << endl;
- }
- /* Consider shifts of length i+1 */
- double curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
- double maxfix = ( 2 * ( 1 + i ) );
- if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) {
- break;
- }
-
- for ( int s = 0; s < ( int ) ( poss_shifts.at ( i ) ).size(); s++ ) {
- curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
- if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) {
- break;
- }
- terShift curshift = ( poss_shifts.at ( i ) ).at ( s );
-
- alignmentStruct shiftReturns = PerformShift ( cur, curshift );
- vector<string> shiftarr = shiftReturns.nwords;
- vector<vecInt> curHypSpans = shiftReturns.aftershift;
-
- terAlignment curalign = MinEditDist ( shiftarr, ref, curHypSpans );
-
- curalign.hyp = hyp;
- curalign.ref = ref;
- curalign.aftershift = shiftarr;
-
- double gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost );
-
- // if (DEBUG) {
- // string testeuh=terAlignment join(" ", shiftarr);
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::CalcBestShift :" << endl;
- cerr << "Gain for " << curshift.toString() << " is " << gain << ". (result: [" << curalign.join ( " ", shiftarr ) << "]" << endl;
- cerr << "" << curalign.toString() << "\n" << endl;
- cerr << "END DEBUG " << endl;
- }
- // }
- //
- if ( ( gain > 0 ) || ( ( cur_best_shift_cost == 0 ) && ( gain == 0 ) ) ) {
- anygain = true;
- cur_best_shift = curshift;
- cur_best_shift_cost = curshift.cost;
- cur_best_align = curalign;
- // if (DEBUG)
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::CalcBestShift :" << endl;
- cerr << "Tmp Choosing shift: " << cur_best_shift.toString() << " gives:\n" << cur_best_align.toString() << "\n" << endl;
- cerr << "END DEBUG " << endl;
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::TER : cur_align :" << endl << cur_align.toString() << endl << "END DEBUG" << endl;
}
- }
- }
- }
- if ( anygain ) {
- to_return.m_best_shift = cur_best_shift;
- to_return.m_best_align = cur_best_align;
- to_return.m_empty = false;
- } else {
- to_return.m_empty = true;
- }
- return to_return;
-}
-
-void terCalc::FindAlignErr ( terAlignment align, bool* herr, bool* rerr, int* ralign )
-{
- int hpos = -1;
- int rpos = -1;
- if ( PRINT_DEBUG ) {
-
- cerr << "BEGIN DEBUG : terCalc::FindAlignErr : " << endl << align.toString() << endl;
- cerr << "END DEBUG " << endl;
- }
- for ( int i = 0; i < ( int ) align.alignment.size(); i++ ) {
- char sym = align.alignment[i];
- if ( sym == ' ' ) {
- hpos++;
- rpos++;
- herr[hpos] = false;
- rerr[rpos] = false;
- ralign[rpos] = hpos;
- } else if ( sym == 'S' ) {
- hpos++;
- rpos++;
- herr[hpos] = true;
- rerr[rpos] = true;
- ralign[rpos] = hpos;
- } else if ( sym == 'I' ) {
- hpos++;
- herr[hpos] = true;
- } else if ( sym == 'D' ) {
- rpos++;
- rerr[rpos] = true;
- ralign[rpos] = hpos;
- } else {
- cerr << "ERROR : terCalc::FindAlignErr : Invalid mini align sequence " << sym << " at pos " << i << endl;
- exit ( -1 );
- }
- }
-}
-
-vector<vecTerShift> terCalc::GatherAllPossShifts ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign )
-{
- vector<vecTerShift> to_return;
- // Don't even bother to look if shifts can't be done
- if ( ( MAX_SHIFT_SIZE <= 0 ) || ( MAX_SHIFT_DIST <= 0 ) ) {
-// terShift[][] to_return = new terShift[0][];
- return to_return;
- }
+ while ( true )
+ {
+ bestShiftStruct returns;
+ returns = findBestShift ( cur, hyp, ref, rloc, cur_align );
+ if ( returns.m_empty )
+ {
+ break;
+ }
+ terShift bestShift = returns.m_best_shift;
+ cur_align = returns.m_best_align;
+ edits += bestShift.cost;
+ bestShift.alignment = cur_align.alignment;
+ bestShift.aftershift = cur_align.aftershift;
+ allshifts.push_back ( bestShift );
+ cur = cur_align.aftershift;
+ }
+ terAlignment to_return;
+ to_return = cur_align;
+ to_return.allshifts = allshifts;
+ to_return.numEdits += edits;
+ NBR_SEGS_EVALUATED++;
+ return to_return;
+ }
+ bestShiftStruct terCalc::findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment med_align )
+ {
+ bestShiftStruct to_return;
+ bool anygain = false;
+ bool herr[ ( int ) hyp.size() ];
+ bool rerr[ ( int ) ref.size() ];
+ int ralign[ ( int ) ref.size() ];
+ calculateTerAlignment ( med_align, herr, rerr, ralign );
+ vector<vecTerShift> poss_shifts;
+
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift (after the calculateTerAlignment call) :" << endl;
+ cerr << "indices: ";
+ for (int l_i=0; l_i < ( int ) ref.size() ; l_i++)
+ {
+ cerr << l_i << "\t";
+ }
+ cerr << endl;
+ cerr << "hyp : \t"<<vectorToString(hyp ,"\t") << endl;
+ cerr << "cur : \t"<<vectorToString(cur ,"\t") << endl;
+ cerr << "ref : \t"<<vectorToString(ref ,"\t") << endl;
+ cerr << "herr : "<<vectorToString(herr,"\t",( int ) hyp.size()) << " | " << ( int ) hyp.size() <<endl;
+ cerr << "rerr : "<<vectorToString(rerr,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() <<endl;
+ cerr << "ralign : "<< vectorToString(ralign,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ poss_shifts = calculerPermutations ( cur, ref, rloc, med_align, herr, rerr, ralign );
+ double curerr = med_align.numEdits;
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Possible Shifts:" << endl;
+ for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- )
+ {
+ for ( int j = 0; j < ( int ) ( poss_shifts.at ( i ) ).size(); j++ )
+ {
+ cerr << " [" << i << "] " << ( ( poss_shifts.at ( i ) ).at ( j ) ).toString() << endl;
+ }
+ }
+ cerr << endl;
+ cerr << "END DEBUG " << endl;
+ }
+// exit(0);
+ double cur_best_shift_cost = 0.0;
+ terAlignment cur_best_align = med_align;
+ terShift cur_best_shift;
- vector<vecTerShift> allshifts ( MAX_SHIFT_SIZE + 1 );
-// ArrayList[] allshifts = new ArrayList[MAX_SHIFT_SIZE+1];
-// for (int i = 0; i < allshifts.length; i++)
-// {
-// allshifts[i] = new ArrayList();
-// }
-// List hyplist = Arrays.asList(hyp);
- for ( int start = 0; start < ( int ) hyp.size(); start++ ) {
- string subVectorHypString = vectorToString ( subVector ( hyp, start, start + 1 ) );
- if ( ! rloc.trouve ( subVectorHypString ) ) {
- continue;
- }
+ for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- )
+ {
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Considering shift of length " << i << " (" << ( poss_shifts.at ( i ) ).size() << ")" << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ /* Consider shifts of length i+1 */
+ double curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
+ double maxfix = ( 2 * ( 1 + i ) );
+ if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) )
+ {
+ break;
+ }
- bool ok = false;
- vector<int> mtiVec = rloc.getValue ( subVectorHypString );
- vector<int>::iterator mti = mtiVec.begin();
- while ( mti != mtiVec.end() && ( ! ok ) ) {
- int moveto = ( *mti );
- mti++;
- if ( ( start != ralign[moveto] ) && ( ( ralign[moveto] - start ) <= MAX_SHIFT_DIST ) && ( ( start - ralign[moveto] - 1 ) <= MAX_SHIFT_DIST ) ) {
- ok = true;
- }
- }
- if ( ! ok ) {
- continue;
+ for ( int s = 0; s < ( int ) ( poss_shifts.at ( i ) ).size(); s++ )
+ {
+ curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
+ if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) )
+ {
+ break;
+ }
+ terShift curshift = ( poss_shifts.at ( i ) ).at ( s );
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "cur : "<< join(" ",cur) << endl;
+ cerr << "curshift : "<< curshift.toString() << endl;
+
+ }
+ alignmentStruct shiftReturns = permuter ( cur, curshift );
+ vector<string> shiftarr = shiftReturns.nwords;
+ vector<vecInt> curHypSpans = shiftReturns.aftershift;
+
+ if ( PRINT_DEBUG )
+ {
+ cerr << "shiftarr : "<< join(" ",shiftarr) << endl;
+// cerr << "curHypSpans : "<< curHypSpans.toString() << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ terAlignment curalign = minimizeDistanceEdition ( shiftarr, ref, curHypSpans );
+
+ curalign.hyp = hyp;
+ curalign.ref = ref;
+ curalign.aftershift = shiftarr;
+
+
+ double gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost );
+
+ // if (DEBUG) {
+ // string testeuh=terAlignment join(" ", shiftarr);
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Gain for " << curshift.toString() << " is " << gain << ". (result: [" << curalign.join ( " ", shiftarr ) << "]" << endl;
+ cerr << "Details of gains : gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost )"<<endl;
+ cerr << "Details of gains : gain = ("<<cur_best_align.numEdits << "+" << cur_best_shift_cost << ") - (" << curalign.numEdits << "+" << curshift.cost << ")"<<endl;
+ cerr << "" << curalign.toString() << "\n" << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ // }
+ //
+ if ( ( gain > 0 ) || ( ( cur_best_shift_cost == 0 ) && ( gain == 0 ) ) )
+ {
+ anygain = true;
+ cur_best_shift = curshift;
+ cur_best_shift_cost = curshift.cost;
+ cur_best_align = curalign;
+ // if (DEBUG)
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Tmp Choosing shift: " << cur_best_shift.toString() << " gives:\n" << cur_best_align.toString() << "\n" << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ }
+ }
+ }
+ if ( anygain )
+ {
+ to_return.m_best_shift = cur_best_shift;
+ to_return.m_best_align = cur_best_align;
+ to_return.m_empty = false;
+ }
+ else
+ {
+ to_return.m_empty = true;
+ }
+ return to_return;
}
- ok = true;
- for ( int end = start; ( ok && ( end < ( int ) hyp.size() ) && ( end < start + MAX_SHIFT_SIZE ) ); end++ ) {
- /* check if cand is good if so, add it */
- vector<string> cand = subVector ( hyp, start, end + 1 );
- ok = false;
- if ( ! ( rloc.trouve ( vectorToString ( cand ) ) ) ) {
- continue;
- }
- bool any_herr = false;
+ void terCalc::calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign )
+ {
+ int hpos = -1;
+ int rpos = -1;
+ if ( PRINT_DEBUG )
+ {
- for ( int i = 0; ( ( i <= ( end - start ) ) && ( ! any_herr ) ); i++ ) {
- if ( herr[start+i] ) {
- any_herr = true;
+ cerr << "BEGIN DEBUG : terCalc::calculateTerAlignment : " << endl << align.toString() << endl;
+ cerr << "END DEBUG " << endl;
}
- }
- if ( any_herr == false ) {
- ok = true;
- continue;
- }
-
- vector<int> movetoitVec;
- movetoitVec = rloc.getValue ( ( string ) vectorToString ( cand ) );
- vector<int>::iterator movetoit = movetoitVec.begin();
- while ( movetoit != movetoitVec.end() ) {
- int moveto = ( *movetoit );
- movetoit++;
- if ( ! ( ( ralign[moveto] != start ) && ( ( ralign[moveto] < start ) || ( ralign[moveto] > end ) ) && ( ( ralign[moveto] - start ) <= MAX_SHIFT_DIST ) && ( ( start - ralign[moveto] ) <= MAX_SHIFT_DIST ) ) ) {
- continue;
+ for ( int i = 0; i < ( int ) align.alignment.size(); i++ )
+ {
+ herr[i] = false;
+ rerr[i] = false;
+ ralign[i] = -1;
+ }
+ for ( int i = 0; i < ( int ) align.alignment.size(); i++ )
+ {
+ char sym = align.alignment[i];
+ if ( sym == 'A' )
+ {
+ hpos++;
+ rpos++;
+ herr[hpos] = false;
+ rerr[rpos] = false;
+ ralign[rpos] = hpos;
+ }
+ else
+ if ( sym == 'S' )
+ {
+ hpos++;
+ rpos++;
+ herr[hpos] = true;
+ rerr[rpos] = true;
+ ralign[rpos] = hpos;
+ }
+ else
+ if ( sym == 'I' )
+ {
+ hpos++;
+ herr[hpos] = true;
+ }
+ else
+ if ( sym == 'D' )
+ {
+ rpos++;
+ rerr[rpos] = true;
+ ralign[rpos] = hpos+1;
+ }
+ else
+ {
+ cerr << "ERROR : terCalc::calculateTerAlignment : Invalid mini align sequence " << sym << " at pos " << i << endl;
+ exit ( -1 );
+ }
}
- ok = true;
-
- /* check to see if there are any errors in either string
- (only move if this is the case!)
- */
+ }
- bool any_rerr = false;
- for ( int i = 0; ( i <= end - start ) && ( ! any_rerr ); i++ ) {
- if ( rerr[moveto+i] ) {
- any_rerr = true;
- }
- }
- if ( ! any_rerr ) {
- continue;
+ vector<vecTerShift> terCalc::calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign )
+ {
+ vector<vecTerShift> to_return;
+ if ( ( TAILLE_PERMUT_MAX <= 0 ) || ( DIST_MAX_PERMUT <= 0 ) )
+ {
+ return to_return;
}
- for ( int roff = -1; roff <= ( end - start ); roff++ ) {
- terShift topush;
- bool topushNull = true;
- if ( ( roff == -1 ) && ( moveto == 0 ) ) {
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::GatherAllPossShifts 01 : " << endl << "Consider making " << start << "..." << end << " moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: -1" << endl << "END DEBUG" << endl;
+ vector<vecTerShift> allshifts ( TAILLE_PERMUT_MAX + 1 );
+ for ( int start = 0; start < ( int ) hyp.size(); start++ )
+ {
+ string subVectorHypString = vectorToString ( subVector ( hyp, start, start + 1 ) );
+ if ( ! rloc.trouve ( subVectorHypString ) )
+ {
+ continue;
}
- terShift t01 ( start, end, -1, -1 );
- topush = t01;
- topushNull = false;
- } else if ( ( start != ralign[moveto+roff] ) && ( ( roff == 0 ) || ( ralign[moveto+roff] != ralign[moveto] ) ) ) {
- int newloc = ralign[moveto+roff];
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::GatherAllPossShifts 02 : " << endl << "Consider making " << start << "..." << end << " moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: " << newloc << endl << "END DEBUG" << endl;
+ bool ok = false;
+ vector<int> mtiVec = rloc.getValue ( subVectorHypString );
+ vector<int>::iterator mti = mtiVec.begin();
+ while ( mti != mtiVec.end() && ( ! ok ) )
+ {
+ int moveto = ( *mti );
+ mti++;
+ if ( ( start != ralign[moveto] ) && ( ( ralign[moveto] - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign[moveto] - 1 ) <= DIST_MAX_PERMUT ) )
+ {
+ ok = true;
+ }
}
- terShift t02 ( start, end, moveto + roff, newloc );
- topush = t02;
- topushNull = false;
- }
- if ( !topushNull ) {
- topush.shifted = cand;
- topush.cost = shift_cost;
- if ( PRINT_DEBUG ) {
-
- cerr << "BEGIN DEBUG : terCalc::GatherAllPossShifts 02 : " << endl;
- cerr << "start : " << start << endl;
- cerr << "end : " << end << endl;
- cerr << "end - start : " << end - start << endl;
- cerr << "END DEBUG " << endl;
+ if ( ! ok )
+ {
+ continue;
+ }
+ ok = true;
+ for ( int end = start; ( ok && ( end < ( int ) hyp.size() ) && ( end < start + TAILLE_PERMUT_MAX ) ); end++ )
+ {
+ /* check if cand is good if so, add it */
+ vector<string> cand = subVector ( hyp, start, end + 1 );
+ ok = false;
+ if ( ! ( rloc.trouve ( vectorToString ( cand ) ) ) )
+ {
+ continue;
+ }
+
+ bool any_herr = false;
+
+ for ( int i = 0; ( ( i <= ( end - start ) ) && ( ! any_herr ) ); i++ )
+ {
+ if ( herr[start+i] )
+ {
+ any_herr = true;
+ }
+ }
+ if ( any_herr == false )
+ {
+ ok = true;
+ continue;
+ }
+
+ vector<int> movetoitVec;
+ movetoitVec = rloc.getValue ( ( string ) vectorToString ( cand ) );
+// cerr << "CANDIDATE " << ( string ) vectorToString ( cand ) <<" PLACED : " << ( string ) vectorToString ( movetoitVec," ") << endl;
+ vector<int>::iterator movetoit = movetoitVec.begin();
+ while ( movetoit != movetoitVec.end() )
+ {
+ int moveto = ( *movetoit );
+ movetoit++;
+ if ( ! ( ( ralign[moveto] != start ) && ( ( ralign[moveto] < start ) || ( ralign[moveto] > end ) ) && ( ( ralign[moveto] - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign[moveto] ) <= DIST_MAX_PERMUT ) ) )
+ {
+ continue;
+ }
+ ok = true;
+
+ /* check to see if there are any errors in either string
+ (only move if this is the case!)
+ */
+
+ bool any_rerr = false;
+ for ( int i = 0; ( i <= end - start ) && ( ! any_rerr ); i++ )
+ {
+ if ( rerr[moveto+i] )
+ {
+ any_rerr = true;
+ }
+ }
+ if ( ! any_rerr )
+ {
+ continue;
+ }
+ for ( int roff = -1; roff <= ( end - start ); roff++ )
+ {
+ terShift topush;
+ bool topushNull = true;
+ if ( ( roff == -1 ) && ( moveto == 0 ) )
+ {
+ if ( PRINT_DEBUG )
+ {
+
+ cerr << "BEGIN DEBUG : terCalc::calculerPermutations 01 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: -1" << endl << "END DEBUG" << endl;
+ }
+ terShift t01 ( start, end, -1, -1 );
+ topush = t01;
+ topushNull = false;
+ }
+ else
+ if ( ( start != ralign[moveto+roff] ) && ( ( roff == 0 ) || ( ralign[moveto+roff] != ralign[moveto] ) ) )
+ {
+ int newloc = ralign[moveto+roff];
+ if ( PRINT_DEBUG )
+ {
+
+ cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: " << newloc << endl << "END DEBUG" << endl;
+ }
+ terShift t02 ( start, end, moveto + roff, newloc );
+ topush = t02;
+ topushNull = false;
+ }
+ if ( !topushNull )
+ {
+ topush.shifted = cand;
+ topush.cost = shift_cost;
+ if ( PRINT_DEBUG )
+ {
+
+ cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl;
+ cerr << "start : " << start << endl;
+ cerr << "end : " << end << endl;
+ cerr << "end - start : " << end - start << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ ( allshifts.at ( end - start ) ).push_back ( topush );
+ }
+ }
+ }
}
- ( allshifts.at ( end - start ) ).push_back ( topush );
- }
}
- }
+ to_return.clear();
+ for ( int i = 0; i < TAILLE_PERMUT_MAX + 1; i++ )
+ {
+ to_return.push_back ( ( vecTerShift ) allshifts.at ( i ) );
+ }
+ return to_return;
}
- }
-// vector<vecTerShift> to_return;
- to_return.clear();
-// terShift[][] to_return = new terShift[MAX_SHIFT_SIZE+1][];
- for ( int i = 0; i < MAX_SHIFT_SIZE + 1; i++ ) {
-// to_return[i] = (terShift[]) allshifts[i].toArray(new terShift[0]);
- to_return.push_back ( ( vecTerShift ) allshifts.at ( i ) );
- }
- return to_return;
-}
-alignmentStruct terCalc::PerformShift ( vector<string> words, terShift s )
-{
- return PerformShift ( words, s.start, s.end, s.newloc );
-}
-
+ alignmentStruct terCalc::permuter ( vector<string> words, terShift s )
+ {
+ return permuter ( words, s.start, s.end, s.newloc );
+ }
-alignmentStruct terCalc::PerformShift ( vector<string> words, int start, int end, int newloc )
-{
- int c = 0;
- vector<string> nwords ( words );
- vector<vecInt> spans ( ( int ) hypSpans.size() );
- alignmentStruct toreturn;
-// ON EST ICI
-// if((int)hypSpans.size()>0) spans = new TERintpair[(int)hypSpans.size()];
-// if(DEBUG) {
- if ( PRINT_DEBUG ) {
- if ( ( int ) hypSpans.size() > 0 ) {
- cerr << "BEGIN DEBUG : terCalc::PerformShift :" << endl << "word length: " << ( int ) words.size() << " span length: " << ( int ) hypSpans.size() << endl << "END DEBUG " << endl;
- } else {
- cerr << "BEGIN DEBUG : terCalc::PerformShift :" << endl << "word length: " << ( int ) words.size() << " span length: null" << endl << "END DEBUG " << endl;
- }
- }
+ alignmentStruct terCalc::permuter ( vector<string> words, int start, int end, int newloc )
+ {
+ int c = 0;
+ vector<string> nwords ( words );
+ vector<vecInt> spans ( ( int ) hypSpans.size() );
+ alignmentStruct to_return;
+ if ( PRINT_DEBUG )
+ {
+
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: " << ( int ) hypSpans.size() << endl ;
+ }
+ else
+ {
+ cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: null" << endl ;
+ }
+ cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << join(" ",words) << " start: " << start << " end: " << end << " newloc "<< newloc << endl << "END DEBUG " << endl;
+ }
+ if (newloc >= ( int ) words.size())
+ {
+ if ( PRINT_DEBUG )
+ {
+ cerr << "WARNING: Relocation over the size of the hypothesis, replacing at the end of it."<<endl;
+ }
+ newloc = ( int ) words.size()-1;
+ }
+
// }
- if ( newloc == -1 ) {
- for ( int i = start; i <= end; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = 0; i <= start - 1; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; i < ( int ) words.size(); i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- } else {
- if ( newloc < start ) {
- for ( int i = 0; i <= newloc; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = start; i <= end; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = newloc + 1; i <= start - 1; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; i < ( int ) words.size(); i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- } else {
- if ( newloc > end ) {
- for ( int i = 0; i <= start - 1; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; i <= newloc; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = start; i <= end; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = newloc + 1; i < ( int ) words.size(); i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- } else {
- // we are moving inside of ourselves
- for ( int i = 0; i <= start - 1; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; ( i < ( int ) words.size() ) && ( i <= ( end + ( newloc - start ) ) ); i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = start; i <= end; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
+ if ( newloc == -1 )
+ {
+ for ( int i = start; i <= end;i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = 0; i <= start - 1;i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; i < ( int ) words.size();i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
}
- for ( int i = ( end + ( newloc - start ) + 1 ); i < ( int ) words.size(); i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
+ else
+ {
+ if ( newloc < start )
+ {
+
+ for ( int i = 0; i < newloc; i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = start; i <= end;i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = newloc ; i < start ;i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; i < ( int ) words.size();i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ }
+ else
+ {
+ if ( newloc > end )
+ {
+ for ( int i = 0; i <= start - 1; i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; i <= newloc;i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = start; i <= end;i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = newloc + 1; i < ( int ) words.size();i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ }
+ else
+ {
+ // we are moving inside of ourselves
+ for ( int i = 0; i <= start - 1; i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; ( i < ( int ) words.size() ) && ( i <= ( end + ( newloc - start ) ) ); i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = start; i <= end;i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = ( end + ( newloc - start ) + 1 ); i < ( int ) words.size();i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ }
+ }
}
- }
+ NBR_PERMUTS_CONSID++;
+
+ if ( PRINT_DEBUG )
+ {
+ cerr << "nwords" << join(" ",nwords) << endl;
+// cerr << "spans" << spans. << endl;
+ }
+
+ to_return.nwords = nwords;
+ to_return.aftershift = spans;
+ return to_return;
+ }
+ void terCalc::setDebugMode ( bool b )
+ {
+ PRINT_DEBUG = b;
}
- }
- NUM_SHIFTS_CONSIDERED++;
-
- toreturn.nwords = nwords;
- toreturn.aftershift = spans;
- return toreturn;
-}
-void terCalc::setDebugMode ( bool b )
-{
- PRINT_DEBUG = b;
-}
}
diff --git a/mert/TER/tercalc.h b/mert/TER/tercalc.h
index 9e1a01f65..92d9caf2b 100644
--- a/mert/TER/tercalc.h
+++ b/mert/TER/tercalc.h
@@ -1,5 +1,25 @@
-#ifndef MERT_TER_TER_CALC_H_
-#define MERT_TER_TER_CALC_H_
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
+#ifndef _TERCPPTERCALC_H__
+#define _TERCPPTERCALC_H__
#include <vector>
#include <stdio.h>
@@ -21,62 +41,63 @@ namespace TERCpp
{
// typedef size_t WERelement[2];
// Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del)
-typedef vector<terShift> vecTerShift;
-/**
- @author
-*/
-class terCalc
-{
-private :
+ typedef vector<terShift> vecTerShift;
+ /**
+ @author
+ */
+ class terCalc
+ {
+ private :
// Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del)
- WERalignment l_WERalignment;
-// HashMap contenant les caleurs de hash de chaque mot
- hashMap bagOfWords;
- int MAX_SHIFT_SIZE;
- /* Variables for some internal counting. */
- int NUM_SEGMENTS_SCORED;
- int NUM_SHIFTS_CONSIDERED;
- int NUM_BEAM_SEARCH_CALLS;
- int MAX_SHIFT_DIST;
- bool PRINT_DEBUG;
+ WERalignment l_WERalignment;
+// HashMap contenant les valeurs de hash de chaque mot
+ hashMap bagOfWords;
+ int TAILLE_PERMUT_MAX;
+ // Increments internes
+ int NBR_SEGS_EVALUATED;
+ int NBR_PERMUTS_CONSID;
+ int NBR_BS_APPELS;
+ int DIST_MAX_PERMUT;
+ bool PRINT_DEBUG;
- /* These are resized by the MIN_EDIT_DIST code if they aren't big enough */
- double S[1000][1000];
- char P[1000][1000];
- vector<vecInt> refSpans;
- vector<vecInt> hypSpans;
- int BEAM_WIDTH;
+ // Utilisés dans minDistEdit et ils ne sont pas réajustés
+ double S[1000][1000];
+ char P[1000][1000];
+ vector<vecInt> refSpans;
+ vector<vecInt> hypSpans;
+ int TAILLE_BEAM;
-public:
- int shift_cost;
- int insert_cost;
- int delete_cost;
- int substitute_cost;
- int match_cost;
- double INF;
- terCalc();
+ public:
+ int shift_cost;
+ int insert_cost;
+ int delete_cost;
+ int substitute_cost;
+ int match_cost;
+ double infinite;
+ terCalc();
// ~terCalc();
// size_t* hashVec ( vector<string> s );
- void setDebugMode ( bool b );
- int WERCalculation ( size_t * ref, size_t * hyp );
- int WERCalculation ( vector<string> ref, vector<string> hyp );
- int WERCalculation ( vector<int> ref, vector<int> hyp );
+ void setDebugMode ( bool b );
+// int WERCalculation ( size_t * ref, size_t * hyp );
+// int WERCalculation ( vector<string> ref, vector<string> hyp );
+// int WERCalculation ( vector<int> ref, vector<int> hyp );
+ terAlignment WERCalculation ( vector<string> hyp, vector<string> ref );
// string vectorToString(vector<string> vec);
// vector<string> subVector(vector<string> vec, int start, int end);
- hashMapInfos BuildWordMatches ( vector<string> hyp, vector<string> ref );
- terAlignment MinEditDist ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans );
- bool spanIntersection ( vecInt refSpan, vecInt hypSpan );
- terAlignment TER ( vector<string> hyp, vector<string> ref , float avRefLength );
- terAlignment TER ( vector<string> hyp, vector<string> ref );
- terAlignment TER ( vector<int> hyp, vector<int> ref );
- bestShiftStruct CalcBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment cur_align );
- void FindAlignErr ( terAlignment align, bool* herr, bool* rerr, int* ralign );
- vector<vecTerShift> GatherAllPossShifts ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign );
- alignmentStruct PerformShift ( vector<string> words, terShift s );
- alignmentStruct PerformShift ( vector<string> words, int start, int end, int newloc );
-};
+ hashMapInfos createConcordMots ( vector<string> hyp, vector<string> ref );
+ terAlignment minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans );
+ bool trouverIntersection ( vecInt refSpan, vecInt hypSpan );
+ terAlignment TER ( vector<string> hyp, vector<string> ref , float avRefLength );
+ terAlignment TER ( vector<string> hyp, vector<string> ref );
+ terAlignment TER ( vector<int> hyp, vector<int> ref );
+ bestShiftStruct findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment cur_align );
+ void calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign );
+ vector<vecTerShift> calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign );
+ alignmentStruct permuter ( vector<string> words, terShift s );
+ alignmentStruct permuter ( vector<string> words, int start, int end, int newloc );
+ };
}
-#endif // MERT_TER_TER_CALC_H_
+#endif
diff --git a/mert/TER/tools.cpp b/mert/TER/tools.cpp
index 2d910ec05..64e1483b6 100644
--- a/mert/TER/tools.cpp
+++ b/mert/TER/tools.cpp
@@ -1,545 +1,772 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
#include "tools.h"
using namespace std;
+using namespace boost::xpressive;
namespace Tools
{
-string vectorToString ( vector<string> vec )
-{
- string retour ( "" );
- for ( vector<string>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
- if ( vecIter == vec.begin() ) {
- retour += ( *vecIter );
- } else {
- retour += "\t" + ( *vecIter );
+ string vectorToString ( vector<string> vec )
+ {
+ string retour ( "" );
+ for ( vector<string>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
+ {
+ if ( vecIter == vec.begin() )
+ {
+ retour += ( *vecIter );
+ }
+ else
+ {
+ retour += "\t" + ( *vecIter );
+ }
+ }
+ return retour;
+ }
+ string vectorToString ( vector<char> vec )
+ {
+ stringstream retour;
+ retour.str("");
+ for ( vector<char>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
+ {
+ if ( vecIter == vec.begin() )
+ {
+ retour << ( *vecIter );
+ }
+ else
+ {
+ retour << "\t" << ( *vecIter );
+ }
+ }
+ return retour.str();
+ }
+ string vectorToString ( vector<int> vec )
+ {
+ stringstream retour;
+ retour.str("");
+ for ( vector<int>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
+ {
+ if ( vecIter == vec.begin() )
+ {
+ retour << ( *vecIter );
+ }
+ else
+ {
+ retour << "\t" << ( *vecIter );
+ }
+ }
+ return retour.str();
}
- }
- return retour;
-}
-string vectorToString ( vector< string > vec, string s )
-{
- string retour ( "" );
- for ( vector<string>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
- if ( vecIter == vec.begin() ) {
- retour += ( *vecIter );
- } else {
- retour += s + ( *vecIter );
+ string vectorToString ( vector< string > vec, string s )
+ {
+ string retour ( "" );
+ for ( vector<string>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
+ {
+ if ( vecIter == vec.begin() )
+ {
+ retour += ( *vecIter );
+ }
+ else
+ {
+ retour += s + ( *vecIter );
+ }
+ }
+ return retour;
+
}
- }
- return retour;
-}
+ string vectorToString ( vector< char > vec, string s )
+ {
+ stringstream retour;
+ retour.str("");
+ for ( vector<char>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
+ {
+ if ( vecIter == vec.begin() )
+ {
+ retour << ( *vecIter );
+ }
+ else
+ {
+ retour << s << ( *vecIter );
+ }
+ }
+ return retour.str();
-vector<string> subVector ( vector<string> vec, int start, int end )
-{
- vector<string> retour;
- if ( start > end ) {
- cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
- exit ( 0 );
- }
- for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
-}
+ }
-vector<int> subVector ( vector<int> vec, int start, int end )
-{
- vector<int> retour;
- if ( start > end ) {
- cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
- exit ( 0 );
- }
- for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
-}
+ string vectorToString ( vector< int > vec, string s )
+ {
+ stringstream retour;
+ retour.str("");
+ for ( vector<int>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
+ {
+ if ( vecIter == vec.begin() )
+ {
+ retour << ( *vecIter );
+ }
+ else
+ {
+ retour << s << ( *vecIter );
+ }
+ }
+ return retour.str();
-vector<float> subVector ( vector<float> vec, int start, int end )
-{
- vector<float> retour;
- if ( start > end ) {
- cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
- exit ( 0 );
- }
- for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
-}
+ }
+
+ string vectorToString ( vector< bool > vec, string s )
+ {
+ stringstream retour;
+ retour.str("");
+ for ( vector<bool>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
+ {
+ if ( vecIter == vec.begin() )
+ {
+ retour << ( *vecIter );
+ }
+ else
+ {
+ retour << s << ( *vecIter );
+ }
+ }
+ return retour.str();
-vector<string> copyVector ( vector<string> vec )
-{
- vector<string> retour;
- for ( int i = 0; i < ( int ) vec.size(); i++ ) {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
-}
-vector<int> copyVector ( vector<int> vec )
-{
- vector<int> retour;
- for ( int i = 0; i < ( int ) vec.size(); i++ ) {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
-}
-vector<float> copyVector ( vector<float> vec )
-{
- vector<float> retour;
- for ( int i = 0; i < ( int ) vec.size(); i++ ) {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
-}
-vector<string> stringToVector ( string s, string tok )
-{
- vector<string> to_return;
- string to_push ( "" );
- bool pushed = false;
- string::iterator sIt;
- for ( sIt = s.begin(); sIt < s.end(); sIt++ ) {
- pushed = false;
- for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) {
- if ( ( *sIt ) == ( *sTok ) ) {
- to_return.push_back ( to_push );
- to_push = "";
- pushed = true;
- }
}
- if ( !pushed ) {
- to_push.push_back ( ( *sIt ) );
+ string vectorToString ( char* vec, string s , int taille)
+ {
+ stringstream retour;
+ retour.str("");
+ int l_i;
+ for ( l_i=0; l_i < taille ; l_i++)
+ {
+ if ( l_i == 0 )
+ {
+ retour << vec[l_i];
+ }
+ else
+ {
+ retour << s << vec[l_i];
+ }
+ }
+ return retour.str();
+
}
- }
- to_return.push_back ( to_push );
- return to_return;
-}
-vector<int> stringToVectorInt ( string s, string tok )
-{
- vector<int> to_return;
- string to_push ( "" );
- bool pushed = false;
- string::iterator sIt;
- for ( sIt = s.begin(); sIt < s.end(); sIt++ ) {
- pushed = false;
- for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) {
- if ( ( *sIt ) == ( *sTok ) ) {
- if ( ( int ) to_push.length() > 0 ) {
- to_return.push_back ( atoi ( to_push.c_str() ) );
+
+ string vectorToString ( int* vec, string s , int taille)
+ {
+ stringstream retour;
+ retour.str("");
+ int l_i;
+ for ( l_i=0; l_i < taille ; l_i++)
+ {
+ if ( l_i == 0 )
+ {
+ retour << vec[l_i];
+ }
+ else
+ {
+ retour << s << vec[l_i];
+ }
}
- to_push = "";
- pushed = true;
- }
+ return retour.str();
+
}
- if ( !pushed ) {
- to_push.push_back ( ( *sIt ) );
+
+ string vectorToString ( bool* vec, string s , int taille)
+ {
+ stringstream retour;
+ retour.str("");
+ int l_i;
+ for ( l_i=0; l_i < taille ; l_i++)
+ {
+ if ( l_i == 0 )
+ {
+ retour << vec[l_i];
+ }
+ else
+ {
+ retour << s << vec[l_i];
+ }
+ }
+ return retour.str();
+
}
- }
- if ( ( int ) to_push.length() > 0 ) {
- to_return.push_back ( atoi ( to_push.c_str() ) );
- }
- return to_return;
-}
-vector<float> stringToVectorFloat ( string s, string tok )
-{
- vector<float> to_return;
- string to_push ( "" );
- bool pushed = false;
- string::iterator sIt;
- for ( sIt = s.begin(); sIt < s.end(); sIt++ ) {
- pushed = false;
- for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) {
- if ( ( *sIt ) == ( *sTok ) ) {
- if ( ( int ) to_push.length() > 0 ) {
- to_return.push_back ( atof ( to_push.c_str() ) );
+
+    vector<string> subVector ( vector<string> vec, int start, int end ) // copy of vec[start, end), cut at vec.size()
+    {
+        vector<string> retour;
+        if ( start > end )
+        {
+            cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
+            exit ( 1 ); // a reversed range is fatal: leave with a failure status, not 0
+        }
+        for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) // stops at end or at vec.size(), whichever comes first
+        {
+            retour.push_back ( vec.at ( i ) );
         }
- to_push = "";
- pushed = true;
- }
+        return retour;
     }
- if ( !pushed ) {
- to_push.push_back ( ( *sIt ) );
+
+    vector<int> subVector ( vector<int> vec, int start, int end ) // copy of vec[start, end), cut at vec.size()
+    {
+        vector<int> retour;
+        if ( start > end )
+        {
+            cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
+            exit ( 1 ); // a reversed range is fatal: leave with a failure status, not 0
+        }
+        for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) // stops at end or at vec.size(), whichever comes first
+        {
+            retour.push_back ( vec.at ( i ) );
+        }
+        return retour;
     }
- }
- if ( ( int ) to_push.length() > 0 ) {
- to_return.push_back ( atoi ( to_push.c_str() ) );
- }
- return to_return;
-}
-
-string lowerCase ( string str )
-{
- for ( int i = 0; i < ( int ) str.size(); i++ ) {
- if ( ( str[i] >= 0x41 ) && ( str[i] <= 0x5A ) ) {
- str[i] = str[i] + 0x20;
+
+    vector<float> subVector ( vector<float> vec, int start, int end ) // copy of vec[start, end), cut at vec.size()
+    {
+        vector<float> retour;
+        if ( start > end )
+        {
+            cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
+            exit ( 1 ); // a reversed range is fatal: leave with a failure status, not 0
+        }
+        for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) // stops at end or at vec.size(), whichever comes first
+        {
+            retour.push_back ( vec.at ( i ) );
+        }
+        return retour;
+    }
+
+ vector<string> copyVector ( vector<string> vec )
+ {
+ vector<string> retour;
+ for ( int i = 0; i < ( int ) vec.size(); i++ )
+ {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+ }
+ vector<int> copyVector ( vector<int> vec )
+ {
+ vector<int> retour;
+ for ( int i = 0; i < ( int ) vec.size(); i++ )
+ {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+ }
+ vector<float> copyVector ( vector<float> vec )
+ {
+ vector<float> retour;
+ for ( int i = 0; i < ( int ) vec.size(); i++ )
+ {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+ }
+ vector<string> stringToVector ( string s, string tok )
+ {
+ vector<string> to_return;
+ string to_push ( "" );
+ bool pushed = false;
+ string::iterator sIt;
+ for ( sIt = s.begin(); sIt < s.end(); sIt++ )
+ {
+ pushed = false;
+ for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ )
+ {
+ if ( ( *sIt ) == ( *sTok ) )
+ {
+ to_return.push_back ( to_push );
+ to_push = "";
+ pushed = true;
+ }
+ }
+ if ( !pushed )
+ {
+ to_push.push_back ( ( *sIt ) );
+ }
+ }
+ to_return.push_back ( to_push );
+ return to_return;
+ }
+ vector<int> stringToVectorInt ( string s, string tok )
+ {
+ vector<int> to_return;
+ string to_push ( "" );
+ bool pushed = false;
+ string::iterator sIt;
+ for ( sIt = s.begin(); sIt < s.end(); sIt++ )
+ {
+ pushed = false;
+ for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ )
+ {
+ if ( ( *sIt ) == ( *sTok ) )
+ {
+ if ( ( int ) to_push.length() > 0 )
+ {
+ to_return.push_back ( atoi ( to_push.c_str() ) );
+ }
+ to_push = "";
+ pushed = true;
+ }
+ }
+ if ( !pushed )
+ {
+ to_push.push_back ( ( *sIt ) );
+ }
+ }
+ if ( ( int ) to_push.length() > 0 )
+ {
+ to_return.push_back ( atoi ( to_push.c_str() ) );
+ }
+ return to_return;
+ }
+    // Splits s at every character contained in tok and converts each
+    // non-empty piece to float with atof. Empty pieces, produced by
+    // consecutive delimiters, are skipped. The piece that follows the last
+    // delimiter is converted with atof too: atoi would drop its fraction
+    // ("0.5 1.75" would give 0.5 and 1 instead of 0.5 and 1.75).
+    vector<float> stringToVectorFloat ( string s, string tok )
+    {
+        vector<float> to_return;
+        string to_push ( "" );
+        bool pushed = false;
+        string::iterator sIt;
+        for ( sIt = s.begin(); sIt < s.end(); sIt++ )
+        {
+            pushed = false;
+            for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ )
+            {
+                if ( ( *sIt ) == ( *sTok ) )
+                {
+                    // Delimiter: flush the pending piece, if there is one.
+                    if ( ( int ) to_push.length() > 0 ) { to_return.push_back ( atof ( to_push.c_str() ) ); }
+                    to_push = "";
+                    pushed = true;
+                }
+            }
+            if ( !pushed )
+            {
+                to_push.push_back ( ( *sIt ) );
+            }
+        }
+        if ( ( int ) to_push.length() > 0 ) { to_return.push_back ( atof ( to_push.c_str() ) ); }
+        return to_return;
     }
- }
- return str;
-}
-/*
-string removePunctTercom ( string str )
-{
- string str_mod = str;
- sregex rex;
- string replace;
+ string lowerCase ( string str )
+ {
+ for ( int i = 0;i < ( int ) str.size();i++ )
+ {
+ if ( ( str[i] >= 0x41 ) && ( str[i] <= 0x5A ) )
+ {
+ str[i] = str[i] + 0x20;
+ }
+ }
+ return str;
+ }
+ string removePunctTercom ( string str )
+ {
+ string str_mod = str;
+ sregex rex;
+ string replace;
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\"]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\"]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[,]" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[,]" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([\\.]$)" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([\\.]$)" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\?]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\?]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\;]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\;]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\:]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\:]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\!]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\!]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\(]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\(]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\)]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\)]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+$" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+$" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
-}
-string removePunct ( string str )
-{
- string str_mod = str;
- sregex rex;
- string replace;
+ return str_mod;
+ }
+ string removePunct ( string str )
+ {
+ string str_mod = str;
+ sregex rex;
+ string replace;
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\"]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\"]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[,]" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[,]" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([\\.]$)" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([\\.]$)" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\?]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\?]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\;]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\;]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\:]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\:]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\!]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\!]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\(]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\(]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\)]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\)]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+$" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+$" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
-}
-string tokenizePunct ( string str )
-{
- string str_mod = str;
- sregex rex = sregex::compile ( "(([^0-9])([\\,])([^0-9]))" );
- string replace ( "$2 $3 $4" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ return str_mod;
+ }
+ string tokenizePunct ( string str )
+ {
+ string str_mod = str;
+ sregex rex = sregex::compile ( "(([^0-9])([\\,])([^0-9]))" );
+ string replace ( "$2 $3 $4" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(([^0-9])([\\.])([^0-9]))" );
- replace = ( "$2 $3 $4" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(([^0-9])([\\.])([^0-9]))" );
+ replace = ( "$2 $3 $4" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.]) )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.]) )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.])$)" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.])$)" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([A-Z]|[a-z]) ([\\.]) )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([A-Z]|[a-z]) ([\\.]) )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(([A-Z]|[a-z])([\\.]) ([A-Z]|[a-z])([\\.]) )" );
- replace = ( "$2.$4. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(([A-Z]|[a-z])([\\.]) ([A-Z]|[a-z])([\\.]) )" );
+ replace = ( "$2.$4. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\?]" );
- replace = ( " ? " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\?]" );
+ replace = ( " ? " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\;]" );
- replace = ( " ; " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\;]" );
+ replace = ( " ; " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(([^0-9])([\\:])([^0-9]))" );
- replace = ( "$2 $3 $4" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(([^0-9])([\\:])([^0-9]))" );
+ replace = ( "$2 $3 $4" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\!]" );
- replace = ( " ! " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\!]" );
+ replace = ( " ! " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\(]" );
- replace = ( " ( " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\(]" );
+ replace = ( " ( " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\)]" );
- replace = ( " ) " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\)]" );
+ replace = ( " ) " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\"]" );
- replace = ( " \" " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\"]" );
+ replace = ( " \" " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(num_ \\( ([^\\)]+) \\))" );
- replace = ( "num_($2)" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(num_ \\( ([^\\)]+) \\))" );
+ replace = ( "num_($2)" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(ordinal_ \\( ([^\\)]*) \\))" );
- replace = ( "ordinal_($2)" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(ordinal_ \\( ([^\\)]*) \\))" );
+ replace = ( "ordinal_($2)" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Mm]) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Mm]) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Mm]) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Mm]) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Dd]r) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Dd]r) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Dd]r) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Dd]r) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Mm]r) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Mm]r) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Mm]r) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Mm]r) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Mm]rs) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Mm]rs) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Mm]rs) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Mm]rs) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Nn]o) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Nn]o) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Nn]o) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Nn]o) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
// rex = sregex::compile ( "(^(([Jj]an)|([Ff]ev)|([Mm]ar)|([Aa]pr)|([Jj]un)|([Jj]ul)|([Aa]ug)|([Ss]ept)|([Oo]ct)|([Nn]ov)|([Dd]ec)) \\.)" );
// replace = ( "$2." );
// str_mod = regex_replace ( str_mod, rex, replace );
-//
+//
// rex = sregex::compile ( "( (([Jj]an)|([Ff]ev)|([Mm]ar)|([Aa]pr)|([Jj]un)|([Jj]ul)|([Aa]ug)|([Ss]ept)|([Oo]ct)|([Nn]ov)|([Dd]ec)) \\.)" );
// replace = ( " $2." );
// str_mod = regex_replace ( str_mod, rex, replace );
-//
+//
// rex = sregex::compile ( "(^(([Gg]en)|([Cc]ol)) \\.)" );
// replace = ( "$2." );
// str_mod = regex_replace ( str_mod, rex, replace );
-//
+//
// rex = sregex::compile ( "( (([Gg]en)|([Cc]ol)) \\.)" );
// replace = ( " $2." );
// str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^(([A-Z][a-z])) \\. )" );
- replace = ( "$2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^(([A-Z][a-z])) \\. )" );
+ replace = ( "$2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( (([A-Z][a-z])) \\. )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( (([A-Z][a-z])) \\. )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^(([A-Z][a-z][a-z])) \\. )" );
- replace = ( "$2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^(([A-Z][a-z][a-z])) \\. )" );
+ replace = ( "$2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( (([A-Z][a-z][a-z])) \\. )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( (([A-Z][a-z][a-z])) \\. )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+$" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+$" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
-}
-
-string normalizeStd ( string str )
-{
- string str_mod = str;
- sregex rex = sregex::compile ( "(<skipped>)" );
- string replace ( "" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ return str_mod;
+ }
- rex = sregex::compile ( "-\n" );
- replace = ( "" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ // Returns a copy of str after a fixed sequence of regex replacements:
+ //   1. every "<skipped>" tag is removed,
+ //   2. a hyphen immediately followed by a newline is removed,
+ //   3. each remaining newline becomes a single space,
+ //   4. &quot; &amp; &lt; &gt; are replaced by their literal characters.
+ // The argument is taken by value; all work is done on the local str_mod.
+ // sregex / regex_replace are presumably the boost::xpressive ones pulled in
+ // by tools.h -- TODO confirm.
+ string normalizeStd ( string str )
+ {
+ string str_mod = str;
+ sregex rex = sregex::compile ( "(<skipped>)" );
+ string replace ( "" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "\n" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ // Must run before the plain "\n" rule below; otherwise "-\n" would become "- ".
+ rex = sregex::compile ( "-\n" );
+ replace = ( "" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&quot;" );
- replace = ( "\"" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "\n" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&amp;" );
- replace = ( "& " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "&quot;" );
+ replace = ( "\"" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&lt;" );
- replace = ( "<" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ // NOTE(review): unlike the other entities, "&amp;" is replaced by "& " with a
+ // trailing space -- confirm this is intended.
+ rex = sregex::compile ( "&amp;" );
+ replace = ( "& " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&gt;" );
- replace = ( ">" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "&lt;" );
+ replace = ( "<" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
-}
-*/
+ rex = sregex::compile ( "&gt;" );
+ replace = ( ">" );
+ str_mod = regex_replace ( str_mod, rex, replace );
-param copyParam ( param p )
-{
- param to_return;
- to_return.caseOn = p.caseOn;
- to_return.noPunct = p.noPunct;
- to_return.debugMode = p.debugMode;
- to_return.hypothesisFile = p.hypothesisFile;
- to_return.referenceFile = p.referenceFile;
- to_return.normalize = p.normalize;
- to_return.noTxtIds = p.noTxtIds;
- to_return.outputFileExtension = p.outputFileExtension;
- to_return.outputFileName = p.outputFileName;
- to_return.sgmlInputs = p.sgmlInputs;
- to_return.tercomLike = p.tercomLike;
- return to_return;
-}
-string printParams ( param p )
-{
- stringstream s;
- s << "caseOn = " << p.caseOn << endl;
- s << "noPunct = " << p.noPunct << endl;
- s << "debugMode = " << p.debugMode << endl;
- s << "hypothesisFile = " << p.hypothesisFile << endl;
- s << "referenceFile = " << p.referenceFile << endl;
- s << "normalize = " << p.normalize << endl;
- s << "noTxtIds = " << p.noTxtIds << endl;
- s << "outputFileExtension = " << p.outputFileExtension << endl;
- s << "outputFileName = " << p.outputFileName << endl;
- s << "sgmlInputs = " << p.sgmlInputs << endl;
- s << "tercomLike = " << p.tercomLike << endl;
- return s.str();
+ return str_mod;
+ }
-}
+ // Returns a copy of the parameter set p.
+ //
+ // p:       parameter set to duplicate (received by value).
+ // returns: a param whose members all equal those of p.
+ //
+ // The struct is copied as a whole instead of member by member. For the
+ // members declared in tools.h (caseOn, noPunct, debugMode, debugLevel,
+ // hypothesisFile, referenceFile, normalize, noTxtIds, outputFileExtension,
+ // outputFileName, sgmlInputs, tercomLike, printAlignments, WER) the result is
+ // the same as the former assignment list, and a member added to param later
+ // cannot be forgotten here.
+ param copyParam ( param p )
+ {
+     return p;
+ }
+ // Renders the parameter set p as text for display or debugging.
+ //
+ // p:       parameter set to describe.
+ // returns: one "name = value" line per member, each terminated by a newline;
+ //          bool members are streamed with the stream's default formatting.
+ //
+ // The existing lines keep their order and wording. printAlignments and WER,
+ // which param declares and copyParam copies, are appended at the end so the
+ // dump covers every member.
+ string printParams ( param p )
+ {
+     stringstream s;
+     s << "caseOn = " << p.caseOn << endl;
+     s << "noPunct = " << p.noPunct << endl;
+     s << "debugMode = " << p.debugMode << endl;
+     s << "debugLevel = " << p.debugLevel << endl;
+     s << "hypothesisFile = " << p.hypothesisFile << endl;
+     s << "referenceFile = " << p.referenceFile << endl;
+     s << "normalize = " << p.normalize << endl;
+     s << "noTxtIds = " << p.noTxtIds << endl;
+     s << "outputFileExtension = " << p.outputFileExtension << endl;
+     s << "outputFileName = " << p.outputFileName << endl;
+     s << "sgmlInputs = " << p.sgmlInputs << endl;
+     s << "tercomLike = " << p.tercomLike << endl;
+     s << "printAlignments = " << p.printAlignments << endl;
+     s << "WER = " << p.WER << endl;
+     return s.str();
+ }
+ // Concatenates the elements of arr, writing delim between consecutive elements.
+ //
+ // delim:   separator inserted between elements (may be empty).
+ // arr:     strings to concatenate, in order.
+ // returns: "" for an empty vector, the single element for a one-element
+ //          vector, otherwise "e0<delim>e1<delim>...<delim>eN".
+ string join ( string delim, vector<string> arr )
+ {
+     stringstream s;
+     // Index with size_t so size() is not narrowed to int.
+     for ( size_t i = 0; i < arr.size(); ++i )
+     {
+         if ( i != 0 )
+         {
+             s << delim;
+         }
+         s << arr[i];
+     }
+     return s.str();
+ }
}
diff --git a/mert/TER/tools.h b/mert/TER/tools.h
index 6f78b9a6a..0a85e7b4b 100644
--- a/mert/TER/tools.h
+++ b/mert/TER/tools.h
@@ -1,38 +1,66 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
#ifndef MERT_TER_TOOLS_H_
#define MERT_TER_TOOLS_H_
+
#include <vector>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <sstream>
+#include <boost/xpressive/xpressive.hpp>
+
using namespace std;
namespace Tools
{
-typedef vector<double> vecDouble;
-typedef vector<char> vecChar;
-typedef vector<int> vecInt;
-typedef vector<float> vecFloat;
-typedef vector<string> vecString;
-typedef vector<string> alignmentElement;
-typedef vector<alignmentElement> WERalignment;
+ // Shorthand names for the std::vector instantiations used in namespace Tools.
+ typedef vector<double> vecDouble;
+ typedef vector<char> vecChar;
+ typedef vector<int> vecInt;
+ typedef vector<float> vecFloat;
+ typedef vector<size_t> vecSize_t;
+ // vecString and alignmentElement name the same type, vector<string>.
+ typedef vector<string> vecString;
+ typedef vector<string> alignmentElement;
+ // A WERalignment is a vector of alignmentElement, i.e. vector< vector<string> >.
+ typedef vector<alignmentElement> WERalignment;
-struct param {
- bool debugMode;
- string referenceFile; // path to the resources
- string hypothesisFile; // path to the configuration files
- string outputFileExtension;
- string outputFileName;
- bool noPunct;
- bool caseOn;
- bool normalize;
- bool tercomLike;
- bool sgmlInputs;
- bool noTxtIds;
+// Set of options and file names handled by copyParam() and printParams().
+// The struct declares no constructor and no default values, so its bool and
+// int members hold no defined value until they are assigned.
+struct param
+{
+ bool debugMode;
+ string referenceFile; // NOTE(review): was "path to the resources"; by its name presumably the reference file path -- confirm
+ string hypothesisFile; // NOTE(review): was "path to the configuration files"; by its name presumably the hypothesis file path -- confirm
+ string outputFileExtension;
+ string outputFileName;
+ bool noPunct;
+ bool caseOn;
+ bool normalize;
+ bool tercomLike;
+ bool sgmlInputs;
+ bool noTxtIds;
+ bool printAlignments;
+ bool WER;
+ int debugLevel;
};
// param = { false, "","","","" };
@@ -40,26 +68,35 @@ struct param {
// private:
// public:
-string vectorToString ( vector<string> vec );
-string vectorToString ( vector<string> vec, string s );
-vector<string> subVector ( vector<string> vec, int start, int end );
-vector<int> subVector ( vector<int> vec, int start, int end );
-vector<float> subVector ( vector<float> vec, int start, int end );
-vector<string> copyVector ( vector<string> vec );
-vector<int> copyVector ( vector<int> vec );
-vector<float> copyVector ( vector<float> vec );
-vector<string> stringToVector ( string s, string tok );
-vector<int> stringToVectorInt ( string s, string tok );
-vector<float> stringToVectorFloat ( string s, string tok );
-string lowerCase(string str);
-string removePunct(string str);
-string tokenizePunct(string str);
-string removePunctTercom(string str);
-string normalizeStd(string str);
-string printParams(param p);
+ // Free functions of namespace Tools. Only normalizeStd, printParams, join and
+ // copyParam are described below; see the definitions of the others for their
+ // exact behaviour.
+ //
+ // vectorToString overloads. NOTE(review): `taille` is French for "size",
+ // presumably the element count of the array argument -- confirm.
+ string vectorToString ( vector<string> vec );
+ string vectorToString ( vector<char> vec );
+ string vectorToString ( vector<int> vec );
+ string vectorToString ( vector<string> vec, string s );
+ string vectorToString ( vector<char> vec, string s );
+ string vectorToString ( vector<int> vec, string s );
+ string vectorToString ( vector<bool> vec, string s );
+ string vectorToString ( char* vec, string s, int taille );
+ string vectorToString ( int* vec, string s , int taille );
+ string vectorToString ( bool* vec, string s , int taille );
+ vector<string> subVector ( vector<string> vec, int start, int end );
+ vector<int> subVector ( vector<int> vec, int start, int end );
+ vector<float> subVector ( vector<float> vec, int start, int end );
+ vector<string> copyVector ( vector<string> vec );
+ vector<int> copyVector ( vector<int> vec );
+ vector<float> copyVector ( vector<float> vec );
+ vector<string> stringToVector ( string s, string tok );
+ vector<string> stringToVector ( char s, string tok );
+ vector<string> stringToVector ( int s, string tok );
+ vector<int> stringToVectorInt ( string s, string tok );
+ vector<float> stringToVectorFloat ( string s, string tok );
+ string lowerCase(string str);
+ string removePunct(string str);
+ string tokenizePunct(string str);
+ string removePunctTercom(string str);
+ // Removes "<skipped>" tags and hyphen+newline pairs, turns newlines into
+ // spaces and replaces &quot; &amp; &lt; &gt; by their literal characters.
+ string normalizeStd(string str);
+ // Returns the members of p as "name = value" lines.
+ string printParams(param p);
+ // Concatenates the elements of arr with delim between them; "" if arr is empty.
+ string join ( string delim, vector<string> arr );
// };
-param copyParam(param p);
-
+ // Returns a copy of p.
+ param copyParam(param p);
}
-
-#endif // MERT_TER_TOOLS_H_
+#endif