Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/mert
diff options
context:
space:
mode:
author: Christophe SERVAN <cservan@trou-de-fer.grenoble.xrce.xerox.com>, 2015-02-16 21:02:46 +0300
committer: Christophe SERVAN <cservan@trou-de-fer.grenoble.xrce.xerox.com>, 2015-02-16 21:02:46 +0300
commit: 87a4f1954619a4a7eec531349e0865b245df7a0b (patch)
tree: fcbf12e7bbbf32e866e127844224435277e31fbf /mert
parent: f6884c55a1095b484195acc8570b280e638a72bf (diff)
Memory leak correction in TER algorithm
Diffstat (limited to 'mert')
-rw-r--r-- mert/Jamfile | 1
-rw-r--r-- mert/TER/alignmentStruct.cpp | 25
-rw-r--r-- mert/TER/alignmentStruct.h | 31
-rw-r--r-- mert/TER/bestShiftStruct.cpp | 66
-rw-r--r-- mert/TER/bestShiftStruct.h | 48
-rw-r--r-- mert/TER/hashMap.cpp | 232
-rw-r--r-- mert/TER/hashMap.h | 44
-rw-r--r-- mert/TER/hashMapInfos.cpp | 239
-rw-r--r-- mert/TER/hashMapInfos.h | 46
-rw-r--r-- mert/TER/hashMapStringInfos.cpp | 313
-rw-r--r-- mert/TER/hashMapStringInfos.h | 46
-rw-r--r-- mert/TER/infosHasher.cpp | 58
-rw-r--r-- mert/TER/infosHasher.h | 40
-rw-r--r-- mert/TER/stringHasher.cpp | 46
-rw-r--r-- mert/TER/stringHasher.h | 32
-rw-r--r-- mert/TER/stringInfosHasher.cpp | 58
-rw-r--r-- mert/TER/stringInfosHasher.h | 40
-rw-r--r-- mert/TER/terAlignment.cpp | 339
-rw-r--r-- mert/TER/terAlignment.h | 79
-rw-r--r-- mert/TER/terShift.cpp | 164
-rw-r--r-- mert/TER/terShift.h | 53
-rw-r--r-- mert/TER/tercalc.cpp | 1832
-rw-r--r-- mert/TER/tercalc.h | 104
-rw-r--r-- mert/TER/tools.cpp | 1224
-rw-r--r-- mert/TER/tools.h | 116
25 files changed, 3108 insertions, 2168 deletions
diff --git a/mert/Jamfile b/mert/Jamfile
index ee8a1fcc3..4dd2fb540 100644
--- a/mert/Jamfile
+++ b/mert/Jamfile
@@ -46,6 +46,7 @@ TER/infosHasher.cpp
TER/stringInfosHasher.cpp
TER/tercalc.cpp
TER/tools.cpp
+TER/bestShiftStruct.cpp
TerScorer.cpp
CderScorer.cpp
MeteorScorer.cpp
diff --git a/mert/TER/alignmentStruct.cpp b/mert/TER/alignmentStruct.cpp
index e42ec4a14..e2a880396 100644
--- a/mert/TER/alignmentStruct.cpp
+++ b/mert/TER/alignmentStruct.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -23,15 +23,24 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using namespace std;
namespace TERCpp
{
-string alignmentStruct::toString()
-{
- stringstream s;
+ string alignmentStruct::toString()
+ {
+ stringstream s;
// s << "nword : " << vectorToString(nwords)<<endl;
// s << "alignment" << vectorToString(alignment)<<endl;
// s << "afterShift" << vectorToString(alignment)<<endl;
- s << "Nothing to be printed" <<endl;
- return s.str();
-}
+ s << "Nothing to be printed" <<endl;
+ return s.str();
+ }
+ void alignmentStruct::set(alignmentStruct l_alignmentStruct)
+ {
+ nwords=l_alignmentStruct.nwords; // The words we shifted
+ alignment=l_alignmentStruct.alignment ; // for pra_more output
+ aftershift=l_alignmentStruct.aftershift; // for pra_more output
+ cost=l_alignmentStruct.cost;
+ }
+
+
// alignmentStruct::alignmentStruct()
// {
@@ -99,7 +108,7 @@ string alignmentStruct::toString()
// return s.str();
// }
-/* The distance of the shift. */
+ /* The distance of the shift. */
// int alignmentStruct::distance()
// {
// if (moveto < start)
diff --git a/mert/TER/alignmentStruct.h b/mert/TER/alignmentStruct.h
index c1459960b..0963fbe94 100644
--- a/mert/TER/alignmentStruct.h
+++ b/mert/TER/alignmentStruct.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
-#ifndef MERT_TER_ALIGNMENTSTRUCT_H_
-#define MERT_TER_ALIGNMENTSTRUCT_H_
+#ifndef __TERCPPALIGNMENTSTRUCT_H__
+#define __TERCPPALIGNMENTSTRUCT_H__
#include <vector>
@@ -34,10 +34,10 @@ using namespace Tools;
namespace TERCpp
{
-class alignmentStruct
-{
-private:
-public:
+ class alignmentStruct
+ {
+ private:
+ public:
// alignmentStruct();
// alignmentStruct (int _start, int _end, int _moveto, int _newloc);
@@ -53,14 +53,15 @@ public:
// int end;
// int moveto;
// int newloc;
- vector<string> nwords; // The words we shifted
- vector<char> alignment ; // for pra_more output
- vector<vecInt> aftershift; // for pra_more output
- // This is used to store the cost of a shift, so we don't have to
- // calculate it multiple times.
- double cost;
- string toString();
-};
+ vector<string> nwords; // The words we shifted
+ vector<char> alignment ; // for pra_more output
+ vector<vecInt> aftershift; // for pra_more output
+ // This is used to store the cost of a shift, so we don't have to
+ // calculate it multiple times.
+ double cost;
+ string toString();
+ void set(alignmentStruct l_alignmentStruct);
+ };
}
#endif \ No newline at end of file
diff --git a/mert/TER/bestShiftStruct.cpp b/mert/TER/bestShiftStruct.cpp
new file mode 100644
index 000000000..8c27f1ff8
--- /dev/null
+++ b/mert/TER/bestShiftStruct.cpp
@@ -0,0 +1,66 @@
+/*********************************
+tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
+
+Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
+Contact: christophe.servan@lium.univ-lemans.fr
+
+The tercpp tool and library are free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the licence, or
+(at your option) any later version.
+
+This program and library are distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+**********************************/
+#include "bestShiftStruct.h"
+
+using namespace std;
+
+namespace TERCpp
+{
+ // Allocates the shift, the alignment and the empty flag held by this object.
+ bestShiftStruct::bestShiftStruct()
+ {
+ m_best_shift=new terShift();
+ m_best_align=new terAlignment();
+ m_empty=new bool(false);
+ }
+ // Releases all three constructor allocations, including the flag.
+ bestShiftStruct::~bestShiftStruct()
+ {
+ delete(m_best_align);
+ delete(m_best_shift);
+ delete(m_empty);
+ }
+ // Stores b in the existing flag; allocating a new bool per call leaked the previous one.
+ void bestShiftStruct::setEmpty(bool b)
+ {
+ (*m_empty)=b;
+ }
+ void bestShiftStruct::setBestShift(terShift * l_terShift)
+ {
+ m_best_shift->set(l_terShift);
+ }
+ void bestShiftStruct::setBestAlign(terAlignment * l_terAlignment)
+ {
+ m_best_align->set(l_terAlignment);
+ }
+ string bestShiftStruct::toString()
+ {
+ stringstream s;
+ s << m_best_shift->toString() << endl;
+ s << m_best_align->toString() << endl;
+// s << (*m_empty) << endl;
+ return s.str(); // the function previously ended without returning a value
+ }
+ bool bestShiftStruct::getEmpty()
+ {
+ return (*(m_empty));
+ }
+}
diff --git a/mert/TER/bestShiftStruct.h b/mert/TER/bestShiftStruct.h
index d68f2319f..144787faa 100644
--- a/mert/TER/bestShiftStruct.h
+++ b/mert/TER/bestShiftStruct.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
-#ifndef __BESTSHIFTSTRUCT_H_
-#define __BESTSHIFTSTRUCT_H_
+#ifndef __BESTSHIFTSTRUCT_H__
+#define __BESTSHIFTSTRUCT_H__
#include <vector>
@@ -36,10 +36,10 @@ using namespace Tools;
namespace TERCpp
{
-class bestShiftStruct
-{
-private:
-public:
+ class bestShiftStruct
+ {
+ private:
+ public:
// alignmentStruct();
// alignmentStruct (int _start, int _end, int _moveto, int _newloc);
@@ -55,16 +55,36 @@ public:
// int end;
// int moveto;
// int newloc;
- terShift m_best_shift;
- terAlignment m_best_align;
- bool m_empty;
+ terShift * m_best_shift;
+ terAlignment * m_best_align;
+ bool * m_empty;
+ bestShiftStruct();
+ ~bestShiftStruct();
+ inline void set(const bestShiftStruct & l_bestShiftStruct) // by reference: a by-value copy would share the source's pointers and delete them on return
+ {
+ m_best_shift->set(l_bestShiftStruct.m_best_shift);
+ m_best_align->set(l_bestShiftStruct.m_best_align);
+ setEmpty(*(l_bestShiftStruct.m_empty)); // read the flag directly: getEmpty() is not const-qualified
+ }
+ inline void set(bestShiftStruct * l_bestShiftStruct)
+ {
+ m_best_shift->set(l_bestShiftStruct->m_best_shift);
+ m_best_align->set(l_bestShiftStruct->m_best_align);
+ setEmpty(l_bestShiftStruct->getEmpty());
+ }
+ void setEmpty(bool b);
+ void setBestShift(terShift * l_terShift);
+ void setBestAlign(terAlignment * l_terAlignment);
+ string toString();
+ bool getEmpty();
+
// vector<string> nwords; // The words we shifted
// char* alignment ; // for pra_more output
// vector<vecInt> aftershift; // for pra_more output
- // This is used to store the cost of a shift, so we don't have to
- // calculate it multiple times.
+ // This is used to store the cost of a shift, so we don't have to
+ // calculate it multiple times.
// double cost;
-};
+ };
}
-#endif \ No newline at end of file
+#endif
diff --git a/mert/TER/hashMap.cpp b/mert/TER/hashMap.cpp
index 253fda715..de84ff796 100644
--- a/mert/TER/hashMap.cpp
+++ b/mert/TER/hashMap.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -28,142 +28,156 @@ using namespace std;
namespace HashMapSpace
{
// hashMap::hashMap();
-/* hashMap::~hashMap()
+ /* hashMap::~hashMap()
+ {
+ // vector<stringHasher>::const_iterator del = m_hasher.begin();
+ for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
+ {
+ delete(*del);
+ }
+ }*/
+ /**
+ * int hashMap::trouve ( long searchKey )
+ * @param searchKey
+ * @return
+ */
+ int hashMap::trouve ( long searchKey )
{
-// vector<stringHasher>::const_iterator del = m_hasher.begin();
- for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
- {
- delete(*del);
- }
- }*/
-/**
- * int hashMap::trouve ( long searchKey )
- * @param searchKey
- * @return
- */
-int hashMap::trouve ( long searchKey )
-{
- long foundKey;
+ long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return 1;
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return 1;
+ }
+ }
+ return 0;
}
- }
- return 0;
-}
-int hashMap::trouve ( string key )
-{
- long searchKey=hashValue ( key );
- long foundKey;;
+ int hashMap::trouve ( string key )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return 1;
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return 1;
+ }
+ }
+ return 0;
}
- }
- return 0;
-}
-/**
- * long hashMap::hashValue ( string key )
- * @param key
- * @return
- */
-long hashMap::hashValue ( string key )
-{
- locale loc; // the "C" locale
- const collate<char>& coll = use_facet<collate<char> >(loc);
- return coll.hash(key.data(),key.data()+key.length());
+ /**
+ * long hashMap::hashValue ( string key )
+ * @param key
+ * @return
+ */
+ long hashMap::hashValue ( string key )
+ {
+ locale loc; // the "C" locale
+ const collate<char>& coll = use_facet<collate<char> >(loc);
+ return coll.hash(key.data(),key.data()+key.length());
// boost::hash<string> hasher;
// return hasher ( key );
-}
-/**
- * void hashMap::addHasher ( string key, string value )
- * @param key
- * @param value
- */
-void hashMap::addHasher ( string key, string value )
-{
- if ( trouve ( hashValue ( key ) ) ==0 ) {
+ }
+ /**
+ * void hashMap::addHasher ( string key, string value )
+ * @param key
+ * @param value
+ */
+ void hashMap::addHasher ( string key, string value )
+ {
+ if ( trouve ( hashValue ( key ) ) ==0 )
+ {
// cerr << "ICI1" <<endl;
- stringHasher H ( hashValue ( key ),key,value );
+ stringHasher H ( hashValue ( key ),key,value );
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
// cerr << "ICI2" <<endl;
- m_hasher.push_back ( H );
- }
-}
-stringHasher hashMap::getHasher ( string key )
-{
- long searchKey=hashValue ( key );
- long foundKey;
- stringHasher defaut(0,"","");
+ m_hasher.push_back ( H );
+ }
+ }
+ stringHasher hashMap::getHasher ( string key )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;
+ stringHasher defaut(0,"","");
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return ( *l_hasher );
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return ( *l_hasher );
+ }
+ }
+ return defaut;
}
- }
- return defaut;
-}
-string hashMap::getValue ( string key )
-{
- long searchKey=hashValue ( key );
- long foundKey;
+ string hashMap::getValue ( string key )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
- return ( *l_hasher ).getValue();
+ return ( *l_hasher ).getValue();
+ }
+ }
+ return "";
}
- }
- return "";
-}
-string hashMap::searchValue ( string value )
-{
+ string hashMap::searchValue ( string value )
+ {
// long searchKey=hashValue ( key );
// long foundKey;
- string foundValue;
+ string foundValue;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundValue= ( *l_hasher ).getValue();
- if ( foundValue.compare ( value ) == 0 ) {
- return ( *l_hasher ).getKey();
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundValue= ( *l_hasher ).getValue();
+ if ( foundValue.compare ( value ) == 0 )
+ {
+ return ( *l_hasher ).getKey();
+ }
+ }
+ return "";
}
- }
- return "";
-}
-void hashMap::setValue ( string key , string value )
-{
- long searchKey=hashValue ( key );
- long foundKey;
+ void hashMap::setValue ( string key , string value )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- ( *l_hasher ).setValue ( value );
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ ( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue();
+ }
+ }
}
- }
-}
-/**
- *
- */
-void hashMap::printHash()
-{
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
-}
+ /**
+ *
+ */
+ void hashMap::printHash()
+ {
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+ }
diff --git a/mert/TER/hashMap.h b/mert/TER/hashMap.h
index c2708b360..017e6b831 100644
--- a/mert/TER/hashMap.h
+++ b/mert/TER/hashMap.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -21,8 +21,8 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/*
* Generic hashmap manipulation functions
*/
-#ifndef __HASHMAP_H_
-#define __HASHMAP_H_
+#ifndef __HASHMAP_H__
+#define __HASHMAP_H__
#include <boost/functional/hash.hpp>
#include "stringHasher.h"
#include <vector>
@@ -35,27 +35,27 @@ using namespace std;
namespace HashMapSpace
{
-class hashMap
-{
-private:
- vector<stringHasher> m_hasher;
+ class hashMap
+ {
+ private:
+ vector<stringHasher> m_hasher;
-public:
+ public:
// ~hashMap();
- long hashValue ( string key );
- int trouve ( long searchKey );
- int trouve ( string key );
- void addHasher ( string key, string value );
- stringHasher getHasher ( string key );
- string getValue ( string key );
- string searchValue ( string key );
- void setValue ( string key , string value );
- void printHash();
- vector<stringHasher> getHashMap();
- string printStringHash();
- string printStringHash2();
- string printStringHashForLexicon();
-};
+ long hashValue ( string key );
+ int trouve ( long searchKey );
+ int trouve ( string key );
+ void addHasher ( string key, string value );
+ stringHasher getHasher ( string key );
+ string getValue ( string key );
+ string searchValue ( string key );
+ void setValue ( string key , string value );
+ void printHash();
+ vector<stringHasher> getHashMap();
+ string printStringHash();
+ string printStringHash2();
+ string printStringHashForLexicon();
+ };
}
diff --git a/mert/TER/hashMapInfos.cpp b/mert/TER/hashMapInfos.cpp
index 0ab6d21b2..23f57d808 100644
--- a/mert/TER/hashMapInfos.cpp
+++ b/mert/TER/hashMapInfos.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -28,108 +28,117 @@ using namespace std;
namespace HashMapSpace
{
// hashMapInfos::hashMap();
-/* hashMapInfos::~hashMap()
+ /* hashMapInfos::~hashMap()
+ {
+ // vector<infosHasher>::const_iterator del = m_hasher.begin();
+ for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
+ {
+ delete(*del);
+ }
+ }*/
+ /**
+ * int hashMapInfos::trouve ( long searchKey )
+ * @param searchKey
+ * @return
+ */
+ int hashMapInfos::trouve ( long searchKey )
{
-// vector<infosHasher>::const_iterator del = m_hasher.begin();
- for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
- {
- delete(*del);
- }
- }*/
-/**
- * int hashMapInfos::trouve ( long searchKey )
- * @param searchKey
- * @return
- */
-int hashMapInfos::trouve ( long searchKey )
-{
- long foundKey;
+ long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return 1;
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return 1;
+ }
+ }
+ return 0;
}
- }
- return 0;
-}
-int hashMapInfos::trouve ( string key )
-{
- long searchKey=hashValue ( key );
- long foundKey;;
+ int hashMapInfos::trouve ( string key )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return 1;
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return 1;
+ }
+ }
+ return 0;
}
- }
- return 0;
-}
-/**
- * long hashMapInfos::hashValue ( string key )
- * @param key
- * @return
- */
-long hashMapInfos::hashValue ( string key )
-{
- locale loc; // the "C" locale
- const collate<char>& coll = use_facet<collate<char> >(loc);
- return coll.hash(key.data(),key.data()+key.length());
+ /**
+ * long hashMapInfos::hashValue ( string key )
+ * @param key
+ * @return
+ */
+ long hashMapInfos::hashValue ( string key )
+ {
+ locale loc; // the "C" locale
+ const collate<char>& coll = use_facet<collate<char> >(loc);
+ return coll.hash(key.data(),key.data()+key.length());
// boost::hash<string> hasher;
// return hasher ( key );
-}
-/**
- * void hashMapInfos::addHasher ( string key, string value )
- * @param key
- * @param value
- */
-void hashMapInfos::addHasher ( string key, vector<int> value )
-{
- if ( trouve ( hashValue ( key ) ) ==0 ) {
+ }
+ /**
+ * void hashMapInfos::addHasher ( string key, vector<int> value )
+ * @param key
+ * @param value
+ */
+ void hashMapInfos::addHasher ( string key, vector<int> value )
+ {
+ if ( trouve ( hashValue ( key ) ) ==0 )
+ {
// cerr << "ICI1" <<endl;
- infosHasher H ( hashValue ( key ),key,value );
+ infosHasher H ( hashValue ( key ),key,value );
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
// cerr << "ICI2" <<endl;
- m_hasher.push_back ( H );
- }
-}
-void hashMapInfos::addValue ( string key, vector<int> value )
-{
- addHasher ( key, value );
-}
-infosHasher hashMapInfos::getHasher ( string key )
-{
- long searchKey=hashValue ( key );
- long foundKey;
+ m_hasher.push_back ( H );
+ }
+ }
+ void hashMapInfos::addValue ( string key, vector<int> value )
+ {
+ addHasher ( key, value );
+ }
+ infosHasher hashMapInfos::getHasher ( string key )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return ( *l_hasher );
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return ( *l_hasher );
+ }
+ }
+ vector<int> temp;
+ infosHasher defaut(0,"",temp);
+ return defaut;
}
- }
- vector<int> temp;
- infosHasher defaut(0,"",temp);
- return defaut;
-}
-vector<int> hashMapInfos::getValue ( string key )
-{
- long searchKey=hashValue ( key );
- long foundKey;
- vector<int> retour;
+ vector<int> hashMapInfos::getValue ( string key )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;
+ vector<int> retour;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
- return ( *l_hasher ).getValue();
+ return ( *l_hasher ).getValue();
+ }
+ }
+ return retour;
}
- }
- return retour;
-}
// string hashMapInfos::searchValue ( string value )
// {
// // long searchKey=hashValue ( key );
@@ -149,38 +158,42 @@ vector<int> hashMapInfos::getValue ( string key )
// }
//
-void hashMapInfos::setValue ( string key , vector<int> value )
-{
- long searchKey=hashValue ( key );
- long foundKey;
+ void hashMapInfos::setValue ( string key , vector<int> value )
+ {
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- ( *l_hasher ).setValue ( value );
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ ( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue();
+ }
+ }
+ }
+ string hashMapInfos::toString ()
+ {
+ stringstream to_return;
+ for ( vector<infosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ to_return << (*l_hasher).toString();
+ // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+ return to_return.str();
}
- }
-}
-string hashMapInfos::toString ()
-{
- stringstream to_return;
- for ( vector<infosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
- to_return << (*l_hasher).toString();
- // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
- return to_return.str();
-}
-/**
- *
- */
-void hashMapInfos::printHash()
-{
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ /**
+ *
+ */
+ void hashMapInfos::printHash()
+ {
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ {
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
-}
+ }
+ }
diff --git a/mert/TER/hashMapInfos.h b/mert/TER/hashMapInfos.h
index e975aa738..58cd50aef 100644
--- a/mert/TER/hashMapInfos.h
+++ b/mert/TER/hashMapInfos.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -21,8 +21,8 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/*
* Generic hashmap manipulation functions
*/
-#ifndef __HASHMAPINFOS_H_
-#define __HASHMAPINFOS_H_
+#ifndef __HASHMAPINFOS_H__
+#define __HASHMAPINFOS_H__
#include <boost/functional/hash.hpp>
#include "infosHasher.h"
#include <vector>
@@ -34,29 +34,29 @@ using namespace std;
namespace HashMapSpace
{
-class hashMapInfos
-{
-private:
- vector<infosHasher> m_hasher;
+ class hashMapInfos
+ {
+ private:
+ vector<infosHasher> m_hasher;
-public:
+ public:
// ~hashMap();
- long hashValue ( string key );
- int trouve ( long searchKey );
- int trouve ( string key );
- void addHasher ( string key, vector<int> value );
- void addValue ( string key, vector<int> value );
- infosHasher getHasher ( string key );
- vector<int> getValue ( string key );
+ long hashValue ( string key );
+ int trouve ( long searchKey );
+ int trouve ( string key );
+ void addHasher ( string key, vector<int> value );
+ void addValue ( string key, vector<int> value );
+ infosHasher getHasher ( string key );
+ vector<int> getValue ( string key );
// string searchValue ( string key );
- void setValue ( string key , vector<int> value );
- void printHash();
- string toString();
- vector<infosHasher> getHashMap();
- string printStringHash();
- string printStringHash2();
- string printStringHashForLexicon();
-};
+ void setValue ( string key , vector<int> value );
+ void printHash();
+ string toString();
+ vector<infosHasher> getHashMap();
+ string printStringHash();
+ string printStringHash2();
+ string printStringHashForLexicon();
+ };
}
diff --git a/mert/TER/hashMapStringInfos.cpp b/mert/TER/hashMapStringInfos.cpp
index d984bdadc..773c148d4 100644
--- a/mert/TER/hashMapStringInfos.cpp
+++ b/mert/TER/hashMapStringInfos.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -27,166 +27,179 @@ using namespace std;
namespace HashMapSpace
{
-// hashMapStringInfos::hashMap();
-/* hashMapStringInfos::~hashMap()
-{
-// vector<stringInfosHasher>::const_iterator del = m_hasher.begin();
- for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
- {
- delete(*del);
- }
-}*/
-/**
-* int hashMapStringInfos::trouve ( long searchKey )
-* @param searchKey
-* @return
-*/
-int hashMapStringInfos::trouve ( long searchKey )
-{
- long foundKey;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return 1;
+ // hashMapStringInfos::hashMap();
+ /* hashMapStringInfos::~hashMap()
+ {
+ // vector<stringInfosHasher>::const_iterator del = m_hasher.begin();
+ for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
+ {
+ delete(*del);
+ }
+ }*/
+ /**
+ * int hashMapStringInfos::trouve ( long searchKey )
+ * @param searchKey
+ * @return
+ */
+ int hashMapStringInfos::trouve ( long searchKey )
+ {
+ long foundKey;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return 1;
+ }
+ }
+ return 0;
}
- }
- return 0;
-}
-int hashMapStringInfos::trouve ( string key )
-{
- long searchKey = hashValue ( key );
- long foundKey;;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return 1;
+ int hashMapStringInfos::trouve ( string key )
+ {
+ long searchKey = hashValue ( key );
+ long foundKey;;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return 1;
+ }
+ }
+ return 0;
}
- }
- return 0;
-}
-/**
-* long hashMapStringInfos::hashValue ( string key )
-* @param key
-* @return
-*/
-long hashMapStringInfos::hashValue ( string key )
-{
- locale loc; // the "C" locale
- const collate<char>& coll = use_facet<collate<char> > ( loc );
- return coll.hash ( key.data(), key.data() + key.length() );
+ /**
+ * long hashMapStringInfos::hashValue ( string key )
+ * @param key
+ * @return
+ */
+ long hashMapStringInfos::hashValue ( string key )
+ {
+ locale loc; // the "C" locale
+ const collate<char>& coll = use_facet<collate<char> > ( loc );
+ return coll.hash ( key.data(), key.data() + key.length() );
// boost::hash<string> hasher;
// return hasher ( key );
-}
-/**
-* void hashMapStringInfos::addHasher ( string key, string value )
-* @param key
-* @param value
-*/
-void hashMapStringInfos::addHasher ( string key, vector<string> value )
-{
- if ( trouve ( hashValue ( key ) ) == 0 ) {
- // cerr << "ICI1" <<endl;
- stringInfosHasher H ( hashValue ( key ), key, value );
- // cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
- // cerr << "ICI2" <<endl;
-
- m_hasher.push_back ( H );
- }
-}
-void hashMapStringInfos::addValue ( string key, vector<string> value )
-{
- addHasher ( key, value );
-}
-stringInfosHasher hashMapStringInfos::getHasher ( string key )
-{
- long searchKey = hashValue ( key );
- long foundKey;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- return ( *l_hasher );
}
- }
- vector<string> tmp;
- stringInfosHasher defaut ( 0, "", tmp );
- return defaut;
-}
-vector<string> hashMapStringInfos::getValue ( string key )
-{
- long searchKey = hashValue ( key );
- long foundKey;
- vector<string> retour;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- // cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
- return ( *l_hasher ).getValue();
+ /**
+ * void hashMapStringInfos::addHasher ( string key, string value )
+ * @param key
+ * @param value
+ */
+ void hashMapStringInfos::addHasher ( string key, vector<string> value )
+ {
+ if ( trouve ( hashValue ( key ) ) == 0 )
+ {
+ // cerr << "ICI1" <<endl;
+ stringInfosHasher H ( hashValue ( key ), key, value );
+ // cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
+ // cerr << "ICI2" <<endl;
+
+ m_hasher.push_back ( H );
+ }
}
- }
- return retour;
-}
-// string hashMapStringInfos::searchValue ( string value )
-// {
-// // long searchKey=hashValue ( key );
-// // long foundKey;
-// vector<int> foundValue;
-//
-// // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
-// for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
-// {
-// foundValue= ( *l_hasher ).getValue();
-// /* if ( foundValue.compare ( value ) == 0 )
-// {
-// return ( *l_hasher ).getKey();
-// }*/
-// }
-// return "";
-// }
-//
-
-void hashMapStringInfos::setValue ( string key , vector<string> value )
-{
- long searchKey = hashValue ( key );
- long foundKey;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey ) {
- ( *l_hasher ).setValue ( value );
- // return ( *l_hasher ).getValue();
+ void hashMapStringInfos::addValue ( string key, vector<string> value )
+ {
+ addHasher ( key, value );
+ }
+ stringInfosHasher hashMapStringInfos::getHasher ( string key )
+ {
+ long searchKey = hashValue ( key );
+ long foundKey;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ return ( *l_hasher );
+ }
+ }
+ vector<string> tmp;
+ stringInfosHasher defaut ( 0, "", tmp );
+ return defaut;
+ }
+ vector<string> hashMapStringInfos::getValue ( string key )
+ {
+ long searchKey = hashValue ( key );
+ long foundKey;
+ vector<string> retour;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ // cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
+ return ( *l_hasher ).getValue();
+ }
+ }
+ return retour;
+ }
+ // string hashMapStringInfos::searchValue ( string value )
+ // {
+ // // long searchKey=hashValue ( key );
+ // // long foundKey;
+ // vector<int> foundValue;
+ //
+ // // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ // for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+ // {
+ // foundValue= ( *l_hasher ).getValue();
+ // /* if ( foundValue.compare ( value ) == 0 )
+ // {
+ // return ( *l_hasher ).getKey();
+ // }*/
+ // }
+ // return "";
+ // }
+ //
+
+ void hashMapStringInfos::setValue ( string key , vector<string> value )
+ {
+ long searchKey = hashValue ( key );
+ long foundKey;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey )
+ {
+ ( *l_hasher ).setValue ( value );
+ // return ( *l_hasher ).getValue();
+ }
+ }
}
- }
-}
-string hashMapStringInfos::toString ()
-{
- stringstream to_return;
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
- to_return << (*l_hasher).toString();
- // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
- return to_return.str();
-}
+ string hashMapStringInfos::toString ()
+ {
+ stringstream to_return;
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ to_return << (*l_hasher).toString();
+ // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+ return to_return.str();
+ }
-/**
-*
-*/
-void hashMapStringInfos::printHash()
-{
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
- // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
-}
-vector< stringInfosHasher > hashMapStringInfos::getHashMap()
-{
- return m_hasher;
-}
+ /**
+ *
+ */
+ void hashMapStringInfos::printHash()
+ {
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
+ {
+ // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+ }
+ vector< stringInfosHasher > hashMapStringInfos::getHashMap()
+ {
+ return m_hasher;
+ }
diff --git a/mert/TER/hashMapStringInfos.h b/mert/TER/hashMapStringInfos.h
index a0eae951d..3ea3794e5 100644
--- a/mert/TER/hashMapStringInfos.h
+++ b/mert/TER/hashMapStringInfos.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -21,8 +21,8 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/*
* Generic hashmap manipulation functions
*/
-#ifndef __HASHMAPSTRINGINFOS_H_
-#define __HASHMAPSTRINGINFOS_H_
+#ifndef __HASHMAPSTRINGINFOS_H__
+#define __HASHMAPSTRINGINFOS_H__
#include <boost/functional/hash.hpp>
#include "stringInfosHasher.h"
#include <vector>
@@ -34,29 +34,29 @@ using namespace std;
namespace HashMapSpace
{
-class hashMapStringInfos
-{
-private:
- vector<stringInfosHasher> m_hasher;
+ class hashMapStringInfos
+ {
+ private:
+ vector<stringInfosHasher> m_hasher;
-public:
+ public:
// ~hashMap();
- long hashValue ( string key );
- int trouve ( long searchKey );
- int trouve ( string key );
- void addHasher ( string key, vector<string> value );
- void addValue ( string key, vector<string> value );
- stringInfosHasher getHasher ( string key );
- vector<string> getValue ( string key );
+ long hashValue ( string key );
+ int trouve ( long searchKey );
+ int trouve ( string key );
+ void addHasher ( string key, vector<string> value );
+ void addValue ( string key, vector<string> value );
+ stringInfosHasher getHasher ( string key );
+ vector<string> getValue ( string key );
// string searchValue ( string key );
- void setValue ( string key , vector<string> value );
- void printHash();
- string toString();
- vector<stringInfosHasher> getHashMap();
- string printStringHash();
- string printStringHash2();
- string printStringHashForLexicon();
-};
+ void setValue ( string key , vector<string> value );
+ void printHash();
+ string toString();
+ vector<stringInfosHasher> getHashMap();
+ string printStringHash();
+ string printStringHash2();
+ string printStringHashForLexicon();
+ };
}
diff --git a/mert/TER/infosHasher.cpp b/mert/TER/infosHasher.cpp
index 450b70d94..8ce23ae44 100644
--- a/mert/TER/infosHasher.cpp
+++ b/mert/TER/infosHasher.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -27,35 +27,35 @@ using namespace Tools;
namespace HashMapSpace
{
-infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt )
-{
- m_hashKey=cle;
- m_key=cleTxt;
- m_value=valueVecInt;
-}
+ infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt )
+ {
+ m_hashKey=cle;
+ m_key=cleTxt;
+ m_value=valueVecInt;
+ }
// infosHasher::~infosHasher(){};*/
-long infosHasher::getHashKey()
-{
- return m_hashKey;
-}
-string infosHasher::getKey()
-{
- return m_key;
-}
-vector<int> infosHasher::getValue()
-{
- return m_value;
-}
-void infosHasher::setValue ( vector<int> value )
-{
- m_value=value;
-}
-string infosHasher::toString()
-{
- stringstream to_return;
- to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
- return to_return.str();
-}
+ long infosHasher::getHashKey()
+ {
+ return m_hashKey;
+ }
+ string infosHasher::getKey()
+ {
+ return m_key;
+ }
+ vector<int> infosHasher::getValue()
+ {
+ return m_value;
+ }
+ void infosHasher::setValue ( vector<int> value )
+ {
+ m_value=value;
+ }
+ string infosHasher::toString()
+ {
+ stringstream to_return;
+ to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
+ return to_return.str();
+ }
// typedef stdext::hash_map<std::string,string, stringhasher> HASH_S_S;
diff --git a/mert/TER/infosHasher.h b/mert/TER/infosHasher.h
index ab9c7b5ed..692bde49d 100644
--- a/mert/TER/infosHasher.h
+++ b/mert/TER/infosHasher.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
-#ifndef __INFOSHASHER_H_
-#define __INFOSHASHER_H_
+#ifndef __INFOSHASHER_H__
+#define __INFOSHASHER_H__
#include <string>
// #include <ext/hash_map>
#include <stdio.h>
@@ -31,23 +31,23 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using namespace std;
namespace HashMapSpace
{
-class infosHasher
-{
-private:
- long m_hashKey;
- string m_key;
- vector<int> m_value;
-
-public:
- infosHasher ( long cle, string cleTxt, vector<int> valueVecInt );
- long getHashKey();
- string getKey();
- vector<int> getValue();
- void setValue ( vector<int> value );
- string toString();
-
-
-};
+ class infosHasher
+ {
+ private:
+ long m_hashKey;
+ string m_key;
+ vector<int> m_value;
+
+ public:
+ infosHasher ( long cle, string cleTxt, vector<int> valueVecInt );
+ long getHashKey();
+ string getKey();
+ vector<int> getValue();
+ void setValue ( vector<int> value );
+ string toString();
+
+
+ };
}
diff --git a/mert/TER/stringHasher.cpp b/mert/TER/stringHasher.cpp
index 729310352..f4d1526e8 100644
--- a/mert/TER/stringHasher.cpp
+++ b/mert/TER/stringHasher.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -26,29 +26,29 @@ using namespace std;
namespace HashMapSpace
{
-stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt )
-{
- m_hashKey=cle;
- m_key=cleTxt;
- m_value=valueTxt;
-}
+ stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt )
+ {
+ m_hashKey=cle;
+ m_key=cleTxt;
+ m_value=valueTxt;
+ }
// stringHasher::~stringHasher(){};*/
-long stringHasher::getHashKey()
-{
- return m_hashKey;
-}
-string stringHasher::getKey()
-{
- return m_key;
-}
-string stringHasher::getValue()
-{
- return m_value;
-}
-void stringHasher::setValue ( string value )
-{
- m_value=value;
-}
+ long stringHasher::getHashKey()
+ {
+ return m_hashKey;
+ }
+ string stringHasher::getKey()
+ {
+ return m_key;
+ }
+ string stringHasher::getValue()
+ {
+ return m_value;
+ }
+ void stringHasher::setValue ( string value )
+ {
+ m_value=value;
+ }
// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;
diff --git a/mert/TER/stringHasher.h b/mert/TER/stringHasher.h
index 5b0ccfc94..e2a79834c 100644
--- a/mert/TER/stringHasher.h
+++ b/mert/TER/stringHasher.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
-#ifndef __STRINGHASHER_H_
-#define __STRINGHASHER_H_
+#ifndef __STRINGHASHER_H__
+#define __STRINGHASHER_H__
#include <string>
//#include <ext/hash_map>
#include <iostream>
@@ -28,22 +28,22 @@ using namespace std;
namespace HashMapSpace
{
-class stringHasher
-{
-private:
- long m_hashKey;
- string m_key;
- string m_value;
+ class stringHasher
+ {
+ private:
+ long m_hashKey;
+ string m_key;
+ string m_value;
-public:
- stringHasher ( long cle, string cleTxt, string valueTxt );
- long getHashKey();
- string getKey();
- string getValue();
- void setValue ( string value );
+ public:
+ stringHasher ( long cle, string cleTxt, string valueTxt );
+ long getHashKey();
+ string getKey();
+ string getValue();
+ void setValue ( string value );
-};
+ };
}
diff --git a/mert/TER/stringInfosHasher.cpp b/mert/TER/stringInfosHasher.cpp
index ecbc10fa5..007fd720f 100644
--- a/mert/TER/stringInfosHasher.cpp
+++ b/mert/TER/stringInfosHasher.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -27,35 +27,35 @@ using namespace Tools;
namespace HashMapSpace
{
-stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt )
-{
- m_hashKey=cle;
- m_key=cleTxt;
- m_value=valueVecInt;
-}
+ stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt )
+ {
+ m_hashKey=cle;
+ m_key=cleTxt;
+ m_value=valueVecInt;
+ }
// stringInfosHasher::~stringInfosHasher(){};*/
-long stringInfosHasher::getHashKey()
-{
- return m_hashKey;
-}
-string stringInfosHasher::getKey()
-{
- return m_key;
-}
-vector<string> stringInfosHasher::getValue()
-{
- return m_value;
-}
-void stringInfosHasher::setValue ( vector<string> value )
-{
- m_value=value;
-}
-string stringInfosHasher::toString()
-{
- stringstream to_return;
- to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
- return to_return.str();
-}
+ long stringInfosHasher::getHashKey()
+ {
+ return m_hashKey;
+ }
+ string stringInfosHasher::getKey()
+ {
+ return m_key;
+ }
+ vector<string> stringInfosHasher::getValue()
+ {
+ return m_value;
+ }
+ void stringInfosHasher::setValue ( vector<string> value )
+ {
+ m_value=value;
+ }
+ string stringInfosHasher::toString()
+ {
+ stringstream to_return;
+ to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
+ return to_return.str();
+ }
// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;
diff --git a/mert/TER/stringInfosHasher.h b/mert/TER/stringInfosHasher.h
index e4369f27a..f35e4596b 100644
--- a/mert/TER/stringInfosHasher.h
+++ b/mert/TER/stringInfosHasher.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
-#ifndef __STRINGINFOSHASHER_H_
-#define __STRINGINFOSHASHER_H_
+#ifndef __STRINGINFOSHASHER_H__
+#define __STRINGINFOSHASHER_H__
#include <string>
// #include <ext/hash_map>
#include <iostream>
@@ -29,23 +29,23 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using namespace std;
namespace HashMapSpace
{
-class stringInfosHasher
-{
-private:
- long m_hashKey;
- string m_key;
- vector<string> m_value;
-
-public:
- stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt );
- long getHashKey();
- string getKey();
- vector<string> getValue();
- void setValue ( vector<string> value );
- string toString();
-
-
-};
+ class stringInfosHasher
+ {
+ private:
+ long m_hashKey;
+ string m_key;
+ vector<string> m_value;
+
+ public:
+ stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt );
+ long getHashKey();
+ string getKey();
+ vector<string> getValue();
+ void setValue ( vector<string> value );
+ string toString();
+
+
+ };
}
diff --git a/mert/TER/terAlignment.cpp b/mert/TER/terAlignment.cpp
index ec7bcafb7..dda4a4239 100644
--- a/mert/TER/terAlignment.cpp
+++ b/mert/TER/terAlignment.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -24,163 +24,244 @@ using namespace std;
namespace TERCpp
{
-terAlignment::terAlignment()
-{
+ terAlignment::terAlignment()
+ {
// vector<string> ref;
// vector<string> hyp;
// vector<string> aftershift;
- // TERshift[] allshifts = null;
+ // TERshift[] allshifts = null;
- numEdits=0;
- numWords=0;
- bestRef="";
+ numEdits=0;
+ numWords=0;
+// bestRef="";
- numIns=0;
- numDel=0;
- numSub=0;
- numSft=0;
- numWsf=0;
-}
-string terAlignment::toString()
-{
- stringstream s;
- s.str ( "" );
- s << "Original Ref: \t" << join ( " ", ref ) << endl;
- s << "Original Hyp: \t" << join ( " ", hyp ) <<endl;
- s << "Hyp After Shift:\t" << join ( " ", aftershift );
+ numIns=0;
+ numDel=0;
+ numSub=0;
+ numSft=0;
+ numWsf=0;
+ averageWords=0;
+
+ }
+ void terAlignment::set(terAlignment& l_terAlignment)
+ {
+ numEdits=l_terAlignment.numEdits;
+ numWords=l_terAlignment.numWords;
+ bestRef=l_terAlignment.bestRef;
+ numIns=l_terAlignment.numIns;
+ numDel=l_terAlignment.numDel;
+ numSub=l_terAlignment.numSub;
+ numSft=l_terAlignment.numSft;
+ numWsf=l_terAlignment.numWsf;
+ averageWords=l_terAlignment.averageWords;
+ ref=l_terAlignment.ref;
+ hyp=l_terAlignment.hyp;
+ aftershift=l_terAlignment.aftershift;
+// allshifts=l_terAlignment.allshifts;
+ hyp_int=l_terAlignment.hyp_int;
+ aftershift_int=l_terAlignment.aftershift_int;
+ alignment=l_terAlignment.alignment;
+ allshifts=(*(new vector<terShift>((int)l_terAlignment.allshifts.size())));
+ for (int l_i=0; l_i< (int)l_terAlignment.allshifts.size(); l_i++)
+ {
+ allshifts.at(l_i).set(l_terAlignment.allshifts.at(l_i));
+ }
+
+ }
+ void terAlignment::set(terAlignment* l_terAlignment)
+ {
+ numEdits=l_terAlignment->numEdits;
+ numWords=l_terAlignment->numWords;
+ bestRef=l_terAlignment->bestRef;
+ numIns=l_terAlignment->numIns;
+ numDel=l_terAlignment->numDel;
+ numSub=l_terAlignment->numSub;
+ numSft=l_terAlignment->numSft;
+ numWsf=l_terAlignment->numWsf;
+ averageWords=l_terAlignment->averageWords;
+ ref=l_terAlignment->ref;
+ hyp=l_terAlignment->hyp;
+ aftershift=l_terAlignment->aftershift;
+// allshifts=l_terAlignment->allshifts;
+ hyp_int=l_terAlignment->hyp_int;
+ aftershift_int=l_terAlignment->aftershift_int;
+ alignment=l_terAlignment->alignment;
+ allshifts=(*(new vector<terShift>((int)l_terAlignment->allshifts.size())));
+ for (int l_i=0; l_i< (int)l_terAlignment->allshifts.size(); l_i++)
+ {
+ allshifts.at(l_i).set(l_terAlignment->allshifts.at(l_i));
+ }
+
+ }
+
+ string terAlignment::toString()
+ {
+ stringstream s;
+ s.str ( "" );
+ s << "Original Ref: \t" << join ( " ", ref ) << endl;
+ s << "Original Hyp: \t" << join ( " ", hyp ) <<endl;
+ s << "Hyp After Shift:\t" << join ( " ", aftershift );
// s << "Hyp After Shift: " << join ( " ", aftershift );
- s << endl;
+ s << endl;
// string s = "Original Ref: " + join(" ", ref) + "\nOriginal Hyp: " + join(" ", hyp) + "\nHyp After Shift: " + join(" ", aftershift);
- if ( ( int ) sizeof ( alignment ) >0 ) {
- s << "Alignment: (";
+ if ( ( int ) sizeof ( alignment ) >0 )
+ {
+ s << "Alignment: (";
// s += "\nAlignment: (";
- for ( int i = 0; i < ( int ) ( alignment.size() ); i++ ) {
- s << alignment[i];
+ for ( int i = 0; i < ( int ) ( alignment.size() ); i++ )
+ {
+ s << alignment[i];
// s+=alignment[i];
- }
+ }
// s += ")";
- s << ")";
- }
- s << endl;
- if ( ( int ) allshifts.size() == 0 ) {
+ s << ")";
+ }
+ s << endl;
+ if ( ( int ) allshifts.size() == 0 )
+ {
// s += "\nNumShifts: 0";
- s << "NumShifts: 0";
- } else {
+ s << "NumShifts: 0";
+ }
+ else
+ {
// s += "\nNumShifts: " + (int)allshifts.size();
- s << "NumShifts: "<< ( int ) allshifts.size();
- for ( int i = 0; i < ( int ) allshifts.size(); i++ ) {
- s << endl << " " ;
- s << ( ( terShift ) allshifts[i] ).toString();
+ s << "NumShifts: "<< ( int ) allshifts.size();
+ for ( int i = 0; i < ( int ) allshifts.size(); i++ )
+ {
+ s << endl << " " ;
+ s << ( ( terShift ) allshifts[i] ).toString();
// s += "\n " + allshifts[i];
- }
- }
- s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")";
+ }
+ }
+ s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")";
// s += "\nScore: " + score() + " (" + numEdits + "/" + numWords + ")";
- return s.str();
+ return s.str();
-}
-string terAlignment::join ( string delim, vector<string> arr )
-{
- if ( ( int ) arr.size() == 0 ) return "";
+ }
+ string terAlignment::join ( string delim, vector<string> arr )
+ {
+ if ( ( int ) arr.size() == 0 ) return "";
// if ((int)delim.compare("") == 0) delim = new String("");
// String s = new String("");
- stringstream s;
- s.str ( "" );
- for ( int i = 0; i < ( int ) arr.size(); i++ ) {
- if ( i == 0 ) {
- s << arr.at ( i );
- } else {
- s << delim << arr.at ( i );
- }
- }
- return s.str();
+ stringstream s;
+ s.str ( "" );
+ for ( int i = 0; i < ( int ) arr.size(); i++ )
+ {
+ if ( i == 0 )
+ {
+ s << arr.at ( i );
+ }
+ else
+ {
+ s << delim << arr.at ( i );
+ }
+ }
+ return s.str();
// return "";
-}
-double terAlignment::score()
-{
- if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) ) {
- return 1.0;
- }
- if ( numWords <= 0.0 ) {
- return 0.0;
- }
- return ( double ) numEdits / numWords;
-}
-double terAlignment::scoreAv()
-{
- if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) ) {
- return 1.0;
- }
- if ( averageWords <= 0.0 ) {
- return 0.0;
- }
- return ( double ) numEdits / averageWords;
-}
-
-void terAlignment::scoreDetails()
-{
- numIns = numDel = numSub = numWsf = numSft = 0;
- if((int)allshifts.size()>0) {
- for(int i = 0; i < (int)allshifts.size(); ++i) {
- numWsf += allshifts[i].size();
}
- numSft = allshifts.size();
- }
-
- if((int)alignment.size()>0 ) {
- for(int i = 0; i < (int)alignment.size(); ++i) {
- switch (alignment[i]) {
- case 'S':
- case 'T':
- numSub++;
- break;
- case 'D':
- numDel++;
- break;
- case 'I':
- numIns++;
- break;
- }
+ double terAlignment::score()
+ {
+ if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) )
+ {
+ return 1.0;
+ }
+ if ( numWords <= 0.0 )
+ {
+ return 0.0;
+ }
+ return ( double ) numEdits / numWords;
}
- }
- // if(numEdits != numSft + numDel + numIns + numSub)
- // System.out.println("** Error, unmatch edit erros " + numEdits +
- // " vs " + (numSft + numDel + numIns + numSub));
-}
-string terAlignment::printAlignments()
-{
- stringstream to_return;
- for(int i = 0; i < (int)alignment.size(); ++i) {
- char alignInfo=alignment.at(i);
- if (alignInfo == 'A' ) {
- alignInfo='A';
+ double terAlignment::scoreAv()
+ {
+ if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) )
+ {
+ return 1.0;
+ }
+ if ( averageWords <= 0.0 )
+ {
+ return 0.0;
+ }
+ return ( double ) numEdits / averageWords;
}
- if (i==0) {
- to_return << alignInfo;
- } else {
- to_return << " " << alignInfo;
- }
+ void terAlignment::scoreDetails()
+ {
+ numIns = numDel = numSub = numWsf = numSft = 0;
+ if((int)allshifts.size()>0)
+ {
+ for(int i = 0; i < (int)allshifts.size(); ++i)
+ {
+ numWsf += allshifts[i].size();
+ }
+ numSft = allshifts.size();
+ }
+
+ if((int)alignment.size()>0 )
+ {
+ for(int i = 0; i < (int)alignment.size(); ++i)
+ {
+ switch (alignment[i])
+ {
+ case 'S':
+ case 'T':
+ numSub++;
+ break;
+ case 'D':
+ numDel++;
+ break;
+ case 'I':
+ numIns++;
+ break;
+ }
+ }
+ }
+ // if(numEdits != numSft + numDel + numIns + numSub)
+ // System.out.println("** Error, unmatch edit erros " + numEdits +
+ // " vs " + (numSft + numDel + numIns + numSub));
+ }
+ string terAlignment::printAlignments()
+ {
+ stringstream to_return;
+ for(int i = 0; i < (int)alignment.size(); ++i)
+ {
+ char alignInfo=alignment.at(i);
+ if (alignInfo == 'A' )
+ {
+ alignInfo='A';
+ }
+
+ if (i==0)
+ {
+ to_return << alignInfo;
+ }
+ else
+ {
+ to_return << " " << alignInfo;
+ }
+ }
+ return to_return.str();
}
- return to_return.str();
-}
string terAlignment::printAllShifts()
{
- stringstream to_return;
- if ( ( int ) allshifts.size() == 0 ) {
+ stringstream to_return;
+ if ( ( int ) allshifts.size() == 0 )
+ {
// s += "\nNumShifts: 0";
- to_return << "NbrShifts: 0";
- } else {
+ to_return << "NbrShifts: 0";
+ }
+ else
+ {
// s += "\nNumShifts: " + (int)allshifts.size();
- to_return << "NbrShifts: "<< ( int ) allshifts.size();
- for ( int i = 0; i < ( int ) allshifts.size(); i++ ) {
- to_return << "\t" ;
- to_return << ( ( terShift ) allshifts[i] ).toString();
+ to_return << "NbrShifts: "<< ( int ) allshifts.size();
+ for ( int i = 0; i < ( int ) allshifts.size(); i++ )
+ {
+ to_return << "\t" ;
+ to_return << ( ( terShift ) allshifts[i] ).toString();
// s += "\n " + allshifts[i];
- }
- }
- return to_return.str();
+ }
+ }
+ return to_return.str();
}
} \ No newline at end of file
diff --git a/mert/TER/terAlignment.h b/mert/TER/terAlignment.h
index 2af0b7490..e9524df7c 100644
--- a/mert/TER/terAlignment.h
+++ b/mert/TER/terAlignment.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
-#ifndef MERT_TER_TERALIGNMENT_H_
-#define MERT_TER_TERALIGNMENT_H_
+#ifndef __TERCPPTERALIGNMENT_H__
+#define __TERCPPTERALIGNMENT_H__
#include <vector>
@@ -34,41 +34,44 @@ using namespace std;
namespace TERCpp
{
-class terAlignment
-{
-private:
-public:
-
- terAlignment();
- string toString();
- void scoreDetails();
-
- vector<string> ref;
- vector<string> hyp;
- vector<string> aftershift;
- vector<terShift> allshifts;
- vector<int> hyp_int;
- vector<int> aftershift_int;
-
- double numEdits;
- double numWords;
- double averageWords;
- vector<char> alignment;
- string bestRef;
-
- int numIns;
- int numDel;
- int numSub;
- int numSft;
- int numWsf;
-
-
- string join ( string delim, vector<string> arr );
- double score();
- double scoreAv();
- string printAlignments();
- string printAllShifts();
-};
+ class terAlignment
+ {
+ private:
+ public:
+
+ vector<string> ref;
+ vector<string> hyp;
+ vector<string> aftershift;
+ vector<terShift> allshifts;
+ vector<int> hyp_int;
+ vector<int> aftershift_int;
+
+ double numEdits;
+ double numWords;
+ double averageWords;
+ vector<char> alignment;
+ string bestRef;
+
+ int numIns;
+ int numDel;
+ int numSub;
+ int numSft;
+ int numWsf;
+
+
+ terAlignment();
+ string toString();
+ void scoreDetails();
+
+
+ string join ( string delim, vector<string> arr );
+ double score();
+ double scoreAv();
+ string printAlignments();
+ string printAllShifts();
+ void set(terAlignment& l_terAlignment);
+ void set(terAlignment* l_terAlignment);
+ };
}
#endif \ No newline at end of file
diff --git a/mert/TER/terShift.cpp b/mert/TER/terShift.cpp
index 440b4d2ce..e271ad6a7 100644
--- a/mert/TER/terShift.cpp
+++ b/mert/TER/terShift.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -42,32 +42,70 @@ namespace TERCpp
// numSft=0;
// numWsf=0;
// }
-terShift::terShift ()
-{
- start = 0;
- end = 0;
- moveto = 0;
- newloc = 0;
- cost=1.0;
-}
-terShift::terShift ( int _start, int _end, int _moveto, int _newloc )
-{
- start = _start;
- end = _end;
- moveto = _moveto;
- newloc = _newloc;
- cost=1.0;
-}
+ terShift::terShift ()
+ {
+ start = 0;
+ end = 0;
+ moveto = 0;
+ newloc = 0;
+ cost=1.0;
+ shifted.clear();
+ alignment.clear();
+ aftershift.clear();
+ }
+ terShift::terShift ( int _start, int _end, int _moveto, int _newloc )
+ {
+ start = _start;
+ end = _end;
+ moveto = _moveto;
+ newloc = _newloc;
+ cost=1.0;
+ }
-terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted )
-{
- start = _start;
- end = _end;
- moveto = _moveto;
- newloc = _newloc;
- shifted = _shifted;
- cost=1.0;
-}
+ terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted )
+ {
+ start = _start;
+ end = _end;
+ moveto = _moveto;
+ newloc = _newloc;
+ shifted = _shifted;
+ cost=1.0;
+ }
+ void terShift::set(terShift l_terShift)
+ {
+ start=l_terShift.start;
+ end=l_terShift.end;
+ moveto=l_terShift.moveto;
+ newloc=l_terShift.newloc;
+ shifted=l_terShift.shifted;
+// alignment=l_terShift.alignment;
+// aftershift=l_terShift.aftershift;
+ }
+ void terShift::set(terShift *l_terShift)
+ {
+ start=l_terShift->start;
+ end=l_terShift->end;
+ moveto=l_terShift->moveto;
+ newloc=l_terShift->newloc;
+ shifted=l_terShift->shifted;
+// alignment=l_terShift->alignment;
+// aftershift=l_terShift->aftershift;
+ }
+
+ void terShift::erase()
+ {
+ start = 0;
+ end = 0;
+ moveto = 0;
+ newloc = 0;
+ cost=1.0;
+ shifted.clear();
+ alignment.clear();
+ aftershift.clear();
+ }
+
+
+
// string terShift::vectorToString(vector<string> vec)
// {
// string retour("");
@@ -78,38 +116,54 @@ terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<stri
// return retour;
// }
-string terShift::toString()
-{
- stringstream s;
- s.str ( "" );
- s << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
- if ( ( int ) shifted.size() > 0 ) {
- s << " (" << vectorToString ( shifted ) << ")";
- }
- return s.str();
-}
+ string terShift::toString()
+ {
+ stringstream s;
+ s.str ( "" );
+ s << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
+ if ( ( int ) shifted.size() > 0 )
+ {
+ s << " (" << vectorToString ( shifted ) << ")";
+ }
+// s<< endl;
+// if ( ( int ) shifted.size() > 0 )
+// {
+// s << " (" << vectorToString ( alignment ) << ")";
+// }
+// s<< endl;
+// if ( ( int ) shifted.size() > 0 )
+// {
+// s << " (" << vectorToString ( aftershift ) << ")";
+// }
+ return s.str();
+ }
-/* The distance of the shift. */
-int terShift::distance()
-{
- if ( moveto < start ) {
- return start - moveto;
- } else if ( moveto > end ) {
- return moveto - end;
- } else {
- return moveto - start;
- }
-}
+ /* The distance of the shift. */
+ int terShift::distance()
+ {
+ if ( moveto < start )
+ {
+ return start - moveto;
+ }
+ else if ( moveto > end )
+ {
+ return moveto - end;
+ }
+ else
+ {
+ return moveto - start;
+ }
+ }
-bool terShift::leftShift()
-{
- return ( moveto < start );
-}
+ bool terShift::leftShift()
+ {
+ return ( moveto < start );
+ }
-int terShift::size()
-{
- return ( end - start ) + 1;
-}
+ int terShift::size()
+ {
+ return ( end - start ) + 1;
+ }
// terShift terShift::operator=(terShift t)
// {
//
diff --git a/mert/TER/terShift.h b/mert/TER/terShift.h
index 74545e0de..65a812d15 100644
--- a/mert/TER/terShift.h
+++ b/mert/TER/terShift.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
-#ifndef MERT_TER_TERSHIFT_H_
-#define MERT_TER_TERSHIFT_H_
+#ifndef __TERCPPTERSHIFT_H__
+#define __TERCPPTERSHIFT_H__
#include <vector>
@@ -34,32 +34,35 @@ using namespace Tools;
namespace TERCpp
{
-class terShift
-{
-private:
-public:
+ class terShift
+ {
+ private:
+ public:
- terShift();
- terShift ( int _start, int _end, int _moveto, int _newloc );
- terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted );
- string toString();
- int distance() ;
- bool leftShift();
- int size();
+ terShift();
+ terShift ( int _start, int _end, int _moveto, int _newloc );
+ terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted );
+ string toString();
+ int distance() ;
+ bool leftShift();
+ int size();
// terShift operator=(terShift t);
// string vectorToString(vector<string> vec);
- int start;
- int end;
- int moveto;
- int newloc;
- vector<string> shifted; // The words we shifted
- vector<char> alignment ; // for pra_more output
- vector<string> aftershift; // for pra_more output
- // This is used to store the cost of a shift, so we don't have to
- // calculate it multiple times.
- double cost;
-};
+ int start;
+ int end;
+ int moveto;
+ int newloc;
+ vector<string> shifted; // The words we shifted
+ vector<char> alignment ; // for pra_more output
+ vector<string> aftershift; // for pra_more output
+ // This is used to store the cost of a shift, so we don't have to
+ // calculate it multiple times.
+ double cost;
+ void set(terShift l_terShift);
+ void set(terShift *l_terShift);
+ void erase();
+ };
}
#endif \ No newline at end of file
diff --git a/mert/TER/tercalc.cpp b/mert/TER/tercalc.cpp
index c4629c639..8a84b49b3 100644
--- a/mert/TER/tercalc.cpp
+++ b/mert/TER/tercalc.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -35,724 +35,1238 @@ using namespace Tools;
namespace TERCpp
{
-terCalc::terCalc()
-{
- TAILLE_PERMUT_MAX = 50;
- infinite = 999999.0;
- shift_cost = 1.0;
- insert_cost = 1.0;
- delete_cost = 1.0;
- substitute_cost = 1.0;
- match_cost = 0.0;
- NBR_SEGS_EVALUATED = 0;
- NBR_PERMUTS_CONSID = 0;
- NBR_BS_APPELS = 0;
- TAILLE_BEAM = 20;
- DIST_MAX_PERMUT = 50;
- PRINT_DEBUG = false;
- hypSpans.clear();
- refSpans.clear();
-}
-
-
-terAlignment terCalc::WERCalculation ( vector< string > hyp , vector< string > ref )
-{
+ terCalc::terCalc()
+ {
+ TAILLE_PERMUT_MAX = 10;
+ NBR_PERMUT_MAX = 10;
+ infinite = 99999.0;
+ shift_cost = 1.0;
+ insert_cost = 1.0;
+ delete_cost = 1.0;
+ substitute_cost = 1.0;
+ match_cost = 0.0;
+ NBR_SEGS_EVALUATED = 0;
+ NBR_PERMUTS_CONSID = 0;
+ NBR_BS_APPELS = 0;
+ TAILLE_BEAM = 10;
+ DIST_MAX_PERMUT = 25;
+ PRINT_DEBUG = false;
+ hypSpans.clear();
+ refSpans.clear();
+ CALL_TER_ALIGN=0;
+ CALL_CALC_PERMUT=0;
+ CALL_FIND_BSHIFT=0;
+ MAX_LENGTH_SENTENCE=10;
+ S = new vector < vector < double > >(MAX_LENGTH_SENTENCE, std::vector<double>(MAX_LENGTH_SENTENCE,0.0));
+ P = new vector < vector < char > >(MAX_LENGTH_SENTENCE, std::vector<char>(MAX_LENGTH_SENTENCE,' '));
+ }
- return minimizeDistanceEdition ( hyp, ref, hypSpans );
+ terCalc::~terCalc()
+ {
+ delete(S);
+ delete(P);
+ }
-}
-terAlignment terCalc::TER ( std::vector< int > hyp, std::vector< int > ref )
-{
- stringstream s;
- s.str ( "" );
- string stringRef ( "" );
- string stringHyp ( "" );
- for ( vector<int>::iterator l_it = ref.begin(); l_it != ref.end(); l_it++ ) {
- if ( l_it == ref.begin() ) {
- s << ( *l_it );
- } else {
- s << " " << ( *l_it );
+ terAlignment terCalc::WERCalculation ( vector< string >& hyp , vector< string >& ref )
+ {
+
+ return minimizeDistanceEdition ( hyp, ref, hypSpans );
+
}
- }
- stringRef = s.str();
- s.str ( "" );
- for ( vector<int>::iterator l_itHyp = hyp.begin(); l_itHyp != hyp.end(); l_itHyp++ ) {
- if ( l_itHyp == hyp.begin() ) {
- s << ( *l_itHyp );
- } else {
- s << " " << ( *l_itHyp );
+
+ terAlignment terCalc::TER ( vector< int >& hyp, vector< int >& ref )
+ {
+ stringstream s;
+ s.str ( "" );
+ string stringRef ( "" );
+ string stringHyp ( "" );
+ for ( vector<int>::iterator l_it = ref.begin(); l_it != ref.end(); l_it++ )
+ {
+ if ( l_it == ref.begin() )
+ {
+ s << ( *l_it );
+ }
+ else
+ {
+ s << " " << ( *l_it );
+ }
+ }
+ stringRef = s.str();
+ s.str ( "" );
+ for ( vector<int>::iterator l_itHyp = hyp.begin(); l_itHyp != hyp.end(); l_itHyp++ )
+ {
+ if ( l_itHyp == hyp.begin() )
+ {
+ s << ( *l_itHyp );
+ }
+ else
+ {
+ s << " " << ( *l_itHyp );
+ }
+ }
+ stringHyp = s.str();
+ s.str ( "" );
+ vector<string> l_vref=stringToVector ( stringRef , " " );
+ vector<string> l_vhyp=stringToVector ( stringHyp , " " );
+ return TER ( l_vhyp , l_vref);
}
- }
- stringHyp = s.str();
- s.str ( "" );
- return TER ( stringToVector ( stringRef , " " ), stringToVector ( stringHyp , " " ) );
-}
-hashMapInfos terCalc::createConcordMots ( vector<string> hyp, vector<string> ref )
-{
- hashMap tempHash;
- hashMapInfos retour;
- for ( int i = 0; i < ( int ) hyp.size(); i++ ) {
- tempHash.addHasher ( hyp.at ( i ), "" );
- }
- bool cor[ref.size() ];
- for ( int i = 0; i < ( int ) ref.size(); i++ ) {
- if ( tempHash.trouve ( ( string ) ref.at ( i ) ) ) {
- cor[i] = true;
- } else {
- cor[i] = false;
- }
- }
- for ( int start = 0; start < ( int ) ref.size(); start++ ) {
- if ( cor[start] ) {
- for ( int end = start; ( ( end < ( int ) ref.size() ) && ( end - start <= TAILLE_PERMUT_MAX ) && ( cor[end] ) ); end++ ) {
- vector<string> ajouter = subVector ( ref, start, end + 1 );
- string ajouterString = vectorToString ( ajouter );
- vector<int> values = retour.getValue ( ajouterString );
- values.push_back ( start );
- if ( values.size() > 1 ) {
- retour.setValue ( ajouterString, values );
- } else {
- retour.addValue ( ajouterString, values );
+ hashMapInfos terCalc::createConcordMots ( vector< string >& hyp, vector< string >& ref )
+ {
+ hashMap tempHash;
+ hashMapInfos retour;
+ for ( int i = 0; i < ( int ) hyp.size(); i++ )
+ {
+ tempHash.addHasher ( hyp.at ( i ), "" );
+ }
+ bool cor[ref.size() ];
+ for ( int i = 0; i < ( int ) ref.size(); i++ )
+ {
+ if ( tempHash.trouve ( ( string ) ref.at ( i ) ) )
+ {
+ cor[i] = true;
+ }
+ else
+ {
+ cor[i] = false;
+ }
+ }
+ for ( int start = 0; start < ( int ) ref.size(); start++ )
+ {
+ if ( cor[start] )
+ {
+ for ( int end = start; ( ( end < ( int ) ref.size() ) && ( end - start <= TAILLE_PERMUT_MAX ) && ( cor[end] ) );end++ )
+ {
+ vector<string> ajouter = subVector ( ref, start, end + 1 );
+ string ajouterString = vectorToString ( ajouter );
+ vector<int> values = retour.getValue ( ajouterString );
+ values.push_back ( start );
+ if ( values.size() > 1 )
+ {
+ retour.setValue ( ajouterString, values );
+ }
+ else
+ {
+ retour.addValue ( ajouterString, values );
+ }
+ }
+ }
}
- }
+ return retour;
}
- }
- return retour;
-}
-
-bool terCalc::trouverIntersection ( vecInt refSpan, vecInt hypSpan )
-{
- if ( ( refSpan.at ( 1 ) >= hypSpan.at ( 0 ) ) && ( refSpan.at ( 0 ) <= hypSpan.at ( 1 ) ) ) {
- return true;
- }
- return false;
-}
+ bool terCalc::trouverIntersection ( vecInt& refSpan, vecInt& hypSpan )
+ {
+ if ( ( refSpan.at ( 1 ) >= hypSpan.at ( 0 ) ) && ( refSpan.at ( 0 ) <= hypSpan.at ( 1 ) ) )
+ {
+ return true;
+ }
+ return false;
+ }
-terAlignment terCalc::minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans )
-{
- double current_best = infinite;
- double last_best = infinite;
- int first_good = 0;
- int current_first_good = 0;
- int last_good = -1;
- int cur_last_good = 0;
- int last_peak = 0;
- int cur_last_peak = 0;
- int i, j;
- double cost, icost, dcost;
- double score;
+ terAlignment terCalc::minimizeDistanceEdition ( vector< string >& hyp, vector< string >& ref, vector< vecInt >& curHypSpans )
+ {
+ double current_best = infinite;
+ double last_best = infinite;
+ int first_good = 0;
+ int current_first_good = 0;
+ int last_good = -1;
+ int cur_last_good = 0;
+ int last_peak = 0;
+ int cur_last_peak = 0;
+ int i=0;
+ int j=0;
+ int ref_size=0 ;
+ ref_size=( int ) ref.size();
+ int hyp_size=0;
+ hyp_size=( int ) hyp.size();
+ double cost, icost, dcost;
+ double score;
+ delete(S);
+ delete(P);
+ S = new vector < vector < double > >(ref_size+1, std::vector<double>(hyp_size+1,-1.0));
+ P = new vector < vector < char > >(ref_size+1, std::vector<char>(hyp_size+1,'0'));
- NBR_BS_APPELS++;
+
+ NBR_BS_APPELS++;
+// cerr << "Appels : " << NBR_BS_APPELS << endl;
+
+// for ( i = 0; i <= ref_size; i++ )
+// {
+// for ( j = 0; j <= hyp_size; j++ )
+// {
+// S->at(i).at(j) = -1.0;
+// P->at(i).at(j) = '0';
+// }
+// }
+ S->at(0).at(0) = 0.0;
+ for ( j = 0; j <= hyp_size; j++ )
+ {
+ last_best = current_best;
+ current_best = infinite;
+ first_good = current_first_good;
+ current_first_good = -1;
+ last_good = cur_last_good;
+ cur_last_good = -1;
+ last_peak = cur_last_peak;
+ cur_last_peak = 0;
+ for ( i = first_good; i <= ref_size; i++ )
+ {
+ if ( i > last_good )
+ {
+ break;
+ }
+ if ( S->at(i).at(j) < 0 )
+ {
+ continue;
+ }
+ score = S->at(i).at(j);
+ if ( ( j < hyp_size ) && ( score > last_best + TAILLE_BEAM ) )
+ {
+ continue;
+ }
+ if ( current_first_good == -1 )
+ {
+ current_first_good = i ;
+ }
+ if ( ( i < ref_size ) && ( j < hyp_size ) )
+ {
+ if ( ( int ) refSpans.size() == 0 || ( int ) hypSpans.size() == 0 || trouverIntersection ( refSpans.at ( i ), curHypSpans.at ( j ) ) )
+ {
+ if ( ( int ) ( ref.at ( i ).compare ( hyp.at ( j ) ) ) == 0 )
+ {
+ cost = match_cost + score;
+ if ( ( S->at(i+1).at(j+1) == -1 ) || ( cost < S->at(i+1).at(j+1) ) )
+ {
+ S->at(i+1).at(j+1) = cost;
+ P->at(i+1).at(j+1) = 'A';
+ }
+ if ( cost < current_best )
+ {
+ current_best = cost;
+ }
+ if ( current_best == cost )
+ {
+ cur_last_peak = i + 1;
+ }
+ }
+ else
+ {
+ cost = substitute_cost + score;
+ if ( ( S->at(i+1).at(j+1) < 0 ) || ( cost < S->at(i+1).at(j+1) ) )
+ {
+ S->at(i+1).at(j+1) = cost;
+ P->at(i+1).at(j+1) = 'S';
+ if ( cost < current_best )
+ {
+ current_best = cost;
+ }
+ if ( current_best == cost )
+ {
+ cur_last_peak = i + 1 ;
+ }
+ }
+ }
+ }
+ }
+ cur_last_good = i + 1;
+ if ( j < hyp_size )
+ {
+ icost = score + insert_cost;
+ if ( ( S->at(i).at(j+1) < 0 ) || ( S->at(i).at(j+1) > icost ) )
+ {
+ S->at(i).at(j+1) = icost;
+ P->at(i).at(j+1) = 'I';
+ if ( ( cur_last_peak < i ) && ( current_best == icost ) )
+ {
+ cur_last_peak = i;
+ }
+ }
+ }
+ if ( i < ref_size )
+ {
+ dcost = score + delete_cost;
+ if ( ( S->at(i+1).at(j) < 0.0 ) || ( S->at(i+1).at(j) > dcost ) )
+ {
+ S->at(i+1).at(j) = dcost;
+ P->at(i+1).at(j) = 'D';
+ if ( i >= last_good )
+ {
+ last_good = i + 1 ;
+ }
+ }
+ }
+ }
+ }
- for ( i = 0; i <= ( int ) ref.size(); i++ ) {
- for ( j = 0; j <= ( int ) hyp.size(); j++ ) {
- S[i][j] = -1.0;
- P[i][j] = '0';
- }
- }
- S[0][0] = 0.0;
- for ( j = 0; j <= ( int ) hyp.size(); j++ ) {
- last_best = current_best;
- current_best = infinite;
- first_good = current_first_good;
- current_first_good = -1;
- last_good = cur_last_good;
- cur_last_good = -1;
- last_peak = cur_last_peak;
- cur_last_peak = 0;
- for ( i = first_good; i <= ( int ) ref.size(); i++ ) {
- if ( i > last_good ) {
- break;
- }
- if ( S[i][j] < 0 ) {
- continue;
- }
- score = S[i][j];
- if ( ( j < ( int ) hyp.size() ) && ( score > last_best + TAILLE_BEAM ) ) {
- continue;
- }
- if ( current_first_good == -1 ) {
- current_first_good = i ;
- }
- if ( ( i < ( int ) ref.size() ) && ( j < ( int ) hyp.size() ) ) {
- if ( ( int ) refSpans.size() == 0 || ( int ) hypSpans.size() == 0 || trouverIntersection ( refSpans.at ( i ), curHypSpans.at ( j ) ) ) {
- if ( ( int ) ( ref.at ( i ).compare ( hyp.at ( j ) ) ) == 0 ) {
- cost = match_cost + score;
- if ( ( S[i+1][j+1] == -1 ) || ( cost < S[i+1][j+1] ) ) {
- S[i+1][j+1] = cost;
- P[i+1][j+1] = 'A';
+ int tracelength = 0;
+ i = ref.size();
+ j = hyp.size();
+ while ( ( i > 0 ) || ( j > 0 ) )
+ {
+ tracelength++;
+ if ( P->at(i).at(j) == 'A' )
+ {
+ i--;
+ j--;
}
- if ( cost < current_best ) {
- current_best = cost;
+ else
+ if ( P->at(i).at(j) == 'S' )
+ {
+ i--;
+ j--;
+ }
+ else
+ if ( P->at(i).at(j) == 'D' )
+ {
+ i--;
+ }
+ else
+ if ( P->at(i).at(j) == 'I' )
+ {
+ j--;
+ }
+ else
+ {
+ cerr << "ERROR : terCalc::minimizeDistanceEdition : Invalid path : " << P->at(i).at(j) << endl;
+ exit ( -1 );
+ }
+ }
+ vector<char> path ( tracelength );
+ i = ref.size();
+ j = hyp.size();
+ while ( ( i > 0 ) || ( j > 0 ) )
+ {
+ path[--tracelength] = P->at(i).at(j);
+ if ( P->at(i).at(j) == 'A' )
+ {
+ i--;
+ j--;
}
- if ( current_best == cost ) {
- cur_last_peak = i + 1;
+ else
+ if ( P->at(i).at(j) == 'S' )
+ {
+ i--;
+ j--;
+ }
+ else
+ if ( P->at(i).at(j) == 'D' )
+ {
+ i--;
+ }
+ else
+ if ( P->at(i).at(j) == 'I' )
+ {
+ j--;
+ }
+ }
+ terAlignment to_return;
+ to_return.numWords = ref_size;
+ to_return.alignment = path;
+ to_return.numEdits = S->at(ref_size).at(hyp_size);
+ to_return.hyp = hyp;
+ to_return.ref = ref;
+ to_return.averageWords = ref_size;
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::minimizeDistanceEdition : to_return :" << endl << to_return.toString() << endl << "END DEBUG" << endl;
+ }
+ return to_return;
+
+ }
+ void terCalc::minimizeDistanceEdition ( vector< string >& hyp, vector< string >& ref, vector< vecInt >& curHypSpans, terAlignment* to_return )
+ {
+ double current_best = infinite;
+ double last_best = infinite;
+ int first_good = 0;
+ int current_first_good = 0;
+ int last_good = -1;
+ int cur_last_good = 0;
+ int last_peak = 0;
+ int cur_last_peak = 0;
+ int i=0;
+ int j=0;
+ int ref_size=0 ;
+ ref_size=( int ) ref.size();
+ int hyp_size=0;
+ hyp_size=( int ) hyp.size();
+ double cost, icost, dcost;
+ double score;
+ delete(S);
+ delete(P);
+ S = new vector < vector < double > >(ref_size+1, std::vector<double>(hyp_size+1,-1.0));
+ P = new vector < vector < char > >(ref_size+1, std::vector<char>(hyp_size+1,'0'));
+
+ NBR_BS_APPELS++;
+// cerr << "Appels : " << NBR_BS_APPELS << endl;
+
+// for ( i = 0; i <= ref_size; i++ )
+// {
+// for ( j = 0; j <= hyp_size; j++ )
+// {
+// S->at(i).at(j) = -1.0;
+// P->at(i).at(j) = '0';
+// }
+// }
+ S->at(0).at(0) = 0.0;
+ for ( j = 0; j <= hyp_size; j++ )
+ {
+ last_best = current_best;
+ current_best = infinite;
+ first_good = current_first_good;
+ current_first_good = -1;
+ last_good = cur_last_good;
+ cur_last_good = -1;
+ last_peak = cur_last_peak;
+ cur_last_peak = 0;
+ for ( i = first_good; i <= ref_size; i++ )
+ {
+ if ( i > last_good )
+ {
+ break;
+ }
+ if (S->at(i).at(j) < 0 )
+ {
+ continue;
+ }
+ score = S->at(i).at(j);
+ if ( ( j < hyp_size ) && ( score > last_best + TAILLE_BEAM ) )
+ {
+ continue;
+ }
+ if ( current_first_good == -1 )
+ {
+ current_first_good = i ;
+ }
+ if ( ( i < ref_size ) && ( j < hyp_size ) )
+ {
+ if ( ( int ) refSpans.size() == 0 || ( int ) hypSpans.size() == 0 || trouverIntersection ( refSpans.at ( i ), curHypSpans.at ( j ) ) )
+ {
+ if ( ( int ) ( ref.at ( i ).compare ( hyp.at ( j ) ) ) == 0 )
+ {
+ cost = match_cost + score;
+ if ( ( S->at(i+1).at(j+1) == -1 ) || ( cost < S->at(i+1).at(j+1) ) )
+ {
+ S->at(i+1).at(j+1) = cost;
+ P->at(i+1).at(j+1) = 'A';
+ }
+ if ( cost < current_best )
+ {
+ current_best = cost;
+ }
+ if ( current_best == cost )
+ {
+ cur_last_peak = i + 1;
+ }
+ }
+ else
+ {
+ cost = substitute_cost + score;
+ if ( ( S->at(i+1).at(j+1) < 0 ) || ( cost < S->at(i+1).at(j+1) ) )
+ {
+ S->at(i+1).at(j+1) = cost;
+ P->at(i+1).at(j+1) = 'S';
+ if ( cost < current_best )
+ {
+ current_best = cost;
+ }
+ if ( current_best == cost )
+ {
+ cur_last_peak = i + 1 ;
+ }
+ }
+ }
+ }
+ }
+ cur_last_good = i + 1;
+ if ( j < hyp_size )
+ {
+ icost = score + insert_cost;
+ if ( ( S->at(i).at(j+1) < 0 ) || ( S->at(i).at(j+1) > icost ) )
+ {
+ S->at(i).at(j+1) = icost;
+ P->at(i).at(j+1) = 'I';
+ if ( ( cur_last_peak < i ) && ( current_best == icost ) )
+ {
+ cur_last_peak = i;
+ }
+ }
+ }
+ if ( i < ref_size )
+ {
+ dcost = score + delete_cost;
+ if ( ( S->at(i+1).at(j) < 0.0 ) || ( S->at(i+1).at(j) > dcost ) )
+ {
+ S->at(i+1).at(j) = dcost;
+ P->at(i+1).at(j) = 'D';
+ if ( i >= last_good )
+ {
+ last_good = i + 1 ;
+ }
+ }
+ }
}
- } else {
- cost = substitute_cost + score;
- if ( ( S[i+1][j+1] < 0 ) || ( cost < S[i+1][j+1] ) ) {
- S[i+1][j+1] = cost;
- P[i+1][j+1] = 'S';
- if ( cost < current_best ) {
- current_best = cost;
- }
- if ( current_best == cost ) {
- cur_last_peak = i + 1 ;
- }
+ }
+
+
+ int tracelength = 0;
+ i = ref_size;;
+ j = hyp_size;
+ while ( ( i > 0 ) || ( j > 0 ) )
+ {
+ tracelength++;
+ if (P->at(i).at(j) == 'A' )
+ {
+ i--;
+ j--;
}
- }
+ else
+ if (P->at(i).at(j) == 'S' )
+ {
+ i--;
+ j--;
+ }
+ else
+ if (P->at(i).at(j) == 'D' )
+ {
+ i--;
+ }
+ else
+ if (P->at(i).at(j) == 'I' )
+ {
+ j--;
+ }
+ else
+ {
+ cerr << "ERROR : terCalc::minimizeDistanceEdition : Invalid path : " <<P->at(i).at(j) << endl;
+ exit ( -1 );
+ }
}
- }
- cur_last_good = i + 1;
- if ( j < ( int ) hyp.size() ) {
- icost = score + insert_cost;
- if ( ( S[i][j+1] < 0 ) || ( S[i][j+1] > icost ) ) {
- S[i][j+1] = icost;
- P[i][j+1] = 'I';
- if ( ( cur_last_peak < i ) && ( current_best == icost ) ) {
- cur_last_peak = i;
- }
+ vector<char> path ( tracelength );
+ i = ref_size;
+ j = hyp_size;
+ while ( ( i > 0 ) || ( j > 0 ) )
+ {
+ path[--tracelength] =P->at(i).at(j);
+ if (P->at(i).at(j) == 'A' )
+ {
+ i--;
+ j--;
+ }
+ else
+ if (P->at(i).at(j) == 'S' )
+ {
+ i--;
+ j--;
+ }
+ else
+ if (P->at(i).at(j) == 'D' )
+ {
+ i--;
+ }
+ else
+ if (P->at(i).at(j) == 'I' )
+ {
+ j--;
+ }
}
- }
- if ( i < ( int ) ref.size() ) {
- dcost = score + delete_cost;
- if ( ( S[ i+1][ j] < 0.0 ) || ( S[i+1][j] > dcost ) ) {
- S[i+1][j] = dcost;
- P[i+1][j] = 'D';
- if ( i >= last_good ) {
- last_good = i + 1 ;
- }
+// terAlignment to_return;
+ to_return->numWords = ref_size;
+ to_return->alignment = path;
+ to_return->numEdits = S->at(ref_size).at(hyp_size);
+ to_return->hyp = hyp;
+ to_return->ref = ref;
+ to_return->averageWords = ref_size;
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::minimizeDistanceEdition : to_return :" << endl << to_return->toString() << endl << "END DEBUG" << endl;
}
- }
- }
- }
-
-
- int tracelength = 0;
- i = ref.size();
- j = hyp.size();
- while ( ( i > 0 ) || ( j > 0 ) ) {
- tracelength++;
- if ( P[i][j] == 'A' ) {
- i--;
- j--;
- } else if ( P[i][j] == 'S' ) {
- i--;
- j--;
- } else if ( P[i][j] == 'D' ) {
- i--;
- } else if ( P[i][j] == 'I' ) {
- j--;
- } else {
- cerr << "ERROR : terCalc::minimizeDistanceEdition : Invalid path : " << P[i][j] << endl;
- exit ( -1 );
- }
- }
- vector<char> path ( tracelength );
- i = ref.size();
- j = hyp.size();
- while ( ( i > 0 ) || ( j > 0 ) ) {
- path[--tracelength] = P[i][j];
- if ( P[i][j] == 'A' ) {
- i--;
- j--;
- } else if ( P[i][j] == 'S' ) {
- i--;
- j--;
- } else if ( P[i][j] == 'D' ) {
- i--;
- } else if ( P[i][j] == 'I' ) {
- j--;
+// return to_return;
+
}
- }
- terAlignment to_return;
- to_return.numWords = ref.size();
- to_return.alignment = path;
- to_return.numEdits = S[ref.size() ][hyp.size() ];
- to_return.hyp = hyp;
- to_return.ref = ref;
- to_return.averageWords = (int)ref.size();
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::minimizeDistanceEdition : to_return :" << endl << to_return.toString() << endl << "END DEBUG" << endl;
- }
- return to_return;
-}
-terAlignment terCalc::TER ( vector<string> hyp, vector<string> ref )
-{
- hashMapInfos rloc = createConcordMots ( hyp, ref );
- terAlignment cur_align = minimizeDistanceEdition ( hyp, ref, hypSpans );
- vector<string> cur = hyp;
- cur_align.hyp = hyp;
- cur_align.ref = ref;
- cur_align.aftershift = hyp;
- double edits = 0;
+
+ terAlignment terCalc::TER ( vector<string>& hyp, vector<string>& ref )
+ {
+ hashMapInfos rloc = createConcordMots ( hyp, ref );
+ terAlignment cur_align = minimizeDistanceEdition ( hyp, ref, hypSpans );
+ vector<string> cur = hyp;
+ cur_align.hyp = hyp;
+ cur_align.ref = ref;
+ cur_align.aftershift = hyp;
+ double edits = 0;
// int numshifts = 0;
- vector<terShift> allshifts;
+ vector<terShift> * allshifts=new vector<terShift>(0);
+ bestShiftStruct * returns=new bestShiftStruct();
// cerr << "Initial Alignment:" << endl << cur_align.toString() <<endl;
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::TER : cur_align :" << endl << cur_align.toString() << endl << "END DEBUG" << endl;
- }
- while ( true ) {
- bestShiftStruct returns;
- returns = findBestShift ( cur, hyp, ref, rloc, cur_align );
- if ( returns.m_empty ) {
- break;
- }
- terShift bestShift = returns.m_best_shift;
- cur_align = returns.m_best_align;
- edits += bestShift.cost;
- bestShift.alignment = cur_align.alignment;
- bestShift.aftershift = cur_align.aftershift;
- allshifts.push_back ( bestShift );
- cur = cur_align.aftershift;
- }
- terAlignment to_return;
- to_return = cur_align;
- to_return.allshifts = allshifts;
- to_return.numEdits += edits;
- NBR_SEGS_EVALUATED++;
- return to_return;
-}
-bestShiftStruct terCalc::findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment med_align )
-{
- bestShiftStruct to_return;
- bool anygain = false;
- bool herr[ ( int ) hyp.size() ];
- bool rerr[ ( int ) ref.size() ];
- int ralign[ ( int ) ref.size() ];
- calculateTerAlignment ( med_align, herr, rerr, ralign );
- vector<vecTerShift> poss_shifts;
-
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::findBestShift (after the calculateTerAlignment call) :" << endl;
- cerr << "indices: ";
- for (int l_i=0; l_i < ( int ) ref.size() ; l_i++) {
- cerr << l_i << "\t";
- }
- cerr << endl;
- cerr << "hyp : \t"<<vectorToString(hyp ,"\t") << endl;
- cerr << "cur : \t"<<vectorToString(cur ,"\t") << endl;
- cerr << "ref : \t"<<vectorToString(ref ,"\t") << endl;
- cerr << "herr : "<<vectorToString(herr,"\t",( int ) hyp.size()) << " | " << ( int ) hyp.size() <<endl;
- cerr << "rerr : "<<vectorToString(rerr,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() <<endl;
- cerr << "ralign : "<< vectorToString(ralign,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() << endl;
- cerr << "END DEBUG " << endl;
- }
- poss_shifts = calculerPermutations ( cur, ref, rloc, med_align, herr, rerr, ralign );
- double curerr = med_align.numEdits;
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
- cerr << "Possible Shifts:" << endl;
- for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- ) {
- for ( int j = 0; j < ( int ) ( poss_shifts.at ( i ) ).size(); j++ ) {
- cerr << " [" << i << "] " << ( ( poss_shifts.at ( i ) ).at ( j ) ).toString() << endl;
- }
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::TER : cur_align :" << endl << cur_align.toString() << endl << "END DEBUG" << endl;
+ }
+ while ( true )
+ {
+
+ returns=findBestShift ( cur, hyp, ref, rloc, cur_align );
+// cerr << "****************************************************************** " << returns->getEmpty() << endl;
+ if ( returns->getEmpty())
+ {
+ break;
+ }
+ terShift bestShift = (*(returns->m_best_shift));
+ cur_align = (*(returns->m_best_align));
+ edits += bestShift.cost;
+ bestShift.alignment = cur_align.alignment;
+ bestShift.aftershift = cur_align.aftershift;
+ allshifts->push_back ( bestShift );
+ cur = cur_align.aftershift;
+ delete(returns);
+ }
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::TER : Final to return :" << endl << cur_align.toString() << endl << "END DEBUG" << endl;
+ }
+ terAlignment to_return;
+ to_return = cur_align;
+ to_return.allshifts = (*(allshifts));
+ to_return.numEdits += edits;
+ NBR_SEGS_EVALUATED++;
+ return to_return;
}
- cerr << endl;
- cerr << "END DEBUG " << endl;
- }
+ bestShiftStruct * terCalc::findBestShift ( vector<string>& cur, vector<string>& hyp, vector<string>& ref, hashMapInfos& rloc, terAlignment& med_align )
+ {
+ CALL_FIND_BSHIFT++;
+// cerr << "CALL_FIND_BSHIFT " << CALL_FIND_BSHIFT <<endl;
+// to_return->m_empty = new bool(false);
+ bool anygain = false;
+ vector <bool> * herr = new vector<bool>(( int ) hyp.size() + 1 );
+ vector <bool> * rerr = new vector<bool>( ( int ) ref.size() + 1 );
+ vector <int> * ralign = new vector<int>( ( int ) ref.size() + 1 );
+ int l_i,i,j,s;
+ for (i = 0 ; i< ( int ) hyp.size() + 1 ; i++)
+ {
+ herr->at(i)=false;
+ }
+ for (i = 0 ; i< ( int ) ref.size() + 1 ; i++)
+ {
+ rerr->at(i)=false;
+ ralign->at(i)=-1;
+ }
+ calculateTerAlignment ( med_align, herr, rerr, ralign );
+ vector<vecTerShift> * poss_shifts = new vector< vector<terShift> >(0) ;
+ terAlignment * cur_best_align = new terAlignment();
+ terShift * cur_best_shift = new terShift();
+ double cur_best_shift_cost = 0.0;
+ vector<string> shiftarr;
+ vector<vecInt> curHypSpans;
+ terShift * curshift = new terShift();
+ alignmentStruct shiftReturns;
+ terAlignment * curalign = new terAlignment() ;
+
+
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift (after the calculateTerAlignment call) :" << endl;
+ cerr << "indices: ";
+ for (l_i=0; l_i < ( int ) ref.size() ; l_i++)
+ {
+ cerr << l_i << "\t";
+ }
+ cerr << endl;
+ cerr << "hyp : \t"<<vectorToString(hyp ,"\t") << endl;
+ cerr << "cur : \t"<<vectorToString(cur ,"\t") << endl;
+ cerr << "ref : \t"<<vectorToString(ref ,"\t") << endl;
+ cerr << "herr : "<<vectorToString(herr,"\t",( int ) hyp.size()) << " | " << ( int ) hyp.size() <<endl;
+ cerr << "rerr : "<<vectorToString(rerr,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() <<endl;
+ cerr << "ralign : "<< vectorToString(ralign,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ poss_shifts = calculerPermutations ( cur, ref, rloc, med_align, herr, rerr, ralign );
+ double curerr = med_align.numEdits;
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Possible Shifts:" << endl;
+ for ( i = ( int ) poss_shifts->size() - 1; i >= 0; i-- )
+ {
+ for ( j = 0; j < ( int ) ( poss_shifts->at ( i ) ).size(); j++ )
+ {
+ cerr << " [" << i << "] " << ( ( poss_shifts->at ( i ) ).at ( j ) ).toString() << endl;
+ }
+ }
+ cerr << endl;
+ cerr << "END DEBUG " << endl;
+ }
// exit(0);
- double cur_best_shift_cost = 0.0;
- terAlignment cur_best_align = med_align;
- terShift cur_best_shift;
-
+ cur_best_align->set(med_align);
+ for ( i = ( int ) poss_shifts->size() - 1; i >= 0; i-- )
+ {
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Considering shift of length " << i << " (" << ( poss_shifts->at ( i ) ).size() << ")" << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ /* Consider shifts of length i+1 */
+ double curfix = curerr - ( cur_best_shift_cost + cur_best_align->numEdits );
+ double maxfix = ( 2 * ( 1 + i ) );
+ if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) )
+ {
+ break;
+ }
+ else
+ {
+ for ( s = 0; s < ( int ) ( poss_shifts->at ( i ) ).size(); s++ )
+ {
+ curfix = curerr - ( cur_best_shift_cost + cur_best_align->numEdits );
+ if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) )
+ {
+ break;
+ }
+ else
+ {
+ curshift->set(( poss_shifts->at ( i ) ).at ( s ));
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "cur : "<< join(" ",cur) << endl;
+ cerr << "shift size : "<< i << endl;
+ cerr << "shift number : "<< s << endl;
+ cerr << "size of shift size : "<< ( int ) ( poss_shifts->at ( i ) ).size() << endl;
+ cerr << "curshift : "<< curshift->toString() << endl;
+
+ }
+// alignmentStruct shiftReturns;
+ shiftReturns.set(permuter ( cur, curshift ));
+ shiftarr = shiftReturns.nwords;
+ curHypSpans = shiftReturns.aftershift;
+ if ( PRINT_DEBUG )
+ {
+ cerr << "shiftarr : "<< join(" ",shiftarr) << endl;
+ cerr << "curHypSpans size : "<< (int)curHypSpans.size() << endl;
+ cerr << "END DEBUG " << endl;
+ }
+// terAlignment tmp=minimizeDistanceEdition ( shiftarr, ref, curHypSpans );
+ minimizeDistanceEdition ( shiftarr, ref, curHypSpans, curalign );
+// curalign->set(tmp);
- for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- ) {
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
- cerr << "Considering shift of length " << i << " (" << ( poss_shifts.at ( i ) ).size() << ")" << endl;
- cerr << "END DEBUG " << endl;
- }
- /* Consider shifts of length i+1 */
- double curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
- double maxfix = ( 2 * ( 1 + i ) );
- if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) {
- break;
- }
+ curalign->hyp = hyp;
+ curalign->ref = ref;
+ curalign->aftershift = shiftarr;
- for ( int s = 0; s < ( int ) ( poss_shifts.at ( i ) ).size(); s++ ) {
- curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
- if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) {
- break;
- }
- terShift curshift = ( poss_shifts.at ( i ) ).at ( s );
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
- cerr << "cur : "<< join(" ",cur) << endl;
- cerr << "curshift : "<< curshift.toString() << endl;
-
- }
- alignmentStruct shiftReturns = permuter ( cur, curshift );
- vector<string> shiftarr = shiftReturns.nwords;
- vector<vecInt> curHypSpans = shiftReturns.aftershift;
-
- if ( PRINT_DEBUG ) {
- cerr << "shiftarr : "<< join(" ",shiftarr) << endl;
-// cerr << "curHypSpans : "<< curHypSpans.toString() << endl;
- cerr << "END DEBUG " << endl;
- }
- terAlignment curalign = minimizeDistanceEdition ( shiftarr, ref, curHypSpans );
-
- curalign.hyp = hyp;
- curalign.ref = ref;
- curalign.aftershift = shiftarr;
-
-
- double gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost );
-
- // if (DEBUG) {
- // string testeuh=terAlignment join(" ", shiftarr);
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
- cerr << "Gain for " << curshift.toString() << " is " << gain << ". (result: [" << curalign.join ( " ", shiftarr ) << "]" << endl;
- cerr << "Details of gains : gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost )"<<endl;
- cerr << "Details of gains : gain = ("<<cur_best_align.numEdits << "+" << cur_best_shift_cost << ") - (" << curalign.numEdits << "+" << curshift.cost << ")"<<endl;
- cerr << "" << curalign.toString() << "\n" << endl;
- cerr << "END DEBUG " << endl;
- }
- // }
- //
- if ( ( gain > 0 ) || ( ( cur_best_shift_cost == 0 ) && ( gain == 0 ) ) ) {
- anygain = true;
- cur_best_shift = curshift;
- cur_best_shift_cost = curshift.cost;
- cur_best_align = curalign;
- // if (DEBUG)
- if ( PRINT_DEBUG ) {
- cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
- cerr << "Tmp Choosing shift: " << cur_best_shift.toString() << " gives:\n" << cur_best_align.toString() << "\n" << endl;
- cerr << "END DEBUG " << endl;
- }
- }
- }
- }
- if ( anygain ) {
- to_return.m_best_shift = cur_best_shift;
- to_return.m_best_align = cur_best_align;
- to_return.m_empty = false;
- } else {
- to_return.m_empty = true;
- }
- return to_return;
-}
-void terCalc::calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign )
-{
- int hpos = -1;
- int rpos = -1;
- if ( PRINT_DEBUG ) {
-
- cerr << "BEGIN DEBUG : terCalc::calculateTerAlignment : " << endl << align.toString() << endl;
- cerr << "END DEBUG " << endl;
- }
- for ( int i = 0; i < ( int ) align.alignment.size(); i++ ) {
- herr[i] = false;
- rerr[i] = false;
- ralign[i] = -1;
- }
- for ( int i = 0; i < ( int ) align.alignment.size(); i++ ) {
- char sym = align.alignment[i];
- if ( sym == 'A' ) {
- hpos++;
- rpos++;
- herr[hpos] = false;
- rerr[rpos] = false;
- ralign[rpos] = hpos;
- } else if ( sym == 'S' ) {
- hpos++;
- rpos++;
- herr[hpos] = true;
- rerr[rpos] = true;
- ralign[rpos] = hpos;
- } else if ( sym == 'I' ) {
- hpos++;
- herr[hpos] = true;
- } else if ( sym == 'D' ) {
- rpos++;
- rerr[rpos] = true;
- ralign[rpos] = hpos+1;
- } else {
- cerr << "ERROR : terCalc::calculateTerAlignment : Invalid mini align sequence " << sym << " at pos " << i << endl;
- exit ( -1 );
- }
- }
-}
+ double gain = ( cur_best_align->numEdits + cur_best_shift_cost ) - ( curalign->numEdits + curshift->cost );
-vector<vecTerShift> terCalc::calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign )
-{
- vector<vecTerShift> to_return;
- if ( ( TAILLE_PERMUT_MAX <= 0 ) || ( DIST_MAX_PERMUT <= 0 ) ) {
- return to_return;
- }
-
- vector<vecTerShift> allshifts ( TAILLE_PERMUT_MAX + 1 );
- for ( int start = 0; start < ( int ) hyp.size(); start++ ) {
- string subVectorHypString = vectorToString ( subVector ( hyp, start, start + 1 ) );
- if ( ! rloc.trouve ( subVectorHypString ) ) {
- continue;
- }
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Gain for " << curshift->toString() << " is " << gain << ". (result: [" << curalign->join ( " ", shiftarr ) << "]" << endl;
+ cerr << "Details of gains : gain = ( cur_best_align->numEdits + cur_best_shift_cost ) - ( curalign->numEdits + curshift->cost )"<<endl;
+ cerr << "Details of gains : gain = ("<<cur_best_align->numEdits << "+" << cur_best_shift_cost << ") - (" << curalign->numEdits << "+" << curshift->cost << ")"<<endl;
+ cerr << "" << curalign->toString() << "\n" << endl;
+ cerr << "END DEBUG " << endl;
+ }
- bool ok = false;
- vector<int> mtiVec = rloc.getValue ( subVectorHypString );
- vector<int>::iterator mti = mtiVec.begin();
- while ( mti != mtiVec.end() && ( ! ok ) ) {
- int moveto = ( *mti );
- mti++;
- if ( ( start != ralign[moveto] ) && ( ( ralign[moveto] - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign[moveto] - 1 ) <= DIST_MAX_PERMUT ) ) {
- ok = true;
- }
- }
- if ( ! ok ) {
- continue;
- }
- ok = true;
- for ( int end = start; ( ok && ( end < ( int ) hyp.size() ) && ( end < start + TAILLE_PERMUT_MAX ) ); end++ ) {
- /* check if cand is good if so, add it */
- vector<string> cand = subVector ( hyp, start, end + 1 );
- ok = false;
- if ( ! ( rloc.trouve ( vectorToString ( cand ) ) ) ) {
- continue;
- }
-
- bool any_herr = false;
-
- for ( int i = 0; ( ( i <= ( end - start ) ) && ( ! any_herr ) ); i++ ) {
- if ( herr[start+i] ) {
- any_herr = true;
+ if ( ( gain > 0 ) || ( ( cur_best_shift_cost == 0 ) && ( gain == 0 ) ) )
+ {
+ anygain = true;
+ cur_best_shift->set(curshift);
+ cur_best_shift_cost = curshift->cost;
+ cur_best_align->set(curalign);
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Tmp Choosing shift: " << cur_best_shift->toString() << " gives:\n" << cur_best_align->toString() << "\n" << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ }
+ }
+ }
+ }
}
- }
- if ( any_herr == false ) {
- ok = true;
- continue;
- }
-
- vector<int> movetoitVec;
- movetoitVec = rloc.getValue ( ( string ) vectorToString ( cand ) );
-// cerr << "CANDIDATE " << ( string ) vectorToString ( cand ) <<" PLACED : " << ( string ) vectorToString ( movetoitVec," ") << endl;
- vector<int>::iterator movetoit = movetoitVec.begin();
- while ( movetoit != movetoitVec.end() ) {
- int moveto = ( *movetoit );
- movetoit++;
- if ( ! ( ( ralign[moveto] != start ) && ( ( ralign[moveto] < start ) || ( ralign[moveto] > end ) ) && ( ( ralign[moveto] - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign[moveto] ) <= DIST_MAX_PERMUT ) ) ) {
- continue;
+ bestShiftStruct * to_return=new bestShiftStruct();
+ if ( anygain )
+ {
+ to_return->setEmpty(false);
+ if ( PRINT_DEBUG )
+ {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Final shift chosen : " << cur_best_shift->toString() << " gives:\n" << cur_best_align->toString() << "\n" << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ to_return->m_best_shift->set(cur_best_shift);
+// terAlignment tmp=cur_best_align;
+// cur_best_align->toString();
+// to_return.m_best_align.toString();
+// if ((int)cur_best_align->alignment.size() == 0)
+// {
+// to_return.m_best_align = cur_best_align;
+// }
+// else
+// {
+// cerr << "Warning: cur_best_align->alignment.size() = 0 !!!"<<endl;
+//
+// }
+ to_return->m_best_align->set(cur_best_align);
+// to_return.m_best_align.toString();
}
- ok = true;
+ else
+ {
+ to_return->setEmpty(true);
+ }
+// // cerr << to_return->toString() << endl;
+ delete(poss_shifts);
+ delete(cur_best_align);
+ delete(cur_best_shift);
+ delete(curshift);
+ delete(curalign) ;
+ return to_return;
+ }
- /* check to see if there are any errors in either string
- (only move if this is the case!)
- */
+ void terCalc::calculateTerAlignment ( terAlignment& align, vector<bool>* herr, vector<bool>* rerr, vector<int>* ralign )
+ {
+ int hpos = -1;
+ int rpos = -1;
+ CALL_TER_ALIGN++;
+// cerr << "CALL_TER_ALIGN " << CALL_TER_ALIGN << endl;
+ if ( PRINT_DEBUG )
+ {
- bool any_rerr = false;
- for ( int i = 0; ( i <= end - start ) && ( ! any_rerr ); i++ ) {
- if ( rerr[moveto+i] ) {
- any_rerr = true;
- }
+ cerr << "BEGIN DEBUG : terCalc::calculateTerAlignment : " << endl << align.toString() << endl;
+ cerr << "END DEBUG " << endl;
}
- if ( ! any_rerr ) {
- continue;
+// cerr << (int)herr->size() <<endl;
+// cerr << (int)rerr->size() <<endl;
+// cerr << ( int ) align.alignment.size() <<endl;
+// for ( int i = 0; i < ( int ) align.alignment.size(); i++ )
+// {
+// herr->at(i) = false;
+// rerr->at(i) = false;
+// ralign->at(i) = -1;
+// }
+ for ( int i = 0; i < ( int ) align.alignment.size(); i++ )
+ {
+ char sym = align.alignment.at(i);
+ if ( sym == 'A' )
+ {
+ hpos++;
+ rpos++;
+ herr->at(hpos) = false;
+ rerr->at(rpos) = false;
+ ralign->at(rpos) = hpos;
+ }
+ else
+ if ( sym == 'S' )
+ {
+ hpos++;
+ rpos++;
+ herr->at(hpos) = true;
+ rerr->at(rpos) = true;
+ ralign->at(rpos) = hpos;
+ }
+ else
+ if ( sym == 'I' )
+ {
+ hpos++;
+ herr->at(hpos) = true;
+ }
+ else
+ if ( sym == 'D' )
+ {
+ rpos++;
+ rerr->at(rpos) = true;
+ ralign->at(rpos) = hpos+1;
+ }
+ else
+ {
+ cerr << "ERROR : terCalc::calculateTerAlignment : Invalid mini align sequence " << sym << " at pos " << i << endl;
+ exit ( -1 );
+ }
}
- for ( int roff = -1; roff <= ( end - start ); roff++ ) {
- terShift topush;
- bool topushNull = true;
- if ( ( roff == -1 ) && ( moveto == 0 ) ) {
- if ( PRINT_DEBUG ) {
+ }
- cerr << "BEGIN DEBUG : terCalc::calculerPermutations 01 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: -1" << endl << "END DEBUG" << endl;
+ vector<vecTerShift> * terCalc::calculerPermutations ( vector< string >& hyp, vector< string >& ref, hashMapInfos& rloc, TERCpp::terAlignment& align, vector<bool>* herr, vector<bool>* rerr, vector<int>* ralign )
+ {
+ vector<vecTerShift> * allshifts = new vector<vecTerShift>(0);
+// to_return.clear();
+ CALL_CALC_PERMUT++;
+// cerr << "CALL_CALC_PERMUT " << CALL_CALC_PERMUT << endl;
+ if ( ( TAILLE_PERMUT_MAX <= 0 ) || ( DIST_MAX_PERMUT <= 0 ) )
+ {
+ return allshifts;
+ }
+ allshifts = new vector<vecTerShift>( TAILLE_PERMUT_MAX + 1 );
+ int start=0;
+ int end=0;
+ bool ok = false;
+ vector<int> mtiVec(0);
+ vector<int>::iterator mti;
+ int moveto=0;
+ vector<string> cand(0);
+ bool any_herr = false;
+ bool any_rerr = false;
+ int i=0;
+ int l_nbr_permuts=0;
+// for (i=0; i< (int)ref.size() +1 ; i++) {cerr << " " << ralign[i] ;} cerr <<endl;
+ vector<int> movetoitVec(0);
+ string subVectorHypString="";
+ terShift * topush;
+ for ( start = 0; start < ( int ) hyp.size(); start++ )
+ {
+ subVectorHypString = vectorToString ( subVector ( hyp, start, start + 1 ) );
+ if ( ! rloc.trouve ( subVectorHypString ) )
+ {
+ continue;
}
- terShift t01 ( start, end, -1, -1 );
- topush = t01;
- topushNull = false;
- } else if ( ( start != ralign[moveto+roff] ) && ( ( roff == 0 ) || ( ralign[moveto+roff] != ralign[moveto] ) ) ) {
- int newloc = ralign[moveto+roff];
- if ( PRINT_DEBUG ) {
-
- cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: " << newloc << endl << "END DEBUG" << endl;
+
+ ok = false;
+ mtiVec = rloc.getValue ( subVectorHypString );
+ mti = mtiVec.begin();
+ while ( mti != mtiVec.end() && ( ! ok ) )
+ {
+ moveto = ( *mti );
+ mti++;
+ if ( ( start != ralign->at(moveto) ) && ( ( ralign->at(moveto) - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign->at(moveto) - 1 ) <= DIST_MAX_PERMUT ) )
+ {
+ ok = true;
+ }
}
- terShift t02 ( start, end, moveto + roff, newloc );
- topush = t02;
- topushNull = false;
- }
- if ( !topushNull ) {
- topush.shifted = cand;
- topush.cost = shift_cost;
- if ( PRINT_DEBUG ) {
-
- cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl;
- cerr << "start : " << start << endl;
- cerr << "end : " << end << endl;
- cerr << "end - start : " << end - start << endl;
- cerr << "END DEBUG " << endl;
+ if ( ! ok )
+ {
+ continue;
}
- ( allshifts.at ( end - start ) ).push_back ( topush );
- }
- }
- }
- }
- }
- to_return.clear();
- for ( int i = 0; i < TAILLE_PERMUT_MAX + 1; i++ ) {
- to_return.push_back ( ( vecTerShift ) allshifts.at ( i ) );
- }
- return to_return;
-}
+ ok = true;
+ for ( end = start; ( ok && ( end < ( int ) hyp.size() ) && ( end < start + TAILLE_PERMUT_MAX ) ); end++ )
+ {
+ /* check if cand is good if so, add it */
+ cand = subVector ( hyp, start, end + 1 );
+ ok = false;
+ if ( ! ( rloc.trouve ( vectorToString ( cand ) ) ) )
+ {
+ continue;
+ }
+ any_herr = false;
-alignmentStruct terCalc::permuter ( vector<string> words, terShift s )
-{
- return permuter ( words, s.start, s.end, s.newloc );
-}
+ for ( i = 0; ( ( i <= ( end - start ) ) && ( ! any_herr ) ); i++ )
+ {
+ if ( herr->at(start+i) )
+ {
+ any_herr = true;
+ }
+ }
+ if ( any_herr == false )
+ {
+ ok = true;
+ continue;
+ }
+ movetoitVec = rloc.getValue ( ( string ) vectorToString ( cand ) );
+// cerr << "CANDIDATE " << ( string ) vectorToString ( cand ) <<" PLACED : " << ( string ) vectorToString ( movetoitVec," ") << endl;
+ vector<int>::iterator movetoit;
+ movetoit = movetoitVec.begin();
+ while ( movetoit != movetoitVec.end() )
+ {
+ moveto = ( *movetoit );
+ movetoit++;
+ if ( ! ( ( ralign->at(moveto) != start ) && ( ( ralign->at(moveto) < start ) || ( ralign->at(moveto) > end ) ) && ( ( ralign->at(moveto) - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign->at(moveto) ) <= DIST_MAX_PERMUT ) ) )
+ {
+ continue;
+ }
+ ok = true;
-alignmentStruct terCalc::permuter ( vector<string> words, int start, int end, int newloc )
-{
- int c = 0;
- vector<string> nwords ( words );
- vector<vecInt> spans ( ( int ) hypSpans.size() );
- alignmentStruct to_return;
- if ( PRINT_DEBUG ) {
-
- if ( ( int ) hypSpans.size() > 0 ) {
- cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: " << ( int ) hypSpans.size() << endl ;
- } else {
- cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: null" << endl ;
- }
- cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << join(" ",words) << " start: " << start << " end: " << end << " newloc "<< newloc << endl << "END DEBUG " << endl;
- }
- if (newloc >= ( int ) words.size()) {
- if ( PRINT_DEBUG ) {
- cerr << "WARNING: Relocation over the size of the hypothesis, replacing at the end of it."<<endl;
- }
- newloc = ( int ) words.size()-1;
- }
+ /* check to see if there are any errors in either string
+ (only move if this is the case!)
+ */
-// }
+ any_rerr = false;
+ for ( int i = 0; ( i <= end - start ) && ( ! any_rerr ); i++ )
+ {
+ if ( rerr->at(moveto+i) )
+ {
+ any_rerr = true;
+ }
+ }
+ if ( ! any_rerr )
+ {
+ continue;
+ }
+ for ( int roff = -1; roff <= ( end - start ); roff++ )
+ {
+ topush = new terShift();
+ bool topushNull = true;
+ if ( ( roff == -1 ) && ( moveto == 0 ) )
+ {
+ if ( PRINT_DEBUG )
+ {
- if ( newloc == -1 ) {
- for ( int i = start; i <= end; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
+ cerr << "BEGIN DEBUG : terCalc::calculerPermutations 01 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: -1" << endl << "END DEBUG" << endl;
+ }
+// terShift t01 ( start, end, -1, -1 );
+// topush = t01;
+ topush->start=start;
+ topush->end=end;
+ topush->moveto=-1;
+ topush->newloc=-1;
+ topushNull = false;
+ }
+ else
+ if ( ( start != ralign->at(moveto+roff) ) && ( ( roff == 0 ) || ( ralign->at(moveto+roff) != ralign->at(moveto) ) ) )
+ {
+ int newloc = ralign->at(moveto+roff);
+ if ( PRINT_DEBUG )
+ {
+
+ cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: " << newloc << endl << "END DEBUG" << endl;
+ }
+// terShift t02 ( start, end, moveto + roff, newloc );
+// topush = t02;
+ topush->start=start;
+ topush->end=end;
+ topush->moveto=moveto + roff;
+ topush->newloc=newloc;
+ topushNull = false;
+ }
+ if ( !topushNull )
+ {
+ topush->shifted = cand;
+ topush->cost = shift_cost;
+ l_nbr_permuts++;
+ if ( PRINT_DEBUG )
+ {
+
+ cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl;
+ cerr << "start : " << start << endl;
+ cerr << "end : " << end << endl;
+ cerr << "end - start : " << end - start << endl;
+ cerr << "nbr Permutations added: " << l_nbr_permuts << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ if (l_nbr_permuts < NBR_PERMUT_MAX + 1)
+ {
+ ( allshifts->at ( end - start ) ).push_back ( (*(topush)) );
+ }
+// else
+// {
+// break;
+// }
+ }
+ delete(topush);
+ }
+ }
+ }
+ }
+// to_return.clear();
+// for ( int i = 0; i < TAILLE_PERMUT_MAX + 1; i++ )
+// {
+// to_return.push_back ( ( vecTerShift ) allshifts.at ( i ) );
+// }
+ return allshifts;
}
- for ( int i = 0; i <= start - 1; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
+
+
+ alignmentStruct terCalc::permuter ( vector< string >& words, TERCpp::terShift& s )
+ {
+ return permuter ( words, s.start, s.end, s.newloc );
}
- for ( int i = end + 1; i < ( int ) words.size(); i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
+ alignmentStruct terCalc::permuter ( vector< string >& words, TERCpp::terShift* s )
+ {
+ return permuter ( words, s->start, s->end, s->newloc );
}
- } else {
- if ( newloc < start ) {
- for ( int i = 0; i < newloc; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = start; i <= end; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = newloc ; i < start ; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; i < ( int ) words.size(); i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- } else {
- if ( newloc > end ) {
- for ( int i = 0; i <= start - 1; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; i <= newloc; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = start; i <= end; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = newloc + 1; i < ( int ) words.size(); i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- } else {
- // we are moving inside of ourselves
- for ( int i = 0; i <= start - 1; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; ( i < ( int ) words.size() ) && ( i <= ( end + ( newloc - start ) ) ); i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
+
+ alignmentStruct terCalc::permuter ( vector< string >& words, int start, int end, int newloc )
+ {
+ int c = 0;
+ vector<string> nwords ( words );
+ vector<vecInt> spans ( ( int ) hypSpans.size() );
+ alignmentStruct to_return;
+ if ( PRINT_DEBUG )
+ {
+
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: " << ( int ) hypSpans.size() << endl ;
+ }
+ else
+ {
+ cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: null" << endl ;
+ }
+ cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << join(" ",words) << " start: " << start << " end: " << end << " newloc "<< newloc << endl << "END DEBUG " << endl;
}
- for ( int i = start; i <= end; i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
+ if (newloc >= ( int ) words.size())
+ {
+ if ( PRINT_DEBUG )
+ {
+ cerr << "WARNING: Relocation over the size of the hypothesis, replacing at the end of it."<<endl;
+ }
+ newloc = ( int ) words.size()-1;
+ }
+
+// }
+
+ if ( newloc == -1 )
+ {
+ for ( int i = start; i <= end;i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = 0; i <= start - 1;i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; i < ( int ) words.size();i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
}
- for ( int i = ( end + ( newloc - start ) + 1 ); i < ( int ) words.size(); i++ ) {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 ) {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
+ else
+ {
+ if ( newloc < start )
+ {
+
+ for ( int i = 0; i < newloc; i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = start; i <= end;i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = newloc ; i < start ;i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; i < ( int ) words.size();i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ }
+ else
+ {
+ if ( newloc > end )
+ {
+ for ( int i = 0; i <= start - 1; i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; i <= newloc;i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = start; i <= end;i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = newloc + 1; i < ( int ) words.size();i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ }
+ else
+ {
+ // we are moving inside of ourselves
+ for ( int i = 0; i <= start - 1; i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; ( i < ( int ) words.size() ) && ( i <= ( end + ( newloc - start ) ) ); i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = start; i <= end;i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = ( end + ( newloc - start ) + 1 ); i < ( int ) words.size();i++ )
+ {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 )
+ {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ }
+ }
}
- }
- }
- }
- NBR_PERMUTS_CONSID++;
-
- if ( PRINT_DEBUG ) {
- cerr << "nwords" << join(" ",nwords) << endl;
+ NBR_PERMUTS_CONSID++;
+
+ if ( PRINT_DEBUG )
+ {
+ cerr << "nwords" << join(" ",nwords) << endl;
// cerr << "spans" << spans. << endl;
- }
-
- to_return.nwords = nwords;
- to_return.aftershift = spans;
- return to_return;
-}
-void terCalc::setDebugMode ( bool b )
-{
- PRINT_DEBUG = b;
-}
+ }
+
+ to_return.nwords = nwords;
+ to_return.aftershift = spans;
+ return to_return;
+ }
+ void terCalc::setDebugMode ( bool b )
+ {
+ PRINT_DEBUG = b;
+ }
}
diff --git a/mert/TER/tercalc.h b/mert/TER/tercalc.h
index 778d83395..22b5e2c9d 100644
--- a/mert/TER/tercalc.h
+++ b/mert/TER/tercalc.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
-#ifndef _TERCPPTERCALC_H__
-#define _TERCPPTERCALC_H__
+#ifndef _TERCPPTERCALC_H___
+#define _TERCPPTERCALC_H___
#include <vector>
#include <stdio.h>
@@ -41,62 +41,70 @@ namespace TERCpp
{
// typedef size_t WERelement[2];
// Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del)
-typedef vector<terShift> vecTerShift;
-/**
- @author
-*/
-class terCalc
-{
-private :
+ typedef vector<terShift> vecTerShift;
+ /**
+ @author
+ */
+ class terCalc
+ {
+ private :
// Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del)
- WERalignment l_WERalignment;
+ WERalignment l_WERalignment;
// HashMap contenant les valeurs de hash de chaque mot
- hashMap bagOfWords;
- int TAILLE_PERMUT_MAX;
- // Increments internes
- int NBR_SEGS_EVALUATED;
- int NBR_PERMUTS_CONSID;
- int NBR_BS_APPELS;
- int DIST_MAX_PERMUT;
- bool PRINT_DEBUG;
+ hashMap bagOfWords;
+ int TAILLE_PERMUT_MAX;
+ int NBR_PERMUT_MAX;
+ // Increments internes
+ int NBR_SEGS_EVALUATED;
+ int NBR_PERMUTS_CONSID;
+ int NBR_BS_APPELS;
+ int DIST_MAX_PERMUT;
+ int CALL_TER_ALIGN;
+ int CALL_CALC_PERMUT;
+ int CALL_FIND_BSHIFT;
+ int MAX_LENGTH_SENTENCE;
+ bool PRINT_DEBUG;
- // Utilisés dans minDistEdit et ils ne sont pas réajustés
- double S[1000][1000];
- char P[1000][1000];
- vector<vecInt> refSpans;
- vector<vecInt> hypSpans;
- int TAILLE_BEAM;
+ // Utilisés dans minDistEdit et ils ne sont pas réajustés
+ vector < vector < double > > * S;
+ vector < vector < char > > * P;
+ vector<vecInt> refSpans;
+ vector<vecInt> hypSpans;
+ int TAILLE_BEAM;
-public:
- int shift_cost;
- int insert_cost;
- int delete_cost;
- int substitute_cost;
- int match_cost;
- double infinite;
- terCalc();
+ public:
+ int shift_cost;
+ int insert_cost;
+ int delete_cost;
+ int substitute_cost;
+ int match_cost;
+ double infinite;
+ terCalc();
-// ~terCalc();
+ ~terCalc();
// size_t* hashVec ( vector<string> s );
- void setDebugMode ( bool b );
+ void setDebugMode ( bool b );
// int WERCalculation ( size_t * ref, size_t * hyp );
// int WERCalculation ( vector<string> ref, vector<string> hyp );
// int WERCalculation ( vector<int> ref, vector<int> hyp );
- terAlignment WERCalculation ( vector<string> hyp, vector<string> ref );
+ terAlignment WERCalculation ( vector< string >& hyp, vector< string >& ref );
// string vectorToString(vector<string> vec);
// vector<string> subVector(vector<string> vec, int start, int end);
- hashMapInfos createConcordMots ( vector<string> hyp, vector<string> ref );
- terAlignment minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans );
- bool trouverIntersection ( vecInt refSpan, vecInt hypSpan );
- terAlignment TER ( vector<string> hyp, vector<string> ref , float avRefLength );
- terAlignment TER ( vector<string> hyp, vector<string> ref );
- terAlignment TER ( vector<int> hyp, vector<int> ref );
- bestShiftStruct findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment cur_align );
- void calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign );
- vector<vecTerShift> calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign );
- alignmentStruct permuter ( vector<string> words, terShift s );
- alignmentStruct permuter ( vector<string> words, int start, int end, int newloc );
-};
+ hashMapInfos createConcordMots ( vector<string>& hyp, vector<string>& ref );
+ terAlignment minimizeDistanceEdition ( vector<string>& hyp, vector<string>& ref, vector<vecInt>& curHypSpans );
+ void minimizeDistanceEdition ( vector<string>& hyp, vector<string>& ref, vector<vecInt>& curHypSpans , terAlignment* l_terAlign);
+// terAlignment minimizeDistanceEdition ( vector<string>& hyp, vector<string>& ref, vector<vecInt>& curHypSpans );
+ bool trouverIntersection ( vecInt& refSpan, vecInt& hypSpan );
+ terAlignment TER ( vector<string>& hyp, vector<string>& ref , float avRefLength );
+ terAlignment TER ( vector<string>& hyp, vector<string>& ref );
+ terAlignment TER ( vector<int>& hyp, vector<int>& ref );
+ bestShiftStruct * findBestShift ( vector< string >& cur, vector< string >& hyp, vector< string >& ref, hashMapInfos& rloc, TERCpp::terAlignment& med_align );
+ void calculateTerAlignment ( terAlignment& align, vector<bool>* herr, vector<bool>* rerr, vector<int>* ralign );
+ vector<vecTerShift> * calculerPermutations ( vector< string >& hyp, vector< string >& ref, hashMapInfos& rloc, TERCpp::terAlignment& align, vector<bool>* herr, vector<bool>* rerr, vector<int>* ralign );
+ alignmentStruct permuter ( vector<string>& words, terShift& s );
+ alignmentStruct permuter ( vector<string>& words, terShift* s );
+ alignmentStruct permuter ( vector<string>& words, int start, int end, int newloc );
+ };
}
diff --git a/mert/TER/tools.cpp b/mert/TER/tools.cpp
index 8858a7119..22ee091a8 100644
--- a/mert/TER/tools.cpp
+++ b/mert/TER/tools.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -25,677 +25,811 @@ using namespace boost::xpressive;
namespace Tools
{
-string vectorToString ( vector<string> vec )
-{
- string retour ( "" );
- for ( vector<string>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
- if ( vecIter == vec.begin() ) {
- retour += ( *vecIter );
- } else {
- retour += "\t" + ( *vecIter );
+ string vectorToString ( vector<string> vec )
+ {
+ string retour ( "" );
+ for ( vector<string>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
+ {
+ if ( vecIter == vec.begin() )
+ {
+ retour += ( *vecIter );
+ }
+ else
+ {
+ retour += "\t" + ( *vecIter );
+ }
+ }
+ return retour;
}
- }
- return retour;
-}
-string vectorToString ( vector<char> vec )
-{
- stringstream retour;
- retour.str("");
- for ( vector<char>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
- if ( vecIter == vec.begin() ) {
- retour << ( *vecIter );
- } else {
- retour << "\t" << ( *vecIter );
+ string vectorToString ( vector<char> vec )
+ {
+ stringstream retour;
+ retour.str("");
+ for ( vector<char>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
+ {
+ if ( vecIter == vec.begin() )
+ {
+ retour << ( *vecIter );
+ }
+ else
+ {
+ retour << "\t" << ( *vecIter );
+ }
+ }
+ return retour.str();
}
- }
- return retour.str();
-}
-string vectorToString ( vector<int> vec )
-{
- stringstream retour;
- retour.str("");
- for ( vector<int>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
- if ( vecIter == vec.begin() ) {
- retour << ( *vecIter );
- } else {
- retour << "\t" << ( *vecIter );
+ string vectorToString ( vector<int> vec )
+ {
+ stringstream retour;
+ retour.str("");
+ for ( vector<int>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
+ {
+ if ( vecIter == vec.begin() )
+ {
+ retour << ( *vecIter );
+ }
+ else
+ {
+ retour << "\t" << ( *vecIter );
+ }
+ }
+ return retour.str();
+ }
+ string vectorToString ( vector<int> * vec )
+ {
+ stringstream retour;
+ retour.str("");
+ for ( vector<int>::iterator vecIter = vec->begin();vecIter != vec->end(); vecIter++ )
+ {
+ if ( vecIter == vec->begin() )
+ {
+ retour << ( *vecIter );
+ }
+ else
+ {
+ retour << "\t" << ( *vecIter );
+ }
+ }
+ return retour.str();
}
- }
- return retour.str();
-}
-string vectorToString ( vector< string > vec, string s )
-{
- string retour ( "" );
- for ( vector<string>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
- if ( vecIter == vec.begin() ) {
- retour += ( *vecIter );
- } else {
- retour += s + ( *vecIter );
+ string vectorToString ( vector< string > vec, string s )
+ {
+ string retour ( "" );
+ for ( vector<string>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
+ {
+ if ( vecIter == vec.begin() )
+ {
+ retour += ( *vecIter );
+ }
+ else
+ {
+ retour += s + ( *vecIter );
+ }
+ }
+ return retour;
+
}
- }
- return retour;
-}
+ string vectorToString ( vector< char > vec, string s )
+ {
+ stringstream retour;
+ retour.str("");
+ for ( vector<char>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
+ {
+ if ( vecIter == vec.begin() )
+ {
+ retour << ( *vecIter );
+ }
+ else
+ {
+ retour << s << ( *vecIter );
+ }
+ }
+ return retour.str();
-string vectorToString ( vector< char > vec, string s )
-{
- stringstream retour;
- retour.str("");
- for ( vector<char>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
- if ( vecIter == vec.begin() ) {
- retour << ( *vecIter );
- } else {
- retour << s << ( *vecIter );
}
- }
- return retour.str();
-}
+ string vectorToString ( vector< int > vec, string s )
+ {
+ stringstream retour;
+ retour.str("");
+ for ( vector<int>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
+ {
+ if ( vecIter == vec.begin() )
+ {
+ retour << ( *vecIter );
+ }
+ else
+ {
+ retour << s << ( *vecIter );
+ }
+ }
+ return retour.str();
-string vectorToString ( vector< int > vec, string s )
-{
- stringstream retour;
- retour.str("");
- for ( vector<int>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
- if ( vecIter == vec.begin() ) {
- retour << ( *vecIter );
- } else {
- retour << s << ( *vecIter );
}
- }
- return retour.str();
-}
+ string vectorToString ( vector< bool > vec, string s )
+ {
+ stringstream retour;
+ retour.str("");
+ for ( vector<bool>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
+ {
+ if ( vecIter == vec.begin() )
+ {
+ retour << ( *vecIter );
+ }
+ else
+ {
+ retour << s << ( *vecIter );
+ }
+ }
+ return retour.str();
-string vectorToString ( vector< bool > vec, string s )
-{
- stringstream retour;
- retour.str("");
- for ( vector<bool>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
- if ( vecIter == vec.begin() ) {
- retour << ( *vecIter );
- } else {
- retour << s << ( *vecIter );
}
- }
- return retour.str();
+ string vectorToString ( char* vec, string s , int taille)
+ {
+ stringstream retour;
+ retour.str("");
+ int l_i;
+ for ( l_i=0; l_i < taille ; l_i++)
+ {
+ if ( l_i == 0 )
+ {
+ retour << vec[l_i];
+ }
+ else
+ {
+ retour << s << vec[l_i];
+ }
+ }
+ return retour.str();
-}
-string vectorToString ( char* vec, string s , int taille)
-{
- stringstream retour;
- retour.str("");
- int l_i;
- for ( l_i=0; l_i < taille ; l_i++) {
- if ( l_i == 0 ) {
- retour << vec[l_i];
- } else {
- retour << s << vec[l_i];
}
- }
- return retour.str();
-}
+ string vectorToString ( int* vec, string s , int taille)
+ {
+ stringstream retour;
+ retour.str("");
+ int l_i;
+ for ( l_i=0; l_i < taille ; l_i++)
+ {
+ if ( l_i == 0 )
+ {
+ retour << vec[l_i];
+ }
+ else
+ {
+ retour << s << vec[l_i];
+ }
+ }
+ return retour.str();
-string vectorToString ( int* vec, string s , int taille)
-{
- stringstream retour;
- retour.str("");
- int l_i;
- for ( l_i=0; l_i < taille ; l_i++) {
- if ( l_i == 0 ) {
- retour << vec[l_i];
- } else {
- retour << s << vec[l_i];
}
- }
- return retour.str();
-}
+ string vectorToString ( bool* vec, string s , int taille)
+ {
+ stringstream retour;
+ retour.str("");
+ int l_i;
+ for ( l_i=0; l_i < taille ; l_i++)
+ {
+ if ( l_i == 0 )
+ {
+ retour << vec[l_i];
+ }
+ else
+ {
+ retour << s << vec[l_i];
+ }
+ }
+ return retour.str();
-string vectorToString ( bool* vec, string s , int taille)
-{
- stringstream retour;
- retour.str("");
- int l_i;
- for ( l_i=0; l_i < taille ; l_i++) {
- if ( l_i == 0 ) {
- retour << vec[l_i];
- } else {
- retour << s << vec[l_i];
}
- }
- return retour.str();
-
-}
+
+ string vectorToString ( vector<bool>* vec, string s , int taille)
+ {
+ stringstream retour;
+ retour.str("");
+ int l_i;
+ for ( l_i=0; l_i < taille ; l_i++)
+ {
+ if ( l_i == 0 )
+ {
+ retour << vec->at(l_i);
+ }
+ else
+ {
+ retour << s << vec->at(l_i);
+ }
+ }
+ return retour.str();
-vector<string> subVector ( vector<string> vec, int start, int end )
-{
- vector<string> retour;
- if ( start > end ) {
- cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
- exit ( 0 );
- }
- for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
-}
+ }
-vector<int> subVector ( vector<int> vec, int start, int end )
-{
- vector<int> retour;
- if ( start > end ) {
- cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
- exit ( 0 );
- }
- for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
-}
+ string vectorToString ( vector<int>* vec, string s , int taille)
+ {
+ stringstream retour;
+ retour.str("");
+ int l_i;
+ for ( l_i=0; l_i < taille ; l_i++)
+ {
+ if ( l_i == 0 )
+ {
+ retour << vec->at(l_i);
+ }
+ else
+ {
+ retour << s << vec->at(l_i);
+ }
+ }
+ return retour.str();
-vector<float> subVector ( vector<float> vec, int start, int end )
-{
- vector<float> retour;
- if ( start > end ) {
- cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
- exit ( 0 );
- }
- for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
-}
+ }
-vector<string> copyVector ( vector<string> vec )
-{
- vector<string> retour;
- for ( int i = 0; i < ( int ) vec.size(); i++ ) {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
-}
-vector<int> copyVector ( vector<int> vec )
-{
- vector<int> retour;
- for ( int i = 0; i < ( int ) vec.size(); i++ ) {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
-}
-vector<float> copyVector ( vector<float> vec )
-{
- vector<float> retour;
- for ( int i = 0; i < ( int ) vec.size(); i++ ) {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
-}
-vector<string> stringToVector ( string s, string tok )
-{
- vector<string> to_return;
- string to_push ( "" );
- bool pushed = false;
- string::iterator sIt;
- for ( sIt = s.begin(); sIt < s.end(); sIt++ ) {
- pushed = false;
- for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) {
- if ( ( *sIt ) == ( *sTok ) ) {
- to_return.push_back ( to_push );
- to_push = "";
- pushed = true;
- }
+
+
+ vector<string> subVector ( vector<string> vec, int start, int end )
+ {
+ vector<string> retour;
+ if ( start > end )
+ {
+ cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
+ exit ( 0 );
+ }
+ for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ )
+ {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
}
- if ( !pushed ) {
- to_push.push_back ( ( *sIt ) );
+
+ vector<int> subVector ( vector<int> vec, int start, int end )
+ {
+ vector<int> retour;
+ if ( start > end )
+ {
+ cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
+ exit ( 0 );
+ }
+ for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ )
+ {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
}
- }
- to_return.push_back ( to_push );
- return to_return;
-}
-vector<int> stringToVectorInt ( string s, string tok )
-{
- vector<int> to_return;
- string to_push ( "" );
- bool pushed = false;
- string::iterator sIt;
- for ( sIt = s.begin(); sIt < s.end(); sIt++ ) {
- pushed = false;
- for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) {
- if ( ( *sIt ) == ( *sTok ) ) {
- if ( ( int ) to_push.length() > 0 ) {
- to_return.push_back ( atoi ( to_push.c_str() ) );
+
+ vector<float> subVector ( vector<float> vec, int start, int end )
+ {
+ vector<float> retour;
+ if ( start > end )
+ {
+ cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
+ exit ( 0 );
+ }
+ for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ )
+ {
+ retour.push_back ( vec.at ( i ) );
}
- to_push = "";
- pushed = true;
- }
+ return retour;
}
- if ( !pushed ) {
- to_push.push_back ( ( *sIt ) );
+
+ vector<string> copyVector ( vector<string> vec )
+ {
+ vector<string> retour;
+ for ( int i = 0; i < ( int ) vec.size(); i++ )
+ {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
}
- }
- if ( ( int ) to_push.length() > 0 ) {
- to_return.push_back ( atoi ( to_push.c_str() ) );
- }
- return to_return;
-}
-vector<float> stringToVectorFloat ( string s, string tok )
-{
- vector<float> to_return;
- string to_push ( "" );
- bool pushed = false;
- string::iterator sIt;
- for ( sIt = s.begin(); sIt < s.end(); sIt++ ) {
- pushed = false;
- for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) {
- if ( ( *sIt ) == ( *sTok ) ) {
- if ( ( int ) to_push.length() > 0 ) {
- to_return.push_back ( atof ( to_push.c_str() ) );
+ vector<int> copyVector ( vector<int> vec )
+ {
+ vector<int> retour;
+ for ( int i = 0; i < ( int ) vec.size(); i++ )
+ {
+ retour.push_back ( vec.at ( i ) );
}
- to_push = "";
- pushed = true;
- }
+ return retour;
}
- if ( !pushed ) {
- to_push.push_back ( ( *sIt ) );
+ vector<float> copyVector ( vector<float> vec )
+ {
+ vector<float> retour;
+ for ( int i = 0; i < ( int ) vec.size(); i++ )
+ {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+ }
+ vector<string> stringToVector ( string s, string tok )
+ {
+ vector<string> to_return;
+ string to_push ( "" );
+ bool pushed = false;
+ string::iterator sIt;
+ for ( sIt = s.begin(); sIt < s.end(); sIt++ )
+ {
+ pushed = false;
+ for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ )
+ {
+ if ( ( *sIt ) == ( *sTok ) )
+ {
+ to_return.push_back ( to_push );
+ to_push = "";
+ pushed = true;
+ }
+ }
+ if ( !pushed )
+ {
+ to_push.push_back ( ( *sIt ) );
+ }
+ }
+ to_return.push_back ( to_push );
+ return to_return;
+ }
+ vector<int> stringToVectorInt ( string s, string tok )
+ {
+ vector<int> to_return;
+ string to_push ( "" );
+ bool pushed = false;
+ string::iterator sIt;
+ for ( sIt = s.begin(); sIt < s.end(); sIt++ )
+ {
+ pushed = false;
+ for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ )
+ {
+ if ( ( *sIt ) == ( *sTok ) )
+ {
+ if ( ( int ) to_push.length() > 0 )
+ {
+ to_return.push_back ( atoi ( to_push.c_str() ) );
+ }
+ to_push = "";
+ pushed = true;
+ }
+ }
+ if ( !pushed )
+ {
+ to_push.push_back ( ( *sIt ) );
+ }
+ }
+ if ( ( int ) to_push.length() > 0 )
+ {
+ to_return.push_back ( atoi ( to_push.c_str() ) );
+ }
+ return to_return;
+ }
+ vector<float> stringToVectorFloat ( string s, string tok )
+ {
+ vector<float> to_return;
+ string to_push ( "" );
+ bool pushed = false;
+ string::iterator sIt;
+ for ( sIt = s.begin(); sIt < s.end(); sIt++ )
+ {
+ pushed = false;
+ for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ )
+ {
+ if ( ( *sIt ) == ( *sTok ) )
+ {
+ if ( ( int ) to_push.length() > 0 )
+ {
+ to_return.push_back ( atof ( to_push.c_str() ) );
+ }
+ to_push = "";
+ pushed = true;
+ }
+ }
+ if ( !pushed )
+ {
+ to_push.push_back ( ( *sIt ) );
+ }
+ }
+ if ( ( int ) to_push.length() > 0 )
+ {
+ to_return.push_back ( atoi ( to_push.c_str() ) );
+ }
+ return to_return;
}
- }
- if ( ( int ) to_push.length() > 0 ) {
- to_return.push_back ( atoi ( to_push.c_str() ) );
- }
- return to_return;
-}
-string lowerCase ( string str )
-{
- for ( int i = 0; i < ( int ) str.size(); i++ ) {
- if ( ( str[i] >= 0x41 ) && ( str[i] <= 0x5A ) ) {
- str[i] = str[i] + 0x20;
+ string lowerCase ( string str )
+ {
+ for ( int i = 0;i < ( int ) str.size();i++ )
+ {
+ if ( ( str[i] >= 0x41 ) && ( str[i] <= 0x5A ) )
+ {
+ str[i] = str[i] + 0x20;
+ }
+ }
+ return str;
}
- }
- return str;
-}
-string removePunctTercom ( string str )
-{
- string str_mod = str;
- sregex rex;
- string replace;
+ string removePunctTercom ( string str )
+ {
+ string str_mod = str;
+ sregex rex;
+ string replace;
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\"]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\"]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[,]" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[,]" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([\\.]$)" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([\\.]$)" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\?]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\?]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\;]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\;]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\:]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\:]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\!]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\!]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\(]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\(]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\)]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\)]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+$" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+$" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
-}
-string removePunct ( string str )
-{
- string str_mod = str;
- sregex rex;
- string replace;
+ return str_mod;
+ }
+ string removePunct ( string str )
+ {
+ string str_mod = str;
+ sregex rex;
+ string replace;
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\"]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\"]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[,]" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[,]" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([\\.]$)" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([\\.]$)" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\?]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\?]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\;]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\;]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\:]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\:]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\!]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\!]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\(]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\(]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\)]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\)]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+$" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+$" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
-}
-string tokenizePunct ( string str )
-{
- string str_mod = str;
- sregex rex = sregex::compile ( "(([^0-9])([\\,])([^0-9]))" );
- string replace ( "$2 $3 $4" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ return str_mod;
+ }
+ string tokenizePunct ( string str )
+ {
+ string str_mod = str;
+ sregex rex = sregex::compile ( "(([^0-9])([\\,])([^0-9]))" );
+ string replace ( "$2 $3 $4" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(([^0-9])([\\.])([^0-9]))" );
- replace = ( "$2 $3 $4" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(([^0-9])([\\.])([^0-9]))" );
+ replace = ( "$2 $3 $4" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.]) )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.]) )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.])$)" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.])$)" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([A-Z]|[a-z]) ([\\.]) )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([A-Z]|[a-z]) ([\\.]) )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(([A-Z]|[a-z])([\\.]) ([A-Z]|[a-z])([\\.]) )" );
- replace = ( "$2.$4. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(([A-Z]|[a-z])([\\.]) ([A-Z]|[a-z])([\\.]) )" );
+ replace = ( "$2.$4. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\?]" );
- replace = ( " ? " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\?]" );
+ replace = ( " ? " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\;]" );
- replace = ( " ; " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\;]" );
+ replace = ( " ; " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(([^0-9])([\\:])([^0-9]))" );
- replace = ( "$2 $3 $4" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(([^0-9])([\\:])([^0-9]))" );
+ replace = ( "$2 $3 $4" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\!]" );
- replace = ( " ! " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\!]" );
+ replace = ( " ! " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\(]" );
- replace = ( " ( " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\(]" );
+ replace = ( " ( " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\)]" );
- replace = ( " ) " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\)]" );
+ replace = ( " ) " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\"]" );
- replace = ( " \" " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\"]" );
+ replace = ( " \" " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(num_ \\( ([^\\)]+) \\))" );
- replace = ( "num_($2)" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(num_ \\( ([^\\)]+) \\))" );
+ replace = ( "num_($2)" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(ordinal_ \\( ([^\\)]*) \\))" );
- replace = ( "ordinal_($2)" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(ordinal_ \\( ([^\\)]*) \\))" );
+ replace = ( "ordinal_($2)" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Mm]) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Mm]) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Mm]) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Mm]) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Dd]r) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Dd]r) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Dd]r) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Dd]r) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Mm]r) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Mm]r) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Mm]r) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Mm]r) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Mm]rs) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Mm]rs) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Mm]rs) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Mm]rs) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Nn]o) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Nn]o) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Nn]o) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Nn]o) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
// rex = sregex::compile ( "(^(([Jj]an)|([Ff]ev)|([Mm]ar)|([Aa]pr)|([Jj]un)|([Jj]ul)|([Aa]ug)|([Ss]ept)|([Oo]ct)|([Nn]ov)|([Dd]ec)) \\.)" );
// replace = ( "$2." );
// str_mod = regex_replace ( str_mod, rex, replace );
-//
+//
// rex = sregex::compile ( "( (([Jj]an)|([Ff]ev)|([Mm]ar)|([Aa]pr)|([Jj]un)|([Jj]ul)|([Aa]ug)|([Ss]ept)|([Oo]ct)|([Nn]ov)|([Dd]ec)) \\.)" );
// replace = ( " $2." );
// str_mod = regex_replace ( str_mod, rex, replace );
-//
+//
// rex = sregex::compile ( "(^(([Gg]en)|([Cc]ol)) \\.)" );
// replace = ( "$2." );
// str_mod = regex_replace ( str_mod, rex, replace );
-//
+//
// rex = sregex::compile ( "( (([Gg]en)|([Cc]ol)) \\.)" );
// replace = ( " $2." );
// str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^(([A-Z][a-z])) \\. )" );
- replace = ( "$2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
-
- rex = sregex::compile ( "( (([A-Z][a-z])) \\. )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^(([A-Z][a-z])) \\. )" );
+ replace = ( "$2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^(([A-Z][a-z][a-z])) \\. )" );
- replace = ( "$2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( (([A-Z][a-z])) \\. )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( (([A-Z][a-z][a-z])) \\. )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^(([A-Z][a-z][a-z])) \\. )" );
+ replace = ( "$2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( (([A-Z][a-z][a-z])) \\. )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+$" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
-}
+ rex = sregex::compile ( "[ ]+$" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
+
+ return str_mod;
+ }
-string normalizeStd ( string str )
-{
- string str_mod = str;
- sregex rex = sregex::compile ( "(<skipped>)" );
- string replace ( "" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ string normalizeStd ( string str )
+ {
+ string str_mod = str;
+ sregex rex = sregex::compile ( "(<skipped>)" );
+ string replace ( "" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "-\n" );
- replace = ( "" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "-\n" );
+ replace = ( "" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "\n" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "\n" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&quot;" );
- replace = ( "\"" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "&quot;" );
+ replace = ( "\"" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&amp;" );
- replace = ( "& " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "&amp;" );
+ replace = ( "& " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&lt;" );
- replace = ( "<" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "&lt;" );
+ replace = ( "<" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&gt;" );
- replace = ( ">" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "&gt;" );
+ replace = ( ">" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
-}
+ return str_mod;
+ }
-param copyParam ( param p )
-{
- param to_return;
- to_return.caseOn = p.caseOn;
- to_return.noPunct = p.noPunct;
- to_return.debugMode = p.debugMode;
- to_return.debugLevel = p.debugLevel;
- to_return.hypothesisFile = p.hypothesisFile;
- to_return.referenceFile = p.referenceFile;
- to_return.normalize = p.normalize;
- to_return.noTxtIds = p.noTxtIds;
- to_return.outputFileExtension = p.outputFileExtension;
- to_return.outputFileName = p.outputFileName;
- to_return.sgmlInputs = p.sgmlInputs;
- to_return.tercomLike = p.tercomLike;
- to_return.printAlignments = p.printAlignments;
- to_return.WER=p.WER;
- return to_return;
-}
-string printParams ( param p )
-{
- stringstream s;
- s << "caseOn = " << p.caseOn << endl;
- s << "noPunct = " << p.noPunct << endl;
- s << "debugMode = " << p.debugMode << endl;
- s << "debugLevel = " << p.debugLevel << endl;
- s << "hypothesisFile = " << p.hypothesisFile << endl;
- s << "referenceFile = " << p.referenceFile << endl;
- s << "normalize = " << p.normalize << endl;
- s << "noTxtIds = " << p.noTxtIds << endl;
- s << "outputFileExtension = " << p.outputFileExtension << endl;
- s << "outputFileName = " << p.outputFileName << endl;
- s << "sgmlInputs = " << p.sgmlInputs << endl;
- s << "tercomLike = " << p.tercomLike << endl;
- return s.str();
+ param copyParam ( param p )
+ {
+ param to_return;
+ to_return.caseOn = p.caseOn;
+ to_return.noPunct = p.noPunct;
+ to_return.debugMode = p.debugMode;
+ to_return.debugLevel = p.debugLevel;
+ to_return.hypothesisFile = p.hypothesisFile;
+ to_return.referenceFile = p.referenceFile;
+ to_return.normalize = p.normalize;
+ to_return.noTxtIds = p.noTxtIds;
+ to_return.verbose = p.verbose;
+ to_return.count_verbose = p.count_verbose;
+ to_return.outputFileExtension = p.outputFileExtension;
+ to_return.outputFileName = p.outputFileName;
+ to_return.sgmlInputs = p.sgmlInputs;
+ to_return.tercomLike = p.tercomLike;
+ to_return.printAlignments = p.printAlignments;
+ to_return.WER=p.WER;
+ return to_return;
+ }
+ string printParams ( param p )
+ {
+ stringstream s;
+ s << "caseOn = " << p.caseOn << endl;
+ s << "noPunct = " << p.noPunct << endl;
+ s << "debugMode = " << p.debugMode << endl;
+ s << "debugLevel = " << p.debugLevel << endl;
+ s << "hypothesisFile = " << p.hypothesisFile << endl;
+ s << "referenceFile = " << p.referenceFile << endl;
+ s << "normalize = " << p.normalize << endl;
+ s << "noTxtIds = " << p.noTxtIds << endl;
+ s << "outputFileExtension = " << p.outputFileExtension << endl;
+ s << "outputFileName = " << p.outputFileName << endl;
+ s << "sgmlInputs = " << p.sgmlInputs << endl;
+ s << "tercomLike = " << p.tercomLike << endl;
+ s << "verbose = " << p.verbose << endl;
+ s << "count_verbose = " << p.count_verbose << endl;
+ return s.str();
-}
-string join ( string delim, vector<string> arr )
-{
- if ( ( int ) arr.size() == 0 ) return "";
+ }
+ string join ( string delim, vector<string> arr )
+ {
+ if ( ( int ) arr.size() == 0 ) return "";
// if ((int)delim.compare("") == 0) delim = new String("");
// String s = new String("");
- stringstream s;
- s.str ( "" );
- for ( int i = 0; i < ( int ) arr.size(); i++ ) {
- if ( i == 0 ) {
- s << arr.at ( i );
- } else {
- s << delim << arr.at ( i );
- }
- }
- return s.str();
+ stringstream s;
+ s.str ( "" );
+ for ( int i = 0; i < ( int ) arr.size(); i++ )
+ {
+ if ( i == 0 )
+ {
+ s << arr.at ( i );
+ }
+ else
+ {
+ s << delim << arr.at ( i );
+ }
+ }
+ return s.str();
// return "";
-}
+ }
}
diff --git a/mert/TER/tools.h b/mert/TER/tools.h
index 157b739a5..4c3b108cd 100644
--- a/mert/TER/tools.h
+++ b/mert/TER/tools.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
-#ifndef MERT_TER_TOOLS_H_
-#define MERT_TER_TOOLS_H_
+#ifndef __TERCPPTOOLS_H__
+#define __TERCPPTOOLS_H__
#include <vector>
@@ -35,31 +35,34 @@ using namespace std;
namespace Tools
{
-typedef vector<double> vecDouble;
-typedef vector<char> vecChar;
-typedef vector<int> vecInt;
-typedef vector<float> vecFloat;
-typedef vector<size_t> vecSize_t;
-typedef vector<string> vecString;
-typedef vector<string> alignmentElement;
-typedef vector<alignmentElement> WERalignment;
+ typedef vector<double> vecDouble;
+ typedef vector<char> vecChar;
+ typedef vector<int> vecInt;
+ typedef vector<float> vecFloat;
+ typedef vector<size_t> vecSize_t;
+ typedef vector<string> vecString;
+ typedef vector<string> alignmentElement;
+ typedef vector<alignmentElement> WERalignment;
-struct param {
- bool debugMode;
- string referenceFile; // path to the resources
- string hypothesisFile; // path to the configuration files
- string outputFileExtension;
- string outputFileName;
- bool noPunct;
- bool caseOn;
- bool normalize;
- bool tercomLike;
- bool sgmlInputs;
- bool noTxtIds;
- bool printAlignments;
- bool WER;
- int debugLevel;
+struct param
+{
+ bool debugMode;
+ string referenceFile; // path to the resources
+ string hypothesisFile; // path to the configuration files
+ string outputFileExtension;
+ string outputFileName;
+ bool noPunct;
+ bool caseOn;
+ bool normalize;
+ bool tercomLike;
+ bool sgmlInputs;
+ bool verbose;
+ bool count_verbose;
+ bool noTxtIds;
+ bool printAlignments;
+ bool WER;
+ int debugLevel;
};
// param = { false, "","","","" };
@@ -67,35 +70,38 @@ struct param {
// private:
// public:
-string vectorToString ( vector<string> vec );
-string vectorToString ( vector<char> vec );
-string vectorToString ( vector<int> vec );
-string vectorToString ( vector<string> vec, string s );
-string vectorToString ( vector<char> vec, string s );
-string vectorToString ( vector<int> vec, string s );
-string vectorToString ( vector<bool> vec, string s );
-string vectorToString ( char* vec, string s, int taille );
-string vectorToString ( int* vec, string s , int taille );
-string vectorToString ( bool* vec, string s , int taille );
-vector<string> subVector ( vector<string> vec, int start, int end );
-vector<int> subVector ( vector<int> vec, int start, int end );
-vector<float> subVector ( vector<float> vec, int start, int end );
-vector<string> copyVector ( vector<string> vec );
-vector<int> copyVector ( vector<int> vec );
-vector<float> copyVector ( vector<float> vec );
-vector<string> stringToVector ( string s, string tok );
-vector<string> stringToVector ( char s, string tok );
-vector<string> stringToVector ( int s, string tok );
-vector<int> stringToVectorInt ( string s, string tok );
-vector<float> stringToVectorFloat ( string s, string tok );
-string lowerCase(string str);
-string removePunct(string str);
-string tokenizePunct(string str);
-string removePunctTercom(string str);
-string normalizeStd(string str);
-string printParams(param p);
-string join ( string delim, vector<string> arr );
+ string vectorToString ( vector<string> vec );
+ string vectorToString ( vector<char> vec );
+ string vectorToString ( vector<int> vec );
+ string vectorToString ( vector<string> vec, string s );
+ string vectorToString ( vector<char> vec, string s );
+ string vectorToString ( vector<int> vec, string s );
+ string vectorToString ( vector<bool> vec, string s );
+ string vectorToString ( char* vec, string s, int taille );
+ string vectorToString ( int* vec, string s , int taille );
+ string vectorToString ( bool* vec, string s , int taille );
+ string vectorToString ( vector<char>* vec, string s, int taille );
+ string vectorToString ( vector<int>* vec, string s , int taille );
+ string vectorToString ( vector<bool>* vec, string s , int taille );
+ vector<string> subVector ( vector<string> vec, int start, int end );
+ vector<int> subVector ( vector<int> vec, int start, int end );
+ vector<float> subVector ( vector<float> vec, int start, int end );
+ vector<string> copyVector ( vector<string> vec );
+ vector<int> copyVector ( vector<int> vec );
+ vector<float> copyVector ( vector<float> vec );
+ vector<string> stringToVector ( string s, string tok );
+ vector<string> stringToVector ( char s, string tok );
+ vector<string> stringToVector ( int s, string tok );
+ vector<int> stringToVectorInt ( string s, string tok );
+ vector<float> stringToVectorFloat ( string s, string tok );
+ string lowerCase(string str);
+ string removePunct(string str);
+ string tokenizePunct(string str);
+ string removePunctTercom(string str);
+ string normalizeStd(string str);
+ string printParams(param p);
+ string join ( string delim, vector<string> arr );
// };
-param copyParam(param p);
+ param copyParam(param p);
}
#endif