Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/mert
diff options
context:
space:
mode:
author Ulrich Germann <ugermann@inf.ed.ac.uk> 2015-02-06 04:30:00 +0300
committer Ulrich Germann <ugermann@inf.ed.ac.uk> 2015-02-06 04:30:00 +0300
commit be5799dca34027849fc40a38a63459e164f27add (patch)
tree 140e865a962c546c12e2322ab76a56699e7338c6 /mert
parent 80a9f84422f3b7ce3ddf0bcfcbe2e8d06bba9e98 (diff)
parent 8b61f396a7558bf628c2e94a9583023b9ae34a8c (diff)
Merge branch 'master' of https://github.com/moses-smt/mosesdecoder
Conflicts: moses/TranslationOptionCollection.cpp moses/TranslationOptionCollectionLattice.cpp moses/TranslationOptionCollectionLattice.h moses/TranslationOptionList.h
Diffstat (limited to 'mert')
-rw-r--r-- mert/Data.cpp 8
-rw-r--r-- mert/FeatureStats.cpp 5
-rw-r--r-- mert/ForestRescore.cpp 81
-rw-r--r-- mert/ForestRescore.h 53
-rw-r--r-- mert/ForestRescoreTest.cpp 4
-rw-r--r-- mert/HopeFearDecoder.cpp 112
-rw-r--r-- mert/HopeFearDecoder.h 72
-rw-r--r-- mert/HwcmScorer.cpp 12
-rw-r--r-- mert/Hypergraph.cpp 54
-rw-r--r-- mert/Hypergraph.h 359
-rw-r--r-- mert/HypergraphTest.cpp 30
-rw-r--r-- mert/InterpolatedScorer.cpp 24
-rw-r--r-- mert/MeteorScorer.cpp 9
-rw-r--r-- mert/MeteorScorer.h 2
-rw-r--r-- mert/MiraFeatureVector.cpp 9
-rw-r--r-- mert/MiraWeightVector.cpp 6
-rw-r--r-- mert/Optimizer.cpp 20
-rw-r--r-- mert/Point.cpp 4
-rw-r--r-- mert/PreProcessFilter.cpp 2
-rw-r--r-- mert/Scorer.cpp 4
-rw-r--r-- mert/StatisticsBasedScorer.h 2
-rw-r--r-- mert/TER/alignmentStruct.cpp 16
-rw-r--r-- mert/TER/alignmentStruct.h 26
-rw-r--r-- mert/TER/bestShiftStruct.h 22
-rw-r--r-- mert/TER/hashMap.cpp 232
-rw-r--r-- mert/TER/hashMap.h 40
-rw-r--r-- mert/TER/hashMapInfos.cpp 239
-rw-r--r-- mert/TER/hashMapInfos.h 42
-rw-r--r-- mert/TER/hashMapStringInfos.cpp 313
-rw-r--r-- mert/TER/hashMapStringInfos.h 42
-rw-r--r-- mert/TER/infosHasher.cpp 58
-rw-r--r-- mert/TER/infosHasher.h 36
-rw-r--r-- mert/TER/stringHasher.cpp 46
-rw-r--r-- mert/TER/stringHasher.h 28
-rw-r--r-- mert/TER/stringInfosHasher.cpp 58
-rw-r--r-- mert/TER/stringInfosHasher.h 36
-rw-r--r-- mert/TER/terAlignment.cpp 286
-rw-r--r-- mert/TER/terAlignment.h 72
-rw-r--r-- mert/TER/terShift.cpp 116
-rw-r--r-- mert/TER/terShift.h 46
-rw-r--r-- mert/TER/tercalc.cpp 1512
-rw-r--r-- mert/TER/tercalc.h 90
-rw-r--r-- mert/TER/tools.cpp 1167
-rw-r--r-- mert/TER/tools.h 107
-rw-r--r-- mert/evaluator.cpp 9
-rw-r--r-- mert/kbmira.cpp 12
46 files changed, 2642 insertions, 2881 deletions
diff --git a/mert/Data.cpp b/mert/Data.cpp
index 08e80409f..49c1239e5 100644
--- a/mert/Data.cpp
+++ b/mert/Data.cpp
@@ -263,13 +263,13 @@ void Data::createShards(size_t shard_count, float shard_size, const string& scor
{
UTIL_THROW_IF(shard_count == 0, util::Exception, "Must have at least 1 shard");
UTIL_THROW_IF(shard_size < 0 || shard_size > 1,
- util::Exception,
- "Shard size must be between 0 and 1, inclusive. Currently " << shard_size);
+ util::Exception,
+ "Shard size must be between 0 and 1, inclusive. Currently " << shard_size);
size_t data_size = m_score_data->size();
UTIL_THROW_IF(data_size != m_feature_data->size(),
- util::Exception,
- "Error");
+ util::Exception,
+ "Error");
shard_size *= data_size;
const float coeff = static_cast<float>(data_size) / shard_count;
diff --git a/mert/FeatureStats.cpp b/mert/FeatureStats.cpp
index a0c6a6ebc..a3ed2cc9b 100644
--- a/mert/FeatureStats.cpp
+++ b/mert/FeatureStats.cpp
@@ -61,7 +61,8 @@ void SparseVector::set(const string& name, FeatureStatsType value)
m_fvector[id] = value;
}
-void SparseVector::set(size_t id, FeatureStatsType value) {
+void SparseVector::set(size_t id, FeatureStatsType value)
+{
assert(m_id_to_name.size() > id);
m_fvector[id] = value;
}
@@ -204,7 +205,7 @@ FeatureStats::FeatureStats(const size_t size)
FeatureStats::~FeatureStats()
{
- delete [] m_array;
+ delete [] m_array;
}
void FeatureStats::Copy(const FeatureStats &stats)
diff --git a/mert/ForestRescore.cpp b/mert/ForestRescore.cpp
index 0172c6d92..009152e35 100644
--- a/mert/ForestRescore.cpp
+++ b/mert/ForestRescore.cpp
@@ -31,9 +31,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
-namespace MosesTuning {
+namespace MosesTuning
+{
-std::ostream& operator<<(std::ostream& out, const WordVec& wordVec) {
+std::ostream& operator<<(std::ostream& out, const WordVec& wordVec)
+{
out << "[";
for (size_t i = 0; i < wordVec.size(); ++i) {
out << wordVec[i]->first;
@@ -44,7 +46,8 @@ std::ostream& operator<<(std::ostream& out, const WordVec& wordVec) {
}
-void ReferenceSet::Load(const vector<string>& files, Vocab& vocab) {
+void ReferenceSet::Load(const vector<string>& files, Vocab& vocab)
+{
for (size_t i = 0; i < files.size(); ++i) {
util::FilePiece fh(files[i].c_str());
size_t sentenceId = 0;
@@ -55,14 +58,15 @@ void ReferenceSet::Load(const vector<string>& files, Vocab& vocab) {
} catch (util::EndOfFileException &e) {
break;
}
- AddLine(sentenceId, line, vocab);
- ++sentenceId;
+ AddLine(sentenceId, line, vocab);
+ ++sentenceId;
}
}
}
-void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab) {
+void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab)
+{
//cerr << line << endl;
NgramCounter ngramCounts;
list<WordVec> openNgrams;
@@ -74,14 +78,14 @@ void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vo
openNgrams.push_front(WordVec());
for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end(); ++k) {
k->push_back(nextTok);
- ++ngramCounts[*k];
+ ++ngramCounts[*k];
}
if (openNgrams.size() >= kBleuNgramOrder) openNgrams.pop_back();
}
//merge into overall ngram map
for (NgramCounter::const_iterator ni = ngramCounts.begin();
- ni != ngramCounts.end(); ++ni) {
+ ni != ngramCounts.end(); ++ni) {
size_t count = ni->second;
//cerr << *ni << " " << count << endl;
if (ngramCounts_.size() <= sentenceId) ngramCounts_.resize(sentenceId+1);
@@ -104,8 +108,9 @@ void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vo
//cerr << endl;
}
-
-size_t ReferenceSet::NgramMatches(size_t sentenceId, const WordVec& ngram, bool clip) const {
+
+size_t ReferenceSet::NgramMatches(size_t sentenceId, const WordVec& ngram, bool clip) const
+{
const NgramMap& ngramCounts = ngramCounts_.at(sentenceId);
NgramMap::const_iterator ngi = ngramCounts.find(ngram);
if (ngi == ngramCounts.end()) return 0;
@@ -114,7 +119,8 @@ size_t ReferenceSet::NgramMatches(size_t sentenceId, const WordVec& ngram, bool
VertexState::VertexState(): bleuStats(kBleuNgramOrder), targetLength(0) {}
-void HgBleuScorer::UpdateMatches(const NgramCounter& counts, vector<FeatureStatsType>& bleuStats ) const {
+void HgBleuScorer::UpdateMatches(const NgramCounter& counts, vector<FeatureStatsType>& bleuStats ) const
+{
for (NgramCounter::const_iterator ngi = counts.begin(); ngi != counts.end(); ++ngi) {
//cerr << "Checking: " << *ngi << " matches " << references_.NgramMatches(sentenceId_,*ngi,false) << endl;
size_t order = ngi->first.size();
@@ -124,7 +130,8 @@ void HgBleuScorer::UpdateMatches(const NgramCounter& counts, vector<FeatureStats
}
}
-size_t HgBleuScorer::GetTargetLength(const Edge& edge) const {
+size_t HgBleuScorer::GetTargetLength(const Edge& edge) const
+{
size_t targetLength = 0;
for (size_t i = 0; i < edge.Words().size(); ++i) {
const Vocab::Entry* word = edge.Words()[i];
@@ -137,7 +144,8 @@ size_t HgBleuScorer::GetTargetLength(const Edge& edge) const {
return targetLength;
}
-FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vector<FeatureStatsType>& bleuStats) {
+FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vector<FeatureStatsType>& bleuStats)
+{
NgramCounter ngramCounts;
size_t childId = 0;
size_t wordId = 0;
@@ -147,7 +155,7 @@ FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vecto
bool inRightContext = false;
list<WordVec> openNgrams;
const Vocab::Entry* currentWord = NULL;
- while (wordId < edge.Words().size()) {
+ while (wordId < edge.Words().size()) {
currentWord = edge.Words()[wordId];
if (currentWord != NULL) {
++wordId;
@@ -214,7 +222,7 @@ FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vecto
}
if (openNgrams.size() >= kBleuNgramOrder) openNgrams.pop_back();
}
-
+
//Collect matches
//This edge
//cerr << "edge ngrams" << endl;
@@ -227,26 +235,27 @@ FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vecto
bleuStats[j] += vertexStates_[edge.Children()[i]].bleuStats[j];
}
}
-
+
FeatureStatsType sourceLength = head.SourceCovered();
size_t referenceLength = references_.Length(sentenceId_);
- FeatureStatsType effectiveReferenceLength =
+ FeatureStatsType effectiveReferenceLength =
sourceLength / totalSourceLength_ * referenceLength;
bleuStats[bleuStats.size()-1] = effectiveReferenceLength;
- //backgroundBleu_[backgroundBleu_.size()-1] =
+ //backgroundBleu_[backgroundBleu_.size()-1] =
// backgroundRefLength_ * sourceLength / totalSourceLength_;
FeatureStatsType bleu = sentenceLevelBackgroundBleu(bleuStats, backgroundBleu_);
return bleu;
}
-void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const vector<FeatureStatsType>& bleuStats) {
+void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const vector<FeatureStatsType>& bleuStats)
+{
//TODO: Maybe more efficient to absorb into the Score() method
VertexState& vertexState = vertexStates_[vertexId];
//cerr << "Updating state for " << vertexId << endl;
-
+
//leftContext
int wi = 0;
const VertexState* childState = NULL;
@@ -263,9 +272,9 @@ void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const ve
//start of child state
childState = &(vertexStates_[winnerEdge.Children()[childi++]]);
contexti = 0;
- }
+ }
if ((size_t)contexti < childState->leftContext.size()) {
- vertexState.leftContext.push_back(childState->leftContext[contexti++]);
+ vertexState.leftContext.push_back(childState->leftContext[contexti++]);
} else {
//end of child context
childState = NULL;
@@ -314,7 +323,8 @@ typedef pair<const Edge*,FeatureStatsType> BackPointer;
* Recurse through back pointers
**/
static void GetBestHypothesis(size_t vertexId, const Graph& graph, const vector<BackPointer>& bps,
- HgHypothesis* bestHypo) {
+ HgHypothesis* bestHypo)
+{
//cerr << "Expanding " << vertexId << " Score: " << bps[vertexId].second << endl;
//UTIL_THROW_IF(bps[vertexId].second == kMinScore+1, HypergraphException, "Landed at vertex " << vertexId << " which is a dead end");
if (!bps[vertexId].first) return;
@@ -334,14 +344,14 @@ static void GetBestHypothesis(size_t vertexId, const Graph& graph, const vector<
}
}
-void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references , size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu, HgHypothesis* bestHypo)
+void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references , size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu, HgHypothesis* bestHypo)
{
BackPointer init(NULL,kMinScore);
vector<BackPointer> backPointers(graph.VertexSize(),init);
HgBleuScorer bleuScorer(references, graph, sentenceId, backgroundBleu);
vector<FeatureStatsType> winnerStats(kBleuNgramOrder*2+1);
for (size_t vi = 0; vi < graph.VertexSize(); ++vi) {
- //cerr << "vertex id " << vi << endl;
+// cerr << "vertex id " << vi << endl;
FeatureStatsType winnerScore = kMinScore;
const Vertex& vertex = graph.GetVertex(vi);
const vector<const Edge*>& incoming = vertex.GetIncoming();
@@ -349,7 +359,7 @@ void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight,
//UTIL_THROW(HypergraphException, "Vertex " << vi << " has no incoming edges");
//If no incoming edges, vertex is a dead end
backPointers[vi].first = NULL;
- backPointers[vi].second = kMinScore/2;
+ backPointers[vi].second = kMinScore;
} else {
//cerr << "\nVertex: " << vi << endl;
for (size_t ei = 0; ei < incoming.size(); ++ei) {
@@ -357,15 +367,15 @@ void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight,
FeatureStatsType incomingScore = incoming[ei]->GetScore(weights);
for (size_t i = 0; i < incoming[ei]->Children().size(); ++i) {
size_t childId = incoming[ei]->Children()[i];
- UTIL_THROW_IF(backPointers[childId].second == kMinScore,
- HypergraphException, "Graph was not topologically sorted. curr=" << vi << " prev=" << childId);
- incomingScore += backPointers[childId].second;
+ //UTIL_THROW_IF(backPointers[childId].second == kMinScore,
+ // HypergraphException, "Graph was not topologically sorted. curr=" << vi << " prev=" << childId);
+ incomingScore = max(incomingScore + backPointers[childId].second, kMinScore);
}
vector<FeatureStatsType> bleuStats(kBleuNgramOrder*2+1);
- // cerr << "Score: " << incomingScore << " Bleu: ";
- // if (incomingScore > nonbleuscore) {nonbleuscore = incomingScore; nonbleuid = ei;}
+ // cerr << "Score: " << incomingScore << " Bleu: ";
+ // if (incomingScore > nonbleuscore) {nonbleuscore = incomingScore; nonbleuid = ei;}
FeatureStatsType totalScore = incomingScore;
- if (bleuWeight) {
+ if (bleuWeight) {
FeatureStatsType bleuScore = bleuScorer.Score(*(incoming[ei]), vertex, bleuStats);
if (isnan(bleuScore)) {
cerr << "WARN: bleu score undefined" << endl;
@@ -379,7 +389,7 @@ void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight,
}
//UTIL_THROW_IF(isnan(bleuScore), util::Exception, "Bleu score undefined, smoothing problem?");
totalScore += bleuWeight * bleuScore;
- // cerr << bleuScore << " Total: " << incomingScore << endl << endl;
+ // cerr << bleuScore << " Total: " << incomingScore << endl << endl;
//cerr << "is " << incomingScore << " bs " << bleuScore << endl;
}
if (totalScore >= winnerScore) {
@@ -394,9 +404,12 @@ void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight,
//update with winner
//if (bleuWeight) {
//TODO: Not sure if we need this when computing max-model solution
- bleuScorer.UpdateState(*(backPointers[vi].first), vi, winnerStats);
+ if (backPointers[vi].first) {
+ bleuScorer.UpdateState(*(backPointers[vi].first), vi, winnerStats);
+ }
}
+// cerr << "backpointer[" << vi << "] = (" << backPointers[vi].first << "," << backPointers[vi].second << ")" << endl;
}
//expand back pointers
diff --git a/mert/ForestRescore.h b/mert/ForestRescore.h
index 900275b74..2101a9248 100644
--- a/mert/ForestRescore.h
+++ b/mert/ForestRescore.h
@@ -27,7 +27,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "BleuScorer.h"
#include "Hypergraph.h"
-namespace MosesTuning {
+namespace MosesTuning
+{
std::ostream& operator<<(std::ostream& out, const WordVec& wordVec);
@@ -47,18 +48,21 @@ struct NgramEquals : public std::binary_function<const WordVec&, const WordVec&,
typedef boost::unordered_map<WordVec, size_t, NgramHash, NgramEquals> NgramCounter;
-class ReferenceSet {
+class ReferenceSet
+{
public:
-
+
void AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab);
void Load(const std::vector<std::string>& files, Vocab& vocab);
size_t NgramMatches(size_t sentenceId, const WordVec&, bool clip) const;
- size_t Length(size_t sentenceId) const {return lengths_[sentenceId];}
+ size_t Length(size_t sentenceId) const {
+ return lengths_[sentenceId];
+ }
private:
//ngrams to (clipped,unclipped) counts
@@ -80,31 +84,32 @@ struct VertexState {
/**
* Used to score an rule (ie edge) when we are applying it.
**/
-class HgBleuScorer {
- public:
- HgBleuScorer(const ReferenceSet& references, const Graph& graph, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu):
+class HgBleuScorer
+{
+public:
+ HgBleuScorer(const ReferenceSet& references, const Graph& graph, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu):
references_(references), sentenceId_(sentenceId), graph_(graph), backgroundBleu_(backgroundBleu),
- backgroundRefLength_(backgroundBleu[kBleuNgramOrder*2]) {
- vertexStates_.resize(graph.VertexSize());
- totalSourceLength_ = graph.GetVertex(graph.VertexSize()-1).SourceCovered();
- }
-
- FeatureStatsType Score(const Edge& edge, const Vertex& head, std::vector<FeatureStatsType>& bleuStats) ;
+ backgroundRefLength_(backgroundBleu[kBleuNgramOrder*2]) {
+ vertexStates_.resize(graph.VertexSize());
+ totalSourceLength_ = graph.GetVertex(graph.VertexSize()-1).SourceCovered();
+ }
- void UpdateState(const Edge& winnerEdge, size_t vertexId, const std::vector<FeatureStatsType>& bleuStats);
+ FeatureStatsType Score(const Edge& edge, const Vertex& head, std::vector<FeatureStatsType>& bleuStats) ;
+ void UpdateState(const Edge& winnerEdge, size_t vertexId, const std::vector<FeatureStatsType>& bleuStats);
- private:
- const ReferenceSet& references_;
- std::vector<VertexState> vertexStates_;
- size_t sentenceId_;
- size_t totalSourceLength_;
- const Graph& graph_;
- std::vector<FeatureStatsType> backgroundBleu_;
- FeatureStatsType backgroundRefLength_;
- void UpdateMatches(const NgramCounter& counter, std::vector<FeatureStatsType>& bleuStats) const;
- size_t GetTargetLength(const Edge& edge) const;
+private:
+ const ReferenceSet& references_;
+ std::vector<VertexState> vertexStates_;
+ size_t sentenceId_;
+ size_t totalSourceLength_;
+ const Graph& graph_;
+ std::vector<FeatureStatsType> backgroundBleu_;
+ FeatureStatsType backgroundRefLength_;
+
+ void UpdateMatches(const NgramCounter& counter, std::vector<FeatureStatsType>& bleuStats) const;
+ size_t GetTargetLength(const Edge& edge) const;
};
struct HgHypothesis {
diff --git a/mert/ForestRescoreTest.cpp b/mert/ForestRescoreTest.cpp
index 86975d3a5..4b62e8317 100644
--- a/mert/ForestRescoreTest.cpp
+++ b/mert/ForestRescoreTest.cpp
@@ -15,7 +15,7 @@ BOOST_AUTO_TEST_CASE(viterbi_simple_lattice)
Vocab vocab;
WordVec words;
string wordStrings[] =
- {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g"};
+ {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g"};
for (size_t i = 0; i < 9; ++i) {
words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
}
@@ -102,7 +102,7 @@ BOOST_AUTO_TEST_CASE(viterbi_3branch_lattice)
Vocab vocab;
WordVec words;
string wordStrings[] =
- {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
+ {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
for (size_t i = 0; i < 13; ++i) {
words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
}
diff --git a/mert/HopeFearDecoder.cpp b/mert/HopeFearDecoder.cpp
index 993cef1d8..3e62d8171 100644
--- a/mert/HopeFearDecoder.cpp
+++ b/mert/HopeFearDecoder.cpp
@@ -34,11 +34,13 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
namespace fs = boost::filesystem;
-namespace MosesTuning {
+namespace MosesTuning
+{
static const ValType BLEU_RATIO = 5;
-ValType HopeFearDecoder::Evaluate(const AvgWeightVector& wv) {
+ValType HopeFearDecoder::Evaluate(const AvgWeightVector& wv)
+{
vector<ValType> stats(scorer_->NumberOfScores(),0);
for(reset(); !finished(); next()) {
vector<ValType> sent;
@@ -51,13 +53,14 @@ ValType HopeFearDecoder::Evaluate(const AvgWeightVector& wv) {
}
NbestHopeFearDecoder::NbestHopeFearDecoder(
- const vector<string>& featureFiles,
- const vector<string>& scoreFiles,
- bool streaming,
- bool no_shuffle,
- bool safe_hope,
- Scorer* scorer
- ) : safe_hope_(safe_hope) {
+ const vector<string>& featureFiles,
+ const vector<string>& scoreFiles,
+ bool streaming,
+ bool no_shuffle,
+ bool safe_hope,
+ Scorer* scorer
+) : safe_hope_(safe_hope)
+{
scorer_ = scorer;
if (streaming) {
train_.reset(new StreamingHypPackEnumerator(featureFiles, scoreFiles));
@@ -67,25 +70,29 @@ NbestHopeFearDecoder::NbestHopeFearDecoder(
}
-void NbestHopeFearDecoder::next() {
+void NbestHopeFearDecoder::next()
+{
train_->next();
}
-bool NbestHopeFearDecoder::finished() {
+bool NbestHopeFearDecoder::finished()
+{
return train_->finished();
}
-void NbestHopeFearDecoder::reset() {
+void NbestHopeFearDecoder::reset()
+{
train_->reset();
}
void NbestHopeFearDecoder::HopeFear(
- const std::vector<ValType>& backgroundBleu,
- const MiraWeightVector& wv,
- HopeFearData* hopeFear
- ) {
+ const std::vector<ValType>& backgroundBleu,
+ const MiraWeightVector& wv,
+ HopeFearData* hopeFear
+)
+{
+
-
// Hope / fear decode
ValType hope_scale = 1.0;
size_t hope_index=0, fear_index=0, model_index=0;
@@ -134,7 +141,8 @@ void NbestHopeFearDecoder::HopeFear(
hopeFear->hopeFearEqual = (hope_index == fear_index);
}
-void NbestHopeFearDecoder::MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats) {
+void NbestHopeFearDecoder::MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats)
+{
// Find max model
size_t max_index=0;
ValType max_score=0;
@@ -152,18 +160,19 @@ void NbestHopeFearDecoder::MaxModel(const AvgWeightVector& wv, std::vector<ValTy
HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
- (
- const string& hypergraphDir,
- const vector<string>& referenceFiles,
- size_t num_dense,
- bool streaming,
- bool no_shuffle,
- bool safe_hope,
- size_t hg_pruning,
- const MiraWeightVector& wv,
- Scorer* scorer
- ) :
- num_dense_(num_dense) {
+(
+ const string& hypergraphDir,
+ const vector<string>& referenceFiles,
+ size_t num_dense,
+ bool streaming,
+ bool no_shuffle,
+ bool safe_hope,
+ size_t hg_pruning,
+ const MiraWeightVector& wv,
+ Scorer* scorer
+) :
+ num_dense_(num_dense)
+{
UTIL_THROW_IF(streaming, util::Exception, "Streaming not currently supported for hypergraphs");
UTIL_THROW_IF(!fs::exists(hypergraphDir), HypergraphException, "Directory '" << hypergraphDir << "' does not exist");
@@ -177,16 +186,17 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
static const string kWeights = "weights";
fs::directory_iterator dend;
size_t fileCount = 0;
-
+
cerr << "Reading hypergraphs" << endl;
for (fs::directory_iterator di(hypergraphDir); di != dend; ++di) {
const fs::path& hgpath = di->path();
if (hgpath.filename() == kWeights) continue;
+ // cerr << "Reading " << hgpath.filename() << endl;
Graph graph(vocab_);
size_t id = boost::lexical_cast<size_t>(hgpath.stem().string());
util::scoped_fd fd(util::OpenReadOrThrow(hgpath.string().c_str()));
//util::FilePiece file(di->path().string().c_str());
- util::FilePiece file(fd.release());
+ util::FilePiece file(fd.release());
ReadGraph(file,graph);
//cerr << "ref length " << references_.Length(id) << endl;
@@ -195,7 +205,7 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
prunedGraph.reset(new Graph(vocab_));
graph.Prune(prunedGraph.get(), weights, edgeCount);
graphs_[id] = prunedGraph;
- //cerr << "Pruning to v=" << graphs_[id]->VertexSize() << " e=" << graphs_[id]->EdgeSize() << endl;
+ // cerr << "Pruning to v=" << graphs_[id]->VertexSize() << " e=" << graphs_[id]->EdgeSize() << endl;
++fileCount;
if (fileCount % 10 == 0) cerr << ".";
if (fileCount % 400 == 0) cerr << " [count=" << fileCount << "]\n";
@@ -210,23 +220,27 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
}
-void HypergraphHopeFearDecoder::reset() {
+void HypergraphHopeFearDecoder::reset()
+{
sentenceIdIter_ = sentenceIds_.begin();
}
-void HypergraphHopeFearDecoder::next() {
+void HypergraphHopeFearDecoder::next()
+{
sentenceIdIter_++;
}
-bool HypergraphHopeFearDecoder::finished() {
+bool HypergraphHopeFearDecoder::finished()
+{
return sentenceIdIter_ == sentenceIds_.end();
}
void HypergraphHopeFearDecoder::HopeFear(
- const vector<ValType>& backgroundBleu,
- const MiraWeightVector& wv,
- HopeFearData* hopeFear
- ) {
+ const vector<ValType>& backgroundBleu,
+ const MiraWeightVector& wv,
+ HopeFearData* hopeFear
+)
+{
size_t sentenceId = *sentenceIdIter_;
SparseVector weights;
wv.ToSparse(&weights);
@@ -246,12 +260,12 @@ void HypergraphHopeFearDecoder::HopeFear(
Viterbi(graph, weights, 0, references_, sentenceId, backgroundBleu, &modelHypo);
- // Outer loop rescales the contribution of model score to 'hope' in antagonistic cases
+ // Outer loop rescales the contribution of model score to 'hope' in antagonistic cases
// where model score is having far more influence than BLEU
- // hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
- // if(safe_hope_ && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
- // hope_scale = abs(hope_bleu) / abs(hope_model);
- // else break;
+ // hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
+ // if(safe_hope_ && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
+ // hope_scale = abs(hope_bleu) / abs(hope_model);
+ // else break;
//TODO: Don't currently get model and bleu so commented this out for now.
break;
}
@@ -310,21 +324,23 @@ void HypergraphHopeFearDecoder::HopeFear(
if (hopeFear->hopeFearEqual) {
for (size_t i = 0; i < fearStats.size(); ++i) {
if (fearStats[i] != hopeFear->hopeStats[i]) {
- hopeFear->hopeFearEqual = false;
- break;
+ hopeFear->hopeFearEqual = false;
+ break;
}
}
}
hopeFear->hopeFearEqual = hopeFear->hopeFearEqual && (hopeFear->fearFeatures == hopeFear->hopeFeatures);
}
-void HypergraphHopeFearDecoder::MaxModel(const AvgWeightVector& wv, vector<ValType>* stats) {
+void HypergraphHopeFearDecoder::MaxModel(const AvgWeightVector& wv, vector<ValType>* stats)
+{
assert(!finished());
HgHypothesis bestHypo;
size_t sentenceId = *sentenceIdIter_;
SparseVector weights;
wv.ToSparse(&weights);
vector<ValType> bg(scorer_->NumberOfScores());
+ //cerr << "Calculating bleu on " << sentenceId << endl;
Viterbi(*(graphs_[sentenceId]), weights, 0, references_, sentenceId, bg, &bestHypo);
stats->resize(bestHypo.bleuStats.size());
/*
diff --git a/mert/HopeFearDecoder.h b/mert/HopeFearDecoder.h
index d1881eeb2..53c0e935d 100644
--- a/mert/HopeFearDecoder.h
+++ b/mert/HopeFearDecoder.h
@@ -35,7 +35,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
// the n-best list and lattice/hypergraph implementations
//
-namespace MosesTuning {
+namespace MosesTuning
+{
class Scorer;
@@ -44,7 +45,7 @@ struct HopeFearData {
MiraFeatureVector modelFeatures;
MiraFeatureVector hopeFeatures;
MiraFeatureVector fearFeatures;
-
+
std::vector<float> modelStats;
std::vector<float> hopeStats;
@@ -55,7 +56,8 @@ struct HopeFearData {
};
//Abstract base class
-class HopeFearDecoder {
+class HopeFearDecoder
+{
public:
//iterator methods
virtual void reset() = 0;
@@ -68,14 +70,14 @@ public:
* Calculate hope, fear and model hypotheses
**/
virtual void HopeFear(
- const std::vector<ValType>& backgroundBleu,
- const MiraWeightVector& wv,
- HopeFearData* hopeFear
- ) = 0;
+ const std::vector<ValType>& backgroundBleu,
+ const MiraWeightVector& wv,
+ HopeFearData* hopeFear
+ ) = 0;
/** Max score decoding */
virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats)
- = 0;
+ = 0;
/** Calculate bleu on training set */
ValType Evaluate(const AvgWeightVector& wv);
@@ -86,25 +88,26 @@ protected:
/** Gets hope-fear from nbest lists */
-class NbestHopeFearDecoder : public virtual HopeFearDecoder {
+class NbestHopeFearDecoder : public virtual HopeFearDecoder
+{
public:
NbestHopeFearDecoder(const std::vector<std::string>& featureFiles,
- const std::vector<std::string>& scoreFiles,
- bool streaming,
- bool no_shuffle,
- bool safe_hope,
- Scorer* scorer
- );
+ const std::vector<std::string>& scoreFiles,
+ bool streaming,
+ bool no_shuffle,
+ bool safe_hope,
+ Scorer* scorer
+ );
virtual void reset();
virtual void next();
virtual bool finished();
virtual void HopeFear(
- const std::vector<ValType>& backgroundBleu,
- const MiraWeightVector& wv,
- HopeFearData* hopeFear
- );
+ const std::vector<ValType>& backgroundBleu,
+ const MiraWeightVector& wv,
+ HopeFearData* hopeFear
+ );
virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats);
@@ -117,29 +120,30 @@ private:
/** Gets hope-fear from hypergraphs */
-class HypergraphHopeFearDecoder : public virtual HopeFearDecoder {
+class HypergraphHopeFearDecoder : public virtual HopeFearDecoder
+{
public:
HypergraphHopeFearDecoder(
- const std::string& hypergraphDir,
- const std::vector<std::string>& referenceFiles,
- size_t num_dense,
- bool streaming,
- bool no_shuffle,
- bool safe_hope,
- size_t hg_pruning,
- const MiraWeightVector& wv,
- Scorer* scorer_
- );
+ const std::string& hypergraphDir,
+ const std::vector<std::string>& referenceFiles,
+ size_t num_dense,
+ bool streaming,
+ bool no_shuffle,
+ bool safe_hope,
+ size_t hg_pruning,
+ const MiraWeightVector& wv,
+ Scorer* scorer_
+ );
virtual void reset();
virtual void next();
virtual bool finished();
virtual void HopeFear(
- const std::vector<ValType>& backgroundBleu,
- const MiraWeightVector& wv,
- HopeFearData* hopeFear
- );
+ const std::vector<ValType>& backgroundBleu,
+ const MiraWeightVector& wv,
+ HopeFearData* hopeFear
+ );
virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats);
diff --git a/mert/HwcmScorer.cpp b/mert/HwcmScorer.cpp
index 6aff77def..bb3cd4382 100644
--- a/mert/HwcmScorer.cpp
+++ b/mert/HwcmScorer.cpp
@@ -55,7 +55,8 @@ void HwcmScorer::setReferenceFiles(const vector<string>& referenceFiles)
}
-void HwcmScorer::extractHeadWordChain(TreePointer tree, vector<string> & history, vector<map<string, int> > & hwc) {
+void HwcmScorer::extractHeadWordChain(TreePointer tree, vector<string> & history, vector<map<string, int> > & hwc)
+{
if (tree->GetLength() > 0) {
string head = getHead(tree);
@@ -64,8 +65,7 @@ void HwcmScorer::extractHeadWordChain(TreePointer tree, vector<string> & history
for (std::vector<TreePointer>::const_iterator it = tree->GetChildren().begin(); it != tree->GetChildren().end(); ++it) {
extractHeadWordChain(*it, history, hwc);
}
- }
- else {
+ } else {
vector<string> new_history(kHwcmOrder);
new_history[0] = head;
hwc[0][head]++;
@@ -85,11 +85,11 @@ void HwcmScorer::extractHeadWordChain(TreePointer tree, vector<string> & history
}
}
-string HwcmScorer::getHead(TreePointer tree) {
+string HwcmScorer::getHead(TreePointer tree)
+{
// assumption (only true for dependency parse: each constituent has a preterminal label, and corresponding terminal is head)
// if constituent has multiple preterminals, first one is picked; if it has no preterminals, empty string is returned
- for (std::vector<TreePointer>::const_iterator it = tree->GetChildren().begin(); it != tree->GetChildren().end(); ++it)
- {
+ for (std::vector<TreePointer>::const_iterator it = tree->GetChildren().begin(); it != tree->GetChildren().end(); ++it) {
TreePointer child = *it;
if (child->GetLength() == 1 && child->GetChildren()[0]->IsTerminal()) {
diff --git a/mert/Hypergraph.cpp b/mert/Hypergraph.cpp
index b7725ead0..a8087acb5 100644
--- a/mert/Hypergraph.cpp
+++ b/mert/Hypergraph.cpp
@@ -31,18 +31,22 @@ using namespace std;
static const string kBOS = "<s>";
static const string kEOS = "</s>";
-namespace MosesTuning {
+namespace MosesTuning
+{
-StringPiece NextLine(util::FilePiece& from) {
+StringPiece NextLine(util::FilePiece& from)
+{
StringPiece line;
while ((line = from.ReadLine()).starts_with("#"));
return line;
}
-Vocab::Vocab() : eos_( FindOrAdd(kEOS)), bos_(FindOrAdd(kBOS)){
+Vocab::Vocab() : eos_( FindOrAdd(kEOS)), bos_(FindOrAdd(kBOS))
+{
}
-const Vocab::Entry &Vocab::FindOrAdd(const StringPiece &str) {
+const Vocab::Entry &Vocab::FindOrAdd(const StringPiece &str)
+{
#if BOOST_VERSION >= 104200
Map::const_iterator i= map_.find(str, Hash(), Equals());
#else
@@ -62,7 +66,8 @@ double_conversion::StringToDoubleConverter converter(double_conversion::StringTo
/**
* Reads an incoming edge. Returns edge and source words covered.
**/
-static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph) {
+static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph)
+{
Edge* edge = graph.NewEdge();
StringPiece line = from.ReadLine(); //Don't allow comments within edge lists
util::TokenIter<util::MultiCharacter> pipes(line, util::MultiCharacter(" ||| "));
@@ -82,7 +87,7 @@ static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph) {
edge->AddWord(&found);
}
}
-
+
//Features
++pipes;
for (util::TokenIter<util::SingleCharacter, true> i(*pipes, util::SingleCharacter(' ')); i; ++i) {
@@ -100,17 +105,18 @@ static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph) {
//Covered words
++pipes;
size_t sourceCovered = boost::lexical_cast<size_t>(*pipes);
- return pair<Edge*,size_t>(edge,sourceCovered);
+ return pair<Edge*,size_t>(edge,sourceCovered);
}
-void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeCount) const {
+void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeCount) const
+{
Graph& newGraph = *pNewGraph;
//TODO: Optimise case where no pruning required
//For debug
-
-
+
+
/*
map<const Edge*, string> edgeIds;
for (size_t i = 0; i < edges_.Size(); ++i) {
@@ -136,7 +142,7 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
//Compute backward scores
for (size_t vi = 0; vi < vertices_.Size(); ++vi) {
- // cerr << "Vertex " << vi << endl;
+ // cerr << "Vertex " << vi << endl;
const Vertex& vertex = vertices_[vi];
const vector<const Edge*>& incoming = vertex.GetIncoming();
if (!incoming.size()) {
@@ -150,7 +156,7 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
//cerr << "\tChild " << incoming[ei]->Children()[i] << endl;
size_t childId = incoming[ei]->Children()[i];
UTIL_THROW_IF(vertexBackwardScores[childId] == kMinScore,
- HypergraphException, "Graph was not topologically sorted. curr=" << vi << " prev=" << childId);
+ HypergraphException, "Graph was not topologically sorted. curr=" << vi << " prev=" << childId);
outgoing[childId].push_back(incoming[ei]);
incomingScore += vertexBackwardScores[childId];
}
@@ -172,7 +178,7 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
} else {
for (size_t ei = 0; ei < outgoing[vi].size(); ++ei) {
//cerr << "Edge " << edgeIds[outgoing[vi][ei]] << endl;
- FeatureStatsType outgoingScore = 0;
+ FeatureStatsType outgoingScore = 0;
//add score of head
outgoingScore += vertexForwardScores[edgeHeads[outgoing[vi][ei]]];
//cerr << "Forward score " << outgoingScore << endl;
@@ -204,11 +210,11 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
}
FeatureStatsType score = edgeForwardScores[edge] + edgeBackwardScores[edge];
edgeScores.insert(pair<FeatureStatsType, const Edge*>(score,edge));
- // cerr << edgeIds[edge] << " " << score << endl;
+ // cerr << edgeIds[edge] << " " << score << endl;
}
-
+
multimap<FeatureStatsType, const Edge*>::const_reverse_iterator ei = edgeScores.rbegin();
size_t edgeCount = 1;
while(edgeCount < minEdgeCount && ei != edgeScores.rend()) {
@@ -235,10 +241,10 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
map<size_t,size_t> oldIdToNew;
size_t vi = 0;
for (set<size_t>::const_iterator i = retainedVertices.begin(); i != retainedVertices.end(); ++i, ++vi) {
- //cerr << *i << " New: " << vi << endl;
+// cerr << *i << " New: " << vi << endl;
oldIdToNew[*i] = vi;
Vertex* vertex = newGraph.NewVertex();
- vertex->SetSourceCovered(vertices_[*i].SourceCovered());
+ vertex->SetSourceCovered(vertices_[*i].SourceCovered());
}
for (set<const Edge*>::const_iterator i = retainedEdges.begin(); i != retainedEdges.end(); ++i) {
@@ -255,6 +261,7 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
newHead.AddEdge(newEdge);
}
+
/*
cerr << "New graph" << endl;
for (size_t vi = 0; vi < newGraph.VertexSize(); ++vi) {
@@ -274,21 +281,22 @@ void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeC
}
cerr << endl;
}
- */
+ */
}
/**
* Read from "Kenneth's hypergraph" aka cdec target_graph format (with comments)
**/
-void ReadGraph(util::FilePiece &from, Graph &graph) {
+void ReadGraph(util::FilePiece &from, Graph &graph)
+{
//First line should contain field names
StringPiece line = from.ReadLine();
UTIL_THROW_IF(line.compare("# target ||| features ||| source-covered") != 0, HypergraphException, "Incorrect format spec on first line: '" << line << "'");
line = NextLine(from);
-
+
//Then expect numbers of vertices
util::TokenIter<util::SingleCharacter, false> i(line, util::SingleCharacter(' '));
unsigned long int vertices = boost::lexical_cast<unsigned long int>(*i);
@@ -303,9 +311,11 @@ void ReadGraph(util::FilePiece &from, Graph &graph) {
for (unsigned long int e = 0; e < edge_count; ++e) {
pair<Edge*,size_t> edge = ReadEdge(from, graph);
vertex->AddEdge(edge.first);
- //Note: the file format attaches this to the edge, but it's really a property
+ //Note: the file format attaches this to the edge, but it's really a property
//of the vertex.
- if (!e) {vertex->SetSourceCovered(edge.second);}
+ if (!e) {
+ vertex->SetSourceCovered(edge.second);
+ }
}
}
}
diff --git a/mert/Hypergraph.h b/mert/Hypergraph.h
index b6ee6c3f8..14226fb48 100644
--- a/mert/Hypergraph.h
+++ b/mert/Hypergraph.h
@@ -37,81 +37,88 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "FeatureStats.h"
-namespace MosesTuning {
+namespace MosesTuning
+{
typedef unsigned int WordIndex;
const WordIndex kMaxWordIndex = UINT_MAX;
-const FeatureStatsType kMinScore = -std::numeric_limits<FeatureStatsType>::max();
-
-template <class T> class FixedAllocator : boost::noncopyable {
- public:
- FixedAllocator() : current_(NULL), end_(NULL) {}
-
- void Init(std::size_t count) {
- assert(!current_);
- array_.reset(new T[count]);
- current_ = array_.get();
- end_ = current_ + count;
- }
-
- T &operator[](std::size_t idx) {
- return array_.get()[idx];
- }
- const T &operator[](std::size_t idx) const {
- return array_.get()[idx];
- }
-
- T *New() {
- T *ret = current_++;
- UTIL_THROW_IF(ret >= end_, util::Exception, "Allocating past end");
- return ret;
- }
-
- std::size_t Capacity() const {
- return end_ - array_.get();
- }
-
- std::size_t Size() const {
- return current_ - array_.get();
- }
-
- private:
- boost::scoped_array<T> array_;
- T *current_, *end_;
+const FeatureStatsType kMinScore = -1e10;
+
+template <class T> class FixedAllocator : boost::noncopyable
+{
+public:
+ FixedAllocator() : current_(NULL), end_(NULL) {}
+
+ void Init(std::size_t count) {
+ assert(!current_);
+ array_.reset(new T[count]);
+ current_ = array_.get();
+ end_ = current_ + count;
+ }
+
+ T &operator[](std::size_t idx) {
+ return array_.get()[idx];
+ }
+ const T &operator[](std::size_t idx) const {
+ return array_.get()[idx];
+ }
+
+ T *New() {
+ T *ret = current_++;
+ UTIL_THROW_IF(ret >= end_, util::Exception, "Allocating past end");
+ return ret;
+ }
+
+ std::size_t Capacity() const {
+ return end_ - array_.get();
+ }
+
+ std::size_t Size() const {
+ return current_ - array_.get();
+ }
+
+private:
+ boost::scoped_array<T> array_;
+ T *current_, *end_;
};
-class Vocab {
- public:
- Vocab();
+class Vocab
+{
+public:
+ Vocab();
- typedef std::pair<const char *const, WordIndex> Entry;
+ typedef std::pair<const char *const, WordIndex> Entry;
- const Entry &FindOrAdd(const StringPiece &str);
+ const Entry &FindOrAdd(const StringPiece &str);
- const Entry& Bos() const {return bos_;}
+ const Entry& Bos() const {
+ return bos_;
+ }
- const Entry& Eos() const {return eos_;}
+ const Entry& Eos() const {
+ return eos_;
+ }
- private:
- util::Pool piece_backing_;
+private:
+ util::Pool piece_backing_;
- struct Hash : public std::unary_function<const char *, std::size_t> {
- std::size_t operator()(StringPiece str) const {
- return util::MurmurHashNative(str.data(), str.size());
- }
- };
+ struct Hash : public std::unary_function<const char *, std::size_t> {
+ std::size_t operator()(StringPiece str) const {
+ return util::MurmurHashNative(str.data(), str.size());
+ }
+ };
- struct Equals : public std::binary_function<const char *, const char *, bool> {
- bool operator()(StringPiece first, StringPiece second) const {
- return first == second;
- }
- };
+ struct Equals : public std::binary_function<const char *, const char *, bool> {
+ bool operator()(StringPiece first, StringPiece second) const {
+ return first == second;
+ }
+ };
- typedef boost::unordered_map<const char *, WordIndex, Hash, Equals> Map;
- Map map_;
- Entry eos_;
- Entry bos_;
+ typedef boost::unordered_map<const char *, WordIndex, Hash, Equals> Map;
+ Map map_;
+ Entry eos_;
+ Entry bos_;
};
@@ -125,121 +132,141 @@ typedef boost::shared_ptr<SparseVector> FeaturePtr;
/**
* An edge has 1 head vertex, 0..n child (tail) vertices, a list of words and a feature vector.
**/
-class Edge {
- public:
- Edge() {features_.reset(new SparseVector());}
-
- void AddWord(const Vocab::Entry *word) {
- words_.push_back(word);
- }
-
- void AddChild(size_t child) {
- children_.push_back(child);
- }
-
- void AddFeature(const StringPiece& name, FeatureStatsType value) {
- //TODO StringPiece interface
- features_->set(name.as_string(),value);
- }
-
-
- const WordVec &Words() const {
- return words_;
- }
-
- const FeaturePtr& Features() const {
- return features_;
- }
-
- void SetFeatures(const FeaturePtr& features) {
- features_ = features;
- }
-
- const std::vector<size_t>& Children() const {
- return children_;
- }
-
- FeatureStatsType GetScore(const SparseVector& weights) const {
- return inner_product(*(features_.get()), weights);
- }
-
- private:
- // NULL for non-terminals.
- std::vector<const Vocab::Entry*> words_;
- std::vector<size_t> children_;
- boost::shared_ptr<SparseVector> features_;
+class Edge
+{
+public:
+ Edge() {
+ features_.reset(new SparseVector());
+ }
+
+ void AddWord(const Vocab::Entry *word) {
+ words_.push_back(word);
+ }
+
+ void AddChild(size_t child) {
+ children_.push_back(child);
+ }
+
+ void AddFeature(const StringPiece& name, FeatureStatsType value) {
+ //TODO StringPiece interface
+ features_->set(name.as_string(),value);
+ }
+
+
+ const WordVec &Words() const {
+ return words_;
+ }
+
+ const FeaturePtr& Features() const {
+ return features_;
+ }
+
+ void SetFeatures(const FeaturePtr& features) {
+ features_ = features;
+ }
+
+ const std::vector<size_t>& Children() const {
+ return children_;
+ }
+
+ FeatureStatsType GetScore(const SparseVector& weights) const {
+ return inner_product(*(features_.get()), weights);
+ }
+
+private:
+ // NULL for non-terminals.
+ std::vector<const Vocab::Entry*> words_;
+ std::vector<size_t> children_;
+ boost::shared_ptr<SparseVector> features_;
};
/*
* A vertex has 0..n incoming edges
**/
-class Vertex {
- public:
- Vertex() : sourceCovered_(0) {}
-
- void AddEdge(const Edge* edge) {incoming_.push_back(edge);}
-
- void SetSourceCovered(size_t sourceCovered) {sourceCovered_ = sourceCovered;}
-
- const std::vector<const Edge*>& GetIncoming() const {return incoming_;}
-
- size_t SourceCovered() const {return sourceCovered_;}
-
- private:
- std::vector<const Edge*> incoming_;
- size_t sourceCovered_;
+class Vertex
+{
+public:
+ Vertex() : sourceCovered_(0) {}
+
+ void AddEdge(const Edge* edge) {
+ incoming_.push_back(edge);
+ }
+
+ void SetSourceCovered(size_t sourceCovered) {
+ sourceCovered_ = sourceCovered;
+ }
+
+ const std::vector<const Edge*>& GetIncoming() const {
+ return incoming_;
+ }
+
+ size_t SourceCovered() const {
+ return sourceCovered_;
+ }
+
+private:
+ std::vector<const Edge*> incoming_;
+ size_t sourceCovered_;
};
-class Graph : boost::noncopyable {
- public:
- Graph(Vocab& vocab) : vocab_(vocab) {}
-
- void SetCounts(std::size_t vertices, std::size_t edges) {
- vertices_.Init(vertices);
- edges_.Init(edges);
- }
-
- Vocab &MutableVocab() { return vocab_; }
-
- Edge *NewEdge() {
- return edges_.New();
- }
-
- Vertex *NewVertex() {
- return vertices_.New();
- }
-
- const Vertex &GetVertex(std::size_t index) const {
- return vertices_[index];
- }
-
- Edge &GetEdge(std::size_t index) {
- return edges_[index];
- }
-
- /* Created a pruned copy of this graph with minEdgeCount edges. Uses
- the scores in the max-product semiring to rank edges, as suggested by
- Colin Cherry */
- void Prune(Graph* newGraph, const SparseVector& weights, size_t minEdgeCount) const;
-
- std::size_t VertexSize() const { return vertices_.Size(); }
- std::size_t EdgeSize() const { return edges_.Size(); }
-
- bool IsBoundary(const Vocab::Entry* word) const {
- return word->second == vocab_.Bos().second || word->second == vocab_.Eos().second;
- }
-
- private:
- FixedAllocator<Edge> edges_;
- FixedAllocator<Vertex> vertices_;
- Vocab& vocab_;
+class Graph : boost::noncopyable
+{
+public:
+ Graph(Vocab& vocab) : vocab_(vocab) {}
+
+ void SetCounts(std::size_t vertices, std::size_t edges) {
+ vertices_.Init(vertices);
+ edges_.Init(edges);
+ }
+
+ Vocab &MutableVocab() {
+ return vocab_;
+ }
+
+ Edge *NewEdge() {
+ return edges_.New();
+ }
+
+ Vertex *NewVertex() {
+ return vertices_.New();
+ }
+
+ const Vertex &GetVertex(std::size_t index) const {
+ return vertices_[index];
+ }
+
+ Edge &GetEdge(std::size_t index) {
+ return edges_[index];
+ }
+
+ /* Created a pruned copy of this graph with minEdgeCount edges. Uses
+ the scores in the max-product semiring to rank edges, as suggested by
+ Colin Cherry */
+ void Prune(Graph* newGraph, const SparseVector& weights, size_t minEdgeCount) const;
+
+ std::size_t VertexSize() const {
+ return vertices_.Size();
+ }
+ std::size_t EdgeSize() const {
+ return edges_.Size();
+ }
+
+ bool IsBoundary(const Vocab::Entry* word) const {
+ return word->second == vocab_.Bos().second || word->second == vocab_.Eos().second;
+ }
+
+private:
+ FixedAllocator<Edge> edges_;
+ FixedAllocator<Vertex> vertices_;
+ Vocab& vocab_;
};
-class HypergraphException : public util::Exception {
- public:
- HypergraphException() {}
- ~HypergraphException() throw() {}
+class HypergraphException : public util::Exception
+{
+public:
+ HypergraphException() {}
+ ~HypergraphException() throw() {}
};
diff --git a/mert/HypergraphTest.cpp b/mert/HypergraphTest.cpp
index 345a445f0..0dc1c04c6 100644
--- a/mert/HypergraphTest.cpp
+++ b/mert/HypergraphTest.cpp
@@ -8,12 +8,12 @@
using namespace std;
using namespace MosesTuning;
-BOOST_AUTO_TEST_CASE(prune)
+BOOST_AUTO_TEST_CASE(prune)
{
Vocab vocab;
WordVec words;
string wordStrings[] =
- {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
+ {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
for (size_t i = 0; i < 13; ++i) {
words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
}
@@ -105,7 +105,7 @@ BOOST_AUTO_TEST_CASE(prune)
BOOST_CHECK_EQUAL(5, pruned.EdgeSize());
BOOST_CHECK_EQUAL(4, pruned.VertexSize());
-
+
//edges retained should be best path (<s> ab jk </s>) and hi
BOOST_CHECK_EQUAL(1, pruned.GetVertex(0).GetIncoming().size());
BOOST_CHECK_EQUAL(2, pruned.GetVertex(1).GetIncoming().size());
@@ -115,37 +115,37 @@ BOOST_AUTO_TEST_CASE(prune)
const Edge* edge;
edge = pruned.GetVertex(0).GetIncoming()[0];
- BOOST_CHECK_EQUAL(1, edge->Words().size());
- BOOST_CHECK_EQUAL(words[0], edge->Words()[0]);
+ BOOST_CHECK_EQUAL(1, edge->Words().size());
+ BOOST_CHECK_EQUAL(words[0], edge->Words()[0]);
edge = pruned.GetVertex(1).GetIncoming()[0];
- BOOST_CHECK_EQUAL(3, edge->Words().size());
- BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
- BOOST_CHECK_EQUAL(words[2]->first, edge->Words()[1]->first);
- BOOST_CHECK_EQUAL(words[3]->first, edge->Words()[2]->first);
+ BOOST_CHECK_EQUAL(3, edge->Words().size());
+ BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
+ BOOST_CHECK_EQUAL(words[2]->first, edge->Words()[1]->first);
+ BOOST_CHECK_EQUAL(words[3]->first, edge->Words()[2]->first);
edge = pruned.GetVertex(1).GetIncoming()[1];
BOOST_CHECK_EQUAL(3, edge->Words().size());
BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
- BOOST_CHECK_EQUAL(words[9]->first, edge->Words()[1]->first);
+ BOOST_CHECK_EQUAL(words[9]->first, edge->Words()[1]->first);
BOOST_CHECK_EQUAL(words[10]->first, edge->Words()[2]->first);
edge = pruned.GetVertex(2).GetIncoming()[0];
BOOST_CHECK_EQUAL(3, edge->Words().size());
BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
- BOOST_CHECK_EQUAL(words[11]->first, edge->Words()[1]->first);
+ BOOST_CHECK_EQUAL(words[11]->first, edge->Words()[1]->first);
BOOST_CHECK_EQUAL(words[12]->first, edge->Words()[2]->first);
edge = pruned.GetVertex(3).GetIncoming()[0];
BOOST_CHECK_EQUAL(2, edge->Words().size());
BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
- BOOST_CHECK_EQUAL(words[1]->first, edge->Words()[1]->first);
+ BOOST_CHECK_EQUAL(words[1]->first, edge->Words()[1]->first);
+
+
-
+// BOOST_CHECK_EQUAL(words[0], pruned.GetVertex(0).GetIncoming()[0].Words()[0]);
-// BOOST_CHECK_EQUAL(words[0], pruned.GetVertex(0).GetIncoming()[0].Words()[0]);
-
}
diff --git a/mert/InterpolatedScorer.cpp b/mert/InterpolatedScorer.cpp
index ea4240472..b8ec3a855 100644
--- a/mert/InterpolatedScorer.cpp
+++ b/mert/InterpolatedScorer.cpp
@@ -174,19 +174,19 @@ float InterpolatedScorer::calculateScore(const std::vector<ScoreStatsType>& tota
float InterpolatedScorer::getReferenceLength(const std::vector<ScoreStatsType>& totals) const
{
- size_t scorerNum = 0;
- size_t last = 0;
- float refLen = 0;
- for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin();
- itsc != m_scorers.end(); ++itsc) {
- int numScoresScorer = (*itsc)->NumberOfScores();
- std::vector<ScoreStatsType> totals_scorer(totals.begin()+last, totals.begin()+last+numScoresScorer);
- refLen += (*itsc)->getReferenceLength(totals_scorer) * m_scorer_weights[scorerNum];
- last += numScoresScorer;
- scorerNum++;
- }
- return refLen;
+ size_t scorerNum = 0;
+ size_t last = 0;
+ float refLen = 0;
+ for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin();
+ itsc != m_scorers.end(); ++itsc) {
+ int numScoresScorer = (*itsc)->NumberOfScores();
+ std::vector<ScoreStatsType> totals_scorer(totals.begin()+last, totals.begin()+last+numScoresScorer);
+ refLen += (*itsc)->getReferenceLength(totals_scorer) * m_scorer_weights[scorerNum];
+ last += numScoresScorer;
+ scorerNum++;
}
+ return refLen;
+}
void InterpolatedScorer::setReferenceFiles(const vector<string>& referenceFiles)
{
diff --git a/mert/MeteorScorer.cpp b/mert/MeteorScorer.cpp
index 914fd02d4..3a7eb6ab7 100644
--- a/mert/MeteorScorer.cpp
+++ b/mert/MeteorScorer.cpp
@@ -34,7 +34,8 @@ namespace MosesTuning
#define CHILD_STDOUT_WRITE pipefds_output[1]
MeteorScorer::MeteorScorer(const string& config)
- : StatisticsBasedScorer("METEOR",config) {
+ : StatisticsBasedScorer("METEOR",config)
+{
meteor_jar = getConfig("jar", "");
meteor_lang = getConfig("lang", "en");
meteor_task = getConfig("task", "tune");
@@ -88,7 +89,8 @@ MeteorScorer::MeteorScorer(const string& config)
m_from_meteor = new ifdstream(CHILD_STDOUT_READ);
}
-MeteorScorer::~MeteorScorer() {
+MeteorScorer::~MeteorScorer()
+{
// Cleanup IO
delete m_to_meteor;
delete m_from_meteor;
@@ -171,7 +173,8 @@ float MeteorScorer::calculateScore(const vector<ScoreStatsType>& comps) const
// Meteor unsupported, throw error if used
MeteorScorer::MeteorScorer(const string& config)
- : StatisticsBasedScorer("METEOR",config) {
+ : StatisticsBasedScorer("METEOR",config)
+{
throw runtime_error("Meteor unsupported, requires GLIBCXX");
}
diff --git a/mert/MeteorScorer.h b/mert/MeteorScorer.h
index 9c3657018..31b05ec72 100644
--- a/mert/MeteorScorer.h
+++ b/mert/MeteorScorer.h
@@ -20,7 +20,7 @@ class ifdstream;
class ScoreStats;
/**
- * Meteor scoring
+ * Meteor scoring
*
* https://github.com/mjdenkowski/meteor
* http://statmt.org/wmt11/pdf/WMT07.pdf
diff --git a/mert/MiraFeatureVector.cpp b/mert/MiraFeatureVector.cpp
index 347ad488e..ad3588339 100644
--- a/mert/MiraFeatureVector.cpp
+++ b/mert/MiraFeatureVector.cpp
@@ -9,7 +9,8 @@ namespace MosesTuning
{
-void MiraFeatureVector::InitSparse(const SparseVector& sparse, size_t ignoreLimit) {
+void MiraFeatureVector::InitSparse(const SparseVector& sparse, size_t ignoreLimit)
+{
vector<size_t> sparseFeats = sparse.feats();
bool bFirst = true;
size_t lastFeat = 0;
@@ -40,7 +41,8 @@ MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
InitSparse(vec.sparse);
}
-MiraFeatureVector::MiraFeatureVector(const SparseVector& sparse, size_t num_dense) {
+MiraFeatureVector::MiraFeatureVector(const SparseVector& sparse, size_t num_dense)
+{
m_dense.resize(num_dense);
//Assume that features with id [0,num_dense) are the dense features
for (size_t id = 0; id < num_dense; ++id) {
@@ -162,7 +164,8 @@ MiraFeatureVector operator-(const MiraFeatureVector& a, const MiraFeatureVector&
return MiraFeatureVector(dense,sparseFeats,sparseVals);
}
-bool operator==(const MiraFeatureVector& a,const MiraFeatureVector& b) {
+bool operator==(const MiraFeatureVector& a,const MiraFeatureVector& b)
+{
ValType eps = 1e-8;
//dense features
if (a.m_dense.size() != b.m_dense.size()) return false;
diff --git a/mert/MiraWeightVector.cpp b/mert/MiraWeightVector.cpp
index c8a1ca774..eba9617c8 100644
--- a/mert/MiraWeightVector.cpp
+++ b/mert/MiraWeightVector.cpp
@@ -93,7 +93,8 @@ void MiraWeightVector::update(size_t index, ValType delta)
m_lastUpdated[index] = m_numUpdates;
}
-void MiraWeightVector::ToSparse(SparseVector* sparse) const {
+void MiraWeightVector::ToSparse(SparseVector* sparse) const
+{
for (size_t i = 0; i < m_weights.size(); ++i) {
if(abs(m_weights[i])>1e-8) {
sparse->set(i,m_weights[i]);
@@ -171,7 +172,8 @@ size_t AvgWeightVector::size() const
return m_wv.m_weights.size();
}
-void AvgWeightVector::ToSparse(SparseVector* sparse) const {
+void AvgWeightVector::ToSparse(SparseVector* sparse) const
+{
for (size_t i = 0; i < size(); ++i) {
ValType w = weight(i);
if(abs(w)>1e-8) {
diff --git a/mert/Optimizer.cpp b/mert/Optimizer.cpp
index 3f5aa48a6..5da32363f 100644
--- a/mert/Optimizer.cpp
+++ b/mert/Optimizer.cpp
@@ -168,8 +168,8 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction,
// The rightmost bestindex is the one with the highest slope.
// They should be equal but there might be.
- UTIL_THROW_IF(abs(leftmost->first-gradient.rbegin()->first) >= 0.0001,
- util::Exception, "Error");
+ UTIL_THROW_IF(abs(leftmost->first-gradient.rbegin()->first) >= 0.0001,
+ util::Exception, "Error");
// A small difference due to rounding error
break;
}
@@ -191,8 +191,8 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction,
if (tit == previnserted) {
// The threshold is the same as before can happen if 2 candidates are the same for example.
UTIL_THROW_IF(previnserted->second.back().first != newd.first,
- util::Exception,
- "Error");
+ util::Exception,
+ "Error");
previnserted->second.back()=newd; // just replace the 1 best for sentence S
// previnsert doesn't change
} else {
@@ -207,8 +207,8 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction,
// We append the diffs in previnsert to tit before destroying previnsert.
tit->second.insert(tit->second.end(),previnserted->second.begin(),previnserted->second.end());
UTIL_THROW_IF(tit->second.back().first != newd.first,
- util::Exception,
- "Error");
+ util::Exception,
+ "Error");
tit->second.back()=newd; // change diff for sentence S
thresholdmap.erase(previnserted); // erase old previnsert
previnserted = tit; // point previnsert to the new threshold
@@ -216,8 +216,8 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction,
}
UTIL_THROW_IF(previnserted == thresholdmap.end(),
- util::Exception,
- "Error");
+ util::Exception,
+ "Error");
} else { //normal insertion process
previnserted = AddThreshold(thresholdmap, leftmostx, newd);
}
@@ -254,8 +254,8 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction,
// We skipped the first el of thresholdlist but GetIncStatScore return 1 more for first1best.
UTIL_THROW_IF(scores.size() != thresholdmap.size(),
- util::Exception,
- "Error");
+ util::Exception,
+ "Error");
for (unsigned int sc = 0; sc != scores.size(); sc++) {
//cerr << "x=" << thrit->first << " => " << scores[sc] << endl;
diff --git a/mert/Point.cpp b/mert/Point.cpp
index 1db59ce66..55dc6a6b2 100644
--- a/mert/Point.cpp
+++ b/mert/Point.cpp
@@ -40,8 +40,8 @@ Point::Point(const vector<parameter_t>& init,
m_max[i] = max[i];
}
} else {
- UTIL_THROW_IF(init.size() != m_pdim, util::Exception, "Error");
- UTIL_THROW_IF(m_opt_indices.size() != Point::m_dim, util::Exception, "Error");
+ UTIL_THROW_IF(init.size() != m_pdim, util::Exception, "Error");
+ UTIL_THROW_IF(m_opt_indices.size() != Point::m_dim, util::Exception, "Error");
for (unsigned int i = 0; i < Point::m_dim; i++) {
operator[](i) = init[m_opt_indices[i]];
m_min[i] = min[m_opt_indices[i]];
diff --git a/mert/PreProcessFilter.cpp b/mert/PreProcessFilter.cpp
index a36ed6155..7a3add789 100644
--- a/mert/PreProcessFilter.cpp
+++ b/mert/PreProcessFilter.cpp
@@ -35,7 +35,7 @@ PreProcessFilter::PreProcessFilter(const string& filterCommand)
m_fromFilter(NULL)
{
#if defined __MINGW32__
- //TODO(jie): replace this function with boost implementation
+ //TODO(jie): replace this function with boost implementation
#else
// Child error signal install
// sigaction is the replacement for the traditional signal() method
diff --git a/mert/Scorer.cpp b/mert/Scorer.cpp
index ed3ff2458..ffaf03be4 100644
--- a/mert/Scorer.cpp
+++ b/mert/Scorer.cpp
@@ -25,9 +25,9 @@ const int kUnknownToken = -1;
Scorer::Scorer(const string& name, const string& config)
: m_name(name),
m_vocab(mert::VocabularyFactory::GetVocabulary()),
- #if defined(__GLIBCXX__) || defined(__GLIBCPP__)
+#if defined(__GLIBCXX__) || defined(__GLIBCPP__)
m_filter(NULL),
- #endif
+#endif
m_score_data(NULL),
m_enable_preserve_case(true)
{
diff --git a/mert/StatisticsBasedScorer.h b/mert/StatisticsBasedScorer.h
index f1c77e0ba..ba45634cc 100644
--- a/mert/StatisticsBasedScorer.h
+++ b/mert/StatisticsBasedScorer.h
@@ -23,7 +23,7 @@ namespace MosesTuning
*/
class StatisticsBasedScorer : public Scorer
{
-friend class HopeFearDecoder;
+ friend class HopeFearDecoder;
public:
StatisticsBasedScorer(const std::string& name, const std::string& config);
diff --git a/mert/TER/alignmentStruct.cpp b/mert/TER/alignmentStruct.cpp
index 544ee61ac..e42ec4a14 100644
--- a/mert/TER/alignmentStruct.cpp
+++ b/mert/TER/alignmentStruct.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -23,15 +23,15 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using namespace std;
namespace TERCpp
{
- string alignmentStruct::toString()
- {
- stringstream s;
+string alignmentStruct::toString()
+{
+ stringstream s;
// s << "nword : " << vectorToString(nwords)<<endl;
// s << "alignment" << vectorToString(alignment)<<endl;
// s << "afterShift" << vectorToString(alignment)<<endl;
- s << "Nothing to be printed" <<endl;
- return s.str();
- }
+ s << "Nothing to be printed" <<endl;
+ return s.str();
+}
// alignmentStruct::alignmentStruct()
// {
@@ -99,7 +99,7 @@ namespace TERCpp
// return s.str();
// }
- /* The distance of the shift. */
+/* The distance of the shift. */
// int alignmentStruct::distance()
// {
// if (moveto < start)
diff --git a/mert/TER/alignmentStruct.h b/mert/TER/alignmentStruct.h
index adda2c345..c1459960b 100644
--- a/mert/TER/alignmentStruct.h
+++ b/mert/TER/alignmentStruct.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -34,10 +34,10 @@ using namespace Tools;
namespace TERCpp
{
- class alignmentStruct
- {
- private:
- public:
+class alignmentStruct
+{
+private:
+public:
// alignmentStruct();
// alignmentStruct (int _start, int _end, int _moveto, int _newloc);
@@ -53,14 +53,14 @@ namespace TERCpp
// int end;
// int moveto;
// int newloc;
- vector<string> nwords; // The words we shifted
- vector<char> alignment ; // for pra_more output
- vector<vecInt> aftershift; // for pra_more output
- // This is used to store the cost of a shift, so we don't have to
- // calculate it multiple times.
- double cost;
- string toString();
- };
+ vector<string> nwords; // The words we shifted
+ vector<char> alignment ; // for pra_more output
+ vector<vecInt> aftershift; // for pra_more output
+ // This is used to store the cost of a shift, so we don't have to
+ // calculate it multiple times.
+ double cost;
+ string toString();
+};
}
#endif \ No newline at end of file
diff --git a/mert/TER/bestShiftStruct.h b/mert/TER/bestShiftStruct.h
index 9457fd1d8..d68f2319f 100644
--- a/mert/TER/bestShiftStruct.h
+++ b/mert/TER/bestShiftStruct.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -36,10 +36,10 @@ using namespace Tools;
namespace TERCpp
{
- class bestShiftStruct
- {
- private:
- public:
+class bestShiftStruct
+{
+private:
+public:
// alignmentStruct();
// alignmentStruct (int _start, int _end, int _moveto, int _newloc);
@@ -55,16 +55,16 @@ namespace TERCpp
// int end;
// int moveto;
// int newloc;
- terShift m_best_shift;
- terAlignment m_best_align;
- bool m_empty;
+ terShift m_best_shift;
+ terAlignment m_best_align;
+ bool m_empty;
// vector<string> nwords; // The words we shifted
// char* alignment ; // for pra_more output
// vector<vecInt> aftershift; // for pra_more output
- // This is used to store the cost of a shift, so we don't have to
- // calculate it multiple times.
+ // This is used to store the cost of a shift, so we don't have to
+ // calculate it multiple times.
// double cost;
- };
+};
}
#endif \ No newline at end of file
diff --git a/mert/TER/hashMap.cpp b/mert/TER/hashMap.cpp
index de84ff796..253fda715 100644
--- a/mert/TER/hashMap.cpp
+++ b/mert/TER/hashMap.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -28,156 +28,142 @@ using namespace std;
namespace HashMapSpace
{
// hashMap::hashMap();
- /* hashMap::~hashMap()
- {
- // vector<stringHasher>::const_iterator del = m_hasher.begin();
- for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
- {
- delete(*del);
- }
- }*/
- /**
- * int hashMap::trouve ( long searchKey )
- * @param searchKey
- * @return
- */
- int hashMap::trouve ( long searchKey )
+/* hashMap::~hashMap()
{
- long foundKey;
+// vector<stringHasher>::const_iterator del = m_hasher.begin();
+ for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
+ {
+ delete(*del);
+ }
+ }*/
+/**
+ * int hashMap::trouve ( long searchKey )
+ * @param searchKey
+ * @return
+ */
+int hashMap::trouve ( long searchKey )
+{
+ long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return 1;
- }
- }
- return 0;
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return 1;
}
- int hashMap::trouve ( string key )
- {
- long searchKey=hashValue ( key );
- long foundKey;;
+ }
+ return 0;
+}
+int hashMap::trouve ( string key )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return 1;
- }
- }
- return 0;
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return 1;
}
- /**
- * long hashMap::hashValue ( string key )
- * @param key
- * @return
- */
- long hashMap::hashValue ( string key )
- {
- locale loc; // the "C" locale
- const collate<char>& coll = use_facet<collate<char> >(loc);
- return coll.hash(key.data(),key.data()+key.length());
+ }
+ return 0;
+}
+/**
+ * long hashMap::hashValue ( string key )
+ * @param key
+ * @return
+ */
+long hashMap::hashValue ( string key )
+{
+ locale loc; // the "C" locale
+ const collate<char>& coll = use_facet<collate<char> >(loc);
+ return coll.hash(key.data(),key.data()+key.length());
// boost::hash<string> hasher;
// return hasher ( key );
- }
- /**
- * void hashMap::addHasher ( string key, string value )
- * @param key
- * @param value
- */
- void hashMap::addHasher ( string key, string value )
- {
- if ( trouve ( hashValue ( key ) ) ==0 )
- {
+}
+/**
+ * void hashMap::addHasher ( string key, string value )
+ * @param key
+ * @param value
+ */
+void hashMap::addHasher ( string key, string value )
+{
+ if ( trouve ( hashValue ( key ) ) ==0 ) {
// cerr << "ICI1" <<endl;
- stringHasher H ( hashValue ( key ),key,value );
+ stringHasher H ( hashValue ( key ),key,value );
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
// cerr << "ICI2" <<endl;
- m_hasher.push_back ( H );
- }
- }
- stringHasher hashMap::getHasher ( string key )
- {
- long searchKey=hashValue ( key );
- long foundKey;
- stringHasher defaut(0,"","");
+ m_hasher.push_back ( H );
+ }
+}
+stringHasher hashMap::getHasher ( string key )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;
+ stringHasher defaut(0,"","");
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return ( *l_hasher );
- }
- }
- return defaut;
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return ( *l_hasher );
}
- string hashMap::getValue ( string key )
- {
- long searchKey=hashValue ( key );
- long foundKey;
+ }
+ return defaut;
+}
+string hashMap::getValue ( string key )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
- return ( *l_hasher ).getValue();
- }
- }
- return "";
+ return ( *l_hasher ).getValue();
}
- string hashMap::searchValue ( string value )
- {
+ }
+ return "";
+}
+string hashMap::searchValue ( string value )
+{
// long searchKey=hashValue ( key );
// long foundKey;
- string foundValue;
+ string foundValue;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundValue= ( *l_hasher ).getValue();
- if ( foundValue.compare ( value ) == 0 )
- {
- return ( *l_hasher ).getKey();
- }
- }
- return "";
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundValue= ( *l_hasher ).getValue();
+ if ( foundValue.compare ( value ) == 0 ) {
+ return ( *l_hasher ).getKey();
}
+ }
+ return "";
+}
- void hashMap::setValue ( string key , string value )
- {
- long searchKey=hashValue ( key );
- long foundKey;
+void hashMap::setValue ( string key , string value )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- ( *l_hasher ).setValue ( value );
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ ( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue();
- }
- }
}
+ }
+}
- /**
- *
- */
- void hashMap::printHash()
- {
- for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
- }
+/**
+ *
+ */
+void hashMap::printHash()
+{
+ for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+}
diff --git a/mert/TER/hashMap.h b/mert/TER/hashMap.h
index 6cb721573..c2708b360 100644
--- a/mert/TER/hashMap.h
+++ b/mert/TER/hashMap.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -35,27 +35,27 @@ using namespace std;
namespace HashMapSpace
{
- class hashMap
- {
- private:
- vector<stringHasher> m_hasher;
+class hashMap
+{
+private:
+ vector<stringHasher> m_hasher;
- public:
+public:
// ~hashMap();
- long hashValue ( string key );
- int trouve ( long searchKey );
- int trouve ( string key );
- void addHasher ( string key, string value );
- stringHasher getHasher ( string key );
- string getValue ( string key );
- string searchValue ( string key );
- void setValue ( string key , string value );
- void printHash();
- vector<stringHasher> getHashMap();
- string printStringHash();
- string printStringHash2();
- string printStringHashForLexicon();
- };
+ long hashValue ( string key );
+ int trouve ( long searchKey );
+ int trouve ( string key );
+ void addHasher ( string key, string value );
+ stringHasher getHasher ( string key );
+ string getValue ( string key );
+ string searchValue ( string key );
+ void setValue ( string key , string value );
+ void printHash();
+ vector<stringHasher> getHashMap();
+ string printStringHash();
+ string printStringHash2();
+ string printStringHashForLexicon();
+};
}
diff --git a/mert/TER/hashMapInfos.cpp b/mert/TER/hashMapInfos.cpp
index 23f57d808..0ab6d21b2 100644
--- a/mert/TER/hashMapInfos.cpp
+++ b/mert/TER/hashMapInfos.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -28,117 +28,108 @@ using namespace std;
namespace HashMapSpace
{
// hashMapInfos::hashMap();
- /* hashMapInfos::~hashMap()
- {
- // vector<infosHasher>::const_iterator del = m_hasher.begin();
- for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
- {
- delete(*del);
- }
- }*/
- /**
- * int hashMapInfos::trouve ( long searchKey )
- * @param searchKey
- * @return
- */
- int hashMapInfos::trouve ( long searchKey )
+/* hashMapInfos::~hashMap()
{
- long foundKey;
+// vector<infosHasher>::const_iterator del = m_hasher.begin();
+ for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
+ {
+ delete(*del);
+ }
+ }*/
+/**
+ * int hashMapInfos::trouve ( long searchKey )
+ * @param searchKey
+ * @return
+ */
+int hashMapInfos::trouve ( long searchKey )
+{
+ long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return 1;
- }
- }
- return 0;
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return 1;
}
- int hashMapInfos::trouve ( string key )
- {
- long searchKey=hashValue ( key );
- long foundKey;;
+ }
+ return 0;
+}
+int hashMapInfos::trouve ( string key )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return 1;
- }
- }
- return 0;
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return 1;
}
+ }
+ return 0;
+}
- /**
- * long hashMapInfos::hashValue ( string key )
- * @param key
- * @return
- */
- long hashMapInfos::hashValue ( string key )
- {
- locale loc; // the "C" locale
- const collate<char>& coll = use_facet<collate<char> >(loc);
- return coll.hash(key.data(),key.data()+key.length());
+/**
+ * long hashMapInfos::hashValue ( string key )
+ * @param key
+ * @return
+ */
+long hashMapInfos::hashValue ( string key )
+{
+ locale loc; // the "C" locale
+ const collate<char>& coll = use_facet<collate<char> >(loc);
+ return coll.hash(key.data(),key.data()+key.length());
// boost::hash<string> hasher;
// return hasher ( key );
- }
- /**
- * void hashMapInfos::addHasher ( string key, string value )
- * @param key
- * @param value
- */
- void hashMapInfos::addHasher ( string key, vector<int> value )
- {
- if ( trouve ( hashValue ( key ) ) ==0 )
- {
+}
+/**
+ * void hashMapInfos::addHasher ( string key, string value )
+ * @param key
+ * @param value
+ */
+void hashMapInfos::addHasher ( string key, vector<int> value )
+{
+ if ( trouve ( hashValue ( key ) ) ==0 ) {
// cerr << "ICI1" <<endl;
- infosHasher H ( hashValue ( key ),key,value );
+ infosHasher H ( hashValue ( key ),key,value );
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
// cerr << "ICI2" <<endl;
- m_hasher.push_back ( H );
- }
- }
- void hashMapInfos::addValue ( string key, vector<int> value )
- {
- addHasher ( key, value );
- }
- infosHasher hashMapInfos::getHasher ( string key )
- {
- long searchKey=hashValue ( key );
- long foundKey;
+ m_hasher.push_back ( H );
+ }
+}
+void hashMapInfos::addValue ( string key, vector<int> value )
+{
+ addHasher ( key, value );
+}
+infosHasher hashMapInfos::getHasher ( string key )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return ( *l_hasher );
- }
- }
- vector<int> temp;
- infosHasher defaut(0,"",temp);
- return defaut;
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return ( *l_hasher );
}
- vector<int> hashMapInfos::getValue ( string key )
- {
- long searchKey=hashValue ( key );
- long foundKey;
- vector<int> retour;
+ }
+ vector<int> temp;
+ infosHasher defaut(0,"",temp);
+ return defaut;
+}
+vector<int> hashMapInfos::getValue ( string key )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;
+ vector<int> retour;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
- return ( *l_hasher ).getValue();
- }
- }
- return retour;
+ return ( *l_hasher ).getValue();
}
+ }
+ return retour;
+}
// string hashMapInfos::searchValue ( string value )
// {
// // long searchKey=hashValue ( key );
@@ -158,42 +149,38 @@ namespace HashMapSpace
// }
//
- void hashMapInfos::setValue ( string key , vector<int> value )
- {
- long searchKey=hashValue ( key );
- long foundKey;
+void hashMapInfos::setValue ( string key , vector<int> value )
+{
+ long searchKey=hashValue ( key );
+ long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
- foundKey= ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- ( *l_hasher ).setValue ( value );
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
+ foundKey= ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ ( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue();
- }
- }
- }
- string hashMapInfos::toString ()
- {
- stringstream to_return;
- for ( vector<infosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- to_return << (*l_hasher).toString();
- // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
- return to_return.str();
}
+ }
+}
+string hashMapInfos::toString ()
+{
+ stringstream to_return;
+ for ( vector<infosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ to_return << (*l_hasher).toString();
+ // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+ return to_return.str();
+}
- /**
- *
- */
- void hashMapInfos::printHash()
- {
- for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- {
+/**
+ *
+ */
+void hashMapInfos::printHash()
+{
+ for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
- }
+ }
+}
diff --git a/mert/TER/hashMapInfos.h b/mert/TER/hashMapInfos.h
index 5e7dbb6e7..e975aa738 100644
--- a/mert/TER/hashMapInfos.h
+++ b/mert/TER/hashMapInfos.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -34,29 +34,29 @@ using namespace std;
namespace HashMapSpace
{
- class hashMapInfos
- {
- private:
- vector<infosHasher> m_hasher;
+class hashMapInfos
+{
+private:
+ vector<infosHasher> m_hasher;
- public:
+public:
// ~hashMap();
- long hashValue ( string key );
- int trouve ( long searchKey );
- int trouve ( string key );
- void addHasher ( string key, vector<int> value );
- void addValue ( string key, vector<int> value );
- infosHasher getHasher ( string key );
- vector<int> getValue ( string key );
+ long hashValue ( string key );
+ int trouve ( long searchKey );
+ int trouve ( string key );
+ void addHasher ( string key, vector<int> value );
+ void addValue ( string key, vector<int> value );
+ infosHasher getHasher ( string key );
+ vector<int> getValue ( string key );
// string searchValue ( string key );
- void setValue ( string key , vector<int> value );
- void printHash();
- string toString();
- vector<infosHasher> getHashMap();
- string printStringHash();
- string printStringHash2();
- string printStringHashForLexicon();
- };
+ void setValue ( string key , vector<int> value );
+ void printHash();
+ string toString();
+ vector<infosHasher> getHashMap();
+ string printStringHash();
+ string printStringHash2();
+ string printStringHashForLexicon();
+};
}
diff --git a/mert/TER/hashMapStringInfos.cpp b/mert/TER/hashMapStringInfos.cpp
index 773c148d4..d984bdadc 100644
--- a/mert/TER/hashMapStringInfos.cpp
+++ b/mert/TER/hashMapStringInfos.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -27,179 +27,166 @@ using namespace std;
namespace HashMapSpace
{
- // hashMapStringInfos::hashMap();
- /* hashMapStringInfos::~hashMap()
- {
- // vector<stringInfosHasher>::const_iterator del = m_hasher.begin();
- for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
- {
- delete(*del);
- }
- }*/
- /**
- * int hashMapStringInfos::trouve ( long searchKey )
- * @param searchKey
- * @return
- */
- int hashMapStringInfos::trouve ( long searchKey )
- {
- long foundKey;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return 1;
- }
- }
- return 0;
+// hashMapStringInfos::hashMap();
+/* hashMapStringInfos::~hashMap()
+{
+// vector<stringInfosHasher>::const_iterator del = m_hasher.begin();
+ for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
+ {
+ delete(*del);
+ }
+}*/
+/**
+* int hashMapStringInfos::trouve ( long searchKey )
+* @param searchKey
+* @return
+*/
+int hashMapStringInfos::trouve ( long searchKey )
+{
+ long foundKey;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return 1;
}
+ }
+ return 0;
+}
- int hashMapStringInfos::trouve ( string key )
- {
- long searchKey = hashValue ( key );
- long foundKey;;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return 1;
- }
- }
- return 0;
+int hashMapStringInfos::trouve ( string key )
+{
+ long searchKey = hashValue ( key );
+ long foundKey;;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return 1;
}
+ }
+ return 0;
+}
- /**
- * long hashMapStringInfos::hashValue ( string key )
- * @param key
- * @return
- */
- long hashMapStringInfos::hashValue ( string key )
- {
- locale loc; // the "C" locale
- const collate<char>& coll = use_facet<collate<char> > ( loc );
- return coll.hash ( key.data(), key.data() + key.length() );
+/**
+* long hashMapStringInfos::hashValue ( string key )
+* @param key
+* @return
+*/
+long hashMapStringInfos::hashValue ( string key )
+{
+ locale loc; // the "C" locale
+ const collate<char>& coll = use_facet<collate<char> > ( loc );
+ return coll.hash ( key.data(), key.data() + key.length() );
// boost::hash<string> hasher;
// return hasher ( key );
+}
+/**
+* void hashMapStringInfos::addHasher ( string key, string value )
+* @param key
+* @param value
+*/
+void hashMapStringInfos::addHasher ( string key, vector<string> value )
+{
+ if ( trouve ( hashValue ( key ) ) == 0 ) {
+ // cerr << "ICI1" <<endl;
+ stringInfosHasher H ( hashValue ( key ), key, value );
+ // cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
+ // cerr << "ICI2" <<endl;
+
+ m_hasher.push_back ( H );
+ }
+}
+void hashMapStringInfos::addValue ( string key, vector<string> value )
+{
+ addHasher ( key, value );
+}
+stringInfosHasher hashMapStringInfos::getHasher ( string key )
+{
+ long searchKey = hashValue ( key );
+ long foundKey;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ return ( *l_hasher );
}
- /**
- * void hashMapStringInfos::addHasher ( string key, string value )
- * @param key
- * @param value
- */
- void hashMapStringInfos::addHasher ( string key, vector<string> value )
- {
- if ( trouve ( hashValue ( key ) ) == 0 )
- {
- // cerr << "ICI1" <<endl;
- stringInfosHasher H ( hashValue ( key ), key, value );
- // cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
- // cerr << "ICI2" <<endl;
-
- m_hasher.push_back ( H );
- }
- }
- void hashMapStringInfos::addValue ( string key, vector<string> value )
- {
- addHasher ( key, value );
- }
- stringInfosHasher hashMapStringInfos::getHasher ( string key )
- {
- long searchKey = hashValue ( key );
- long foundKey;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- return ( *l_hasher );
- }
- }
- vector<string> tmp;
- stringInfosHasher defaut ( 0, "", tmp );
- return defaut;
- }
- vector<string> hashMapStringInfos::getValue ( string key )
- {
- long searchKey = hashValue ( key );
- long foundKey;
- vector<string> retour;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- // cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
- return ( *l_hasher ).getValue();
- }
- }
- return retour;
+ }
+ vector<string> tmp;
+ stringInfosHasher defaut ( 0, "", tmp );
+ return defaut;
+}
+vector<string> hashMapStringInfos::getValue ( string key )
+{
+ long searchKey = hashValue ( key );
+ long foundKey;
+ vector<string> retour;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ // cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
+ return ( *l_hasher ).getValue();
}
- // string hashMapStringInfos::searchValue ( string value )
- // {
- // // long searchKey=hashValue ( key );
- // // long foundKey;
- // vector<int> foundValue;
- //
- // // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- // for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
- // {
- // foundValue= ( *l_hasher ).getValue();
- // /* if ( foundValue.compare ( value ) == 0 )
- // {
- // return ( *l_hasher ).getKey();
- // }*/
- // }
- // return "";
- // }
- //
-
- void hashMapStringInfos::setValue ( string key , vector<string> value )
- {
- long searchKey = hashValue ( key );
- long foundKey;
- // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- foundKey = ( *l_hasher ).getHashKey();
- if ( searchKey == foundKey )
- {
- ( *l_hasher ).setValue ( value );
- // return ( *l_hasher ).getValue();
- }
- }
+ }
+ return retour;
+}
+// string hashMapStringInfos::searchValue ( string value )
+// {
+// // long searchKey=hashValue ( key );
+// // long foundKey;
+// vector<int> foundValue;
+//
+// // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+// for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
+// {
+// foundValue= ( *l_hasher ).getValue();
+// /* if ( foundValue.compare ( value ) == 0 )
+// {
+// return ( *l_hasher ).getKey();
+// }*/
+// }
+// return "";
+// }
+//
+
+void hashMapStringInfos::setValue ( string key , vector<string> value )
+{
+ long searchKey = hashValue ( key );
+ long foundKey;
+ // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ foundKey = ( *l_hasher ).getHashKey();
+ if ( searchKey == foundKey ) {
+ ( *l_hasher ).setValue ( value );
+ // return ( *l_hasher ).getValue();
}
+ }
+}
- string hashMapStringInfos::toString ()
- {
- stringstream to_return;
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- to_return << (*l_hasher).toString();
- // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
- return to_return.str();
- }
+string hashMapStringInfos::toString ()
+{
+ stringstream to_return;
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ to_return << (*l_hasher).toString();
+ // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+ return to_return.str();
+}
- /**
- *
- */
- void hashMapStringInfos::printHash()
- {
- for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
- {
- // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
- }
- }
- vector< stringInfosHasher > hashMapStringInfos::getHashMap()
- {
- return m_hasher;
- }
+/**
+*
+*/
+void hashMapStringInfos::printHash()
+{
+ for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
+ // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
+ }
+}
+vector< stringInfosHasher > hashMapStringInfos::getHashMap()
+{
+ return m_hasher;
+}
diff --git a/mert/TER/hashMapStringInfos.h b/mert/TER/hashMapStringInfos.h
index 5337d50f2..a0eae951d 100644
--- a/mert/TER/hashMapStringInfos.h
+++ b/mert/TER/hashMapStringInfos.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -34,29 +34,29 @@ using namespace std;
namespace HashMapSpace
{
- class hashMapStringInfos
- {
- private:
- vector<stringInfosHasher> m_hasher;
+class hashMapStringInfos
+{
+private:
+ vector<stringInfosHasher> m_hasher;
- public:
+public:
// ~hashMap();
- long hashValue ( string key );
- int trouve ( long searchKey );
- int trouve ( string key );
- void addHasher ( string key, vector<string> value );
- void addValue ( string key, vector<string> value );
- stringInfosHasher getHasher ( string key );
- vector<string> getValue ( string key );
+ long hashValue ( string key );
+ int trouve ( long searchKey );
+ int trouve ( string key );
+ void addHasher ( string key, vector<string> value );
+ void addValue ( string key, vector<string> value );
+ stringInfosHasher getHasher ( string key );
+ vector<string> getValue ( string key );
// string searchValue ( string key );
- void setValue ( string key , vector<string> value );
- void printHash();
- string toString();
- vector<stringInfosHasher> getHashMap();
- string printStringHash();
- string printStringHash2();
- string printStringHashForLexicon();
- };
+ void setValue ( string key , vector<string> value );
+ void printHash();
+ string toString();
+ vector<stringInfosHasher> getHashMap();
+ string printStringHash();
+ string printStringHash2();
+ string printStringHashForLexicon();
+};
}
diff --git a/mert/TER/infosHasher.cpp b/mert/TER/infosHasher.cpp
index 8ce23ae44..450b70d94 100644
--- a/mert/TER/infosHasher.cpp
+++ b/mert/TER/infosHasher.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -27,35 +27,35 @@ using namespace Tools;
namespace HashMapSpace
{
- infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt )
- {
- m_hashKey=cle;
- m_key=cleTxt;
- m_value=valueVecInt;
- }
+infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt )
+{
+ m_hashKey=cle;
+ m_key=cleTxt;
+ m_value=valueVecInt;
+}
// infosHasher::~infosHasher(){};*/
- long infosHasher::getHashKey()
- {
- return m_hashKey;
- }
- string infosHasher::getKey()
- {
- return m_key;
- }
- vector<int> infosHasher::getValue()
- {
- return m_value;
- }
- void infosHasher::setValue ( vector<int> value )
- {
- m_value=value;
- }
- string infosHasher::toString()
- {
- stringstream to_return;
- to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
- return to_return.str();
- }
+long infosHasher::getHashKey()
+{
+ return m_hashKey;
+}
+string infosHasher::getKey()
+{
+ return m_key;
+}
+vector<int> infosHasher::getValue()
+{
+ return m_value;
+}
+void infosHasher::setValue ( vector<int> value )
+{
+ m_value=value;
+}
+string infosHasher::toString()
+{
+ stringstream to_return;
+ to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
+ return to_return.str();
+}
// typedef stdext::hash_map<std::string,string, stringhasher> HASH_S_S;
diff --git a/mert/TER/infosHasher.h b/mert/TER/infosHasher.h
index d3d56317a..ab9c7b5ed 100644
--- a/mert/TER/infosHasher.h
+++ b/mert/TER/infosHasher.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -31,23 +31,23 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using namespace std;
namespace HashMapSpace
{
- class infosHasher
- {
- private:
- long m_hashKey;
- string m_key;
- vector<int> m_value;
-
- public:
- infosHasher ( long cle, string cleTxt, vector<int> valueVecInt );
- long getHashKey();
- string getKey();
- vector<int> getValue();
- void setValue ( vector<int> value );
- string toString();
-
-
- };
+class infosHasher
+{
+private:
+ long m_hashKey;
+ string m_key;
+ vector<int> m_value;
+
+public:
+ infosHasher ( long cle, string cleTxt, vector<int> valueVecInt );
+ long getHashKey();
+ string getKey();
+ vector<int> getValue();
+ void setValue ( vector<int> value );
+ string toString();
+
+
+};
}
diff --git a/mert/TER/stringHasher.cpp b/mert/TER/stringHasher.cpp
index f4d1526e8..729310352 100644
--- a/mert/TER/stringHasher.cpp
+++ b/mert/TER/stringHasher.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -26,29 +26,29 @@ using namespace std;
namespace HashMapSpace
{
- stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt )
- {
- m_hashKey=cle;
- m_key=cleTxt;
- m_value=valueTxt;
- }
+stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt )
+{
+ m_hashKey=cle;
+ m_key=cleTxt;
+ m_value=valueTxt;
+}
// stringHasher::~stringHasher(){};*/
- long stringHasher::getHashKey()
- {
- return m_hashKey;
- }
- string stringHasher::getKey()
- {
- return m_key;
- }
- string stringHasher::getValue()
- {
- return m_value;
- }
- void stringHasher::setValue ( string value )
- {
- m_value=value;
- }
+long stringHasher::getHashKey()
+{
+ return m_hashKey;
+}
+string stringHasher::getKey()
+{
+ return m_key;
+}
+string stringHasher::getValue()
+{
+ return m_value;
+}
+void stringHasher::setValue ( string value )
+{
+ m_value=value;
+}
// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;
diff --git a/mert/TER/stringHasher.h b/mert/TER/stringHasher.h
index d831f642c..5b0ccfc94 100644
--- a/mert/TER/stringHasher.h
+++ b/mert/TER/stringHasher.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -28,22 +28,22 @@ using namespace std;
namespace HashMapSpace
{
- class stringHasher
- {
- private:
- long m_hashKey;
- string m_key;
- string m_value;
+class stringHasher
+{
+private:
+ long m_hashKey;
+ string m_key;
+ string m_value;
- public:
- stringHasher ( long cle, string cleTxt, string valueTxt );
- long getHashKey();
- string getKey();
- string getValue();
- void setValue ( string value );
+public:
+ stringHasher ( long cle, string cleTxt, string valueTxt );
+ long getHashKey();
+ string getKey();
+ string getValue();
+ void setValue ( string value );
- };
+};
}
diff --git a/mert/TER/stringInfosHasher.cpp b/mert/TER/stringInfosHasher.cpp
index 007fd720f..ecbc10fa5 100644
--- a/mert/TER/stringInfosHasher.cpp
+++ b/mert/TER/stringInfosHasher.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -27,35 +27,35 @@ using namespace Tools;
namespace HashMapSpace
{
- stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt )
- {
- m_hashKey=cle;
- m_key=cleTxt;
- m_value=valueVecInt;
- }
+stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt )
+{
+ m_hashKey=cle;
+ m_key=cleTxt;
+ m_value=valueVecInt;
+}
// stringInfosHasher::~stringInfosHasher(){};*/
- long stringInfosHasher::getHashKey()
- {
- return m_hashKey;
- }
- string stringInfosHasher::getKey()
- {
- return m_key;
- }
- vector<string> stringInfosHasher::getValue()
- {
- return m_value;
- }
- void stringInfosHasher::setValue ( vector<string> value )
- {
- m_value=value;
- }
- string stringInfosHasher::toString()
- {
- stringstream to_return;
- to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
- return to_return.str();
- }
+long stringInfosHasher::getHashKey()
+{
+ return m_hashKey;
+}
+string stringInfosHasher::getKey()
+{
+ return m_key;
+}
+vector<string> stringInfosHasher::getValue()
+{
+ return m_value;
+}
+void stringInfosHasher::setValue ( vector<string> value )
+{
+ m_value=value;
+}
+string stringInfosHasher::toString()
+{
+ stringstream to_return;
+ to_return << m_hashKey << "\t" << m_key << "\t" << vectorToString(m_value,"\t") << endl;
+ return to_return.str();
+}
// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;
diff --git a/mert/TER/stringInfosHasher.h b/mert/TER/stringInfosHasher.h
index 307b48da7..e4369f27a 100644
--- a/mert/TER/stringInfosHasher.h
+++ b/mert/TER/stringInfosHasher.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -29,23 +29,23 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using namespace std;
namespace HashMapSpace
{
- class stringInfosHasher
- {
- private:
- long m_hashKey;
- string m_key;
- vector<string> m_value;
-
- public:
- stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt );
- long getHashKey();
- string getKey();
- vector<string> getValue();
- void setValue ( vector<string> value );
- string toString();
-
-
- };
+class stringInfosHasher
+{
+private:
+ long m_hashKey;
+ string m_key;
+ vector<string> m_value;
+
+public:
+ stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt );
+ long getHashKey();
+ string getKey();
+ vector<string> getValue();
+ void setValue ( vector<string> value );
+ string toString();
+
+
+};
}
diff --git a/mert/TER/terAlignment.cpp b/mert/TER/terAlignment.cpp
index 6c5d35cc5..ec7bcafb7 100644
--- a/mert/TER/terAlignment.cpp
+++ b/mert/TER/terAlignment.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -24,191 +24,163 @@ using namespace std;
namespace TERCpp
{
- terAlignment::terAlignment()
- {
+terAlignment::terAlignment()
+{
// vector<string> ref;
// vector<string> hyp;
// vector<string> aftershift;
- // TERshift[] allshifts = null;
+ // TERshift[] allshifts = null;
- numEdits=0;
- numWords=0;
- bestRef="";
+ numEdits=0;
+ numWords=0;
+ bestRef="";
- numIns=0;
- numDel=0;
- numSub=0;
- numSft=0;
- numWsf=0;
- }
- string terAlignment::toString()
- {
- stringstream s;
- s.str ( "" );
- s << "Original Ref: \t" << join ( " ", ref ) << endl;
- s << "Original Hyp: \t" << join ( " ", hyp ) <<endl;
- s << "Hyp After Shift:\t" << join ( " ", aftershift );
+ numIns=0;
+ numDel=0;
+ numSub=0;
+ numSft=0;
+ numWsf=0;
+}
+string terAlignment::toString()
+{
+ stringstream s;
+ s.str ( "" );
+ s << "Original Ref: \t" << join ( " ", ref ) << endl;
+ s << "Original Hyp: \t" << join ( " ", hyp ) <<endl;
+ s << "Hyp After Shift:\t" << join ( " ", aftershift );
// s << "Hyp After Shift: " << join ( " ", aftershift );
- s << endl;
+ s << endl;
// string s = "Original Ref: " + join(" ", ref) + "\nOriginal Hyp: " + join(" ", hyp) + "\nHyp After Shift: " + join(" ", aftershift);
- if ( ( int ) sizeof ( alignment ) >0 )
- {
- s << "Alignment: (";
+ if ( ( int ) sizeof ( alignment ) >0 ) {
+ s << "Alignment: (";
// s += "\nAlignment: (";
- for ( int i = 0; i < ( int ) ( alignment.size() ); i++ )
- {
- s << alignment[i];
+ for ( int i = 0; i < ( int ) ( alignment.size() ); i++ ) {
+ s << alignment[i];
// s+=alignment[i];
- }
+ }
// s += ")";
- s << ")";
- }
- s << endl;
- if ( ( int ) allshifts.size() == 0 )
- {
+ s << ")";
+ }
+ s << endl;
+ if ( ( int ) allshifts.size() == 0 ) {
// s += "\nNumShifts: 0";
- s << "NumShifts: 0";
- }
- else
- {
+ s << "NumShifts: 0";
+ } else {
// s += "\nNumShifts: " + (int)allshifts.size();
- s << "NumShifts: "<< ( int ) allshifts.size();
- for ( int i = 0; i < ( int ) allshifts.size(); i++ )
- {
- s << endl << " " ;
- s << ( ( terShift ) allshifts[i] ).toString();
+ s << "NumShifts: "<< ( int ) allshifts.size();
+ for ( int i = 0; i < ( int ) allshifts.size(); i++ ) {
+ s << endl << " " ;
+ s << ( ( terShift ) allshifts[i] ).toString();
// s += "\n " + allshifts[i];
- }
- }
- s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")";
+ }
+ }
+ s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")";
// s += "\nScore: " + score() + " (" + numEdits + "/" + numWords + ")";
- return s.str();
+ return s.str();
- }
- string terAlignment::join ( string delim, vector<string> arr )
- {
- if ( ( int ) arr.size() == 0 ) return "";
+}
+string terAlignment::join ( string delim, vector<string> arr )
+{
+ if ( ( int ) arr.size() == 0 ) return "";
// if ((int)delim.compare("") == 0) delim = new String("");
// String s = new String("");
- stringstream s;
- s.str ( "" );
- for ( int i = 0; i < ( int ) arr.size(); i++ )
- {
- if ( i == 0 )
- {
- s << arr.at ( i );
- }
- else
- {
- s << delim << arr.at ( i );
- }
- }
- return s.str();
+ stringstream s;
+ s.str ( "" );
+ for ( int i = 0; i < ( int ) arr.size(); i++ ) {
+ if ( i == 0 ) {
+ s << arr.at ( i );
+ } else {
+ s << delim << arr.at ( i );
+ }
+ }
+ return s.str();
// return "";
+}
+double terAlignment::score()
+{
+ if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) ) {
+ return 1.0;
+ }
+ if ( numWords <= 0.0 ) {
+ return 0.0;
+ }
+ return ( double ) numEdits / numWords;
+}
+double terAlignment::scoreAv()
+{
+ if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) ) {
+ return 1.0;
+ }
+ if ( averageWords <= 0.0 ) {
+ return 0.0;
+ }
+ return ( double ) numEdits / averageWords;
+}
+
+void terAlignment::scoreDetails()
+{
+ numIns = numDel = numSub = numWsf = numSft = 0;
+ if((int)allshifts.size()>0) {
+ for(int i = 0; i < (int)allshifts.size(); ++i) {
+ numWsf += allshifts[i].size();
}
- double terAlignment::score()
- {
- if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) )
- {
- return 1.0;
- }
- if ( numWords <= 0.0 )
- {
- return 0.0;
- }
- return ( double ) numEdits / numWords;
+ numSft = allshifts.size();
+ }
+
+ if((int)alignment.size()>0 ) {
+ for(int i = 0; i < (int)alignment.size(); ++i) {
+ switch (alignment[i]) {
+ case 'S':
+ case 'T':
+ numSub++;
+ break;
+ case 'D':
+ numDel++;
+ break;
+ case 'I':
+ numIns++;
+ break;
+ }
}
- double terAlignment::scoreAv()
- {
- if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) )
- {
- return 1.0;
- }
- if ( averageWords <= 0.0 )
- {
- return 0.0;
- }
- return ( double ) numEdits / averageWords;
+ }
+ // if(numEdits != numSft + numDel + numIns + numSub)
+ // System.out.println("** Error, unmatch edit erros " + numEdits +
+ // " vs " + (numSft + numDel + numIns + numSub));
+}
+string terAlignment::printAlignments()
+{
+ stringstream to_return;
+ for(int i = 0; i < (int)alignment.size(); ++i) {
+ char alignInfo=alignment.at(i);
+ if (alignInfo == 'A' ) {
+ alignInfo='A';
}
- void terAlignment::scoreDetails()
- {
- numIns = numDel = numSub = numWsf = numSft = 0;
- if((int)allshifts.size()>0)
- {
- for(int i = 0; i < (int)allshifts.size(); ++i)
- {
- numWsf += allshifts[i].size();
- }
- numSft = allshifts.size();
- }
-
- if((int)alignment.size()>0 )
- {
- for(int i = 0; i < (int)alignment.size(); ++i)
- {
- switch (alignment[i])
- {
- case 'S':
- case 'T':
- numSub++;
- break;
- case 'D':
- numDel++;
- break;
- case 'I':
- numIns++;
- break;
- }
- }
- }
- // if(numEdits != numSft + numDel + numIns + numSub)
- // System.out.println("** Error, unmatch edit erros " + numEdits +
- // " vs " + (numSft + numDel + numIns + numSub));
- }
- string terAlignment::printAlignments()
- {
- stringstream to_return;
- for(int i = 0; i < (int)alignment.size(); ++i)
- {
- char alignInfo=alignment.at(i);
- if (alignInfo == 'A' )
- {
- alignInfo='A';
- }
-
- if (i==0)
- {
- to_return << alignInfo;
- }
- else
- {
- to_return << " " << alignInfo;
- }
- }
- return to_return.str();
+ if (i==0) {
+ to_return << alignInfo;
+ } else {
+ to_return << " " << alignInfo;
+ }
}
+ return to_return.str();
+}
string terAlignment::printAllShifts()
{
- stringstream to_return;
- if ( ( int ) allshifts.size() == 0 )
- {
+ stringstream to_return;
+ if ( ( int ) allshifts.size() == 0 ) {
// s += "\nNumShifts: 0";
- to_return << "NbrShifts: 0";
- }
- else
- {
+ to_return << "NbrShifts: 0";
+ } else {
// s += "\nNumShifts: " + (int)allshifts.size();
- to_return << "NbrShifts: "<< ( int ) allshifts.size();
- for ( int i = 0; i < ( int ) allshifts.size(); i++ )
- {
- to_return << "\t" ;
- to_return << ( ( terShift ) allshifts[i] ).toString();
+ to_return << "NbrShifts: "<< ( int ) allshifts.size();
+ for ( int i = 0; i < ( int ) allshifts.size(); i++ ) {
+ to_return << "\t" ;
+ to_return << ( ( terShift ) allshifts[i] ).toString();
// s += "\n " + allshifts[i];
- }
- }
- return to_return.str();
+ }
+ }
+ return to_return.str();
}
} \ No newline at end of file
diff --git a/mert/TER/terAlignment.h b/mert/TER/terAlignment.h
index 0af86f663..2af0b7490 100644
--- a/mert/TER/terAlignment.h
+++ b/mert/TER/terAlignment.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -34,41 +34,41 @@ using namespace std;
namespace TERCpp
{
- class terAlignment
- {
- private:
- public:
-
- terAlignment();
- string toString();
- void scoreDetails();
-
- vector<string> ref;
- vector<string> hyp;
- vector<string> aftershift;
- vector<terShift> allshifts;
- vector<int> hyp_int;
- vector<int> aftershift_int;
-
- double numEdits;
- double numWords;
- double averageWords;
- vector<char> alignment;
- string bestRef;
-
- int numIns;
- int numDel;
- int numSub;
- int numSft;
- int numWsf;
-
-
- string join ( string delim, vector<string> arr );
- double score();
- double scoreAv();
- string printAlignments();
- string printAllShifts();
- };
+class terAlignment
+{
+private:
+public:
+
+ terAlignment();
+ string toString();
+ void scoreDetails();
+
+ vector<string> ref;
+ vector<string> hyp;
+ vector<string> aftershift;
+ vector<terShift> allshifts;
+ vector<int> hyp_int;
+ vector<int> aftershift_int;
+
+ double numEdits;
+ double numWords;
+ double averageWords;
+ vector<char> alignment;
+ string bestRef;
+
+ int numIns;
+ int numDel;
+ int numSub;
+ int numSft;
+ int numWsf;
+
+
+ string join ( string delim, vector<string> arr );
+ double score();
+ double scoreAv();
+ string printAlignments();
+ string printAllShifts();
+};
}
#endif \ No newline at end of file
diff --git a/mert/TER/terShift.cpp b/mert/TER/terShift.cpp
index c1106db76..440b4d2ce 100644
--- a/mert/TER/terShift.cpp
+++ b/mert/TER/terShift.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -42,32 +42,32 @@ namespace TERCpp
// numSft=0;
// numWsf=0;
// }
- terShift::terShift ()
- {
- start = 0;
- end = 0;
- moveto = 0;
- newloc = 0;
- cost=1.0;
- }
- terShift::terShift ( int _start, int _end, int _moveto, int _newloc )
- {
- start = _start;
- end = _end;
- moveto = _moveto;
- newloc = _newloc;
- cost=1.0;
- }
+terShift::terShift ()
+{
+ start = 0;
+ end = 0;
+ moveto = 0;
+ newloc = 0;
+ cost=1.0;
+}
+terShift::terShift ( int _start, int _end, int _moveto, int _newloc )
+{
+ start = _start;
+ end = _end;
+ moveto = _moveto;
+ newloc = _newloc;
+ cost=1.0;
+}
- terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted )
- {
- start = _start;
- end = _end;
- moveto = _moveto;
- newloc = _newloc;
- shifted = _shifted;
- cost=1.0;
- }
+terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted )
+{
+ start = _start;
+ end = _end;
+ moveto = _moveto;
+ newloc = _newloc;
+ shifted = _shifted;
+ cost=1.0;
+}
// string terShift::vectorToString(vector<string> vec)
// {
// string retour("");
@@ -78,44 +78,38 @@ namespace TERCpp
// return retour;
// }
- string terShift::toString()
- {
- stringstream s;
- s.str ( "" );
- s << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
- if ( ( int ) shifted.size() > 0 )
- {
- s << " (" << vectorToString ( shifted ) << ")";
- }
- return s.str();
- }
+string terShift::toString()
+{
+ stringstream s;
+ s.str ( "" );
+ s << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
+ if ( ( int ) shifted.size() > 0 ) {
+ s << " (" << vectorToString ( shifted ) << ")";
+ }
+ return s.str();
+}
- /* The distance of the shift. */
- int terShift::distance()
- {
- if ( moveto < start )
- {
- return start - moveto;
- }
- else if ( moveto > end )
- {
- return moveto - end;
- }
- else
- {
- return moveto - start;
- }
- }
+/* The distance of the shift. */
+int terShift::distance()
+{
+ if ( moveto < start ) {
+ return start - moveto;
+ } else if ( moveto > end ) {
+ return moveto - end;
+ } else {
+ return moveto - start;
+ }
+}
- bool terShift::leftShift()
- {
- return ( moveto < start );
- }
+bool terShift::leftShift()
+{
+ return ( moveto < start );
+}
- int terShift::size()
- {
- return ( end - start ) + 1;
- }
+int terShift::size()
+{
+ return ( end - start ) + 1;
+}
// terShift terShift::operator=(terShift t)
// {
//
diff --git a/mert/TER/terShift.h b/mert/TER/terShift.h
index ba84a5947..74545e0de 100644
--- a/mert/TER/terShift.h
+++ b/mert/TER/terShift.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -34,32 +34,32 @@ using namespace Tools;
namespace TERCpp
{
- class terShift
- {
- private:
- public:
+class terShift
+{
+private:
+public:
- terShift();
- terShift ( int _start, int _end, int _moveto, int _newloc );
- terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted );
- string toString();
- int distance() ;
- bool leftShift();
- int size();
+ terShift();
+ terShift ( int _start, int _end, int _moveto, int _newloc );
+ terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted );
+ string toString();
+ int distance() ;
+ bool leftShift();
+ int size();
// terShift operator=(terShift t);
// string vectorToString(vector<string> vec);
- int start;
- int end;
- int moveto;
- int newloc;
- vector<string> shifted; // The words we shifted
- vector<char> alignment ; // for pra_more output
- vector<string> aftershift; // for pra_more output
- // This is used to store the cost of a shift, so we don't have to
- // calculate it multiple times.
- double cost;
- };
+ int start;
+ int end;
+ int moveto;
+ int newloc;
+ vector<string> shifted; // The words we shifted
+ vector<char> alignment ; // for pra_more output
+ vector<string> aftershift; // for pra_more output
+ // This is used to store the cost of a shift, so we don't have to
+ // calculate it multiple times.
+ double cost;
+};
}
#endif \ No newline at end of file
diff --git a/mert/TER/tercalc.cpp b/mert/TER/tercalc.cpp
index b7f63772c..c4629c639 100644
--- a/mert/TER/tercalc.cpp
+++ b/mert/TER/tercalc.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -35,902 +35,724 @@ using namespace Tools;
namespace TERCpp
{
- terCalc::terCalc()
- {
- TAILLE_PERMUT_MAX = 50;
- infinite = 999999.0;
- shift_cost = 1.0;
- insert_cost = 1.0;
- delete_cost = 1.0;
- substitute_cost = 1.0;
- match_cost = 0.0;
- NBR_SEGS_EVALUATED = 0;
- NBR_PERMUTS_CONSID = 0;
- NBR_BS_APPELS = 0;
- TAILLE_BEAM = 20;
- DIST_MAX_PERMUT = 50;
- PRINT_DEBUG = false;
- hypSpans.clear();
- refSpans.clear();
- }
+terCalc::terCalc()
+{
+ TAILLE_PERMUT_MAX = 50;
+ infinite = 999999.0;
+ shift_cost = 1.0;
+ insert_cost = 1.0;
+ delete_cost = 1.0;
+ substitute_cost = 1.0;
+ match_cost = 0.0;
+ NBR_SEGS_EVALUATED = 0;
+ NBR_PERMUTS_CONSID = 0;
+ NBR_BS_APPELS = 0;
+ TAILLE_BEAM = 20;
+ DIST_MAX_PERMUT = 50;
+ PRINT_DEBUG = false;
+ hypSpans.clear();
+ refSpans.clear();
+}
- terAlignment terCalc::WERCalculation ( vector< string > hyp , vector< string > ref )
- {
-
- return minimizeDistanceEdition ( hyp, ref, hypSpans );
-
- }
+terAlignment terCalc::WERCalculation ( vector< string > hyp , vector< string > ref )
+{
- terAlignment terCalc::TER ( std::vector< int > hyp, std::vector< int > ref )
- {
- stringstream s;
- s.str ( "" );
- string stringRef ( "" );
- string stringHyp ( "" );
- for ( vector<int>::iterator l_it = ref.begin(); l_it != ref.end(); l_it++ )
- {
- if ( l_it == ref.begin() )
- {
- s << ( *l_it );
- }
- else
- {
- s << " " << ( *l_it );
- }
- }
- stringRef = s.str();
- s.str ( "" );
- for ( vector<int>::iterator l_itHyp = hyp.begin(); l_itHyp != hyp.end(); l_itHyp++ )
- {
- if ( l_itHyp == hyp.begin() )
- {
- s << ( *l_itHyp );
- }
- else
- {
- s << " " << ( *l_itHyp );
- }
- }
- stringHyp = s.str();
- s.str ( "" );
- return TER ( stringToVector ( stringRef , " " ), stringToVector ( stringHyp , " " ) );
- }
+ return minimizeDistanceEdition ( hyp, ref, hypSpans );
+}
- hashMapInfos terCalc::createConcordMots ( vector<string> hyp, vector<string> ref )
- {
- hashMap tempHash;
- hashMapInfos retour;
- for ( int i = 0; i < ( int ) hyp.size(); i++ )
- {
- tempHash.addHasher ( hyp.at ( i ), "" );
- }
- bool cor[ref.size() ];
- for ( int i = 0; i < ( int ) ref.size(); i++ )
- {
- if ( tempHash.trouve ( ( string ) ref.at ( i ) ) )
- {
- cor[i] = true;
- }
- else
- {
- cor[i] = false;
- }
- }
- for ( int start = 0; start < ( int ) ref.size(); start++ )
- {
- if ( cor[start] )
- {
- for ( int end = start; ( ( end < ( int ) ref.size() ) && ( end - start <= TAILLE_PERMUT_MAX ) && ( cor[end] ) );end++ )
- {
- vector<string> ajouter = subVector ( ref, start, end + 1 );
- string ajouterString = vectorToString ( ajouter );
- vector<int> values = retour.getValue ( ajouterString );
- values.push_back ( start );
- if ( values.size() > 1 )
- {
- retour.setValue ( ajouterString, values );
- }
- else
- {
- retour.addValue ( ajouterString, values );
- }
- }
- }
- }
- return retour;
+terAlignment terCalc::TER ( std::vector< int > hyp, std::vector< int > ref )
+{
+ stringstream s;
+ s.str ( "" );
+ string stringRef ( "" );
+ string stringHyp ( "" );
+ for ( vector<int>::iterator l_it = ref.begin(); l_it != ref.end(); l_it++ ) {
+ if ( l_it == ref.begin() ) {
+ s << ( *l_it );
+ } else {
+ s << " " << ( *l_it );
+ }
+ }
+ stringRef = s.str();
+ s.str ( "" );
+ for ( vector<int>::iterator l_itHyp = hyp.begin(); l_itHyp != hyp.end(); l_itHyp++ ) {
+ if ( l_itHyp == hyp.begin() ) {
+ s << ( *l_itHyp );
+ } else {
+ s << " " << ( *l_itHyp );
}
+ }
+ stringHyp = s.str();
+ s.str ( "" );
+ return TER ( stringToVector ( stringRef , " " ), stringToVector ( stringHyp , " " ) );
+}
+
- bool terCalc::trouverIntersection ( vecInt refSpan, vecInt hypSpan )
- {
- if ( ( refSpan.at ( 1 ) >= hypSpan.at ( 0 ) ) && ( refSpan.at ( 0 ) <= hypSpan.at ( 1 ) ) )
- {
- return true;
+hashMapInfos terCalc::createConcordMots ( vector<string> hyp, vector<string> ref )
+{
+ hashMap tempHash;
+ hashMapInfos retour;
+ for ( int i = 0; i < ( int ) hyp.size(); i++ ) {
+ tempHash.addHasher ( hyp.at ( i ), "" );
+ }
+ bool cor[ref.size() ];
+ for ( int i = 0; i < ( int ) ref.size(); i++ ) {
+ if ( tempHash.trouve ( ( string ) ref.at ( i ) ) ) {
+ cor[i] = true;
+ } else {
+ cor[i] = false;
+ }
+ }
+ for ( int start = 0; start < ( int ) ref.size(); start++ ) {
+ if ( cor[start] ) {
+ for ( int end = start; ( ( end < ( int ) ref.size() ) && ( end - start <= TAILLE_PERMUT_MAX ) && ( cor[end] ) ); end++ ) {
+ vector<string> ajouter = subVector ( ref, start, end + 1 );
+ string ajouterString = vectorToString ( ajouter );
+ vector<int> values = retour.getValue ( ajouterString );
+ values.push_back ( start );
+ if ( values.size() > 1 ) {
+ retour.setValue ( ajouterString, values );
+ } else {
+ retour.addValue ( ajouterString, values );
}
- return false;
+ }
}
+ }
+ return retour;
+}
+bool terCalc::trouverIntersection ( vecInt refSpan, vecInt hypSpan )
+{
+ if ( ( refSpan.at ( 1 ) >= hypSpan.at ( 0 ) ) && ( refSpan.at ( 0 ) <= hypSpan.at ( 1 ) ) ) {
+ return true;
+ }
+ return false;
+}
- terAlignment terCalc::minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans )
- {
- double current_best = infinite;
- double last_best = infinite;
- int first_good = 0;
- int current_first_good = 0;
- int last_good = -1;
- int cur_last_good = 0;
- int last_peak = 0;
- int cur_last_peak = 0;
- int i, j;
- double cost, icost, dcost;
- double score;
-
-
-
- NBR_BS_APPELS++;
-
-
- for ( i = 0; i <= ( int ) ref.size(); i++ )
- {
- for ( j = 0; j <= ( int ) hyp.size(); j++ )
- {
- S[i][j] = -1.0;
- P[i][j] = '0';
- }
- }
- S[0][0] = 0.0;
- for ( j = 0; j <= ( int ) hyp.size(); j++ )
- {
- last_best = current_best;
- current_best = infinite;
- first_good = current_first_good;
- current_first_good = -1;
- last_good = cur_last_good;
- cur_last_good = -1;
- last_peak = cur_last_peak;
- cur_last_peak = 0;
- for ( i = first_good; i <= ( int ) ref.size(); i++ )
- {
- if ( i > last_good )
- {
- break;
- }
- if ( S[i][j] < 0 )
- {
- continue;
- }
- score = S[i][j];
- if ( ( j < ( int ) hyp.size() ) && ( score > last_best + TAILLE_BEAM ) )
- {
- continue;
- }
- if ( current_first_good == -1 )
- {
- current_first_good = i ;
- }
- if ( ( i < ( int ) ref.size() ) && ( j < ( int ) hyp.size() ) )
- {
- if ( ( int ) refSpans.size() == 0 || ( int ) hypSpans.size() == 0 || trouverIntersection ( refSpans.at ( i ), curHypSpans.at ( j ) ) )
- {
- if ( ( int ) ( ref.at ( i ).compare ( hyp.at ( j ) ) ) == 0 )
- {
- cost = match_cost + score;
- if ( ( S[i+1][j+1] == -1 ) || ( cost < S[i+1][j+1] ) )
- {
- S[i+1][j+1] = cost;
- P[i+1][j+1] = 'A';
- }
- if ( cost < current_best )
- {
- current_best = cost;
- }
- if ( current_best == cost )
- {
- cur_last_peak = i + 1;
- }
- }
- else
- {
- cost = substitute_cost + score;
- if ( ( S[i+1][j+1] < 0 ) || ( cost < S[i+1][j+1] ) )
- {
- S[i+1][j+1] = cost;
- P[i+1][j+1] = 'S';
- if ( cost < current_best )
- {
- current_best = cost;
- }
- if ( current_best == cost )
- {
- cur_last_peak = i + 1 ;
- }
- }
- }
- }
- }
- cur_last_good = i + 1;
- if ( j < ( int ) hyp.size() )
- {
- icost = score + insert_cost;
- if ( ( S[i][j+1] < 0 ) || ( S[i][j+1] > icost ) )
- {
- S[i][j+1] = icost;
- P[i][j+1] = 'I';
- if ( ( cur_last_peak < i ) && ( current_best == icost ) )
- {
- cur_last_peak = i;
- }
- }
- }
- if ( i < ( int ) ref.size() )
- {
- dcost = score + delete_cost;
- if ( ( S[ i+1][ j] < 0.0 ) || ( S[i+1][j] > dcost ) )
- {
- S[i+1][j] = dcost;
- P[i+1][j] = 'D';
- if ( i >= last_good )
- {
- last_good = i + 1 ;
- }
- }
- }
- }
- }
+
+terAlignment terCalc::minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans )
+{
+ double current_best = infinite;
+ double last_best = infinite;
+ int first_good = 0;
+ int current_first_good = 0;
+ int last_good = -1;
+ int cur_last_good = 0;
+ int last_peak = 0;
+ int cur_last_peak = 0;
+ int i, j;
+ double cost, icost, dcost;
+ double score;
- int tracelength = 0;
- i = ref.size();
- j = hyp.size();
- while ( ( i > 0 ) || ( j > 0 ) )
- {
- tracelength++;
- if ( P[i][j] == 'A' )
- {
- i--;
- j--;
+
+ NBR_BS_APPELS++;
+
+
+ for ( i = 0; i <= ( int ) ref.size(); i++ ) {
+ for ( j = 0; j <= ( int ) hyp.size(); j++ ) {
+ S[i][j] = -1.0;
+ P[i][j] = '0';
+ }
+ }
+ S[0][0] = 0.0;
+ for ( j = 0; j <= ( int ) hyp.size(); j++ ) {
+ last_best = current_best;
+ current_best = infinite;
+ first_good = current_first_good;
+ current_first_good = -1;
+ last_good = cur_last_good;
+ cur_last_good = -1;
+ last_peak = cur_last_peak;
+ cur_last_peak = 0;
+ for ( i = first_good; i <= ( int ) ref.size(); i++ ) {
+ if ( i > last_good ) {
+ break;
+ }
+ if ( S[i][j] < 0 ) {
+ continue;
+ }
+ score = S[i][j];
+ if ( ( j < ( int ) hyp.size() ) && ( score > last_best + TAILLE_BEAM ) ) {
+ continue;
+ }
+ if ( current_first_good == -1 ) {
+ current_first_good = i ;
+ }
+ if ( ( i < ( int ) ref.size() ) && ( j < ( int ) hyp.size() ) ) {
+ if ( ( int ) refSpans.size() == 0 || ( int ) hypSpans.size() == 0 || trouverIntersection ( refSpans.at ( i ), curHypSpans.at ( j ) ) ) {
+ if ( ( int ) ( ref.at ( i ).compare ( hyp.at ( j ) ) ) == 0 ) {
+ cost = match_cost + score;
+ if ( ( S[i+1][j+1] == -1 ) || ( cost < S[i+1][j+1] ) ) {
+ S[i+1][j+1] = cost;
+ P[i+1][j+1] = 'A';
}
- else
- if ( P[i][j] == 'S' )
- {
- i--;
- j--;
- }
- else
- if ( P[i][j] == 'D' )
- {
- i--;
- }
- else
- if ( P[i][j] == 'I' )
- {
- j--;
- }
- else
- {
- cerr << "ERROR : terCalc::minimizeDistanceEdition : Invalid path : " << P[i][j] << endl;
- exit ( -1 );
- }
- }
- vector<char> path ( tracelength );
- i = ref.size();
- j = hyp.size();
- while ( ( i > 0 ) || ( j > 0 ) )
- {
- path[--tracelength] = P[i][j];
- if ( P[i][j] == 'A' )
- {
- i--;
- j--;
+ if ( cost < current_best ) {
+ current_best = cost;
+ }
+ if ( current_best == cost ) {
+ cur_last_peak = i + 1;
}
- else
- if ( P[i][j] == 'S' )
- {
- i--;
- j--;
- }
- else
- if ( P[i][j] == 'D' )
- {
- i--;
- }
- else
- if ( P[i][j] == 'I' )
- {
- j--;
- }
+ } else {
+ cost = substitute_cost + score;
+ if ( ( S[i+1][j+1] < 0 ) || ( cost < S[i+1][j+1] ) ) {
+ S[i+1][j+1] = cost;
+ P[i+1][j+1] = 'S';
+ if ( cost < current_best ) {
+ current_best = cost;
+ }
+ if ( current_best == cost ) {
+ cur_last_peak = i + 1 ;
+ }
+ }
+ }
}
- terAlignment to_return;
- to_return.numWords = ref.size();
- to_return.alignment = path;
- to_return.numEdits = S[ref.size() ][hyp.size() ];
- to_return.hyp = hyp;
- to_return.ref = ref;
- to_return.averageWords = (int)ref.size();
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::minimizeDistanceEdition : to_return :" << endl << to_return.toString() << endl << "END DEBUG" << endl;
+ }
+ cur_last_good = i + 1;
+ if ( j < ( int ) hyp.size() ) {
+ icost = score + insert_cost;
+ if ( ( S[i][j+1] < 0 ) || ( S[i][j+1] > icost ) ) {
+ S[i][j+1] = icost;
+ P[i][j+1] = 'I';
+ if ( ( cur_last_peak < i ) && ( current_best == icost ) ) {
+ cur_last_peak = i;
+ }
}
- return to_return;
-
+ }
+ if ( i < ( int ) ref.size() ) {
+ dcost = score + delete_cost;
+ if ( ( S[ i+1][ j] < 0.0 ) || ( S[i+1][j] > dcost ) ) {
+ S[i+1][j] = dcost;
+ P[i+1][j] = 'D';
+ if ( i >= last_good ) {
+ last_good = i + 1 ;
+ }
+ }
+ }
+ }
+ }
+
+
+ int tracelength = 0;
+ i = ref.size();
+ j = hyp.size();
+ while ( ( i > 0 ) || ( j > 0 ) ) {
+ tracelength++;
+ if ( P[i][j] == 'A' ) {
+ i--;
+ j--;
+ } else if ( P[i][j] == 'S' ) {
+ i--;
+ j--;
+ } else if ( P[i][j] == 'D' ) {
+ i--;
+ } else if ( P[i][j] == 'I' ) {
+ j--;
+ } else {
+ cerr << "ERROR : terCalc::minimizeDistanceEdition : Invalid path : " << P[i][j] << endl;
+ exit ( -1 );
+ }
+ }
+ vector<char> path ( tracelength );
+ i = ref.size();
+ j = hyp.size();
+ while ( ( i > 0 ) || ( j > 0 ) ) {
+ path[--tracelength] = P[i][j];
+ if ( P[i][j] == 'A' ) {
+ i--;
+ j--;
+ } else if ( P[i][j] == 'S' ) {
+ i--;
+ j--;
+ } else if ( P[i][j] == 'D' ) {
+ i--;
+ } else if ( P[i][j] == 'I' ) {
+ j--;
}
- terAlignment terCalc::TER ( vector<string> hyp, vector<string> ref )
- {
- hashMapInfos rloc = createConcordMots ( hyp, ref );
- terAlignment cur_align = minimizeDistanceEdition ( hyp, ref, hypSpans );
- vector<string> cur = hyp;
- cur_align.hyp = hyp;
- cur_align.ref = ref;
- cur_align.aftershift = hyp;
- double edits = 0;
+ }
+ terAlignment to_return;
+ to_return.numWords = ref.size();
+ to_return.alignment = path;
+ to_return.numEdits = S[ref.size() ][hyp.size() ];
+ to_return.hyp = hyp;
+ to_return.ref = ref;
+ to_return.averageWords = (int)ref.size();
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::minimizeDistanceEdition : to_return :" << endl << to_return.toString() << endl << "END DEBUG" << endl;
+ }
+ return to_return;
+
+}
+terAlignment terCalc::TER ( vector<string> hyp, vector<string> ref )
+{
+ hashMapInfos rloc = createConcordMots ( hyp, ref );
+ terAlignment cur_align = minimizeDistanceEdition ( hyp, ref, hypSpans );
+ vector<string> cur = hyp;
+ cur_align.hyp = hyp;
+ cur_align.ref = ref;
+ cur_align.aftershift = hyp;
+ double edits = 0;
// int numshifts = 0;
- vector<terShift> allshifts;
+ vector<terShift> allshifts;
// cerr << "Initial Alignment:" << endl << cur_align.toString() <<endl;
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::TER : cur_align :" << endl << cur_align.toString() << endl << "END DEBUG" << endl;
- }
- while ( true )
- {
- bestShiftStruct returns;
- returns = findBestShift ( cur, hyp, ref, rloc, cur_align );
- if ( returns.m_empty )
- {
- break;
- }
- terShift bestShift = returns.m_best_shift;
- cur_align = returns.m_best_align;
- edits += bestShift.cost;
- bestShift.alignment = cur_align.alignment;
- bestShift.aftershift = cur_align.aftershift;
- allshifts.push_back ( bestShift );
- cur = cur_align.aftershift;
- }
- terAlignment to_return;
- to_return = cur_align;
- to_return.allshifts = allshifts;
- to_return.numEdits += edits;
- NBR_SEGS_EVALUATED++;
- return to_return;
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::TER : cur_align :" << endl << cur_align.toString() << endl << "END DEBUG" << endl;
+ }
+ while ( true ) {
+ bestShiftStruct returns;
+ returns = findBestShift ( cur, hyp, ref, rloc, cur_align );
+ if ( returns.m_empty ) {
+ break;
}
- bestShiftStruct terCalc::findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment med_align )
- {
- bestShiftStruct to_return;
- bool anygain = false;
- bool herr[ ( int ) hyp.size() ];
- bool rerr[ ( int ) ref.size() ];
- int ralign[ ( int ) ref.size() ];
- calculateTerAlignment ( med_align, herr, rerr, ralign );
- vector<vecTerShift> poss_shifts;
-
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::findBestShift (after the calculateTerAlignment call) :" << endl;
- cerr << "indices: ";
- for (int l_i=0; l_i < ( int ) ref.size() ; l_i++)
- {
- cerr << l_i << "\t";
- }
- cerr << endl;
- cerr << "hyp : \t"<<vectorToString(hyp ,"\t") << endl;
- cerr << "cur : \t"<<vectorToString(cur ,"\t") << endl;
- cerr << "ref : \t"<<vectorToString(ref ,"\t") << endl;
- cerr << "herr : "<<vectorToString(herr,"\t",( int ) hyp.size()) << " | " << ( int ) hyp.size() <<endl;
- cerr << "rerr : "<<vectorToString(rerr,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() <<endl;
- cerr << "ralign : "<< vectorToString(ralign,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() << endl;
- cerr << "END DEBUG " << endl;
- }
- poss_shifts = calculerPermutations ( cur, ref, rloc, med_align, herr, rerr, ralign );
- double curerr = med_align.numEdits;
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
- cerr << "Possible Shifts:" << endl;
- for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- )
- {
- for ( int j = 0; j < ( int ) ( poss_shifts.at ( i ) ).size(); j++ )
- {
- cerr << " [" << i << "] " << ( ( poss_shifts.at ( i ) ).at ( j ) ).toString() << endl;
- }
- }
- cerr << endl;
- cerr << "END DEBUG " << endl;
- }
+ terShift bestShift = returns.m_best_shift;
+ cur_align = returns.m_best_align;
+ edits += bestShift.cost;
+ bestShift.alignment = cur_align.alignment;
+ bestShift.aftershift = cur_align.aftershift;
+ allshifts.push_back ( bestShift );
+ cur = cur_align.aftershift;
+ }
+ terAlignment to_return;
+ to_return = cur_align;
+ to_return.allshifts = allshifts;
+ to_return.numEdits += edits;
+ NBR_SEGS_EVALUATED++;
+ return to_return;
+}
+bestShiftStruct terCalc::findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment med_align )
+{
+ bestShiftStruct to_return;
+ bool anygain = false;
+ bool herr[ ( int ) hyp.size() ];
+ bool rerr[ ( int ) ref.size() ];
+ int ralign[ ( int ) ref.size() ];
+ calculateTerAlignment ( med_align, herr, rerr, ralign );
+ vector<vecTerShift> poss_shifts;
+
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift (after the calculateTerAlignment call) :" << endl;
+ cerr << "indices: ";
+ for (int l_i=0; l_i < ( int ) ref.size() ; l_i++) {
+ cerr << l_i << "\t";
+ }
+ cerr << endl;
+ cerr << "hyp : \t"<<vectorToString(hyp ,"\t") << endl;
+ cerr << "cur : \t"<<vectorToString(cur ,"\t") << endl;
+ cerr << "ref : \t"<<vectorToString(ref ,"\t") << endl;
+ cerr << "herr : "<<vectorToString(herr,"\t",( int ) hyp.size()) << " | " << ( int ) hyp.size() <<endl;
+ cerr << "rerr : "<<vectorToString(rerr,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() <<endl;
+ cerr << "ralign : "<< vectorToString(ralign,"\t",( int ) ref.size()) << " | " << ( int ) ref.size() << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ poss_shifts = calculerPermutations ( cur, ref, rloc, med_align, herr, rerr, ralign );
+ double curerr = med_align.numEdits;
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Possible Shifts:" << endl;
+ for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- ) {
+ for ( int j = 0; j < ( int ) ( poss_shifts.at ( i ) ).size(); j++ ) {
+ cerr << " [" << i << "] " << ( ( poss_shifts.at ( i ) ).at ( j ) ).toString() << endl;
+ }
+ }
+ cerr << endl;
+ cerr << "END DEBUG " << endl;
+ }
// exit(0);
- double cur_best_shift_cost = 0.0;
- terAlignment cur_best_align = med_align;
- terShift cur_best_shift;
+ double cur_best_shift_cost = 0.0;
+ terAlignment cur_best_align = med_align;
+ terShift cur_best_shift;
- for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- )
- {
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
- cerr << "Considering shift of length " << i << " (" << ( poss_shifts.at ( i ) ).size() << ")" << endl;
- cerr << "END DEBUG " << endl;
- }
- /* Consider shifts of length i+1 */
- double curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
- double maxfix = ( 2 * ( 1 + i ) );
- if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) )
- {
- break;
- }
+ for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- ) {
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Considering shift of length " << i << " (" << ( poss_shifts.at ( i ) ).size() << ")" << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ /* Consider shifts of length i+1 */
+ double curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
+ double maxfix = ( 2 * ( 1 + i ) );
+ if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) {
+ break;
+ }
- for ( int s = 0; s < ( int ) ( poss_shifts.at ( i ) ).size(); s++ )
- {
- curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
- if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) )
- {
- break;
- }
- terShift curshift = ( poss_shifts.at ( i ) ).at ( s );
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
- cerr << "cur : "<< join(" ",cur) << endl;
- cerr << "curshift : "<< curshift.toString() << endl;
-
- }
- alignmentStruct shiftReturns = permuter ( cur, curshift );
- vector<string> shiftarr = shiftReturns.nwords;
- vector<vecInt> curHypSpans = shiftReturns.aftershift;
-
- if ( PRINT_DEBUG )
- {
- cerr << "shiftarr : "<< join(" ",shiftarr) << endl;
+ for ( int s = 0; s < ( int ) ( poss_shifts.at ( i ) ).size(); s++ ) {
+ curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits );
+ if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) {
+ break;
+ }
+ terShift curshift = ( poss_shifts.at ( i ) ).at ( s );
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "cur : "<< join(" ",cur) << endl;
+ cerr << "curshift : "<< curshift.toString() << endl;
+
+ }
+ alignmentStruct shiftReturns = permuter ( cur, curshift );
+ vector<string> shiftarr = shiftReturns.nwords;
+ vector<vecInt> curHypSpans = shiftReturns.aftershift;
+
+ if ( PRINT_DEBUG ) {
+ cerr << "shiftarr : "<< join(" ",shiftarr) << endl;
// cerr << "curHypSpans : "<< curHypSpans.toString() << endl;
- cerr << "END DEBUG " << endl;
- }
- terAlignment curalign = minimizeDistanceEdition ( shiftarr, ref, curHypSpans );
-
- curalign.hyp = hyp;
- curalign.ref = ref;
- curalign.aftershift = shiftarr;
-
-
- double gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost );
-
- // if (DEBUG) {
- // string testeuh=terAlignment join(" ", shiftarr);
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
- cerr << "Gain for " << curshift.toString() << " is " << gain << ". (result: [" << curalign.join ( " ", shiftarr ) << "]" << endl;
- cerr << "Details of gains : gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost )"<<endl;
- cerr << "Details of gains : gain = ("<<cur_best_align.numEdits << "+" << cur_best_shift_cost << ") - (" << curalign.numEdits << "+" << curshift.cost << ")"<<endl;
- cerr << "" << curalign.toString() << "\n" << endl;
- cerr << "END DEBUG " << endl;
- }
- // }
- //
- if ( ( gain > 0 ) || ( ( cur_best_shift_cost == 0 ) && ( gain == 0 ) ) )
- {
- anygain = true;
- cur_best_shift = curshift;
- cur_best_shift_cost = curshift.cost;
- cur_best_align = curalign;
- // if (DEBUG)
- if ( PRINT_DEBUG )
- {
- cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
- cerr << "Tmp Choosing shift: " << cur_best_shift.toString() << " gives:\n" << cur_best_align.toString() << "\n" << endl;
- cerr << "END DEBUG " << endl;
- }
- }
- }
- }
- if ( anygain )
- {
- to_return.m_best_shift = cur_best_shift;
- to_return.m_best_align = cur_best_align;
- to_return.m_empty = false;
+ cerr << "END DEBUG " << endl;
+ }
+ terAlignment curalign = minimizeDistanceEdition ( shiftarr, ref, curHypSpans );
+
+ curalign.hyp = hyp;
+ curalign.ref = ref;
+ curalign.aftershift = shiftarr;
+
+
+ double gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost );
+
+ // if (DEBUG) {
+ // string testeuh=terAlignment join(" ", shiftarr);
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Gain for " << curshift.toString() << " is " << gain << ". (result: [" << curalign.join ( " ", shiftarr ) << "]" << endl;
+ cerr << "Details of gains : gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost )"<<endl;
+ cerr << "Details of gains : gain = ("<<cur_best_align.numEdits << "+" << cur_best_shift_cost << ") - (" << curalign.numEdits << "+" << curshift.cost << ")"<<endl;
+ cerr << "" << curalign.toString() << "\n" << endl;
+ cerr << "END DEBUG " << endl;
+ }
+ // }
+ //
+ if ( ( gain > 0 ) || ( ( cur_best_shift_cost == 0 ) && ( gain == 0 ) ) ) {
+ anygain = true;
+ cur_best_shift = curshift;
+ cur_best_shift_cost = curshift.cost;
+ cur_best_align = curalign;
+ // if (DEBUG)
+ if ( PRINT_DEBUG ) {
+ cerr << "BEGIN DEBUG : terCalc::findBestShift :" << endl;
+ cerr << "Tmp Choosing shift: " << cur_best_shift.toString() << " gives:\n" << cur_best_align.toString() << "\n" << endl;
+ cerr << "END DEBUG " << endl;
}
- else
- {
- to_return.m_empty = true;
- }
- return to_return;
+ }
}
+ }
+ if ( anygain ) {
+ to_return.m_best_shift = cur_best_shift;
+ to_return.m_best_align = cur_best_align;
+ to_return.m_empty = false;
+ } else {
+ to_return.m_empty = true;
+ }
+ return to_return;
+}
- void terCalc::calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign )
- {
- int hpos = -1;
- int rpos = -1;
- if ( PRINT_DEBUG )
- {
+/**
+ * Decodes an edit path into per-word error flags and a reference-to-hypothesis
+ * position map.
+ *
+ * align.alignment is read symbol by symbol:
+ *   'A'  consumes one hypothesis and one reference word, no error;
+ *   'S'  consumes one of each and flags both as errors;
+ *   'I'  consumes one hypothesis word and flags it as an error;
+ *   'D'  consumes one reference word and flags it as an error.
+ *
+ * @param align  alignment whose `alignment` path is decoded (taken by value).
+ * @param herr   out: herr[h] is true when hypothesis position h was reached by 'S' or 'I'.
+ * @param rerr   out: rerr[r] is true when reference position r was reached by 'S' or 'D'.
+ * @param ralign out: for 'A'/'S', the hypothesis position paired with reference
+ *               position r; for 'D', the last consumed hypothesis position plus one.
+ *
+ * herr needs one slot per 'A'/'S'/'I' symbol; rerr and ralign need one slot per
+ * 'A'/'S'/'D' symbol. Any other symbol is reported on cerr and the process is
+ * terminated with exit(-1).
+ */
+void terCalc::calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign )
+{
+  int hpos = -1;
+  int rpos = -1;
+  if ( PRINT_DEBUG ) {
+
+    cerr << "BEGIN DEBUG : terCalc::calculateTerAlignment : " << endl << align.toString() << endl;
+    cerr << "END DEBUG " << endl;
+  }
+  // There is intentionally no pre-clearing pass over [0, alignment.size()).
+  // The path is longer than the hypothesis by its number of 'D' symbols and
+  // longer than the reference by its number of 'I' symbols, so clearing that
+  // many slots wrote past the end of buffers sized to the hypothesis and
+  // reference lengths. The loop below assigns every slot the path addresses,
+  // which makes the clearing redundant as well as unsafe.
+  for ( int i = 0; i < ( int ) align.alignment.size(); i++ ) {
+    const char sym = align.alignment[i];
+    switch ( sym ) {
+    case 'A':
+      hpos++;
+      rpos++;
+      herr[hpos] = false;
+      rerr[rpos] = false;
+      ralign[rpos] = hpos;
+      break;
+    case 'S':
+      hpos++;
+      rpos++;
+      herr[hpos] = true;
+      rerr[rpos] = true;
+      ralign[rpos] = hpos;
+      break;
+    case 'I':
+      hpos++;
+      herr[hpos] = true;
+      break;
+    case 'D':
+      rpos++;
+      rerr[rpos] = true;
+      // A deleted reference word points at the next hypothesis slot.
+      ralign[rpos] = hpos+1;
+      break;
+    default:
+      cerr << "ERROR : terCalc::calculateTerAlignment : Invalid mini align sequence " << sym << " at pos " << i << endl;
+      exit ( -1 );
+    }
+  }
+}
- cerr << "BEGIN DEBUG : terCalc::calculateTerAlignment : " << endl << align.toString() << endl;
- cerr << "END DEBUG " << endl;
- }
- for ( int i = 0; i < ( int ) align.alignment.size(); i++ )
- {
- herr[i] = false;
- rerr[i] = false;
- ralign[i] = -1;
- }
- for ( int i = 0; i < ( int ) align.alignment.size(); i++ )
- {
- char sym = align.alignment[i];
- if ( sym == 'A' )
- {
- hpos++;
- rpos++;
- herr[hpos] = false;
- rerr[rpos] = false;
- ralign[rpos] = hpos;
- }
- else
- if ( sym == 'S' )
- {
- hpos++;
- rpos++;
- herr[hpos] = true;
- rerr[rpos] = true;
- ralign[rpos] = hpos;
- }
- else
- if ( sym == 'I' )
- {
- hpos++;
- herr[hpos] = true;
- }
- else
- if ( sym == 'D' )
- {
- rpos++;
- rerr[rpos] = true;
- ralign[rpos] = hpos+1;
- }
- else
- {
- cerr << "ERROR : terCalc::calculateTerAlignment : Invalid mini align sequence " << sym << " at pos " << i << endl;
- exit ( -1 );
- }
- }
+vector<vecTerShift> terCalc::calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign )
+{
+ vector<vecTerShift> to_return;
+ if ( ( TAILLE_PERMUT_MAX <= 0 ) || ( DIST_MAX_PERMUT <= 0 ) ) {
+ return to_return;
+ }
+
+ vector<vecTerShift> allshifts ( TAILLE_PERMUT_MAX + 1 );
+ for ( int start = 0; start < ( int ) hyp.size(); start++ ) {
+ string subVectorHypString = vectorToString ( subVector ( hyp, start, start + 1 ) );
+ if ( ! rloc.trouve ( subVectorHypString ) ) {
+ continue;
}
- vector<vecTerShift> terCalc::calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign )
- {
- vector<vecTerShift> to_return;
- if ( ( TAILLE_PERMUT_MAX <= 0 ) || ( DIST_MAX_PERMUT <= 0 ) )
- {
- return to_return;
+ bool ok = false;
+ vector<int> mtiVec = rloc.getValue ( subVectorHypString );
+ vector<int>::iterator mti = mtiVec.begin();
+ while ( mti != mtiVec.end() && ( ! ok ) ) {
+ int moveto = ( *mti );
+ mti++;
+ if ( ( start != ralign[moveto] ) && ( ( ralign[moveto] - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign[moveto] - 1 ) <= DIST_MAX_PERMUT ) ) {
+ ok = true;
+ }
+ }
+ if ( ! ok ) {
+ continue;
+ }
+ ok = true;
+ for ( int end = start; ( ok && ( end < ( int ) hyp.size() ) && ( end < start + TAILLE_PERMUT_MAX ) ); end++ ) {
+ /* check if cand is good if so, add it */
+ vector<string> cand = subVector ( hyp, start, end + 1 );
+ ok = false;
+ if ( ! ( rloc.trouve ( vectorToString ( cand ) ) ) ) {
+ continue;
+ }
+
+ bool any_herr = false;
+
+ for ( int i = 0; ( ( i <= ( end - start ) ) && ( ! any_herr ) ); i++ ) {
+ if ( herr[start+i] ) {
+ any_herr = true;
}
+ }
+ if ( any_herr == false ) {
+ ok = true;
+ continue;
+ }
+
+ vector<int> movetoitVec;
+ movetoitVec = rloc.getValue ( ( string ) vectorToString ( cand ) );
+// cerr << "CANDIDATE " << ( string ) vectorToString ( cand ) <<" PLACED : " << ( string ) vectorToString ( movetoitVec," ") << endl;
+ vector<int>::iterator movetoit = movetoitVec.begin();
+ while ( movetoit != movetoitVec.end() ) {
+ int moveto = ( *movetoit );
+ movetoit++;
+ if ( ! ( ( ralign[moveto] != start ) && ( ( ralign[moveto] < start ) || ( ralign[moveto] > end ) ) && ( ( ralign[moveto] - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign[moveto] ) <= DIST_MAX_PERMUT ) ) ) {
+ continue;
+ }
+ ok = true;
- vector<vecTerShift> allshifts ( TAILLE_PERMUT_MAX + 1 );
- for ( int start = 0; start < ( int ) hyp.size(); start++ )
- {
- string subVectorHypString = vectorToString ( subVector ( hyp, start, start + 1 ) );
- if ( ! rloc.trouve ( subVectorHypString ) )
- {
- continue;
- }
+ /* check to see if there are any errors in either string
+ (only move if this is the case!)
+ */
+
+ bool any_rerr = false;
+ for ( int i = 0; ( i <= end - start ) && ( ! any_rerr ); i++ ) {
+ if ( rerr[moveto+i] ) {
+ any_rerr = true;
+ }
+ }
+ if ( ! any_rerr ) {
+ continue;
+ }
+ for ( int roff = -1; roff <= ( end - start ); roff++ ) {
+ terShift topush;
+ bool topushNull = true;
+ if ( ( roff == -1 ) && ( moveto == 0 ) ) {
+ if ( PRINT_DEBUG ) {
- bool ok = false;
- vector<int> mtiVec = rloc.getValue ( subVectorHypString );
- vector<int>::iterator mti = mtiVec.begin();
- while ( mti != mtiVec.end() && ( ! ok ) )
- {
- int moveto = ( *mti );
- mti++;
- if ( ( start != ralign[moveto] ) && ( ( ralign[moveto] - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign[moveto] - 1 ) <= DIST_MAX_PERMUT ) )
- {
- ok = true;
- }
+ cerr << "BEGIN DEBUG : terCalc::calculerPermutations 01 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: -1" << endl << "END DEBUG" << endl;
}
- if ( ! ok )
- {
- continue;
+ terShift t01 ( start, end, -1, -1 );
+ topush = t01;
+ topushNull = false;
+ } else if ( ( start != ralign[moveto+roff] ) && ( ( roff == 0 ) || ( ralign[moveto+roff] != ralign[moveto] ) ) ) {
+ int newloc = ralign[moveto+roff];
+ if ( PRINT_DEBUG ) {
+
+ cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: " << newloc << endl << "END DEBUG" << endl;
}
- ok = true;
- for ( int end = start; ( ok && ( end < ( int ) hyp.size() ) && ( end < start + TAILLE_PERMUT_MAX ) ); end++ )
- {
- /* check if cand is good if so, add it */
- vector<string> cand = subVector ( hyp, start, end + 1 );
- ok = false;
- if ( ! ( rloc.trouve ( vectorToString ( cand ) ) ) )
- {
- continue;
- }
-
- bool any_herr = false;
-
- for ( int i = 0; ( ( i <= ( end - start ) ) && ( ! any_herr ) ); i++ )
- {
- if ( herr[start+i] )
- {
- any_herr = true;
- }
- }
- if ( any_herr == false )
- {
- ok = true;
- continue;
- }
-
- vector<int> movetoitVec;
- movetoitVec = rloc.getValue ( ( string ) vectorToString ( cand ) );
-// cerr << "CANDIDATE " << ( string ) vectorToString ( cand ) <<" PLACED : " << ( string ) vectorToString ( movetoitVec," ") << endl;
- vector<int>::iterator movetoit = movetoitVec.begin();
- while ( movetoit != movetoitVec.end() )
- {
- int moveto = ( *movetoit );
- movetoit++;
- if ( ! ( ( ralign[moveto] != start ) && ( ( ralign[moveto] < start ) || ( ralign[moveto] > end ) ) && ( ( ralign[moveto] - start ) <= DIST_MAX_PERMUT ) && ( ( start - ralign[moveto] ) <= DIST_MAX_PERMUT ) ) )
- {
- continue;
- }
- ok = true;
-
- /* check to see if there are any errors in either string
- (only move if this is the case!)
- */
-
- bool any_rerr = false;
- for ( int i = 0; ( i <= end - start ) && ( ! any_rerr ); i++ )
- {
- if ( rerr[moveto+i] )
- {
- any_rerr = true;
- }
- }
- if ( ! any_rerr )
- {
- continue;
- }
- for ( int roff = -1; roff <= ( end - start ); roff++ )
- {
- terShift topush;
- bool topushNull = true;
- if ( ( roff == -1 ) && ( moveto == 0 ) )
- {
- if ( PRINT_DEBUG )
- {
-
- cerr << "BEGIN DEBUG : terCalc::calculerPermutations 01 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: -1" << endl << "END DEBUG" << endl;
- }
- terShift t01 ( start, end, -1, -1 );
- topush = t01;
- topushNull = false;
- }
- else
- if ( ( start != ralign[moveto+roff] ) && ( ( roff == 0 ) || ( ralign[moveto+roff] != ralign[moveto] ) ) )
- {
- int newloc = ralign[moveto+roff];
- if ( PRINT_DEBUG )
- {
-
- cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl << "Consider making " << start << "..." << end << " (" << vectorToString(cand," ")<< ") moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: " << newloc << endl << "END DEBUG" << endl;
- }
- terShift t02 ( start, end, moveto + roff, newloc );
- topush = t02;
- topushNull = false;
- }
- if ( !topushNull )
- {
- topush.shifted = cand;
- topush.cost = shift_cost;
- if ( PRINT_DEBUG )
- {
-
- cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl;
- cerr << "start : " << start << endl;
- cerr << "end : " << end << endl;
- cerr << "end - start : " << end - start << endl;
- cerr << "END DEBUG " << endl;
- }
- ( allshifts.at ( end - start ) ).push_back ( topush );
- }
- }
- }
+ terShift t02 ( start, end, moveto + roff, newloc );
+ topush = t02;
+ topushNull = false;
+ }
+ if ( !topushNull ) {
+ topush.shifted = cand;
+ topush.cost = shift_cost;
+ if ( PRINT_DEBUG ) {
+
+ cerr << "BEGIN DEBUG : terCalc::calculerPermutations 02 : " << endl;
+ cerr << "start : " << start << endl;
+ cerr << "end : " << end << endl;
+ cerr << "end - start : " << end - start << endl;
+ cerr << "END DEBUG " << endl;
}
+ ( allshifts.at ( end - start ) ).push_back ( topush );
+ }
}
- to_return.clear();
- for ( int i = 0; i < TAILLE_PERMUT_MAX + 1; i++ )
- {
- to_return.push_back ( ( vecTerShift ) allshifts.at ( i ) );
- }
- return to_return;
+ }
}
+ }
+ to_return.clear();
+ for ( int i = 0; i < TAILLE_PERMUT_MAX + 1; i++ ) {
+ to_return.push_back ( ( vecTerShift ) allshifts.at ( i ) );
+ }
+ return to_return;
+}
- alignmentStruct terCalc::permuter ( vector<string> words, terShift s )
- {
- return permuter ( words, s.start, s.end, s.newloc );
- }
+// Convenience overload: forwards the shift's start, end and newloc fields to
+// the index-based permuter and hands back its result unchanged.
+alignmentStruct terCalc::permuter ( vector<string> words, terShift s )
+{
+  alignmentStruct shifted = permuter ( words, s.start, s.end, s.newloc );
+  return shifted;
+}
- alignmentStruct terCalc::permuter ( vector<string> words, int start, int end, int newloc )
- {
- int c = 0;
- vector<string> nwords ( words );
- vector<vecInt> spans ( ( int ) hypSpans.size() );
- alignmentStruct to_return;
- if ( PRINT_DEBUG )
- {
+alignmentStruct terCalc::permuter ( vector<string> words, int start, int end, int newloc )
+{
+ int c = 0;
+ vector<string> nwords ( words );
+ vector<vecInt> spans ( ( int ) hypSpans.size() );
+ alignmentStruct to_return;
+ if ( PRINT_DEBUG ) {
+
+ if ( ( int ) hypSpans.size() > 0 ) {
+ cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: " << ( int ) hypSpans.size() << endl ;
+ } else {
+ cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: null" << endl ;
+ }
+ cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << join(" ",words) << " start: " << start << " end: " << end << " newloc "<< newloc << endl << "END DEBUG " << endl;
+ }
+ if (newloc >= ( int ) words.size()) {
+ if ( PRINT_DEBUG ) {
+ cerr << "WARNING: Relocation over the size of the hypothesis, replacing at the end of it."<<endl;
+ }
+ newloc = ( int ) words.size()-1;
+ }
- if ( ( int ) hypSpans.size() > 0 )
- {
- cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: " << ( int ) hypSpans.size() << endl ;
- }
- else
- {
- cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << "word length: " << ( int ) words.size() << " span length: null" << endl ;
- }
- cerr << "BEGIN DEBUG : terCalc::permuter :" << endl << join(" ",words) << " start: " << start << " end: " << end << " newloc "<< newloc << endl << "END DEBUG " << endl;
- }
- if (newloc >= ( int ) words.size())
- {
- if ( PRINT_DEBUG )
- {
- cerr << "WARNING: Relocation over the size of the hypothesis, replacing at the end of it."<<endl;
- }
- newloc = ( int ) words.size()-1;
- }
-
// }
- if ( newloc == -1 )
- {
- for ( int i = start; i <= end;i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = 0; i <= start - 1;i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; i < ( int ) words.size();i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
+ if ( newloc == -1 ) {
+ for ( int i = start; i <= end; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = 0; i <= start - 1; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; i < ( int ) words.size(); i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ } else {
+ if ( newloc < start ) {
+
+ for ( int i = 0; i < newloc; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
}
- else
- {
- if ( newloc < start )
- {
-
- for ( int i = 0; i < newloc; i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = start; i <= end;i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = newloc ; i < start ;i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; i < ( int ) words.size();i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- }
- else
- {
- if ( newloc > end )
- {
- for ( int i = 0; i <= start - 1; i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; i <= newloc;i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = start; i <= end;i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = newloc + 1; i < ( int ) words.size();i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- }
- else
- {
- // we are moving inside of ourselves
- for ( int i = 0; i <= start - 1; i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = end + 1; ( i < ( int ) words.size() ) && ( i <= ( end + ( newloc - start ) ) ); i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = start; i <= end;i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- for ( int i = ( end + ( newloc - start ) + 1 ); i < ( int ) words.size();i++ )
- {
- nwords.at ( c++ ) = words.at ( i );
- if ( ( int ) hypSpans.size() > 0 )
- {
- spans.at ( c - 1 ) = hypSpans.at ( i );
- }
- }
- }
- }
+ }
+ for ( int i = start; i <= end; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
}
- NBR_PERMUTS_CONSID++;
-
- if ( PRINT_DEBUG )
- {
- cerr << "nwords" << join(" ",nwords) << endl;
-// cerr << "spans" << spans. << endl;
- }
-
- to_return.nwords = nwords;
- to_return.aftershift = spans;
- return to_return;
- }
- void terCalc::setDebugMode ( bool b )
- {
- PRINT_DEBUG = b;
+ }
+ for ( int i = newloc ; i < start ; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; i < ( int ) words.size(); i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ } else {
+ if ( newloc > end ) {
+ for ( int i = 0; i <= start - 1; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; i <= newloc; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = start; i <= end; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = newloc + 1; i < ( int ) words.size(); i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ } else {
+ // we are moving inside of ourselves
+ for ( int i = 0; i <= start - 1; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = end + 1; ( i < ( int ) words.size() ) && ( i <= ( end + ( newloc - start ) ) ); i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = start; i <= end; i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ for ( int i = ( end + ( newloc - start ) + 1 ); i < ( int ) words.size(); i++ ) {
+ nwords.at ( c++ ) = words.at ( i );
+ if ( ( int ) hypSpans.size() > 0 ) {
+ spans.at ( c - 1 ) = hypSpans.at ( i );
+ }
+ }
+ }
}
+ }
+ NBR_PERMUTS_CONSID++;
+
+ if ( PRINT_DEBUG ) {
+ cerr << "nwords" << join(" ",nwords) << endl;
+// cerr << "spans" << spans. << endl;
+ }
+
+ to_return.nwords = nwords;
+ to_return.aftershift = spans;
+ return to_return;
+}
+// Switches the PRINT_DEBUG flag that gates this class's cerr debug traces.
+void terCalc::setDebugMode ( bool b )
+{
+  this->PRINT_DEBUG = b;
+}
}
diff --git a/mert/TER/tercalc.h b/mert/TER/tercalc.h
index 92d9caf2b..778d83395 100644
--- a/mert/TER/tercalc.h
+++ b/mert/TER/tercalc.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -41,62 +41,62 @@ namespace TERCpp
{
// typedef size_t WERelement[2];
// Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del)
- typedef vector<terShift> vecTerShift;
- /**
- @author
- */
- class terCalc
- {
- private :
+typedef vector<terShift> vecTerShift;
+/**
+ @author
+*/
+class terCalc
+{
+private :
// Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del)
- WERalignment l_WERalignment;
+ WERalignment l_WERalignment;
// HashMap contenant les valeurs de hash de chaque mot
- hashMap bagOfWords;
- int TAILLE_PERMUT_MAX;
- // Increments internes
- int NBR_SEGS_EVALUATED;
- int NBR_PERMUTS_CONSID;
- int NBR_BS_APPELS;
- int DIST_MAX_PERMUT;
- bool PRINT_DEBUG;
+ hashMap bagOfWords;
+ int TAILLE_PERMUT_MAX;
+ // Increments internes
+ int NBR_SEGS_EVALUATED;
+ int NBR_PERMUTS_CONSID;
+ int NBR_BS_APPELS;
+ int DIST_MAX_PERMUT;
+ bool PRINT_DEBUG;
- // Utilisés dans minDistEdit et ils ne sont pas réajustés
- double S[1000][1000];
- char P[1000][1000];
- vector<vecInt> refSpans;
- vector<vecInt> hypSpans;
- int TAILLE_BEAM;
+ // Utilisés dans minDistEdit et ils ne sont pas réajustés
+ double S[1000][1000];
+ char P[1000][1000];
+ vector<vecInt> refSpans;
+ vector<vecInt> hypSpans;
+ int TAILLE_BEAM;
- public:
- int shift_cost;
- int insert_cost;
- int delete_cost;
- int substitute_cost;
- int match_cost;
- double infinite;
- terCalc();
+public:
+ int shift_cost;
+ int insert_cost;
+ int delete_cost;
+ int substitute_cost;
+ int match_cost;
+ double infinite;
+ terCalc();
// ~terCalc();
// size_t* hashVec ( vector<string> s );
- void setDebugMode ( bool b );
+ void setDebugMode ( bool b );
// int WERCalculation ( size_t * ref, size_t * hyp );
// int WERCalculation ( vector<string> ref, vector<string> hyp );
// int WERCalculation ( vector<int> ref, vector<int> hyp );
- terAlignment WERCalculation ( vector<string> hyp, vector<string> ref );
+ terAlignment WERCalculation ( vector<string> hyp, vector<string> ref );
// string vectorToString(vector<string> vec);
// vector<string> subVector(vector<string> vec, int start, int end);
- hashMapInfos createConcordMots ( vector<string> hyp, vector<string> ref );
- terAlignment minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans );
- bool trouverIntersection ( vecInt refSpan, vecInt hypSpan );
- terAlignment TER ( vector<string> hyp, vector<string> ref , float avRefLength );
- terAlignment TER ( vector<string> hyp, vector<string> ref );
- terAlignment TER ( vector<int> hyp, vector<int> ref );
- bestShiftStruct findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment cur_align );
- void calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign );
- vector<vecTerShift> calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign );
- alignmentStruct permuter ( vector<string> words, terShift s );
- alignmentStruct permuter ( vector<string> words, int start, int end, int newloc );
- };
+ hashMapInfos createConcordMots ( vector<string> hyp, vector<string> ref );
+ terAlignment minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans );
+ bool trouverIntersection ( vecInt refSpan, vecInt hypSpan );
+ terAlignment TER ( vector<string> hyp, vector<string> ref , float avRefLength );
+ terAlignment TER ( vector<string> hyp, vector<string> ref );
+ terAlignment TER ( vector<int> hyp, vector<int> ref );
+ bestShiftStruct findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment cur_align );
+ void calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign );
+ vector<vecTerShift> calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign );
+ alignmentStruct permuter ( vector<string> words, terShift s );
+ alignmentStruct permuter ( vector<string> words, int start, int end, int newloc );
+};
}
diff --git a/mert/TER/tools.cpp b/mert/TER/tools.cpp
index 64e1483b6..8858a7119 100644
--- a/mert/TER/tools.cpp
+++ b/mert/TER/tools.cpp
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -25,748 +25,677 @@ using namespace boost::xpressive;
namespace Tools
{
- string vectorToString ( vector<string> vec )
- {
- string retour ( "" );
- for ( vector<string>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
- {
- if ( vecIter == vec.begin() )
- {
- retour += ( *vecIter );
- }
- else
- {
- retour += "\t" + ( *vecIter );
- }
- }
- return retour;
+string vectorToString ( vector<string> vec )
+{
+ string retour ( "" );
+ for ( vector<string>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
+ if ( vecIter == vec.begin() ) {
+ retour += ( *vecIter );
+ } else {
+ retour += "\t" + ( *vecIter );
}
- string vectorToString ( vector<char> vec )
- {
- stringstream retour;
- retour.str("");
- for ( vector<char>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
- {
- if ( vecIter == vec.begin() )
- {
- retour << ( *vecIter );
- }
- else
- {
- retour << "\t" << ( *vecIter );
- }
- }
- return retour.str();
+ }
+ return retour;
+}
+string vectorToString ( vector<char> vec )
+{
+ stringstream retour;
+ retour.str("");
+ for ( vector<char>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
+ if ( vecIter == vec.begin() ) {
+ retour << ( *vecIter );
+ } else {
+ retour << "\t" << ( *vecIter );
}
- string vectorToString ( vector<int> vec )
- {
- stringstream retour;
- retour.str("");
- for ( vector<int>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
- {
- if ( vecIter == vec.begin() )
- {
- retour << ( *vecIter );
- }
- else
- {
- retour << "\t" << ( *vecIter );
- }
- }
- return retour.str();
+ }
+ return retour.str();
+}
+string vectorToString ( vector<int> vec )
+{
+ stringstream retour;
+ retour.str("");
+ for ( vector<int>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
+ if ( vecIter == vec.begin() ) {
+ retour << ( *vecIter );
+ } else {
+ retour << "\t" << ( *vecIter );
}
+ }
+ return retour.str();
+}
- string vectorToString ( vector< string > vec, string s )
- {
- string retour ( "" );
- for ( vector<string>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
- {
- if ( vecIter == vec.begin() )
- {
- retour += ( *vecIter );
- }
- else
- {
- retour += s + ( *vecIter );
- }
- }
- return retour;
-
+string vectorToString ( vector< string > vec, string s )
+{
+ string retour ( "" );
+ for ( vector<string>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
+ if ( vecIter == vec.begin() ) {
+ retour += ( *vecIter );
+ } else {
+ retour += s + ( *vecIter );
}
+ }
+ return retour;
- string vectorToString ( vector< char > vec, string s )
- {
- stringstream retour;
- retour.str("");
- for ( vector<char>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
- {
- if ( vecIter == vec.begin() )
- {
- retour << ( *vecIter );
- }
- else
- {
- retour << s << ( *vecIter );
- }
- }
- return retour.str();
+}
+string vectorToString ( vector< char > vec, string s )
+{
+ stringstream retour;
+ retour.str("");
+ for ( vector<char>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
+ if ( vecIter == vec.begin() ) {
+ retour << ( *vecIter );
+ } else {
+ retour << s << ( *vecIter );
}
+ }
+ return retour.str();
- string vectorToString ( vector< int > vec, string s )
- {
- stringstream retour;
- retour.str("");
- for ( vector<int>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
- {
- if ( vecIter == vec.begin() )
- {
- retour << ( *vecIter );
- }
- else
- {
- retour << s << ( *vecIter );
- }
- }
- return retour.str();
+}
+string vectorToString ( vector< int > vec, string s )
+{
+ stringstream retour;
+ retour.str("");
+ for ( vector<int>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
+ if ( vecIter == vec.begin() ) {
+ retour << ( *vecIter );
+ } else {
+ retour << s << ( *vecIter );
}
+ }
+ return retour.str();
- string vectorToString ( vector< bool > vec, string s )
- {
- stringstream retour;
- retour.str("");
- for ( vector<bool>::iterator vecIter = vec.begin();vecIter != vec.end(); vecIter++ )
- {
- if ( vecIter == vec.begin() )
- {
- retour << ( *vecIter );
- }
- else
- {
- retour << s << ( *vecIter );
- }
- }
- return retour.str();
+}
+string vectorToString ( vector< bool > vec, string s )
+{
+ stringstream retour;
+ retour.str("");
+ for ( vector<bool>::iterator vecIter = vec.begin(); vecIter != vec.end(); vecIter++ ) {
+ if ( vecIter == vec.begin() ) {
+ retour << ( *vecIter );
+ } else {
+ retour << s << ( *vecIter );
}
- string vectorToString ( char* vec, string s , int taille)
- {
- stringstream retour;
- retour.str("");
- int l_i;
- for ( l_i=0; l_i < taille ; l_i++)
- {
- if ( l_i == 0 )
- {
- retour << vec[l_i];
- }
- else
- {
- retour << s << vec[l_i];
- }
- }
- return retour.str();
+ }
+ return retour.str();
+}
+string vectorToString ( char* vec, string s , int taille)
+{
+ stringstream retour;
+ retour.str("");
+ int l_i;
+ for ( l_i=0; l_i < taille ; l_i++) {
+ if ( l_i == 0 ) {
+ retour << vec[l_i];
+ } else {
+ retour << s << vec[l_i];
}
+ }
+ return retour.str();
- string vectorToString ( int* vec, string s , int taille)
- {
- stringstream retour;
- retour.str("");
- int l_i;
- for ( l_i=0; l_i < taille ; l_i++)
- {
- if ( l_i == 0 )
- {
- retour << vec[l_i];
- }
- else
- {
- retour << s << vec[l_i];
- }
- }
- return retour.str();
+}
+string vectorToString ( int* vec, string s , int taille)
+{
+ stringstream retour;
+ retour.str("");
+ int l_i;
+ for ( l_i=0; l_i < taille ; l_i++) {
+ if ( l_i == 0 ) {
+ retour << vec[l_i];
+ } else {
+ retour << s << vec[l_i];
}
+ }
+ return retour.str();
- string vectorToString ( bool* vec, string s , int taille)
- {
- stringstream retour;
- retour.str("");
- int l_i;
- for ( l_i=0; l_i < taille ; l_i++)
- {
- if ( l_i == 0 )
- {
- retour << vec[l_i];
- }
- else
- {
- retour << s << vec[l_i];
- }
- }
- return retour.str();
+}
+string vectorToString ( bool* vec, string s , int taille)
+{
+ stringstream retour;
+ retour.str("");
+ int l_i;
+ for ( l_i=0; l_i < taille ; l_i++) {
+ if ( l_i == 0 ) {
+ retour << vec[l_i];
+ } else {
+ retour << s << vec[l_i];
}
-
- vector<string> subVector ( vector<string> vec, int start, int end )
- {
- vector<string> retour;
- if ( start > end )
- {
- cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
- exit ( 0 );
- }
- for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ )
- {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
- }
-
- vector<int> subVector ( vector<int> vec, int start, int end )
- {
- vector<int> retour;
- if ( start > end )
- {
- cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
- exit ( 0 );
- }
- for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ )
- {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
- }
-
- vector<float> subVector ( vector<float> vec, int start, int end )
- {
- vector<float> retour;
- if ( start > end )
- {
- cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
- exit ( 0 );
- }
- for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ )
- {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
- }
-
- vector<string> copyVector ( vector<string> vec )
- {
- vector<string> retour;
- for ( int i = 0; i < ( int ) vec.size(); i++ )
- {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
+ }
+ return retour.str();
+
+}
+
+vector<string> subVector ( vector<string> vec, int start, int end )
+{
+ vector<string> retour;
+ if ( start > end ) {
+ cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
+ exit ( 0 );
+ }
+ for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+}
+
+vector<int> subVector ( vector<int> vec, int start, int end )
+{
+ vector<int> retour;
+ if ( start > end ) {
+ cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
+ exit ( 0 );
+ }
+ for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+}
+
+vector<float> subVector ( vector<float> vec, int start, int end )
+{
+ vector<float> retour;
+ if ( start > end ) {
+ cerr << "ERREUR : TERcalc::subVector : end > start" << endl;
+ exit ( 0 );
+ }
+ for ( int i = start; ( ( i < end ) && ( i < ( int ) vec.size() ) ); i++ ) {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+}
+
+vector<string> copyVector ( vector<string> vec )
+{
+ vector<string> retour;
+ for ( int i = 0; i < ( int ) vec.size(); i++ ) {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+}
+vector<int> copyVector ( vector<int> vec )
+{
+ vector<int> retour;
+ for ( int i = 0; i < ( int ) vec.size(); i++ ) {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+}
+vector<float> copyVector ( vector<float> vec )
+{
+ vector<float> retour;
+ for ( int i = 0; i < ( int ) vec.size(); i++ ) {
+ retour.push_back ( vec.at ( i ) );
+ }
+ return retour;
+}
+vector<string> stringToVector ( string s, string tok )
+{
+ vector<string> to_return;
+ string to_push ( "" );
+ bool pushed = false;
+ string::iterator sIt;
+ for ( sIt = s.begin(); sIt < s.end(); sIt++ ) {
+ pushed = false;
+ for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) {
+ if ( ( *sIt ) == ( *sTok ) ) {
+ to_return.push_back ( to_push );
+ to_push = "";
+ pushed = true;
+ }
}
- vector<int> copyVector ( vector<int> vec )
- {
- vector<int> retour;
- for ( int i = 0; i < ( int ) vec.size(); i++ )
- {
- retour.push_back ( vec.at ( i ) );
- }
- return retour;
+ if ( !pushed ) {
+ to_push.push_back ( ( *sIt ) );
}
- vector<float> copyVector ( vector<float> vec )
- {
- vector<float> retour;
- for ( int i = 0; i < ( int ) vec.size(); i++ )
- {
- retour.push_back ( vec.at ( i ) );
+ }
+ to_return.push_back ( to_push );
+ return to_return;
+}
+vector<int> stringToVectorInt ( string s, string tok )
+{
+ vector<int> to_return;
+ string to_push ( "" );
+ bool pushed = false;
+ string::iterator sIt;
+ for ( sIt = s.begin(); sIt < s.end(); sIt++ ) {
+ pushed = false;
+ for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) {
+ if ( ( *sIt ) == ( *sTok ) ) {
+ if ( ( int ) to_push.length() > 0 ) {
+ to_return.push_back ( atoi ( to_push.c_str() ) );
}
- return retour;
+ to_push = "";
+ pushed = true;
+ }
}
- vector<string> stringToVector ( string s, string tok )
- {
- vector<string> to_return;
- string to_push ( "" );
- bool pushed = false;
- string::iterator sIt;
- for ( sIt = s.begin(); sIt < s.end(); sIt++ )
- {
- pushed = false;
- for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ )
- {
- if ( ( *sIt ) == ( *sTok ) )
- {
- to_return.push_back ( to_push );
- to_push = "";
- pushed = true;
- }
- }
- if ( !pushed )
- {
- to_push.push_back ( ( *sIt ) );
- }
- }
- to_return.push_back ( to_push );
- return to_return;
+ if ( !pushed ) {
+ to_push.push_back ( ( *sIt ) );
}
- vector<int> stringToVectorInt ( string s, string tok )
- {
- vector<int> to_return;
- string to_push ( "" );
- bool pushed = false;
- string::iterator sIt;
- for ( sIt = s.begin(); sIt < s.end(); sIt++ )
- {
- pushed = false;
- for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ )
- {
- if ( ( *sIt ) == ( *sTok ) )
- {
- if ( ( int ) to_push.length() > 0 )
- {
- to_return.push_back ( atoi ( to_push.c_str() ) );
- }
- to_push = "";
- pushed = true;
- }
- }
- if ( !pushed )
- {
- to_push.push_back ( ( *sIt ) );
- }
- }
- if ( ( int ) to_push.length() > 0 )
- {
- to_return.push_back ( atoi ( to_push.c_str() ) );
+ }
+ if ( ( int ) to_push.length() > 0 ) {
+ to_return.push_back ( atoi ( to_push.c_str() ) );
+ }
+ return to_return;
+}
+vector<float> stringToVectorFloat ( string s, string tok )
+{
+ vector<float> to_return;
+ string to_push ( "" );
+ bool pushed = false;
+ string::iterator sIt;
+ for ( sIt = s.begin(); sIt < s.end(); sIt++ ) {
+ pushed = false;
+ for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ ) {
+ if ( ( *sIt ) == ( *sTok ) ) {
+ if ( ( int ) to_push.length() > 0 ) {
+ to_return.push_back ( atof ( to_push.c_str() ) );
}
- return to_return;
+ to_push = "";
+ pushed = true;
+ }
}
- vector<float> stringToVectorFloat ( string s, string tok )
- {
- vector<float> to_return;
- string to_push ( "" );
- bool pushed = false;
- string::iterator sIt;
- for ( sIt = s.begin(); sIt < s.end(); sIt++ )
- {
- pushed = false;
- for ( string::iterator sTok = tok.begin(); sTok < tok.end(); sTok++ )
- {
- if ( ( *sIt ) == ( *sTok ) )
- {
- if ( ( int ) to_push.length() > 0 )
- {
- to_return.push_back ( atof ( to_push.c_str() ) );
- }
- to_push = "";
- pushed = true;
- }
- }
- if ( !pushed )
- {
- to_push.push_back ( ( *sIt ) );
- }
- }
- if ( ( int ) to_push.length() > 0 )
- {
- to_return.push_back ( atoi ( to_push.c_str() ) );
- }
- return to_return;
+ if ( !pushed ) {
+ to_push.push_back ( ( *sIt ) );
}
+ }
+ if ( ( int ) to_push.length() > 0 ) {
+ to_return.push_back ( atoi ( to_push.c_str() ) );
+ }
+ return to_return;
+}
- string lowerCase ( string str )
- {
- for ( int i = 0;i < ( int ) str.size();i++ )
- {
- if ( ( str[i] >= 0x41 ) && ( str[i] <= 0x5A ) )
- {
- str[i] = str[i] + 0x20;
- }
- }
- return str;
+string lowerCase ( string str )
+{
+ for ( int i = 0; i < ( int ) str.size(); i++ ) {
+ if ( ( str[i] >= 0x41 ) && ( str[i] <= 0x5A ) ) {
+ str[i] = str[i] + 0x20;
}
- string removePunctTercom ( string str )
- {
- string str_mod = str;
- sregex rex;
- string replace;
+ }
+ return str;
+}
+string removePunctTercom ( string str )
+{
+ string str_mod = str;
+ sregex rex;
+ string replace;
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\"]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\"]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[,]" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[,]" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([\\.]$)" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([\\.]$)" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\?]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\?]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\;]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\;]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\:]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\:]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\!]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\!]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\(]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\(]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\)]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\)]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+$" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+$" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
- }
- string removePunct ( string str )
- {
- string str_mod = str;
- sregex rex;
- string replace;
+ return str_mod;
+}
+string removePunct ( string str )
+{
+ string str_mod = str;
+ sregex rex;
+ string replace;
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\"]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\"]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[,]" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[,]" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
- replace = ( "$1 $3" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([^0-9])([\\.])([^0-9])" );
+ replace = ( "$1 $3" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "([\\.]$)" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "([\\.]$)" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\?]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\?]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\;]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\;]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\:]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\:]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\!]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\!]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\(]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\(]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\)]" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\)]" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+$" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+$" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
- }
- string tokenizePunct ( string str )
- {
- string str_mod = str;
- sregex rex = sregex::compile ( "(([^0-9])([\\,])([^0-9]))" );
- string replace ( "$2 $3 $4" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ return str_mod;
+}
+string tokenizePunct ( string str )
+{
+ string str_mod = str;
+ sregex rex = sregex::compile ( "(([^0-9])([\\,])([^0-9]))" );
+ string replace ( "$2 $3 $4" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(([^0-9])([\\.])([^0-9]))" );
- replace = ( "$2 $3 $4" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(([^0-9])([\\.])([^0-9]))" );
+ replace = ( "$2 $3 $4" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.]) )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.]) )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.])$)" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([A-Z]|[a-z]) ([\\.])$)" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([A-Z]|[a-z]) ([\\.]) )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([A-Z]|[a-z]) ([\\.]) )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(([A-Z]|[a-z])([\\.]) ([A-Z]|[a-z])([\\.]) )" );
- replace = ( "$2.$4. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(([A-Z]|[a-z])([\\.]) ([A-Z]|[a-z])([\\.]) )" );
+ replace = ( "$2.$4. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\?]" );
- replace = ( " ? " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\?]" );
+ replace = ( " ? " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\;]" );
- replace = ( " ; " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\;]" );
+ replace = ( " ; " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(([^0-9])([\\:])([^0-9]))" );
- replace = ( "$2 $3 $4" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(([^0-9])([\\:])([^0-9]))" );
+ replace = ( "$2 $3 $4" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\!]" );
- replace = ( " ! " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\!]" );
+ replace = ( " ! " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\(]" );
- replace = ( " ( " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\(]" );
+ replace = ( " ( " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\\)]" );
- replace = ( " ) " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\\)]" );
+ replace = ( " ) " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[\"]" );
- replace = ( " \" " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[\"]" );
+ replace = ( " \" " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(num_ \\( ([^\\)]+) \\))" );
- replace = ( "num_($2)" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(num_ \\( ([^\\)]+) \\))" );
+ replace = ( "num_($2)" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(ordinal_ \\( ([^\\)]*) \\))" );
- replace = ( "ordinal_($2)" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(ordinal_ \\( ([^\\)]*) \\))" );
+ replace = ( "ordinal_($2)" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Mm]) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Mm]) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Mm]) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Mm]) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Dd]r) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Dd]r) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Dd]r) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Dd]r) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Mm]r) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Mm]r) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Mm]r) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Mm]r) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Mm]rs) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Mm]rs) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Mm]rs) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Mm]rs) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^([Nn]o) \\.)" );
- replace = ( "$2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^([Nn]o) \\.)" );
+ replace = ( "$2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( ([Nn]o) \\.)" );
- replace = ( " $2." );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( ([Nn]o) \\.)" );
+ replace = ( " $2." );
+ str_mod = regex_replace ( str_mod, rex, replace );
// rex = sregex::compile ( "(^(([Jj]an)|([Ff]ev)|([Mm]ar)|([Aa]pr)|([Jj]un)|([Jj]ul)|([Aa]ug)|([Ss]ept)|([Oo]ct)|([Nn]ov)|([Dd]ec)) \\.)" );
// replace = ( "$2." );
// str_mod = regex_replace ( str_mod, rex, replace );
-//
+//
// rex = sregex::compile ( "( (([Jj]an)|([Ff]ev)|([Mm]ar)|([Aa]pr)|([Jj]un)|([Jj]ul)|([Aa]ug)|([Ss]ept)|([Oo]ct)|([Nn]ov)|([Dd]ec)) \\.)" );
// replace = ( " $2." );
// str_mod = regex_replace ( str_mod, rex, replace );
-//
+//
// rex = sregex::compile ( "(^(([Gg]en)|([Cc]ol)) \\.)" );
// replace = ( "$2." );
// str_mod = regex_replace ( str_mod, rex, replace );
-//
+//
// rex = sregex::compile ( "( (([Gg]en)|([Cc]ol)) \\.)" );
// replace = ( " $2." );
// str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^(([A-Z][a-z])) \\. )" );
- replace = ( "$2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^(([A-Z][a-z])) \\. )" );
+ replace = ( "$2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( (([A-Z][a-z])) \\. )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( (([A-Z][a-z])) \\. )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "(^(([A-Z][a-z][a-z])) \\. )" );
- replace = ( "$2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "(^(([A-Z][a-z][a-z])) \\. )" );
+ replace = ( "$2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "( (([A-Z][a-z][a-z])) \\. )" );
- replace = ( " $2. " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "( (([A-Z][a-z][a-z])) \\. )" );
+ replace = ( " $2. " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+" );
- replace = " ";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+" );
+ replace = " ";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "^[ ]+" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "^[ ]+" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "[ ]+$" );
- replace = "";
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "[ ]+$" );
+ replace = "";
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
- }
+ return str_mod;
+}
- string normalizeStd ( string str )
- {
- string str_mod = str;
- sregex rex = sregex::compile ( "(<skipped>)" );
- string replace ( "" );
- str_mod = regex_replace ( str_mod, rex, replace );
+string normalizeStd ( string str )
+{
+ string str_mod = str;
+ sregex rex = sregex::compile ( "(<skipped>)" );
+ string replace ( "" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "-\n" );
- replace = ( "" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "-\n" );
+ replace = ( "" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "\n" );
- replace = ( " " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "\n" );
+ replace = ( " " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&quot;" );
- replace = ( "\"" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "&quot;" );
+ replace = ( "\"" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&amp;" );
- replace = ( "& " );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "&amp;" );
+ replace = ( "& " );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&lt;" );
- replace = ( "<" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "&lt;" );
+ replace = ( "<" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- rex = sregex::compile ( "&gt;" );
- replace = ( ">" );
- str_mod = regex_replace ( str_mod, rex, replace );
+ rex = sregex::compile ( "&gt;" );
+ replace = ( ">" );
+ str_mod = regex_replace ( str_mod, rex, replace );
- return str_mod;
- }
+ return str_mod;
+}
- param copyParam ( param p )
- {
- param to_return;
- to_return.caseOn = p.caseOn;
- to_return.noPunct = p.noPunct;
- to_return.debugMode = p.debugMode;
- to_return.debugLevel = p.debugLevel;
- to_return.hypothesisFile = p.hypothesisFile;
- to_return.referenceFile = p.referenceFile;
- to_return.normalize = p.normalize;
- to_return.noTxtIds = p.noTxtIds;
- to_return.outputFileExtension = p.outputFileExtension;
- to_return.outputFileName = p.outputFileName;
- to_return.sgmlInputs = p.sgmlInputs;
- to_return.tercomLike = p.tercomLike;
- to_return.printAlignments = p.printAlignments;
- to_return.WER=p.WER;
- return to_return;
- }
- string printParams ( param p )
- {
- stringstream s;
- s << "caseOn = " << p.caseOn << endl;
- s << "noPunct = " << p.noPunct << endl;
- s << "debugMode = " << p.debugMode << endl;
- s << "debugLevel = " << p.debugLevel << endl;
- s << "hypothesisFile = " << p.hypothesisFile << endl;
- s << "referenceFile = " << p.referenceFile << endl;
- s << "normalize = " << p.normalize << endl;
- s << "noTxtIds = " << p.noTxtIds << endl;
- s << "outputFileExtension = " << p.outputFileExtension << endl;
- s << "outputFileName = " << p.outputFileName << endl;
- s << "sgmlInputs = " << p.sgmlInputs << endl;
- s << "tercomLike = " << p.tercomLike << endl;
- return s.str();
+param copyParam ( param p )
+{
+ param to_return;
+ to_return.caseOn = p.caseOn;
+ to_return.noPunct = p.noPunct;
+ to_return.debugMode = p.debugMode;
+ to_return.debugLevel = p.debugLevel;
+ to_return.hypothesisFile = p.hypothesisFile;
+ to_return.referenceFile = p.referenceFile;
+ to_return.normalize = p.normalize;
+ to_return.noTxtIds = p.noTxtIds;
+ to_return.outputFileExtension = p.outputFileExtension;
+ to_return.outputFileName = p.outputFileName;
+ to_return.sgmlInputs = p.sgmlInputs;
+ to_return.tercomLike = p.tercomLike;
+ to_return.printAlignments = p.printAlignments;
+ to_return.WER=p.WER;
+ return to_return;
+}
+string printParams ( param p )
+{
+ stringstream s;
+ s << "caseOn = " << p.caseOn << endl;
+ s << "noPunct = " << p.noPunct << endl;
+ s << "debugMode = " << p.debugMode << endl;
+ s << "debugLevel = " << p.debugLevel << endl;
+ s << "hypothesisFile = " << p.hypothesisFile << endl;
+ s << "referenceFile = " << p.referenceFile << endl;
+ s << "normalize = " << p.normalize << endl;
+ s << "noTxtIds = " << p.noTxtIds << endl;
+ s << "outputFileExtension = " << p.outputFileExtension << endl;
+ s << "outputFileName = " << p.outputFileName << endl;
+ s << "sgmlInputs = " << p.sgmlInputs << endl;
+ s << "tercomLike = " << p.tercomLike << endl;
+ return s.str();
- }
- string join ( string delim, vector<string> arr )
- {
- if ( ( int ) arr.size() == 0 ) return "";
+}
+string join ( string delim, vector<string> arr )
+{
+ if ( ( int ) arr.size() == 0 ) return "";
// if ((int)delim.compare("") == 0) delim = new String("");
// String s = new String("");
- stringstream s;
- s.str ( "" );
- for ( int i = 0; i < ( int ) arr.size(); i++ )
- {
- if ( i == 0 )
- {
- s << arr.at ( i );
- }
- else
- {
- s << delim << arr.at ( i );
- }
- }
- return s.str();
-// return "";
+ stringstream s;
+ s.str ( "" );
+ for ( int i = 0; i < ( int ) arr.size(); i++ ) {
+ if ( i == 0 ) {
+ s << arr.at ( i );
+ } else {
+ s << delim << arr.at ( i );
}
+ }
+ return s.str();
+// return "";
+}
}
diff --git a/mert/TER/tools.h b/mert/TER/tools.h
index 0a85e7b4b..157b739a5 100644
--- a/mert/TER/tools.h
+++ b/mert/TER/tools.h
@@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
-under the terms of the GNU Lesser General Public License as published by
+under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
@@ -35,32 +35,31 @@ using namespace std;
namespace Tools
{
- typedef vector<double> vecDouble;
- typedef vector<char> vecChar;
- typedef vector<int> vecInt;
- typedef vector<float> vecFloat;
- typedef vector<size_t> vecSize_t;
- typedef vector<string> vecString;
- typedef vector<string> alignmentElement;
- typedef vector<alignmentElement> WERalignment;
+typedef vector<double> vecDouble;
+typedef vector<char> vecChar;
+typedef vector<int> vecInt;
+typedef vector<float> vecFloat;
+typedef vector<size_t> vecSize_t;
+typedef vector<string> vecString;
+typedef vector<string> alignmentElement;
+typedef vector<alignmentElement> WERalignment;
-struct param
-{
- bool debugMode;
- string referenceFile; // path to the resources
- string hypothesisFile; // path to the configuration files
- string outputFileExtension;
- string outputFileName;
- bool noPunct;
- bool caseOn;
- bool normalize;
- bool tercomLike;
- bool sgmlInputs;
- bool noTxtIds;
- bool printAlignments;
- bool WER;
- int debugLevel;
+struct param {
+ bool debugMode;
+ string referenceFile; // path to the resources
+ string hypothesisFile; // path to the configuration files
+ string outputFileExtension;
+ string outputFileName;
+ bool noPunct;
+ bool caseOn;
+ bool normalize;
+ bool tercomLike;
+ bool sgmlInputs;
+ bool noTxtIds;
+ bool printAlignments;
+ bool WER;
+ int debugLevel;
};
// param = { false, "","","","" };
@@ -68,35 +67,35 @@ struct param
// private:
// public:
- string vectorToString ( vector<string> vec );
- string vectorToString ( vector<char> vec );
- string vectorToString ( vector<int> vec );
- string vectorToString ( vector<string> vec, string s );
- string vectorToString ( vector<char> vec, string s );
- string vectorToString ( vector<int> vec, string s );
- string vectorToString ( vector<bool> vec, string s );
- string vectorToString ( char* vec, string s, int taille );
- string vectorToString ( int* vec, string s , int taille );
- string vectorToString ( bool* vec, string s , int taille );
- vector<string> subVector ( vector<string> vec, int start, int end );
- vector<int> subVector ( vector<int> vec, int start, int end );
- vector<float> subVector ( vector<float> vec, int start, int end );
- vector<string> copyVector ( vector<string> vec );
- vector<int> copyVector ( vector<int> vec );
- vector<float> copyVector ( vector<float> vec );
- vector<string> stringToVector ( string s, string tok );
- vector<string> stringToVector ( char s, string tok );
- vector<string> stringToVector ( int s, string tok );
- vector<int> stringToVectorInt ( string s, string tok );
- vector<float> stringToVectorFloat ( string s, string tok );
- string lowerCase(string str);
- string removePunct(string str);
- string tokenizePunct(string str);
- string removePunctTercom(string str);
- string normalizeStd(string str);
- string printParams(param p);
- string join ( string delim, vector<string> arr );
+string vectorToString ( vector<string> vec );
+string vectorToString ( vector<char> vec );
+string vectorToString ( vector<int> vec );
+string vectorToString ( vector<string> vec, string s );
+string vectorToString ( vector<char> vec, string s );
+string vectorToString ( vector<int> vec, string s );
+string vectorToString ( vector<bool> vec, string s );
+string vectorToString ( char* vec, string s, int taille );
+string vectorToString ( int* vec, string s , int taille );
+string vectorToString ( bool* vec, string s , int taille );
+vector<string> subVector ( vector<string> vec, int start, int end );
+vector<int> subVector ( vector<int> vec, int start, int end );
+vector<float> subVector ( vector<float> vec, int start, int end );
+vector<string> copyVector ( vector<string> vec );
+vector<int> copyVector ( vector<int> vec );
+vector<float> copyVector ( vector<float> vec );
+vector<string> stringToVector ( string s, string tok );
+vector<string> stringToVector ( char s, string tok );
+vector<string> stringToVector ( int s, string tok );
+vector<int> stringToVectorInt ( string s, string tok );
+vector<float> stringToVectorFloat ( string s, string tok );
+string lowerCase(string str);
+string removePunct(string str);
+string tokenizePunct(string str);
+string removePunctTercom(string str);
+string normalizeStd(string str);
+string printParams(param p);
+string join ( string delim, vector<string> arr );
// };
- param copyParam(param p);
+param copyParam(param p);
}
#endif
diff --git a/mert/evaluator.cpp b/mert/evaluator.cpp
index caae07684..7ab03c7eb 100644
--- a/mert/evaluator.cpp
+++ b/mert/evaluator.cpp
@@ -43,7 +43,8 @@ private:
};
// load hypothesis from candidate output
-vector<ScoreStats> EvaluatorUtil::loadCand(const string& candFile) {
+vector<ScoreStats> EvaluatorUtil::loadCand(const string& candFile)
+{
ifstream cand(candFile.c_str());
if (!cand.good()) throw runtime_error("Error opening candidate file");
@@ -61,7 +62,8 @@ vector<ScoreStats> EvaluatorUtil::loadCand(const string& candFile) {
}
// load 1-best hypothesis from n-best file (useful if relying on alignment/tree information)
-vector<ScoreStats> EvaluatorUtil::loadNBest(const string& nBestFile) {
+vector<ScoreStats> EvaluatorUtil::loadNBest(const string& nBestFile)
+{
vector<ScoreStats> entries;
Data data(g_scorer);
@@ -81,8 +83,7 @@ void EvaluatorUtil::evaluate(const string& candFile, int bootstrap, bool nbest_i
if (nbest_input) {
entries = loadNBest(candFile);
- }
- else {
+ } else {
entries = loadCand(candFile);
}
diff --git a/mert/kbmira.cpp b/mert/kbmira.cpp
index 0abce8af4..5a119e875 100644
--- a/mert/kbmira.cpp
+++ b/mert/kbmira.cpp
@@ -77,7 +77,7 @@ int main(int argc, char** argv)
bool model_bg = false; // Use model for background corpus
bool verbose = false; // Verbose updates
bool safe_hope = false; // Model score cannot have more than BLEU_RATIO times more influence than BLEU
- size_t hgPruning = 50; //prune hypergraphs to have this many edges per reference word
+ size_t hgPruning = 50; //prune hypergraphs to have this many edges per reference word
// Command-line processing follows pro.cpp
po::options_description desc("Allowed options");
@@ -157,7 +157,7 @@ int main(int argc, char** argv)
do {
size_t equals = buffer.find_last_of("=");
UTIL_THROW_IF(equals == buffer.npos, util::Exception, "Incorrect format in dense feature file: '"
- << buffer << "'");
+ << buffer << "'");
string name = buffer.substr(0,equals);
names.push_back(name);
initParams.push_back(boost::lexical_cast<ValType>(buffer.substr(equals+2)));
@@ -183,7 +183,7 @@ int main(int argc, char** argv)
//Make sure that SparseVector encodes dense feature names as 0..n-1.
for (size_t i = 0; i < names.size(); ++i) {
size_t id = SparseVector::encode(names[i]);
- assert(id == i);
+ assert(id == i);
if (verbose) cerr << names[i] << " " << initParams[i] << endl;
}
@@ -246,12 +246,12 @@ int main(int argc, char** argv)
int iNumUpdates = 0;
ValType totalLoss = 0.0;
size_t sentenceIndex = 0;
- for(decoder->reset();!decoder->finished(); decoder->next()) {
+ for(decoder->reset(); !decoder->finished(); decoder->next()) {
HopeFearData hfd;
decoder->HopeFear(bg,wv,&hfd);
-
+
// Update weights
- if (!hfd.hopeFearEqual && hfd.hopeBleu > hfd.fearBleu) {
+ if (!hfd.hopeFearEqual && hfd.hopeBleu > hfd.fearBleu) {
// Vector difference
MiraFeatureVector diff = hfd.hopeFeatures - hfd.fearFeatures;
// Bleu difference