Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Hieu Hoang <hieu@hoang.co.uk> 2013-07-04 23:19:51 +0400
committer Hieu Hoang <hieu@hoang.co.uk> 2013-07-04 23:19:51 +0400
commit f35750bc08ca06766f2f9bf9742f45c986a1c44e (patch)
tree 4e720429b3cf403c38d37bd36198a962b5072729
parent b10159a29f9ab020b3e606ce04247dd7265a8590 (diff)
beautify
-rw-r--r-- mert/BleuDocScorer.cpp 79
-rw-r--r-- mert/BleuDocScorer.h 2
-rw-r--r-- mert/BleuScorer.h 4
-rw-r--r-- moses/FF/OSM-Feature/OpSequenceModel.cpp 246
-rw-r--r-- moses/FF/OSM-Feature/OpSequenceModel.h 49
-rw-r--r-- moses/FF/OSM-Feature/osmHyp.cpp 898
-rw-r--r-- moses/FF/OSM-Feature/osmHyp.h 115
-rw-r--r-- moses/LM/Ken.cpp 2
-rw-r--r-- moses/StaticData.cpp 6
-rw-r--r-- moses/Word.h 3
10 files changed, 678 insertions, 726 deletions
diff --git a/mert/BleuDocScorer.cpp b/mert/BleuDocScorer.cpp
index 558757cef..b96a6bc48 100644
--- a/mert/BleuDocScorer.cpp
+++ b/mert/BleuDocScorer.cpp
@@ -31,11 +31,11 @@ const char REFLEN_CLOSEST[] = "closest";
namespace MosesTuning
{
-
+
BleuDocScorer::BleuDocScorer(const string& config)
- : BleuScorer("BLEUDOC", config),
- m_ref_length_type(CLOSEST)
+ : BleuScorer("BLEUDOC", config),
+ m_ref_length_type(CLOSEST)
{
const string reflen = getConfig(KEY_REFLEN, REFLEN_CLOSEST);
if (reflen == REFLEN_AVERAGE) {
@@ -63,41 +63,40 @@ bool BleuDocScorer::OpenReferenceStream(istream* is, size_t file_id)
if (line.find("<doc docid") != std::string::npos) { // new document
doc_id++;
- m_references.push_back(new ScopedVector<Reference>());
+ m_references.push_back(new ScopedVector<Reference>());
sid = 0;
- }
- else if (line.find("<seg") != std::string::npos) { //new sentence
+ } else if (line.find("<seg") != std::string::npos) { //new sentence
int start = line.find_first_of('>') + 1;
std::string trans = line.substr(start, line.find_last_of('<')-start);
trans = preprocessSentence(trans);
if (file_id == 0) {
- Reference* ref = new Reference;
- m_references[doc_id]->push_back(ref); // Take ownership of the Reference object.
+ Reference* ref = new Reference;
+ m_references[doc_id]->push_back(ref); // Take ownership of the Reference object.
}
if (m_references[doc_id]->size() <= sid) {
- return false;
+ return false;
}
NgramCounts counts;
size_t length = CountNgrams(trans, counts, kBleuNgramOrder);
-
+
//for any counts larger than those already there, merge them in
for (NgramCounts::const_iterator ci = counts.begin(); ci != counts.end(); ++ci) {
- const NgramCounts::Key& ngram = ci->first;
- const NgramCounts::Value newcount = ci->second;
-
- NgramCounts::Value oldcount = 0;
- m_references[doc_id]->get().at(sid)->get_counts()->Lookup(ngram, &oldcount);
- if (newcount > oldcount) {
- m_references[doc_id]->get().at(sid)->get_counts()->operator[](ngram) = newcount;
- }
+ const NgramCounts::Key& ngram = ci->first;
+ const NgramCounts::Value newcount = ci->second;
+
+ NgramCounts::Value oldcount = 0;
+ m_references[doc_id]->get().at(sid)->get_counts()->Lookup(ngram, &oldcount);
+ if (newcount > oldcount) {
+ m_references[doc_id]->get().at(sid)->get_counts()->operator[](ngram) = newcount;
+ }
}
//add in the length
- m_references[doc_id]->get().at(sid)->push_back(length);
+ m_references[doc_id]->get().at(sid)->push_back(length);
if (sid > 0 && sid % 100 == 0) {
- TRACE_ERR(".");
+ TRACE_ERR(".");
}
++sid;
}
@@ -127,14 +126,14 @@ void BleuDocScorer::prepareStats(size_t sid, const string& text, ScoreStats& ent
//precision on each ngram type
for (NgramCounts::const_iterator testcounts_it = testcounts.begin();
- testcounts_it != testcounts.end(); ++testcounts_it) {
+ testcounts_it != testcounts.end(); ++testcounts_it) {
const NgramCounts::Value guess = testcounts_it->second;
const size_t len = testcounts_it->first.size();
NgramCounts::Value correct = 0;
-
+
NgramCounts::Value v = 0;
if (m_references[sid]->get().at(i)->get_counts()->Lookup(testcounts_it->first, &v)) {
- correct = min(v, guess);
+ correct = min(v, guess);
}
stats[len * 2 - 2] += correct;
stats[len * 2 - 1] += guess;
@@ -143,13 +142,13 @@ void BleuDocScorer::prepareStats(size_t sid, const string& text, ScoreStats& ent
const int reference_len = CalcReferenceLength(sid, i, length);
stats.push_back(reference_len);
- //ADD stats to totStats
- std::transform(stats.begin(), stats.end(), totStats.begin(),
- totStats.begin(), std::plus<int>());
+ //ADD stats to totStats
+ std::transform(stats.begin(), stats.end(), totStats.begin(),
+ totStats.begin(), std::plus<int>());
}
- entry.set(totStats);
+ entry.set(totStats);
}
-
+
std::vector<std::string> BleuDocScorer::splitDoc(const std::string& text)
{
std::vector<std::string> res;
@@ -188,18 +187,18 @@ statscore_t BleuDocScorer::calculateScore(const vector<int>& comps) const
int BleuDocScorer::CalcReferenceLength(size_t doc_id, size_t sentence_id, size_t length)
{
switch (m_ref_length_type) {
- case AVERAGE:
- return m_references[doc_id]->get().at(sentence_id)->CalcAverage();
- break;
- case CLOSEST:
- return m_references[doc_id]->get().at(sentence_id)->CalcClosest(length);
- break;
- case SHORTEST:
- return m_references[doc_id]->get().at(sentence_id)->CalcShortest();
- break;
- default:
- cerr << "unknown reference types." << endl;
- exit(1);
+ case AVERAGE:
+ return m_references[doc_id]->get().at(sentence_id)->CalcAverage();
+ break;
+ case CLOSEST:
+ return m_references[doc_id]->get().at(sentence_id)->CalcClosest(length);
+ break;
+ case SHORTEST:
+ return m_references[doc_id]->get().at(sentence_id)->CalcShortest();
+ break;
+ default:
+ cerr << "unknown reference types." << endl;
+ exit(1);
}
}
diff --git a/mert/BleuDocScorer.h b/mert/BleuDocScorer.h
index 349745825..9677410f8 100644
--- a/mert/BleuDocScorer.h
+++ b/mert/BleuDocScorer.h
@@ -29,7 +29,7 @@ public:
virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
virtual statscore_t calculateScore(const std::vector<int>& comps) const;
- int CalcReferenceLength(std::size_t doc_id, std::size_t sentence_id, std::size_t length);
+ int CalcReferenceLength(std::size_t doc_id, std::size_t sentence_id, std::size_t length);
// NOTE: this function is used for unit testing.
virtual bool OpenReferenceStream(std::istream* is, std::size_t file_id);
diff --git a/mert/BleuScorer.h b/mert/BleuScorer.h
index 92d7fb9d5..8be567574 100644
--- a/mert/BleuScorer.h
+++ b/mert/BleuScorer.h
@@ -67,7 +67,7 @@ public:
// NOTE: this function is used for unit testing.
virtual bool OpenReferenceStream(std::istream* is, std::size_t file_id);
- //private:
+ //private:
protected:
ReferenceLengthType m_ref_length_type;
@@ -76,7 +76,7 @@ protected:
// constructor used by subclasses
BleuScorer(const std::string& name, const std::string& config): StatisticsBasedScorer(name,config) {}
-
+
// no copying allowed
BleuScorer(const BleuScorer&);
BleuScorer& operator=(const BleuScorer&);
diff --git a/moses/FF/OSM-Feature/OpSequenceModel.cpp b/moses/FF/OSM-Feature/OpSequenceModel.cpp
index 2e12f0aef..566d0cceb 100644
--- a/moses/FF/OSM-Feature/OpSequenceModel.cpp
+++ b/moses/FF/OSM-Feature/OpSequenceModel.cpp
@@ -11,7 +11,7 @@ namespace Moses
{
OpSequenceModel::OpSequenceModel(const std::string &line)
-:StatefulFeatureFunction("OpSequenceModel", 5, line )
+ :StatefulFeatureFunction("OpSequenceModel", 5, line )
{
ReadParameters();
@@ -20,29 +20,29 @@ OpSequenceModel::OpSequenceModel(const std::string &line)
void OpSequenceModel :: readLanguageModel(const char *lmFile)
{
- string unkOp = "_TRANS_SLF_";
+ string unkOp = "_TRANS_SLF_";
-
- /*
- // Code for SRILM
+ /*
+
+ // Code for SRILM
- vector <int> numbers;
+ vector <int> numbers;
int nonWordFlag = 0;
-
- ptrOp = new Api;
- ptrOp -> read_lm(lmFile,lmOrder);
- numbers.push_back(ptrOp->getLMID(const_cast <char *> (unkOp.c_str())));
- unkOpProb = ptrOp->contextProbN(numbers,nonWordFlag);
-
- */
-
- // Code to load KenLM
-
- OSM = new Model(m_lmPath.c_str());
- State startState = OSM->NullContextState();
- State endState;
- unkOpProb = OSM->Score(startState,OSM->GetVocabulary().Index(unkOp),endState);
+
+ ptrOp = new Api;
+ ptrOp -> read_lm(lmFile,lmOrder);
+ numbers.push_back(ptrOp->getLMID(const_cast <char *> (unkOp.c_str())));
+ unkOpProb = ptrOp->contextProbN(numbers,nonWordFlag);
+
+ */
+
+ // Code to load KenLM
+
+ OSM = new Model(m_lmPath.c_str());
+ State startState = OSM->NullContextState();
+ State endState;
+ unkOpProb = OSM->Score(startState,OSM->GetVocabulary().Index(unkOp),endState);
}
@@ -86,58 +86,55 @@ void OpSequenceModel::Load()
void OpSequenceModel:: Evaluate(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
- osmHypothesis obj;
- obj.setState(OSM->NullContextState());
- WordsBitmap myBitmap(source.GetSize());
- vector <string> mySourcePhrase;
- vector <string> myTargetPhrase;
- vector<float> scores(5);
- vector <int> alignments;
- int startIndex = 0;
- int endIndex = source.GetSize();
-
- const AlignmentInfo &align = targetPhrase.GetAlignTerm();
- AlignmentInfo::const_iterator iter;
-
-
- for (iter = align.begin(); iter != align.end(); ++iter)
- {
- alignments.push_back(iter->first);
- alignments.push_back(iter->second);
- }
-
- for (int i = 0; i < targetPhrase.GetSize(); i++)
- {
- if (targetPhrase.GetWord(i).IsOOV())
- myTargetPhrase.push_back("_TRANS_SLF_");
- else
- myTargetPhrase.push_back(targetPhrase.GetWord(i).GetFactor(0)->GetString().as_string());
- }
-
- for (int i = 0; i < source.GetSize(); i++)
- {
- mySourcePhrase.push_back(source.GetWord(i).GetFactor(0)->GetString().as_string());
- }
-
- obj.setPhrases(mySourcePhrase , myTargetPhrase);
- obj.constructCepts(alignments,startIndex,endIndex-1,targetPhrase.GetSize());
- obj.computeOSMFeature(startIndex,myBitmap);
- obj.calculateOSMProb(*OSM);
- obj.populateScores(scores);
- estimatedFutureScore.PlusEquals(this, scores);
+ osmHypothesis obj;
+ obj.setState(OSM->NullContextState());
+ WordsBitmap myBitmap(source.GetSize());
+ vector <string> mySourcePhrase;
+ vector <string> myTargetPhrase;
+ vector<float> scores(5);
+ vector <int> alignments;
+ int startIndex = 0;
+ int endIndex = source.GetSize();
+
+ const AlignmentInfo &align = targetPhrase.GetAlignTerm();
+ AlignmentInfo::const_iterator iter;
+
+
+ for (iter = align.begin(); iter != align.end(); ++iter) {
+ alignments.push_back(iter->first);
+ alignments.push_back(iter->second);
+ }
+
+ for (int i = 0; i < targetPhrase.GetSize(); i++) {
+ if (targetPhrase.GetWord(i).IsOOV())
+ myTargetPhrase.push_back("_TRANS_SLF_");
+ else
+ myTargetPhrase.push_back(targetPhrase.GetWord(i).GetFactor(0)->GetString().as_string());
+ }
+
+ for (int i = 0; i < source.GetSize(); i++) {
+ mySourcePhrase.push_back(source.GetWord(i).GetFactor(0)->GetString().as_string());
+ }
+
+ obj.setPhrases(mySourcePhrase , myTargetPhrase);
+ obj.constructCepts(alignments,startIndex,endIndex-1,targetPhrase.GetSize());
+ obj.computeOSMFeature(startIndex,myBitmap);
+ obj.calculateOSMProb(*OSM);
+ obj.populateScores(scores);
+ estimatedFutureScore.PlusEquals(this, scores);
}
FFState* OpSequenceModel::Evaluate(
- const Hypothesis& cur_hypo,
- const FFState* prev_state,
- ScoreComponentCollection* accumulator) const
+ const Hypothesis& cur_hypo,
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const
{
const TargetPhrase &target = cur_hypo.GetCurrTargetPhrase();
const WordsBitmap &bitmap = cur_hypo.GetWordsBitmap();
@@ -160,83 +157,81 @@ FFState* OpSequenceModel::Evaluate(
//cerr << source <<endl;
- // int a = sourceRange.GetStartPos();
- // cerr << source.GetWord(a);
+// int a = sourceRange.GetStartPos();
+// cerr << source.GetWord(a);
//cerr <<a<<endl;
//const Sentence &sentence = static_cast<const Sentence&>(curr_hypo.GetManager().GetSource());
- const WordsRange & sourceRange = cur_hypo.GetCurrSourceWordsRange();
- int startIndex = sourceRange.GetStartPos();
- int endIndex = sourceRange.GetEndPos();
- const AlignmentInfo &align = cur_hypo.GetCurrTargetPhrase().GetAlignTerm();
- osmState * statePtr;
+ const WordsRange & sourceRange = cur_hypo.GetCurrSourceWordsRange();
+ int startIndex = sourceRange.GetStartPos();
+ int endIndex = sourceRange.GetEndPos();
+ const AlignmentInfo &align = cur_hypo.GetCurrTargetPhrase().GetAlignTerm();
+ osmState * statePtr;
- vector <int> alignments;
+ vector <int> alignments;
- AlignmentInfo::const_iterator iter;
+ AlignmentInfo::const_iterator iter;
- for (iter = align.begin(); iter != align.end(); ++iter) {
- //cerr << iter->first << "----" << iter->second << " ";
- alignments.push_back(iter->first);
- alignments.push_back(iter->second);
- }
+ for (iter = align.begin(); iter != align.end(); ++iter) {
+ //cerr << iter->first << "----" << iter->second << " ";
+ alignments.push_back(iter->first);
+ alignments.push_back(iter->second);
+ }
- //cerr<<bitmap<<endl;
- //cerr<<startIndex<<" "<<endIndex<<endl;
+ //cerr<<bitmap<<endl;
+ //cerr<<startIndex<<" "<<endIndex<<endl;
- for (int i = startIndex; i <= endIndex; i++)
- {
- myBitmap.SetValue(i,0); // resetting coverage of this phrase ...
- mySourcePhrase.push_back(source.GetWord(i).GetFactor(0)->GetString().as_string());
- // cerr<<mySourcePhrase[i]<<endl;
+ for (int i = startIndex; i <= endIndex; i++) {
+ myBitmap.SetValue(i,0); // resetting coverage of this phrase ...
+ mySourcePhrase.push_back(source.GetWord(i).GetFactor(0)->GetString().as_string());
+ // cerr<<mySourcePhrase[i]<<endl;
}
- for (int i = 0; i < target.GetSize(); i++)
- {
+ for (int i = 0; i < target.GetSize(); i++) {
- if (target.GetWord(i).IsOOV())
- myTargetPhrase.push_back("_TRANS_SLF_");
- else
- myTargetPhrase.push_back(target.GetWord(i).GetFactor(0)->GetString().as_string());
+ if (target.GetWord(i).IsOOV())
+ myTargetPhrase.push_back("_TRANS_SLF_");
+ else
+ myTargetPhrase.push_back(target.GetWord(i).GetFactor(0)->GetString().as_string());
}
-
+
//cerr<<myBitmap<<endl;
obj.setState(prev_state);
obj.constructCepts(alignments,startIndex,endIndex,target.GetSize());
obj.setPhrases(mySourcePhrase , myTargetPhrase);
- obj.computeOSMFeature(startIndex,myBitmap);
+ obj.computeOSMFeature(startIndex,myBitmap);
obj.calculateOSMProb(*OSM);
obj.populateScores(scores);
-/*
- if (bitmap.GetFirstGapPos() == NOT_FOUND)
- {
+ /*
+ if (bitmap.GetFirstGapPos() == NOT_FOUND)
+ {
- int xx;
- cerr<<bitmap<<endl;
- int a = bitmap.GetFirstGapPos();
- obj.print();
- cin>>xx;
- }
- */
+ int xx;
+ cerr<<bitmap<<endl;
+ int a = bitmap.GetFirstGapPos();
+ obj.print();
+ cin>>xx;
+ }
+ */
-/*
- vector<float> scores(5);
- scores[0] = 0.343423f;
- scores[1] = 1.343423f;
- scores[2] = 2.343423f;
- scores[3] = 3.343423f;
- scores[4] = 4.343423f;
- */
+ /*
+ vector<float> scores(5);
+ scores[0] = 0.343423f;
+ scores[1] = 1.343423f;
+ scores[2] = 2.343423f;
+ scores[3] = 3.343423f;
+ scores[4] = 4.343423f;
+ */
accumulator->PlusEquals(this, scores);
@@ -246,7 +241,7 @@ FFState* OpSequenceModel::Evaluate(
//return statePtr;
- // return NULL;
+// return NULL;
}
FFState* OpSequenceModel::EvaluateChart(
@@ -277,29 +272,28 @@ std::vector<float> OpSequenceModel::GetFutureScores(const Phrase &source, const
ParallelPhrase pp(source, target);
std::map<ParallelPhrase, Scores>::const_iterator iter;
iter = m_futureCost.find(pp);
- //iter = m_coll.find(pp);
+//iter = m_coll.find(pp);
if (iter == m_futureCost.end()) {
vector<float> scores(5, 0);
scores[0] = unkOpProb;
return scores;
- }
- else {
+ } else {
const vector<float> &scores = iter->second;
- return scores;
+ return scores;
}
}
void OpSequenceModel::SetParameter(const std::string& key, const std::string& value)
{
- if (key == "feature-path") {
- m_featurePath = value;
- } else if (key == "path") {
- m_lmPath = value;
- } else if (key == "order") {
- lmOrder = Scan<int>(value);
- } else {
- StatefulFeatureFunction::SetParameter(key, value);
- }
+ if (key == "feature-path") {
+ m_featurePath = value;
+ } else if (key == "path") {
+ m_lmPath = value;
+ } else if (key == "order") {
+ lmOrder = Scan<int>(value);
+ } else {
+ StatefulFeatureFunction::SetParameter(key, value);
+ }
}
} // namespace
diff --git a/moses/FF/OSM-Feature/OpSequenceModel.h b/moses/FF/OSM-Feature/OpSequenceModel.h
index 1e32bd6a1..fe9cef0bd 100644
--- a/moses/FF/OSM-Feature/OpSequenceModel.h
+++ b/moses/FF/OSM-Feature/OpSequenceModel.h
@@ -16,26 +16,26 @@ class OpSequenceModel : public StatefulFeatureFunction
{
public:
-
- lm::ngram::Model * OSM;
-
- int lmOrder;
- float unkOpProb;
- OpSequenceModel(const std::string &line);
+ lm::ngram::Model * OSM;
- void readLanguageModel(const char *);
- void Load();
+ int lmOrder;
+ float unkOpProb;
- FFState* Evaluate(
- const Hypothesis& cur_hypo,
- const FFState* prev_state,
- ScoreComponentCollection* accumulator) const;
+ OpSequenceModel(const std::string &line);
- void Evaluate(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ void readLanguageModel(const char *);
+ void Load();
+
+ FFState* Evaluate(
+ const Hypothesis& cur_hypo,
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const;
+
+ void Evaluate(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
virtual FFState* EvaluateChart(
const ChartHypothesis& /* cur_hypo */,
@@ -49,17 +49,18 @@ public:
std::vector<float> GetFutureScores(const Phrase &source, const Phrase &target) const;
void SetParameter(const std::string& key, const std::string& value);
- bool IsUseable(const FactorMask &mask) const
- { return true; }
+ bool IsUseable(const FactorMask &mask) const {
+ return true;
+ }
protected:
- typedef std::pair<Phrase, Phrase> ParallelPhrase;
- typedef std::vector<float> Scores;
- std::map<ParallelPhrase, Scores> m_futureCost;
+ typedef std::pair<Phrase, Phrase> ParallelPhrase;
+ typedef std::vector<float> Scores;
+ std::map<ParallelPhrase, Scores> m_futureCost;
- std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase;
- std::set <int> targetNullWords;
- std::string m_featurePath, m_lmPath;
+ std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase;
+ std::set <int> targetNullWords;
+ std::string m_featurePath, m_lmPath;
diff --git a/moses/FF/OSM-Feature/osmHyp.cpp b/moses/FF/OSM-Feature/osmHyp.cpp
index 555bbb00b..5ef80211c 100644
--- a/moses/FF/OSM-Feature/osmHyp.cpp
+++ b/moses/FF/OSM-Feature/osmHyp.cpp
@@ -1,4 +1,4 @@
- #include "osmHyp.h"
+#include "osmHyp.h"
#include <sstream>
using namespace std;
@@ -7,19 +7,19 @@ using namespace lm::ngram;
namespace Moses
{
osmState::osmState(const State & val)
-:j(0)
-,E(0)
+ :j(0)
+ ,E(0)
{
lmState = val;
-
+
}
void osmState::saveState(int jVal, int eVal, map <int , string> & gapVal)
{
- gap.clear();
- gap = gapVal;
- j = jVal;
- E = eVal;
+ gap.clear();
+ gap = gapVal;
+ j = jVal;
+ E = eVal;
}
int osmState::Compare(const FFState& otherBase) const
@@ -33,7 +33,7 @@ int osmState::Compare(const FFState& otherBase) const
return (gap < other.gap) ? -1 : +1;
if (lmState.length < other.lmState.length) return -1;
-
+
if (lmState.length > other.lmState.length) return 1;
return 0;
@@ -43,606 +43,552 @@ int osmState::Compare(const FFState& otherBase) const
std::string osmState :: getName() const
{
- return "done";
+ return "done";
}
//////////////////////////////////////////////////
osmHypothesis :: osmHypothesis()
{
- opProb = 0;
- gapWidth = 0;
- gapCount = 0;
- openGapCount = 0;
- deletionCount = 0;
- gapCount = 0;
- j = 0;
- E = 0;
- gap.clear();
+ opProb = 0;
+ gapWidth = 0;
+ gapCount = 0;
+ openGapCount = 0;
+ deletionCount = 0;
+ gapCount = 0;
+ j = 0;
+ E = 0;
+ gap.clear();
}
void osmHypothesis :: setState(const FFState* prev_state)
{
- if(prev_state != NULL)
- {
+ if(prev_state != NULL) {
- j = static_cast <const osmState *> (prev_state)->getJ();
- E = static_cast <const osmState *> (prev_state)->getE();
- gap = static_cast <const osmState *> (prev_state)->getGap();
- lmState = static_cast <const osmState *> (prev_state)->getLMState();
- }
+ j = static_cast <const osmState *> (prev_state)->getJ();
+ E = static_cast <const osmState *> (prev_state)->getE();
+ gap = static_cast <const osmState *> (prev_state)->getGap();
+ lmState = static_cast <const osmState *> (prev_state)->getLMState();
+ }
}
osmState * osmHypothesis :: saveState()
{
- osmState * statePtr = new osmState(lmState);
- statePtr->saveState(j,E,gap);
- return statePtr;
+ osmState * statePtr = new osmState(lmState);
+ statePtr->saveState(j,E,gap);
+ return statePtr;
}
int osmHypothesis :: isTranslationOperation(int x)
{
- if (operations[x].find("_JMP_BCK_") != -1)
- return 0;
-
- if (operations[x].find("_JMP_FWD_") != -1)
- return 0;
-
- if (operations[x].find("_CONT_CEPT_") != -1)
- return 0;
-
- if (operations[x].find("_INS_GAP_") != -1)
- return 0;
-
- return 1;
-
+ if (operations[x].find("_JMP_BCK_") != -1)
+ return 0;
+
+ if (operations[x].find("_JMP_FWD_") != -1)
+ return 0;
+
+ if (operations[x].find("_CONT_CEPT_") != -1)
+ return 0;
+
+ if (operations[x].find("_INS_GAP_") != -1)
+ return 0;
+
+ return 1;
+
}
void osmHypothesis :: removeReorderingOperations()
{
- gapCount = 0;
- deletionCount = 0;
- openGapCount = 0;
- gapWidth = 0;
- //cout<<"I came here"<<endl;
-
- std::vector <std::string> tupleSequence;
-
- for (int x = 0; x < operations.size(); x++)
- {
- // cout<<operations[x]<<endl;
-
- if(isTranslationOperation(x) == 1)
- {
- tupleSequence.push_back(operations[x]);
- }
-
- }
-
- operations.clear();
- operations = tupleSequence;
+ gapCount = 0;
+ deletionCount = 0;
+ openGapCount = 0;
+ gapWidth = 0;
+ //cout<<"I came here"<<endl;
+
+ std::vector <std::string> tupleSequence;
+
+ for (int x = 0; x < operations.size(); x++) {
+ // cout<<operations[x]<<endl;
+
+ if(isTranslationOperation(x) == 1) {
+ tupleSequence.push_back(operations[x]);
+ }
+
+ }
+
+ operations.clear();
+ operations = tupleSequence;
}
void osmHypothesis :: calculateOSMProb(Model & ptrOp)
{
-
- opProb = 0;
- State currState = lmState;
- State temp;
- for (int i = 0; i<operations.size(); i++)
- {
- temp = currState;
- opProb += ptrOp.Score(temp,ptrOp.GetVocabulary().Index(operations[i]),currState);
- }
+ opProb = 0;
+ State currState = lmState;
+ State temp;
+
+ for (int i = 0; i<operations.size(); i++) {
+ temp = currState;
+ opProb += ptrOp.Score(temp,ptrOp.GetVocabulary().Index(operations[i]),currState);
+ }
- lmState = currState;
+ lmState = currState;
- //print();
+ //print();
}
int osmHypothesis :: firstOpenGap(vector <int> & coverageVector)
{
-
- int firstOG =-1;
-
- for(int nd = 0; nd < coverageVector.size(); nd++)
- {
- if(coverageVector[nd]==0)
- {
- firstOG = nd;
- return firstOG;
- }
- }
-
- return firstOG;
+
+ int firstOG =-1;
+
+ for(int nd = 0; nd < coverageVector.size(); nd++) {
+ if(coverageVector[nd]==0) {
+ firstOG = nd;
+ return firstOG;
+ }
+ }
+
+ return firstOG;
}
string osmHypothesis :: intToString(int num)
{
-
- std::ostringstream stm;
- stm<<num;
- return stm.str();
+ std::ostringstream stm;
+ stm<<num;
+
+ return stm.str();
}
void osmHypothesis :: generateOperations(int & startIndex , int j1 , int contFlag , WordsBitmap & coverageVector , string english , string german , set <int> & targetNullWords , vector <string> & currF)
{
-
- int gFlag = 0;
- int gp = 0;
- int ans;
-
-
- if ( j < j1) // j1 is the index of the source word we are about to generate ...
- {
- //if(coverageVector[j]==0) // if source word at j is not generated yet ...
- if(coverageVector.GetValue(j)==0) // if source word at j is not generated yet ...
- {
- operations.push_back("_INS_GAP_");
- gFlag++;
- gap[j]="Unfilled";
- }
- if (j == E)
- {
- j = j1;
- }
- else
- {
- operations.push_back("_JMP_FWD_");
- j=E;
- }
- }
-
- if (j1 < j)
- {
- // if(j < E && coverageVector[j]==0)
- if(j < E && coverageVector.GetValue(j)==0)
- {
- operations.push_back("_INS_GAP_");
- gFlag++;
- gap[j]="Unfilled";
- }
-
- j=closestGap(gap,j1,gp);
- operations.push_back("_JMP_BCK_"+ intToString(gp));
-
- //cout<<"I am j "<<j<<endl;
- //cout<<"I am j1 "<<j1<<endl;
-
- if(j==j1)
- gap[j]="Filled";
- }
-
- if (j < j1)
- {
- operations.push_back("_INS_GAP_");
- gap[j] = "Unfilled";
- gFlag++;
- j=j1;
- }
-
- if(contFlag == 0) // First words of the multi-word cept ...
- {
-
- if(english == "_TRANS_SLF_") // Unknown word ...
- {
- operations.push_back("_TRANS_SLF_");
- }
- else
- {
- operations.push_back("_TRANS_" + english + "_TO_" + german);
- }
-
- //ans = firstOpenGap(coverageVector);
- ans = coverageVector.GetFirstGapPos();
-
- if (ans != -1)
- gapWidth += j - ans;
-
- }
- else if (contFlag == 2)
- {
-
- operations.push_back("_INS_" + german);
- ans = coverageVector.GetFirstGapPos();
-
- if (ans != -1)
- gapWidth += j - ans;
- deletionCount++;
- }
- else
- {
- operations.push_back("_CONT_CEPT_");
- }
-
- //coverageVector[j]=1;
- coverageVector.SetValue(j,1);
- j+=1;
-
- if(E<j)
- E=j;
-
- if (gFlag > 0)
- gapCount++;
-
- openGapCount += getOpenGaps();
-
- //if (coverageVector[j] == 0 && targetNullWords.find(j) != targetNullWords.end())
- if (coverageVector.GetValue(j) == 0 && targetNullWords.find(j) != targetNullWords.end())
- {
- j1 = j;
- german = currF[j1-startIndex];
- english = "_INS_";
- generateOperations(startIndex, j1, 2 , coverageVector , english , german , targetNullWords , currF);
- }
+
+ int gFlag = 0;
+ int gp = 0;
+ int ans;
+
+
+ if ( j < j1) { // j1 is the index of the source word we are about to generate ...
+ //if(coverageVector[j]==0) // if source word at j is not generated yet ...
+ if(coverageVector.GetValue(j)==0) { // if source word at j is not generated yet ...
+ operations.push_back("_INS_GAP_");
+ gFlag++;
+ gap[j]="Unfilled";
+ }
+ if (j == E) {
+ j = j1;
+ } else {
+ operations.push_back("_JMP_FWD_");
+ j=E;
+ }
+ }
+
+ if (j1 < j) {
+ // if(j < E && coverageVector[j]==0)
+ if(j < E && coverageVector.GetValue(j)==0) {
+ operations.push_back("_INS_GAP_");
+ gFlag++;
+ gap[j]="Unfilled";
+ }
+
+ j=closestGap(gap,j1,gp);
+ operations.push_back("_JMP_BCK_"+ intToString(gp));
+
+ //cout<<"I am j "<<j<<endl;
+ //cout<<"I am j1 "<<j1<<endl;
+
+ if(j==j1)
+ gap[j]="Filled";
+ }
+
+ if (j < j1) {
+ operations.push_back("_INS_GAP_");
+ gap[j] = "Unfilled";
+ gFlag++;
+ j=j1;
+ }
+
+ if(contFlag == 0) { // First words of the multi-word cept ...
+
+ if(english == "_TRANS_SLF_") { // Unknown word ...
+ operations.push_back("_TRANS_SLF_");
+ } else {
+ operations.push_back("_TRANS_" + english + "_TO_" + german);
+ }
+
+ //ans = firstOpenGap(coverageVector);
+ ans = coverageVector.GetFirstGapPos();
+
+ if (ans != -1)
+ gapWidth += j - ans;
+
+ } else if (contFlag == 2) {
+
+ operations.push_back("_INS_" + german);
+ ans = coverageVector.GetFirstGapPos();
+
+ if (ans != -1)
+ gapWidth += j - ans;
+ deletionCount++;
+ } else {
+ operations.push_back("_CONT_CEPT_");
+ }
+
+ //coverageVector[j]=1;
+ coverageVector.SetValue(j,1);
+ j+=1;
+
+ if(E<j)
+ E=j;
+
+ if (gFlag > 0)
+ gapCount++;
+
+ openGapCount += getOpenGaps();
+
+ //if (coverageVector[j] == 0 && targetNullWords.find(j) != targetNullWords.end())
+ if (coverageVector.GetValue(j) == 0 && targetNullWords.find(j) != targetNullWords.end()) {
+ j1 = j;
+ german = currF[j1-startIndex];
+ english = "_INS_";
+ generateOperations(startIndex, j1, 2 , coverageVector , english , german , targetNullWords , currF);
+ }
}
void osmHypothesis :: print()
{
- for (int i = 0; i< operations.size(); i++)
- {
- cerr<<operations[i]<<" ";
+ for (int i = 0; i< operations.size(); i++) {
+ cerr<<operations[i]<<" ";
+
+ }
- }
+ cerr<<endl<<endl;
- cerr<<endl<<endl;
-
- cerr<<"Operation Probability "<<opProb<<endl;
- cerr<<"Gap Count "<<gapCount<<endl;
- cerr<<"Open Gap Count "<<openGapCount<<endl;
- cerr<<"Gap Width "<<gapWidth<<endl;
- cerr<<"Deletion Count "<<deletionCount<<endl;
+ cerr<<"Operation Probability "<<opProb<<endl;
+ cerr<<"Gap Count "<<gapCount<<endl;
+ cerr<<"Open Gap Count "<<openGapCount<<endl;
+ cerr<<"Gap Width "<<gapWidth<<endl;
+ cerr<<"Deletion Count "<<deletionCount<<endl;
- cerr<<"_______________"<<endl;
+ cerr<<"_______________"<<endl;
}
int osmHypothesis :: closestGap(map <int,string> gap, int j1, int & gp)
{
- int dist=1172;
- int value=-1;
- int temp=0;
- gp=0;
- int opGap=0;
-
- map <int,string> :: iterator iter;
-
- iter=gap.end();
-
- do
- {
- iter--;
- //cout<<"Trapped "<<iter->first<<endl;
-
- if(iter->first==j1 && iter->second== "Unfilled")
- {
- opGap++;
- gp = opGap;
- return j1;
-
- }
-
- if(iter->second =="Unfilled")
- {
- opGap++;
- temp = iter->first - j1;
-
- if(temp<0)
- temp=temp * -1;
-
- if(dist>temp && iter->first < j1)
- {
- dist=temp;
- value=iter->first;
- gp=opGap;
- }
- }
-
-
- }
- while(iter!=gap.begin());
-
- return value;
+ int dist=1172;
+ int value=-1;
+ int temp=0;
+ gp=0;
+ int opGap=0;
+
+ map <int,string> :: iterator iter;
+
+ iter=gap.end();
+
+ do {
+ iter--;
+ //cout<<"Trapped "<<iter->first<<endl;
+
+ if(iter->first==j1 && iter->second== "Unfilled") {
+ opGap++;
+ gp = opGap;
+ return j1;
+
+ }
+
+ if(iter->second =="Unfilled") {
+ opGap++;
+ temp = iter->first - j1;
+
+ if(temp<0)
+ temp=temp * -1;
+
+ if(dist>temp && iter->first < j1) {
+ dist=temp;
+ value=iter->first;
+ gp=opGap;
+ }
+ }
+
+
+ } while(iter!=gap.begin());
+
+ return value;
}
int osmHypothesis :: getOpenGaps()
{
- map <int,string> :: iterator iter;
+ map <int,string> :: iterator iter;
- int nd = 0;
- for (iter = gap.begin(); iter!=gap.end(); iter++)
- {
- if(iter->second == "Unfilled")
- nd++;
- }
+ int nd = 0;
+ for (iter = gap.begin(); iter!=gap.end(); iter++) {
+ if(iter->second == "Unfilled")
+ nd++;
+ }
- return nd;
+ return nd;
}
void osmHypothesis :: generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes)
{
- operations.push_back("_DEL_" + english);
- currTargetIndex++;
+ operations.push_back("_DEL_" + english);
+ currTargetIndex++;
- while(doneTargetIndexes.find(currTargetIndex) != doneTargetIndexes.end())
- {
- currTargetIndex++;
- }
+ while(doneTargetIndexes.find(currTargetIndex) != doneTargetIndexes.end()) {
+ currTargetIndex++;
+ }
- if (sourceNullWords.find(currTargetIndex) != sourceNullWords.end())
- {
- english = currE[currTargetIndex];
- generateDeleteOperations(english,currTargetIndex,doneTargetIndexes);
- }
+ if (sourceNullWords.find(currTargetIndex) != sourceNullWords.end()) {
+ english = currE[currTargetIndex];
+ generateDeleteOperations(english,currTargetIndex,doneTargetIndexes);
+ }
}
void osmHypothesis :: computeOSMFeature(int startIndex , WordsBitmap & coverageVector)
{
- set <int> doneTargetIndexes;
- set <int> eSide;
- set <int> fSide;
- set <int> :: iterator iter;
- string english;
- string source;
- int j1;
- int start = 0;
- int targetIndex = 0;
- doneTargetIndexes.clear();
-
-
- if (targetNullWords.size() != 0) // Source words to be deleted in the start of this phrase ...
- {
- iter = targetNullWords.begin();
-
- if (*iter == startIndex)
- {
-
- j1 = startIndex;
- source = currF[j1-startIndex];
- english = "_INS_";
- generateOperations(startIndex, j1, 2 , coverageVector , english , source , targetNullWords , currF);
- }
- }
-
- if (sourceNullWords.find(targetIndex) != sourceNullWords.end()) // first word has to be deleted ...
- {
- english = currE[targetIndex];
- generateDeleteOperations(english,targetIndex, doneTargetIndexes);
- }
-
-
- for (int i = 0; i < ceptsInPhrase.size(); i++)
- {
- source = "";
- english = "";
-
- fSide = ceptsInPhrase[i].first;
- eSide = ceptsInPhrase[i].second;
-
- iter = eSide.begin();
- targetIndex = *iter;
- english += currE[*iter];
- iter++;
-
- for (; iter != eSide.end(); iter++)
- {
- if(*iter == targetIndex+1)
- targetIndex++;
- else
- doneTargetIndexes.insert(*iter);
-
- english += "^_^";
- english += currE[*iter];
- }
-
- iter = fSide.begin();
- source += currF[*iter];
- iter++;
-
- for (; iter != fSide.end(); iter++)
- {
- source += "^_^";
- source += currF[*iter];
- }
-
- iter = fSide.begin();
- j1 = *iter + startIndex;
- iter++;
-
- generateOperations(startIndex, j1, 0 , coverageVector , english , source , targetNullWords , currF);
-
-
- for (; iter != fSide.end(); iter++)
- {
- j1 = *iter + startIndex;
- generateOperations(startIndex, j1, 1 , coverageVector , english , source , targetNullWords , currF);
- }
-
- targetIndex++; // Check whether the next target word is unaligned ...
-
- while(doneTargetIndexes.find(targetIndex) != doneTargetIndexes.end())
- {
- targetIndex++;
- }
-
- if(sourceNullWords.find(targetIndex) != sourceNullWords.end())
- {
- english = currE[targetIndex];
- generateDeleteOperations(english,targetIndex, doneTargetIndexes);
- }
- }
-
- //removeReorderingOperations();
-
- //print();
+ set <int> doneTargetIndexes;
+ set <int> eSide;
+ set <int> fSide;
+ set <int> :: iterator iter;
+ string english;
+ string source;
+ int j1;
+ int start = 0;
+ int targetIndex = 0;
+ doneTargetIndexes.clear();
+
+
+ if (targetNullWords.size() != 0) { // Source words to be deleted in the start of this phrase ...
+ iter = targetNullWords.begin();
+
+ if (*iter == startIndex) {
+
+ j1 = startIndex;
+ source = currF[j1-startIndex];
+ english = "_INS_";
+ generateOperations(startIndex, j1, 2 , coverageVector , english , source , targetNullWords , currF);
+ }
+ }
+
+ if (sourceNullWords.find(targetIndex) != sourceNullWords.end()) { // first word has to be deleted ...
+ english = currE[targetIndex];
+ generateDeleteOperations(english,targetIndex, doneTargetIndexes);
+ }
+
+
+ for (int i = 0; i < ceptsInPhrase.size(); i++) {
+ source = "";
+ english = "";
+
+ fSide = ceptsInPhrase[i].first;
+ eSide = ceptsInPhrase[i].second;
+
+ iter = eSide.begin();
+ targetIndex = *iter;
+ english += currE[*iter];
+ iter++;
+
+ for (; iter != eSide.end(); iter++) {
+ if(*iter == targetIndex+1)
+ targetIndex++;
+ else
+ doneTargetIndexes.insert(*iter);
+
+ english += "^_^";
+ english += currE[*iter];
+ }
+
+ iter = fSide.begin();
+ source += currF[*iter];
+ iter++;
+
+ for (; iter != fSide.end(); iter++) {
+ source += "^_^";
+ source += currF[*iter];
+ }
+
+ iter = fSide.begin();
+ j1 = *iter + startIndex;
+ iter++;
+
+ generateOperations(startIndex, j1, 0 , coverageVector , english , source , targetNullWords , currF);
+
+
+ for (; iter != fSide.end(); iter++) {
+ j1 = *iter + startIndex;
+ generateOperations(startIndex, j1, 1 , coverageVector , english , source , targetNullWords , currF);
+ }
+
+ targetIndex++; // Check whether the next target word is unaligned ...
+
+ while(doneTargetIndexes.find(targetIndex) != doneTargetIndexes.end()) {
+ targetIndex++;
+ }
+
+ if(sourceNullWords.find(targetIndex) != sourceNullWords.end()) {
+ english = currE[targetIndex];
+ generateDeleteOperations(english,targetIndex, doneTargetIndexes);
+ }
+ }
+
+ //removeReorderingOperations();
+
+ //print();
}
void osmHypothesis :: getMeCepts ( set <int> & eSide , set <int> & fSide , map <int , vector <int> > & tS , map <int , vector <int> > & sT)
{
- set <int> :: iterator iter;
+ set <int> :: iterator iter;
- int sz = eSide.size();
- vector <int> t;
+ int sz = eSide.size();
+ vector <int> t;
- for (iter = eSide.begin(); iter != eSide.end(); iter++)
- {
- t = tS[*iter];
+ for (iter = eSide.begin(); iter != eSide.end(); iter++) {
+ t = tS[*iter];
- for (int i = 0; i < t.size(); i++)
- {
- fSide.insert(t[i]);
- }
+ for (int i = 0; i < t.size(); i++) {
+ fSide.insert(t[i]);
+ }
- }
+ }
- for (iter = fSide.begin(); iter != fSide.end(); iter++)
- {
+ for (iter = fSide.begin(); iter != fSide.end(); iter++) {
- t = sT[*iter];
+ t = sT[*iter];
- for (int i = 0 ; i<t.size(); i++)
- {
- eSide.insert(t[i]);
- }
+ for (int i = 0 ; i<t.size(); i++) {
+ eSide.insert(t[i]);
+ }
- }
+ }
- if (eSide.size () > sz)
- {
- getMeCepts(eSide,fSide,tS,sT);
- }
+ if (eSide.size () > sz) {
+ getMeCepts(eSide,fSide,tS,sT);
+ }
}
void osmHypothesis :: constructCepts(vector <int> & align , int startIndex , int endIndex, int targetPhraseLength)
{
- std::map <int , vector <int> > sT;
- std::map <int , vector <int> > tS;
- std::set <int> eSide;
- std::set <int> fSide;
- std::set <int> :: iterator iter;
- std :: map <int , vector <int> > :: iterator iter2;
- std :: pair < set <int> , set <int> > cept;
- int src;
- int tgt;
+ std::map <int , vector <int> > sT;
+ std::map <int , vector <int> > tS;
+ std::set <int> eSide;
+ std::set <int> fSide;
+ std::set <int> :: iterator iter;
+ std :: map <int , vector <int> > :: iterator iter2;
+ std :: pair < set <int> , set <int> > cept;
+ int src;
+ int tgt;
- for (int i = 0; i < align.size(); i+=2)
- {
- src = align[i];
- tgt = align[i+1];
- tS[tgt].push_back(src);
- sT[src].push_back(tgt);
- }
+ for (int i = 0; i < align.size(); i+=2) {
+ src = align[i];
+ tgt = align[i+1];
+ tS[tgt].push_back(src);
+ sT[src].push_back(tgt);
+ }
- for (int i = startIndex; i<= endIndex; i++) // What are unaligned source words in this phrase ...
- {
- if (sT.find(i-startIndex) == sT.end())
- {
- targetNullWords.insert(i);
- }
- }
+ for (int i = startIndex; i<= endIndex; i++) { // What are unaligned source words in this phrase ...
+ if (sT.find(i-startIndex) == sT.end()) {
+ targetNullWords.insert(i);
+ }
+ }
- for (int i = 0; i < targetPhraseLength; i++) // What are unaligned target words in this phrase ...
- {
- if (tS.find(i) == tS.end())
- {
- sourceNullWords.insert(i);
- }
- }
+ for (int i = 0; i < targetPhraseLength; i++) { // What are unaligned target words in this phrase ...
+ if (tS.find(i) == tS.end()) {
+ sourceNullWords.insert(i);
+ }
+ }
- while (tS.size() != 0 && sT.size() != 0)
- {
+ while (tS.size() != 0 && sT.size() != 0) {
- iter2 = tS.begin();
+ iter2 = tS.begin();
- eSide.clear();
- fSide.clear();
- eSide.insert (iter2->first);
+ eSide.clear();
+ fSide.clear();
+ eSide.insert (iter2->first);
- getMeCepts(eSide, fSide, tS , sT);
+ getMeCepts(eSide, fSide, tS , sT);
- for (iter = eSide.begin(); iter != eSide.end(); iter++)
- {
- iter2 = tS.find(*iter);
- tS.erase(iter2);
- }
+ for (iter = eSide.begin(); iter != eSide.end(); iter++) {
+ iter2 = tS.find(*iter);
+ tS.erase(iter2);
+ }
- for (iter = fSide.begin(); iter != fSide.end(); iter++)
- {
- iter2 = sT.find(*iter);
- sT.erase(iter2);
- }
+ for (iter = fSide.begin(); iter != fSide.end(); iter++) {
+ iter2 = sT.find(*iter);
+ sT.erase(iter2);
+ }
- cept = make_pair (fSide , eSide);
- ceptsInPhrase.push_back(cept);
- }
+ cept = make_pair (fSide , eSide);
+ ceptsInPhrase.push_back(cept);
+ }
-/*
+ /*
- cerr<<"Extracted Cepts "<<endl;
- for (int i = 0; i < ceptsInPhrase.size(); i++)
- {
+ cerr<<"Extracted Cepts "<<endl;
+ for (int i = 0; i < ceptsInPhrase.size(); i++)
+ {
- fSide = ceptsInPhrase[i].first;
- eSide = ceptsInPhrase[i].second;
+ fSide = ceptsInPhrase[i].first;
+ eSide = ceptsInPhrase[i].second;
- for (iter = eSide.begin(); iter != eSide.end(); iter++)
- {
- cerr<<*iter<<" ";
- }
- cerr<<"<---> ";
+ for (iter = eSide.begin(); iter != eSide.end(); iter++)
+ {
+ cerr<<*iter<<" ";
+ }
+ cerr<<"<---> ";
- for (iter = fSide.begin(); iter != fSide.end(); iter++)
- {
- cerr<<*iter<<" ";
- }
+ for (iter = fSide.begin(); iter != fSide.end(); iter++)
+ {
+ cerr<<*iter<<" ";
+ }
- cerr<<endl;
- }
- cerr<<endl;
+ cerr<<endl;
+ }
+ cerr<<endl;
- cerr<<"Unaligned Target Words"<<endl;
+ cerr<<"Unaligned Target Words"<<endl;
- for (iter = sourceNullWords.begin(); iter != sourceNullWords.end(); iter++)
- cerr<<*iter<<"<--->"<<endl;
+ for (iter = sourceNullWords.begin(); iter != sourceNullWords.end(); iter++)
+ cerr<<*iter<<"<--->"<<endl;
- cerr<<"Unaligned Source Words"<<endl;
+ cerr<<"Unaligned Source Words"<<endl;
- for (iter = targetNullWords.begin(); iter != targetNullWords.end(); iter++)
- cerr<<*iter<<"<--->"<<endl;
+ for (iter = targetNullWords.begin(); iter != targetNullWords.end(); iter++)
+ cerr<<*iter<<"<--->"<<endl;
-*/
+ */
}
void osmHypothesis :: populateScores(vector <float> & scores)
{
- scores.clear();
- scores.push_back(opProb);
- scores.push_back(gapWidth);
- scores.push_back(gapCount);
- scores.push_back(openGapCount);
- scores.push_back(deletionCount);
+ scores.clear();
+ scores.push_back(opProb);
+ scores.push_back(gapWidth);
+ scores.push_back(gapCount);
+ scores.push_back(openGapCount);
+ scores.push_back(deletionCount);
}
diff --git a/moses/FF/OSM-Feature/osmHyp.h b/moses/FF/OSM-Feature/osmHyp.h
index ab8051176..368cd8e19 100644
--- a/moses/FF/OSM-Feature/osmHyp.h
+++ b/moses/FF/OSM-Feature/osmHyp.h
@@ -17,15 +17,23 @@ public:
osmState(const lm::ngram::State & val);
int Compare(const FFState& other) const;
void saveState(int jVal, int eVal, std::map <int , std::string> & gapVal);
- int getJ()const {return j;}
- int getE()const {return E;}
- std::map <int , std::string> getGap() const { return gap;}
-
- lm::ngram::State getLMState() const {return lmState;}
+ int getJ()const {
+ return j;
+ }
+ int getE()const {
+ return E;
+ }
+ std::map <int , std::string> getGap() const {
+ return gap;
+ }
+
+ lm::ngram::State getLMState() const {
+ return lmState;
+ }
void print() const;
std::string getName() const;
-
+
protected:
int j, E;
std::map <int,std::string> gap;
@@ -35,51 +43,56 @@ protected:
class osmHypothesis
{
- private:
-
-
- std::vector <std::string> operations; // List of operations required to generated this hyp ...
- std::map <int,std::string> gap; // Maintains gap history ...
- int j; // Position after the last source word generated ...
- int E; // Position after the right most source word so far generated ...
- lm::ngram::State lmState; // KenLM's Model State ...
-
- int gapCount; // Number of gaps inserted ...
- int deletionCount;
- int openGapCount;
- int gapWidth;
- double opProb;
-
- std::vector <std::string> currE;
- std::vector <std::string> currF;
- std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase;
- std::set <int> targetNullWords;
- std::set <int> sourceNullWords;
-
- int closestGap(std::map <int,std::string> gap,int j1, int & gp);
- int firstOpenGap(std::vector <int> & coverageVector);
- std::string intToString(int);
- int getOpenGaps();
- int isTranslationOperation(int j);
- void removeReorderingOperations();
-
- void getMeCepts ( std::set <int> & eSide , std::set <int> & fSide , std::map <int , std::vector <int> > & tS , std::map <int , std::vector <int> > & sT);
-
- public:
-
- osmHypothesis();
- ~osmHypothesis(){};
- void generateOperations(int & startIndex, int j1 , int contFlag , WordsBitmap & coverageVector , std::string english , std::string german , std::set <int> & targetNullWords , std::vector <std::string> & currF);
- void generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes);
- void calculateOSMProb(lm::ngram::Model & ptrOp);
- void computeOSMFeature(int startIndex , WordsBitmap & coverageVector);
- void constructCepts(std::vector <int> & align , int startIndex , int endIndex, int targetPhraseLength);
- void setPhrases(std::vector <std::string> & val1 , std::vector <std::string> & val2){currF = val1; currE = val2;}
- void setState(const FFState* prev_state);
- osmState * saveState();
- void print();
- void populateScores(std::vector <float> & scores);
- void setState(const lm::ngram::State & val){lmState = val;}
+private:
+
+
+ std::vector <std::string> operations; // List of operations required to generated this hyp ...
+ std::map <int,std::string> gap; // Maintains gap history ...
+ int j; // Position after the last source word generated ...
+ int E; // Position after the right most source word so far generated ...
+ lm::ngram::State lmState; // KenLM's Model State ...
+
+ int gapCount; // Number of gaps inserted ...
+ int deletionCount;
+ int openGapCount;
+ int gapWidth;
+ double opProb;
+
+ std::vector <std::string> currE;
+ std::vector <std::string> currF;
+ std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase;
+ std::set <int> targetNullWords;
+ std::set <int> sourceNullWords;
+
+ int closestGap(std::map <int,std::string> gap,int j1, int & gp);
+ int firstOpenGap(std::vector <int> & coverageVector);
+ std::string intToString(int);
+ int getOpenGaps();
+ int isTranslationOperation(int j);
+ void removeReorderingOperations();
+
+ void getMeCepts ( std::set <int> & eSide , std::set <int> & fSide , std::map <int , std::vector <int> > & tS , std::map <int , std::vector <int> > & sT);
+
+public:
+
+ osmHypothesis();
+ ~osmHypothesis() {};
+ void generateOperations(int & startIndex, int j1 , int contFlag , WordsBitmap & coverageVector , std::string english , std::string german , std::set <int> & targetNullWords , std::vector <std::string> & currF);
+ void generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes);
+ void calculateOSMProb(lm::ngram::Model & ptrOp);
+ void computeOSMFeature(int startIndex , WordsBitmap & coverageVector);
+ void constructCepts(std::vector <int> & align , int startIndex , int endIndex, int targetPhraseLength);
+ void setPhrases(std::vector <std::string> & val1 , std::vector <std::string> & val2) {
+ currF = val1;
+ currE = val2;
+ }
+ void setState(const FFState* prev_state);
+ osmState * saveState();
+ void print();
+ void populateScores(std::vector <float> & scores);
+ void setState(const lm::ngram::State & val) {
+ lmState = val;
+ }
};
diff --git a/moses/LM/Ken.cpp b/moses/LM/Ken.cpp
index df757386a..edfbc7f75 100644
--- a/moses/LM/Ken.cpp
+++ b/moses/LM/Ken.cpp
@@ -383,7 +383,7 @@ LanguageModel *ConstructKenLM(const std::string &description, const std::string
try {
lm::ngram::ModelType model_type;
if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
-
+
switch(model_type) {
case lm::ngram::PROBING:
return new LanguageModelKen<lm::ngram::ProbingModel>(description, line, file, factorType, lazy);
diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp
index e0a4683de..af52b5cbf 100644
--- a/moses/StaticData.cpp
+++ b/moses/StaticData.cpp
@@ -694,9 +694,9 @@ bool StaticData::LoadData(Parameter *parameter)
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
} else if (feature == "OpSequenceModel") {
- OpSequenceModel* model = new OpSequenceModel(line);
- vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
- SetWeights(model, weights);
+ OpSequenceModel* model = new OpSequenceModel(line);
+ vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
+ SetWeights(model, weights);
} else if (feature == "PhrasePenalty") {
PhrasePenalty* model = new PhrasePenalty(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
diff --git a/moses/Word.h b/moses/Word.h
index efce4e187..c6a62f6d6 100644
--- a/moses/Word.h
+++ b/moses/Word.h
@@ -58,8 +58,7 @@ public:
/** deep copy */
Word(const Word &copy)
:m_isNonTerminal(copy.m_isNonTerminal)
- ,m_isOOV(copy.m_isOOV)
- {
+ ,m_isOOV(copy.m_isOOV) {
std::memcpy(m_factorArray, copy.m_factorArray, sizeof(FactorArray));
}