Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/moses/FF
diff options
context:
space:
mode:
authorMosesAdmin <moses-support-owner@mit.edu>2016-04-11 02:00:43 +0300
committerMosesAdmin <moses-support-owner@mit.edu>2016-04-11 02:00:43 +0300
commit7b205b0c8a4b0f10c5d6bea7019bf468ab4046be (patch)
treeab64282add3474c18d634cc22f3d09e59483053c /moses/FF
parentba0a3d92f4ef6b14b76e859a43487adc8321d263 (diff)
daily automatic beautifier
Diffstat (limited to 'moses/FF')
-rw-r--r--moses/FF/Dsg-Feature/Desegmenter.cpp119
-rw-r--r--moses/FF/Dsg-Feature/Desegmenter.h30
-rw-r--r--moses/FF/Dsg-Feature/DsgModel.cpp238
-rw-r--r--moses/FF/Dsg-Feature/DsgModel.h70
-rw-r--r--moses/FF/Dsg-Feature/KenDsg.cpp56
-rw-r--r--moses/FF/Dsg-Feature/KenDsg.h18
-rw-r--r--moses/FF/Dsg-Feature/dsgHyp.cpp610
-rw-r--r--moses/FF/Dsg-Feature/dsgHyp.h66
8 files changed, 617 insertions, 590 deletions
diff --git a/moses/FF/Dsg-Feature/Desegmenter.cpp b/moses/FF/Dsg-Feature/Desegmenter.cpp
index a9713831b..677de6e6e 100644
--- a/moses/FF/Dsg-Feature/Desegmenter.cpp
+++ b/moses/FF/Dsg-Feature/Desegmenter.cpp
@@ -11,73 +11,74 @@ using namespace std;
namespace Moses
{
-void Desegmenter::Load(const string filename){
+void Desegmenter::Load(const string filename)
+{
- std::ifstream myFile(filename.c_str() );
- if (myFile.is_open()){
- cerr << "Desegmentation File open successful." << endl;
- string line;
- while (getline(myFile, line)){
- stringstream ss(line);
- string token;
- vector<string> myline;
- while (getline(ss, token, '\t')){
- myline.push_back(token);
- }
- mmDesegTable.insert(pair<string, string>(myline[2], myline[1] ));
- }
- myFile.close();
- }
- else
- cerr << "open() failed: check if Desegmentation file is in right folder" << endl;
+ std::ifstream myFile(filename.c_str() );
+ if (myFile.is_open()) {
+ cerr << "Desegmentation File open successful." << endl;
+ string line;
+ while (getline(myFile, line)) {
+ stringstream ss(line);
+ string token;
+ vector<string> myline;
+ while (getline(ss, token, '\t')) {
+ myline.push_back(token);
+ }
+ mmDesegTable.insert(pair<string, string>(myline[2], myline[1] ));
+ }
+ myFile.close();
+ } else
+ cerr << "open() failed: check if Desegmentation file is in right folder" << endl;
}
-vector<string> Desegmenter::Search(string myKey){
- multimap<string, string>::const_iterator mmiPairFound = mmDesegTable.find(myKey);
- vector<string> result;
- if (mmiPairFound != mmDesegTable.end()){
- size_t nNumPairsInMap = mmDesegTable.count(myKey);
- for (size_t nValuesCounter = 0; nValuesCounter < nNumPairsInMap; ++nValuesCounter){
- if (mmiPairFound != mmDesegTable.end()) {
- result.push_back(mmiPairFound->second);
- }
- ++mmiPairFound;
- }
- return result;
- }
- else{
- string rule_deseg ;
- rule_deseg = ApplyRules(myKey);
- result.push_back(rule_deseg);
- return result;
- }
+vector<string> Desegmenter::Search(string myKey)
+{
+ multimap<string, string>::const_iterator mmiPairFound = mmDesegTable.find(myKey);
+ vector<string> result;
+ if (mmiPairFound != mmDesegTable.end()) {
+ size_t nNumPairsInMap = mmDesegTable.count(myKey);
+ for (size_t nValuesCounter = 0; nValuesCounter < nNumPairsInMap; ++nValuesCounter) {
+ if (mmiPairFound != mmDesegTable.end()) {
+ result.push_back(mmiPairFound->second);
+ }
+ ++mmiPairFound;
+ }
+ return result;
+ } else {
+ string rule_deseg ;
+ rule_deseg = ApplyRules(myKey);
+ result.push_back(rule_deseg);
+ return result;
+ }
}
-string Desegmenter::ApplyRules(string & segToken){
+string Desegmenter::ApplyRules(string & segToken)
+{
+
+ string desegToken=segToken;
+ if (!simple) {
+ boost::replace_all(desegToken, "l+ All", "ll");
+ boost::replace_all(desegToken, "l+ Al", "ll");
+ boost::replace_all(desegToken, "y+ y ", "y");
+ boost::replace_all(desegToken, "p+ ", "t");
+ boost::replace_all(desegToken, "' +", "}");
+ boost::replace_all(desegToken, "y +", "A");
+ boost::replace_all(desegToken, "n +n", "n");
+ boost::replace_all(desegToken, "mn +m", "mm");
+ boost::replace_all(desegToken, "En +m", "Em");
+ boost::replace_all(desegToken, "An +lA", "Em");
+ boost::replace_all(desegToken, "-LRB-", "(");
+ boost::replace_all(desegToken, "-RRB-", ")");
+ }
+
+ boost::replace_all(desegToken, "+ +", "");
+ boost::replace_all(desegToken, "+ ", "");
+ boost::replace_all(desegToken, " +", "");
- string desegToken=segToken;
- if (!simple){
- boost::replace_all(desegToken, "l+ All", "ll");
- boost::replace_all(desegToken, "l+ Al", "ll");
- boost::replace_all(desegToken, "y+ y ", "y");
- boost::replace_all(desegToken, "p+ ", "t");
- boost::replace_all(desegToken, "' +", "}");
- boost::replace_all(desegToken, "y +", "A");
- boost::replace_all(desegToken, "n +n", "n");
- boost::replace_all(desegToken, "mn +m", "mm");
- boost::replace_all(desegToken, "En +m", "Em");
- boost::replace_all(desegToken, "An +lA", "Em");
- boost::replace_all(desegToken, "-LRB-", "(");
- boost::replace_all(desegToken, "-RRB-", ")");
- }
-
- boost::replace_all(desegToken, "+ +", "");
- boost::replace_all(desegToken, "+ ", "");
- boost::replace_all(desegToken, " +", "");
-
- return desegToken;
+ return desegToken;
}
Desegmenter::~Desegmenter()
diff --git a/moses/FF/Dsg-Feature/Desegmenter.h b/moses/FF/Dsg-Feature/Desegmenter.h
index 397140f91..21da78d2e 100644
--- a/moses/FF/Dsg-Feature/Desegmenter.h
+++ b/moses/FF/Dsg-Feature/Desegmenter.h
@@ -11,21 +11,23 @@ namespace Moses
class Desegmenter
{
private:
- std::multimap<string, string> mmDesegTable;
- std::string filename;
- bool simple;
- void Load(const string filename);
+ std::multimap<string, string> mmDesegTable;
+ std::string filename;
+ bool simple;
+ void Load(const string filename);
public:
- Desegmenter(const std::string& file, const bool scheme){
- filename = file;
- simple=scheme;
- Load(filename);
- }
- string getFileName(){ return filename; }
-
- vector<string> Search(string myKey);
- string ApplyRules(string &);
- ~Desegmenter();
+ Desegmenter(const std::string& file, const bool scheme) {
+ filename = file;
+ simple=scheme;
+ Load(filename);
+ }
+ string getFileName() {
+ return filename;
+ }
+
+ vector<string> Search(string myKey);
+ string ApplyRules(string &);
+ ~Desegmenter();
};
}
diff --git a/moses/FF/Dsg-Feature/DsgModel.cpp b/moses/FF/Dsg-Feature/DsgModel.cpp
index 090b5545a..0bd25a50b 100644
--- a/moses/FF/Dsg-Feature/DsgModel.cpp
+++ b/moses/FF/Dsg-Feature/DsgModel.cpp
@@ -10,147 +10,147 @@ using namespace lm::ngram;
namespace Moses
{
- DesegModel::DesegModel(const std::string &line)
- :StatefulFeatureFunction(5, line )
- {
- tFactor = 0;
- order=5;
- numFeatures = 5;
- optimistic = 1;
- ReadParameters();
- }
+DesegModel::DesegModel(const std::string &line)
+ :StatefulFeatureFunction(5, line )
+{
+ tFactor = 0;
+ order=5;
+ numFeatures = 5;
+ optimistic = 1;
+ ReadParameters();
+}
+
+DesegModel::~DesegModel()
+{
+ delete DSGM;
+}
- DesegModel::~DesegModel()
- {
- delete DSGM;
- }
+void DesegModel :: readLanguageModel(const char *lmFile)
+{
+ DSGM = ConstructDsgLM(m_lmPath.c_str());
+ State startState = DSGM->NullContextState();
+ desegT=new Desegmenter(m_desegPath,m_simple);// Desegmentation Table
+}
- void DesegModel :: readLanguageModel(const char *lmFile)
- {
- DSGM = ConstructDsgLM(m_lmPath.c_str());
- State startState = DSGM->NullContextState();
- desegT=new Desegmenter(m_desegPath,m_simple);// Desegmentation Table
- }
+void DesegModel::Load(AllOptions::ptr const& opts)
+{
+ m_options = opts;
+ readLanguageModel(m_lmPath.c_str());
+}
- void DesegModel::Load(AllOptions::ptr const& opts)
- {
- m_options = opts;
- readLanguageModel(m_lmPath.c_str());
- }
+void DesegModel:: EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedScores) const
+{
- void DesegModel:: EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedScores) const
- {
+ dsgHypothesis obj;
+ vector <string> myTargetPhrase;
+ vector<float> scores;
+ vector<string> targ_phrase; //stores the segmented tokens in the target phrase
+ const AlignmentInfo &align = targetPhrase.GetAlignTerm();
+
+ for (int i = 0; i < targetPhrase.GetSize(); i++) {
+ targ_phrase.push_back(targetPhrase.GetWord(i).GetFactor(tFactor)->GetString().as_string());
+ }
- dsgHypothesis obj;
- vector <string> myTargetPhrase;
- vector<float> scores;
- vector<string> targ_phrase; //stores the segmented tokens in the target phrase
- const AlignmentInfo &align = targetPhrase.GetAlignTerm();
+ obj.setState(DSGM->NullContextState());
+ obj.setPhrases(targ_phrase);
+ obj.calculateDsgProbinIsol(*DSGM,*desegT,align);
+ obj.populateScores(scores,numFeatures);
+ estimatedScores.PlusEquals(this, scores);
+}
- for (int i = 0; i < targetPhrase.GetSize(); i++) {
- targ_phrase.push_back(targetPhrase.GetWord(i).GetFactor(tFactor)->GetString().as_string());
- }
- obj.setState(DSGM->NullContextState());
- obj.setPhrases(targ_phrase);
- obj.calculateDsgProbinIsol(*DSGM,*desegT,align);
- obj.populateScores(scores,numFeatures);
- estimatedScores.PlusEquals(this, scores);
+FFState* DesegModel::EvaluateWhenApplied(
+ const Hypothesis& cur_hypo,
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const
+{
+ const TargetPhrase &target = cur_hypo.GetCurrTargetPhrase();
+ const Range &src_rng =cur_hypo.GetCurrSourceWordsRange();
+ const AlignmentInfo &align = cur_hypo.GetCurrTargetPhrase().GetAlignTerm();
+ size_t sourceOffset = src_rng.GetStartPos();
+
+ dsgHypothesis obj;
+ vector<float> scores;
+ vector<string> targ_phrase; //stores the segmented tokens in the target phrase
+ bool isCompleted;
+
+ isCompleted=cur_hypo.IsSourceCompleted();
+ for (int i = 0; i < cur_hypo.GetCurrTargetLength(); i++) {
+ targ_phrase.push_back(target.GetWord(i).GetFactor(tFactor)->GetString().as_string());
}
+ obj.setState(prev_state);
+ obj.setPhrases( targ_phrase );
+ obj.calculateDsgProb(*DSGM,*desegT,isCompleted,align, sourceOffset, optimistic);
+ obj.populateScores(scores,numFeatures);
+ accumulator->PlusEquals(this, scores);
+ return obj.saveState();
- FFState* DesegModel::EvaluateWhenApplied(
- const Hypothesis& cur_hypo,
- const FFState* prev_state,
- ScoreComponentCollection* accumulator) const
- {
- const TargetPhrase &target = cur_hypo.GetCurrTargetPhrase();
- const Range &src_rng =cur_hypo.GetCurrSourceWordsRange();
- const AlignmentInfo &align = cur_hypo.GetCurrTargetPhrase().GetAlignTerm();
- size_t sourceOffset = src_rng.GetStartPos();
-
- dsgHypothesis obj;
- vector<float> scores;
- vector<string> targ_phrase; //stores the segmented tokens in the target phrase
- bool isCompleted;
-
- isCompleted=cur_hypo.IsSourceCompleted();
- for (int i = 0; i < cur_hypo.GetCurrTargetLength(); i++) {
- targ_phrase.push_back(target.GetWord(i).GetFactor(tFactor)->GetString().as_string());
- }
-
- obj.setState(prev_state);
- obj.setPhrases( targ_phrase );
- obj.calculateDsgProb(*DSGM,*desegT,isCompleted,align, sourceOffset, optimistic);
- obj.populateScores(scores,numFeatures);
- accumulator->PlusEquals(this, scores);
- return obj.saveState();
+}
- }
+FFState* DesegModel::EvaluateWhenApplied(
+ const ChartHypothesis& /* cur_hypo */,
+ int /* featureID - used to index the state in the previous hypotheses */,
+ ScoreComponentCollection* accumulator) const
+{
+ UTIL_THROW2("Chart decoding not support by UTIL_THROW2");
+}
- FFState* DesegModel::EvaluateWhenApplied(
- const ChartHypothesis& /* cur_hypo */,
- int /* featureID - used to index the state in the previous hypotheses */,
- ScoreComponentCollection* accumulator) const
- {
- UTIL_THROW2("Chart decoding not support by UTIL_THROW2");
- }
+const FFState* DesegModel::EmptyHypothesisState(const InputType &input) const
+{
+ VERBOSE(3,"DesegModel::EmptyHypothesisState()" << endl);
+ State startState = DSGM->BeginSentenceState();
+ dsgState ss= dsgState(startState);
+ return new dsgState(ss);
+}
- const FFState* DesegModel::EmptyHypothesisState(const InputType &input) const
- {
- VERBOSE(3,"DesegModel::EmptyHypothesisState()" << endl);
- State startState = DSGM->BeginSentenceState();
- dsgState ss= dsgState(startState);
- return new dsgState(ss);
- }
+std::string DesegModel::GetScoreProducerWeightShortName(unsigned idx) const
+{
+ return "dsg";
+}
- std::string DesegModel::GetScoreProducerWeightShortName(unsigned idx) const
- {
- return "dsg";
- }
+void DesegModel::SetParameter(const std::string& key, const std::string& value)
+{
- void DesegModel::SetParameter(const std::string& key, const std::string& value)
- {
-
- if (key == "path") {
- m_lmPath = value;
- } else if (key == "contiguity-features") {
- if(value == "no")
- numFeatures = 1;
- else
- numFeatures = 5;
- } else if (key == "output-factor") {
- tFactor = Scan<int>(value);
- } else if (key == "optimistic") {
- if (value == "n")
+ if (key == "path") {
+ m_lmPath = value;
+ } else if (key == "contiguity-features") {
+ if(value == "no")
+ numFeatures = 1;
+ else
+ numFeatures = 5;
+ } else if (key == "output-factor") {
+ tFactor = Scan<int>(value);
+ } else if (key == "optimistic") {
+ if (value == "n")
optimistic = 0;
- else
+ else
optimistic = 1;
- } else if (key == "deseg-path") {
- m_desegPath = Scan<int>(value);
- } else if (key == "deseg-scheme") {
- if(value == "s")
- m_simple = 1;
- else
- m_simple = 0;
- } else if (key == "order") {
- order = Scan<int>(value);
- } else {
- StatefulFeatureFunction::SetParameter(key, value);
- }
+ } else if (key == "deseg-path") {
+ m_desegPath = Scan<int>(value);
+ } else if (key == "deseg-scheme") {
+ if(value == "s")
+ m_simple = 1;
+ else
+ m_simple = 0;
+ } else if (key == "order") {
+ order = Scan<int>(value);
+ } else {
+ StatefulFeatureFunction::SetParameter(key, value);
}
+}
- bool DesegModel::IsUseable(const FactorMask &mask) const
- {
- bool ret = mask[0];
- return ret;
- }
+bool DesegModel::IsUseable(const FactorMask &mask) const
+{
+ bool ret = mask[0];
+ return ret;
+}
} // namespace
diff --git a/moses/FF/Dsg-Feature/DsgModel.h b/moses/FF/Dsg-Feature/DsgModel.h
index f456123d2..7c19f0a63 100644
--- a/moses/FF/Dsg-Feature/DsgModel.h
+++ b/moses/FF/Dsg-Feature/DsgModel.h
@@ -13,52 +13,52 @@
namespace Moses
{
- class DesegModel : public StatefulFeatureFunction
- {
- public:
+class DesegModel : public StatefulFeatureFunction
+{
+public:
- DsgLM * DSGM;
- Desegmenter* desegT;
- int tFactor;// Target Factor ...
- int order;
- int numFeatures; // Number of features used an be 1 (unsegmented LM)or 5 (with 3 contiguity features and 1 UnsegWP)
- bool optimistic;
+ DsgLM * DSGM;
+ Desegmenter* desegT;
+ int tFactor;// Target Factor ...
+ int order;
+ int numFeatures; // Number of features used an be 1 (unsegmented LM)or 5 (with 3 contiguity features and 1 UnsegWP)
+ bool optimistic;
- DesegModel(const std::string &line);
- ~DesegModel();
+ DesegModel(const std::string &line);
+ ~DesegModel();
- void readLanguageModel(const char *);
- void Load(AllOptions::ptr const& opts);
+ void readLanguageModel(const char *);
+ void Load(AllOptions::ptr const& opts);
- FFState* EvaluateWhenApplied(
- const Hypothesis& cur_hypo,
- const FFState* prev_state,
- ScoreComponentCollection* accumulator) const;
+ FFState* EvaluateWhenApplied(
+ const Hypothesis& cur_hypo,
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const;
- virtual FFState* EvaluateWhenApplied(
- const ChartHypothesis& /* cur_hypo */,
- int /* featureID - used to index the state in the previous hypotheses */,
- ScoreComponentCollection* accumulator) const;
+ virtual FFState* EvaluateWhenApplied(
+ const ChartHypothesis& /* cur_hypo */,
+ int /* featureID - used to index the state in the previous hypotheses */,
+ ScoreComponentCollection* accumulator) const;
- void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedScores) const;
+ void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedScores) const;
- virtual const FFState* EmptyHypothesisState(const InputType &input) const;
+ virtual const FFState* EmptyHypothesisState(const InputType &input) const;
- virtual std::string GetScoreProducerWeightShortName(unsigned idx=0) const;
+ virtual std::string GetScoreProducerWeightShortName(unsigned idx=0) const;
- void SetParameter(const std::string& key, const std::string& value);
+ void SetParameter(const std::string& key, const std::string& value);
- bool IsUseable(const FactorMask &mask) const;
+ bool IsUseable(const FactorMask &mask) const;
- protected:
- typedef std::vector<float> Scores;
- std::string m_lmPath;
- std::string m_desegPath;
- bool m_simple; //desegmentation scheme; if 1 then use simple, else use rule and backoff to simple
- };
+protected:
+ typedef std::vector<float> Scores;
+ std::string m_lmPath;
+ std::string m_desegPath;
+ bool m_simple; //desegmentation scheme; if 1 then use simple, else use rule and backoff to simple
+};
}
diff --git a/moses/FF/Dsg-Feature/KenDsg.cpp b/moses/FF/Dsg-Feature/KenDsg.cpp
index 08a8dd0ed..b9fd92aef 100644
--- a/moses/FF/Dsg-Feature/KenDsg.cpp
+++ b/moses/FF/Dsg-Feature/KenDsg.cpp
@@ -3,32 +3,32 @@
namespace Moses
{
- DsgLM* ConstructDsgLM(const char *file)
- {
- lm::ngram::ModelType model_type;
- lm::ngram::Config config;
- if (lm::ngram::RecognizeBinary(file, model_type)) {
- switch(model_type) {
- case lm::ngram::PROBING:
- return new KenDsg<lm::ngram::ProbingModel>(file, config);
- case lm::ngram::REST_PROBING:
- return new KenDsg<lm::ngram::RestProbingModel>(file, config);
- case lm::ngram::TRIE:
- return new KenDsg<lm::ngram::TrieModel>(file, config);
- case lm::ngram::QUANT_TRIE:
- return new KenDsg<lm::ngram::QuantTrieModel>(file, config);
- case lm::ngram::ARRAY_TRIE:
- return new KenDsg<lm::ngram::ArrayTrieModel>(file, config);
- case lm::ngram::QUANT_ARRAY_TRIE:
- return new KenDsg<lm::ngram::QuantArrayTrieModel>(file, config);
- default:
- UTIL_THROW2("Unrecognized kenlm model type " << model_type);
- }
- } else {
+DsgLM* ConstructDsgLM(const char *file)
+{
+ lm::ngram::ModelType model_type;
+ lm::ngram::Config config;
+ if (lm::ngram::RecognizeBinary(file, model_type)) {
+ switch(model_type) {
+ case lm::ngram::PROBING:
return new KenDsg<lm::ngram::ProbingModel>(file, config);
- }
- }
-
-} // namespace
-
-
+ case lm::ngram::REST_PROBING:
+ return new KenDsg<lm::ngram::RestProbingModel>(file, config);
+ case lm::ngram::TRIE:
+ return new KenDsg<lm::ngram::TrieModel>(file, config);
+ case lm::ngram::QUANT_TRIE:
+ return new KenDsg<lm::ngram::QuantTrieModel>(file, config);
+ case lm::ngram::ARRAY_TRIE:
+ return new KenDsg<lm::ngram::ArrayTrieModel>(file, config);
+ case lm::ngram::QUANT_ARRAY_TRIE:
+ return new KenDsg<lm::ngram::QuantArrayTrieModel>(file, config);
+ default:
+ UTIL_THROW2("Unrecognized kenlm model type " << model_type);
+ }
+ } else {
+ return new KenDsg<lm::ngram::ProbingModel>(file, config);
+ }
+}
+
+} // namespace
+
+
diff --git a/moses/FF/Dsg-Feature/KenDsg.h b/moses/FF/Dsg-Feature/KenDsg.h
index d32a2d98a..3fc07003f 100644
--- a/moses/FF/Dsg-Feature/KenDsg.h
+++ b/moses/FF/Dsg-Feature/KenDsg.h
@@ -8,7 +8,7 @@ namespace Moses
class KenDsgBase
{
- public:
+public:
virtual ~KenDsgBase() {}
virtual float Score(const lm::ngram::State&, StringPiece,
@@ -22,17 +22,17 @@ class KenDsgBase
};
template <class KenModel>
- class KenDsg : public KenDsgBase
+class KenDsg : public KenDsgBase
{
- public:
+public:
KenDsg(const char *file, const lm::ngram::Config &config)
: m_kenlm(file, config) {}
float Score(const lm::ngram::State &in_state,
- StringPiece word,
- lm::ngram::State &out_state) const {
+ StringPiece word,
+ lm::ngram::State &out_state) const {
return m_kenlm.Score(in_state, m_kenlm.GetVocabulary().Index(word),
- out_state);
+ out_state);
}
const lm::ngram::State &BeginSentenceState() const {
@@ -48,13 +48,13 @@ template <class KenModel>
}
- private:
+private:
KenModel m_kenlm;
};
- typedef KenDsgBase DsgLM;
+typedef KenDsgBase DsgLM;
- DsgLM* ConstructDsgLM(const char *file);
+DsgLM* ConstructDsgLM(const char *file);
} // namespace
diff --git a/moses/FF/Dsg-Feature/dsgHyp.cpp b/moses/FF/Dsg-Feature/dsgHyp.cpp
index 7daba14c0..d33262d11 100644
--- a/moses/FF/Dsg-Feature/dsgHyp.cpp
+++ b/moses/FF/Dsg-Feature/dsgHyp.cpp
@@ -2,9 +2,9 @@
#include <sstream>
#include <boost/algorithm/string.hpp>
#include <algorithm>
-#include <cstdlib>
-#include <math.h>
-#include <map>
+#include <cstdlib>
+#include <math.h>
+#include <map>
using namespace std;
@@ -12,356 +12,380 @@ using namespace lm::ngram;
namespace Moses
{
- dsgState::dsgState(const State & val)
- {
- lmState = val;
- }
+dsgState::dsgState(const State & val)
+{
+ lmState = val;
+}
- void dsgState::saveState( std::vector<std::string> danglingTok, std::vector<int> srcSpans,float deltaValue)
- {
- buffer = danglingTok;
- span=srcSpans;
- delta=deltaValue;
- }
+void dsgState::saveState( std::vector<std::string> danglingTok, std::vector<int> srcSpans,float deltaValue)
+{
+ buffer = danglingTok;
+ span=srcSpans;
+ delta=deltaValue;
+}
- size_t dsgState::hash() const
- {
+size_t dsgState::hash() const
+{
- size_t ret = 0;
- boost::hash_combine(ret, lmState);
+ size_t ret = 0;
+ boost::hash_combine(ret, lmState);
- /*size_t ret = delta;
+ /*size_t ret = delta;
boost::hash_combine(ret, buffer);
boost::hash_combine(ret, span);
boost::hash_combine(ret, lmState.length);
return ret;*/
- }
+}
- bool dsgState::operator==(const FFState& otherBase) const //CHECK
- {
- const dsgState &other = static_cast<const dsgState&>(otherBase);
+bool dsgState::operator==(const FFState& otherBase) const //CHECK
+{
+ const dsgState &other = static_cast<const dsgState&>(otherBase);
- if (lmState < other.lmState) return false;
- if (lmState == other.lmState) return true;
- return false;
- }
+ if (lmState < other.lmState) return false;
+ if (lmState == other.lmState) return true;
+ return false;
+}
- // ----------------------------------------
-
- std::string dsgState :: getName() const
- {
- return "done";
- }
+// ----------------------------------------
- dsgHypothesis :: dsgHypothesis()
- {
- lmProb = 0;
- discontig0 = 0;
- discontig1 = 0;
- discontig2 = 0;
- UnsegWP = 0;
- m_buffer.clear();//="";
- }
+std::string dsgState :: getName() const
+{
+ return "done";
+}
- void dsgHypothesis :: setState(const FFState* prev_state)
- {
- if(prev_state != NULL) {
- m_buffer = static_cast <const dsgState *> (prev_state)->getBuffer();
- m_span = static_cast <const dsgState *> (prev_state)->getSpan();
- lmState = static_cast <const dsgState *> (prev_state)->getLMState();
- delta = static_cast <const dsgState *> (prev_state)->getDelta(); //NEW
- }
+dsgHypothesis :: dsgHypothesis()
+{
+ lmProb = 0;
+ discontig0 = 0;
+ discontig1 = 0;
+ discontig2 = 0;
+ UnsegWP = 0;
+ m_buffer.clear();//="";
+}
+
+void dsgHypothesis :: setState(const FFState* prev_state)
+{
+ if(prev_state != NULL) {
+ m_buffer = static_cast <const dsgState *> (prev_state)->getBuffer();
+ m_span = static_cast <const dsgState *> (prev_state)->getSpan();
+ lmState = static_cast <const dsgState *> (prev_state)->getLMState();
+ delta = static_cast <const dsgState *> (prev_state)->getDelta(); //NEW
}
+}
- dsgState * dsgHypothesis :: saveState()
- {
- dsgState * statePtr = new dsgState(lmState);
- statePtr->saveState(m_buffer, m_span, delta);
- return statePtr;
- }
+dsgState * dsgHypothesis :: saveState()
+{
+ dsgState * statePtr = new dsgState(lmState);
+ statePtr->saveState(m_buffer, m_span, delta);
+ return statePtr;
+}
- void dsgHypothesis :: populateScores(vector <float> & scores , const int numFeatures)
- {
- scores.clear();
- scores.push_back(lmProb);
-
- if (numFeatures == 1)
- return;
- scores.push_back(discontig0);
- scores.push_back(discontig1);
- scores.push_back(discontig2);
- scores.push_back(UnsegWP);
- }
+void dsgHypothesis :: populateScores(vector <float> & scores , const int numFeatures)
+{
+ scores.clear();
+ scores.push_back(lmProb);
+ if (numFeatures == 1)
+ return;
+ scores.push_back(discontig0);
+ scores.push_back(discontig1);
+ scores.push_back(discontig2);
+ scores.push_back(UnsegWP);
+}
- bool dsgHypothesis::isPrefix(const std::string &tok){
- if ((tok.at(tok.size() - 1) == '+' )&& (tok != "+")) { return true; }
- else { return false; };
- }
- bool dsgHypothesis::isSuffix(const std::string &tok){
- if ((tok.at(0) == '+' )&& (tok != "+")) { return true; }
- else { return false; };
- }
+bool dsgHypothesis::isPrefix(const std::string &tok)
+{
+ if ((tok.at(tok.size() - 1) == '+' )&& (tok != "+")) {
+ return true;
+ } else {
+ return false;
+ };
+}
- bool dsgHypothesis::isStem(const std::string &tok){
- if ((tok.at(0) != '+') && (tok.at(tok.size() - 1) != '+')){ return true; }
- else { return false; };
- }
+bool dsgHypothesis::isSuffix(const std::string &tok)
+{
+ if ((tok.at(0) == '+' )&& (tok != "+")) {
+ return true;
+ } else {
+ return false;
+ };
+}
+bool dsgHypothesis::isStem(const std::string &tok)
+{
+ if ((tok.at(0) != '+') && (tok.at(tok.size() - 1) != '+')) {
+ return true;
+ } else {
+ return false;
+ };
+}
- /**
- * chain stores segmented tokens that are in process of building a word
- * The function checks if tok contributes to the word being formed in chain
- *
- */
- bool dsgHypothesis::isValidChain(const std::string &tok, std::vector<std::string> &chain){
- std::string last_tok;
- if (chain.size() >= 1){
- last_tok = chain[chain.size() - 1];
- }
- else{
- last_tok = "NULL";
- }
- if(tok=="+"){return false;}
- if (isPrefix(tok) && (chain.size() == 0 || isPrefix(last_tok))) { return true; }
- else if (isSuffix(tok) && (chain.size() != 0 && ( isStem(last_tok) || isPrefix(last_tok)))) { return true; } // allows one suffix ONLY
- //else if (isSuffix(tok) && (chain.size() != 0 && ( isStem(last_tok) || isPrefix(last_tok) || isSuffix(last_tok) ))) { return true; } // allows multiple suffixes
- else if (isStem(tok) && (chain.size() == 0 || isPrefix(last_tok))) { return true; }
- else { return false; }
+
+/**
+ * chain stores segmented tokens that are in process of building a word
+ * The function checks if tok contributes to the word being formed in chain
+ *
+ */
+bool dsgHypothesis::isValidChain(const std::string &tok, std::vector<std::string> &chain)
+{
+ std::string last_tok;
+ if (chain.size() >= 1) {
+ last_tok = chain[chain.size() - 1];
+ } else {
+ last_tok = "NULL";
+ }
+ if(tok=="+") {
+ return false;
+ }
+ if (isPrefix(tok) && (chain.size() == 0 || isPrefix(last_tok))) {
+ return true;
+ } else if (isSuffix(tok) && (chain.size() != 0 && ( isStem(last_tok) || isPrefix(last_tok)))) {
+ return true; // allows one suffix ONLY
}
+ //else if (isSuffix(tok) && (chain.size() != 0 && ( isStem(last_tok) || isPrefix(last_tok) || isSuffix(last_tok) ))) { return true; } // allows multiple suffixes
+ else if (isStem(tok) && (chain.size() == 0 || isPrefix(last_tok))) {
+ return true;
+ } else {
+ return false;
+ }
+}
- /**
- * grouper function groups tokens that form a word together
- */
- vector<string> dsgHypothesis::grouper(std::vector<std::string> &phr_vec,vector<vector<int> > &allchain_ids, int sourceOffset,const AlignmentInfo &align, bool isolation){
+/**
+ * grouper function groups tokens that form a word together
+ */
+vector<string> dsgHypothesis::grouper(std::vector<std::string> &phr_vec,vector<vector<int> > &allchain_ids, int sourceOffset,const AlignmentInfo &align, bool isolation)
+{
- std::vector<std::string> chain;
- std::vector<int> chain_ids;
- std::vector<std::string> allchains;
- chain_ids=m_span;
+ std::vector<std::string> chain;
+ std::vector<int> chain_ids;
+ std::vector<std::string> allchains;
+ chain_ids=m_span;
- if (!m_buffer.empty() && !isolation){// if evaluate in isolation is called, then do not add buffer content
- for (int i = 0; i < m_buffer.size(); i++){ // initialize chain with the content of the buffer
- chain.push_back(m_buffer[i]);
- }
+ if (!m_buffer.empty() && !isolation) { // if evaluate in isolation is called, then do not add buffer content
+ for (int i = 0; i < m_buffer.size(); i++) { // initialize chain with the content of the buffer
+ chain.push_back(m_buffer[i]);
}
+ }
- for (int i = 0; i < phr_vec.size(); i++){
- std::set<std::size_t> sourcePosSet = align.GetAlignmentsForTarget(i);
-
- if (isValidChain(phr_vec[i], chain)){
- chain.push_back(phr_vec[i]);
- if (sourcePosSet.empty()==false){
- for (std::set<size_t>::iterator it(sourcePosSet.begin());it != sourcePosSet.end(); it++) {
- int cur=*it;
- chain_ids.push_back(cur+sourceOffset);
- }
- }
- }
-
- else if (chain.size() == 0) { // start of a suffix at hypothesis0
- allchains.push_back(phr_vec[i]);
- allchain_ids.push_back(chain_ids);
- chain_ids.clear();//={};
- }
-
- else { // tokens formed a complete word; add tokens segmented by space to allchains
- std::string joined = boost::algorithm::join(chain, " ");
- allchains.push_back(joined);
- allchain_ids.push_back(chain_ids);
-
- chain.clear();// = {};
- chain_ids.clear();//={};
-
- chain.push_back(phr_vec[i]);
- if (sourcePosSet.empty()==false){
- for (std::set<size_t>::iterator it(sourcePosSet.begin());it != sourcePosSet.end(); it++) {
- int cur=*it;
- chain_ids.push_back(cur+sourceOffset);
- }
- }
+ for (int i = 0; i < phr_vec.size(); i++) {
+ std::set<std::size_t> sourcePosSet = align.GetAlignmentsForTarget(i);
+ if (isValidChain(phr_vec[i], chain)) {
+ chain.push_back(phr_vec[i]);
+ if (sourcePosSet.empty()==false) {
+ for (std::set<size_t>::iterator it(sourcePosSet.begin()); it != sourcePosSet.end(); it++) {
+ int cur=*it;
+ chain_ids.push_back(cur+sourceOffset);
+ }
}
+ }
+ else if (chain.size() == 0) { // start of a suffix at hypothesis0
+ allchains.push_back(phr_vec[i]);
+ allchain_ids.push_back(chain_ids);
+ chain_ids.clear();//={};
}
- if (!chain.empty()){
+ else { // tokens formed a complete word; add tokens segmented by space to allchains
std::string joined = boost::algorithm::join(chain, " ");
allchains.push_back(joined);
allchain_ids.push_back(chain_ids);
- }
- return allchains;
- }
+ chain.clear();// = {};
+ chain_ids.clear();//={};
+ chain.push_back(phr_vec[i]);
+ if (sourcePosSet.empty()==false) {
+ for (std::set<size_t>::iterator it(sourcePosSet.begin()); it != sourcePosSet.end(); it++) {
+ int cur=*it;
+ chain_ids.push_back(cur+sourceOffset);
+ }
+ }
- void dsgHypothesis :: calculateDsgProbinIsol(DsgLM & ptrDsgLM, Desegmenter &desegT, const AlignmentInfo &align ){
- lmProb = 0;
- State currState = lmState;
- State temp;
- string desegmented="";
- vector <string> words;
- vector <string> currFVec;
+ }
- discontig0=0;
- discontig1=0;
- discontig2=0;
- UnsegWP=0;
+ }
- currFVec = m_buffer;
- currFVec.insert( currFVec.end(), m_curr_phr.begin(), m_curr_phr.end() );
+ if (!chain.empty()) {
+ std::string joined = boost::algorithm::join(chain, " ");
+ allchains.push_back(joined);
+ allchain_ids.push_back(chain_ids);
+ }
+ return allchains;
+}
- int vecSize=currFVec.size();
- // phrases with suffix-starts and prefix-end
- if (currFVec.size()>0 && isPrefix (currFVec.back())) {
- UnsegWP-=0.5;}
- if (currFVec.size()>0 && isSuffix (currFVec.front())) {
- UnsegWP-=0.5;}
- /* //Dropping prefix-end and suffix-start
- while (currFVec.size()>0 && isPrefix (currFVec.back())){
- currFVec.pop_back(); //drop prefix appearing at end of phrase
- }
+void dsgHypothesis :: calculateDsgProbinIsol(DsgLM & ptrDsgLM, Desegmenter &desegT, const AlignmentInfo &align )
+{
+ lmProb = 0;
+ State currState = lmState;
+ State temp;
+ string desegmented="";
+ vector <string> words;
+ vector <string> currFVec;
+
+ discontig0=0;
+ discontig1=0;
+ discontig2=0;
+ UnsegWP=0;
+
+ currFVec = m_buffer;
+ currFVec.insert( currFVec.end(), m_curr_phr.begin(), m_curr_phr.end() );
+
+ int vecSize=currFVec.size();
+
+ // phrases with suffix-starts and prefix-end
+ if (currFVec.size()>0 && isPrefix (currFVec.back())) {
+ UnsegWP-=0.5;
+ }
+ if (currFVec.size()>0 && isSuffix (currFVec.front())) {
+ UnsegWP-=0.5;
+ }
- while (currFVec.size()>0 && isSuffix (currFVec.front())){
- currFVec.erase (currFVec.begin()); //drop suffix appearning at start of a phrase
- } */
+ /* //Dropping prefix-end and suffix-start
+ while (currFVec.size()>0 && isPrefix (currFVec.back())){
+ currFVec.pop_back(); //drop prefix appearing at end of phrase
+ }
+
+ while (currFVec.size()>0 && isSuffix (currFVec.front())){
+ currFVec.erase (currFVec.begin()); //drop suffix appearning at start of a phrase
+ } */
+
+ vector<vector<int> > chain_ids;
+ words = grouper(currFVec,chain_ids,0,align,1);
+
+ for (int i = 0; i<words.size(); i++) {
+ UnsegWP+=1;
+ temp = currState;
+ if (words[i].find(" ")!=std::string::npos) {
+ desegmented=desegT.Search(words[i])[0];
+ lmProb += ptrDsgLM.Score(temp,desegmented,currState);
+ } else {
+ boost::replace_all(words[i], "-LRB-", "(");
+ boost::replace_all(words[i], "-RRB-", ")");
+ lmProb += ptrDsgLM.Score(temp,words[i],currState);
+ }
+ }
+ lmState = currState;
+}
- vector<vector<int> > chain_ids;
- words = grouper(currFVec,chain_ids,0,align,1);
+void dsgHypothesis :: calculateDsgProb(DsgLM& ptrDsgLM, Desegmenter &desegT, bool isCompleted , const AlignmentInfo &align, int sourceOffset, bool optimistic)
+{
+ lmProb = 0;
+ discontig0=0;
+ discontig1=0;
+ discontig2=0;
+ UnsegWP=0;
+
+ State currState = lmState;
+ State temp;
+ string desegmented="";
+ vector <string> words;
+ vector <string> currFVec;
+ bool completePhraseSuffixEnd = false;
+ vector<vector<int> > all_chain_ids;
+ double pscore;
+ currFVec=m_curr_phr;
+
+ // Check if the the phrase ends in a suffix, which means that it completes a full word;Make sure to change the isValidChain
+ if (isSuffix (currFVec.back()) && (currFVec.back()!="+")) {
+ completePhraseSuffixEnd=true;
+ }
- for (int i = 0; i<words.size(); i++) {
- UnsegWP+=1;
- temp = currState;
- if (words[i].find(" ")!=std::string::npos){
- desegmented=desegT.Search(words[i])[0];
- lmProb += ptrDsgLM.Score(temp,desegmented,currState);
- }
- else{
- boost::replace_all(words[i], "-LRB-", "(");
- boost::replace_all(words[i], "-RRB-", ")");
- lmProb += ptrDsgLM.Score(temp,words[i],currState);
+ words = grouper(currFVec,all_chain_ids,sourceOffset,align,0);
+
+ for (int i = 0; i < words.size(); i++) {
+ temp = currState;
+
+ if (i==words.size()-1) {
+ if (completePhraseSuffixEnd) { //i.e if phrase ends with suffix, which marks an end of a word
+ m_buffer.clear();// ="";
+ m_span.clear();// ={};
+ } else if (!isCompleted) { // not end of sentence( or final hypothesis), and probably the last token is not a complete word
+ m_buffer.clear();
+ if (optimistic == 1) {
+ if ( isPrefix (currFVec.back())) { // this will delay scoring of prefix in prefix-ending phrases until the next hypothesis arrives
+ //pscore = ptrDsgLM.Score(temp,desegmented,currState);
+ lmProb -= delta;
+ delta = 0.0;
+ }
+
+ else if (words[i].find(" ")!=std::string::npos) {
+ desegmented=desegT.Search(words[i])[0];
+ pscore=ptrDsgLM.Score(temp,desegmented,currState);
+ lmProb = lmProb + pscore - delta;
+ delta=pscore;
+ currState=temp;
+ } else {
+ boost::replace_all(words[i], "-LRB-", "(");
+ boost::replace_all(words[i], "-RRB-", ")");
+ pscore=ptrDsgLM.Score(temp,words[i],currState);
+ lmProb = lmProb + pscore - delta;
+ delta=pscore;
+ currState=temp;
+ }
+ }
+
+ m_buffer.push_back(words.back());
+ m_span=all_chain_ids.back();
+ break;
}
}
- lmState = currState;
- }
- void dsgHypothesis :: calculateDsgProb(DsgLM& ptrDsgLM, Desegmenter &desegT, bool isCompleted , const AlignmentInfo &align, int sourceOffset, bool optimistic)
- {
- lmProb = 0;
- discontig0=0;
- discontig1=0;
- discontig2=0;
- UnsegWP=0;
-
- State currState = lmState;
- State temp;
- string desegmented="";
- vector <string> words;
- vector <string> currFVec;
- bool completePhraseSuffixEnd = false;
- vector<vector<int> > all_chain_ids;
- double pscore;
- currFVec=m_curr_phr;
-
- // Check if the the phrase ends in a suffix, which means that it completes a full word;Make sure to change the isValidChain
- if (isSuffix (currFVec.back()) && (currFVec.back()!="+")){completePhraseSuffixEnd=true;}
-
- words = grouper(currFVec,all_chain_ids,sourceOffset,align,0);
-
- for (int i = 0; i < words.size(); i++) {
- temp = currState;
-
- if (i==words.size()-1){
- if (completePhraseSuffixEnd){ //i.e if phrase ends with suffix, which marks an end of a word
- m_buffer.clear();// ="";
- m_span.clear();// ={};
- }
- else if (!isCompleted) { // not end of sentence( or final hypothesis), and probably the last token is not a complete word
- m_buffer.clear();
- if (optimistic == 1){
- if ( isPrefix (currFVec.back())){ // this will delay scoring of prefix in prefix-ending phrases until the next hypothesis arrives
- //pscore = ptrDsgLM.Score(temp,desegmented,currState);
- lmProb -= delta;
- delta = 0.0;
- }
-
- else if (words[i].find(" ")!=std::string::npos){
- desegmented=desegT.Search(words[i])[0];
- pscore=ptrDsgLM.Score(temp,desegmented,currState);
- lmProb = lmProb + pscore - delta;
- delta=pscore;
- currState=temp;
- }
- else{
- boost::replace_all(words[i], "-LRB-", "(");
- boost::replace_all(words[i], "-RRB-", ")");
- pscore=ptrDsgLM.Score(temp,words[i],currState);
- lmProb = lmProb + pscore - delta;
- delta=pscore;
- currState=temp;
- } }
-
- m_buffer.push_back(words.back());
- m_span=all_chain_ids.back();
- break;
- }
+ //temp = currState;
+ if (words[i].find(" ")!=std::string::npos) {
+ UnsegWP+=1;
+ desegmented=desegT.Search(words[i])[0];
+ std::set<int> cur_chain_ids(all_chain_ids[i].begin(),all_chain_ids[i].end());
+ if (cur_chain_ids.size()>1) {
+ vector<int> dsc;
+ for (std::set<int>::iterator it(cur_chain_ids.begin()), next(it); it != cur_chain_ids.end() && ++next != cur_chain_ids.end(); it = next) {
+ int cur=*it;
+ int mynext=*next;
+ if (std::abs(cur - mynext)>= 3) {
+ dsc.push_back(3);
+ } else if (std::abs(cur - mynext)== 2) {
+ dsc.push_back(2);
+ } else if (std::abs(cur - mynext)<= 1) {
+ dsc.push_back(1);
+ }
+ }
+ int mymax=*std::max_element(dsc.begin(),dsc.end());
+ if (mymax==3) {
+ discontig2+=1;
+ } else if (mymax==2) {
+ discontig1+=1;
+ } else {
+ discontig0+=1;
+ }
+ } else {
+ discontig0 += 1;
}
- //temp = currState;
- if (words[i].find(" ")!=std::string::npos){
- UnsegWP+=1;
- desegmented=desegT.Search(words[i])[0];
- std::set<int> cur_chain_ids(all_chain_ids[i].begin(),all_chain_ids[i].end());
- if (cur_chain_ids.size()>1){
- vector<int> dsc;
- for (std::set<int>::iterator it(cur_chain_ids.begin()), next(it);it != cur_chain_ids.end() && ++next != cur_chain_ids.end(); it = next) {
- int cur=*it;
- int mynext=*next;
- if (std::abs(cur - mynext)>= 3) {
- dsc.push_back(3);
- }
- else if (std::abs(cur - mynext)== 2){
- dsc.push_back(2);
- }
- else if (std::abs(cur - mynext)<= 1){
- dsc.push_back(1);
- }
- }
- int mymax=*std::max_element(dsc.begin(),dsc.end());
- if (mymax==3){discontig2+=1;}
- else if (mymax==2){discontig1+=1;}
- else{discontig0+=1;}
- }
- else{
- discontig0 += 1;
- }
-
- lmProb += ptrDsgLM.Score(temp,desegmented,currState);
- }
- else{
- UnsegWP+=1;
- boost::replace_all(words[i], "-LRB-", "(");
- boost::replace_all(words[i], "-RRB-", ")");
- lmProb += ptrDsgLM.Score(temp,words[i],currState);
- }
+ lmProb += ptrDsgLM.Score(temp,desegmented,currState);
+ } else {
+ UnsegWP+=1;
+ boost::replace_all(words[i], "-LRB-", "(");
+ boost::replace_all(words[i], "-RRB-", ")");
+ lmProb += ptrDsgLM.Score(temp,words[i],currState);
}
+ }
- if (isCompleted){
- temp = currState;
- lmProb = lmProb + ptrDsgLM.ScoreEndSentence(temp,currState) - delta;
- }
- lmState = currState;
+ if (isCompleted) {
+ temp = currState;
+ lmProb = lmProb + ptrDsgLM.ScoreEndSentence(temp,currState) - delta;
}
+ lmState = currState;
+}
- void dsgHypothesis :: print()
- {}
+void dsgHypothesis :: print()
+{}
} // namespace
diff --git a/moses/FF/Dsg-Feature/dsgHyp.h b/moses/FF/Dsg-Feature/dsgHyp.h
index 0df4af11a..d36ad0530 100644
--- a/moses/FF/Dsg-Feature/dsgHyp.h
+++ b/moses/FF/Dsg-Feature/dsgHyp.h
@@ -14,53 +14,53 @@
namespace Moses
{
- class dsgState : public FFState
- {
- public:
+class dsgState : public FFState
+{
+public:
- dsgState(const lm::ngram::State & val);
- virtual bool operator==(const FFState& other) const;
- void saveState( std::vector<std::string> bufferVal,std::vector<int> spanVal, float deltaValue);
+ dsgState(const lm::ngram::State & val);
+ virtual bool operator==(const FFState& other) const;
+ void saveState( std::vector<std::string> bufferVal,std::vector<int> spanVal, float deltaValue);
- std::vector<std::string> getBuffer() const {
- return buffer;
- }
+ std::vector<std::string> getBuffer() const {
+ return buffer;
+ }
- std::vector<int> getSpan() const {
- return span;
- }
+ std::vector<int> getSpan() const {
+ return span;
+ }
- lm::ngram::State getLMState() const {
- return lmState;
- }
+ lm::ngram::State getLMState() const {
+ return lmState;
+ }
- float getDelta() const {
- return delta;
- }
+ float getDelta() const {
+ return delta;
+ }
- void setDelta(double val1 ) {
- delta = val1;
- }
+ void setDelta(double val1 ) {
+ delta = val1;
+ }
- void print() const;
- std::string getName() const;
+ void print() const;
+ std::string getName() const;
- virtual size_t hash() const;
+ virtual size_t hash() const;
- protected:
- std::vector<std::string> buffer;
- std::vector<int> span;
- lm::ngram::State lmState;
- double delta; //NEW
- };
+protected:
+ std::vector<std::string> buffer;
+ std::vector<int> span;
+ lm::ngram::State lmState;
+ double delta; //NEW
+};
class dsgHypothesis
{
- private:
+private:
std::vector<std::string> m_buffer;// maintains dangling affix from previous hypothesis
std::vector<int> m_span;// maintains source alignment for dangling affix from previous hypothesis
lm::ngram::State lmState; // KenLM's Model State ...
@@ -73,7 +73,7 @@ class dsgHypothesis
int discontig2;
double UnsegWP; //Word Penalty score based on count of words
- public:
+public:
dsgHypothesis();
~dsgHypothesis() {};
@@ -84,7 +84,7 @@ class dsgHypothesis
m_curr_phr = val1;
}
- void setDelta(double val1 ) {
+ void setDelta(double val1 ) {
delta = val1;
}