Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/moses
diff options
context:
space:
mode:
author: Eva Hasler <evahasler@gmail.com> 2012-04-10 18:45:18 +0400
committer: Eva Hasler <evahasler@gmail.com> 2012-04-10 18:45:18 +0400
commit: a729e2447df1be10cc71093a3d08a954d32811dc (patch)
tree: 202503544297a9c5c3eeca1771903e6c67fb377f /moses
parent: 2b9c250d3665b17ee8c9537db40e8b357ce4840c (diff)
set source phrase in RuleTableLoader, read rule count info from phrase table
Diffstat (limited to 'moses')
-rw-r--r-- moses/src/RuleTableLoaderCompact.cpp 1
-rw-r--r-- moses/src/RuleTableLoaderStandard.cpp 5
-rw-r--r-- moses/src/TargetPhrase.cpp 31
-rw-r--r-- moses/src/TargetPhrase.h 5
4 files changed, 40 insertions, 2 deletions
diff --git a/moses/src/RuleTableLoaderCompact.cpp b/moses/src/RuleTableLoaderCompact.cpp
index 21d146bec..dce3382e7 100644
--- a/moses/src/RuleTableLoaderCompact.cpp
+++ b/moses/src/RuleTableLoaderCompact.cpp
@@ -226,6 +226,7 @@ bool RuleTableLoaderCompact::LoadRuleSection(
targetPhrase->SetTargetLHS(targetLhs);
targetPhrase->SetScoreChart(ruleTable.GetFeature(), scoreVector, weights,
languageModels, wpProducer);
+ targetPhrase->SetSourcePhrase(sourcePhrase);
// Insert rule into table.
TargetPhraseCollection &coll = GetOrCreateTargetPhraseCollection(
diff --git a/moses/src/RuleTableLoaderStandard.cpp b/moses/src/RuleTableLoaderStandard.cpp
index 190241a13..dc4fb7235 100644
--- a/moses/src/RuleTableLoaderStandard.cpp
+++ b/moses/src/RuleTableLoaderStandard.cpp
@@ -185,7 +185,8 @@ bool RuleTableLoaderStandard::Load(FormatType format
const string &sourcePhraseString = tokens[0]
, &targetPhraseString = tokens[1]
, &scoreString = tokens[2]
- , &alignString = tokens[3];
+ , &alignString = tokens[3]
+ , &ruleCountString = tokens[4];
bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
@@ -216,10 +217,12 @@ bool RuleTableLoaderStandard::Load(FormatType format
// create target phrase obj
TargetPhrase *targetPhrase = new TargetPhrase(Output);
targetPhrase->CreateFromStringNewFormat(Output, output, targetPhraseString, factorDelimiter, targetLHS);
+ targetPhrase->SetSourcePhrase(sourcePhrase);
// rest of target phrase
targetPhrase->SetAlignmentInfo(alignString);
targetPhrase->SetTargetLHS(targetLHS);
+ targetPhrase->SetRuleCount(ruleCountString, scoreVector);
//targetPhrase->SetDebugOutput(string("New Format pt ") + line);
// component score, for n-best output
diff --git a/moses/src/TargetPhrase.cpp b/moses/src/TargetPhrase.cpp
index 8dae1c694..ce0623e5b 100644
--- a/moses/src/TargetPhrase.cpp
+++ b/moses/src/TargetPhrase.cpp
@@ -34,6 +34,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Util.h"
#include "DummyScoreProducers.h"
#include "AlignmentInfoCollection.h"
+#include <boost/algorithm/string.hpp>
+
using namespace std;
@@ -332,5 +334,34 @@ std::ostream& operator<<(std::ostream& os, const TargetPhrase& tp)
return os;
}
+/**
+ * Set m_ruleCount from the count field of a phrase-table entry.
+ *
+ * The field is split on spaces and tabs. Two layouts are handled:
+ *  - two tokens: the first token is read as the target count and the rule
+ *    count is reconstructed as round(p(f|e) * targetCount);
+ *  - three tokens: the third token is taken as the rule count directly.
+ * For any other token count m_ruleCount is set to 0.
+ *
+ * \param ruleCountString  count field of the phrase-table line
+ * \param scoreVector      translation scores; element 0 is used as p(f|e).
+ *                         A warning is printed when fewer than 4 scores are
+ *                         present.
+ */
+void TargetPhrase::SetRuleCount(const StringPiece &ruleCountString, std::vector<float> &scoreVector) {
+  // Only touch scoreVector[0] when it exists; an empty vector used to be
+  // indexed unconditionally, which is an out-of-bounds read.
+  const float p_f_given_e = scoreVector.empty() ? 0.0f : scoreVector[0];
+  if (scoreVector.size() < 4) {
+    std::cerr << "Warning: possibly wrong format of phrase translation scores" << endl;
+  }
+
+  std::vector<std::string> tokens;
+  boost::split(tokens, ruleCountString, boost::is_any_of("\t "));
+
+  if (tokens.size() == 2) {
+    // No explicit rule count: recover it from p(f|e) and the target count,
+    // rounding to the nearest integer.
+    const float targetCount = Scan<float>(tokens[0]);
+    const float ruleCount = p_f_given_e * targetCount;
+    m_ruleCount = static_cast<size_t>(floor(ruleCount + 0.5));
+  }
+  else if (tokens.size() == 3) {
+    m_ruleCount = static_cast<size_t>(Scan<float>(tokens[2]));
+  }
+  else {
+    // Unrecognised count field: store a defined value so GetRuleCount()
+    // never returns an unassigned member.
+    m_ruleCount = 0;
+  }
+}
+
}
diff --git a/moses/src/TargetPhrase.h b/moses/src/TargetPhrase.h
index f5e20271c..9226b2636 100644
--- a/moses/src/TargetPhrase.h
+++ b/moses/src/TargetPhrase.h
@@ -59,11 +59,12 @@ protected:
Phrase m_sourcePhrase;
const AlignmentInfo* m_alignmentInfo;
Word m_lhsTarget;
+ size_t m_ruleCount;
public:
TargetPhrase();
TargetPhrase(std::string out_string);
- TargetPhrase(const Phrase &);
+ TargetPhrase(const Phrase &targetPhrase);
~TargetPhrase();
//! used by the unknown word handler- these targets
@@ -159,6 +160,8 @@ public:
const AlignmentInfo &GetAlignmentInfo() const
{ return *m_alignmentInfo; }
+ void SetRuleCount(const StringPiece &ruleCountString, std::vector<float> &scoreVector);
+ size_t GetRuleCount() const { return m_ruleCount; }
TO_STRING();