Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hieu Hoang <hieuhoang@gmail.com> 2015-01-03 19:35:20 +0300
committer: Hieu Hoang <hieuhoang@gmail.com> 2015-01-03 19:35:20 +0300
commit: 0036d8bb4d08ee7f12c6ec2a25b5ce4fd7c35831 (patch)
tree: f2a5cb72865d787e0d5212412710ebf497a8c54f
parent: 0a707597d81eeac2043c183e162e7267c05ba2e3 (diff)
parent: 7cc75a0fa15b85861e2e3e8c7db88ca26c81b73f (diff)
Merge branch 'master' of github.com:moses-smt/mosesdecoder
-rw-r--r-- phrase-extract/score-stsg/Options.h 4
-rw-r--r-- phrase-extract/score-stsg/RuleGroup.cpp 4
-rw-r--r-- phrase-extract/score-stsg/RuleGroup.h 7
-rw-r--r-- phrase-extract/score-stsg/RuleTableWriter.cpp 8
-rw-r--r-- phrase-extract/score-stsg/RuleTableWriter.h 2
-rw-r--r-- phrase-extract/score-stsg/ScoreStsg.cpp 20
6 files changed, 28 insertions, 17 deletions
diff --git a/phrase-extract/score-stsg/Options.h b/phrase-extract/score-stsg/Options.h
index 17b959c84..25e63a5c0 100644
--- a/phrase-extract/score-stsg/Options.h
+++ b/phrase-extract/score-stsg/Options.h
@@ -20,7 +20,7 @@ public:
, negLogProb(false)
, noLex(false)
, noWordAlignment(false)
- , pcfg(false) {}
+ , treeScore(false) {}
// Positional options
std::string extractFile;
@@ -36,7 +36,7 @@ public:
bool negLogProb;
bool noLex;
bool noWordAlignment;
- bool pcfg;
+ bool treeScore;
};
} // namespace ScoreStsg
diff --git a/phrase-extract/score-stsg/RuleGroup.cpp b/phrase-extract/score-stsg/RuleGroup.cpp
index bbbe3b2b6..a4e6ff3a2 100644
--- a/phrase-extract/score-stsg/RuleGroup.cpp
+++ b/phrase-extract/score-stsg/RuleGroup.cpp
@@ -15,7 +15,8 @@ void RuleGroup::SetNewSource(const StringPiece &source)
}
void RuleGroup::AddRule(const StringPiece &target, const StringPiece &ntAlign,
- const StringPiece &fullAlign, int count)
+ const StringPiece &fullAlign, int count,
+ double treeScore)
{
if (m_distinctRules.empty() ||
ntAlign != m_distinctRules.back().ntAlign ||
@@ -27,6 +28,7 @@ void RuleGroup::AddRule(const StringPiece &target, const StringPiece &ntAlign,
fullAlign.CopyToString(&r.alignments.back().first);
r.alignments.back().second = count;
r.count = count;
+ r.treeScore = treeScore;
m_distinctRules.push_back(r);
} else {
DistinctRule &r = m_distinctRules.back();
diff --git a/phrase-extract/score-stsg/RuleGroup.h b/phrase-extract/score-stsg/RuleGroup.h
index de0c25f17..8d9933263 100644
--- a/phrase-extract/score-stsg/RuleGroup.h
+++ b/phrase-extract/score-stsg/RuleGroup.h
@@ -25,13 +25,14 @@ class RuleGroup
{
public:
// Stores the target-side and NT-alignment of a distinct rule. Also records
- // the rule's count and the observed symbol alignments (plus their
- // frequencies).
+ // the rule's count, the observed symbol alignments (plus their frequencies),
+ // and the tree score.
struct DistinctRule {
std::string target;
std::string ntAlign;
std::vector<std::pair<std::string, int> > alignments;
int count;
+ double treeScore;
};
typedef std::vector<DistinctRule>::const_iterator ConstIterator;
@@ -58,7 +59,7 @@ public:
// values will be checked against those of the previous rule only (in other
// words, the input is assumed to be ordered).
void AddRule(const StringPiece &target, const StringPiece &ntAlign,
- const StringPiece &fullAlign, int count);
+ const StringPiece &fullAlign, int count, double treeScore);
private:
std::string m_source;
diff --git a/phrase-extract/score-stsg/RuleTableWriter.cpp b/phrase-extract/score-stsg/RuleTableWriter.cpp
index 0a1d5aa08..d7bbe9d7f 100644
--- a/phrase-extract/score-stsg/RuleTableWriter.cpp
+++ b/phrase-extract/score-stsg/RuleTableWriter.cpp
@@ -28,8 +28,8 @@ namespace ScoreStsg
void RuleTableWriter::WriteLine(const TokenizedRuleHalf &source,
const TokenizedRuleHalf &target,
const std::string &bestAlignment,
- double lexScore, int count, int totalCount,
- int distinctCount)
+ double lexScore, double treeScore, int count,
+ int totalCount, int distinctCount)
{
if (m_options.inverse) {
WriteRuleHalf(target);
@@ -47,7 +47,9 @@ void RuleTableWriter::WriteLine(const TokenizedRuleHalf &source,
m_out << MaybeLog(lexScore);
}
- // TODO PCFG
+ if (m_options.treeScore && !m_options.inverse) {
+ m_out << " " << MaybeLog(treeScore);
+ }
m_out << " ||| " << totalCount << " " << count;
if (m_options.kneserNey) {
diff --git a/phrase-extract/score-stsg/RuleTableWriter.h b/phrase-extract/score-stsg/RuleTableWriter.h
index db8924de3..340a4bf19 100644
--- a/phrase-extract/score-stsg/RuleTableWriter.h
+++ b/phrase-extract/score-stsg/RuleTableWriter.h
@@ -23,7 +23,7 @@ public:
, m_out(out) {}
void WriteLine(const TokenizedRuleHalf &, const TokenizedRuleHalf &,
- const std::string &, double, int, int, int);
+ const std::string &, double, double, int, int, int);
private:
double MaybeLog(double a) const {
diff --git a/phrase-extract/score-stsg/ScoreStsg.cpp b/phrase-extract/score-stsg/ScoreStsg.cpp
index 04e3b5a44..642c5dc05 100644
--- a/phrase-extract/score-stsg/ScoreStsg.cpp
+++ b/phrase-extract/score-stsg/ScoreStsg.cpp
@@ -83,6 +83,12 @@ int ScoreStsg::Main(int argc, char *argv[])
StringPiece fullAlign = *it++;
it->CopyToString(&tmp);
int count = std::atoi(tmp.c_str());
+ double treeScore = 0.0f;
+ if (m_options.treeScore && !m_options.inverse) {
+ ++it;
+ it->CopyToString(&tmp);
+ treeScore = std::atof(tmp.c_str());
+ }
// If this is the first line or if source has changed since the last
// line then process the current rule group and start a new one.
@@ -95,7 +101,7 @@ int ScoreStsg::Main(int argc, char *argv[])
}
// Add the rule to the current rule group.
- ruleGroup.AddRule(target, ntAlign, fullAlign, count);
+ ruleGroup.AddRule(target, ntAlign, fullAlign, count, treeScore);
}
// Process the final rule group.
@@ -223,11 +229,9 @@ void ScoreStsg::ProcessRuleGroup(const RuleGroup &group,
double lexProb = ComputeLexProb(m_sourceHalf.frontierSymbols,
m_targetHalf.frontierSymbols, m_tgtToSrc);
- // TODO PCFG score
-
// Write a line to the rule table.
writer.WriteLine(m_sourceHalf, m_targetHalf, bestAlignment, lexProb,
- p->count, totalCount, distinctCount);
+ rule.treeScore, p->count, totalCount, distinctCount);
}
}
@@ -348,7 +352,9 @@ void ScoreStsg::ProcessOptions(int argc, char *argv[], Options &options) const
("NoWordAlignment",
"do not output word alignments")
("PCFG",
- "include pre-computed PCFG score from extract")
+ "synonym for TreeScore (included for compatibility with score)")
+ ("TreeScore",
+ "include pre-computed tree score from extract")
("UnpairedExtractFormat",
"ignored (included for compatibility with score)")
;
@@ -429,8 +435,8 @@ void ScoreStsg::ProcessOptions(int argc, char *argv[], Options &options) const
if (vm.count("NoWordAlignment")) {
options.noWordAlignment = true;
}
- if (vm.count("PCFG")) {
- options.pcfg = true;
+ if (vm.count("TreeScore") || vm.count("PCFG")) {
+ options.treeScore = true;
}
}