Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author Hieu Hoang <hieuhoang@gmail.com> 2013-08-16 01:20:48 +0400
committer Hieu Hoang <hieuhoang@gmail.com> 2013-08-16 01:20:48 +0400
commit 55bd87d38c280f3e90aa9cd944be7550ab9938bb (patch)
tree 61c3df23bce948820b60885819a5bbea7e7d9647 /moses-cmd
parent 9f92642e59ee09cfe4b1369e07646f282d29b03e (diff)
parent 091bed03afb921c23c5a205b660492f0eb75ffc3 (diff)
Merge pull request #43 from jgwinnup/master
Added option to output unknown words with prefix 'UNK'
Diffstat (limited to 'moses-cmd')
-rw-r--r-- moses-cmd/IOWrapper.cpp 11
1 files changed, 10 insertions, 1 deletions
diff --git a/moses-cmd/IOWrapper.cpp b/moses-cmd/IOWrapper.cpp
index 8829af3d6..b1c50cff8 100644
--- a/moses-cmd/IOWrapper.cpp
+++ b/moses-cmd/IOWrapper.cpp
@@ -196,6 +196,7 @@ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<
{
CHECK(outputFactorOrder.size() > 0);
const Phrase& phrase = edge.GetCurrTargetPhrase();
+ bool markUnknown = StaticData::Instance().GetMarkUnknown();
if (reportAllFactors == true) {
out << phrase;
} else {
@@ -212,8 +213,16 @@ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<
}
}
CHECK(factor);
- out << *factor;
+ //preface surface form with UNK if marking unknowns
+ const Word &word = phrase.GetWord(pos);
+ if(markUnknown && word.IsOOV()) {
+ out << "UNK" << *factor;
+ }
+ else {
+ out << *factor;
+ }
+
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
CHECK(factor);