diff options
author | Hieu Hoang <hieuhoang@gmail.com> | 2013-08-16 01:20:48 +0400 |
---|---|---|
committer | Hieu Hoang <hieuhoang@gmail.com> | 2013-08-16 01:20:48 +0400 |
commit | 55bd87d38c280f3e90aa9cd944be7550ab9938bb (patch) | |
tree | 61c3df23bce948820b60885819a5bbea7e7d9647 /moses-cmd | |
parent | 9f92642e59ee09cfe4b1369e07646f282d29b03e (diff) | |
parent | 091bed03afb921c23c5a205b660492f0eb75ffc3 (diff) |
Merge pull request #43 from jgwinnup/master
Added option to output unknown words with prefix 'UNK'
Diffstat (limited to 'moses-cmd')
-rw-r--r-- | moses-cmd/IOWrapper.cpp | 11 |
1 file changed, 10 insertions, 1 deletion
diff --git a/moses-cmd/IOWrapper.cpp b/moses-cmd/IOWrapper.cpp
index 8829af3d6..b1c50cff8 100644
--- a/moses-cmd/IOWrapper.cpp
+++ b/moses-cmd/IOWrapper.cpp
@@ -196,6 +196,7 @@ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<
 {
   CHECK(outputFactorOrder.size() > 0);
   const Phrase& phrase = edge.GetCurrTargetPhrase();
+  bool markUnknown = StaticData::Instance().GetMarkUnknown();
   if (reportAllFactors == true) {
     out << phrase;
   } else {
@@ -212,8 +213,16 @@ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<
         }
       }
       CHECK(factor);
-      out << *factor;
+      //preface surface form with UNK if marking unknowns
+      const Word &word = phrase.GetWord(pos);
+      if(markUnknown && word.IsOOV()) {
+        out << "UNK" << *factor;
+      }
+      else {
+        out << *factor;
+      }
+
       for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
         const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
         CHECK(factor);