Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/moses/PP
diff options
context:
space:
mode:
author: Matthias Huck <huck@i6.informatik.rwth-aachen.de> 2014-06-11 23:02:31 +0400
committer: Matthias Huck <huck@i6.informatik.rwth-aachen.de> 2014-06-11 23:02:31 +0400
commit: e693a27e4e81ba9609b8c69bdc30c0895c685bfc (patch)
tree: cecb653c665ac2fdef7871ba0195ba8518e38251 /moses/PP
parent: d0e92da7340ae1c46c4eaa41f52bf5eaaf47961c (diff)
A simple phrase property class to access the three phrase count values.
The counts are usually not needed during decoding and are not loaded from the phrase table. This is just a workaround that can make them available to features which have a use for them. If you need access to the counts, copy the two marginal counts and the joint count into an additional information property with key "Counts", e.g. using awk:

$ zcat phrase-table.gz | awk -F' \|\|\| ' '{printf("%s {{Counts %s}}\n",$0,$5);}' | gzip -c > phrase-table.withCountsPP.gz

CountsPhraseProperty reads them from the phrase table and provides the methods GetSourceMarginal(), GetTargetMarginal(), and GetJointCount().
Diffstat (limited to 'moses/PP')
-rw-r--r-- moses/PP/CountsPhraseProperty.cpp 31
-rw-r--r-- moses/PP/CountsPhraseProperty.h 38
-rw-r--r-- moses/PP/Factory.cpp 2
3 files changed, 71 insertions, 0 deletions
diff --git a/moses/PP/CountsPhraseProperty.cpp b/moses/PP/CountsPhraseProperty.cpp
new file mode 100644
index 000000000..f7af18d5b
--- /dev/null
+++ b/moses/PP/CountsPhraseProperty.cpp
@@ -0,0 +1,31 @@
+#include "moses/PP/CountsPhraseProperty.h"
+#include <sstream>
+#include <assert.h>
+#include "util/exception.hh"
+
+namespace Moses
+{
+
+void CountsPhraseProperty::ProcessValue() // Reads three numbers from m_value, in order: target marginal, source marginal, joint count.
+{
+ std::istringstream tokenizer(m_value); // operator>> below skips leading whitespace between tokens
+
+ if (! (tokenizer >> m_targetMarginal)) { // first token: countE (target marginal)
+ UTIL_THROW2("CountsPhraseProperty: Not able to read target marginal. Flawed property?");
+ }
+ assert( m_targetMarginal > 0 ); // debug-only sanity check: compiled out when NDEBUG is defined
+
+ if (! (tokenizer >> m_sourceMarginal)) { // second token: countF (source marginal)
+ UTIL_THROW2("CountsPhraseProperty: Not able to read source marginal. Flawed property?");
+ }
+ assert( m_sourceMarginal > 0 ); // debug-only sanity check: compiled out when NDEBUG is defined
+
+ if (! (tokenizer >> m_jointCount)) { // third token: countEF (joint count)
+ UTIL_THROW2("CountsPhraseProperty: Not able to read joint count. Flawed property?");
+ }
+ assert( m_jointCount > 0 ); // debug-only sanity check: compiled out when NDEBUG is defined
+
+};
+
+} // namespace Moses
+
diff --git a/moses/PP/CountsPhraseProperty.h b/moses/PP/CountsPhraseProperty.h
new file mode 100644
index 000000000..24e4eaa40
--- /dev/null
+++ b/moses/PP/CountsPhraseProperty.h
@@ -0,0 +1,38 @@
+
+#pragma once
+
+#include "moses/PP/PhraseProperty.h"
+#include <string>
+#include <list>
+
+namespace Moses
+{
+
+class CountsPhraseProperty : public PhraseProperty // Phrase property exposing three count values: source marginal, target marginal, joint count.
+{
+public:
+
+ CountsPhraseProperty(const std::string &value) : PhraseProperty(value) {}; // forwards the raw property string to the base class; no counts are parsed here
+
+ virtual void ProcessValue(); // fills the three count members from the property value (defined in CountsPhraseProperty.cpp)
+
+ size_t GetSourceMarginal() const { // NOTE(review): returns size_t although m_sourceMarginal is a float, so any fractional part is dropped
+ return m_sourceMarginal;
+ }
+
+ size_t GetTargetMarginal() const { // NOTE(review): returns size_t although m_targetMarginal is a float, so any fractional part is dropped
+ return m_targetMarginal;
+ }
+
+ float GetJointCount() const { // returns the joint count unchanged, as a float
+ return m_jointCount;
+ }
+
+protected:
+
+ float m_sourceMarginal, m_targetMarginal, m_jointCount; // not initialized by the constructor; assigned in ProcessValue()
+
+};
+
+} // namespace Moses
+
diff --git a/moses/PP/Factory.cpp b/moses/PP/Factory.cpp
index 61e96a7f2..a7e06b392 100644
--- a/moses/PP/Factory.cpp
+++ b/moses/PP/Factory.cpp
@@ -4,6 +4,7 @@
#include <iostream>
#include <vector>
+#include "moses/PP/CountsPhraseProperty.h"
#include "moses/PP/TreeStructurePhraseProperty.h"
namespace Moses
@@ -50,6 +51,7 @@ PhrasePropertyFactory::PhrasePropertyFactory()
// Properties with different key than class.
#define MOSES_PNAME2(name, type) Add(name, new DefaultPhrasePropertyCreator< type >());
+ MOSES_PNAME2("Counts", CountsPhraseProperty);
MOSES_PNAME2("Tree",TreeStructurePhraseProperty);
}