Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/moses/PP
diff options
context:
space:
mode:
author Matthias Huck <mhuck@inf.ed.ac.uk> 2016-02-12 20:46:57 +0300
committer Matthias Huck <mhuck@inf.ed.ac.uk> 2016-02-12 20:46:57 +0300
commit 1659d6b4c8c0b2a261678c012b9eab32f8c7b296 (patch)
tree 6e1cc6b7bf0a17e6b34bf6bbbafd45ad19bd42d3 /moses/PP
parent c75f9854e489c14670d3c9ab6e381fc0878d27d9 (diff)
Option for target constituent constrained phrase extraction. TargetConstituentAdjacencyFeature.
Diffstat (limited to 'moses/PP')
-rw-r--r-- moses/PP/Factory.cpp 4
-rw-r--r-- moses/PP/PhraseProperty.cpp 7
-rw-r--r-- moses/PP/PhraseProperty.h 2
-rw-r--r-- moses/PP/TargetConstituentBoundariesLeftPhraseProperty.cpp 63
-rw-r--r-- moses/PP/TargetConstituentBoundariesLeftPhraseProperty.h 40
-rw-r--r-- moses/PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.cpp 63
-rw-r--r-- moses/PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.h 40
7 files changed, 218 insertions, 1 deletions
diff --git a/moses/PP/Factory.cpp b/moses/PP/Factory.cpp
index 72c927072..46ca7d362 100644
--- a/moses/PP/Factory.cpp
+++ b/moses/PP/Factory.cpp
@@ -11,6 +11,8 @@
#include "moses/PP/SpanLengthPhraseProperty.h"
#include "moses/PP/NonTermContextProperty.h"
#include "moses/PP/OrientationPhraseProperty.h"
+#include "moses/PP/TargetConstituentBoundariesLeftPhraseProperty.h"
+#include "moses/PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.h"
namespace Moses
{
@@ -58,6 +60,8 @@ PhrasePropertyFactory::PhrasePropertyFactory()
MOSES_PNAME2("Counts", CountsPhraseProperty);
MOSES_PNAME2("SourceLabels", SourceLabelsPhraseProperty);
+ MOSES_PNAME2("TargetConstituentBoundariesLeft", TargetConstituentBoundariesLeftPhraseProperty);
+ MOSES_PNAME2("TargetConstituentBoundariesRightAdjacent", TargetConstituentBoundariesRightAdjacentPhraseProperty);
MOSES_PNAME2("TargetPreferences", TargetPreferencesPhraseProperty);
MOSES_PNAME2("Tree",TreeStructurePhraseProperty);
MOSES_PNAME2("SpanLength", SpanLengthPhraseProperty);
diff --git a/moses/PP/PhraseProperty.cpp b/moses/PP/PhraseProperty.cpp
index 69e3c3374..4224e62dd 100644
--- a/moses/PP/PhraseProperty.cpp
+++ b/moses/PP/PhraseProperty.cpp
@@ -5,9 +5,14 @@ namespace Moses
std::ostream& operator<<(std::ostream &out, const PhraseProperty &obj)
{
- out << "Base phrase property";
+ obj.Print(out);
return out;
}
+void PhraseProperty::Print(std::ostream &out) const
+{
+ out << "Base phrase property";
+}
+
}
diff --git a/moses/PP/PhraseProperty.h b/moses/PP/PhraseProperty.h
index 76c294481..eef5be688 100644
--- a/moses/PP/PhraseProperty.h
+++ b/moses/PP/PhraseProperty.h
@@ -28,6 +28,8 @@ public:
protected:
+ virtual void Print(std::ostream& out) const;
+
std::string *m_value;
};
diff --git a/moses/PP/TargetConstituentBoundariesLeftPhraseProperty.cpp b/moses/PP/TargetConstituentBoundariesLeftPhraseProperty.cpp
new file mode 100644
index 000000000..e3a0917ea
--- /dev/null
+++ b/moses/PP/TargetConstituentBoundariesLeftPhraseProperty.cpp
@@ -0,0 +1,63 @@
+#include "moses/PP/TargetConstituentBoundariesLeftPhraseProperty.h"
+#include "moses/FactorCollection.h"
+#include "moses/Util.h"
+#include <iostream>
+#include <queue>
+#include <ostream>
+
+namespace Moses
+{
+
+// Parses value as space-separated pairs of a "<"-joined constituent list and a count, adding each
+// count once per distinct constituent of its pair to m_constituentsCollection; throws on bad input.
+void TargetConstituentBoundariesLeftPhraseProperty::ProcessValue(const std::string &value)
+{
+  FactorCollection &factorCollection = FactorCollection::Instance();
+  std::vector<std::string> tokens;
+  Tokenize(tokens, value, " ");
+  // Tokens are consumed two at a time; with an odd number the count of the last pair is
+  // missing and the loop would dereference tokens.end(), so reject that before parsing.
+  if ( tokens.size() % 2 != 0 ) {
+    UTIL_THROW2("TargetConstituentBoundariesLeftPhraseProperty: Read error. Flawed property? " << value);
+  }
+  std::vector<std::string>::const_iterator tokenIter = tokens.begin();
+  while (tokenIter != tokens.end()) {
+    try {
+      std::vector<std::string> constituents;
+      Tokenize(constituents, *tokenIter, "<");
+      ++tokenIter;
+      float count = std::atof( tokenIter->c_str() );
+      ++tokenIter;
+      // A constituent repeated within one pair contributes that pair's count only once.
+      std::set<const Factor* > dedup;
+      for ( std::vector<std::string>::iterator constituentIter = constituents.begin();
+            constituentIter != constituents.end(); ++constituentIter ) {
+        const Factor* constituentFactor = factorCollection.AddFactor(*constituentIter,false);
+        if ( dedup.insert(constituentFactor).second ) {
+          // Insert the count, or add it to the entry created by an earlier pair.
+          std::pair< TargetConstituentBoundariesLeftCollection::iterator, bool > inserted =
+            m_constituentsCollection.insert(std::make_pair(constituentFactor,count));
+          if ( !inserted.second ) {
+            (inserted.first)->second += count;
+          }
+        }
+      }
+    } catch (const std::exception &) {
+      UTIL_THROW2("TargetConstituentBoundariesLeftPhraseProperty: Read error. Flawed property? " << value);
+    }
+  }
+}
+
+// Writes the collection to out as "constituent count" entries separated by single spaces.
+void TargetConstituentBoundariesLeftPhraseProperty::Print(std::ostream& out) const
+{
+  bool isFirst = true;
+  for ( TargetConstituentBoundariesLeftCollection::const_iterator entry = m_constituentsCollection.begin(); entry != m_constituentsCollection.end(); ++entry ) {
+    if ( !isFirst ) out << " ";
+    isFirst = false;
+    out << *(entry->first) << " " << entry->second;
+  }
+}
+
+} // namespace Moses
+
diff --git a/moses/PP/TargetConstituentBoundariesLeftPhraseProperty.h b/moses/PP/TargetConstituentBoundariesLeftPhraseProperty.h
new file mode 100644
index 000000000..d9c629922
--- /dev/null
+++ b/moses/PP/TargetConstituentBoundariesLeftPhraseProperty.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include "moses/PP/PhraseProperty.h"
+#include "moses/Factor.h"
+#include "util/exception.hh"
+#include <map>
+#include <string>
+
+namespace Moses
+{
+
+typedef std::map<const Factor*, float> TargetConstituentBoundariesLeftCollection;
+
+// Phrase property that stores a map from constituent Factor to an accumulated float count.
+class TargetConstituentBoundariesLeftPhraseProperty : public PhraseProperty
+{
+public:
+  TargetConstituentBoundariesLeftPhraseProperty() {}
+
+  // Fills the collection from the property's textual value.
+  virtual void ProcessValue(const std::string &value);
+
+  const TargetConstituentBoundariesLeftCollection &GetCollection() const {
+    return m_constituentsCollection;
+  }
+
+  // Not supported by this property: raises via UTIL_THROW2 instead of returning a string.
+  virtual const std::string *GetValueString() const {
+    UTIL_THROW2("TargetConstituentBoundariesLeftPhraseProperty: value string not available in this phrase property");
+    return NULL;
+  }
+
+protected:
+  // Writes the collection's entries to out.
+  virtual void Print(std::ostream& out) const;
+  TargetConstituentBoundariesLeftCollection m_constituentsCollection;
+};
+
+} // namespace Moses
+
diff --git a/moses/PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.cpp b/moses/PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.cpp
new file mode 100644
index 000000000..5bed2c764
--- /dev/null
+++ b/moses/PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.cpp
@@ -0,0 +1,63 @@
+#include "moses/PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.h"
+#include "moses/FactorCollection.h"
+#include "moses/Util.h"
+#include <iostream>
+#include <queue>
+#include <ostream>
+
+namespace Moses
+{
+
+// Parses value as space-separated pairs of a "<"-joined constituent list and a count, adding each
+// count once per distinct constituent of its pair to m_constituentsCollection; throws on bad input.
+void TargetConstituentBoundariesRightAdjacentPhraseProperty::ProcessValue(const std::string &value)
+{
+  FactorCollection &factorCollection = FactorCollection::Instance();
+  std::vector<std::string> tokens;
+  Tokenize(tokens, value, " ");
+  // Tokens are consumed two at a time; with an odd number the count of the last pair is
+  // missing and the loop would dereference tokens.end(), so reject that before parsing.
+  if ( tokens.size() % 2 != 0 ) {
+    UTIL_THROW2("TargetConstituentBoundariesRightAdjacentPhraseProperty: Read error. Flawed property? " << value);
+  }
+  std::vector<std::string>::const_iterator tokenIter = tokens.begin();
+  while (tokenIter != tokens.end()) {
+    try {
+      std::vector<std::string> constituents;
+      Tokenize(constituents, *tokenIter, "<");
+      ++tokenIter;
+      float count = std::atof( tokenIter->c_str() );
+      ++tokenIter;
+      // A constituent repeated within one pair contributes that pair's count only once.
+      std::set<const Factor* > dedup;
+      for ( std::vector<std::string>::iterator constituentIter = constituents.begin();
+            constituentIter != constituents.end(); ++constituentIter ) {
+        const Factor* constituentFactor = factorCollection.AddFactor(*constituentIter,false);
+        if ( dedup.insert(constituentFactor).second ) {
+          // Insert the count, or add it to the entry created by an earlier pair.
+          std::pair< TargetConstituentBoundariesRightAdjacentCollection::iterator, bool > inserted =
+            m_constituentsCollection.insert(std::make_pair(constituentFactor,count));
+          if ( !inserted.second ) {
+            (inserted.first)->second += count;
+          }
+        }
+      }
+    } catch (const std::exception &) {
+      UTIL_THROW2("TargetConstituentBoundariesRightAdjacentPhraseProperty: Read error. Flawed property? " << value);
+    }
+  }
+}
+
+// Writes the collection to out as "constituent count" entries separated by single spaces.
+void TargetConstituentBoundariesRightAdjacentPhraseProperty::Print(std::ostream& out) const
+{
+  bool isFirst = true;
+  for ( TargetConstituentBoundariesRightAdjacentCollection::const_iterator entry = m_constituentsCollection.begin(); entry != m_constituentsCollection.end(); ++entry ) {
+    if ( !isFirst ) out << " ";
+    isFirst = false;
+    out << *(entry->first) << " " << entry->second;
+  }
+}
+
+} // namespace Moses
+
diff --git a/moses/PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.h b/moses/PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.h
new file mode 100644
index 000000000..79b5c71be
--- /dev/null
+++ b/moses/PP/TargetConstituentBoundariesRightAdjacentPhraseProperty.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include "moses/PP/PhraseProperty.h"
+#include "moses/Factor.h"
+#include "util/exception.hh"
+#include <map>
+#include <string>
+
+namespace Moses
+{
+
+typedef std::map<const Factor*, float> TargetConstituentBoundariesRightAdjacentCollection;
+
+// Phrase property that stores a map from constituent Factor to an accumulated float count.
+class TargetConstituentBoundariesRightAdjacentPhraseProperty : public PhraseProperty
+{
+public:
+  TargetConstituentBoundariesRightAdjacentPhraseProperty() {}
+
+  // Fills the collection from the property's textual value.
+  virtual void ProcessValue(const std::string &value);
+
+  const TargetConstituentBoundariesRightAdjacentCollection &GetCollection() const {
+    return m_constituentsCollection;
+  }
+
+  // Not supported by this property: raises via UTIL_THROW2 instead of returning a string.
+  virtual const std::string *GetValueString() const {
+    UTIL_THROW2("TargetConstituentBoundariesRightAdjacentPhraseProperty: value string not available in this phrase property");
+    return NULL;
+  }
+
+protected:
+  // Writes the collection's entries to out.
+  virtual void Print(std::ostream& out) const;
+  TargetConstituentBoundariesRightAdjacentCollection m_constituentsCollection;
+};
+
+} // namespace Moses
+