Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/moses/PP
diff options
context:
space:
mode:
author Matthias Huck <mhuck@inf.ed.ac.uk> 2016-01-11 23:14:28 +0300
committer Matthias Huck <mhuck@inf.ed.ac.uk> 2016-01-11 23:14:28 +0300
commit a5a4401fe9d4ca9ad99f2c2ea9fc1120b88ac753 (patch)
tree 38478507478ebd16ea9d1eca516a9be2bbb33dd8 /moses/PP
parent 885b8b33a156a1c3acee960c4b36d3669542f041 (diff)
TargetPreferencesPhraseProperty
Diffstat (limited to 'moses/PP')
-rw-r--r-- moses/PP/Factory.cpp 2
-rw-r--r-- moses/PP/TargetPreferencesPhraseProperty.cpp 123
-rw-r--r-- moses/PP/TargetPreferencesPhraseProperty.h 71
3 files changed, 196 insertions, 0 deletions
diff --git a/moses/PP/Factory.cpp b/moses/PP/Factory.cpp
index cc393b18d..72c927072 100644
--- a/moses/PP/Factory.cpp
+++ b/moses/PP/Factory.cpp
@@ -6,6 +6,7 @@
#include "moses/PP/CountsPhraseProperty.h"
#include "moses/PP/SourceLabelsPhraseProperty.h"
+#include "moses/PP/TargetPreferencesPhraseProperty.h"
#include "moses/PP/TreeStructurePhraseProperty.h"
#include "moses/PP/SpanLengthPhraseProperty.h"
#include "moses/PP/NonTermContextProperty.h"
@@ -57,6 +58,7 @@ PhrasePropertyFactory::PhrasePropertyFactory()
MOSES_PNAME2("Counts", CountsPhraseProperty);
MOSES_PNAME2("SourceLabels", SourceLabelsPhraseProperty);
+ MOSES_PNAME2("TargetPreferences", TargetPreferencesPhraseProperty);
MOSES_PNAME2("Tree",TreeStructurePhraseProperty);
MOSES_PNAME2("SpanLength", SpanLengthPhraseProperty);
MOSES_PNAME2("NonTermContext", NonTermContextProperty);
diff --git a/moses/PP/TargetPreferencesPhraseProperty.cpp b/moses/PP/TargetPreferencesPhraseProperty.cpp
new file mode 100644
index 000000000..9358ee4bf
--- /dev/null
+++ b/moses/PP/TargetPreferencesPhraseProperty.cpp
@@ -0,0 +1,123 @@
+#include "moses/PP/TargetPreferencesPhraseProperty.h"
+#include <iostream>
+#include <cstdio>
+#include <cstdlib>
+#include <sstream>
+#include <string>
+#include <queue>
+#include <assert.h>
+#include <limits>
+
+namespace Moses
+{
+
+namespace
+{
+
+// Skips whitespace in front of the next token and reports whether any unread
+// input is left. A bare peek() would return a trailing blank instead of EOF
+// and so trigger a spurious "not able to read" error on padded values.
+bool HasMoreTokens(std::istream &in)
+{
+  in >> std::ws;
+  return in.peek() != EOF;
+}
+
+} // anonymous namespace
+
+/**
+ * Parses the whitespace-separated property value and fills m_nNTs,
+ * m_totalCount and m_labelItems.
+ *
+ * Token layout as read below:
+ *   nNTs totalCount item*
+ * where, if nNTs > 1, every item is
+ *   rhsLabel{nNTs-1} rhsCount nLHS (lhsLabel count){nLHS}
+ * and, if nNTs == 1, the single kind of item is just
+ *   (lhsLabel count)*
+ * with its right-hand side count taken from totalCount.
+ *
+ * After parsing, if more than N=50 (LHS label, count) pairs were read, only
+ * entries whose count reaches the N-th largest labelled count are kept, and at
+ * most N pairs overall; items left without any LHS entry are removed.
+ *
+ * Raises an exception via UTIL_THROW2 if a token cannot be read or if the
+ * number of non-terminals is zero.
+ */
+void TargetPreferencesPhraseProperty::ProcessValue(const std::string &value)
+{
+  std::istringstream tokenizer(value);
+
+  if (! (tokenizer >> m_nNTs)) { // first token: number of non-terminals (incl. left-hand side)
+    UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read number of non-terminals. Flawed property?");
+  }
+  // Checked at runtime rather than with assert(): in NDEBUG builds a zero here
+  // would make the unsigned expression m_nNTs-1 below wrap around.
+  if (m_nNTs == 0) {
+    UTIL_THROW2("TargetPreferencesPhraseProperty: Number of non-terminals must be positive. Flawed property?");
+  }
+
+  if (! (tokenizer >> m_totalCount)) { // second token: overall rule count
+    UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read overall rule count. Flawed property?");
+  }
+  assert( m_totalCount > 0.0 );
+
+
+  // read labelled rule items
+
+  // collects every labelled count so that the N-th largest can be found afterwards
+  std::priority_queue<float> ruleLabelledCountsPQ;
+
+  while (HasMoreTokens(tokenizer)) {
+    try {
+
+      TargetPreferencesPhrasePropertyItem item;
+      // unbounded by default: for rules without RHS non-terminals the LHS list runs to the end of the input
+      size_t numberOfLHSsGivenRHS = std::numeric_limits<std::size_t>::max();
+
+      if (m_nNTs == 1) {
+
+        item.m_labelsRHSCount = m_totalCount;
+
+      } else { // rule has right-hand side non-terminals, i.e. it's a hierarchical rule
+
+        for (size_t i=0; i<m_nNTs-1; ++i) { // RHS non-terminal labels
+          size_t labelRHS;
+          if (! (tokenizer >> labelRHS) ) { // RHS non-terminal label
+            UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read right-hand side label index. Flawed property?");
+          }
+          item.m_labelsRHS.push_back(labelRHS);
+        }
+
+        if (! (tokenizer >> item.m_labelsRHSCount)) {
+          UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read right-hand side count. Flawed property?");
+        }
+
+        if (! (tokenizer >> numberOfLHSsGivenRHS)) {
+          UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read number of left-hand sides. Flawed property?");
+        }
+      }
+
+      for (size_t i=0; i<numberOfLHSsGivenRHS && HasMoreTokens(tokenizer); ++i) { // LHS non-terminal labels seen with this RHS
+        size_t labelLHS;
+        if (! (tokenizer >> labelLHS)) { // LHS non-terminal label
+          UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read left-hand side label index. Flawed property?");
+        }
+        float ruleLabelledCount;
+        if (! (tokenizer >> ruleLabelledCount)) {
+          UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read count. Flawed property?");
+        }
+        item.m_labelsLHSList.push_back( std::make_pair(labelLHS,ruleLabelledCount) );
+        ruleLabelledCountsPQ.push(ruleLabelledCount);
+      }
+
+      m_labelItems.push_back(item);
+
+    } catch (const std::exception &e) {
+      // keep the underlying cause in the message instead of discarding it
+      UTIL_THROW2("TargetPreferencesPhraseProperty: Read error. Flawed property? " << e.what());
+    }
+  }
+
+  // keep only top N label vectors
+  const size_t N=50;
+
+  if (ruleLabelledCountsPQ.size() > N) {
+
+    // pop the N largest counts; the last one popped is the pruning threshold
+    float topNRuleLabelledCount = std::numeric_limits<float>::max();
+    for (size_t i=0; !ruleLabelledCountsPQ.empty() && i<N; ++i) {
+      topNRuleLabelledCount = ruleLabelledCountsPQ.top();
+      ruleLabelledCountsPQ.pop();
+    }
+
+    size_t nKept=0;
+    std::list<TargetPreferencesPhrasePropertyItem>::iterator itemIter=m_labelItems.begin();
+    while (itemIter!=m_labelItems.end()) {
+      if (itemIter->m_labelsRHSCount < topNRuleLabelledCount) {
+        // the whole RHS label vector is below the threshold
+        itemIter = m_labelItems.erase(itemIter);
+      } else {
+        std::list< std::pair<size_t,float> >::iterator itemLHSIter=(itemIter->m_labelsLHSList).begin();
+        while (itemLHSIter!=(itemIter->m_labelsLHSList).end()) {
+          if (itemLHSIter->second < topNRuleLabelledCount) {
+            itemLHSIter = (itemIter->m_labelsLHSList).erase(itemLHSIter);
+          } else {
+            if (nKept >= N) {
+              // budget exhausted (ties at the threshold can exceed N): drop the rest of this list
+              itemLHSIter = (itemIter->m_labelsLHSList).erase(itemLHSIter,(itemIter->m_labelsLHSList).end());
+            } else {
+              ++nKept;
+              ++itemLHSIter;
+            }
+          }
+        }
+        if ((itemIter->m_labelsLHSList).empty()) {
+          // nothing survived for this RHS label vector
+          itemIter = m_labelItems.erase(itemIter);
+        } else {
+          ++itemIter;
+        }
+      }
+    }
+  }
+}
+
+} // namespace Moses
+
diff --git a/moses/PP/TargetPreferencesPhraseProperty.h b/moses/PP/TargetPreferencesPhraseProperty.h
new file mode 100644
index 000000000..84ef9b3c5
--- /dev/null
+++ b/moses/PP/TargetPreferencesPhraseProperty.h
@@ -0,0 +1,71 @@
+
+#pragma once
+
+#include "moses/PP/PhraseProperty.h"
+#include "util/exception.hh"
+#include <string>
+#include <list>
+
+namespace Moses
+{
+
+/**
+ * One right-hand-side label vector together with its count and the list of
+ * left-hand-side labels (with counts) recorded for it.
+ * TargetPreferencesPhraseProperty is a friend and writes the members directly;
+ * other code only gets read access through the getters.
+ */
+class TargetPreferencesPhrasePropertyItem
+{
+  friend class TargetPreferencesPhraseProperty;
+
+public:
+  // The count is zero-initialised so that a default-constructed item never
+  // exposes an indeterminate float through GetTargetPreferencesRHSCount().
+  TargetPreferencesPhrasePropertyItem()
+    : m_labelsRHSCount(0.0f)
+  {}
+
+  // Count stored for the right-hand-side label vector.
+  float GetTargetPreferencesRHSCount() const {
+    return m_labelsRHSCount;
+  }
+
+  // Right-hand-side label indices.
+  const std::list<size_t> &GetTargetPreferencesRHS() const {
+    return m_labelsRHS;
+  }
+
+  // (left-hand-side label index, count) pairs for this right-hand side.
+  const std::list< std::pair<size_t,float> > &GetTargetPreferencesLHSList() const {
+    return m_labelsLHSList;
+  }
+
+private:
+  float m_labelsRHSCount;
+  std::list<size_t> m_labelsRHS; // should be of size nNTs-1 (empty if initial rule, i.e. no right-hand side non-terminals)
+  std::list< std::pair<size_t,float> > m_labelsLHSList; // list of left-hand sides for this right-hand side, with counts
+};
+
+
+/**
+ * Phrase property holding a number of non-terminals, an overall count and a
+ * list of TargetPreferencesPhrasePropertyItem entries. The members are meant
+ * to be populated by ProcessValue(), which is only declared here.
+ */
+class TargetPreferencesPhraseProperty : public PhraseProperty
+{
+public:
+  // Scalars start at zero so the getters return a defined value even before
+  // ProcessValue() has been called.
+  TargetPreferencesPhraseProperty()
+    : m_nNTs(0)
+    , m_totalCount(0.0f)
+  {}
+
+  virtual void ProcessValue(const std::string &value);
+
+  // Stored number of non-terminals.
+  size_t GetNumberOfNonTerminals() const {
+    return m_nNTs;
+  }
+
+  // Stored overall count.
+  float GetTotalCount() const {
+    return m_totalCount;
+  }
+
+  // Stored label items.
+  const std::list<TargetPreferencesPhrasePropertyItem> &GetTargetPreferencesItems() const {
+    return m_labelItems;
+  }
+
+  // This property keeps no value string: every call raises via UTIL_THROW2.
+  // The return statement is never reached; it only gives the function a
+  // return on every syntactic path.
+  virtual const std::string *GetValueString() const {
+    UTIL_THROW2("TargetPreferencesPhraseProperty: value string not available in this phrase property");
+    return NULL;
+  }
+
+protected:
+
+  size_t m_nNTs;
+  float m_totalCount;
+
+  std::list<TargetPreferencesPhrasePropertyItem> m_labelItems;
+};
+
+} // namespace Moses
+