Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/moses/PP
diff options
context:
space:
mode:
author Hieu Hoang <hieuhoang@gmail.com> 2014-06-09 19:01:37 +0400
committer Hieu Hoang <hieuhoang@gmail.com> 2014-06-09 19:01:37 +0400
commit 649b31c226228bc3f2ce34c5587c0bf19e2f91e0 (patch)
tree a405d13dce7a6133080d86b37f90ea3b37cc07f6 /moses/PP
parent 0178e5237ec48d20714f415a516cb8e27832e7df (diff)
span length
Diffstat (limited to 'moses/PP')
-rw-r--r-- moses/PP/SpanLengthPhraseProperty.cpp 58
-rw-r--r-- moses/PP/SpanLengthPhraseProperty.h 5
2 files changed, 50 insertions, 13 deletions
diff --git a/moses/PP/SpanLengthPhraseProperty.cpp b/moses/PP/SpanLengthPhraseProperty.cpp
index 20acb17e4..465ec2dfd 100644
--- a/moses/PP/SpanLengthPhraseProperty.cpp
+++ b/moses/PP/SpanLengthPhraseProperty.cpp
@@ -1,4 +1,3 @@
-#include <vector>
#include "SpanLengthPhraseProperty.h"
#include "moses/Util.h"
#include "util/exception.hh"
@@ -13,10 +12,25 @@ SpanLengthPhraseProperty::SpanLengthPhraseProperty(const std::string &value)
vector<string> toks;
Tokenize(toks, value);
- for (size_t i = 0; i < toks.size(); i = i + 2) {
+ set< vector<string> > indices;
+
+ for (size_t i = 0; i < toks.size(); ++i) {
const string &span = toks[i];
- float count = Scan<float>(toks[i + 1]);
- Populate(span, count);
+
+ // is it a ntIndex,sourceSpan,targetSpan or count ?
+ vector<string> toks;
+ Tokenize<string>(toks, span, ",");
+ UTIL_THROW_IF2(toks.size() != 1 && toks.size() != 3, "Incorrect format for SpanLength: " << span);
+
+ if (toks.size() == 1) {
+ float count = Scan<float>(toks[0]);
+ Populate(indices, count);
+
+ indices.clear();
+ }
+ else {
+ indices.insert(toks);
+ }
}
// totals
@@ -24,12 +38,16 @@ SpanLengthPhraseProperty::SpanLengthPhraseProperty(const std::string &value)
CalcTotals(m_target);
}
-void SpanLengthPhraseProperty::Populate(const string &span, float count)
+void SpanLengthPhraseProperty::Populate(const set< vector<string> > &indices, float count)
{
- vector<size_t> toks;
- Tokenize<size_t>(toks, span, ",");
- UTIL_THROW_IF2(toks.size() != 3, "Incorrect format for SpanLength: " << span);
- Populate(toks, count);
+ set< vector<string> >::const_iterator iter;
+ for (iter = indices.begin(); iter != indices.end(); ++iter) {
+ const vector<string> &toksStr = *iter;
+ vector<size_t> toks = Scan<size_t>(toksStr);
+ UTIL_THROW_IF2(toks.size() != 3, "Incorrect format for SpanLength. Size is " << toks.size());
+
+ Populate(toks, count);
+ }
}
void SpanLengthPhraseProperty::Populate(const std::vector<size_t> &toks, float count)
@@ -41,9 +59,24 @@ void SpanLengthPhraseProperty::Populate(const std::vector<size_t> &toks, float c
m_source.resize(ntInd + 1);
m_target.resize(ntInd + 1);
}
- m_source[ntInd].first[sourceLength] = count;
- m_target[ntInd].first[targetLength] = count;
+ Map &sourceMap = m_source[ntInd].first;
+ Map &targetMap = m_target[ntInd].first;
+ Populate(sourceMap, sourceLength, count);
+ Populate(targetMap, targetLength, count);
+}
+
+void SpanLengthPhraseProperty::Populate(Map &map, size_t span, float count)
+{
+ Map::iterator iter;
+ iter = map.find(span);
+ if (iter != map.end()) {
+ float &value = iter->second;
+ value += count;
+ }
+ else {
+ map[span] = count;
+ }
}
void SpanLengthPhraseProperty::CalcTotals(Vec &vec)
@@ -77,7 +110,8 @@ float SpanLengthPhraseProperty::GetProb(size_t ntInd, size_t sourceWidth, float
}
count += smoothing;
- float ret = count / (data.second + smoothing * map.size());
+ float total = data.second + smoothing * map.size();
+ float ret = count / total;
return ret;
}
diff --git a/moses/PP/SpanLengthPhraseProperty.h b/moses/PP/SpanLengthPhraseProperty.h
index a45974802..44f972005 100644
--- a/moses/PP/SpanLengthPhraseProperty.h
+++ b/moses/PP/SpanLengthPhraseProperty.h
@@ -2,6 +2,7 @@
#pragma once
#include <string>
+#include <set>
#include <map>
#include <vector>
#include "moses/PP/PhraseProperty.h"
@@ -21,8 +22,10 @@ protected:
typedef std::vector<std::pair<Map, float> > Vec;
Vec m_source, m_target;
- void Populate(const std::string &span, float count);
+ void Populate(const std::set< std::vector<std::string> > &indices, float count);
void Populate(const std::vector<size_t> &toks, float count);
+ void Populate(Map &map, size_t span, float count);
+
void CalcTotals(Vec &vec);
};