Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Marcin Junczys-Dowmunt <junczys@amu.edu.pl>, 2014-12-17 19:01:06 +0300
committer: Marcin Junczys-Dowmunt <junczys@amu.edu.pl>, 2014-12-17 19:01:06 +0300
commit: 2e777f19c3ddb850dde73e51772ae728149d447a (patch)
tree: 4703d757fded9d1d4be42a85272e703e3fd587eb
parent: 0b8d0f0a0e3199cc1a9f6e7e434384b4ff8ae330 (diff)
search for best path
-rw-r--r-- mert/mtalign.cpp 113
1 files changed, 105 insertions, 8 deletions
diff --git a/mert/mtalign.cpp b/mert/mtalign.cpp
index 5613b6552..d23bb00f3 100644
--- a/mert/mtalign.cpp
+++ b/mert/mtalign.cpp
@@ -173,7 +173,41 @@ class Corpus {
std::vector<Sentence> m_sentences;
};
-typedef std::vector<float> Stats;
+/// Fixed-size accumulator of float statistics used by the BLEU routines.
+///
+/// Every instance holds MAX_NGRAM_ORDER * 2 + 1 slots, all zero after
+/// construction. computeBLEUstats() stores rLen in the last slot; the layout
+/// of the remaining slots is defined by that function.
+class Stats {
+ public:
+  /// Creates a zero-filled statistics vector.
+  Stats()
+    : m_stats(MAX_NGRAM_ORDER * 2 + 1, 0)
+  {}
+
+  /// Mutable access to slot i (not bounds-checked).
+  float& operator[](size_t i) {
+    return m_stats[i];
+  }
+
+  /// Read-only access to slot i (not bounds-checked), so that functions can
+  /// take a `const Stats&`.
+  float operator[](size_t i) const {
+    return m_stats[i];
+  }
+
+  /// Mutable access to the last slot.
+  float& back() {
+    return m_stats.back();
+  }
+
+  /// Read-only access to the last slot.
+  float back() const {
+    return m_stats.back();
+  }
+
+  /// Number of slots (always MAX_NGRAM_ORDER * 2 + 1).
+  size_t size() const {
+    return m_stats.size();
+  }
+
+  /// Adds o to this object slot by slot and returns *this.
+  /// Takes a const reference so temporaries and const objects are accepted.
+  Stats& operator+=(const Stats& o) {
+    for(size_t i = 0; i < m_stats.size(); i++)
+      m_stats[i] += o[i];
+    return *this;
+  }
+
+  /// Returns the slot-wise sum of *this and o; neither operand is modified.
+  Stats operator+(const Stats& o) const {
+    Stats out(*this);
+    out += o;
+    return out;
+  }
+
+ private:
+  std::vector<float> m_stats;
+};
+
void computeBLEUstats(const Sentence& c, const Sentence& r, Stats& stats) {
size_t cLen = c.size();
@@ -204,7 +238,7 @@ void computeBLEUstats(const Sentence& c, const Sentence& r, Stats& stats) {
rcounts[ngram] = CountOrder(j, 1);
}
}
-
+
for(NGramCounts::iterator it = ccounts.begin(); it != ccounts.end(); it++) {
size_t order = it->second.first;
@@ -221,7 +255,14 @@ void computeBLEUstats(const Sentence& c, const Sentence& r, Stats& stats) {
stats.back() = rLen;
}
-float computeBLEU(const Stats& stats) {
+/// Convenience overload: fills a freshly constructed Stats object via the
+/// three-argument computeBLEUstats() for the sentence pair (c, r) and
+/// returns it by value.
+Stats computeBLEUstats(const Sentence& c, const Sentence& r) {
+  Stats result;
+  computeBLEUstats(c, r, result);
+  return result;
+}
+
+
+float computeBLEU(Stats& stats) {
UTIL_THROW_IF(stats.size() != MAX_NGRAM_ORDER * 2 + 1, util::Exception, "Error");
float logbleu = 0.0;
@@ -285,15 +326,71 @@ int main(int argc, char** argv)
Corpus source(sourceFileName);
Corpus target(targetFileName);
- Stats stats(9, 0);
- for(size_t i = 0; i < source.size() && i < target.size(); i++)
+ Stats final;
+ for(size_t i = 0; i < source.size() && i < target.size(); i++) {
+ Stats stats;
computeBLEUstats(source[i], target[i], stats);
+ final = final + stats;
+ }
- for(size_t i = 0; i < stats.size(); i++) {
- std::cout << stats[i] << " ";
+ for(size_t i = 0; i < final.size(); i++) {
+ std::cout << final[i] << " ";
}
std::cout << std::endl;
- std::cout << computeBLEU(stats) << std::endl;
+ std::cout << computeBLEU(final) << std::endl;
+
+ std::vector< std::vector<Stats> > S(source.size(), std::vector<Stats>(target.size()));
+
+ Stats empty;
+
+ for(size_t i = 0; i < S.size(); i++) {
+ for(size_t j = 0; j < S[i].size(); j++) {
+
+ Stats a01 = (j > 0) ? S[i][j-1] : empty;
+ Stats a10 = (i > 0) ? S[i-1][j] : empty;
+ Stats a11 = (i > 0 && j > 0) ? S[i-1][j-1] + computeBLEUstats(source[i], target[j]) : empty;
+ Stats a12 = (i > 0 && j > 1) ? S[i-1][j-2] + computeBLEUstats(source[i], target[j-1] + target[j]) : empty;
+ Stats a21 = (i > 1 && j > 0) ? S[i-2][j-1] + computeBLEUstats(source[i-1] + source[i], target[j]) : empty;
+
+ Stats bestStats;
+ float bestBLEU = 0;
+ float temp = 0;
+
+ temp = computeBLEU(a01);
+ if(temp > bestBLEU) {
+ bestBLEU = temp;
+ bestStats = a01;
+ }
+
+ temp = computeBLEU(a10);
+ if(temp > bestBLEU) {
+ bestBLEU = temp;
+ bestStats = a10;
+ }
+
+ temp = computeBLEU(a11);
+ if(temp > bestBLEU) {
+ bestBLEU = temp;
+ bestStats = a11;
+ }
+
+ temp = computeBLEU(a12);
+ if(temp > bestBLEU) {
+ bestBLEU = temp;
+ bestStats = a12;
+ }
+
+ temp = computeBLEU(a21);
+ if(temp > bestBLEU) {
+ bestBLEU = temp;
+ bestStats = a21;
+ }
+
+ S[i][j] = bestStats;
+ }
+ }
+
+ std::cout << computeBLEU(S.back().back()) << std::endl;
}