Welcome to mirror list, hosted at ThFree Co, Russian Federation.

TranslationAnalysis.cpp « src « moses-cmd - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: add50ff3c609f2b66590799075fc363927f4da9c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
// $Id: TranslationAnalysis.cpp 2717 2010-01-28 15:32:04Z phkoehn $

#include <iostream>
#include <sstream>
#include <algorithm>
#include "StaticData.h"
#include "Hypothesis.h"
#include "TranslationAnalysis.h"

using namespace Moses;

namespace TranslationAnalysis {

void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo)
{
	os << std::endl << "TRANSLATION HYPOTHESIS DETAILS:" << std::endl;
  std::vector<const Hypothesis*> translationPath;
  while (hypo) {
    translationPath.push_back(hypo);
    hypo = hypo->GetPrevHypo();
  }
  std::reverse(translationPath.begin(), translationPath.end());

  std::vector<std::string> droppedWords;
  std::vector<const Hypothesis*>::iterator tpi = translationPath.begin();
  ++tpi;  // skip initial translation state
	std::vector<std::string> sourceMap;
	std::vector<std::string> targetMap;
	std::vector<unsigned int> lmAcc(0);
	size_t lmCalls = 0;
	bool doLMStats = ((*tpi)->GetLMStats() != 0);
	if (doLMStats)
		lmAcc.resize((*tpi)->GetLMStats()->size(), 0);
  for (; tpi != translationPath.end(); ++tpi) {
		std::ostringstream sms;
		std::ostringstream tms;
    std::string target = (*tpi)->GetTargetPhraseStringRep();
    std::string source = (*tpi)->GetSourcePhraseStringRep();
		WordsRange twr = (*tpi)->GetCurrTargetWordsRange();
		WordsRange swr = (*tpi)->GetCurrSourceWordsRange();

		// language model backoff stats,
		if (doLMStats) {
			std::vector<std::vector<unsigned int> >& lmstats = *(*tpi)->GetLMStats();
			std::vector<std::vector<unsigned int> >::iterator i = lmstats.begin();
			std::vector<unsigned int>::iterator acc = lmAcc.begin();

			for (; i != lmstats.end(); ++i, ++acc) {
				std::vector<unsigned int>::iterator j = i->begin();
				lmCalls += i->size();
				for (; j != i->end(); ++j) {
					(*acc) += *j;
				}
			}
		}
		
		bool epsilon = false;
    if (target == "") {
      target="<EPSILON>";
			epsilon = true;
      droppedWords.push_back(source);
    }
    os << "         SOURCE: " << swr << " " << source << std::endl
       << "  TRANSLATED AS: "               << target << std::endl;
		size_t twr_i = twr.GetStartPos();
		size_t swr_i = swr.GetStartPos();
		if (!epsilon) { sms << twr_i; }
		if (epsilon) { tms << "del(" << swr_i << ")"; } else { tms << swr_i; }
		swr_i++; twr_i++;
		for (; twr_i <= twr.GetEndPos() && twr.GetEndPos() != NOT_FOUND; twr_i++) {
				sms << '-' << twr_i;
			}
		for (; swr_i <= swr.GetEndPos() && swr.GetEndPos() != NOT_FOUND; swr_i++) {
			tms << '-' << swr_i;
		}
		if (!epsilon) targetMap.push_back(sms.str());
		sourceMap.push_back(tms.str());
  }
	std::vector<std::string>::iterator si = sourceMap.begin();
	std::vector<std::string>::iterator ti = targetMap.begin();
	os << std::endl << "SOURCE/TARGET SPANS:";
	os << std::endl << "  SOURCE:";
	for (; si != sourceMap.end(); ++si) {
		os << " " << *si;
	}
	os << std::endl << "  TARGET:";
	for (; ti != targetMap.end(); ++ti) {
		os << " " << *ti;
	}
	os << std::endl << std::endl;
	if (doLMStats && lmCalls > 0) {
		std::vector<unsigned int>::iterator acc = lmAcc.begin();
		const LMList& lmlist = StaticData::Instance().GetAllLM();
		LMList::const_iterator i = lmlist.begin();
		for (; acc != lmAcc.end(); ++acc, ++i) {
			char buf[256];
			sprintf(buf, "%.4f", (float)(*acc)/(float)lmCalls);
			os << (*i)->GetScoreProducerDescription() <<", AVG N-GRAM LENGTH: " << buf << std::endl;
		}
	}

  if (droppedWords.size() > 0) {
    std::vector<std::string>::iterator dwi = droppedWords.begin();
    os << std::endl << "WORDS/PHRASES DROPPED:" << std::endl;
    for (; dwi != droppedWords.end(); ++dwi) {
      os << "\tdropped=" << *dwi << std::endl;
    }
  }
	os << std::endl << "SCORES (UNWEIGHTED/WEIGHTED): ";
  StaticData::Instance().GetScoreIndexManager().PrintLabeledWeightedScores(os, translationPath.back()->GetScoreBreakdown(), StaticData::Instance().GetAllWeights());
	os << std::endl;
}

}