// moses-cmd/src/TranslationAnalysis.cpp (moses-smt/mosesdecoder.git)
// $Id$

#include <iostream>
#include <sstream>
#include <algorithm>
#include <cstdio>
#include "StaticData.h"
#include "Hypothesis.h"
#include "TranslationAnalysis.h"

using namespace Moses;

namespace TranslationAnalysis
{

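// Print a detailed trace of how the given hypothesis was built: the chain of
// back-pointers is unwound into a left-to-right translation path, and for
// each phrase pair the covered source span, the target phrase and the word
// alignment are written to os; accumulated language-model back-off
// statistics, dropped source words and the final score breakdown are
// reported at the end.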
void PrintTranslationAnalysis(const TranslationSystem* system, std::ostream &os, const Hypothesis* hypo)
{
  os << std::endl << "TRANSLATION HYPOTHESIS DETAILS:" << std::endl;
  std::vector<const Hypothesis*> translationPath;

  while (hypo) {
    translationPath.push_back(hypo);
    hypo = hypo->GetPrevHypo();
  }

  std::reverse(translationPath.begin(), translationPath.end());
  std::vector<std::string> droppedWords;
  std::vector<const Hypothesis*>::iterator tpi = translationPath.begin();
  if (tpi == translationPath.end())
    return;
  ++tpi;  // skip the initial (empty) hypothesis
  if (tpi == translationPath.end())
    return;
  std::vector<std::string> sourceMap;
  std::vector<std::string> targetMap;
  std::vector<unsigned int> lmAcc;
  size_t lmCalls = 0;
  bool doLMStats = ((*tpi)->GetLMStats() != 0);
  if (doLMStats)
    lmAcc.resize((*tpi)->GetLMStats()->size(), 0);
  for (; tpi != translationPath.end(); ++tpi) {
    std::ostringstream sms;
    std::ostringstream tms;
    std::string target = (*tpi)->GetTargetPhraseStringRep();
    std::string source = (*tpi)->GetSourcePhraseStringRep();
    WordsRange twr = (*tpi)->GetCurrTargetWordsRange();
    WordsRange swr = (*tpi)->GetCurrSourceWordsRange();
    const AlignmentInfo &alignmentInfo = (*tpi)->GetCurrTargetPhrase().GetAlignmentInfo();
    // accumulate language-model back-off statistics: for each LM, sum the
    // matched n-gram lengths over the words of this phrase (averaged below)
    if (doLMStats) {
      std::vector<std::vector<unsigned int> >& lmstats = *(*tpi)->GetLMStats();
      std::vector<std::vector<unsigned int> >::iterator i = lmstats.begin();
      std::vector<unsigned int>::iterator acc = lmAcc.begin();

      for (; i != lmstats.end(); ++i, ++acc) {
        std::vector<unsigned int>::iterator j = i->begin();
        lmCalls += i->size();
        for (; j != i->end(); ++j) {
          (*acc) += *j;
        }
      }
    }
    
    bool epsilon = false;
    if (target.empty()) {
      target="<EPSILON>";
      epsilon = true;
      droppedWords.push_back(source);
    }
    os << "         SOURCE: " << swr << " " << source << std::endl
       << "  TRANSLATED AS: " << target << std::endl
       << "  WORD ALIGNED: " << alignmentInfo << std::endl;
    size_t twr_i = twr.GetStartPos();
    size_t swr_i = swr.GetStartPos();
    if (!epsilon) {
      sms << twr_i;
    }
    if (epsilon) {
      tms << "del(" << swr_i << ")";
    } else {
      tms << swr_i;
    }
    swr_i++;
    twr_i++;
    for (; twr_i <= twr.GetEndPos() && twr.GetEndPos() != NOT_FOUND; twr_i++) {
      sms << '-' << twr_i;
    }
    for (; swr_i <= swr.GetEndPos() && swr.GetEndPos() != NOT_FOUND; swr_i++) {
      tms << '-' << swr_i;
    }
    if (!epsilon) targetMap.push_back(sms.str());
    sourceMap.push_back(tms.str());
  }
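  // Illustrative example (hypothetical input): three phrases covering source
  // words 0-1, 2 (dropped) and 3-4, translated as target words 0-1 and 2-3,
  // produce the span maps printed below as
  //   SOURCE: 0-1 del(2) 3-4
  //   TARGET: 0-1 2-3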
  std::vector<std::string>::iterator si = sourceMap.begin();
  std::vector<std::string>::iterator ti = targetMap.begin();
  os << std::endl << "SOURCE/TARGET SPANS:";
  os << std::endl << "  SOURCE:";
  for (; si != sourceMap.end(); ++si) {
    os << " " << *si;
  }
  os << std::endl << "  TARGET:";
  for (; ti != targetMap.end(); ++ti) {
    os << " " << *ti;
  }
  os << std::endl << std::endl;
  if (doLMStats && lmCalls > 0) {
    std::vector<unsigned int>::iterator acc = lmAcc.begin();
    const LMList& lmlist = system->GetLanguageModels();
    LMList::const_iterator i = lmlist.begin();
    for (; acc != lmAcc.end(); ++acc, ++i) {
      char buf[256];
      snprintf(buf, sizeof(buf), "%.4f", (float)(*acc)/(float)lmCalls);
      os << (*i)->GetScoreProducerDescription() <<", AVG N-GRAM LENGTH: " << buf << std::endl;
    }
  }

  if (droppedWords.size() > 0) {
    std::vector<std::string>::iterator dwi = droppedWords.begin();
    os << std::endl << "WORDS/PHRASES DROPPED:" << std::endl;
    for (; dwi != droppedWords.end(); ++dwi) {
      os << "\tdropped=" << *dwi << std::endl;
    }
  }
  os << std::endl << "SCORES (UNWEIGHTED/WEIGHTED): ";
  os << translationPath.back()->GetScoreBreakdown();
  os << " weighted(TODO)";
  os << std::endl;
}

}  // namespace TranslationAnalysis
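
/*
  Usage sketch (illustrative, not part of the original file). moses-cmd calls
  this routine after decoding a sentence when detailed translation reporting
  is enabled. Assuming the Manager interface of this era of Moses
  (ProcessSentence(), GetBestHypothesis()), the call looks roughly like:

    Manager manager(sentence, staticData.GetSearchAlgorithm(), &system);
    manager.ProcessSentence();
    TranslationAnalysis::PrintTranslationAnalysis(&system, std::cerr,
                                                  manager.GetBestHypothesis());

  Constructor arguments and option names vary between Moses versions, so
  everything here except the PrintTranslationAnalysis signature should be
  treated as an assumption.
*/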