Welcome to mirror list, hosted at ThFree Co, Russian Federation.

memscore.cpp « memscore « contrib - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: c723b236e51ff34394acba2f4b5b2b9f88214819 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
// memscore - in-memory phrase scoring for Statistical Machine Translation
// Christian Hardmeier, FBK-irst, Trento, 2010
// $Id$

#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

#include "phrasetable.h"
#include "scorer.h"

// Name of the running program; set once at the start of main() and printed
// by usage() in the "Usage:" line.
const char *progname;

// Element type of a phrase pair's alignment vector.  Code in this file reads
// two members of it: `first` (assigned to a PhraseAlignment in main()) and
// `second` (the value compared by cmp_counts()).
typedef PhrasePairInfo::AlignmentVector::value_type VP;

// Forward declarations of the functions defined below.
bool cmp_counts(const VP &a1, const VP &a2);
int main(int argc, const char *argv[]);

// Ordering predicate over alignment-vector entries: true when the `second`
// member of a1 compares less than that of a2.  main() passes it to
// std::max_element to pick the entry with the largest `second`.
bool cmp_counts(const VP &a1, const VP &a2)
{
  const bool first_is_smaller = a1.second < a2.second;
  return first_is_smaller;
}

// Program entry point.
//
// The command line is a sequence of scorer specifications, each introduced
// by "-s" or "-r" (see usage()).  After the scorers have been created, the
// phrase table is loaded from standard input, phrase statistics are
// computed, every scorer is run, and one line per phrase pair is written to
// standard output in the form
//   <source phrase> ||| <target phrase> ||| <alignment> ||| <score> <score> ...
// with one score per scorer, in command-line order.
//
// A missing or malformed scorer specification ends the program via usage().
int main(int argc, const char *argv[])
{
  // argv[0] is a null pointer when argc is 0; fall back to a fixed name so
  // that usage() never streams a null pointer.
  progname = (argc > 0 && argv[0]) ? argv[0] : "memscore";

  if(argc < 2) {
    std::cerr << "No scorers specified." << std::endl;
    usage();
  }

  MemoryPhraseTable pt;
  PhraseScorerFactory psf(pt);

  // NOTE(review): the scorers are never deleted in this function; they stay
  // alive until the process exits.  Ownership rules of create_scorer() are
  // not visible here -- confirm before adding cleanup.
  typedef std::vector<PhraseScorer *> ScorerList;
  ScorerList scorers;

  // argp is advanced inside the loop body only.  create_scorer() is expected
  // to move it past the scorer name and its arguments (presumably through a
  // reference parameter -- confirm in scorer.h).
  for(int argp = 1; argp < argc; ) {
    // Initialised up front: the compiler cannot see that usage() never
    // returns, so the fall-through branch must not leave this indeterminate.
    bool reverse = false;
    if(!std::strcmp(argv[argp], "-s"))
      reverse = false;
    else if(!std::strcmp(argv[argp], "-r"))
      reverse = true;
    else
      usage();

    ++argp; // step over the -s / -r flag
    scorers.push_back(psf.create_scorer(argv, argp, reverse));
  }

  pt.load_data(std::cin);
  pt.compute_phrase_statistics();

  for(ScorerList::iterator s = scorers.begin(); s != scorers.end(); ++s)
    (*s)->score_phrases();

  for(PhrasePairCounts::const_iterator it = pt.raw_begin(); it != pt.raw_end(); ++it) {
    PhrasePairInfo ppi(it);
    Phrase src = ppi.get_src();
    Phrase tgt = ppi.get_tgt();
    const PhrasePairInfo::AlignmentVector av = ppi.get_alignments();

    // Print only the alignment whose `second` member is largest.
    // NOTE(review): std::max_element returns av.end() for an empty range,
    // which must not be dereferenced; this assumes every phrase pair has at
    // least one alignment -- confirm.
    PhraseAlignment alig = std::max_element(av.begin(), av.end(), cmp_counts)->first;

    std::cout << pt.get_src_phrase(src) << " ||| " << pt.get_tgt_phrase(tgt) << " ||| " << alig << " |||";

    for(ScorerList::iterator s = scorers.begin(); s != scorers.end(); ++s)
      std::cout << ' ' << (*s)->get_score(it);
    std::cout << '\n'; // don't use std::endl to avoid flushing
  }

  return 0;
}

void usage()
{
  std::cerr <<	"Usage: " << progname << " <scorer1> <scorer2> ..." << std::endl <<
            "       where each scorer is specified as" << std::endl <<
            "       -s <scorer> <args>         to estimate p(s|t)" << std::endl <<
            "       -r <scorer> <args>         to estimate p(t|s)" << std::endl << std::endl;

  std::cerr <<	"Implemented scorers:" << std::endl;

  const std::vector<String> &v = PhraseScorerFactory::scorer_list();
  std::copy(v.begin(), v.end(), std::ostream_iterator<std::string>(std::cerr, "\n"));

  exit(1);
}