Welcome to mirror list, hosted at ThFree Co, Russian Federation.

processLexicalTableMin.cpp « misc - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 20183a3b6e63dc252b5badaae0f34bfc406fd265 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#include <cstdlib>
#include <iostream>
#include <string>

#ifdef WITH_THREADS
#include <boost/thread/thread.hpp>
#endif

#include "CompactPT/LexicalReorderingTableCreator.h"

using namespace Moses;

/**
 * Prints the command-line usage text to std::cerr.
 *
 * The text lists the basic and advanced options, a pointer to the online
 * documentation, the BibTeX entry to cite, and the acknowledgments.
 * The "-threads" line is only part of the text when the program is compiled
 * with WITH_THREADS.
 *
 * @param argv  argument vector of the program; only argv[0] is read and is
 *              shown as the program name in the first line.
 */
void printHelp(char **argv)
{
  std::ostream &err = std::cerr;
  const char *programName = argv[0];

  // Header line with the name the program was invoked as.
  err << "Usage " << programName << ":\n";

  // Basic options.
  err << "  options: \n";
  err << "\t-in  string       -- input table file name\n";
  err << "\t-out string       -- prefix of binary table file\n";
#ifdef WITH_THREADS
  err << "\t-threads int|all  -- number of threads used for conversion\n";
#endif

  // Advanced options.
  err << "\n  advanced:\n";
  err << "\t-landmark int     -- use landmark phrase every 2^n phrases\n";
  err << "\t-fingerprint int  -- number of bits used for phrase fingerprints\n";
  err << "\t-join-scores      -- single set of Huffman codes for score components\n";
  err << "\t-quantize int     -- maximum number of scores per score component\n";
  err << "\n";

  // Documentation link and citation request.
  err << "  For more information see: http://www.statmt.org/moses/?n=Moses.AdvancedFeatures#ntoc6\n\n";
  err << "  If you use this please cite:\n\n";
  err << "  @article { junczys_pbml98_2012,\n";
  err << "      author = { Marcin Junczys-Dowmunt },\n";
  err << "      title = { Phrasal Rank-Encoding: Exploiting Phrase Redundancy and\n";
  err << "                Translational Relations for Phrase Table Compression },\n";
  err << "      journal = { The Prague Bulletin of Mathematical Linguistics },\n";
  err << "      volume = { 98 },\n";
  err << "      year = { 2012 },\n";
  err << "      note = { Proceedings of the MT Marathon 2012, Edinburgh },\n";
  err << "  }\n\n";

  // Acknowledgments.
  err << "  Acknowledgments: Part of this research was carried out at and funded by\n";
  err << "  the World Intellectual Property Organization (WIPO) in Geneva.\n\n";
}

/**
 * Entry point: parses the command line and hands the resulting settings to
 * a LexicalReorderingTableCreator.
 *
 * Recognised arguments:
 *   -in <path>          input table file name
 *   -out <path>         output file name; ".minlexr" is appended when the
 *                       name does not already end with it (default "out")
 *   -landmark <int>     stored in orderBits (default 10)
 *   -fingerprint <int>  stored in fingerPrintBits (default 16)
 *   -join-scores        clears multipleScoreTrees (default true)
 *   -quantize <int>     stored in quantize (default 0)
 *   -threads <int|all>  thread count; only usable when built WITH_THREADS
 *
 * @return 1 after printing the usage text when no argument is given, an
 *         argument is unknown, or an option is missing its value; 1 when
 *         -threads is used in a build without WITH_THREADS; 0 once the
 *         creator has been constructed.
 */
int main(int argc, char** argv)
{
  std::string inFilePath;
  std::string outFilePath("out");

  size_t orderBits = 10;
  size_t fingerPrintBits = 16;
  bool multipleScoreTrees = true;
  size_t quantize = 0;

#ifdef WITH_THREADS
  size_t threads = 1;
#endif

  if(1 >= argc)
  {
    printHelp(argv);
    return 1;
  }
  for(int i = 1; i < argc; ++i)
  {
    std::string arg(argv[i]);
    // Options that take a value only match when a following argument exists;
    // otherwise they fall through to the usage text below.
    if("-in" == arg && i+1 < argc)
    {
      ++i;
      inFilePath = argv[i];
    }
    else if("-out" == arg && i+1 < argc)
    {
      ++i;
      outFilePath = argv[i];
    }
    else if("-landmark" == arg && i+1 < argc)
    {
      ++i;
      orderBits = std::atoi(argv[i]);
    }
    else if("-fingerprint" == arg && i+1 < argc)
    {
      ++i;
      fingerPrintBits = std::atoi(argv[i]);
    }
    else if("-join-scores" == arg)
    {
      multipleScoreTrees = false;
    }
    else if("-quantize" == arg && i+1 < argc)
    {
      ++i;
      quantize = std::atoi(argv[i]);
    }
    else if("-threads" == arg && i+1 < argc)
    {
#ifdef WITH_THREADS
      ++i;
      if(std::string(argv[i]) == "all") {
        threads = boost::thread::hardware_concurrency();
        if(!threads) {
          std::cerr << "Could not determine number of hardware threads, setting to 1" << std::endl;
          threads = 1;
        }
      }
      else {
        // A non-numeric, zero or negative value would otherwise end up as 0
        // or as a huge size_t; fall back to one thread like the "all" case.
        const int requested = std::atoi(argv[i]);
        if(requested < 1) {
          std::cerr << "Invalid number of threads '" << argv[i] << "', setting to 1" << std::endl;
          threads = 1;
        }
        else
          threads = static_cast<size_t>(requested);
      }
#else
      std::cerr << "Thread support not compiled in" << std::endl;
      return 1;
#endif
    }
    else
    {
      // Unknown argument or option without its value: print help
      printHelp(argv);
      return 1;
    }
  }

  // Append the ".minlexr" extension unless the name already ends with it.
  // The length is checked first so that names shorter than the suffix can
  // never make the comparison offset wrap around.
  const std::string suffix(".minlexr");
  const bool hasSuffix =
    outFilePath.size() >= suffix.size() &&
    outFilePath.compare(outFilePath.size() - suffix.size(), suffix.size(), suffix) == 0;
  if(!hasSuffix)
    outFilePath += suffix;

  // The conversion is driven by constructing the creator object.
  LexicalReorderingTableCreator(
    inFilePath, outFilePath,
    orderBits, fingerPrintBits,
    multipleScoreTrees, quantize
#ifdef WITH_THREADS
    , threads
#endif
  );

  return 0;
}