Welcome to mirror list, hosted at ThFree Co, Russian Federation.

biconcor.cpp « biconcor « ems « scripts - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 0b62186237819b34767e7bf9b4b5be6cd0dacceb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#include "SuffixArray.h"
#include "TargetCorpus.h"
#include "Alignment.h"
#include "PhrasePairCollection.h"
#include <getopt.h>
#include "base64.h"

using namespace std;

int main(int argc, char* argv[])
{
  // handle parameters
  string query;
  string fileNameSuffix;
  string fileNameSource;
  string fileNameTarget = "";
  string fileNameAlignment = "";
  int loadFlag = false;
  int saveFlag = false;
  int createFlag = false;
  int queryFlag = false;
  int htmlFlag = false;
  string info = "usage: suffix-query\n\t[--load file]\n\t[--save file]\n\t[--create source-corpus]\n\t[--query string]\n\t[--target target-corpus]\n\t[--alignment file]\n";
  while(1) {
    static struct option long_options[] = {
      {"load", required_argument, 0, 'l'},
      {"save", required_argument, 0, 's'},
      {"create", required_argument, 0, 'c'},
      {"query", required_argument, 0, 'q'},
      {"target", required_argument, 0, 't'},
      {"alignment", required_argument, 0, 'a'},
      {"html", no_argument, &htmlFlag, 0},
      {0, 0, 0, 0}
    };
    int option_index = 0;
    int c = getopt_long (argc, argv, "l:s:c:q:Q:t:a:h", long_options, &option_index);
    if (c == -1) break;
    switch (c) {
    case 'l':
      fileNameSuffix = string(optarg);
      loadFlag = true;
      break;
    case 't':
      fileNameTarget = string(optarg);
      break;
    case 'a':
      fileNameAlignment = string(optarg);
      break;
    case 's':
      fileNameSuffix = string(optarg);
      saveFlag = true;
      break;
    case 'c':
      fileNameSource = string(optarg);
      createFlag = true;
      break;
    case 'Q':
      query = base64_decode(string(optarg));
      queryFlag = true;
      break;
    case 'q':
      query = string(optarg);
      queryFlag = true;
      break;
    default:
      cerr << info;
      exit(1);
    }
  }

  // check if parameter settings are legal
  if (saveFlag && !createFlag) {
    cerr << "error: cannot save without creating\n" << info;
    exit(1);
  }
  if (saveFlag && loadFlag) {
    cerr << "error: cannot load and save at the same time\n" << info;
    exit(1);
  }
  if (!loadFlag && !createFlag) {
    cerr << "error: neither load or create - i have no info!\n" << info;
    exit(1);
  }
  if (createFlag && (fileNameTarget == "" || fileNameAlignment == "")) {
    cerr << "error: i have no target corpus or alignment\n" << info;
    exit(1);
  }

  // do your thing
  SuffixArray suffixArray;
  TargetCorpus targetCorpus;
  Alignment alignment;
  if (createFlag) {
    cerr << "will create\n";
    cerr << "source corpus is in " << fileNameSource << endl;
    suffixArray.Create( fileNameSource );
    cerr << "target corpus is in " << fileNameTarget << endl;
    targetCorpus.Create( fileNameTarget );
    cerr << "alignment is in " << fileNameAlignment << endl;
    alignment.Create( fileNameAlignment );
    if (saveFlag) {
      suffixArray.Save( fileNameSuffix );
      targetCorpus.Save( fileNameSuffix );
      alignment.Save( fileNameSuffix );
      cerr << "will save in " << fileNameSuffix << endl;
    }
  }
  if (loadFlag) {
    cerr << "will load from " << fileNameSuffix << endl;
    suffixArray.Load( fileNameSuffix );
    targetCorpus.Load( fileNameSuffix );
    alignment.Load( fileNameSuffix );
  }
  if (queryFlag) {
    cerr << "query is " << query << endl;
    vector< string > queryString = alignment.Tokenize( query.c_str() );
    PhrasePairCollection ppCollection( &suffixArray, &targetCorpus, &alignment );
    ppCollection.GetCollection( queryString );
    ppCollection.PrintHTML();
  }
}