Welcome to mirror list, hosted at ThFree Co, Russian Federation.

Vocabulary.h « biconcor « ems « scripts - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: dc01753873514a42e5985a33a76ee81e9dd06ef6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
// $Id: tables-core.h 1470 2007-10-02 21:43:54Z redpony $

#pragma once

#include <assert.h>
#include <stdlib.h>

#include <cmath>
#include <fstream>
#include <iostream>
#include <map>
#include <queue>
#include <string>
#include <vector>

using namespace std;

// NOTE(review): presumably the size of the char buffer handed to SAFE_GETLINE
// as _SIZE (its error message tells users to raise MAX_LENGTH) -- confirm
// against the callers, which are not visible in this header.
#define MAX_LENGTH 10000

// Reads one _DELIM-terminated record from stream _IS into the char buffer
// _LINE of capacity _SIZE via istream::getline.
//
// - If getline leaves failbit set without badbit or eofbit, the state is
//   cleared so the stream stays usable.
// - If that happened because the buffer filled up (gcount() == _SIZE - 1), the
//   line did not fit: an error is printed to cerr and the process exits with
//   status 1.
//
// The overflow test also requires the failbit condition because gcount()
// counts an extracted delimiter: a line of exactly _SIZE - 2 characters (or
// _SIZE - 1 characters ending at end-of-file) fits in the buffer yet also
// yields gcount() == _SIZE - 1.
//
// Every argument is parenthesized, so expressions such as *ptr or a + b are
// safe to pass; note that _IS and _SIZE are still evaluated more than once.
// The expansion remains a plain brace block (not do/while) so existing call
// sites, with or without a trailing semicolon, keep compiling.
// NOTE(review): the message names phrase-extract/tables-core.cpp as the place
// to raise MAX_LENGTH -- confirm that still matches where callers get it from.
#define SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM) { \
                (_IS).getline((_LINE), (_SIZE), (_DELIM)); \
                const bool safeGetlineSoftFail_ = \
                  (_IS).fail() && !(_IS).bad() && !(_IS).eof(); \
                if (safeGetlineSoftFail_) (_IS).clear(); \
                if (safeGetlineSoftFail_ && (_IS).gcount() == (_SIZE) - 1) { \
                  std::cerr << "Line too long! Buffer overflow. Delete lines >=" \
                    << (_SIZE) << " chars or raise MAX_LENGTH in phrase-extract/tables-core.cpp" \
                    << std::endl; \
                  ::exit(1); \
                } \
              }

// A vocabulary entry (stored as a string) and its numeric identifier.
typedef std::string WORD;
typedef unsigned int WORD_ID;

// Holds a word -> id map together with an id-indexed word list.
// Only GetWord is defined in this header; the remaining members are declared
// here and defined elsewhere, so their exact behavior is not visible from here.
class Vocabulary
{
public:
  // Maps a word to its id.
  std::map<WORD, WORD_ID> lookup;
  // Words addressed by id (GetWord indexes this vector with a WORD_ID).
  std::vector< WORD > vocab;

  // NOTE(review): judging by the name, returns the id of the word and
  // registers it first when unseen -- confirm against the definition.
  WORD_ID StoreIfNew( const WORD& );
  // NOTE(review): presumably a pure lookup of the word's id; the result for an
  // unknown word is not visible here -- confirm against the definition.
  WORD_ID GetWordID( const WORD& );
  // NOTE(review): presumably splits a C string into words and returns their
  // ids -- confirm the separator and unknown-word handling in the definition.
  std::vector<WORD_ID> Tokenize( const char[] );

  // Returns a mutable reference to the word stored at index `id` of `vocab`.
  // The method is const yet hands out a non-const reference (kept so existing
  // callers that bind to WORD& still compile); the constness is removed with
  // an explicit const_cast rather than a C-style cast.
  // `id` must be a valid index into `vocab`; this is checked with assert, so
  // builds with NDEBUG perform no check.
  inline WORD &GetWord( WORD_ID id ) const {
    assert( id < vocab.size() );
    return const_cast<WORD&>( vocab[ id ] );
  }

  // NOTE(review): presumably write/read the vocabulary to/from the named
  // file; the on-disk format is defined elsewhere.
  void Save( std::string fileName );
  void Load( std::string fileName );
};