Welcome to mirror list, hosted at ThFree Co, Russian Federation.

vocabid.cpp « ProbingPT « TranslationModel « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: d6f442323c6b79b4e4ce0c8480257b82643a5048 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#include <stdexcept>
#include <boost/foreach.hpp>
#include "vocabid.hh"
#include "StoreVocab.h"
#include "moses/Util.h"

namespace Moses
{

/**
 * Feed every factor of every word in a piece of text into a vocabulary.
 *
 * The text is split on ' ' into words, and each word is split on '|' into
 * factors. Each factor is handed to sourceVocab.Insert() together with
 * getHash(factor) and a std::string copy of the factor.
 *
 * @param sourceVocab vocabulary store receiving the (hash, string) pairs
 * @param textin      text to tokenize
 */
void add_to_map(StoreVocab<uint64_t> &sourceVocab,
                const StringPiece &textin)
{
  typedef util::TokenIter<util::SingleCharacter> CharSplitter;

  // Outer pass: space-separated words.
  for (CharSplitter wordIt(textin, util::SingleCharacter(' ')); wordIt; ++wordIt) {
    const StringPiece currentWord = *wordIt;

    // Inner pass: '|'-separated factors of the current word.
    for (CharSplitter factorIt(currentWord, util::SingleCharacter('|')); factorIt; ++factorIt) {
      const StringPiece piece = *factorIt;
      sourceVocab.Insert(getHash(piece), piece.as_string());
    }
  }
}

/**
 * Write a key -> string map to a text file, one entry per line.
 *
 * Every entry is emitted as "<key>\t<value>\n", in the map's iteration
 * order (ascending key). The file is opened with the default std::ofstream
 * mode.
 *
 * NOTE(review): a failed open is not reported; the writes then have no
 * effect. Confirm callers do not need an error here.
 * NOTE(review): read_map in this file parses each line as value<TAB>key,
 * i.e. the reverse of the column order written here. Confirm the two are
 * not expected to round-trip.
 *
 * @param karta    map to write out
 * @param filename path of the file to create
 */
void serialize_map(const std::map<uint64_t, std::string> &karta,
                   const std::string &filename)
{
  std::ofstream os(filename.c_str());

  typedef std::map<uint64_t, std::string>::const_iterator EntryIter;
  for (EntryIter iter = karta.begin(); iter != karta.end(); ++iter) {
    // '\n' instead of std::endl: endl flushes the stream after every entry,
    // which is needless work for a large map. The bytes written are the same.
    os << iter->first << '\t' << iter->second << '\n';
  }

  // Flushes whatever is still buffered and releases the file.
  os.close();
}

/**
 * Load map entries from a tab-separated text file.
 *
 * Each line is split with Tokenize(line, "\t") and must yield exactly two
 * fields. The SECOND field is parsed as the uint64_t key and the FIRST
 * field is stored as its value. An entry already present in karta under the
 * same key is overwritten; other existing entries are left alone.
 *
 * NOTE(review): the column order read here (value, key) is the reverse of
 * what serialize_map in this file writes. Presumably the input comes from a
 * different producer; confirm.
 * NOTE(review): a file that cannot be opened is not reported; the loop
 * simply never runs and karta is left unchanged.
 *
 * @param karta    map that receives the entries
 * @param filename path of the file to read
 * @throws std::runtime_error if a line does not yield exactly two fields
 */
void read_map(std::map<uint64_t, std::string> &karta, const char* filename)
{
  std::ifstream is(filename);

  std::string line;
  while (getline(is, line)) {
    std::vector<std::string> toks = Tokenize(line, "\t");

    // Checked at runtime rather than with assert(): an assert disappears
    // under NDEBUG, and toks[1] below would then read past the end of the
    // vector on a malformed line.
    if (toks.size() != 2) {
      throw std::runtime_error(std::string("read_map: expected 2 tab-separated fields in ")
                               + filename + ", got line: " + line);
    }

    uint64_t ind = Scan<uint64_t>(toks[1]);
    karta[ind] = toks[0];
  }

  //Close the stream after we are done.
  is.close();
}

}