Welcome to mirror list, hosted at ThFree Co, Russian Federation.

extract-lex.h « phrase-extract - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 1d49465c8968f491c49b2ee4d729f41a6b290daf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#pragma once

#include <map>
#include <set>
#include <sstream>
#include <fstream>
#include <iostream>

namespace MosesTraining
{

class WordCount
{
  friend std::ostream& operator<<(std::ostream&, const WordCount&);
public:
  float m_count;

  std::map<const std::string*, WordCount> m_coll;

  WordCount()
    :m_count(0) {
  }

  //WordCount(const WordCount &copy);

  WordCount(float count)
    :m_count(count) {
  }

  void AddCount(float incr);

  std::map<const std::string*, WordCount> &GetColl() {
    return m_coll;
  }
  const std::map<const std::string*, WordCount> &GetColl() const {
    return m_coll;
  }

  const float GetCount() const {
    return m_count;
  }

};

class Vocab
{
  std::set<std::string> m_coll;
public:
  const std::string *GetOrAdd(const std::string &word);
};

class ExtractLex
{
  Vocab m_vocab;
  std::map<const std::string*, WordCount> m_collS2T, m_collT2S;

  void Process(const std::string *target, const std::string *source);
  void Process(WordCount &wcIn, const std::string *out);
  void ProcessUnaligned(std::vector<std::string> &toksTarget, std::vector<std::string> &toksSource
                        , const std::vector<bool> &m_sourceAligned, const std::vector<bool> &m_targetAligned);

  void Output(const std::map<const std::string*, WordCount> &coll, std::ofstream &outStream);

public:
  void Process(std::vector<std::string> &toksTarget, std::vector<std::string> &toksSource, std::vector<std::string> &toksAlign, size_t lineCount);
  void Output(std::ofstream &streamLexS2T, std::ofstream &streamLexT2S);

};

} // namespace