// DynSuffixArray.h -- moses/TranslationModel
// Source repository: github.com/moses-smt/mosesdecoder.git
// Blob: 62f719d57130b39e0dcffc2943c4c5552e749637
#ifndef moses_DynSuffixArray_h
#define moses_DynSuffixArray_h

#include <vector>
#include <set>
#include <algorithm>
#include <utility>
#include "moses/Util.h"
#include "moses/File.h"
#include "moses/TranslationModel/DynSAInclude/types.h"

namespace Moses
{
// NOTE(review): this using-directive sits in a header, so it takes effect in
// every translation unit that includes this file (inside namespace Moses,
// from this point on). Code below and in includers may rely on it for
// unqualified std names; audit those uses before removing it.
using namespace std;
// Shorthand for a vector of unsigned values; used throughout this header for
// the corpus, the suffix array and the auxiliary arrays.
typedef std::vector<unsigned> vuint_t;


/// compare position /i/ in the suffix array /m_sfa/ into corpus /m_crp/
/// against reference phrase /phrase/
// added by Ulrich Germann
class ComparePosition
{
  vuint_t const& m_crp;
  vuint_t const& m_sfa;

public:
  ComparePosition(vuint_t const& crp, vuint_t const& sfa);
  bool operator()(unsigned const& i, vector<wordID_t> const& phrase) const;
  bool operator()(vector<wordID_t> const& phrase, unsigned const& i) const;
};


/** Dynamic suffix array over a corpus of unsigned word ids.
 *
 *  Only the interface is declared here; apart from PrintAuxArrays() all
 *  member functions are defined out of line.
 *
 *  NOTE(review): the class keeps raw pointers and declares a destructor but
 *  no copy constructor or copy assignment. If the destructor frees the
 *  arrays, copying an instance would lead to a double free -- confirm
 *  against the implementation file.
 *
 *  @todo ask Abbey Levenberg
 */
class DynSuffixArray
{

public:
  // --- construction / destruction ---
  DynSuffixArray();
  /// NOTE(review): presumably builds the array from the given corpus;
  /// ownership of the pointer is not visible from this header -- confirm.
  DynSuffixArray(vuint_t*);
  ~DynSuffixArray();

  // --- lookup ---
  /// NOTE(review): presumably searches for the phrase given as first
  /// argument and reports matches through the second; the meaning of the
  /// bool result is defined in the implementation file -- confirm.
  bool GetCorpusIndex(vuint_t const*, vuint_t*);

  // --- persistence via C stdio streams (format defined out of line) ---
  void Load(FILE*);
  void Save(FILE*);

  // --- modification (semantics of the arguments defined out of line) ---
  void Insert(vuint_t*, unsigned);
  void Delete(unsigned, unsigned);
  void Substitute(vuint_t*, unsigned);

  /// NOTE(review): presumably the number of occurrences of /phrase/ in the
  /// corpus -- confirm. Does not modify the object (const).
  size_t GetCount(vuint_t const& phrase) const;

private:
  // The first four arrays are printed side by side by PrintAuxArrays()
  // under the column labels given below.
  // NOTE(review): presumably suffix array, inverse suffix array, and the
  // first / last columns of the sorted suffixes -- confirm.
  vuint_t* m_SA;     // column "SA"
  vuint_t* m_ISA;    // column "ISA"
  vuint_t* m_F;      // column "F"
  vuint_t* m_L;      // column "L"
  vuint_t* m_corpus;

  // Internal helpers, all defined out of line.
  void BuildAuxArrays();
  void Qsort(int* array, int begin, int end);
  int Compare(int, int, int);
  void Reorder(unsigned, unsigned);
  int LastFirstFunc(unsigned);
  int Rank(unsigned, unsigned);
  int F_firstIdx(unsigned);

  /// Debug dump to std::cerr: one header line, then one tab-separated row
  /// per entry of m_SA showing the entries of m_SA, m_ISA, m_F and m_L at
  /// that index. Access goes through at(), so an array shorter than m_SA
  /// raises std::out_of_range instead of reading out of bounds.
  void PrintAuxArrays() {
    std::cerr << "SA\tISA\tF\tL\n";
    const size_t numRows = m_SA->size();
    for (size_t row = 0; row != numRows; ++row) {
      std::cerr << m_SA->at(row) << '\t' << m_ISA->at(row) << '\t'
                << m_F->at(row) << '\t' << m_L->at(row) << std::endl;
    }
  }
};
} //end namespace

#endif