Welcome to mirror list, hosted at ThFree Co, Russian Federation.

n_gram.h « src « irstlm - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: c76e9b922b804a6089694098de7a5e65dfceaf0c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/******************************************************************************
 IrstLM: IRST Language Model Toolkit
 Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy

 This library is free software; you can redistribute it and/or
 modify it under the terms of the GNU Lesser General Public
 License as published by the Free Software Foundation; either
 version 2.1 of the License, or (at your option) any later version.

 This library is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 Lesser General Public License for more details.

 You should have received a copy of the GNU Lesser General Public
 License along with this library; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA

******************************************************************************/

// n-gram tables 
// by M. Federico
// Copyright Marcello Federico, ITC-irst, 1998

#ifndef MF_NGRAM_H
#define MF_NGRAM_H

#include <fstream>
#include <cassert>
#include "dictionary.h"

// MAX_NGRAM is the capacity of the fixed-size arrays inside class ngram.
// It defaults to 20; define MYMAXNGRAM before including this header
// (e.g. on the compiler command line) to choose a different value.
#ifndef MYMAXNGRAM
#  define MAX_NGRAM 20
#else
#  define MAX_NGRAM MYMAXNGRAM
#endif

// Forward declaration of the dictionary type pointed to by ngram::dict.
class dictionary;

//typedef int code;

// Fixed-capacity n-gram of integer word codes.
//
// Storage layout: the n-gram occupies the LAST `size` slots of word[], i.e.
// word[MAX_NGRAM-size] .. word[MAX_NGRAM-1].  wordp(k) therefore points at
// the start of the trailing k-word window.
//
// Besides the codes, the object carries public scratch fields (link, midx,
// lev, freq, ...) whose meaning is summarised by the per-field comments.
class ngram{
  int  word[MAX_NGRAM];  //encoded ngram
 public:
  dictionary *dict;      //dictionary
  char* link;            // ngram-tree pointer
  int  midx[MAX_NGRAM];  // ngram-tree scan pointer
  int    lev;            // ngram-tree level
  int   size;            // ngram size
  int   freq;            // ngram frequency or integer prob
  int   succ;            // number of successors
  int   bow;             // back-off weight 
  int   prob;            // probability
  
  unsigned char info;    // ngram-tree info flags
  unsigned char pinfo;   // ngram-tree parent info flags
  int  isym;             // last interruption symbol

  ngram(dictionary* d,int sz=0);
  ngram(ngram& ng);
  
  // Pointer to the first code of the whole n-gram (the trailing `size` slots).
  int *wordp()
    {return wordp(size);}
  // Pointer to the first code of the trailing k-word window,
  // or 0 when the n-gram holds fewer than k words.
  int *wordp(int k)
    {return size>=k?&word[MAX_NGRAM-k]:0;}
  // Const counterpart of wordp().
  const int *wordp() const
    {return wordp(size);}
  // Const counterpart of wordp(int).
  const int *wordp(int k) const
    {return size>=k?&word[MAX_NGRAM-k]:0;}

  // Moves every code one slot towards the end of word[] (the code in
  // word[MAX_NGRAM-1] is overwritten) and shrinks the n-gram by one.
  // An empty n-gram is left untouched so that size never becomes negative.
  // Always returns 1.
  int shift(){
    if (size>0){
      for (int i=(MAX_NGRAM-1);i>0;i--){
        word[i]=word[i-1];
      }
      size--;
    }
    return 1;
  }


  // Returns 1 if the code of string s (as given by dict->encode) appears
  // among the first `lev` words of the n-gram, 0 otherwise.  Also returns 0
  // when encode() yields -1.
  // Note: the parameter `lev` shadows the data member of the same name.
  int containsWord(char* s,int lev){

    int c=dict->encode(s);
    if (c == -1) return 0;

    assert(lev <= size);
    // assert() vanishes under NDEBUG; clamp so that wordp() is never asked
    // for a window of zero or negative length, which would read outside word[].
    if (lev > size) lev=size;

    for (int i=0;i<lev;i++){
      if (*wordp(size-i)== c) return 1;
    }
    return 0;
  }
    

  void trans(const ngram& ng);

  friend std::ifstream& operator>> (std::ifstream& fi,ngram& ng);
  friend std::ofstream& operator<< (std::ofstream& fi,ngram& ng);
  friend std::istream& operator>> (std::istream& fi,ngram& ng);
  friend std::ostream& operator<< (std::ostream& fi,ngram& ng);

  // Returns 0 if any of the codes *wordp(sz) .. *wordp(2) equals
  // dict->oovcode(), 1 otherwise.  The code at wordp(1) is not examined.
  // Positions beyond the current size (where wordp() returns 0) are skipped
  // instead of being dereferenced.
  inline int ckhisto(int sz){
    
    for (int i=sz;i>1;i--){
      const int* w=wordp(i);
      if (w!=0 && *w==dict->oovcode())
        return 0;
    }
    return 1;
  }

  int pushc(int c);
  int pushw(char* w);

  //~ngram();



};

#endif