Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author redpony <redpony@1f5c12ca-751b-0410-a591-d2e778427230> 2006-07-18 00:15:38 +0400
committer redpony <redpony@1f5c12ca-751b-0410-a591-d2e778427230> 2006-07-18 00:15:38 +0400
commit 851e5685d0067fca5cd5e5f636c9bfbb0ea35ee2 (patch)
tree 146ddf23b9d8d6a1956769f66ac3d18e4f3ef184 /irstlm/src/lmtable.h
parent f06c0b6a133d74be52334c67b0e735748553d155 (diff)
irst-lm library
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@154 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'irstlm/src/lmtable.h')
-rw-r--r-- irstlm/src/lmtable.h 220
1 files changed, 220 insertions, 0 deletions
diff --git a/irstlm/src/lmtable.h b/irstlm/src/lmtable.h
new file mode 100644
index 000000000..893c8214d
--- /dev/null
+++ b/irstlm/src/lmtable.h
@@ -0,0 +1,220 @@
+/******************************************************************************
+ IrstLM: IRST Language Model Toolkit
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+******************************************************************************/
+
+// lm-gram tables
+// by M. Federico
+// Copyright Marcello Federico, ITC-irst, 2006
+
+
+#ifndef MF_LMTABLE_H
+#define MF_LMTABLE_H
+
+#include "ngram.h"
+
+// internal data structure: array capacity and byte widths of the fields packed into table nodes
+#define LMTMAXLEV 10+1 // dimension of the per-level arrays in lmtable; NOTE(review): expansion is unparenthesized, so LMTMAXLEV*2 would evaluate as 10+1*2
+ // byte width of the word code stored at offset 0 of every node; a definition supplied before this point takes precedence
+#ifndef LMTCODESIZE
+#define LMTCODESIZE (int)3
+#endif
+ // SHORTSIZE, PTRSIZE, INTSIZE and CHARSIZE are not referenced in this header; presumably byte sizes used by the implementation file -- TODO confirm
+#define SHORTSIZE (int)2
+#define PTRSIZE (int)sizeof(char *)
+#define INTSIZE (int)4
+#define CHARSIZE (int)1
+ // byte widths of the node fields read and written by lmtable::prob, lmtable::bow and lmtable::bound
+#define PROBSIZE (int)4 //use float; width of the prob and bow fields in INTERNAL and LEAF nodes
+#define QPROBSIZE (int)1 // width of the prob and bow fields in QINTERNAL and QLEAF nodes
+#define BOUNDSIZE (int)4 // width of the bound field in INTERNAL and QINTERNAL nodes
+
+#define UNIGRAM_RESOLUTION 10000000.0 // not referenced in this header; NOTE(review): meaning to be confirmed in the implementation file
+
+typedef enum {INTERNAL,QINTERNAL,LEAF,QLEAF} LMT_TYPE; // node layouts (see lmtable::nodesize): INTERNAL/QINTERNAL hold code, prob, bow and bound; LEAF/QLEAF hold code and prob; Q variants use QPROBSIZE-wide fields
+typedef char* node; // a node is addressed by a pointer to its first byte; the lmtable accessors read fields at byte offsets from it
+ // operation selector taken by lmtable::search and lmtable::succscan
+typedef enum {LMT_FIND, //!< search: find an entry
+ LMT_ENTER, //!< search: enter an entry
+ LMT_INIT, //!< scan: start scan
+ LMT_CONT //!< scan: continue scan
+} LMT_ACTION;
+
+
+class lmtable{
+ // Language-model table: one raw byte table per level plus inline accessors for the fields packed into each node.
+ char* table[LMTMAXLEV]; //storage of all levels
+ LMT_TYPE tbltype[LMTMAXLEV]; //table type for each level
+ int cursize[LMTMAXLEV]; //current size of levels
+ int maxsize[LMTMAXLEV]; //presumably the maximum (allocated) size of levels; NOTE(review): original comment repeated cursize's text -- confirm
+
+ int maxlev; //max level of table
+ char info[100]; //information put in the header
+
+ // K-means quantization
+ bool isQtable; //when true, the destructor also frees the center arrays below
+
+ int NumCenters[LMTMAXLEV]; //presumably the number of centers per level -- TODO confirm
+ float* Pcenters[LMTMAXLEV]; //per-level arrays; freed by the destructor for levels 1..maxlev
+ float* Bcenters[LMTMAXLEV]; //per-level arrays; freed by the destructor only for levels below maxlev
+
+ // log-linear quantization
+ int resolution; //resolution for quantized prob
+ double decay; //decay constant
+ double logdecay; //logdecay constant
+
+ int oov_code;
+ int oov_size;
+ int backoff_state; //read and written through bo_state()
+
+ public:
+
+ dictionary *dict; // dictionary; not released by ~lmtable()
+ // NOTE(review): no copy constructor or assignment operator is declared although the destructor frees raw arrays
+ lmtable(const char* filename, int maxl,int res,double dec);
+ // Frees table[i] for levels 1..maxlev (index 0 is not touched) and, for quantized tables, the center arrays.
+ ~lmtable(){
+ for (int i=1;i<=maxlev;i++){
+ delete [] table[i];
+ if (isQtable){
+ delete [] Pcenters[i];
+ if (i<maxlev) delete [] Bcenters[i]; //the top level has no Bcenters entry to free
+ }
+ }
+ }
+ int maxlevel(){return maxlev;}; //returns maxlev
+
+ void savetxt(const char *filename);
+ void savebin(const char *filename);
+
+ void loadtxt(const char *filename, int maxl,int res, double dec);
+ void loadbin(const char *filename);
+
+ void loadQtxt(const char *filename, int maxl);
+
+ double prob(ngram ng); //overload taking an ngram by value; distinct from the node field accessor prob(node,...) below
+
+ void *search(char *tb,LMT_TYPE ndt,int lev,int n,int sz,int *w,
+ LMT_ACTION action,char **found=(char **)NULL);
+
+ int mybsearch(char *ar, int n, int size, unsigned char *key, int *idx);
+
+ int add(ngram& ng,double logprob,double logbow);
+ void checkbounds(int level);
+
+ int get(ngram& ng){return get(ng,ng.size,ng.size);} //forwards to the three-argument get with n and lev both set to ng.size
+ int get(ngram& ng,int n,int lev);
+
+ int succscan(ngram& h,ngram& ng,LMT_ACTION action,int lev);
+ // Stores the low `size` bytes of value at ptr+offs, least significant byte first; returns value unchanged.
+ inline int putmem(char* ptr,int value,int offs,int size){
+ assert(ptr!=NULL);
+ for (int i=0;i<size;i++)
+ ptr[offs+i]=(value >> (8 * i)) & 0xff;
+ return value;
+ };
+ // Reads `size` bytes at ptr+offs, least significant byte first, into *value; returns *value.
+ inline int getmem(char* ptr,int* value,int offs,int size){
+ assert(ptr!=NULL);
+ *value=ptr[offs] & 0xff; //mask so a negative char does not sign-extend
+ for (int i=1;i<size;i++)
+ *value= *value | ( ( ptr[offs+i] & 0xff ) << (8 *i)); // NOTE(review): with a 32-bit int and i==3, a byte >=0x80 is shifted into the sign bit
+ return *value;
+ };
+
+ // With the default argument returns backoff_state; any other value is stored in backoff_state and returned.
+ int bo_state(int value=-1){
+ return (value==-1?backoff_state:backoff_state=value);
+ };
+
+ // Size in bytes of one node of the given type: word code plus the fields that type carries.
+ int nodesize(LMT_TYPE ndt){
+ switch (ndt){
+ case INTERNAL:
+ return LMTCODESIZE + PROBSIZE + PROBSIZE + BOUNDSIZE;
+ case QINTERNAL:
+ return LMTCODESIZE + QPROBSIZE + QPROBSIZE + BOUNDSIZE;
+ case QLEAF:
+ return LMTCODESIZE + QPROBSIZE;
+ case LEAF:
+ return LMTCODESIZE + PROBSIZE;
+ } // NOTE(review): no return statement is reached if ndt matches none of the cases
+ }
+ // Word code at offset 0 of the node: value==-1 reads the field, any other value writes it; returns the value read or written.
+ inline int word(node nd,int value=-1)
+ {
+ int offset=0;
+
+ if (value==-1)
+ getmem(nd,&value,offset,LMTCODESIZE);
+ else
+ putmem(nd,value,offset,LMTCODESIZE);
+
+ return value;
+ };
+ // Prob field, stored right after the word code as a raw integer; value==-1 reads, any other value writes.
+ inline int prob(node nd,LMT_TYPE ndt, int value=-1)
+ {
+ int offs=LMTCODESIZE;
+ int size=(ndt==QINTERNAL || ndt==QLEAF?QPROBSIZE:PROBSIZE); //quantized node types use the narrower field
+
+ if (value==-1)
+ getmem(nd,&value,offs,size);
+ else
+ putmem(nd,value,offs,size);
+
+ return value;
+ };
+ // NOTE(review): because -1 selects the read path in these accessors, a field cannot be set to -1 through them.
+ // Bow field, stored right after prob; asserts the node is INTERNAL or QINTERNAL; value==-1 reads, any other value writes.
+ inline int bow(node nd,LMT_TYPE ndt, int value=-1)
+ {
+ assert(ndt==INTERNAL || ndt==QINTERNAL);
+ int size=(ndt==QINTERNAL?QPROBSIZE:PROBSIZE);
+ int offs=LMTCODESIZE+size; //skip the word code and the prob field
+
+ if (value==-1)
+ getmem(nd,&value,offs,size);
+ else
+ putmem(nd,value,offs,size);
+
+ return value;
+ };
+ // Bound field, stored after prob and bow; asserts the node is INTERNAL or QINTERNAL; value==-1 reads, any other value writes.
+ inline int bound(node nd,LMT_TYPE ndt, int value=-1)
+ {
+ assert(ndt==INTERNAL || ndt==QINTERNAL);
+ int offs=LMTCODESIZE+2*(ndt==QINTERNAL?QPROBSIZE:PROBSIZE); //skip the word code, prob and bow
+
+ if (value==-1)
+ getmem(nd,&value,offs,BOUNDSIZE);
+ else
+ putmem(nd,value,offs,BOUNDSIZE);
+
+ return value;
+ };
+
+ void stat(int lev=0);
+
+};
+
+#endif
+
+
+
+