git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@950 1f5c12ca-751b-0410-a591-d2e778427230

author: hieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230> 2006-11-05 16:02:33 +0300
committer: hieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230> 2006-11-05 16:02:33 +0300
commit: 8313b93157fdeedf7e8c939071b085b294450c56 (patch)
tree: 64e1ef0674cb6ceb4830649d4d160f886df103e2 /irstlm/src
parent: e8cd4e752f4e239d99e427c0cceafcea8bcaeb1f (diff)
21 files changed, 5399 insertions, 0 deletions
diff --git a/irstlm/src/src/Makefile.am b/irstlm/src/src/Makefile.am
new file mode 100644
index 000000000..2c3cc5caf
--- /dev/null
+++ b/irstlm/src/src/Makefile.am
@@ -0,0 +1,30 @@
+lib_LIBRARIES = libirstlm.a
+
+libirstlm_a_SOURCES = \
+	dictionary.cpp \
+	htable.cpp \
+	lmtable.cpp \
+	mempool.cpp \
+	n_gram.cpp \
+	ngramcache.cpp \
+	util.cpp
+
+library_includedir=$(includedir)
+library_include_HEADERS = \
+        dictionary.h \
+        htable.h \
+        lmtable.h \
+        mempool.h \
+        n_gram.h \
+        ngramcache.h \
+	util.h \
+	gzfilebuf.h 
+
+bin_PROGRAMS = compile-lm quantize-lm
+
+AM_LDFLAGS=-L.
+LIBS=-lirstlm -lz
+
+compile_lm_SOURCES = compile-lm.cpp
+quantize_lm_SOURCES = quantize-lm.cpp
+
diff --git a/irstlm/src/src/cmd.c b/irstlm/src/src/cmd.c
new file mode 100644
index 000000000..aeb36d7b9
--- /dev/null
+++ b/irstlm/src/src/cmd.c
@@ -0,0 +1,661 @@
+/******************************************************************************
+ IrstLM: IRST Language Model Toolkit
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+
+******************************************************************************/
+
+#include	<stdio.h>
+#include	<stdlib.h>
+#include	<ctype.h>
+#include	<string.h>
+
+#include	"cmd.h"
+
+static Enum_T	BoolEnum[] = {
+	{	"FALSE",	0	},
+	{	"TRUE",		1	},
+	{	0,		0	}
+};
+
+#ifdef	NEEDSTRDUP
+char	*strdup();
+#endif
+
+#define	FALSE	0
+#define	TRUE	1
+
+#define	LINSIZ		10240
+#define	MAXPARAM	256
+
+static char	*GetLine(),
+		**str2array();
+static int	Scan(),
+		SetParam(),
+		SetEnum(),
+		SetSubrange(),
+		SetStrArray(),
+		SetGte(),
+		SetLte(),
+		CmdError(),
+		EnumError(),
+		SubrangeError(),
+		GteError(),
+		LteError(),
+		PrintParam(),
+		PrintEnum(),
+		PrintStrArray();
+
+static Cmd_T	cmds[MAXPARAM+1];
+static char	*SepString = " \t\n";
+
+#if defined(__STDC__)
+#include	<stdarg.h>
+int DeclareParams(char *ParName, ...)
+#else
+#include	<varargs.h>
+int DeclareParams(ParName, va_alist)
+char	*ParName;
+va_dcl
+#endif
+{
+	va_list		args;
+	static int	ParamN = 0;
+	int		j,
+			c;
+	char		*s;
+
+#if defined(__STDC__)
+	va_start(args, ParName);
+#else
+	va_start(args);
+#endif
+	for(;ParName;) {
+		if(ParamN==MAXPARAM) {
+			fprintf(stderr, "Too many parameters !!\n");
+			break;
+		}
+		for(j=0,c=1; j<ParamN&&(c=strcmp(cmds[j].Name,ParName))<0; j++)
+			;
+		if(!c) {
+			fprintf(stderr,
+				"Warning: parameter \"%s\" declared twice.\n",
+				ParName);
+		}
+		for(c=ParamN; c>j; c--) {
+			cmds[c] = cmds[c-1];
+		}
+		cmds[j].Name = ParName;
+		cmds[j].Type = va_arg(args, int);
+		cmds[j].Val = va_arg(args, void *);
+		switch(cmds[j].Type) {
+		case CMDENUMTYPE:	/* get the pointer to Enum_T struct  */
+			cmds[j].p = va_arg(args, void *);
+			break;
+		case CMDSUBRANGETYPE:	/* get the two extremes		     */
+			cmds[j].p = (void*) calloc(2, sizeof(int));
+			((int*)cmds[j].p)[0] = va_arg(args, int);
+			((int*)cmds[j].p)[1] = va_arg(args, int);
+			break;
+		case CMDGTETYPE:	/* get lower or upper bound	     */
+		case CMDLTETYPE:
+			cmds[j].p = (void*) calloc(1, sizeof(int));
+			((int*)cmds[j].p)[0] = va_arg(args, int);
+			break;
+		case CMDSTRARRAYTYPE:	/* get the separators string	     */
+			cmds[j].p = (s=va_arg(args, char*))
+				    ? (void*)strdup(s) : 0;
+			break;
+		case CMDBOOLTYPE:
+			cmds[j].Type = CMDENUMTYPE;
+			cmds[j].p = BoolEnum;
+			break;
+		case CMDDOUBLETYPE:	/* nothing else is needed	     */
+		case CMDINTTYPE:
+		case CMDSTRINGTYPE:
+			break;
+		default:
+			fprintf(stderr, "%s: %s %d %s \"%s\"\n",
+				"DeclareParam()", "Unknown Type",
+				cmds[j].Type, "for parameter", cmds[j].Name);
+			exit(1);
+		}
+		ParamN++;
+		ParName = va_arg(args, char *);
+	}
+	cmds[ParamN].Name = NULL;
+	va_end(args);
+	return 0;
+}
+
+int GetParams(n, a, CmdFileName)
+int	*n;
+char	***a;
+char	*CmdFileName;
+{
+	char	*Line,
+		*ProgName;
+	int	argc = *n;
+	char	**argv = *a,
+		*s;
+	FILE	*fp;
+	int	IsPipe;
+
+#ifdef	MSDOS
+#define	PATHSEP '\\'
+	char	*dot = NULL;
+#else
+#define	PATHSEP '/'
+#endif
+
+	if(!(Line=malloc(LINSIZ))) {
+		fprintf(stderr, "GetParams(): Unable to alloc %d bytes\n",
+			LINSIZ);
+		exit(1);
+	}
+	if((ProgName=strrchr(*argv, PATHSEP))) {
+		++ProgName;
+	} else {
+		ProgName = *argv;
+	}
+#ifdef	MSDOS
+	if(dot=strchr(ProgName, '.')) *dot = 0;
+#endif
+	--argc;
+	++argv;
+	for(;;) {
+		if(argc && argv[0][0]=='-' && argv[0][1]=='=') {
+			CmdFileName = argv[0]+2;
+			++argv;
+			--argc;
+		}
+		if(!CmdFileName) {
+			break;
+		}
+		IsPipe = !strncmp(CmdFileName, "@@", 2);
+		fp = IsPipe
+		     ? popen(CmdFileName+2, "r")
+		     : strcmp(CmdFileName, "-")
+		       ? fopen(CmdFileName, "r")
+		       : stdin;
+		if(!fp) {
+			fprintf(stderr, "Unable to open command file %s\n",
+				CmdFileName);
+			exit(1);
+		}
+		while(GetLine(fp, LINSIZ, Line) && strcmp(Line, "\\End")) {
+			if(Scan(ProgName, cmds, Line)) {
+				CmdError(Line);
+			}
+		}
+		if(fp!=stdin) {
+			if(IsPipe) pclose(fp); else fclose(fp);
+		}
+		CmdFileName = NULL;
+	}
+	while(argc && **argv=='-' && (s=strchr(*argv, '='))) {
+		*s = ' ';
+		sprintf(Line, "%s/%s", ProgName, *argv+1);
+		*s = '=';
+		if(Scan(ProgName, cmds, Line)) CmdError(*argv);
+		--argc;
+		++argv;
+	}
+	*n = argc;
+	*a = argv;
+#ifdef MSDOS
+	if(dot) *dot = '.';
+#endif
+	free(Line);
+	return 0;
+}
+
+int PrintParams(ValFlag, fp)
+int	ValFlag;
+FILE	*fp;
+{
+	int	i;
+
+	fflush(fp);
+	if(ValFlag) {
+		fprintf(fp, "Parameters Values:\n");
+	} else {
+		fprintf(fp, "Parameters:\n");
+	}
+	for(i=0; cmds[i].Name; i++) PrintParam(cmds+i, ValFlag, fp);
+	fprintf(fp, "\n");
+	fflush(fp);
+	return 0;
+}
+
+int SPrintParams(a, pfx)
+char	***a,
+	*pfx;
+{
+	int	l,
+		n;
+	Cmd_T	*cmd;
+
+	if(!pfx) pfx="";
+	l = strlen(pfx);
+	for(n=0, cmd=cmds; cmd->Name; cmd++) n += !!cmd->ArgStr;
+	a[0] = calloc(n, sizeof(char*));
+	for(n=0, cmd=cmds; cmd->Name; cmd++) {
+		if(!cmd->ArgStr) continue;
+		a[0][n] = malloc(strlen(cmd->Name)+strlen(cmd->ArgStr)+l+2);
+		sprintf(a[0][n], "%s%s=%s", pfx, cmd->Name, cmd->ArgStr);
+		++n;
+	}
+	return n;
+}
+
+static int CmdError(opt)
+char	*opt;
+{
+	fprintf(stderr, "Invalid option \"%s\"\n", opt);
+	fprintf(stderr, "This program expectes the following parameters:\n");
+	PrintParams(FALSE, stderr);
+	exit(0);
+}
+
+static int PrintParam(cmd, ValFlag, fp)
+Cmd_T	*cmd;
+int	ValFlag;
+FILE	*fp;
+{
+	fprintf(fp, "%4s", "");
+	switch(cmd->Type) {
+	case CMDDOUBLETYPE:
+		fprintf(fp, "%s", cmd->Name);
+		if(ValFlag) fprintf(fp, ": %22.15e", *(double *)cmd->Val);
+		fprintf(fp, "\n");
+		break;
+	case CMDENUMTYPE:
+		PrintEnum(cmd, ValFlag, fp);
+		break;
+	case CMDINTTYPE:
+	case CMDSUBRANGETYPE:
+	case CMDGTETYPE:
+	case CMDLTETYPE:
+		fprintf(fp, "%s", cmd->Name);
+		if(ValFlag) fprintf(fp, ": %d", *(int *)cmd->Val);
+		fprintf(fp, "\n");
+		break;
+	case CMDSTRINGTYPE:
+		fprintf(fp, "%s", cmd->Name);
+		if(ValFlag) {
+			if(*(char **)cmd->Val) {
+				fprintf(fp, ": \"%s\"", *(char **)cmd->Val);
+			} else {
+				fprintf(fp, ": %s", "NULL");
+			}
+		}
+		fprintf(fp, "\n");
+		break;
+	case CMDSTRARRAYTYPE:
+		PrintStrArray(cmd, ValFlag, fp);
+		break;
+	default:
+		fprintf(stderr, "%s: %s %d %s \"%s\"\n",
+			"PrintParam",
+			"Unknown Type",
+			cmd->Type,
+			"for parameter",
+			cmd->Name);
+		exit(1);
+	}
+	return 0;
+}
+
+static char *GetLine(fp, n, Line)
+FILE	*fp;
+int	n;
+char	*Line;
+{
+	int	j,
+		l,
+		offs=0;
+
+	for(;;) {
+		if(!fgets(Line+offs, n-offs, fp)) {
+			return NULL;
+		}
+		if(Line[offs]=='#') continue;
+		l = strlen(Line+offs)-1;
+		Line[offs+l] = 0;
+		for(j=offs; Line[j] && isspace(Line[j]); j++, l--)
+			;
+		if(l<1) continue;
+		if(j > offs) {
+			char	*s = Line+offs,
+				*q = Line+j;
+
+			while((*s++=*q++))
+				;
+		}
+		if(Line[offs+l-1]=='\\') {
+			offs += l;
+			Line[offs-1] = ' ';
+		} else {
+			break;
+		}
+	}
+	return Line;
+}
+
+static int Scan(ProgName, cmds, Line)
+char	*ProgName,
+	*Line;
+Cmd_T	*cmds;
+{
+	char	*q,
+		*p;
+	int	i,
+		hl,
+		HasToMatch = FALSE,
+		c0,
+		c;
+
+	p = Line+strspn(Line, SepString);
+	if(!(hl=strcspn(p, SepString))) {
+		return 0;
+	}
+	if((q=strchr(p, '/')) && q-p<hl) {
+		*q = 0;
+		if(strcmp(p, ProgName)) {
+			*q = '/';
+			return 0;
+		}
+		*q = '/';
+		HasToMatch=TRUE;
+		p = q+1;
+	}
+	if(!(hl = strcspn(p, SepString))) {
+		return 0;
+	}
+	c0 = p[hl];
+	p[hl] = 0;
+	for(i=0, c=1; cmds[i].Name&&(c=strcmp(cmds[i].Name, p))<0; i++)
+		;
+	p[hl] = c0;
+	if(!c) return SetParam(cmds+i, p+hl+strspn(p+hl, SepString));
+	return HasToMatch && c;
+}
+
+static int SetParam(cmd, s)
+Cmd_T	*cmd;
+char	*s;
+{
+	if(!*s && cmd->Type != CMDSTRINGTYPE) {
+		fprintf(stderr,
+			"WARNING: No value specified for parameter \"%s\"\n",
+			cmd->Name);
+		return 0;
+	}
+	switch(cmd->Type) {
+	case CMDDOUBLETYPE:
+		if(sscanf(s, "%lf", (double*)cmd->Val)!=1) {
+			fprintf(stderr,
+				"Float value required for parameter \"%s\"\n",
+				cmd->Name);
+			exit(1);
+		}
+		break;
+	case CMDENUMTYPE:
+		SetEnum(cmd, s);
+		break;
+	case CMDINTTYPE:
+		if(sscanf(s, "%d", (int*)cmd->Val)!=1) {
+			fprintf(stderr,
+				"Integer value required for parameter \"%s\"\n",
+				cmd->Name);
+			exit(1);
+		}
+		break;
+	case CMDSTRINGTYPE:
+		*(char **)cmd->Val = (strcmp(s, "<NULL>") && strcmp(s, "NULL"))
+				     ? strdup(s)
+				     : 0;
+		break;
+	case CMDSTRARRAYTYPE:
+		SetStrArray(cmd, s);
+		break;
+	case CMDGTETYPE:
+		SetGte(cmd, s);
+		break;
+	case CMDLTETYPE:
+		SetLte(cmd, s);
+		break;
+	case CMDSUBRANGETYPE:
+		SetSubrange(cmd, s);
+		break;
+	default:
+		fprintf(stderr, "%s: %s %d %s \"%s\"\n",
+			"SetParam",
+			"Unknown Type",
+			cmd->Type,
+			"for parameter",
+			cmd->Name);
+		exit(1);
+	}
+	cmd->ArgStr = strdup(s);
+	return 0;
+}
+
+static int SetEnum(cmd, s)
+Cmd_T	*cmd;
+char	*s;
+{
+	Enum_T	*en;
+
+	for(en=(Enum_T *)cmd->p; en->Name; en++) {
+		if(*en->Name && !strcmp(s, en->Name)) {
+			*(int *) cmd->Val = en->Idx;
+			return 0;
+		}
+	}
+	return EnumError(cmd, s);
+}
+
+static int SetSubrange(cmd, s)
+Cmd_T	*cmd;
+char	*s;
+{
+	int	n;
+
+	if(sscanf(s, "%d", &n)!=1) {
+		fprintf(stderr,
+			"Integer value required for parameter \"%s\"\n",
+			cmd->Name);
+		exit(1);
+	}
+	if(n < *(int *)cmd->p || n > *((int *)cmd->p+1)) {
+		return SubrangeError(cmd, n);
+	}
+	*(int *)cmd->Val = n;
+	return 0;
+}
+
+static int SetGte(cmd, s)
+Cmd_T	*cmd;
+char	*s;
+{
+	int	n;
+
+	if(sscanf(s, "%d", &n)!=1) {
+		fprintf(stderr,
+			"Integer value required for parameter \"%s\"\n",
+			cmd->Name);
+		exit(1);
+	}
+	if(n<*(int *)cmd->p) {
+		return GteError(cmd, n);
+	}
+	*(int *)cmd->Val = n;
+	return 0;
+}
+
+static int SetStrArray(cmd, s)
+Cmd_T	*cmd;
+char	*s;
+{
+	*(char***)cmd->Val = str2array(s, (char*)cmd->p);
+	return 0;
+}
+
+static int SetLte(cmd, s)
+Cmd_T	*cmd;
+char	*s;
+{
+	int	n;
+
+	if(sscanf(s, "%d", &n)!=1) {
+		fprintf(stderr,
+			"Integer value required for parameter \"%s\"\n",
+			cmd->Name);
+		exit(1);
+	}
+	if(n > *(int *)cmd->p) {
+		return LteError(cmd, n);
+	}
+	*(int *)cmd->Val = n;
+	return 0;
+}
+
+static int EnumError(cmd, s)
+Cmd_T	*cmd;
+char	*s;
+{
+	Enum_T	*en;
+
+	fprintf(stderr,
+		"Invalid value \"%s\" for parameter \"%s\"\n", s, cmd->Name);
+	fprintf(stderr, "Valid values are:\n");
+	for(en=(Enum_T *)cmd->p; en->Name; en++) {
+		if(*en->Name) {
+			fprintf(stderr, "    %s\n", en->Name);
+		}
+	}
+	fprintf(stderr, "\n");
+	exit(1);
+}
+
+static int GteError(cmd, n)
+Cmd_T	*cmd;
+int	n;
+{
+	fprintf(stderr,
+		"Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
+	fprintf(stderr, "Valid values must be greater than or equal to  %d\n",
+		*(int *)cmd->p);
+	exit(1);
+}
+
+static int LteError(cmd, n)
+Cmd_T	*cmd;
+int	n;
+{
+	fprintf(stderr,
+		"Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
+	fprintf(stderr, "Valid values must be less than or equal to  %d\n",
+		*(int *)cmd->p);
+	exit(1);
+}
+
+static int SubrangeError(cmd, n)
+Cmd_T	*cmd;
+int	n;
+{
+	fprintf(stderr,
+		"Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
+	fprintf(stderr, "Valid values range from %d to %d\n",
+		*(int *)cmd->p, *((int *)cmd->p+1));
+	exit(1);
+}
+
+static int PrintEnum(cmd, ValFlag, fp)
+Cmd_T	*cmd;
+int	ValFlag;
+FILE	*fp;
+{
+	Enum_T	*en;
+
+	fprintf(fp, "%s", cmd->Name);
+	if(ValFlag) {
+		for(en=(Enum_T *)cmd->p; en->Name; en++) {
+			if(*en->Name && en->Idx==*(int *)cmd->Val) {
+				fprintf(fp, ": %s", en->Name);
+			}
+		}
+	}
+	fprintf(fp, "\n");
+	return 0;
+}
+
+static int PrintStrArray(cmd, ValFlag, fp)
+Cmd_T	*cmd;
+int	ValFlag;
+FILE	*fp;
+{
+	char	*indent,
+		**s = *(char***)cmd->Val;
+	int	l = 4+strlen(cmd->Name);
+
+	fprintf(fp, "%s", cmd->Name);
+	indent = malloc(l+2);
+	memset(indent, ' ', l+1);
+	indent[l+1] = 0;
+	if(ValFlag) {
+		fprintf(fp, ": %s", s ? (*s ? *s++ : "NULL") : "");
+		if(s) while(*s) {
+			fprintf(fp, "\n%s %s", indent, *s++);
+		}
+	}
+	free(indent);
+	fprintf(fp, "\n");
+	return 0;
+}
+
+static char **str2array(s, sep)
+char	*s,
+	*sep;
+{
+	char	*p,
+		**a;
+	int	n = 0,
+		l;
+
+	if(!sep) sep = SepString;
+	p = s += strspn(s, sep);
+	while(*p) {
+		p += strcspn(p, sep);
+		p += strspn(p, sep);
+		++n;
+	}
+	a = calloc(n+1, sizeof(char *));
+	p = s;
+	n = 0;
+	while(*p) {
+		l = strcspn(p, sep);
+		a[n] = malloc(l+1);
+		memcpy(a[n], p, l);
+		a[n][l] = 0;
+		++n;
+		p += l;
+		p += strspn(p, sep);
+	}
+	return a;
+}
diff --git a/irstlm/src/src/cmd.h b/irstlm/src/src/cmd.h
new file mode 100644
index 000000000..708905f6f
--- /dev/null
+++ b/irstlm/src/src/cmd.h
@@ -0,0 +1,68 @@
+/******************************************************************************
+ IrstLM: IRST Language Model Toolkit
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+
+******************************************************************************/
+
+#if !defined(CMD_H)
+
+#define	CMD_H
+
+#define	CMDDOUBLETYPE	1
+#define	CMDENUMTYPE	2
+#define	CMDINTTYPE	3
+#define	CMDSTRINGTYPE	4
+#define	CMDSUBRANGETYPE	5
+#define	CMDGTETYPE	6
+#define	CMDLTETYPE	7
+#define	CMDSTRARRAYTYPE	8
+#define	CMDBOOLTYPE	9
+
+typedef struct {
+	char	*Name;
+	int	Idx;
+} Enum_T;
+
+typedef struct {
+	int	Type;
+	char	*Name,
+		*ArgStr;
+	void	*Val,
+		*p;
+} Cmd_T;
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+#if defined(__STDC__)
+int DeclareParams(char *, ...);
+#else
+int DeclareParams();
+#endif
+
+int	GetParams(int *n, char ***a,char *CmdFileName),
+	SPrintParams(),
+	PrintParams();
+
+#ifdef  __cplusplus
+}
+#endif
+#endif
+
+
+
diff --git a/irstlm/src/src/compile-lm.cpp b/irstlm/src/src/compile-lm.cpp
new file mode 100644
index 000000000..09f01fc94
--- /dev/null
+++ b/irstlm/src/src/compile-lm.cpp
@@ -0,0 +1,221 @@
+/******************************************************************************
+ IrstLM: IRST Language Model Toolkit, compile LM
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+
+******************************************************************************/
+
+using namespace std;
+
+#include <iostream>
+#include <fstream>
+#include <vector>
+#include <string>
+#include <stdlib.h>
+#include "util.h"
+#include "math.h"
+#include "lmtable.h"
+
+
+/* GLOBAL OPTIONS ***************/
+
+std::string stxt = "no";
+std::string seval = "";
+std::string sdebug = "0";
+std::string ssubdict = "";
+
+/********************************/
+
+void usage(const char *msg = 0) {
+  if (msg) { std::cerr << msg << std::endl; }
+  std::cerr << "Usage: compile-lm [options] input-file.lm [output-file.blm]" << std::endl;
+  if (!msg) std::cerr << std::endl
+            << "  compile-lm reads a standard LM file in ARPA format and produces" << std::endl
+            << "  a compiled representation that the IRST LM toolkit can quickly" << std::endl
+            << "  read and process. LM file can be compressed with gzip." << std::endl << std::endl;
+  std::cerr << "Options:\n"
+            << "--text=[yes|no] -t=[yes|no] (output is again in text format)\n"
+            << "--eval=text-file -e=text-file (computes perplexity of text-file and returns)\n"
+            << "--debug=1 -d=1 (verbose output for --eval option)\n"
+            << "--subdict=text-file --sd=tex-file (limits LM to include only words in text-file)\n" ;
+}
+
+bool starts_with(const std::string &s, const std::string &pre) {
+  if (pre.size() > s.size()) return false;
+
+  if (pre == s) return true;
+  std::string pre_equals(pre+'=');
+  if (pre_equals.size() > s.size()) return false;
+  return (s.substr(0,pre_equals.size()) == pre_equals);
+}
+
+std::string get_param(const std::string& opt, int argc, const char **argv, int& argi)
+{
+  std::string::size_type equals = opt.find_first_of('=');
+  if (equals != std::string::npos && equals < opt.size()-1) {
+    return opt.substr(equals+1);
+  }
+  std::string nexto;
+  if (argi + 1 < argc) { 
+    nexto = argv[++argi]; 
+  } else {
+    usage((opt + " requires a value!").c_str());
+    exit(1);
+  }
+  return nexto;
+}
+
+void handle_option(const std::string& opt, int argc, const char **argv, int& argi)
+{
+  if (opt == "--help" || opt == "-h") { usage(); exit(1); }
+   if (starts_with(opt, "--text") || starts_with(opt, "-t"))
+    stxt = get_param(opt, argc, argv, argi);
+   else
+      if (starts_with(opt, "--eval") || starts_with(opt, "-e"))
+       seval = get_param(opt, argc, argv, argi);
+  else
+    if (starts_with(opt, "--debug") || starts_with(opt, "-d"))
+      sdebug = get_param(opt, argc, argv, argi);
+
+  else
+    if (starts_with(opt, "--subdict") || starts_with(opt, "-sd"))
+      ssubdict = get_param(opt, argc, argv, argi);
+  
+  else {
+    usage(("Don't understand option " + opt).c_str());
+    exit(1);
+  }
+}
+
+int main(int argc, const char **argv)
+{
+  
+  if (argc < 2) { usage(); exit(1); }
+  std::vector<std::string> files;
+  for (int i=1; i < argc; i++) {
+    std::string opt = argv[i];
+    if (opt[0] == '-') { handle_option(opt, argc, argv, i); }
+      else files.push_back(opt);
+  }
+  if (files.size() > 2) { usage("Too many arguments"); exit(1); }
+  if (files.size() < 1) { usage("Please specify a LM file to read from"); exit(1); }
+
+  bool textoutput = (stxt == "yes"? true : false);
+  int debug = atoi(sdebug.c_str()); 
+ 
+  std::string infile = files[0];
+  std::string outfile="";
+  
+  if (files.size() == 1) {  
+    outfile=infile;
+    
+    //remove path information
+    std::string::size_type p = outfile.rfind('/');
+    if (p != std::string::npos && ((p+1) < outfile.size()))           
+      outfile.erase(0,p+1);
+          
+    //eventually strip .gz 
+    if (outfile.compare(outfile.size()-3,3,".gz")==0)
+       outfile.erase(outfile.size()-3,3);
+   
+    outfile+=(textoutput?".lm":".blm");
+  }
+  else
+     outfile = files[1];
+ 
+  cerr << "outfile: " << outfile << std::endl;
+ 
+  
+  lmtable lmt; 
+  
+  
+  if (ssubdict == ""){
+
+    std::cout << "Reading " << infile << "..." << std::endl;
+    inputfilestream inp(infile.c_str());
+    if (!inp.good()) {
+      std::cerr << "Failed to open " << infile << "!\n";
+      exit(1);
+    }    
+    lmt.load(inp);    
+    
+  }
+  else{ //load reduced LM from a binary LM file!
+    lmt.dict->generate((char *)ssubdict.c_str());
+    //eventually add OOV code so that it can be found in the table
+    lmt.dict->genoovcode(); //generate OOV code
+    //filter table from the large binary table on disk
+    lmt.filter2(infile.c_str(),1); 
+    //regenerate OOV code as dictionary has been rebuild
+    lmt.dict->genoovcode(); 
+  }
+  
+  if (seval != ""){
+    std::cerr << "Start Eval\n";
+    std::cerr << "OOV code: " << lmt.dict->oovcode() << "\n";
+    ngram ng(lmt.dict);    
+    std::cout.setf(ios::fixed);
+    std::cout.precision(2);
+    if (debug>1) std::cout.precision(8);
+    std::fstream inptxt(seval.c_str(),std::ios::in);
+    
+    int Nbo=0,Nw=0,Noov=0;
+    double logPr=0,PP=0,PPwp=0,Pr;
+    
+    int bos=ng.dict->encode(ng.dict->BoS());
+
+#ifdef TRACE_CACHE
+    lmt.init_probcache();
+#endif
+    
+    while(inptxt >> ng){
+      
+      if (ng.size>lmt.maxlevel()) ng.size=lmt.maxlevel();
+      
+      // reset ngram at begin of sentence
+      if (*ng.wordp(1)==bos) continue;
+     
+      lmt.bo_state(0);
+      if (ng.size>=1){ 
+        logPr+=(Pr=lmt.clprob(ng));
+        if (debug>1)
+          std::cout << ng << "[" << ng.size << "-gram]" << " " << Pr << "\n"; 
+        
+        if (*ng.wordp(1) == lmt.dict->oovcode()) Noov++;        
+        Nw++; if (lmt.bo_state()) Nbo++;                   
+      }
+    
+    }
+    
+    PP=exp((-logPr * log(10.0)) /Nw);
+    PPwp= PP * exp(Noov * log(10000000.0-lmt.dict->size())/Nw);
+    
+    std::cout << "%% Nw=" << Nw << " PP=" << PP << " PPwp=" << PPwp
+      << " Nbo=" << Nbo << " Noov=" << Noov 
+      << " OOV=" << (float)Noov/Nw * 100.0 << "%\n";
+    
+    return 0;    
+  };
+  
+  std::cout << "Saving to " << outfile << std::endl;
+  if (textoutput) 
+    lmt.savetxt(outfile.c_str());    
+  else 
+    lmt.savebin(outfile.c_str());
+  
+  return 0;
+}
+
diff --git a/irstlm/src/src/dictionary.cpp b/irstlm/src/src/dictionary.cpp
new file mode 100644
index 000000000..44627c1c6
--- /dev/null
+++ b/irstlm/src/src/dictionary.cpp
@@ -0,0 +1,425 @@
+/******************************************************************************
+ IrstLM: IRST Language Model Toolkit
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+
+******************************************************************************/
+
+#include <iomanip>
+#include <iostream>
+#include <fstream>
+#include "mempool.h"
+#include "htable.h"
+#include "dictionary.h"
+#include "index.h"
+
+using namespace std;
+
+dictionary::dictionary(char *filename,int size,char* isymb,char* oovlexfile){
+
+  // unitialized memory 
+  if (oovlexfile!=NULL)
+    oovlex=new dictionary(oovlexfile,size,isymb,NULL);
+  else
+    oovlex=(dictionary *)NULL;
+
+  htb = new htable(size/LOAD_FACTOR);
+  tb  = new dict_entry[size]; 
+  st  = new strstack(size * 10);
+
+  for (int i=0;i<size;i++) tb[i].freq=0;
+  
+  is=(char*) NULL;
+  intsymb(isymb);
+
+  oov_code = -1;
+  in_oov_lex=0;
+  n  = 0;
+  N  =  0;
+  dubv = 0; 
+  lim = size;
+  ifl=0;  //increment flag
+
+  if (filename==NULL) return;
+
+  std::ifstream inp(filename,ios::in);
+  
+  if (!inp){
+    cerr << "cannot open " << filename << "\n";
+    exit(1);
+  }
+  
+  char buffer[100];
+  
+  inp >> setw(100) >> buffer;
+  
+  inp.close();
+  
+  if ((strncmp(buffer,"dict",4)==0) ||
+      (strncmp(buffer,"DICT",4)==0))
+    load(filename);
+  else
+    generate(filename);
+  
+  cerr << "loaded \n";
+
+
+}
+
+
+
+void dictionary::generate(char *filename){
+
+  char buffer[MAX_WORD];
+  int k;
+
+  ifstream inp(filename,ios::in);
+  
+  if (!inp){
+    cerr << "cannot open " << filename << "\n";
+    exit(1);
+  }
+
+  cerr << "dict:";
+  
+  ifl=1; k=0;
+  while (inp >> setw(MAX_WORD) >> buffer){
+    
+    if (strlen(buffer)==(MAX_WORD-1)){
+      cerr << "dictionary: a too long word was read (" 
+	   << buffer << ")\n";
+    };
+    
+    
+    if (strlen(buffer)==0){
+      cerr << "zero lenght word!\n";
+      continue;
+    }
+
+    //if (is && (strlen(buffer)==1) && !index(is,buffer[0]))  
+    if (is && (strlen(buffer)==1) && (index(is,buffer[0])!=NULL))  
+      continue; //skip over the interruption symbol
+
+    incfreq(encode(buffer),1);
+
+    if (!(++k % 1000000)) cerr << ".";
+  }
+  ifl=0;
+  cerr << "\n";
+
+  inp.close();
+
+}
+
+void dictionary::load(char* filename){
+  char header[100];
+  char buffer[MAX_WORD];
+  char *addr;
+  int freqflag=0;
+
+  ifstream inp(filename,ios::in);
+  
+  if (!inp){
+    cerr << "\ncannot open " << filename << "\n";
+    exit(1);
+  }
+
+  cerr << "dict:";
+
+  inp.getline(header,100);
+  if (strncmp(header,"DICT",4)==0)
+    freqflag=1;
+  else 
+    if (strncmp(header,"dict",4)!=0){
+      cerr << "\ndictionary file " << filename << " has a wrong header\n";
+      exit(1);
+    }
+      
+
+  while (inp >> setw(MAX_WORD) >> buffer){
+    
+    if (strlen(buffer)==(MAX_WORD-1)){
+      cerr << "\ndictionary: a too long word was read (" 
+	   << buffer << ")\n";
+      exit(1); 
+    };
+    
+    tb[n].word=st->push(buffer);
+    tb[n].code=n;
+
+    if (freqflag) 
+      inp >> tb[n].freq;
+    else
+      tb[n].freq=0;
+
+    if ((addr=htb->search((char  *)&tb[n].word,HT_ENTER)))
+      if (addr!=(char *)&tb[n].word){
+        cerr << "dictionary::loadtxt wrong entry was found (" 
+        <<  buffer << ") in position " << n << "\n";
+        exit(1);
+      }
+
+    N+=tb[n].freq;
+
+    if (strcmp(buffer,OOV())==0) oov_code=n;
+
+    if (++n==lim) grow();
+      
+  }
+  
+  inp.close();
+}
+
+
+void dictionary::load(std::istream& inp){
+  
+  char buffer[MAX_WORD];
+  char *addr;
+  int size;
+
+  inp >> size;
+
+  for (int i=0;i<size;i++){
+    
+    inp >> setw(MAX_WORD) >> buffer;
+
+    if (strlen(buffer)==MAX_WORD-1){
+      cerr << "\ndictionary::load found word exceeding max length (" 
+      << MAX_WORD << ")" << buffer << "\n";
+      exit(1);
+    };
+        
+    tb[n].word=st->push(buffer);
+    tb[n].code=n;
+    inp >> tb[n].freq;
+    N+=tb[n].freq;
+
+    if ((addr=htb->search((char  *)&tb[n].word,HT_ENTER)))
+      if (addr!=(char *)&tb[n].word){
+	      cerr << "dictionary::loadtxt wrong entry was found (" 
+	           <<  buffer << ") in position " << n << "\n";
+        exit(1);
+      }
+    
+    if (strcmp(tb[n].word,OOV())==0)
+      oov_code=n;
+
+    if (++n==lim) grow();
+  }
+  inp.getline(buffer,MAX_WORD-1);
+}
+
+void dictionary::save(std::ostream& out){
+  out << n << "\n";
+  for (int i=0;i<n;i++)
+    out << tb[i].word << " " << tb[i].freq << "\n";
+}
+
+
+int cmpdictentry(const void *a,const void *b){
+  dict_entry *ae=(dict_entry *)a;
+  dict_entry *be=(dict_entry *)b;
+  return be->freq-ae->freq;
+}
+
+dictionary::dictionary(dictionary* d){
+  
+  //transfer values
+  
+  n=d->n;        //total entries
+  N=d->N;        //total frequency
+  lim=d->lim;    //limit of entries
+  oov_code=-1;   //code od oov must be re-defined
+  ifl=0;         //increment flag=0;
+  dubv=d->dubv;  //dictionary upperbound transferred
+  in_oov_lex=0;  //does not copy oovlex;
+  
+    
+  //creates a sorted copy of the table
+  
+  tb  = new dict_entry[lim]; 
+  htb = new htable(lim/LOAD_FACTOR);
+  st  = new strstack(lim * 10);
+
+  for (int i=0;i<n;i++){
+    tb[i].code=d->tb[i].code;
+    tb[i].freq=d->tb[i].freq;
+    tb[i].word=st->push(d->tb[i].word);
+  }
+  
+  //sort all entries according to frequency
+  cerr << "sorting dictionary ...";
+  qsort(tb,n,sizeof(dict_entry),cmpdictentry);
+  cerr << "done\n";
+
+  for (int i=0;i<n;i++){
+  
+    //eventually re-assign oov code
+    if (d->oov_code==tb[i].code) oov_code=i;
+
+    tb[i].code=i;
+    htb->search((char  *)&tb[i].word,HT_ENTER);
+  };
+
+}
+
+
+
+dictionary::~dictionary(){
+  delete htb;
+  delete st;
+  delete [] tb;
+}
+
+void dictionary::stat(){
+  cout << "dictionary class statistics\n";
+  cout << "size " << n
+       << " used memory " 
+       << (lim * sizeof(int) + 
+	   htb->used() + 
+	   st->used())/1024 << " Kb\n";
+}
+
+void dictionary::grow(){
+  
+  delete htb;  
+
+  cerr << "+\b";
+
+  dict_entry *tb2=new dict_entry[lim+GROWTH_STEP];
+  
+  memcpy(tb2,tb,sizeof(dict_entry) * lim );
+  
+  delete [] tb; tb=tb2;
+  
+  htb=new htable((lim+GROWTH_STEP)/LOAD_FACTOR);
+  
+  for (int i=0;i<lim;i++)
+    
+    htb->search((char *)&tb[i].word,HT_ENTER);
+
+  for (int i=lim;i<lim+GROWTH_STEP;i++) tb[i].freq=0;
+  
+  lim+=GROWTH_STEP;
+  
+
+}
+
+void dictionary::save(char *filename,int freqflag){
+  
+  std::ofstream out(filename,ios::out);
+
+  if (!out){
+    cerr << "cannot open " << filename << "\n";
+  } 
+  
+  // header
+  if (freqflag)
+    out << "DICTIONARY 0 " << n << "\n";
+  else
+    out << "dictionary 0 " << n << "\n";
+
+  for (int i=0;i<n;i++){
+    out << tb[i].word;
+    if (freqflag)
+      out << " " << tb[i].freq;
+    out << "\n";
+  }
+
+  out.close();
+}
+
+
+int dictionary::getcode(const char *w){
+  dict_entry* ptr=(dict_entry *)htb->search((char *)&w,HT_FIND);
+  if (ptr==NULL) return -1;
+  return ptr->code;
+}
+
+int dictionary::encode(const char *w){
+  
+  //case of strange characters
+  if (strlen(w)==0){cerr << "0";w=OOV();}
+
+	dict_entry* ptr;
+	
+	if ((ptr=(dict_entry *)htb->search((char *)&w,HT_FIND))!=NULL) 
+    return ptr->code;
+  else{
+    if (!ifl){ //do not extend dictionary
+      if (oov_code==-1){ //did not use OOV yet
+	cerr << "starting to use OOV words [" << w << "]\n";
+	tb[n].word=st->push(OOV());
+	htb->search((char  *)&tb[n].word,HT_ENTER);
+	tb[n].code=n;
+	tb[n].freq=0;
+	oov_code=n;
+	if (++n==lim) grow();
+      }
+      //if there is an oov lexicon, check if this word belongs to
+      dict_entry* oovptr;
+      if (oovlex){
+				if ((oovptr=(dict_entry *)oovlex->htb->search((char *)&w,HT_FIND))!=NULL){
+	in_oov_lex=1;
+	oov_lex_code=oovptr->code;
+				}else
+					in_oov_lex=0;
+      }
+      return encode(OOV()); 
+    }
+    else{ //extend dictionary
+      tb[n].word=st->push((char *)w);
+      htb->search((char  *)&tb[n].word,HT_ENTER);
+      tb[n].code=n;
+      tb[n].freq=0;
+      if (++n==lim) grow();
+      return n-1;
+    }
+  }
+}
+
+
+char *dictionary::decode(int c){
+  if (c>=0 && c < n)
+    return tb[c].word;
+  else{
+    cerr << "decode: code out of boundary\n";
+    return OOV();
+  }
+}
+
+
+dictionary_iter::dictionary_iter(dictionary *dict) : m_dict(dict) {
+  m_dict->htb->scan(HT_INIT);
+}
+
+dict_entry* dictionary_iter::next() {
+  return (dict_entry*)m_dict->htb->scan(HT_CONT);
+}
+
+
+
+
+
+/*
+main(int argc,char **argv){
+  dictionary d(argv[1],40000);
+  d.stat();
+  cout << "ROMA" << d.decode(0) << "\n";
+  cout << "ROMA:" << d.encode("ROMA") << "\n";
+  d.save(argv[2]);
+}
+*/
diff --git a/irstlm/src/src/dictionary.h b/irstlm/src/src/dictionary.h
new file mode 100644
index 000000000..be625e810
--- /dev/null
+++ b/irstlm/src/src/dictionary.h
@@ -0,0 +1,186 @@
+/******************************************************************************
+ IrstLM: IRST Language Model Toolkit
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+
+******************************************************************************/
+
+#ifndef MF_DICTIONARY_H
+#define MF_DICTIONARY_H
+
+#include <cstring>
+#include <iostream>
+
+#define MAX_WORD 1000
+#define LOAD_FACTOR  5
+
+#ifndef GROWTH_STEP 
+#define GROWTH_STEP 100000
+#endif
+
+#ifndef DICT_INITSIZE
+#define DICT_INITSIZE 100000
+#endif
+
+//Begin of sentence symbol
+#ifndef BOS_
+#define BOS_ "<s>"
+#endif
+
+
+//End of sentence symbol
+#ifndef EOS_ 
+#define EOS_ "</s>"
+#endif
+
+//Out-Of-Vocabulary symbol
+#ifndef OOV_ 
+#define OOV_ "_unk_"
+#endif
+
+typedef struct{
+  char *word;
+  int  code;
+  int  freq;
+}dict_entry;
+
+class strstack;
+class htable;
+
+class dictionary{
+  strstack   *st;  //!< stack of strings
+  dict_entry *tb;  //!< entry table
+  htable    *htb;  //!< hash table
+  int          n;  //!< number of entries
+  int          N;  //!< total frequency
+  int        lim;  //!< limit of entries
+  int   oov_code;  //!< code assigned to oov words
+  char*       is;  //!< interruption symbol list
+  char       ifl;  //!< increment flag
+  int        dubv; //!< dictionary size upper bound
+  int in_oov_lex;  //!< flag
+  int oov_lex_code; //!< dictionary
+  char* oov_str;   //!< oov string
+
+ public:
+
+  friend class dictionary_iter;
+
+  dictionary* oovlex; //<! additional dictionary 
+
+  inline int dub(){return dubv;}
+  inline int dub(int value){return (dubv=value);}
+
+  inline char *OOV(){return (OOV_);} 
+  inline char *BoS(){return (BOS_);}
+  inline char *EoS(){return (EOS_);}
+
+  inline int oovcode(int v=-1){return oov_code=(v>=0?v:oov_code);}
+  
+  inline char *intsymb(char* isymb=NULL){
+    if (isymb==NULL) return is;
+    if (is!=NULL) delete [] is;
+    is=new char[strlen(isymb+1)];
+    strcpy(is,isymb);
+    return is=isymb;
+  }
+
+  inline int incflag(){return ifl;}
+  inline int incflag(int v){return ifl=v;}
+  inline int oovlexsize(){return oovlex?oovlex->n:0;}
+  inline int inoovlex(){return in_oov_lex;}
+  inline int oovlexcode(){return oov_lex_code;}
+  
+
+  int isprintable(char* w){
+    char buffer[MAX_WORD];
+    sprintf(buffer,"%s",w);
+    return strcmp(w,buffer)==0;
+  }
+
+  inline void genoovcode(){
+    int c=encode(OOV());
+    std::cerr << "OOV code is "<< c << std::endl;
+    oovcode(c);
+  }
+  
+  inline dictionary* oovlexp(char *fname=NULL){
+    if (fname==NULL) return oovlex;
+    if (oovlex!=NULL) delete oovlex;
+    oovlex=new dictionary(fname,DICT_INITSIZE);
+    return oovlex;
+  }
+
+  inline int setoovrate(double oovrate){ 
+    encode(OOV()); //be sure OOV code exists
+    int oovfreq=(int)(oovrate * totfreq());
+    std::cerr << "setting OOV rate to: " << oovrate << " -- freq= " << oovfreq << std::endl;
+    return freq(oovcode(),oovfreq);
+  }
+
+
+  inline int incfreq(int code,int value){N+=value;return tb[code].freq+=value;}
+
+  inline int multfreq(int code,double value){
+    N+=(int)(value * tb[code].freq)-tb[code].freq;
+    return tb[code].freq=(int)(value * tb[code].freq);
+  }
+  
+  inline int freq(int code,int value=-1){
+    if (value>=0){
+      N+=value-tb[code].freq; 
+      tb[code].freq=value;
+    }
+    return tb[code].freq;
+  }
+
+  inline int totfreq(){return N;}
+
+  void grow();
+  //dictionary(int size=400,char* isym=NULL,char* oovlex=NULL);
+  dictionary(char *filename=NULL,int size=DICT_INITSIZE,char* isymb=NULL,char* oovlex=NULL);
+  dictionary(dictionary* d);
+
+  ~dictionary();
+  void generate(char *filename);
+  void load(char *filename);
+  void save(char *filename,int freqflag=0);
+  void load(std::istream& fd);
+  void save(std::ostream& fd);
+
+  int size(){return n;};
+  int getcode(const char *w);
+  int encode(const char *w);
+  char *decode(int c);
+  void stat();
+
+  void cleanfreq(){
+    for (int i=0;i<n;tb[i++].freq=0); 
+    N=0;
+  }
+
+};
+
+class dictionary_iter {
+ public:
+  dictionary_iter(dictionary *dict);
+  dict_entry* next();
+ private:
+  dictionary* m_dict;
+};
+
+#endif
+
diff --git a/irstlm/src/src/gzfilebuf.h b/irstlm/src/src/gzfilebuf.h
new file mode 100644
index 000000000..5e215935a
--- /dev/null
+++ b/irstlm/src/src/gzfilebuf.h
@@ -0,0 +1,80 @@
+#ifndef _GZFILEBUF_H_
+#define _GZFILEBUF_H_
+
+#include <streambuf>
+#include <zlib.h>
+
+class gzfilebuf : public std::streambuf {
+public:
+  gzfilebuf(const char *filename)
+  { _gzf = gzopen(filename, "rb"); 
+    setg (_buff+sizeof(int),     // beginning of putback area
+          _buff+sizeof(int),     // read position
+          _buff+sizeof(int));    // end position
+  }
+  ~gzfilebuf() { gzclose(_gzf); }
+protected:
+  virtual int_type overflow (int_type c) {
+      throw;
+  }
+
+  // write multiple characters
+  virtual
+  std::streamsize xsputn (const char* s,
+                          std::streamsize num) {
+      throw;
+  }
+
+  virtual std::streampos seekpos ( std::streampos sp, std::ios_base::openmode which = std::ios_base::in | std::ios_base::out ){ throw;
+  }
+
+  //read one character
+  virtual int_type underflow () {
+    // is read position before end of _buff?
+        if (gptr() < egptr()) {
+            return traits_type::to_int_type(*gptr());
+        }
+
+        /* process size of putback area
+         * - use number of characters read
+         * - but at most four
+         */
+        unsigned int numPutback = gptr() - eback();
+        if (numPutback > sizeof(int)) {
+            numPutback = sizeof(int);
+        }
+
+        /* copy up to four characters previously read into
+         * the putback _buff (area of first four characters)
+         */
+        std::memmove (_buff+(sizeof(int)-numPutback), gptr()-numPutback,
+                      numPutback);
+
+        // read new characters
+        int num = gzread(_gzf, _buff+sizeof(int), _buffsize-sizeof(int));
+        if (num <= 0) {
+            // ERROR or EOF
+            return EOF;
+        }
+
+        // reset _buff pointers
+        setg (_buff+(sizeof(int)-numPutback),   // beginning of putback area
+              _buff+sizeof(int),                // read position
+              _buff+sizeof(int)+num);           // end of buffer
+
+        // return next character
+        return traits_type::to_int_type(*gptr());
+  }
+
+  std::streamsize xsgetn (char* s,
+                          std::streamsize num) {
+    return gzread(_gzf,s,num);
+  }
+
+private:
+  gzFile _gzf;
+  static const unsigned int _buffsize = 1024;
+  char _buff[_buffsize];
+};
+
+#endif
diff --git a/irstlm/src/src/htable.cpp b/irstlm/src/src/htable.cpp
new file mode 100644
index 000000000..73dab4d10
--- /dev/null
+++ b/irstlm/src/src/htable.cpp
@@ -0,0 +1,329 @@
+/******************************************************************************
+ IrstLM: IRST Language Model Toolkit 
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+
+******************************************************************************/
+
+#include <iostream>
+#include <cassert>
+#include "mempool.h"
+#include "htable.h"
+
+//bitwise rotation of unsigned integers
+
+#define rot_right(a,k) (((a) >> k ) | ((a) << (32-(k))))
+#define rot_left(a,k) (((a) << k ) | ((a) >> (32-(k))))
+
+using namespace std;
+
+htable::htable(int n,int kl,HTYPE ht,size_t (*klf)(const char* )){
+  
+  if (ht!=STRPTR && ht!=STR && kl==0){
+    cerr << "htable: key length must be specified for non-string entries!";
+    exit(1);
+  } 
+    
+  memory=new mempool( sizeof(entry) , BlockSize );
+
+  table = new entry* [ size=n ];
+
+  memset(table,0,sizeof(entry *) * n );
+  
+  keylen=(ht==INT || ht==INTPTR?kl/sizeof(int):kl);
+
+  htype=ht;
+  
+  keys = accesses = collisions = 0;
+
+  keylenfunc=(klf?klf:&strlen);    
+   
+}
+
+
+char *htable::search(char *item, HT_ACTION action)
+
+{
+  address       h;
+  entry        *q,**p;
+  int i;
+
+  //if (action == HT_FIND) 
+  accesses++;
+  
+  h = Hash(item);
+  
+  i=(h % size);
+  
+  //cout << "htable::search() hash i=" << i << "\n";
+  
+  p = &table[h % size];
+
+  q=*p;
+
+  /*
+  ** Follow collision chain
+  */
+  
+  while (q != NULL && Comp((char *)q->key,(char *)item))
+    {
+      p = (entry **)&q->next;
+      q=*p;
+      //if (action == HT_FIND) 
+      collisions++;
+    }
+  
+  if (
+      q != NULL                 /* found        */
+      ||
+      action == HT_FIND            /* not found, search only       */
+      ||
+      (q = (entry *)memory->allocate())
+      ==
+      NULL                      /* not found, no room   */
+      )
+
+    return((q!=NULL)?(char *)q->key:(char *)NULL);
+
+  *p = q;                       /* link into chain      */
+  /*
+  ** Initialize new element
+  */
+  
+  q->key = item;
+  q->next = NULL;
+  keys++;
+
+  return((char *)q->key);
+}
+
+
+char *htable::scan(HT_ACTION action){
+
+  char *k;
+  
+  if (action == HT_INIT)
+    {
+      scan_i=0;scan_p=table[0];
+      return NULL;
+    }
+
+  // if scan_p==NULL go to the first non null pointer
+  while ((scan_p==NULL) && (++scan_i<size)) scan_p=table[scan_i];
+
+  if (scan_p!=NULL)
+    {
+      k=scan_p->key;
+      scan_p=(entry *)scan_p->next;
+      return k;
+    };
+   
+  return NULL;
+}
+
+
+void htable::map(ostream& co,int cols){
+
+  entry *p;
+  char* img=new char[cols+1];
+
+  img[cols]='\0';
+  memset(img,'.',cols);
+
+  co << "htable memory map: . (0 items), - (<5), # (>5)\n";
+  
+  for (int i=0; i<size;i++)
+  {
+    int n=0;p=table[i];
+    
+    while(p!=NULL){
+      n++;
+      p=(entry *)p->next;
+    };
+    
+    if (i && (i % cols)==0){
+      co << img << "\n";
+      memset(img,'.',cols);
+    }
+    
+    if (n>0)
+      img[i % cols]=n<=5?'-':'#';
+    
+  }
+  
+  img[size % cols]='\0';
+  co << img << "\n";
+	
+	delete []img;
+}
+
+
+void htable::stat(){
+  cerr << "htable class statistics\n";
+  cerr << "size " << size 
+       << " keys " << keys
+       << " acc " << accesses 
+       << " coll " << collisions 
+       << " used memory " << used()/1024 << "Kb\n";
+}
+
+htable::~htable()
+{
+  delete [] table;
+  delete memory;
+}
+
+address htable::HashStr(char *key)
+{
+  char *Key=(htype==STRPTR? *(char **)key:key);
+  int  length=(keylen?keylen:keylenfunc(Key));
+  
+  //cerr << "hash: " << Key << " length:" << length << "\n";
+
+  register address h=0;
+  register int i;
+
+  for (i=0,h=0;i<length;i++)
+    h = h * Prime1 ^ (Key[i] - ' ');
+  h %= Prime2;
+
+  return h;
+}
+
+//Herbert Glarner's "HSH 11/13" hash function.
+/*
+address htable::HashInt(char *key){
+  
+int *Key=(htype==INTPTR? *(int **)key:(int *)key);
+  
+address state=12345,h=0;
+register int i,j;
+
+int p=7;  //precision=8 * sizeof(int)-1, in general must be >=7
+  
+ for (i=0,h=0;i<keylen;i++){    
+   h = h ^ ((address) Key[i]);
+   for (j=0;j<p;j++){
+     state = rot_left(state,11);   //state = state left-rotate 11 bits
+     h = rot_left(h,13);           //h = h left-rotate 13 bits
+     h ^= state ;                 //h = h xor state
+     h = rot_left(h,(state & (address)31)); //h = h left-rotate (state mod 32) bits
+     h = rot_left(h, (h & (address)31));    //h = h left-rotate (h mod 32) bits
+   }
+ }
+
+ return h;
+}
+
+*/
+
+address htable::HashInt(char *key)
+{
+  int *Key=(htype==INTPTR? *(int **)key:(int *)key);
+   
+  
+  address  h;
+  register int i;
+  
+  //Thomas Wang's 32 bit Mix Function
+  for (i=0,h=0;i<keylen;i++){    
+    h+=Key[i];
+    h += ~(h << 15);
+    h ^=  (h >> 10);
+    h +=  (h << 3);
+    h ^=  (h >> 6);
+    h += ~(h << 11);
+    h ^=  (h >> 16);    
+  };
+    
+  return h;
+}
+
+int htable::CompStr(char *key1, char *key2)
+{
+  assert(key1 && key2);
+ 
+  char *Key1=(htype==STRPTR?*(char **)key1:key1);
+  char *Key2=(htype==STRPTR?*(char **)key2:key2);
+
+  assert(Key1 && Key2);
+
+  int length1=(keylen?keylen:keylenfunc(Key1));
+  int length2=(keylen?keylen:keylenfunc(Key2));
+
+  if (length1!=length2) return 1;
+  
+  register int i;
+  
+  for (i=0;i<length1;i++)
+    if (Key1[i]!=Key2[i]) return 1;
+    return 0;
+}
+
+int htable::CompInt(char *key1, char *key2)
+{
+  assert(key1 && key2);
+  
+  int *Key1=(htype==INTPTR?*(int **)key1:(int*)key1);
+  int *Key2=(htype==INTPTR?*(int **)key2:(int*)key2);
+  
+  assert(Key1 && Key2);
+      
+  register int i;
+  
+  for (i=0;i<keylen;i++)
+    if (Key1[i]!=Key2[i]) return 1;
+  return 0;
+}
+
+
+/*
+main(){
+
+const int n=1000;
+  
+htable *ht=new htable(1000/5);
+
+  char w[n][20];
+  char *c;
+
+  for (int i=0;i<n;i++) 
+    {
+      sprintf(w[i],"ciao%d",i);
+      ht->search((char *)&w[i],HT_ENTER);
+    }
+
+  for (int i=0;i<n;i++) 
+  if (ht->search((char *)&w[i],HT_FIND))
+      cout << w[i] << " trovato\n" ;
+    else
+      cout << w[i] << " non trovato\n";
+
+      ht->stat();
+  
+  delete ht;
+  htable *ht2=new htable(n);
+  for (int i=0;i<n;i++) 
+    ht2->search((char *)&w[i],HT_ENTER);
+  
+  ht2->scan(INIT);
+  cout << "elenco:\n";
+  while ((c=ht2->scan(CONT))!=NULL)
+  cout << *(char **) c << "\n";
+  
+  ht2->map();
+}
+*/
diff --git a/irstlm/src/src/htable.h b/irstlm/src/src/htable.h
new file mode 100644
index 000000000..476d45e74
--- /dev/null
+++ b/irstlm/src/src/htable.h
@@ -0,0 +1,130 @@
+/******************************************************************************
+ IrstLM: IRST Language Model Toolkit
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+
+******************************************************************************/
+
+#ifndef MF_HTABLE_H
+#define MF_HTABLE_H
+
+#include <iostream>
+
+#define Prime1                 37
+#define Prime2                 1048583
+#define BlockSize              100
+
+
+// Fast arithmetic, relying on powers of 2,
+// and on pre-processor concatenation property
+
+typedef struct{
+  char*                 key;
+  char*                next;  // secret from user
+}entry;
+
+typedef unsigned int address;
+
+typedef enum {HT_FIND,    //!< search: find an entry
+	      HT_ENTER,   //!< search: enter an entry 
+	      HT_INIT,    //!< scan: start scan
+	      HT_CONT     //!< scan: continue scan
+} HT_ACTION;
+
+typedef enum {
+        STR,     //!< string 
+	      STRPTR,   //!< pointer to string
+        INT,     //!< pointer to int
+        INTPTR  //!< pointer to pointer to int
+}HTYPE;
+
+//! Hash Table for strings
+
+class htable {
+  int        size;            //!< table size
+  int      keylen;            //!< key length
+  HTYPE     htype;            //!< type of entry pointer
+  entry   **table;            //!< hash table  
+  int      scan_i;            //!< scan support 
+  entry   *scan_p;            //!< scan support 
+  // statistics
+  long       keys;            //!< # of entries 
+  long   accesses;            //!< # of accesses
+  long collisions;            //!< # of collisions
+
+  mempool  *memory;           //!<  memory pool
+
+  size_t (*keylenfunc)(const char*);          //!< function computing key length              
+  address (*hashfunc)(const char*);               //!< hash function 
+  int (*compfunc)(const char*, const char*);   //!< comparison function 
+  
+ public:
+
+  //! Creates an hash table
+  htable(int n,int kl=0,HTYPE ht=STRPTR,size_t (*klf)(const char* )=NULL);
+
+  //! Destroys an and hash table
+  ~htable();
+
+  //! Computes the hash function
+  address Hash(char *key){
+    switch (htype){
+      case INT:case INTPTR: return HashInt(key);
+        break;
+      case STR:case STRPTR: return HashStr(key);
+      default: exit(1);
+    }
+  };
+  address HashInt(char *key);
+  address HashStr(char *key);  
+  
+  //! Compares the keys of two entries
+  int Comp(char *Key1,char *Key2){
+    switch (htype){
+      case INT:case INTPTR: return CompInt(Key1,Key2);
+        break;
+      case STR:case STRPTR: return CompStr(Key1,Key2);
+      default: exit(1);
+    };    
+  }
+
+  int CompInt(char *Key1,char *Key2);
+  int CompStr(char *Key1,char *Key2);
+  
+  //! Searches for an item
+  char *search(char *item, HT_ACTION action);
+
+  //! Scans the content
+  char *scan(HT_ACTION action);
+
+  //! Prints statistics
+  void stat();
+  
+  //! Print a map of memory use
+  void map(std::ostream& co=std::cout, int cols=80);
+
+  //! Returns amount of used memory
+  int used(){return 
+	       size * sizeof(entry **) + 
+	       memory->used();};
+};
+
+
+
+#endif
+
+
+
diff --git a/irstlm/src/src/index.h b/irstlm/src/src/index.h
new file mode 100644
index 000000000..e4e29d793
--- /dev/null
+++ b/irstlm/src/src/index.h
@@ -0,0 +1,19 @@
+
+
+#pragma once
+
+#ifdef WIN32
+
+inline const char *index(const char *str, char search)
+{
+	size_t i=0;
+	while (i< strlen(str) ){
+		if (str[i]==search) return &str[i];	
+	}
+	return NULL;
+}
+
+
+#endif
+
+
diff --git a/irstlm/src/src/lmtable.cpp b/irstlm/src/src/lmtable.cpp
new file mode 100644
index 000000000..05ab115eb
--- /dev/null
+++ b/irstlm/src/src/lmtable.cpp
@@ -0,0 +1,1217 @@
+/******************************************************************************
+IrstLM: IRST Language Model Toolkit
+Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+
+******************************************************************************/
+#include <iostream>
+#include <fstream>
+#include <stdexcept>
+#include <cassert>
+#include "math.h"
+#include "mempool.h"
+#include "htable.h"
+#include "ngramcache.h"
+#include "dictionary.h"
+#include "n_gram.h"
+#include "lmtable.h"
+#include "util.h"
+
+using namespace std;
+
+inline void error(char* message){
+  cerr << message << "\n";
+  throw std::runtime_error(message);
+}
+
+
+//instantiate an empty lm table
+lmtable::lmtable(){
+  
+  configure(1,false);
+  
+  dict=new dictionary((char *)NULL,1000000,(char*)NULL,(char*)NULL);
+  
+  memset(cursize, 0, sizeof(cursize));
+	memset(tbltype, 0, sizeof(tbltype));
+	memset(maxsize, 0, sizeof(maxsize));
+	memset(info, 0, sizeof(info));
+	memset(NumCenters, 0, sizeof(NumCenters));
+ 
+  max_cache_lev=0;
+  for (int i=0;i<=LMTMAXLEV+1;i++) lmtcache[i]=NULL;
+  probcache=NULL;
+  statecache=NULL;
+  
+};
+
+ 
+//loadstd::istream& inp a lmtable from a lm file
+
+void lmtable::load(istream& inp){
+  
+  //give a look at the header to select loading method
+  char header[1024];
+	
+  inp >> header; cerr << header << "\n";
+  
+  if (strncmp(header,"Qblmt",5)==0 || strncmp(header,"blmt",4)==0)
+    loadbin(inp,header);
+  else 
+    loadtxt(inp,header);
+  
+  dict->genoovcode();
+    
+	
+  cerr << "OOV code is " << dict->oovcode() << "\n";
+  
+}
+
+
+
+int parseWords(char *sentence, char **words, int max)
+{
+  char *word;
+  int i = 0;
+  
+  char *const wordSeparators = " \t\r\n";
+  
+  for (word = strtok(sentence, wordSeparators);
+       i < max && word != 0;
+       i++, word = strtok(0, wordSeparators))
+  {
+    words[i] = word;
+  }
+		
+  if (i < max){words[i] = 0;}
+  
+  return i;
+}
+
+
+
+//Load a LM as a text file. LM could have been generated either with the 
+//IRST LM toolkit or with the SRILM Toolkit. In the latter we are not 
+//sure that n-grams are lexically ordered (according to the 1-grams).
+//However, we make the following assumption: 
+//"all successors of any prefix are sorted and written in contiguous lines!"
+//This method also loads files processed with the quantization 
+//tool: qlm
+
+int parseline(istream& inp, int Order,ngram& ng,float& prob,float& bow){
+	
+  char* words[1+ LMTMAXLEV + 1 + 1];
+  int howmany;		
+  char line[MAX_LINE];
+  
+  inp.getline(line,MAX_LINE);  
+  if (strlen(line)==MAX_LINE-1){
+      cerr << "parseline: input line exceed MAXLINE (" 
+      << MAX_LINE << ") chars " << line << "\n";
+    exit(1);
+  }
+  
+  howmany = parseWords(line, words, Order + 3);
+  assert(howmany == (Order+ 1) || howmany == (Order + 2));
+	
+  //read words
+  ng.size=0;
+  for (int i=1;i<=Order;i++) 
+    ng.pushw(strcmp(words[i],"<unk>")?words[i]:ng.dict->OOV());
+  
+  //read logprob/code and logbow/code
+  assert(sscanf(words[0],"%f",&prob));
+  if (howmany==(Order+2))
+    assert(sscanf(words[Order+1],"%f",&bow));
+  else
+    bow=0.0; //this is log10prob=0 for implicit backoff		
+  
+  return 1;
+}
+
+
+void lmtable::loadcenters(istream& inp,int Order){
+  char line[MAX_LINE];
+  
+  //first read the coodebook
+  cerr << Order << " read code book ";
+  inp >> NumCenters[Order];
+  Pcenters[Order]=new float[NumCenters[Order]];
+  Bcenters[Order]=(Order<maxlev?new float[NumCenters[Order]]:NULL);
+	
+  for (int c=0;c<NumCenters[Order];c++){
+    inp >> Pcenters[Order][c];
+    if (Order<maxlev) inp >> Bcenters[Order][c];
+  };  
+  //empty the last line  
+  inp.getline((char*)line,MAX_LINE);
+  
+}
+
+
+void lmtable::loadtxt(istream& inp,const char* header){
+  
+  
+  //open input stream and prepare an input string
+  char line[MAX_LINE];
+  
+  //prepare word dictionary
+  //dict=(dictionary*) new dictionary(NULL,1000000,NULL,NULL); 
+  dict->incflag(1);
+	
+  //put here ngrams, log10 probabilities or their codes
+  ngram ng(dict); 
+  float prob,bow;;
+  
+  //check the header to decide if the LM is quantized or not
+  isQtable=(strncmp(header,"qARPA",5)==0?true:false);
+	
+  //we will configure the table later we we know the maxlev;
+  bool yetconfigured=false;
+	
+  cerr << "loadtxt()\n"; 
+	
+  // READ ARPA Header
+  int Order,n;
+  
+  while (inp.getline(line,MAX_LINE)){
+		
+    if (strlen(line)==MAX_LINE-1){
+      cerr << "lmtable::loadtxt: input line exceed MAXLINE (" 
+      << MAX_LINE << ") chars " << line << "\n";
+      exit(1);
+    }
+    
+    bool backslash = (line[0] == '\\');
+    
+    if (sscanf(line, "ngram %d=%d", &Order, &n) == 2) {
+      maxsize[Order] = n; maxlev=Order; //upadte Order
+      
+    }
+		
+    if (backslash && sscanf(line, "\\%d-grams", &Order) == 1) {
+			
+      //at this point we are sure about the size of the LM
+      if (!yetconfigured){
+        configure(maxlev,isQtable);yetconfigured=true;
+        //allocate space for loading the table of this level
+        for (int i=1;i<=maxlev;i++)
+          table[i]= new char[maxsize[i] * nodesize(tbltype[i])];                    
+      }			 
+      
+      cerr << Order << "-grams: reading ";
+			
+      if (isQtable) loadcenters(inp,Order);						
+			
+      //allocate support vector to manage badly ordered n-grams
+      if (maxlev>1 && Order<maxlev) {
+        startpos[Order]=new int[maxsize[Order]];
+        for (int c=0;c<maxsize[Order];c++) startpos[Order][c]=-1;
+      }
+			
+      //prepare to read the n-grams entries
+      cerr << maxsize[Order] << " entries\n";
+      
+      //WE ASSUME A WELL STRUCTURED FILE!!!
+      
+      for (int c=0;c<maxsize[Order];c++){
+				
+        if (parseline(inp,Order,ng,prob,bow))
+          add(ng,
+              (int)(isQtable?prob:*((int *)&prob)),
+              (int)(isQtable?bow:*((int *)&bow)));
+      }
+      // now we can fix table at level Order -1
+      if (maxlev>1 && Order>1) checkbounds(Order-1);			
+    }
+  }
+	
+  dict->incflag(0);  
+  cerr << "done\n";
+	
+}
+
+
+//Checkbound with sorting of n-gram table on disk
+
+void lmtable::checkbounds(int level){
+  
+  char*  tbl=table[level];
+  char*  succtbl=table[level+1];
+	
+  LMT_TYPE ndt=tbltype[level], succndt=tbltype[level+1];
+  int ndsz=nodesize(ndt), succndsz=nodesize(succndt);
+	
+   //re-order table at level+1 on disk
+  //generate random filename to avoid collisions 
+  ofstream out;string filePath;
+  createtempfile(out,filePath,ios::out);
+ 
+  int start,end,newstart;
+	
+  //re-order table at level l+1
+  newstart=0;
+  for (int c=0;c<cursize[level];c++){
+    start=startpos[level][c]; end=bound(tbl+c*ndsz,ndt);
+    //is start==-1 there are no successors for this entry and end==-2
+    if (end==-2) end=start;
+    assert(start<=end);
+    assert(newstart+(end-start)<=cursize[level+1]);
+    assert(end<=cursize[level+1]);
+		
+    if (start<end)
+      out.write((char*)(succtbl + start * succndsz),(end-start) * succndsz);  
+    
+    bound(tbl+c*ndsz,ndt,newstart+(end-start));
+    newstart+=(end-start);
+  }
+  out.close();
+
+	fstream inp(filePath.c_str(),ios::in);
+  
+  inp.read(succtbl,cursize[level+1]*succndsz);
+  inp.close();  
+  removefile(filePath);
+
+}
+
+//Add method inserts n-grams in the table structure. It is ONLY used during 
+//loading of LMs in text format. It searches for the prefix, then it adds the 
+//suffix to the last level and updates the start-end positions. 
+
+int lmtable::add(ngram& ng,int iprob,int ibow){
+	
+  char *found; LMT_TYPE ndt; int ndsz;  
+  
+  if (ng.size>1){
+		
+    // find the prefix starting from the first level
+    int start=0, end=cursize[1]; 
+		
+    for (int l=1;l<ng.size;l++){
+			
+      ndt=tbltype[l]; ndsz=nodesize(ndt);
+			
+      if (search(table[l] + (start * ndsz),ndt,l,(end-start),ndsz,
+                 ng.wordp(ng.size-l+1),LMT_FIND, &found)){
+				
+        //update start-end positions for next step
+        if (l< (ng.size-1)){										
+          //set start position
+          if (found==table[l]) start=0; //first pos in table
+          else start=bound(found - ndsz,ndt); //end of previous entry 
+				  
+          //set end position
+          end=bound(found,ndt);
+        }
+      }
+      else{
+        cerr << "warning: missing back-off for ngram " << ng << "\n";
+        return 0;
+      }		
+    }
+		
+    // update book keeping information about level ng-size -1.
+    // if this is the first successor update start position
+    int position=(found-table[ng.size-1])/ndsz;
+    if (startpos[ng.size-1][position]==-1)
+      startpos[ng.size-1][position]=cursize[ng.size];
+		
+    //always update ending position	
+    bound(found,ndt,cursize[ng.size]+1);
+    //cout << "startpos: " << startpos[ng.size-1][position] 
+    //<< " endpos: " << bound(found,ndt) << "\n";
+		
+  }
+	
+  // just add at the end of table[ng.size]
+	
+  assert(cursize[ng.size]< maxsize[ng.size]); // is there enough space?
+  ndt=tbltype[ng.size];ndsz=nodesize(ndt);
+  
+  found=table[ng.size] + (cursize[ng.size] * ndsz);
+  word(found,*ng.wordp(1)); 
+  prob(found,ndt,iprob);
+  if (ng.size<maxlev){bow(found,ndt,ibow);bound(found,ndt,-2);}
+	
+  cursize[ng.size]++;
+	
+  return 1;
+	
+}
+
+
+void *lmtable::search(char* tb,
+                      LMT_TYPE ndt,
+                      int lev,
+                      int n,
+                      int sz,
+                      int *ngp,
+                      LMT_ACTION action,
+                      char **found){
+	
+  if (lev==1) return *found=(*ngp <n ? tb + *ngp * sz:NULL);
+  
+  //prepare search pattern
+  char w[LMTCODESIZE];putmem(w,ngp[0],0,LMTCODESIZE);
+	
+  int idx=0; // index returned by mybsearch
+  *found=NULL;	//initialize output variable	
+  switch(action){    
+    case LMT_FIND:			
+      if (!tb || !mybsearch(tb,n,sz,(unsigned char *)w,&idx)) return NULL;
+      else
+        return *found=tb + (idx * sz);
+    default:
+      error("lmtable::search: this option is available");
+  };
+	
+  return NULL;
+}
+
+
+int lmtable::mybsearch(char *ar, int n, int size,
+                       unsigned char *key, int *idx)
+{
+  
+  totbsearch++;
+  
+  register int low, high;
+  register unsigned char *p;
+  register int result;
+  register int i;
+  
+  /* return idx with the first 
+    position equal or greater than key */
+  
+  /*   Warning("start bsearch \n"); */
+  
+  low = 0;high = n; *idx=0;
+  while (low < high)
+  {
+    *idx = (low + high) / 2;
+    p = (unsigned char *) (ar + (*idx * size));
+    
+    //comparison
+    for (i=(LMTCODESIZE-1);i>=0;i--){
+      result=key[i]-p[i];
+      if (result) break;
+    }
+    
+    if (result < 0)
+      high = *idx;
+    else if (result > 0)
+      low = *idx + 1;
+    else
+      return 1;
+  }
+  
+  *idx=low;
+  
+  return 0;
+  
+}
+
+
+// saves a LM table in text format
+
+void lmtable::savetxt(const char *filename){
+  
+  fstream out(filename,ios::out);
+  int l;
+	
+  out.precision(7);			
+  
+  if (isQtable) out << "qARPA\n";	
+  
+  
+  ngram ng(dict,0);
+  
+  cerr << "savetxt: " << filename << "\n";
+  
+  out << "\n\\data\\\n";
+  for (l=1;l<=maxlev;l++){
+    out << "ngram " << l << "= " << cursize[l] << "\n";
+  }
+  
+  for (l=1;l<=maxlev;l++){
+    
+    out << "\n\\" << l << "-grams:\n";
+    cerr << "save: " << cursize[l] << " " << l << "-grams\n";
+    if (isQtable){
+      out << NumCenters[l] << "\n";
+      for (int c=0;c<NumCenters[l];c++){
+        out << Pcenters[l][c];
+        if (l<maxlev) out << " " << Bcenters[l][c];
+        out << "\n";
+      }
+    }
+		
+    ng.size=0;
+    dumplm(out,ng,1,l,0,cursize[1]);
+		
+  }
+	
+  out << "\\end\\\n";  
+  cerr << "done\n";
+}
+
+
+void lmtable::savebin(const char *filename){
+	
+  fstream out(filename,ios::out);  
+  cerr << "savebin: " << filename << "\n";
+	
+  // print header
+  if (isQtable){
+    out << "Qblmt " << maxlev;
+    for (int i=1;i<=maxlev;i++) out << " " << cursize[i];
+    out << "\nNumCenters";
+    for (int i=1;i<=maxlev;i++)  out << " " << NumCenters[i];
+    out << "\n";
+    
+  }else{
+    out << "blmt " << maxlev;
+    for (int i=1;i<=maxlev;i++) out << " " << cursize[i] ;
+    out << "\n";
+  }
+	
+  dict->save(out);
+  
+  for (int i=1;i<=maxlev;i++){
+    cerr << "saving " << cursize[i] << " " << i << "-grams\n";
+    if (isQtable){
+      out.write((char*)Pcenters[i],NumCenters[i] * sizeof(float));
+      if (i<maxlev) 
+        out.write((char *)Bcenters[i],NumCenters[i] * sizeof(float));
+    }
+    out.write(table[i],cursize[i]*nodesize(tbltype[i]));
+  }
+  
+  cerr << "done\n";
+}
+
+
+//manages the long header of a bin file
+
+void lmtable::loadbinheader(istream& inp,const char* header){
+  
+  // read rest of header
+  inp >> maxlev;
+  
+  if (strncmp(header,"Qblmt",5)==0) isQtable=1;
+  else if(strncmp(header,"blmt",4)==0) isQtable=0;
+  else error("loadbin: LM file is not in binary format");
+	
+  configure(maxlev,isQtable);
+  
+  for (int i=1;i<=maxlev;i++){
+    inp >> cursize[i]; maxsize[i]=cursize[i];
+    table[i]=new char[cursize[i] * nodesize(tbltype[i])];
+  }
+  
+  
+  if (isQtable){
+    char header2[100];
+    cerr << "reading num centers:";
+    inp >> header2;
+    for (int i=1;i<=maxlev;i++){
+      inp >> NumCenters[i];cerr << " " << NumCenters[i];
+      
+    }
+    cerr << "\n";
+  } 
+}
+
+//load codebook of level l
+
+void lmtable::loadbincodebook(istream& inp,int l){
+  
+  Pcenters[l]=new float [NumCenters[l]];   
+  inp.read((char*)Pcenters[l],NumCenters[l] * sizeof(float));
+  if (l<maxlev){ 
+    Bcenters[l]=new float [NumCenters[l]];
+    inp.read((char *)Bcenters[l],NumCenters[l]*sizeof(float));
+  }
+  
+}
+
+//load a binary lmfile
+
+void lmtable::loadbin(istream& inp, const char* header){
+    
+  cerr << "loadbin()\n";
+  
+  loadbinheader(inp,header);
+  
+  dict->load(inp);  
+  
+  for (int l=1;l<=maxlev;l++){
+    if (isQtable) loadbincodebook(inp,l);
+    
+    cerr << "loading " << cursize[l] << " " << l << "-grams\n";
+    inp.read(table[l],cursize[l]*nodesize(tbltype[l]));
+  };  
+  
+  cerr << "done\n";
+}
+
+
+
+int lmtable::get(ngram& ng,int n,int lev){
+  
+  //  cout << "cerco:" << ng << "\n";
+  totget[lev]++;
+  
+  if (lev > maxlev) error("get: lev exceeds maxlevel");
+  if (n < lev) error("get: ngram is too small");
+  
+  //set boudaries for 1-gram 
+  int offset=0,limit=cursize[1];
+	
+  //information of table entries
+  int hit;char* found; LMT_TYPE ndt;
+  ng.link=NULL;
+  ng.lev=0;            
+
+  for (int l=1;l<=lev;l++){
+		
+    //initialize entry information 
+    hit = 0 ; found = NULL; ndt=tbltype[l];
+    
+    //if (l==2) cout <<"bicache: searching:" << ng <<"\n";
+    
+    if (lmtcache[l] && lmtcache[l]->get(ng.wordp(n),(char *)&found))
+      hit=1;
+    else
+      search(table[l] + (offset * nodesize(ndt)),
+             ndt,
+             l,
+             (limit-offset),
+             nodesize(ndt),
+             ng.wordp(n-l+1), 
+             LMT_FIND,
+             &found);
+ 
+    //insert both found and not found items!!!
+    if (lmtcache[l] && hit==0)
+      lmtcache[l]->add(ng.wordp(n),(char *)&found);   
+    
+    if (!found) return 0;      
+    
+    ng.link=found;
+    ng.info=ndt;
+    ng.lev=l;
+    
+    if (l<maxlev){ //set start/end point for next search
+			
+      //if current offset is at the bottom also that of successors will be
+      if (offset+1==cursize[l]) limit=cursize[l+1];
+      else limit=bound(found,ndt);
+      
+      //if current start is at the begin, then also that of successors will be
+      if (found==table[l]) offset=0;
+      else offset=bound((found - nodesize(ndt)),ndt);
+      
+      assert(offset!=-1); assert(limit!=-1);      
+    }
+  }
+  
+  //put information inside ng
+  ng.size=n;  ng.freq=0; 
+  ng.succ=(lev<maxlev?limit-offset:0);
+  
+  return 1;
+}
+
+
+//recursively prints the language model table
+
+void lmtable::dumplm(fstream& out,ngram ng, int ilev, int elev, int ipos,int epos){
+	
+  LMT_TYPE ndt=tbltype[ilev];
+  int ndsz=nodesize(ndt);
+  
+	
+  assert(ng.size==ilev-1);
+  assert(ipos>=0 && epos<=cursize[ilev] && ipos<epos);
+  ng.pushc(0);
+		
+  for (int i=ipos;i<epos;i++){
+    *ng.wordp(1)=word(table[ilev]+i*ndsz);
+    if (ilev<elev){
+      //get first and last successor position
+      int isucc=(i>0?bound(table[ilev]+(i-1)*ndsz,ndt):0);
+      int esucc=bound(table[ilev]+i*ndsz,ndt);
+      if (isucc < esucc) //there are successors!
+        dumplm(out,ng,ilev+1,elev,isucc,esucc);
+      //else
+      //cout << "no successors for " << ng << "\n";
+    }
+    else{
+      //out << i << " "; //this was just to count printed n-grams
+      int ipr=prob(table[ilev]+ i * ndsz,ndt);
+      out << (isQtable?ipr:*(float *)&ipr) <<"\t";
+      for (int k=ng.size;k>=1;k--){
+        if (k<ng.size) out << " ";
+        out << dict->decode(*ng.wordp(k));				
+      }     
+      
+      if (ilev<maxlev){
+        int ibo=bow(table[ilev]+ i * ndsz,ndt);
+        if (isQtable) out << "\t" << ibo;
+        else
+          if (*((float *)&ibo)!=0.0) 
+            out << "\t" << *((float *)&ibo);
+       
+      }
+      out << "\n";				
+    }
+  }
+}
+
+//succscan iteratively returns all successors of an ngram h for which 
+//get(h,h.size,h.size) returned true. 
+
+
+int lmtable::succscan(ngram& h,ngram& ng,LMT_ACTION action,int lev){
+  assert(lev==h.lev+1 && h.size==lev && lev<=maxlev);
+  
+  LMT_TYPE ndt=tbltype[h.lev];
+  int ndsz=nodesize(ndt);
+  
+  switch (action){
+    
+    case LMT_INIT:
+      //reset ngram local indexes
+      
+      ng.size=lev;
+      ng.trans(h);    
+      ng.midx[lev]=(h.link>table[h.lev]?bound(h.link-ndsz,ndt):0);
+      
+      return 1;
+      
+    case LMT_CONT:
+      
+      if (ng.midx[lev]<bound(h.link,ndt))
+      {
+        //put current word into ng
+        *ng.wordp(1)=word(table[lev]+ng.midx[lev]*nodesize(tbltype[lev]));
+        ng.midx[lev]++;
+        return 1;
+      }
+      else
+        return 0;
+      
+    default:
+      cerr << "succscan: only permitted options are LMT_INIT and LMT_CONT\n";
+      exit(0);
+  }
+  
+}
+
+//maxsuffptr returns the largest suffix of an n-gram that is contained 
+//in the LM table. This can be used as a compact representation of the 
+//(n-1)-gram state of a n-gram LM. if the input k-gram has k>=n then it 
+//is trimmed to its n-1 suffix.
+
+const char *lmtable::maxsuffptr(ngram ong){  
+  
+  if (ong.size==0) return (char*) NULL;
+  if (ong.size>=maxlev) ong.size=maxlev-1;
+  
+  ngram ng=ong;
+  //ngram ng(dict); //eventually use the <unk> word
+  //ng.trans(ong);
+  
+  if (get(ng,ng.size,ng.size))
+    return ng.link;
+  else{ 
+    ong.size--;
+    return maxsuffptr(ong);
+  }
+}
+
+
+const char *lmtable::cmaxsuffptr(ngram ong){  
+  
+  if (ong.size==0) return (char*) NULL;
+  if (ong.size>=maxlev) ong.size=maxlev-1;
+  
+  char* found;
+  
+  if (statecache && (ong.size==maxlev-1) && statecache->get(ong.wordp(maxlev-1),(char *)&found))
+    return found;
+  
+  found=(char *)maxsuffptr(ong);
+  
+  if (statecache && ong.size==maxlev-1){
+    //if (statecache->isfull()) statecache->reset();
+    statecache->add(ong.wordp(maxlev-1),(char *)&found);    
+  }; 
+  
+  return found;
+  
+}
+
+
+// returns the probability of an n-gram
+
+double lmtable::prob(ngram ong){
+	
+  if (ong.size==0) return 1.0;
+  if (ong.size>maxlev) ong.size=maxlev;
+  
+  ngram ng(dict);
+  ng.trans(ong);
+	
+  double rbow;
+  int ibow,iprob;
+  LMT_TYPE ndt;
+  
+	
+  if (get(ng,ng.size,ng.size)){
+    ndt=(LMT_TYPE)ng.info; iprob=prob(ng.link,ndt);		
+    return exp((double)(isQtable?Pcenters[ng.size][iprob]:*((float *)&iprob)));
+  }
+  else{ //size==1 means an OOV word 
+    if (ng.size==1)    
+      return 1.0/UNIGRAM_RESOLUTION;
+    else{ // compute backoff
+          //set backoff state, shift n-gram, set default bow prob 
+      bo_state(1); ng.shift();rbow=1.0; 			
+      if (ng.lev==ng.size){ 
+        ndt= (LMT_TYPE)ng.info; ibow=bow(ng.link,ndt);
+        rbow= (double) (isQtable?Bcenters[ng.size][ibow]:*((float *)&ibow));
+      }
+      //prepare recursion step
+      ong.size--;      
+      return exp(rbow) * prob(ong);
+    }
+  }
+}
+
+//return log10 probs
+
+double lmtable::lprob(ngram ong){
+	
+  if (ong.size==0) return 0.0;
+  if (ong.size>maxlev) ong.size=maxlev;
+  
+  ngram ng=ong;
+  //ngram ng(dict); //avoid dictionary transfer
+  //ng.trans(ong);
+	
+  double rbow;
+  int ibow,iprob;
+  LMT_TYPE ndt;
+  
+	
+  if (get(ng,ng.size,ng.size)){
+    ndt=(LMT_TYPE)ng.info; iprob=prob(ng.link,ndt);		
+    return (double)(isQtable?Pcenters[ng.size][iprob]:*((float *)&iprob));
+  }
+  else{ //size==1 means an OOV word 
+    if (ng.size==1)    
+      return -log(UNIGRAM_RESOLUTION)/log(10.0);
+    else{ // compute backoff
+          //set backoff state, shift n-gram, set default bow prob 
+      bo_state(1); ng.shift();rbow=0.0; 			
+      if (ng.lev==ng.size){ 
+        ndt= (LMT_TYPE)ng.info; ibow=bow(ng.link,ndt);
+        rbow= (double) (isQtable?Bcenters[ng.size][ibow]:*((float *)&ibow));
+      }
+      //prepare recursion step
+      ong.size--;      
+      return rbow + lprob(ong);
+    }
+  }
+}
+
+//return log10 probsL use cache memory
+
+double lmtable::clprob(ngram ong){	
+  
+  if (ong.size==0) return 0.0;
+  
+  if (ong.size>maxlev) ong.size=maxlev;
+
+  double logpr; 
+
+#ifdef TRACE_CACHE
+  if (probcache && ong.size==maxlev && sentence_id>0lo){
+   *cacheout << sentence_id << " " << ong << "\n";  
+  }
+#endif  
+  
+  if (probcache && ong.size==maxlev && probcache->get(ong.wordp(maxlev),(char *)&logpr)){
+    return logpr;   
+  }
+ 
+  logpr=lprob(ong);
+  
+  if (probcache && ong.size==maxlev){
+    //if (probcache->isfull()) probcache->reset();
+     probcache->add(ong.wordp(maxlev),(char *)&logpr);    
+  }; 
+  
+  return logpr;
+
+};
+
+
+
+//Fill the lmtable with the n-grams in a huge lmfile.
+//Use the local dictionary to select the needed ngrams 
+
+
+void lmtable::filter2(const char* binlmfile, int buffMb){
+  
+  //load header and dictionary of binary lm on disk
+  lmtable* dsklmt=new lmtable();
+  fstream inp(binlmfile,ios::in);
+  
+  // read header
+  char header[MAX_LINE]; 
+  inp >> header;
+  
+  dsklmt->loadbinheader(inp, header);
+  dsklmt->dict->load(inp); 
+  
+  //inherit properties of the dsklmt
+  isQtable=dsklmt->isQtable;
+  configure(dsklmt->maxlevel(),dsklmt->isQtable);
+  if (isQtable)
+    for (int i=1;i<=maxlev;i++) NumCenters[i]=dsklmt->NumCenters[i];
+  
+  
+  //prepare word code conversion table; words which 
+  //are not in the local dictionary will have code -1
+  //prepare a new dictionary sorted as the large dictionary
+  
+  dictionary*  newdict=new dictionary((char *)NULL,1000000,(char*)NULL,(char*)NULL);
+  newdict->incflag(1);  
+  int* code2code=new int[dsklmt->dict->size()];
+  for (int w=0;w<dsklmt->dict->size();w++){
+    if (dict->getcode(dsklmt->dict->decode(w))!=-1)
+      code2code[w]=newdict->encode(dsklmt->dict->decode(w));
+    else code2code[w]=-1;
+  }
+  newdict->incflag(0);  
+  delete dict;
+  dict=newdict;
+  
+  //service variables
+  char* p; char* q; char* r; 
+  int ndsz; LMT_TYPE type; int w; 
+  long i,j,l;
+  
+  
+  disktable* dtbl;
+  for (l=1;l<=maxlev;l++){
+    
+    //shortcuts for current table
+    type=tbltype[l]; ndsz=nodesize(type);  
+    
+    //steel eventual coodebooks from dsklm;
+    if (isQtable) loadbincodebook(inp,l);
+    
+    //allocate the maxumum number of entries to be load at each time    
+    cerr << "loading part of " << dsklmt->cursize[l] << " " << l << "-grams\n";    
+    
+    //open a on-disk table buffer starting from current position in stream inp
+    dtbl=new disktable(inp, (buffMb * 1024 *1024)/ndsz,ndsz,dsklmt->maxsize[l]);
+    
+    if (l==1){            
+      
+      //compute actual table size
+      maxsize[l]=0;
+      for (i=0;i<dsklmt->maxsize[l];i++){
+        p=dtbl->get(inp,i);
+        if ((code2code[dsklmt->word(p)]) != -1) maxsize[l]++;
+      }     
+      
+      assert(maxsize[l]<=dsklmt->maxsize[l]);
+      
+      //allocate memory for table and start positions
+      table[l]=new char[maxsize[l] * ndsz];
+      startpos[l]=new int[maxsize[l]];
+      
+      //reset position of dsklmt
+      dtbl->rewind(inp);
+      
+      //copy elements into table[l]    
+      cursize[l]=0;
+      for (i=0;i<dsklmt->maxsize[l];i++){
+        p=dtbl->get(inp,i);
+        if ((w=code2code[dsklmt->word(p)]) != -1) {
+          r=table[l] + (long) cursize[l] * ndsz;
+          memcpy(r,p,ndsz); 
+          //store the initial poition in startpos
+          startpos[l][cursize[l]]=(i==0?0:bound(dtbl->get(inp,i-1),tbltype[l]));
+          word(r,w);
+          //cout << "1-gram bound:" << bound(r,tbltype[l]) << "\n";
+          cursize[l]++;
+        }
+      }
+      
+      for (i=0;i<cursize[l];i++) assert(word(table[l]+ (long) i * ndsz)==i);
+      
+      assert(maxsize[l]==cursize[l]);        
+      
+    }
+    else{
+      
+      //shortcuts for the predecessors table
+      char* ptable=table[l-1]; 
+      LMT_TYPE ptype=tbltype[l-1];
+      int pndsz=nodesize(ptype);
+      
+      //count actual table size, allocate memory, and copy elements
+      //we scan elements through the previous table: ptable
+      //cout << inp.tellp() << "\n";      
+      
+      maxsize[l]=0;  
+      
+      for (i=0;i<cursize[l-1];i++){
+        p=ptable+i*pndsz;         
+        for (j=startpos[l-1][i];j<bound(p,ptype);j++){  
+          assert(startpos[l-1][i]<bound(p,ptype));
+          q=dtbl->get(inp,j);
+          if ((w=code2code[dsklmt->word(q)]) != -1) maxsize[l]++;   
+          
+        }
+      }  
+      
+      //allocate memory for the table, and fill it
+      assert(maxsize[l]<=dsklmt->maxsize[l]);
+      
+      table[l]=new char[maxsize[l] * ndsz];
+      if (l<maxlev) startpos[l]=new int[maxsize[l]];
+      
+      //reset position of dsklmt
+      dtbl->rewind(inp);
+      
+      r=table[l]; //next available position in table[l]
+      cursize[l]=0;
+      
+      for (i=0;i<cursize[l-1];i++){
+        p=ptable+ i * pndsz;
+        for (j=startpos[l-1][i];j<bound(p,ptype);j++){  
+          assert(startpos[l-1][i]<bound(p,ptype));                  
+          
+          q=dtbl->get(inp,j);
+          
+          
+          if ((w=code2code[dsklmt->word(q)]) != -1){
+            //copy element
+            r=table[l] + cursize[l] * ndsz;
+            memcpy(r,q,ndsz);
+            if (l<maxlev) startpos[l][cursize[l]]=(j>0?bound(dtbl->get(inp,j-1),type):0);
+            word(r,w);
+            //cout << "+" << dict->decode(word(q)) << " - bound " 
+            //<< startpos[l][cursize[l]] << " " << bound(p,ptype) << "\n";
+            cursize[l]++; //increment index in startpos
+          }
+          
+        }
+        //update bounds of predecessor
+        bound(p,ptype,cursize[l]);
+      }   
+      
+      assert(cursize[l]==maxsize[l]);     
+    }
+    
+    
+    delete dtbl;
+    if (l>1) delete [] startpos[l-1]; 
+  }
+  
+  
+}
+
+void lmtable::filter(const char* binlmfile){
+  
+  //load header and dictionary of binary lm on disk
+  lmtable* dsklmt=new lmtable();
+  fstream inp(binlmfile,ios::in);  
+  
+  // read header
+  char header[MAX_LINE]; 
+  inp >> header;
+  
+  dsklmt->loadbinheader(inp, header);
+  
+  dsklmt->dict->load(inp); 
+  
+  //inherit properties of the dsklmt
+  configure(dsklmt->maxlevel(),dsklmt->isQuantized());
+  
+  //prepare word code conversion table; words which 
+  //are not in the local dictionary will have code -1
+  //prepare a new dictionary sorted as the large dictionary
+  
+  dictionary*  newdict=new dictionary((char *)NULL,1000000,(char*)NULL,(char*)NULL);
+  newdict->incflag(1);  
+  int* code2code=new int[dsklmt->dict->size()];
+  for (int w=0;w<dsklmt->dict->size();w++){
+    if (dict->getcode(dsklmt->dict->decode(w))!=-1)
+      code2code[w]=newdict->encode(dsklmt->dict->decode(w));
+    else code2code[w]=-1;
+  }
+  newdict->incflag(0);  
+  delete dict;
+  dict=newdict;
+  
+  //service variables
+  char* p; char* q; char* r; 
+  int ndsz; LMT_TYPE type; int w; 
+  int i,j,l;
+  
+  for (l=1;l<=maxlev;l++){
+    
+    //shortcuts for current table
+    type=tbltype[l]; ndsz=nodesize(type);  
+    
+    //steel eventual coodebooks from dsklm;
+    if (isQtable) loadbincodebook(inp,l);
+    
+    //load single l-table from dsklmt: this table can be
+    //removed at the ned of this cycle
+    cerr << "loading " << dsklmt->cursize[l] << " " << l << "-grams\n";
+    dsklmt->table[l]=new char[dsklmt->cursize[l]*ndsz];
+    inp.read(dsklmt->table[l],dsklmt->cursize[l]*ndsz);
+    
+    //shortcuts for dsktable
+    char* dtbl=dsklmt->table[l];
+    int dsize=dsklmt->cursize[l];
+    
+    if (l==1){
+      
+      //count actual table size
+      maxsize[l]=0;
+      for (i=0;i<dsize;i++)    
+        if ((code2code[dsklmt->word(dtbl+i*ndsz)]) != -1) maxsize[l]++;
+      
+      assert(maxsize[l]<=dsklmt->maxsize[l]);
+      //allocate memory for table and start positions
+      table[l]=new char[maxsize[l] * ndsz];
+      startpos[l]=new int[maxsize[l]];
+      
+      //copy elements one by one
+      
+      for (i=0;i<dsize;i++){       
+        p=dtbl+i*ndsz;
+        if ((w=code2code[dsklmt->word(p)]) != -1) {
+          r=table[l] + cursize[l] * ndsz;
+          memcpy(r,p,ndsz); 
+          //store the initial poition in startpos
+          startpos[l][cursize[l]]=(i==0?0:bound(p-ndsz,tbltype[l]));
+          word(r,w);
+          cursize[l]++;
+        }
+      }
+      
+      for (i=0;i<cursize[l];i++) assert(word(table[l]+i*ndsz)==i);
+      
+      assert(maxsize[l]==cursize[l]);        
+      
+    }
+    else{ //l>=1;
+      
+      //shortcuts for the predecessors table
+      char* ptable=table[l-1]; 
+      LMT_TYPE ptype=tbltype[l-1];
+      int pndsz=nodesize(ptype);
+      
+      //count actual table size, allocate memory, and copy elements
+      //we scan elements through the previous table: ptable
+      
+      maxsize[l]=0;  
+      for (i=0;i<cursize[l-1];i++){
+        p=ptable+i*pndsz;
+        for (j=startpos[l-1][i];j<bound(p,ptype);j++){         
+          q=dsklmt->table[l] + j * ndsz;
+          if ((w=code2code[dsklmt->word(q)]) != -1){
+            maxsize[l]++;   
+          } 
+        }
+      }    
+      
+      //allocate memory for the table, and fill it
+      assert(maxsize[l]<=dsklmt->maxsize[l]);
+      table[l]=new char[maxsize[l] * ndsz];
+      if (l<maxlev) startpos[l]=new int[maxsize[l]];
+      
+      r=table[l]; //next available position in table[l]
+      cursize[l]=0;
+      for (i=0;i<cursize[l-1];i++){
+        p=ptable+i*pndsz;
+        for (j=startpos[l-1][i];j<bound(p,ptype);j++){        
+          q=dsklmt->table[l] + j * ndsz;
+          if ((w=code2code[dsklmt->word(q)]) != -1){
+            //copy element
+            r=table[l] + cursize[l] * ndsz;
+            memcpy(r,q,ndsz);
+            if (l<maxlev)
+              startpos[l][cursize[l]]=(j==0?0:dsklmt->bound(q-ndsz,type));
+            word(r,w);
+            //cout << "+" << dict->decode(word(q)) << " - bound " 
+            //<< startpos[l][cursize[l]] << " " << bound(p,ptype) << "\n";
+            cursize[l]++; //increment index in startpos
+          }         
+        }
+        //update bounds of predecessor
+        bound(p,ptype,cursize[l]);
+      }    
+      
+    }
+    
+    delete [] dsklmt->table[l];
+    if (l>1) delete [] startpos[l-1]; 
+  }
+  
+}
+
+
+void lmtable::stat(int level){
+  int totmem=0,memory;
+  float mega=1024 * 1024;
+  
+  cout.precision(2);
+  
+  cout << "lmtable class statistics\n";
+  
+  cout << "levels " << maxlev << "\n";
+  for (int l=1;l<=maxlev;l++){
+    memory=cursize[l] * nodesize(tbltype[l]);
+    cout << "lev " << l 
+      << " entries "<< cursize[l] 
+      << " used mem " << memory/mega << "Mb\n";
+    totmem+=memory;
+  }
+  
+  cout << "total allocated mem " << totmem/mega << "Mb\n";
+  
+  cout << "total number of get calls\n";
+  for (int l=1;l<=maxlev;l++){
+    cout << "level " << l << " " << totget[l] << "\n";
+  }
+  cout << "total binary search : " << totbsearch << "\n";
+  
+  if (level >1 ) dict->stat();
+  
+}
diff --git a/irstlm/src/src/lmtable.h b/irstlm/src/src/lmtable.h
new file mode 100644
index 000000000..4171dabea
--- /dev/null
+++ b/irstlm/src/src/lmtable.h
@@ -0,0 +1,366 @@
+/******************************************************************************
+IrstLM: IRST Language Model Toolkit
+Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+
+******************************************************************************/
+
+
+#ifndef MF_LMTABLE_H
+#define MF_LMTABLE_H
+
+#include "ngramcache.h"
+#include "dictionary.h"
+#include "n_gram.h"
+
+#undef TRACE_CACHE
+
+#define LMTMAXLEV  20
+#define MAX_LINE  1024
+
+#ifndef  LMTCODESIZE
+#define  LMTCODESIZE  (int)3
+#endif
+
+#define SHORTSIZE (int)2
+#define PTRSIZE   (int)sizeof(char *)
+#define INTSIZE   (int)4
+#define CHARSIZE  (int)1
+
+#define PROBSIZE  (int)4 //use float  
+#define QPROBSIZE (int)1 
+#define BOUNDSIZE (int)4
+
+#define UNIGRAM_RESOLUTION 10000000.0
+
+typedef enum {INTERNAL,QINTERNAL,LEAF,QLEAF} LMT_TYPE;
+typedef char* node;
+
+typedef enum {LMT_FIND,    //!< search: find an entry
+  LMT_ENTER,   //!< search: enter an entry 
+  LMT_INIT,    //!< scan: start scan
+  LMT_CONT     //!< scan: continue scan
+} LMT_ACTION;
+
+
+//disktable or accessing tables stored on disk
+
+class disktable{  
+private:
+  char* buffer;          //!< buffer of disk table
+  int   buffer_size;     //!< size of buffer
+  int   entry_size;      //!< size of each single entry in buffer
+  long  current_border;  //!< current last available entry in buffer
+  long  file_border;     //!< last element in disk table
+  long  start_position;  /* */  
+public:
+    
+	disktable(std::fstream& inp, int buffersize, int entrysize,long max_entries){
+      buffer_size=buffersize;
+      entry_size=entrysize;
+      buffer=new char[(buffer_size+1) * entry_size];
+      current_border=0;
+      file_border=max_entries;
+      start_position=inp.tellp();
+    };
+  
+  ~disktable() {delete [] buffer;};
+  
+  char* get(std::fstream& inp,long position){    
+    assert(position < file_border);
+    
+    //you can look back at maximum one position before the first in the buffer!
+    assert(position >= (current_border-buffer_size -1));
+    
+    while (position>=current_border){
+      int how_many=(current_border + buffer_size <= file_border?buffer_size:file_border-current_border);
+      //store last value in position buffer_size;
+      memcpy(buffer + buffer_size * entry_size, buffer+(buffer_size-1) * entry_size, entry_size);
+      //read the number of elements
+      inp.read(buffer,how_many * entry_size);
+      //update curent border
+      current_border+=how_many;
+    }    
+    if (position > (current_border-buffer_size-1) ) //then it is in buffer
+      return buffer + (position % buffer_size) * entry_size; 
+    else
+      if (current_border>buffer_size) //asks for last of previous block
+        return buffer + buffer_size * entry_size;
+    else return NULL;
+  };
+  
+  void rewind(std::fstream& inp){
+    inp.seekp(start_position);
+    current_border=0;      
+  }  
+  
+};
+
+class lmtable{
+  
+  char*      table[LMTMAXLEV+1]; //storage of all levels
+  LMT_TYPE tbltype[LMTMAXLEV+1]; //table type for each levels
+  int      cursize[LMTMAXLEV+1]; //current size of levels
+  int      maxsize[LMTMAXLEV+1]; //current size of levels
+  int*    startpos[LMTMAXLEV+1]; //support vector to store start positions
+	
+  int               maxlev; //max level of table
+  char           info[100]; //information put in the header
+  
+  //statistics 
+  int    totget[LMTMAXLEV+1];
+  int    totbsearch;
+  
+  //probability quantization
+  bool      isQtable;
+  
+  int       NumCenters[LMTMAXLEV+1];
+  float*    Pcenters[LMTMAXLEV+1];
+  float*    Bcenters[LMTMAXLEV+1];
+  
+  int     lmt_oov_code;
+  int     lmt_oov_size;
+  int    backoff_state; 
+  
+  //improve access speed
+  ngramcache* lmtcache[LMTMAXLEV+1];
+	ngramcache* probcache;
+  ngramcache* statecache;
+  int max_cache_lev;
+
+public:
+    
+#ifdef TRACE_CACHE
+    std::fstream* cacheout;
+  int sentence_id;
+#endif
+  
+  dictionary     *dict; // dictionary
+  
+  lmtable();
+  
+  ~lmtable(){
+    for (int i=2;i<=LMTMAXLEV;i++)        
+    if (lmtcache[i]){
+      std::cerr << i <<"-gram cache: "; lmtcache[i]->stat();
+      delete lmtcache[i]; 
+    }
+    
+    if (probcache){
+      std::cerr << "Prob Cache: "; probcache->stat();
+      delete probcache;
+#if TRACE_CACHE
+      cacheout->close();
+      delete cacheout;
+#endif
+      
+    } 
+    if (statecache){
+      std::cerr << "State Cache: "; statecache->stat();
+      delete statecache;
+    } 
+    
+    
+    for (int i=1;i<=maxlev;i++){
+      if (table[i]) delete [] table[i];
+      if (isQtable){
+        if (Pcenters[i]) delete [] Pcenters[i];
+				if (i<maxlev) 
+          if (Bcenters[i]) delete [] Bcenters[i];
+      }
+    }
+  }
+    
+  void init_probcache(){
+    assert(probcache==NULL);
+    probcache=new ngramcache(maxlev,sizeof(double),400000);
+#ifdef TRACE_CACHE
+    cacheout=new std::fstream("/tmp/tracecache",std::ios::out);
+    sentence_id=0;
+#endif 
+  }
+  
+  void init_statecache(){
+    assert(statecache==NULL);
+    statecache=new ngramcache(maxlev-1,sizeof(char *),200000);
+  }
+  
+  void init_lmtcaches(int uptolev){
+    max_cache_lev=uptolev;
+    for (int i=2;i<=max_cache_lev;i++){
+    assert(lmtcache[i]==NULL);
+    lmtcache[i]=new ngramcache(i,sizeof(char *),200000);
+    }
+  }
+  
+  void check_cache_levels(){
+    if (probcache && probcache->isfull()) probcache->reset(probcache->cursize());
+    if (statecache && statecache->isfull()) statecache->reset(statecache->cursize());
+    for (int i=2;i<=max_cache_lev;i++)
+      if (lmtcache[i]->isfull()) lmtcache[i]->reset(lmtcache[i]->cursize());
+  }
+  
+  bool is_probcache_active(){return probcache!=NULL;}
+  bool is_statecache_active(){return statecache!=NULL;}
+  bool are_lmtcaches_active(){return lmtcache[2]!=NULL;}  
+  
+	void configure(int n,bool quantized){
+		maxlev=n;
+		if (n==1)
+			tbltype[1]=(quantized?QLEAF:LEAF);
+		else{
+			for (int i=1;i<n;i++) tbltype[i]=(quantized?QINTERNAL:INTERNAL);
+			tbltype[n]=(quantized?QLEAF:LEAF);
+    }
+	};
+	
+  int maxlevel(){return maxlev;};
+	bool isQuantized(){return isQtable;}
+  
+  
+  void savetxt(const char *filename);
+  void savebin(const char *filename);
+  void dumplm(std::fstream& out,ngram ng, int ilev, int elev, int ipos,int epos);
+  
+  void load(std::istream& inp);
+  void loadtxt(std::istream& inp,const char* header);
+  void loadbin(std::istream& inp,const char* header);
+  
+  void loadbinheader(std::istream& inp, const char* header);
+  void loadbincodebook(std::istream& inp,int l);
+  
+  void filter(const char* lmfile);
+  void filter2(const char* lmfile,int buffMb=512);
+  
+  void loadcenters(std::istream& inp,int Order);
+	
+  double prob(ngram ng); 
+  double lprob(ngram ng); 
+  double clprob(ngram ng); 
+
+  
+  void *search(char *tb,LMT_TYPE ndt,int lev,int n,int sz,int *w,
+               LMT_ACTION action,char **found=(char **)NULL);
+  
+  int mybsearch(char *ar, int n, int size, unsigned char *key, int *idx);   
+  
+  int add(ngram& ng,int prob,int bow);
+  void checkbounds(int level);
+  
+  int get(ngram& ng){return get(ng,ng.size,ng.size);}
+  int get(ngram& ng,int n,int lev);
+  
+  int succscan(ngram& h,ngram& ng,LMT_ACTION action,int lev);
+  
+  const char *maxsuffptr(ngram ong);
+  const char *cmaxsuffptr(ngram ong);
+  
+  inline int putmem(char* ptr,int value,int offs,int size){
+    assert(ptr!=NULL);
+    for (int i=0;i<size;i++)
+      ptr[offs+i]=(value >> (8 * i)) & 0xff;
+    return value;
+  };
+  
+  inline int getmem(char* ptr,int* value,int offs,int size){
+    assert(ptr!=NULL);
+    *value=ptr[offs] & 0xff;
+    for (int i=1;i<size;i++)
+      *value= *value | ( ( ptr[offs+i] & 0xff ) << (8 *i));
+    return *value;
+  };
+  
+  
+  int bo_state(int value=-1){ 
+    return (value==-1?backoff_state:backoff_state=value); 
+  };
+  
+  
+  int nodesize(LMT_TYPE ndt){
+    switch (ndt){
+      case INTERNAL:
+        return LMTCODESIZE + PROBSIZE + PROBSIZE + BOUNDSIZE;
+      case QINTERNAL:
+        return LMTCODESIZE + QPROBSIZE + QPROBSIZE + BOUNDSIZE;
+      case QLEAF:
+        return LMTCODESIZE + QPROBSIZE;      
+      case LEAF:
+        return LMTCODESIZE + PROBSIZE;      
+      default:
+        assert(0);
+        return 0;
+    }
+  }  
+  
+  inline int word(node nd,int value=-1)
+  {
+    int offset=0;
+    
+    if (value==-1)
+      getmem(nd,&value,offset,LMTCODESIZE);
+    else
+      putmem(nd,value,offset,LMTCODESIZE);
+    
+    return value;
+  };
+  
+  inline int prob(node nd,LMT_TYPE ndt, int value=-1)
+  {
+    int offs=LMTCODESIZE;
+    int size=(ndt==QINTERNAL || ndt==QLEAF?QPROBSIZE:PROBSIZE);
+    
+    if (value==-1)
+      getmem(nd,&value,offs,size);
+    else
+      putmem(nd,value,offs,size);
+    
+    return value;
+  };
+  
+  
+  inline int bow(node nd,LMT_TYPE ndt, int value=-1)
+  {
+    assert(ndt==INTERNAL || ndt==QINTERNAL);
+    int size=(ndt==QINTERNAL?QPROBSIZE:PROBSIZE);
+    int offs=LMTCODESIZE+size;
+    
+    if (value==-1)
+      getmem(nd,&value,offs,size);
+    else
+      putmem(nd,value,offs,size);
+    
+    return value;
+  };
+  
+  inline int bound(node nd,LMT_TYPE ndt, int value=-1)
+  {
+    assert(ndt==INTERNAL || ndt==QINTERNAL);
+    int offs=LMTCODESIZE+2*(ndt==QINTERNAL?QPROBSIZE:PROBSIZE);
+    
+    if (value==-1)
+      getmem(nd,&value,offs,BOUNDSIZE);
+    else
+      putmem(nd,value,offs,BOUNDSIZE);
+    
+    return value;
+  };
+  
+  void stat(int lev=0);
+  
+};
+
+#endif
+
diff --git a/irstlm/src/src/mempool.cpp b/irstlm/src/src/mempool.cpp
new file mode 100644
index 000000000..1c690213d
--- /dev/null
+++ b/irstlm/src/src/mempool.cpp
@@ -0,0 +1,497 @@
+/******************************************************************************
+ IrstLM: IRST Language Model Toolkit
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+
+******************************************************************************/
+
+// An efficient memory pool manager 
+// by M. Federico 
+// Copyright Marcello Federico, ITC-irst, 1998
+
+#include <iostream>
+#include <ostream>
+#include <cassert>
+#include "mempool.h"
+
+using namespace std;
+
+#ifdef TRACE_ENABLE
+#define TRACE_ERR(str) { std::cerr << str; }
+#else
+#define TRACE_ERR(str) { }
+#endif
+
+/*! The pool contains:
+   - entries of size is
+   - tables for bs entries
+*/
+
+
+mempool::mempool(int is, int bs){
+  
+  // item size must be multiple of memory alignment step (4 bytes)
+  // example: is is=9  becomes i=12 (9 + 4 - 9 %4 )
+  
+  is=(is>(int)sizeof(char *)?is:0);   
+  
+  is=is + sizeof(char *) - (is % sizeof(char *));  
+  
+  item_size  = is;
+  
+  block_size = bs;      
+  
+  true_size  = is * bs;
+  
+  block_list = new memnode;
+  
+  block_list->block = new char[true_size];
+  
+  memset(block_list->block,'0',true_size);
+  
+  block_list->next  = 0;
+  
+  blocknum = 1;
+  
+  entries  = 0;
+  
+  // build free list
+  
+  char *ptr = free_list = block_list->block;
+  
+  for (int i=0;i<block_size-1;i++) {
+    *(char **)ptr= ptr + item_size;
+    ptr+=item_size;                     
+  }
+  *(char **)ptr = NULL; //last item
+  
+}
+
+
+char * mempool::allocate(){
+  
+  char *ptr;
+  
+  if (free_list==NULL)
+  {
+    memnode *new_block = new memnode;
+    
+    new_block->block = new char[true_size];
+    
+    //memset(new_block->block,'0',true_size);
+    
+    new_block->next  = block_list;
+    
+    block_list=new_block; // update block list
+    
+    /* update  free list */
+    
+    ptr = free_list = block_list->block;
+    
+    for (int i=0;i<block_size-1;i++) {
+      *(char **)ptr = ptr + item_size;
+      ptr = ptr + item_size;                 
+    } 
+    
+    *(char **)ptr=NULL;
+    
+    blocknum++;
+  }
+  
+  assert(free_list);
+  
+  ptr = free_list;
+  
+  free_list=*(char **)ptr;
+  
+  *(char **)ptr=NULL; // reset the released item
+  
+  entries++;
+  
+  return ptr;
+  
+}
+
+
+int mempool::free(char* addr){
+  
+  // do not check if it belongs to this pool !!
+  /*
+   memnode  *list=block_list;
+   while ((list != NULL) &&
+          ((addr < list->block) ||
+           (addr >= (list->block + true_size))))
+   list=list->next;
+   
+   if ((list==NULL) || (((addr - list->block) % item_size)!=0))
+   {
+     //cerr << "mempool::free-> addr does not belong to this pool\n";
+     return 0;
+   }
+   */
+  
+  *(char **)addr=free_list;
+  free_list=addr;
+  
+  entries--;
+  
+  return 1;
+}
+
+
+mempool::~mempool()
+{
+  memnode *ptr;
+  
+  while (block_list !=NULL){
+    ptr=block_list->next;    
+    delete [] block_list->block;
+    delete block_list;
+    block_list=ptr;
+  } 
+  
+}
+
+void mempool::map (ostream& co){
+  
+  co << "mempool memory map:\n";
+  //percorri piu` volte la lista libera
+  
+  memnode *bl=block_list;
+  char *fl=free_list;
+  
+  char* img=new char[block_size+1];
+  img[block_size]='\0';
+  
+  while (bl !=NULL){
+    
+    memset(img,'#',block_size);
+    
+    fl=free_list;
+    while (fl != NULL){
+      if ((fl >= bl->block) 
+          && 
+          (fl < bl->block + true_size))
+      {
+        img[(fl-bl->block)/item_size]='-';
+      }
+      
+      fl=*(char **)fl;                     
+    }  
+    
+    co << img << "\n";
+    bl=bl->next;    
+  } 
+	delete [] img;
+}
+
+void mempool::stat(){
+  
+  TRACE_ERR("mempool class statistics\n"
+            << "entries " << entries
+            << " blocks " << blocknum
+            << " used memory " << (blocknum * true_size)/1024 << " Kb\n");
+}
+
+
+
+strstack::strstack(int bs){
+  
+  size=bs;
+  list=new memnode;
+  
+  list->block=new char[size];
+  
+  list->next=0;
+  
+  memset(list->block,'\0',size);
+  idx=0;
+  
+  waste=0;
+  memory=size;
+  entries=0;
+  blocknum=1;
+  
+}
+
+
+void strstack::stat(){
+  
+  TRACE_ERR("strstack class statistics\n"
+            << "entries " << entries
+            << " blocks " << blocknum
+            << " used memory " << memory/1024 << " Kb\n");
+}
+
+
+char *strstack::push(char *s){
+  int len=strlen(s);
+  
+  if ((len+1) >= size){
+    cerr << "strstack::push string is too long\n";
+    exit(1);
+  };
+  
+  if ((idx+len+1) >= size){
+    //append a new block
+    //there must be space to 
+    //put the index after 
+    //the word
+    
+    waste+=size-idx;
+    blocknum++;
+    memory+=size;
+    
+    memnode* nd=new memnode;
+    nd->block=new char[size];
+    nd->next=list;
+    
+    list=nd;
+    
+    memset(list->block,'\0',size);
+    
+    idx=0;
+    
+  }
+  
+  // append in current block
+  
+  strcpy(&list->block[idx],s);
+  
+  idx+=len+1;
+  
+  entries++;
+  
+  return &list->block[idx-len-1];
+  
+}
+
+
+char *strstack::pop(){
+  
+  if (list==0) return 0;
+  
+  if (idx==0){
+    
+    // free this block and go to next
+    
+    memnode *ptr=list->next;
+    
+    delete [] list->block;
+    delete list;
+    
+    list=ptr;
+    
+    if (list==0) 
+      return 0;
+    else
+      idx=size-1;
+  }
+  
+  //go back to first non \0
+  while (idx>0) 
+    if (list->block[idx--]!='\0')
+      break;
+  
+  //go back to first \0
+  while (idx>0) 
+    if (list->block[idx--]=='\0')
+      break;
+  
+  entries--;
+  
+  if (list->block[idx+1]=='\0')
+  {
+    idx+=2;
+    memset(&list->block[idx],'\0',size-idx);
+    return &list->block[idx];
+  }
+  else{
+    idx=0;
+    memset(&list->block[idx],'\0',size);
+    return &list->block[0];
+  }
+}
+
+
+char *strstack::top(){
+  
+  int tidx=idx;
+  memnode *tlist=list;
+  
+  if (tlist==0) return 0;
+  
+  if (idx==0){
+    
+    tlist=tlist->next;
+    
+    if (tlist==0) return 0;
+    
+    tidx=size-1;
+  }
+  
+  //go back to first non \0
+  while (tidx>0) 
+    if (tlist->block[tidx--]!='\0')
+      break;
+  
+  //aaa\0bbb\0\0\0\0
+  
+  //go back to first \0
+  while (tidx>0) 
+    if (tlist->block[tidx--]=='\0')
+      break;
+  
+  if (tlist->block[tidx+1]=='\0')
+  {
+    tidx+=2;
+    return &tlist->block[tidx];
+  }
+  else{
+    tidx=0;
+    return &tlist->block[0];
+  }
+  
+}
+
+
+strstack::~strstack(){
+  memnode *ptr;
+  while (list !=NULL){
+    ptr=list->next;    
+    delete [] list->block;
+    delete list;
+    list=ptr;
+  } 
+}
+
+
+storage::storage(int maxsize,int blocksize)
+{
+  newmemory=0;
+  newcalls=0;
+  setsize=maxsize;
+  poolsize=blocksize; //in bytes
+  poolset=new mempool* [setsize+1];
+  for (int i=0;i<=setsize;i++)
+    poolset[i]=NULL;
+}
+
+
+storage::~storage(){
+  for (int i=0;i<=setsize;i++)
+    if (poolset[i])
+      delete poolset[i];
+  delete [] poolset;
+}
+
+char *storage::allocate(int size){
+  
+  if (size<=setsize){
+    if (!poolset[size]){
+      poolset[size]=new mempool(size,poolsize/size);
+    }
+    return poolset[size]->allocate();
+  }
+  else{
+    
+    newmemory+=size+8;
+    newcalls++;
+    char* p=(char *)calloc(sizeof(char),size);
+    if (p==NULL){
+      cerr << "storage::alloc insufficient memory\n";
+      exit(1);
+    }
+    return p;
+  }
+}
+
+char *storage::reallocate(char *oldptr,int oldsize,int newsize){
+  
+  char *newptr;
+  
+  assert(newsize>oldsize);
+  
+  if (oldsize<=setsize){
+    if (newsize<=setsize){
+      if (!poolset[newsize])
+        poolset[newsize]=new mempool(newsize,poolsize/newsize);
+      newptr=poolset[newsize]->allocate();
+      memset((char*)newptr,0,newsize);
+    }
+    else
+      newptr=(char *)calloc(sizeof(char),newsize);
+    
+    if (oldptr && oldsize){
+      memcpy(newptr,oldptr,oldsize);
+      poolset[oldsize]->free(oldptr);
+    }
+  }
+  else{
+    newptr=(char *)realloc(oldptr,newsize);
+    if (newptr==oldptr) 
+      cerr << "r\b";
+    else
+      cerr << "a\b";
+  }
+  if (newptr==NULL){
+    cerr << "storage::realloc insufficient memory\n";
+    exit(1);
+  }
+  
+  return newptr;
+}
+
+int storage::free(char *addr,int size){
+  
+  /*
+   while(size<=setsize){
+     if (poolset[size] && poolset[size]->free(addr))
+       break;
+     size++;
+   }
+   */
+  
+  if (size>setsize)
+    return free(addr),1;
+  else{
+    poolset[size] && poolset[size]->free(addr);
+  }
+  return 1;
+}
+
+
+void storage::stat(){
+  int used=0;
+  int memory=sizeof(char *) * setsize;
+  int waste=0;
+  
+  for (int i=0;i<=setsize;i++)
+    if (poolset[i]){
+      used++;
+      memory+=poolset[i]->used();
+      waste+=poolset[i]->wasted();
+    }
+      
+      TRACE_ERR("storage class statistics\n"
+                << "alloc entries " << newcalls 
+                << " used memory " << newmemory/1024 << "Kb\n"
+                << "mpools " << setsize
+                << " active  " << used 
+                << " used memory " << memory/1024 << "Kb"
+                << " wasted " << waste/1024 << "Kb\n");
+}
+
diff --git a/irstlm/src/src/mempool.h b/irstlm/src/src/mempool.h
new file mode 100644
index 000000000..eafdcd110
--- /dev/null
+++ b/irstlm/src/src/mempool.h
@@ -0,0 +1,172 @@
+/******************************************************************************
+ IrstLM: IRST Language Model Toolkit
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+
+******************************************************************************/
+
+// An efficient memory manager 
+// by M. Federico 
+// Copyright Marcello Federico, ITC-irst, 1998
+
+#ifndef MF_MEMPOOL_H
+#define MF_MEMPOOL_H
+
+#ifndef NULL
+const int NULL=0;
+//#define NULL=0;
+#endif
+
+#include <iostream>  // std::ostream
+
+//! Memory block
+/*! This can be used by:
+- mempool to store items of fixed size
+- strstack to store strings of variable size
+*/
+
+class memnode{
+  friend class mempool;   //!< grant access 
+  friend class strstack;  //!< grant access 
+  char          *block;   //!< block of memory
+  memnode        *next;   //!< next block ptr    
+};
+
+
+//! Memory pool
+
+/*! A memory pool is composed of:
+   - a linked list of block_num memory blocks
+   - each block might contain up to block_size items
+   - each item is made of exactly item_size bytes
+*/
+
+class mempool{
+  int         block_size;         //!< number of entries per block
+  int         item_size;          //!< number of bytes per entry
+  int         true_size;          //!< number of bytes per block
+  memnode*   block_list;          //!< list of blocks
+  char*       free_list;          //!< free entry list
+  int         entries;            //!< number of stored entries  
+  int         blocknum;           //!< number of allocated blocks  
+ public:
+
+  //! Creates a memory pool
+  mempool(int is, int bs); 
+
+  //! Destroys memory pool
+  ~mempool();
+  
+  //! Prints a map of memory occupancy
+  void map(std::ostream& co);
+
+  //! Allocates a single memory entry
+  char *allocate();
+
+  //! Frees a single memory entry
+  int free(char* addr);
+
+  //! Prints statistics about this mempool 
+  void stat();
+
+  //! Returns effectively used memory (bytes) 
+  /*! includes 8 bytes required by each call of new */
+     
+  int used(){return blocknum * (true_size + 8);}; 
+  
+  //! Returns amount of wasted memory (bytes) 
+  int wasted(){return used()-(entries * item_size);};
+}; 
+
+//! A stack to store strings 
+
+/*!
+  The stack is composed of 
+  - a list of blocks memnode of fixed size
+  - attribute blocknum tells the block on top
+  - attribute idx tells position of the top string
+*/
+
+class strstack{
+  memnode* list; //!< list of memory blocks
+  int   size;    //!< size of each block
+  int    idx;    //!< index of last stored string
+  int  waste;    //!< current waste of memory
+  int memory;    //!< current use of memory
+  int entries;   //!< current number of stored strings
+  int blocknum;  //!< current number of used blocks
+
+ public:
+
+  strstack(int bs=1000);
+
+  ~strstack();
+  
+  char *push(char *s);
+  
+  char *pop();
+  
+  char *top();
+  
+  void stat();
+  
+  int used(){return memory;};
+  
+  int wasted(){return waste;};
+
+};
+
+
+//! Manages multiple memory pools 
+
+/*!
+  This class permits to manage memory pools 
+  with items up to a specified size.
+  - items within the allowed range are stored in memory pools
+  - items larger than the limit are allocated with new
+*/
+
+
+class storage{
+  mempool **poolset;  //!< array of memory pools
+  int setsize;        //!< number of memory pools/maximum elem size 
+  int poolsize;       //!< size of each block
+  int newmemory;      //!< stores amount of used memory
+  int newcalls;       //!< stores number of allocated blocks
+ public:
+
+  //! Creates storage
+  storage(int maxsize,int blocksize);
+
+  //! Destroys storage
+  ~storage();
+  
+  /* names of below functions have been changed so as not to interfere with macros for malloc/realloc/etc -- EVH */
+
+  //! Allocates memory 
+  char *allocate(int size);
+
+  //! Realloc memory
+  char *reallocate(char *oldptr,int oldsize,int newsize);
+
+  //! Frees memory of an entry   
+  int free(char *addr,int size=0);
+
+  //! Prints statistics about storage
+  void stat();
+};
+
+#endif
diff --git a/irstlm/src/src/n_gram.cpp b/irstlm/src/src/n_gram.cpp
new file mode 100644
index 000000000..bf12b7015
--- /dev/null
+++ b/irstlm/src/src/n_gram.cpp
@@ -0,0 +1,214 @@
+/******************************************************************************
+ IrstLM: IRST Language Model Toolkit
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+
+******************************************************************************/
+
+#include <iomanip>
+#include <cassert>
+#include "mempool.h"
+#include "htable.h"
+#include "dictionary.h"
+#include "n_gram.h"
+#include "index.h"
+
+using namespace std;
+
+ngram::ngram(dictionary* d,int sz){
+  dict=d;
+  size=sz;
+  succ=0;
+  freq=0;
+  info=0;
+  pinfo=0;
+  link=NULL;
+  isym=-1;
+  memset(word,0,sizeof(int)*MAX_NGRAM);
+  memset(midx,0,sizeof(int)*MAX_NGRAM);
+}
+
+ngram::ngram(ngram& ng){
+  size=ng.size;
+  freq=ng.freq;
+  succ=0;
+  info=0;
+  pinfo=0;
+  link=NULL;
+  isym=-1;
+  dict=ng.dict;
+  memcpy(word,ng.word,sizeof(int)*MAX_NGRAM);
+  memcpy(midx,ng.word,sizeof(int)*MAX_NGRAM);
+
+}
+
+void ngram::trans (const ngram& ng){
+  size=ng.size;
+  freq=ng.freq;
+  if (dict == ng.dict){
+    info=ng.info;
+    isym=ng.isym;      
+    memcpy(word,ng.word,sizeof(int)*MAX_NGRAM);
+    memcpy(midx,ng.midx,sizeof(int)*MAX_NGRAM);
+  }
+  else{
+    info=0;
+    memset(midx,0,sizeof(int)*MAX_NGRAM);
+    isym=-1;
+    for (int i=1;i<=size;i++)
+      word[MAX_NGRAM-i]=dict->encode(ng.dict->decode(*ng.wordp(i)));
+  }
+}
+
+
+ifstream& operator>> ( ifstream& fi , ngram& ng){
+  char w[MAX_WORD];
+  memset(w,0,MAX_WORD);
+  w[0]='\0';
+  
+  if (!(fi >> setw(MAX_WORD) >> w))
+    return fi;
+  
+  if (strlen(w)==(MAX_WORD-1))
+    cerr << "ngram: a too long word was read (" 
+	 << w << ")\n";
+  
+  if (ng.dict->intsymb() && 
+      (strlen(w)==1) && (index(ng.dict->intsymb(),w[0])!=NULL)){
+    
+    ng.isym=(long)index(ng.dict->intsymb(),w[0]) -
+      (long)ng.dict->intsymb();
+    ng.size=0;
+    return fi;
+  }
+  
+  int c=ng.dict->encode(w);
+  
+  if (c == -1 ){
+    cerr << "ngram: " << w << " is OOV \n";
+    exit(1);
+  }
+  
+  memcpy(ng.word,ng.word+1,(MAX_NGRAM-1)*sizeof(int));
+  
+  ng.word[MAX_NGRAM-1]=(int)c;
+  ng.freq=1;
+  
+  if (ng.size<MAX_NGRAM) ng.size++;
+
+  return fi;
+
+}
+
+
+int ngram::pushw(char* w){
+  
+  assert(dict!=NULL);
+
+  int c=dict->encode(w);
+
+  if (c == -1 ){
+    cerr << "ngram: " << w << " is OOV \n";
+    exit(1);
+  }
+  
+  pushc(c);
+
+  return 1;
+
+}
+
+int ngram::pushc(int c){
+  
+  int buff[MAX_NGRAM-1];
+  memcpy(buff,word+1,(MAX_NGRAM-1)*sizeof(int));
+  memcpy(word,buff,(MAX_NGRAM-1)*sizeof(int));
+
+  word[MAX_NGRAM-1]=(int)c;
+  if (size<MAX_NGRAM) size++;
+
+  return 1;
+
+}
+
+
+istream& operator>> ( istream& fi , ngram& ng){
+  char w[MAX_WORD];
+  memset(w,0,MAX_WORD);
+  w[0]='\0';
+  
+  assert(ng.dict != NULL);
+
+  if (!(fi >> setw(MAX_WORD) >> w))
+    return fi;
+  
+  if (strlen(w)==(MAX_WORD-1))
+      cerr << "ngram: a too long word was read (" 
+	   << w << ")\n";
+
+  if (ng.dict->intsymb() && 
+      (strlen(w)==1) && (index(ng.dict->intsymb(),w[0])!=NULL)){
+    ng.isym=(long)index(ng.dict->intsymb(),w[0])-(long)ng.dict->intsymb();
+    ng.size=0;
+    return fi;
+  }
+
+  ng.pushw(w);
+
+  ng.freq=1;
+  
+  return fi;
+
+}
+
+ofstream& operator<< (ofstream& fo,ngram& ng){
+
+  assert(ng.dict != NULL);
+
+  for (int i=ng.size;i>0;i--)
+    fo << ng.dict->decode(ng.word[MAX_NGRAM-i]) << " ";
+  //fo << "[size " << ng.size << " freq " << ng.freq << "]"; 
+  fo << ng.freq; 
+  return fo;
+}
+
+ostream& operator<< (ostream& fo,ngram& ng){
+
+  assert(ng.dict != NULL);
+
+  for (int i=ng.size;i>0;i--)
+    fo << ng.dict->decode(ng.word[MAX_NGRAM-i]) << " ";
+  //fo << "[size " << ng.size << " freq " << ng.freq << "]"; 
+  fo << ng.freq; 
+
+  return fo;
+}
+
+/*
+main(int argc, char** argv){
+  dictionary d(argv[1]);
+  ifstream txt(argv[1]);
+  ngram ng(&d);
+  
+  while (txt >> ng){
+    cout << ng << "\n";
+  }
+
+  ngram ng2=ng;
+  cerr << "copia l'ultimo =" << ng << "\n";
+}
+*/
+
diff --git a/irstlm/src/src/n_gram.h b/irstlm/src/src/n_gram.h
new file mode 100644
index 000000000..f36b5d2df
--- /dev/null
+++ b/irstlm/src/src/n_gram.h
@@ -0,0 +1,118 @@
+/******************************************************************************
+ IrstLM: IRST Language Model Toolkit
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+
+******************************************************************************/
+
+// n-gram tables 
+// by M. Federico
+// Copyright Marcello Federico, ITC-irst, 1998
+
+#ifndef MF_NGRAM_H
+#define MF_NGRAM_H
+
+#include <fstream>
+#include <cassert>
+#include "dictionary.h"
+
+#ifdef MYMAXNGRAM
+#define MAX_NGRAM MYMAXNGRAM
+#else
+#define MAX_NGRAM 20
+#endif
+
+class dictionary;
+
+//typedef int code;
+
+class ngram{
+  int  word[MAX_NGRAM];  //encoded ngram
+ public:
+  dictionary *dict;      //dictionary
+  char* link;            // ngram-tree pointer
+  int  midx[MAX_NGRAM];  // ngram-tree scan pointer
+  int    lev;            // ngram-tree level
+  int   size;            // ngram size
+  int   freq;            // ngram frequency
+  int   succ;            // number of successors
+
+  unsigned char info;    // ngram-tree info flags
+  unsigned char pinfo;   // ngram-tree parent info flags
+  int  isym;             // last interruption symbol
+
+  ngram(dictionary* d,int sz=0);
+  ngram(ngram& ng);
+  
+  int *wordp()// n-gram pointer
+    {return wordp(size);}; 
+  int *wordp(int k) // n-gram pointer
+    {return size>=k?&word[MAX_NGRAM-k]:0;}; 
+  const int *wordp() const // n-gram pointer
+    {return wordp(size);}; 
+  const int *wordp(int k) const // n-gram pointer
+    {return size>=k?&word[MAX_NGRAM-k]:0;}; 
+
+  int shift(){
+    for (int i=(MAX_NGRAM-1);i>0;i--){
+      word[i]=word[i-1];
+    }
+    size--;
+    return 1;
+  }
+
+
+  int containsWord(char* s,int lev){
+
+    int c=dict->encode(s);
+    if (c == -1) return 0;
+
+    assert(lev <= size);
+    for (int i=0;i<lev;i++){
+      if (*wordp(size-i)== c) return 1;
+    }
+    return 0;
+  }
+    
+
+  void trans(const ngram& ng);
+
+  friend std::ifstream& operator>> (std::ifstream& fi,ngram& ng);
+  friend std::ofstream& operator<< (std::ofstream& fi,ngram& ng);
+  friend std::istream& operator>> (std::istream& fi,ngram& ng);
+  friend std::ostream& operator<< (std::ostream& fi,ngram& ng);
+
+  inline int ckhisto(int sz){
+    
+    for (int i=sz;i>1;i--) 
+      if (*wordp(i)==dict->oovcode())
+	return 0;
+    return 1;
+  }
+
+  int pushc(int c);
+  int pushw(char* w);
+
+  //~ngram();
+
+
+
+};
+
+#endif
+
+
+
diff --git a/irstlm/src/src/ngramcache.cpp b/irstlm/src/src/ngramcache.cpp
new file mode 100644
index 000000000..3b4234bcc
--- /dev/null
+++ b/irstlm/src/src/ngramcache.cpp
@@ -0,0 +1,89 @@
+/******************************************************************************
+IrstLM: IRST Language Model Toolkit
+Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+
+******************************************************************************/
+#include <iostream>
+#include <fstream>
+#include <stdexcept>
+#include <cassert>
+#include "math.h"
+#include "mempool.h"
+#include "htable.h"
+
+#include "ngramcache.h"
+
+using namespace std;
+
+ngramcache::ngramcache(int n,int size,int maxentries){
+      ngsize=n;
+      infosize=size;
+      maxn=maxentries;
+      entries=0;
+      ht=new htable(maxn * 2, ngsize * sizeof(int),INT,NULL); //lower load factor to reduce collisions 
+      mp=new mempool(ngsize * sizeof(int)+infosize,1000000); 
+      accesses=0;
+      hits=0;
+    };
+  
+ngramcache::~ngramcache(){
+    ht->stat();//ht->map();
+    mp->stat();
+    delete ht;delete mp;
+};
+
+
+//resize cache to specified number of entries
+void ngramcache::reset(int n){
+    ht->stat();
+    delete ht;delete mp;  
+    if (n>0) maxn=n;
+    ht=new htable(maxn * 2, ngsize * sizeof(int),INT,NULL); //load factor 2   
+    mp=new mempool(ngsize * sizeof(int)+infosize,1000000); 
+    entries=0;
+  }
+
+
+
+
+char* ngramcache::get(const int* ngp,char* info){       
+    char *found;
+   // cout << "ngramcache::get() ";
+    //for (int i=0;i<ngsize;i++) cout << ngp[i] << " "; cout <<"\n"; 
+  accesses++;
+  if (found=ht->search((char *)ngp,HT_FIND)){
+    if (info) memcpy(info,found+ngsize*sizeof(int),infosize);
+    hits++;
+  }; 
+    return found;
+  };
+  
+
+int ngramcache::add(const int* ngp,const char* info){
+    
+    char* entry=mp->allocate();
+    memcpy(entry,(char*) ngp,sizeof(int) * ngsize);    
+    memcpy(entry + ngsize * sizeof(int),(char *)info,infosize);
+    char *found=ht->search((char *)entry,HT_ENTER);
+    assert(found == entry); //false if key is already insided
+    entries++;
+    return 1;
+  }
+  
+void ngramcache::stat(){
+   cerr << "ngramcache stats: entries=" << entries << " acc=" << accesses << " hits=" << hits << "\n";
+};
diff --git a/irstlm/src/src/ngramcache.h b/irstlm/src/src/ngramcache.h
new file mode 100644
index 000000000..d7d98e644
--- /dev/null
+++ b/irstlm/src/src/ngramcache.h
@@ -0,0 +1,51 @@
+/******************************************************************************
+IrstLM: IRST Language Model Toolkit
+Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+
+******************************************************************************/
+
+#ifndef MF_NGRAMCACHE_H
+#define MF_NGRAMCACHE_H
+
+#include "mempool.h"
+#include "htable.h"
+
+class ngramcache{
+private:
+  htable* ht;  
+  mempool *mp;
+  int maxn;
+  int ngsize;
+  int infosize;
+  int accesses;
+  int hits;
+  int entries;
+
+public:
+    
+  int cursize(){return entries;};
+  ngramcache(int n,int size,int maxentries);  
+  ~ngramcache();  
+  void reset(int n=0);  
+  char* get(const int* ngp,char* info=NULL);  
+  int add(const int* ngp,const char* info);  
+  int isfull(){return (entries >= maxn);};  
+  void stat();
+};
+
+#endif
+
diff --git a/irstlm/src/src/quantize-lm.cpp b/irstlm/src/src/quantize-lm.cpp
new file mode 100644
index 000000000..1b326ce97
--- /dev/null
+++ b/irstlm/src/src/quantize-lm.cpp
@@ -0,0 +1,424 @@
+/******************************************************************************
+IrstLM: IRST Language Model Toolkit, compile LM
+Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+
+******************************************************************************/
+
+using namespace std;
+
+#include <iostream>
+#include <fstream>
+#include <vector>
+#include <string>
+#include <stdlib.h>
+#include <assert.h>
+#include "math.h"
+#include "util.h"
+
+#define MAX_LINE 1024
+
+//----------------------------------------------------------------------
+//  Special type and global variable for the BIN CLUSTERING algorithm
+//      
+//      
+//----------------------------------------------------------------------
+
+typedef struct{
+  double pt;
+  int idx;
+  short code;
+}BinEntry;
+
+
+int cmpBinEntry(const void* a,const void* b){
+  if (*(double *)a > *(double*)b)
+    return 1;
+  else if (*(double *)a < *(double*)b)
+    return -1;
+  else
+    return 0;
+}
+
+BinEntry* bintable=NULL;
+
+//----------------------------------------------------------------------
+//  Global entry points
+//----------------------------------------------------------------------
+
+int parseWords(char *sentence, char **words, int max);
+
+int ComputeCluster(int nc, double* cl,int N,double* Pts);
+
+//----------------------------------------------------------------------
+//  Global parameters (some are set in getArgs())
+//----------------------------------------------------------------------
+
+int       k      = 256;   // number of centers
+const int MAXLEV = 11;    //maximum n-gram size
+
+//----------------------------------------------------------------------
+//  Main program
+//----------------------------------------------------------------------
+
+void usage(const char *msg = 0) {
+  if (msg) { std::cerr << msg << std::endl; }
+  std::cerr << "Usage: quantize-lm input-file.lm [output-file.qlm]" << std::endl;
+  if (!msg) std::cerr << std::endl
+    << "  quantize-lm reads a standard LM file in ARPA format and produces" << std::endl
+    << "  a version of it with quantized probabilities and back-off weights"<< std::endl
+    << "  that the IRST LMtoolkit can compile. Accepts LMs with .gz suffix." << std::endl;
+  }
+
+
+int main(int argc, const char **argv)
+{
+
+  //Process Parameters 
+  
+  if (argc < 2) { usage(); exit(1); }
+  std::vector<std::string> files;
+  for (int i=1; i < argc; i++) {
+    std::string opt = argv[i];
+    files.push_back(opt);
+  }
+  if (files.size() > 2) { usage("Too many arguments"); exit(1); }
+  if (files.size() < 1) { usage("Please specify a LM file to read from"); exit(1); }
+  
+  
+  std::string infile = files[0];
+  std::string outfile="";
+
+  if (files.size() == 1) {  
+    outfile=infile;
+    
+    //remove path information
+    std::string::size_type p = outfile.rfind('/');
+    if (p != std::string::npos && ((p+1) < outfile.size()))           
+      outfile.erase(0,p+1);
+    
+    //eventually strip .gz 
+    if (outfile.compare(outfile.size()-3,3,".gz")==0)
+      outfile.erase(outfile.size()-3,3);
+    
+    outfile+=".qlm";
+  }
+  else
+    outfile = files[1];
+  
+  
+  
+  std::cout << "Reading " << infile << "..." << std::endl;
+  
+ inputfilestream inp(infile.c_str());
+  if (!inp.good()) {
+    std::cerr << "Failed to open " << infile << "!\n";
+    exit(1);
+  }
+  
+
+  std::ofstream out(outfile.c_str());
+  std::cout << "Writing " << outfile << "..." << std::endl;
+
+  //prepare temporary file to save n-gram blocks for multiple reads 
+  //this avoids using seeks which do not work with inputfilestream
+  //it's odd but i need a bidirectional filestream!
+  
+  string filePath;ofstream dummy;
+  createtempfile(dummy,filePath,ios::out);
+  dummy.close();
+  
+  fstream filebuff(filePath.c_str(),ios::out|ios::in);
+    
+  int nPts = 0;  // actual number of points
+  
+  // *** Read ARPA FILE ** 
+  
+  int numNgrams[MAXLEV + 1]; /* # n-grams for each order */
+  int Order,MaxOrder;
+  int n;
+  
+  float logprob,logbow, logten=log(10.0);
+  
+  double* dataPts=NULL;
+  double* centersP=NULL; double* centersB=NULL;
+  
+  int* mapP=NULL; int* mapB=NULL;
+  
+  int centers=k;
+  streampos iposition;
+  
+  out << "qARPA\n"; //print output header
+  
+    
+  for (int i=1;i<=MAXLEV;i++) numNgrams[i]=0;
+  
+  char line[MAX_LINE];
+  
+  while (inp.getline(line,MAX_LINE)){
+    
+    bool backslash = (line[0] == '\\');
+    
+    if (sscanf(line, "ngram %d=%d", &Order, &n) == 2) {
+      numNgrams[Order] = n;
+      MaxOrder=Order;
+    }
+    
+    if (backslash && sscanf(line, "\\%d-grams", &Order) == 1) {
+      
+      out << line << "\n";
+      cerr << "-- Start processing of " << Order << "-grams\n";
+      assert(Order <= MAXLEV);
+      
+      int N=numNgrams[Order];
+      centers=k;
+      if (Order==1) centers=256; // always use 256 centers
+      
+      char* words[MAXLEV+3];
+      dataPts=new double[N]; // allocate data         
+      
+      //reset tempout file 
+      filebuff.seekg(0);
+           
+      for (nPts=0;nPts<N;nPts++){
+        inp.getline(line,MAX_LINE);  
+        filebuff << line << std::endl;
+        int howmany = parseWords(line, words, Order + 3);
+        assert(howmany == Order+2 || howmany == Order+1);
+        sscanf(words[0],"%f",&logprob);
+        dataPts[nPts]=exp(logprob * logten);
+      }
+                
+      cerr << "quantizing " << N << " probabilities\n";
+      
+      centersP=new double[centers];
+      mapP=new int[N];
+      
+      ComputeCluster(centers,centersP,N,dataPts);
+      
+
+      assert(bintable !=NULL);
+      for (int p=0;p<N;p++){
+        mapP[bintable[p].idx]=bintable[p].code;
+      }
+      
+      if (Order<MaxOrder){
+        //second pass to read back-off weights
+      
+        filebuff.seekg(0);
+       
+        for (nPts=0;nPts<N;nPts++){
+         
+          filebuff.getline(line,MAX_LINE);
+          
+          int howmany = parseWords(line, words, Order + 3);
+          if (howmany==Order+2) //backoff is written
+            sscanf(words[Order+1],"%f",&logbow);
+          else
+            logbow=0; // backoff is implicit                    
+          dataPts[nPts]=exp(logbow * logten);
+        }
+        
+        centersB=new double[centers];
+        mapB=new int[N];
+        
+        cerr << "quantizing " << N << " backoff weights\n";
+        ComputeCluster(centers,centersB,N,dataPts);
+        
+        assert(bintable !=NULL);
+        for (int p=0;p<N;p++){
+          mapB[bintable[p].idx]=bintable[p].code;
+        }
+        
+      }
+      
+            
+      out << centers << "\n";
+      for (nPts=0;nPts<centers;nPts++){
+        out << log(centersP[nPts])/logten;
+        if (Order<MaxOrder) out << " " << log(centersB[nPts])/logten;
+        out << "\n";
+      }
+      
+      filebuff.seekg(0);
+      
+      for (nPts=0;nPts<numNgrams[Order];nPts++){
+        
+        filebuff.getline(line,MAX_LINE);
+        
+        parseWords(line, words, Order + 3);
+        
+        out << mapP[nPts];
+        
+        for (int i=1;i<=Order;i++) out << "\t" << words[i];
+        
+        if (Order < MaxOrder) out << "\t" << mapB[nPts];
+        
+        out << "\n";
+        
+      }
+            
+      if (mapP){delete [] mapP;mapP=NULL;}
+      if (mapB){delete [] mapB;mapB=NULL;}
+      
+      if (centersP){delete [] centersP; centersP=NULL;}
+      if (centersB){delete [] centersB; centersB=NULL;}
+      
+      delete [] dataPts;
+      
+      continue;
+      
+      
+    }
+    
+    out << line << "\n";
+  }
+  
+  cerr << "---- done\n";
+  
+  out.flush();
+   
+  out.close();
+  inp.close();
+  
+  removefile(filePath);
+}
+
+// Compute Clusters
+
+int ComputeCluster(int centers,double* ctrs,int N,double* dataPts){
+  
+  
+  //cerr << "\nExecuting Clutering Algorithm:  k=" << centers<< "\n";
+  
+  if (bintable) delete [] bintable;
+  
+  bintable=new BinEntry[N];
+  for (int i=0;i<N;i++){
+    bintable[i].pt=dataPts[i];
+    bintable[i].idx=i;
+    bintable[i].code=0;
+  }
+  
+  //cout << "start sort \n";
+  qsort(bintable,N,sizeof(BinEntry),cmpBinEntry);
+  
+  int different=1;
+  
+  for (int i=1;i<N;i++)
+    if (bintable[i].pt!=bintable[i-1].pt)
+      different++;
+  
+  int interval=different/centers;
+  if (interval==0) interval++;
+  
+  int* population=new int[centers];    
+  int* species=new int[centers];    
+  
+  //cout << " Different entries=" << different 
+  //     << " Total Entries=" << N << " Bin Size=" << interval << "\n";
+  
+  for (int i=0;i<centers;i++){
+    population[i]=species[i]=0;
+    ctrs[i]=0.0;
+  }
+  
+  // initial values
+  bintable[0].code=0;    
+  population[0]=1;
+  species[0]=1;
+  
+  int currcode=0;    
+  different=1;
+  
+  for (int i=1;i<N;i++){
+    
+    if ((bintable[i].pt!=bintable[i-1].pt)){
+      different++;
+      if ((different % interval) == 0)
+        if ((currcode+1) < centers 
+            && 
+            population[currcode]>0){
+                currcode++;
+        }
+    }
+      
+      if (bintable[i].pt == bintable[i-1].pt)
+        bintable[i].code=bintable[i-1].code;
+      else{
+        bintable[i].code=currcode;
+        species[currcode]++;
+      }
+      
+      population[bintable[i].code]++;
+      
+      assert(bintable[i].code < centers);
+      
+      ctrs[bintable[i].code]+=bintable[i].pt;
+      
+  }
+    
+    
+    for (int i=0;i<centers;i++){
+      if (population[i]>0){
+        ctrs[i]/=(float)population[i];
+        if (ctrs[i]<1e-99){
+            cerr << "Warning: adjusting center with too small prob " << ctrs[i] << "\n";
+            ctrs[i]=1e-99;
+        }
+      }
+      //cout << i << " ctr " << ctrs[i] << " population " << population[i] << " species " << species[i] <<"\n";
+    }
+    
+    cout.flush();
+    
+    delete [] population;
+    delete [] species;
+    
+    
+    return 1;
+    
+}
+
+//----------------------------------------------------------------------
+//  Reading/Printing utilities
+//      readPt - read a point from input stream into data storage
+//              at position i.  Returns false on error or EOF.
+//      printPt - prints a points to output file
+//----------------------------------------------------------------------
+
+
+int parseWords(char *sentence, char **words, int max)
+{
+  char *word;
+  int i = 0;
+  
+  char *const wordSeparators = " \t\r\n";
+  
+  for (word = strtok(sentence, wordSeparators);
+       i < max && word != 0;
+       i++, word = strtok(0, wordSeparators))
+  {
+    words[i] = word;
+  }
+  if (i < max) {
+    words[i] = 0;
+  }
+  
+  return i;
+}
+
+
diff --git a/irstlm/src/src/util.cpp b/irstlm/src/src/util.cpp
new file mode 100644
index 000000000..1b81218d1
--- /dev/null
+++ b/irstlm/src/src/util.cpp
@@ -0,0 +1,77 @@
+
+#ifdef WIN32
+#include <windows.h>
+#endif
+
+#include "util.h"
+
+using namespace std;
+
+string gettempfolder()
+{	
+#ifdef _WIN32
+	char *tmpPath = getenv("TMP");
+	string str(tmpPath);
+	if (str.substr(str.size() - 1, 1) != "\\")
+		str += "\\";
+	return str;
+#else
+	return "/tmp/";
+#endif
+}
+
+
+void createtempfile(ofstream  &fileStream, string &filePath, std::ios_base::openmode flags)
+{	
+#ifdef _WIN32
+	char buffer[BUFSIZ];
+	::GetTempFileNameA(gettempfolder().c_str(), "", 0, buffer);
+	filePath = buffer;
+#else
+	char buffer[L_tmpnam];
+	strcpy(buffer, gettempfolder().c_str());
+	strcat(buffer, "dskbuff--XXXXXX");
+	mkstemp(buffer);
+	filePath = buffer;
+#endif
+	fileStream.open(filePath.c_str(), flags);
+}
+
+void removefile(const std::string &filePath)
+{
+#ifdef _WIN32
+	::DeleteFileA(filePath.c_str());
+#else
+  char cmd[100];
+  sprintf(cmd,"rm %s",filePath.c_str());
+  system(cmd);
+#endif
+}
+
+
+
+inputfilestream::inputfilestream(const std::string &filePath)
+: std::istream(0),
+m_streambuf(0)
+{
+  if (filePath.size() > 3 &&
+      filePath.substr(filePath.size() - 3, 3) == ".gz")
+  {
+    m_streambuf = new gzfilebuf(filePath.c_str());
+  } else {
+    std::filebuf* fb = new std::filebuf();
+    fb->open(filePath.c_str(), std::ios::in);
+    m_streambuf = fb;
+  }
+  this->init(m_streambuf);
+}
+
+inputfilestream::~inputfilestream()
+{
+  delete m_streambuf; m_streambuf = 0;
+}
+
+void inputfilestream::close()
+{
+}
+
diff --git a/irstlm/src/src/util.h b/irstlm/src/src/util.h
new file mode 100644
index 000000000..229d301cc
--- /dev/null
+++ b/irstlm/src/src/util.h
@@ -0,0 +1,25 @@
+#ifndef IRSTLM_UTIL_H
+#define IRSTLM_UTIL_H
+
+#include <string>
+#include <fstream>
+#include "gzfilebuf.h"
+
+std::string gettempfolder();
+void createtempfile(std::ofstream  &fileStream, std::string &filePath, std::ios_base::openmode flags);
+void removefile(const std::string &filePath);
+
+class inputfilestream : public std::istream
+{
+protected:
+	std::streambuf *m_streambuf;
+public:
+  
+	inputfilestream(const std::string &filePath);
+	~inputfilestream();
+  
+	void close();
+};
+
+#endif
+
author	hieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230>	2006-11-05 16:02:33 +0300
committer	hieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230>	2006-11-05 16:02:33 +0300
commit	8313b93157fdeedf7e8c939071b085b294450c56 (patch)
tree	64e1ef0674cb6ceb4830649d4d160f886df103e2 /irstlm/src
parent	e8cd4e752f4e239d99e427c0cceafcea8bcaeb1f (diff)