Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
path: root/irstlm
diff options
context:
space:
mode:
author redpony <redpony@1f5c12ca-751b-0410-a591-d2e778427230> 2006-07-20 18:28:40 +0400
committer redpony <redpony@1f5c12ca-751b-0410-a591-d2e778427230> 2006-07-20 18:28:40 +0400
commit 6020abb071efe4410f5c0be553f9be2500193b19 (patch)
tree 47b76b9ec5fa062a3435caebde420189c994f093 /irstlm
parent e713be1848591e4c3873319403e89fd622c37be5 (diff)
remove mfstream and make lmtable take an istream instead of a filename
- mfstream is linux-only
- istream is flexible (can be used with more exotic data sources)

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@213 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'irstlm')
-rw-r--r-- irstlm/config.h.in 33
-rw-r--r-- irstlm/src/Makefile.am 1
-rw-r--r-- irstlm/src/dictionary.cpp 15
-rw-r--r-- irstlm/src/dictionary.h 5
-rw-r--r-- irstlm/src/htable.h 4
-rw-r--r-- irstlm/src/lmtable.cpp 49
-rw-r--r-- irstlm/src/lmtable.h 8
-rw-r--r-- irstlm/src/mfstream.cpp 147
-rw-r--r-- irstlm/src/mfstream.h 223
-rw-r--r-- irstlm/src/ngram.cpp 1
10 files changed, 37 insertions, 449 deletions
diff --git a/irstlm/config.h.in b/irstlm/config.h.in
index 90ee0d001..b292ea963 100644
--- a/irstlm/config.h.in
+++ b/irstlm/config.h.in
@@ -1,35 +1,5 @@
/* config.h.in. Generated from configure.in by autoheader. */
-/* Define to 1 if you have the <inttypes.h> header file. */
-#undef HAVE_INTTYPES_H
-
-/* Define to 1 if you have the <memory.h> header file. */
-#undef HAVE_MEMORY_H
-
-/* flag for SRILM */
-#undef HAVE_SRILM
-
-/* Define to 1 if you have the <stdint.h> header file. */
-#undef HAVE_STDINT_H
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#undef HAVE_STDLIB_H
-
-/* Define to 1 if you have the <strings.h> header file. */
-#undef HAVE_STRINGS_H
-
-/* Define to 1 if you have the <string.h> header file. */
-#undef HAVE_STRING_H
-
-/* Define to 1 if you have the <sys/stat.h> header file. */
-#undef HAVE_SYS_STAT_H
-
-/* Define to 1 if you have the <sys/types.h> header file. */
-#undef HAVE_SYS_TYPES_H
-
-/* Define to 1 if you have the <unistd.h> header file. */
-#undef HAVE_UNISTD_H
-
/* Name of package */
#undef PACKAGE
@@ -48,8 +18,5 @@
/* Define to the version of this package. */
#undef PACKAGE_VERSION
-/* Define to 1 if you have the ANSI C header files. */
-#undef STDC_HEADERS
-
/* Version number of package */
#undef VERSION
diff --git a/irstlm/src/Makefile.am b/irstlm/src/Makefile.am
index afff1ea70..010d46d90 100644
--- a/irstlm/src/Makefile.am
+++ b/irstlm/src/Makefile.am
@@ -5,7 +5,6 @@ libirstlm_a_SOURCES = \
htable.cpp \
lmtable.cpp \
mempool.cpp \
- mfstream.cpp \
ngram.cpp
library_includedir=$(includedir)
diff --git a/irstlm/src/dictionary.cpp b/irstlm/src/dictionary.cpp
index ddfaef7ab..ecb631707 100644
--- a/irstlm/src/dictionary.cpp
+++ b/irstlm/src/dictionary.cpp
@@ -21,7 +21,8 @@
using namespace std;
#include <iomanip>
-#include "mfstream.h"
+#include <iostream>
+#include <fstream>
#include "mempool.h"
#include "htable.h"
#include "dictionary.h"
@@ -55,7 +56,7 @@ dictionary::dictionary(char *filename,int size,char* isymb,char* oovlexfile){
if (filename==NULL) return;
- mfstream inp(filename,ios::in);
+ std::ifstream inp(filename,ios::in);
if (!inp){
cerr << "cannot open " << filename << "\n";
@@ -87,7 +88,7 @@ void dictionary::generate(char *filename){
char *addr;
int k,c;
- mfstream inp(filename,ios::in);
+ ifstream inp(filename,ios::in);
if (!inp){
cerr << "cannot open " << filename << "\n";
@@ -131,7 +132,7 @@ void dictionary::load(char* filename){
char *addr;
int freqflag=0;
- mfstream inp(filename,ios::in);
+ ifstream inp(filename,ios::in);
if (!inp){
cerr << "\ncannot open " << filename << "\n";
@@ -184,7 +185,7 @@ void dictionary::load(char* filename){
}
-void dictionary::load(mfstream& inp){
+void dictionary::load(std::istream& inp){
char buffer[MAX_WORD];
char *addr;
@@ -216,7 +217,7 @@ void dictionary::load(mfstream& inp){
inp.getline(buffer,MAX_WORD-1);
}
-void dictionary::save(mfstream& out){
+void dictionary::save(std::ostream& out){
out << n << "\n";
for (int i=0;i<n;i++)
out << tb[i].word << " " << tb[i].freq << "\n";
@@ -314,7 +315,7 @@ void dictionary::grow(){
void dictionary::save(char *filename,int freqflag){
- mfstream out(filename,ios::out);
+ std::ofstream out(filename,ios::out);
if (!out){
cerr << "cannot open " << filename << "\n";
diff --git a/irstlm/src/dictionary.h b/irstlm/src/dictionary.h
index cc38e57c8..910ac2cea 100644
--- a/irstlm/src/dictionary.h
+++ b/irstlm/src/dictionary.h
@@ -74,7 +74,6 @@ typedef struct{
class strstack;
class htable;
-class mfstream;
class dictionary{
strstack *st; //!< stack of strings
@@ -175,8 +174,8 @@ class dictionary{
void generate(char *filename);
void load(char *filename);
void save(char *filename,int freqflag=0);
- void load(mfstream& fd);
- void save(mfstream& fd);
+ void load(std::istream& fd);
+ void save(std::ostream& fd);
int size(){return n;};
int getcode(const char *w);
diff --git a/irstlm/src/htable.h b/irstlm/src/htable.h
index 0dbba9b63..ba6472c49 100644
--- a/irstlm/src/htable.h
+++ b/irstlm/src/htable.h
@@ -23,6 +23,8 @@
#ifndef MF_HTABLE_H
#define MF_HTABLE_H
+#include <iostream>
+
#define Prime1 37
#define Prime2 1048583
#define BlockSize 100
@@ -90,7 +92,7 @@ class htable {
void stat();
//! Print a map of memory use
- void map(ostream& co=cout, int cols=80);
+ void map(std::ostream& co=std::cout, int cols=80);
//! Returns amount of used memory
int used(){return
diff --git a/irstlm/src/lmtable.cpp b/irstlm/src/lmtable.cpp
index a124792e9..e245a5d3e 100644
--- a/irstlm/src/lmtable.cpp
+++ b/irstlm/src/lmtable.cpp
@@ -20,7 +20,8 @@
using namespace std;
-#include "mfstream.h"
+#include <iostream>
+#include <fstream>
#include "math.h"
#include "mempool.h"
#include "htable.h"
@@ -29,7 +30,7 @@ using namespace std;
#include "lmtable.h"
-lmtable::lmtable(const char* filename, int n, int res, double dec){
+lmtable::lmtable(std::istream &in, int n, int res, double dec){
maxlev=n;
dict=NULL;
@@ -57,24 +58,22 @@ lmtable::lmtable(const char* filename, int n, int res, double dec){
}
- mfstream inp(filename,ios::in);
-
char header[1024];
char gzip_hdr[3]; gzip_hdr[0]=0x1f; gzip_hdr[1]=0x8b; gzip_hdr[2]=0;
- inp >> header;
+ in >> header;
// cerr << header << "\n";
if (strncmp(header,"Qblmt",6)==0 || strncmp(header,"blmt",4)==0)
- loadbin(filename);
+ loadbin(in, header);
else if (strncmp(header,"qARPA",6)==0)
- loadQtxt(filename,maxlev);
+ loadQtxt(in, header, maxlev);
else if (strncmp(header, gzip_hdr, 2)==0) {
- std::cerr << "TODO: implement loading with gzip\n";
+ std::cerr << "gzip'd files cannot be opened directly\n";
std::abort();
} else
- loadtxt(filename, maxlev,res,dec);
+ loadtxt(in, header, maxlev,res,dec);
dict->genoovcode();
cerr << "OOV code is " << dict->oovcode() << "\n";
@@ -102,17 +101,14 @@ unsigned int parseWords(char *sentence, char **words, unsigned int max)
}
-void lmtable::loadtxt(const char* filename,int maxOrder,int res,double dec){
+void lmtable::loadtxt(std::istream &inp,const char* header, int maxOrder,int res,double dec){
- ifstream inp(filename,ios::in);
-
dict=new dictionary(NULL,1000000,NULL,NULL);
dict->incflag(1);
ngram ng(dict); /* ngram translated to word indices */
- cerr << "loadtxt: " << filename
- << "... resolution " << res << " decay " << dec << "\n";
+ cerr << "loadtxt ... resolution " << res << " decay " << dec << "\n";
resolution=res;
decay=dec;
@@ -133,8 +129,9 @@ void lmtable::loadtxt(const char* filename,int maxOrder,int res,double dec){
* -1: pre-header, 0: header,
* 1: 1-grams, 2: 2-grams, ... */
char line[1024];
+ strncpy(line, header, 1024);
- while (inp.getline(line,1024)){
+ do { // header was already read in the calling function
if (line[0]=='\0') continue; //skip empty
@@ -284,7 +281,7 @@ void lmtable::loadtxt(const char* filename,int maxOrder,int res,double dec){
continue;
}
}
- }
+ } while (inp.getline(line,1024));
dict->incflag(0);
cerr << "done\n";
@@ -292,17 +289,15 @@ void lmtable::loadtxt(const char* filename,int maxOrder,int res,double dec){
};
-void lmtable::loadQtxt(const char* filename,int maxOrder){
+void lmtable::loadQtxt(std::istream &inp,const char* header,int maxOrder){
- mfstream inp(filename,ios::in);
-
dict=new dictionary(NULL,1000000,NULL,NULL);
dict->incflag(1);
ngram ng(dict); /* ngram translated to word indices */
- cerr << "loadQtxt: " << filename << "\n";
+ cerr << "loadQtxt\n";
isQtable=1;
@@ -568,7 +563,7 @@ int lmtable::mybsearch(char *ar, int n, int size,
void lmtable::savetxt(const char *filename){
- mfstream out(filename,ios::out);
+ std::ofstream out(filename,ios::out);
cerr << "savetxt: " << filename << "\n";
@@ -605,7 +600,7 @@ void lmtable::savetxt(const char *filename){
void lmtable::savebin(const char *filename){
- mfstream out(filename,ios::out);
+ std::ofstream out(filename,ios::out);
cerr << "savebin: " << filename << "\n";
@@ -640,16 +635,13 @@ void lmtable::savebin(const char *filename){
}
-void lmtable::loadbin(const char *filename){
+void lmtable::loadbin(std::istream &inp,const char* h){
- mfstream inp(filename,ios::in);
-
- cerr << "loadbin ... " << filename << " ";
+ cerr << "loadbin ... ";
char header[1024];
-
// read header
- inp >> header; inp >> maxlev;
+ inp >> maxlev;
if (strncmp(header,"Qblmt",6)==0) isQtable=1;
@@ -684,7 +676,6 @@ void lmtable::loadbin(const char *filename){
cerr << "loading " << cursize[i] << " " << i << "-grams\n";
inp.read(table[i],cursize[i]*nodesize(tbltype[i]));
}
- inp.close();
cerr << "done\n";
}
diff --git a/irstlm/src/lmtable.h b/irstlm/src/lmtable.h
index 893c8214d..59526925b 100644
--- a/irstlm/src/lmtable.h
+++ b/irstlm/src/lmtable.h
@@ -86,7 +86,7 @@ class lmtable{
dictionary *dict; // dictionary
- lmtable(const char* filename, int maxl,int res,double dec);
+ lmtable(std::istream& in, int maxl,int res,double dec);
~lmtable(){
for (int i=1;i<=maxlev;i++){
@@ -102,10 +102,10 @@ class lmtable{
void savetxt(const char *filename);
void savebin(const char *filename);
- void loadtxt(const char *filename, int maxl,int res, double dec);
- void loadbin(const char *filename);
+ void loadtxt(std::istream& in, const char* header, int maxl,int res, double dec);
+ void loadbin(std::istream& in, const char* header);
- void loadQtxt(const char *filename, int maxl);
+ void loadQtxt(std::istream& in, const char* header, int maxl);
double prob(ngram ng);
diff --git a/irstlm/src/mfstream.cpp b/irstlm/src/mfstream.cpp
deleted file mode 100644
index 7081af0c4..000000000
--- a/irstlm/src/mfstream.cpp
+++ /dev/null
@@ -1,147 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-#include <iostream>
-#include <fstream>
-#include <streambuf>
-#include <cstdio>
-#include "mfstream.h"
-
-using namespace std;
-
-void mfstream::open(const char *name,openmode mode){
-
- char cmode[10];
-
- if (strchr(name,' ')!=0){
- if (mode & ios::in)
- strcpy(cmode,"r");
- else
- if (mode & ios::out)
- strcpy(cmode,"w");
- else
- if (mode & ios::app)
- strcpy(cmode,"a");
- else{
- cerr << "cannot open file\n";
- exit(1);
- }
- _cmd=1;
- strcpy(_cmdname,name);
- _FILE=popen(name,cmode);
- buf=new fdbuf(fileno(_FILE));
- iostream::rdbuf((streambuf*) buf);
- }
- else{
- _cmd=0;
- fstream::open(name,mode);
- }
-
-}
-
-
-void mfstream::close(){
- if (_cmd==1){
- pclose(_FILE);
- delete buf;
- }
- else {
- fstream::clear();
- fstream::close();
- }
- _cmd=2;
-}
-
-
-
-int mfstream::swapbytes(char *p, int sz, int n)
-{
- char c,
- *l,
- *h;
-
- if((n<1) ||(sz<2)) return 0;
- for(; n--; p+=sz) for(h=(l=p)+sz; --h>l; l++) { c=*h; *h=*l; *l=c; }
- return 0;
-
-};
-
-
-mfstream& mfstream::iwritex(streampos loc,void *ptr,int size,int n)
-{
- streampos pos=tellp();
-
- seekp(loc);
-
- writex(ptr,size,n);
-
- seekp(pos);
-
- return *this;
-
-}
-
-
-mfstream& mfstream::readx(void *p, int sz,int n)
-{
- if(!read((char *)p, sz * n)) return *this;
-
- if(*(short *)"AB"==0x4241){
- swapbytes((char*)p, sz,n);
- }
-
- return *this;
-}
-
-mfstream& mfstream::writex(void *p, int sz,int n)
-{
- if(*(short *)"AB"==0x4241){
- swapbytes((char*)p, sz,n);
- }
-
- write((char *)p, sz * n);
-
- if(*(short *)"AB"==0x4241) swapbytes((char*)p, sz,n);
-
- return *this;
-}
-
-
-
-
-
-/*
-int main()
-{
-
- char word[1000];
-
- mfstream inp("cat pp",ios::in);
- mfbstream outp("aa",ios::out,100);
-
- while (inp >> word){
- outp << word << "\n";
- cout << word << "\n";
- }
-
-
-}
-
-*/
diff --git a/irstlm/src/mfstream.h b/irstlm/src/mfstream.h
deleted file mode 100644
index 842909721..000000000
--- a/irstlm/src/mfstream.h
+++ /dev/null
@@ -1,223 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-#include <iostream>
-#include <fstream>
-#include <streambuf>
-#include <cstdio>
-
-using namespace std;
-
-#ifndef MF_STREAM_H
-#define MF_STREAM_H
-
-extern "C" {
- ssize_t write (int fd, const void* buf, size_t num);
- ssize_t read (int fd, void* buf, size_t num);
- FILE *popen(const char *command, const char *type);
- int pclose(FILE *stream);
- int fseek( FILE *stream, long offset, int whence);
- long ftell( FILE *stream);
-};
-
-
-//! File description for I/O stream buffer
-class fdbuf : public std::streambuf {
-
-protected:
- int fd; // file descriptor
-
- // write one character
- virtual int_type overflow (int_type c) {
- char z = c;
- if (c != EOF) {
- if (write (fd, &z, 1) != 1) {
- return EOF;
- }
- }
- //cerr << "overflow: \n";
- //cerr << "pptr: " << (int) pptr() << "\n";
- return c;
- }
-
- // write multiple characters
- virtual
- std::streamsize xsputn (const char* s,
- std::streamsize num) {
- return write(fd,s,num);
-
- }
-
- virtual streampos seekpos ( streampos sp, ios_base::openmode which = ios_base::in | ios_base::out ){
- cerr << "seekpos\n";
- }
-
- //read one character
- virtual int_type underflow () {
- // is read position before end of buffer?
- if (gptr() < egptr()) {
- return traits_type::to_int_type(*gptr());
- }
-
- /* process size of putback area
- * - use number of characters read
- * - but at most four
- */
- int numPutback;
- numPutback = gptr() - eback();
- if (numPutback > 4) {
- numPutback = 4;
- }
-
- /* copy up to four characters previously read into
- * the putback buffer (area of first four characters)
- */
- std::memmove (buffer+(4-numPutback), gptr()-numPutback,
- numPutback);
-
- // read new characters
- int num;
- num = read (fd, buffer+4, bufferSize-4);
- if (num <= 0) {
- // ERROR or EOF
- return EOF;
- }
-
- // reset buffer pointers
- setg (buffer+(4-numPutback), // beginning of putback area
- buffer+4, // read position
- buffer+4+num); // end of buffer
-
- // return next character
- return traits_type::to_int_type(*gptr());
- }
-
-
- // read multiple characters
- virtual
- std::streamsize xsgetn (char* s,
- std::streamsize num) {
- return read(fd,s,num);
- }
-
- static const int bufferSize = 10; // size of the data buffer
- char buffer[bufferSize]; // data buffer
-
-public:
-
- // constructor
- fdbuf (int _fd) : fd(_fd) {
- setg (buffer+4, // beginning of putback area
- buffer+4, // read position
- buffer+4); // end position
- }
-
-};
-
-
-
-//! Extension of fstream to commands
-
-class mfstream : public std::fstream{
-
-protected:
- fdbuf* buf;
- int _cmd;
- openmode _mode;
- FILE* _FILE;
-
-
- int swapbytes(char *p, int sz, int n);
-
-public:
-
- char _cmdname[500];
-
- //! Creates and opens a file/command stream in a specified nmode
- mfstream (const char* name,openmode mode) : std::fstream() {
- _cmdname[0]='\0';
- _mode=mode;
- open(name,mode);
- }
-
- //! Closes and destroys a file/command stream
- ~mfstream(){
- if (_cmd<2) close();
- }
-
- //! Opens an existing mfstream
- void open(const char *name,openmode mode);
-
- //! Closes an existing mfstream
- void close();
-
- //! Write function for machine-independent byte order
- mfstream& writex(void *p, int sz,int n=1);
-
- //! Read function for machine-independent byte order
- mfstream& readx(void *p, int sz,int n=1);
-
- //! Write function at a given stream position for machine-independent byte order
- mfstream& iwritex(streampos loc,void *ptr,int size,int n=1);
-
- //! Tells current position within a file
- streampos tellp(){
- if (_cmd==0) return (streampos) fstream::tellg();
- cerr << "tellp not allowed on commands\n";
- exit(1);
- };
-
- //! Seeks a position within a file
- mfstream& seekp(streampos loc){
- if (_cmd==0)
- fstream::seekg(loc);
- else{
- cerr << "seekp not allowed on commands\n";
- exit(1);
- }
- return *this;
- };
-
- //! Reopens an input stream
-
- mfstream& reopen(){
-
- if (_mode != in){
- cerr << "mfstream::reopen() openmode must be ios:in\n";
- exit(1);
- }
-
- if (strlen(_cmdname)>0){
- char *a=new char[strlen(_cmdname)+1];
- strcpy(a,_cmdname);
- cerr << "close/open " << a <<"\n";
- close();
- open(a,ios::in);
- }
- else
- seekp(0);
-
- return *this;
- }
-
-};
-
-
-#endif
diff --git a/irstlm/src/ngram.cpp b/irstlm/src/ngram.cpp
index 9a1ea633a..8ad21d71b 100644
--- a/irstlm/src/ngram.cpp
+++ b/irstlm/src/ngram.cpp
@@ -21,7 +21,6 @@
using namespace std;
#include <iomanip>
-#include "mfstream.h"
#include "mempool.h"
#include "htable.h"
#include "dictionary.h"