Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/irstlm
diff options
context:
space:
mode:
author: mfederico <mfederico@1f5c12ca-751b-0410-a591-d2e778427230> 2006-08-14 06:45:20 +0400
committer: mfederico <mfederico@1f5c12ca-751b-0410-a591-d2e778427230> 2006-08-14 06:45:20 +0400
commit: 3c28c78fba2cfb6e291f3c4cfabf92f10a0abb64 (patch)
tree: e227d27abf79d03c32aa0ef7733b9dcaa50f9b4b /irstlm
parent: d9dd8510ffa3cb187bf94e5ffe6287bd9f29ca00 (diff)
added some options to command
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@703 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'irstlm')
-rw-r--r-- irstlm/src/compile-lm.cpp 22
1 files changed, 20 insertions, 2 deletions
diff --git a/irstlm/src/compile-lm.cpp b/irstlm/src/compile-lm.cpp
index f86551ed9..ca9e412c4 100644
--- a/irstlm/src/compile-lm.cpp
+++ b/irstlm/src/compile-lm.cpp
@@ -34,6 +34,7 @@ using namespace std;
std::string stxt = "no";
std::string seval = "";
+std::string sdebug = "0";
/********************************/
@@ -44,7 +45,10 @@ void usage(const char *msg = 0) {
<< " compile-lm reads a standard LM file in ARPA format and produces" << std::endl
<< " a compiled representation that the IRST LM toolkit can quickly" << std::endl
<< " read and process." << std::endl << std::endl;
- std::cerr << "Options:\n -t=[yes|no]\n";
+ std::cerr << "Options:\n"
+ << "--text=[yes|no] -t=[yes|no] (output is again in text format)\n"
+ << "--eval=text-file -e=text-file (computes perplexity of text-file and returns)\n"
+ << "--debug=1 -d=1 (verbose output for --eval option)\n";
}
bool starts_with(const std::string &s, const std::string &pre) {
@@ -80,6 +84,10 @@ void handle_option(const std::string& opt, int argc, const char **argv, int& arg
else
if (starts_with(opt, "--eval") || starts_with(opt, "-e"))
seval = get_param(opt, argc, argv, argi);
+ else
+ if (starts_with(opt, "--debug") || starts_with(opt, "-d"))
+ sdebug = get_param(opt, argc, argv, argi);
+
else {
usage(("Don't understand option " + opt).c_str());
exit(1);
@@ -101,6 +109,7 @@ int main(int argc, const char **argv)
if (files.size() < 1) { usage("Please specify a LM file to read from"); exit(1); }
bool textoutput = (stxt == "yes"? true : false);
+ int debug = atoi(sdebug.c_str());
std::string infile = files[0];
if (files.size() == 1) {
@@ -128,22 +137,31 @@ int main(int argc, const char **argv)
ngram ng(lmt.dict);
std::cout.setf(ios::fixed);
std::cout.precision(2);
-
+ if (debug>1) std::cout.precision(8);
std::fstream inptxt(seval.c_str(),std::ios::in);
int Nbo=0,Nw=0,Noov=0;
double logPr=0,PP=0,PPwp=0,Pr;
int bos=ng.dict->encode(ng.dict->BoS());
+
+#ifdef TRACE_CACHE
lmt.init_probcache();
+#endif
+
while(inptxt >> ng){
+ if (ng.size>lmt.maxlevel()) ng.size=lmt.maxlevel();
+
// reset ngram at begin of sentence
if (*ng.wordp(1)==bos) continue;
lmt.bo_state(0);
if (ng.size>=1){
logPr+=(Pr=lmt.clprob(ng));
+ if (debug>1)
+ std::cout << ng << "[" << ng.size << "-gram]" << " " << Pr << "\n";
+
if (*ng.wordp(1) == lmt.dict->oovcode()) Noov++;
Nw++; if (lmt.bo_state()) Nbo++;
}