From 24cd2f344146e2cbbdbbbb1fd00236fe40aacbf0 Mon Sep 17 00:00:00 2001 From: eherbst Date: Wed, 16 Aug 2006 16:37:11 +0000 Subject: updating docs git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@771 1f5c12ca-751b-0410-a591-d2e778427230 --- scripts/analysis/README | 1 + scripts/analysis/smtgui/file-factors | 1 + 2 files changed, 2 insertions(+) (limited to 'scripts/analysis') diff --git a/scripts/analysis/README b/scripts/analysis/README index 8eeab22f1..137b5af87 100644 --- a/scripts/analysis/README +++ b/scripts/analysis/README @@ -5,5 +5,6 @@ sentence-by-sentence.pl [EVH]: show comparison of sentences in reference transla -- requires all input files be utf8-encoded (you can convert a file with `cat FILE | perl -n -e 'binmode(STDOUT, ":utf8"); print;' > FILE.utf8`) show-phrases-used.pl [EVH]: draw colorful diagram of which source phrases map to which target phrases +-- requires the Perl GD module, which in turn requires that gd be installed and in LD_LIBRARY_PATH -- show average length of source phrases used for each sentence and overall -- command-line options -r for reference and -s for source; lone filenames are taken to be system outputs diff --git a/scripts/analysis/smtgui/file-factors b/scripts/analysis/smtgui/file-factors index 7938e9297..86789d967 100644 --- a/scripts/analysis/smtgui/file-factors +++ b/scripts/analysis/smtgui/file-factors @@ -2,6 +2,7 @@ #(the given factors should be present in all files for the given corpus) devtest2006.de-en : surf pos lemma : surf europarl.de.srilm.gz : surf europarl.en.srilm.gz devtest2006.en-de : surf pos lemma : surf europarl.en.srilm.gz : surf europarl.de.srilm.gz +test2006.en-de : surf : surf europarl.en.srilm.gz : surf europarl.de.srilm.gz #pstem: lemmas come from the Porter stemmer (and so are really a mix of stems and lemmas) pstem_devtest2006.de-en : surf pos lemma : : surf europarl.en.srilm.gz #replace eszett (ß) with ss in German text -- cgit v1.2.3