Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hieu Hoang <fishandfrolick@gmail.com>, 2012-10-08 20:46:17 +0400
committer: Hieu Hoang <fishandfrolick@gmail.com>, 2012-10-08 20:46:17 +0400
commit: d56f815ba98ace702420db7a40c7f9a03ef7fd2c (patch)
tree: 3b95330417808e38369c236cf41a19a48842e4b9
parent: 80768667280803379f2db32386616e2fcfa9b5ff (diff)
parent: 2aa10c3012328d8fd66ceee26c25e3d8771f9f31 (diff)
Merge branch 'master' of github.com:moses-smt/mosesdecoder
-rw-r--r-- jam-files/sanity.jam 2
-rw-r--r-- mert/PermutationScorer.cpp 6
-rw-r--r-- regression-testing/Jamfile 2
-rwxr-xr-x scripts/training/wrappers/parse-de-bitpar.perl 12
4 files changed, 15 insertions, 7 deletions
diff --git a/jam-files/sanity.jam b/jam-files/sanity.jam
index 2aca84e4d..c8435862c 100644
--- a/jam-files/sanity.jam
+++ b/jam-files/sanity.jam
@@ -29,7 +29,7 @@ ldflags = [ os.environ "LDFLAGS" ] ;
#Run g++ with empty main and these arguments to see if it passes.
rule test_flags ( flags * ) {
flags = $(cxxflags) $(ldflags) $(flags) ;
- local cmd = "bash -c \"g++ "$(flags:J=" ")" -x c++ - <<<'int main() {}' -o /dev/null >/dev/null 2>/dev/null\"" ;
+ local cmd = "bash -c \"g++ "$(flags:J=" ")" -x c++ - <<<'int main() {}' -o $(TOP)/dummy >/dev/null 2>/dev/null && rm $(TOP)/dummy 2>/dev/null\"" ;
local ret = [ SHELL $(cmd) : exit-status ] ;
if --debug-configuration in [ modules.peek : ARGV ] {
echo $(cmd) ;
diff --git a/mert/PermutationScorer.cpp b/mert/PermutationScorer.cpp
index c6588eec7..12025a77e 100644
--- a/mert/PermutationScorer.cpp
+++ b/mert/PermutationScorer.cpp
@@ -225,12 +225,12 @@ void PermutationScorer::prepareStats(size_t sid, const string& text, ScoreStats&
//SCOREROUT eg: 0.04546
distanceValue*=SCORE_MULTFACT; //SCOREROUT eg: 4546 to transform float into integer
ostringstream tempStream;
- tempStream.precision(SCORE_PRECISION);
- tempStream << distanceValue << " 1"; //use for final normalization over the amount of test sentences
+ tempStream.precision(0); // decimal precision not needed as score was multiplied per SCORE_MULTFACT
+ tempStream << std::fixed << distanceValue << " 1"; //use for final normalization over the amount of test sentences
string str = tempStream.str();
entry.set(str);
- //cout << tempStream.str();
+//cout << distanceValue << "=" << distanceValue << " (str:" << tempStream.str() << ")" << endl;
}
//Will just be final score
diff --git a/regression-testing/Jamfile b/regression-testing/Jamfile
index a27908e4c..998e46317 100644
--- a/regression-testing/Jamfile
+++ b/regression-testing/Jamfile
@@ -47,7 +47,7 @@ if $(with-regtest) {
actions reg_test_mert {
$(TOP)/regression-testing/run-test-mert.perl --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) --bin-dir=$(pwd)/$(>:D) && touch $(<)
}
- reg_test mert : [ glob $(test-dir)/mert.* ] : ../mert//mert ../mert//extractor : @reg_test_mert ;
+ reg_test mert : [ glob $(test-dir)/mert.* ] : ../mert//mert ../mert//extractor ../mert//pro : @reg_test_mert ;
alias all : phrase chart mert score extract extractrules ;
}
diff --git a/scripts/training/wrappers/parse-de-bitpar.perl b/scripts/training/wrappers/parse-de-bitpar.perl
index bbcccf877..370187d32 100755
--- a/scripts/training/wrappers/parse-de-bitpar.perl
+++ b/scripts/training/wrappers/parse-de-bitpar.perl
@@ -9,6 +9,9 @@ use File::Temp qw/tempfile/;
my $BITPAR = "/exports/home/s0565741/work/bin/bitpar";
my $TMPDIR = "tmp";
+my $SCRIPTS_ROOT_DIR = "$RealBin/../..";
+my $DEESCAPE = "$SCRIPTS_ROOT_DIR/tokenizer/deescape-special-chars.perl";
+
my $DEBUG = 0;
my $BASIC = 0;
my $OLD_BITPAR = 0;
@@ -27,11 +30,11 @@ my ($scriptname, $directories) = fileparse($0);
my ($TMP, $tmpfile) = tempfile("$scriptname-XXXXXXXXXX", DIR=>$TMPDIR, UNLINK=>1);
if ($OLD_BITPAR)
{
- open(INPUT,"iconv -c -f UTF-8 -t iso-8859-1 |");
+ open(INPUT,"$DEESCAPE | iconv -c -f UTF-8 -t iso-8859-1 |");
}
else
{
- open (INPUT,"cat |");
+ open (INPUT,"$DEESCAPE |");
}
while(<INPUT>)
{
@@ -162,7 +165,12 @@ sub is_aux_label {
sub escape {
my ($text) = @_;
$text =~ s/&/&amp;/g;
+ $text =~ s/\|/&#124;/g;
$text =~ s/</&lt;/g;
$text =~ s/>/&gt;/g;
+ $text =~ s/'/&apos;/g;
+ $text =~ s/"/&quot;/g;
+ $text =~ s/\[/&#91;/g;
+ $text =~ s/\]/&#93;/g;
return $text;
}