Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author phikoehn <pkoehn@inf.ed.ac.uk> 2013-01-14 23:23:02 +0400
committer phikoehn <pkoehn@inf.ed.ac.uk> 2013-01-14 23:23:02 +0400
commit d5cf38cab2c64a7bb756e2a40b5a768e3a687249 (patch)
tree c7bd1d83149359fdd27fdb8814bc81d0f024944c /scripts/ems
parent 344b1503728364d164d2d9420f89726e7cb0c4a0 (diff)
parent c9687e3b50d64174f091808fd2d6e04c7b17a406 (diff)
Merge branch 'master' of git://github.com/moses-smt/mosesdecoder
Diffstat (limited to 'scripts/ems')
-rw-r--r-- scripts/ems/experiment.meta 6
-rwxr-xr-x scripts/ems/support/split-sentences.perl 8
2 files changed, 7 insertions, 7 deletions
diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta
index 210fddb93..52596227c 100644
--- a/scripts/ems/experiment.meta
+++ b/scripts/ems/experiment.meta
@@ -401,21 +401,21 @@ mml-filter-before-wa
prepare-data
in: corpus-mml-prefilter=OR=corpus
out: prepared-data
- rerun-on-change: alignment-factors training-options script baseline-alignment-model
+ rerun-on-change: alignment-factors training-options script baseline-alignment-model external-bin-dir
ignore-if: use-berkeley
default-name: prepared
run-giza
in: prepared-data
out: giza-alignment
ignore-if: use-berkeley
- rerun-on-change: giza-settings training-options script baseline-alignment-model
+ rerun-on-change: giza-settings training-options script baseline-alignment-model external-bin-dir
default-name: giza
error: not found
not-error: 0 not found
run-giza-inverse
in: prepared-data
out: giza-alignment-inverse
- rerun-on-change: giza-settings training-options script baseline-alignment-model
+ rerun-on-change: giza-settings training-options script baseline-alignment-model external-bin-dir
ignore-if: use-berkeley
default-name: giza-inverse
error: not found
diff --git a/scripts/ems/support/split-sentences.perl b/scripts/ems/support/split-sentences.perl
index b366d3d7e..d73e58742 100755
--- a/scripts/ems/support/split-sentences.perl
+++ b/scripts/ems/support/split-sentences.perl
@@ -84,16 +84,16 @@ sub do_it_for {
}
sub preprocess {
+ #this is one paragraph
+ my($text) = @_;
+
# clean up spaces at head and tail of each line as well as any double-spacing
$text =~ s/ +/ /g;
$text =~ s/\n /\n/g;
$text =~ s/ \n/\n/g;
$text =~ s/^ //g;
$text =~ s/ $//g;
-
- #this is one paragraph
- my($text) = @_;
-
+
#####add sentence breaks as needed#####
#non-period end of sentence markers (?!) followed by sentence starters.