Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHieu Hoang <hieuhoang@gmail.com>2014-04-21 02:30:09 +0400
committerHieu Hoang <hieuhoang@gmail.com>2014-04-21 02:30:09 +0400
commit76a4609cff1b5bc9a5581e83dcddaf21d696682d (patch)
tree98fa221ae839d4228156702b7a485dd8e0b46d4c /scripts/other
parent568685cb66287dc0af72315df5095567a1854853 (diff)
add script to create pt with only certain scores
Diffstat (limited to 'scripts/other')
-rwxr-xr-xscripts/other/delete-scores.perl61
1 files changed, 61 insertions, 0 deletions
diff --git a/scripts/other/delete-scores.perl b/scripts/other/delete-scores.perl
new file mode 100755
index 000000000..442173026
--- /dev/null
+++ b/scripts/other/delete-scores.perl
@@ -0,0 +1,61 @@
+#!/usr/bin/perl
+
+use strict;
+use Getopt::Long "GetOptions";
+
+binmode(STDIN, ":utf8");
+binmode(STDOUT, ":utf8");
+
+sub trim($);
+sub DeleteScore;
+
+# Command line: --keep-scores takes a comma-separated list of 0-based score
+# positions to keep, e.g. --keep-scores 0,2. It is required: without it the
+# list would be empty and every score would silently be dropped.
+my $keepScoresStr;
+GetOptions(
+    "keep-scores=s" => \$keepScoresStr
+) or exit(1);
+defined($keepScoresStr)
+    or die "Usage: $0 --keep-scores I,J,... < input-table > output-table\n";
+my @keepScores = split(/,/, $keepScoresStr);
+
+# MAIN LOOP: rewrite the score field of every line read from STDIN.
+while (my $line = <STDIN>) {
+    chomp($line);
+
+    # Splitting on a single "|" puts the scores in token 6 (this assumes fields
+    # are separated by "|||", which leaves two empty tokens per separator).
+    # Limit -1 keeps trailing empty tokens so a line ending in "|||" survives.
+    my @toks = split(/\|/, $line, -1);
+    die "line $.: fewer than 7 '|'-separated tokens, no score field\n" if @toks < 7;
+    $toks[6] = DeleteScore($toks[6], \@keepScores);
+
+    print join("|", @toks), "\n";
+}
+
+######################
+# Strip leading and trailing whitespace from a string and return the result.
+# The argument is copied first, so the caller's variable is left unmodified.
+sub trim($) {
+    my ($text) = @_;
+    $text =~ s/^\s+|\s+$//g;
+    return $text;
+}
+
+# Keep only selected scores of a whitespace-separated score field.
+#   $_[0]  score field, e.g. " 0.1 0.2 0.3 "
+#   $_[1]  array ref of 0-based positions to keep, emitted in the given order
+# Returns " s1 s2 ... " (leading and trailing space); dies on a missing score.
+sub DeleteScore {
+    my ($field, $keep) = @_;
+    # split ' ' skips leading, trailing and repeated whitespace, so positions
+    # stay aligned even when the field is not single-space separated.
+    my @scores = split(' ', defined $field ? $field : '');
+    my $kept = " ";
+    for my $idx (@$keep) {
+        defined $scores[$idx] or die "DeleteScore: no score at index $idx";
+        $kept .= "$scores[$idx] ";
+    }
+    return $kept;
+}