Welcome to mirror list, hosted at ThFree Co, Russian Federation.

berkeley-process.sh « support « ems « scripts - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 347ebba3c50422cc9b170827099e6ebf7ae28758 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/bin/sh
#
# This file is part of moses.  Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.

# Argument handling.
#
# Nine leading arguments are mandatory (see the usage text below); anything
# after them is forwarded verbatim to the aligner as extra options.  The
# posterior threshold is read from $9, so fewer than nine arguments is an
# error.
if [ "$#" -lt 9 ]; then
  echo "Usage: $0 <\"java options\"> <berkeleyaligner jar file> <input file stem> <previous berkeley param dir> <output directory> <source lang> <target lang> <alignment name (i.e. 'berk' or 'low-posterior')> <posterior threshold> [aligner options...]" >&2
  exit 1
fi

JAVA_OPTS=$1   # options for the JVM, given as one (quoted) argument
JAR=$2         # berkeleyaligner jar file
INFILE=$3      # input file stem; "$INFILE.list" is handed to the aligner
PARAMDIR=$4    # previous berkeley param dir
OUTNAME=$5     # output directory; also the prefix of the final "$OUTNAME.$TAG"
SLANG=$6       # source language suffix
TLANG=$7       # target language suffix
TAG=$8         # alignment name, used as the extension of the final output
POSTERIOR=$9   # posterior decoding threshold

# Drop the nine fixed arguments so that "$@" holds only the optional aligner
# options.  Safe as a single shift because the guard above guarantees $# >= 9.
shift 9

# Run the Berkeley aligner.
#
# The argument vector is assembled in the positional parameters instead of a
# flat string, so every value (jar path, directories, user-supplied aligner
# options) stays exactly one word even if it contains whitespace or glob
# characters.  The caller's extra options, already in "$@", are kept at the
# end.  NOTE: this overwrites the positional parameters.
JAVA_BIN=/usr/local/share/java/bin/java
set -- -jar "$JAR" \
  -Data.trainSources "$INFILE.list" \
  -Main.loadParamsDir "$PARAMDIR" \
  -exec.execDir "$OUTNAME" \
  -Main.loadLexicalModelOnly false \
  -Data.englishSuffix "$SLANG" \
  -Data.foreignSuffix "$TLANG" \
  -exec.create true \
  -Main.saveParams false \
  -Main.alignTraining true \
  -Main.forwardModels HMM \
  -Main.reverseModels HMM \
  -Main.mode JOINT \
  -Main.iters 0 \
  -Data.testSources \
  -EMWordAligner.posteriorDecodingThreshold "$POSTERIOR" \
  "$@"

# Log the command for the user; "$*" joins the arguments with spaces.
echo "Running $JAVA_BIN $JAVA_OPTS $*"

# JAVA_OPTS is deliberately left unquoted: it arrives as a single argument
# holding several JVM options and has to be split into separate words.
# shellcheck disable=SC2086
"$JAVA_BIN" $JAVA_OPTS "$@"

# Clean up the aligner's output directory: delete the training.*Input.txt and
# training.*Trees.txt files (presumably aligner intermediates - confirm) and
# compress the .A3 files for both language directions.
# Variables are quoted so that directory or language names containing
# whitespace or glob characters are handled as single operands; the glob
# itself stays outside the quotes so it is still expanded.  "--" guards
# against an operand that starts with "-".
rm -- "$OUTNAME"/training.*Input.txt
rm -- "$OUTNAME"/training.*Trees.txt
gzip -- "$OUTNAME/training.$SLANG-$TLANG.A3"
gzip -- "$OUTNAME/training.$TLANG-$SLANG.A3"

#now shift the output
perl -e "
use strict;
while (<STDIN>) {
  chomp();
  my @pairs = split(\" \");
  for (my \$i=0;\$i<scalar(@pairs);\$i++) {
    die (\"bad pair \$pairs[\$i]\n\") unless \$pairs[\$i] =~ /(\d+)\-(\d+)/;
    \$pairs[\$i] = (\$2).\"-\".(\$1);
  }
  print join(\" \",@pairs);
  print(\"\n\");
};" < $OUTNAME/training.$SLANG-$TLANG.align > $OUTNAME.$TAG

gzip $OUTNAME/training.$SLANG-$TLANG.align