Welcome to mirror list, hosted at ThFree Co, Russian Federation.

consolidate-training-data.perl « support « ems « scripts - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: f312b1649815dfc9958d8125ba66d7613ac70ad1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/usr/bin/env perl 

# $Id: consolidate-training-data.perl 928 2009-09-02 02:58:01Z philipp $

use strict;
use warnings;

use File::Spec;

# Consolidate one or more parallel-corpus parts into a single corpus.
#
# Arguments (positional, taken from @ARGV):
#   in            file extension of the input side  (files are "<stem>.<in>")
#   out           file extension of the output side (files are "<stem>.<out>")
#   consolidated  stem of the corpus to create
#   part ...      one or more stems of the parts to combine
#
# With exactly one part the consolidated files are symbolic links to that
# part. With several parts, every part is first checked (both sides exist
# and contain the same number of newlines) and only then are the parts
# appended, in argument order, to the consolidated files. All file handling
# is done in Perl itself, so stems containing spaces or shell metacharacters
# are safe. Any failure aborts with die().
my ($in,$out,$consolidated,@PART) = @ARGV;
die("usage: $0 in-extension out-extension consolidated-stem part-stem [part-stem ...]\n")
    if !defined($consolidated) || !@PART;

# Number of bytes read per chunk when counting lines or copying.
my $CHUNK_BYTES = 1 << 20;

# Remove results of an earlier run. -l is tested as well because -e is
# false for a dangling symbolic link, which would otherwise survive and
# either block symlink() or be written through when appending.
foreach my $stale ("$consolidated.$in", "$consolidated.$out") {
    next unless -e $stale || -l $stale;
    unlink($stale) or die("ERROR: cannot remove '$stale': $!");
}

if (scalar @PART == 1) {
    my $part = $PART[0];
    foreach my $extension ($in, $out) {
        # A relative link target is resolved relative to the directory of
        # the link, not the current directory, so store an absolute path.
        my $target = File::Spec->rel2abs("$part.$extension");
        symlink($target, "$consolidated.$extension")
            or die("ERROR: cannot link '$consolidated.$extension' to '$target': $!");
    }
    exit;
}

# Validate every part before writing anything, so that a bad part cannot
# leave a partially written consolidated corpus behind.
foreach my $part (@PART) {
    die("ERROR: no part $part.$in or $part.$out")
        if (! -e "$part.$in" || ! -e "$part.$out");
    my $in_size = count_lines("$part.$in");
    my $out_size = count_lines("$part.$out");
    die("number of lines don't match: '$part.$in' ($in_size) != '$part.$out' ($out_size)")
        if $in_size != $out_size;
}

foreach my $part (@PART) {
    append_file("$part.$in", "$consolidated.$in");
    append_file("$part.$out", "$consolidated.$out");
}

# Return the number of newline characters in $file (the same quantity that
# "wc -l" reports). Dies if the file cannot be opened or read.
sub count_lines {
    my ($file) = @_;
    open(my $handle, '<', $file) or die("ERROR: cannot read '$file': $!");
    binmode($handle);
    my $count = 0;
    my $buffer = '';
    while (1) {
        my $got = read($handle, $buffer, $CHUNK_BYTES);
        die("ERROR: cannot read '$file': $!") unless defined $got;
        last if $got == 0;
        $count += ($buffer =~ tr/\n//);
    }
    close($handle);
    return $count;
}

# Append the raw bytes of $source to $target, creating $target if needed.
# Dies on any open, read, write or close failure.
sub append_file {
    my ($source, $target) = @_;
    open(my $from, '<', $source) or die("ERROR: cannot read '$source': $!");
    open(my $to, '>>', $target) or die("ERROR: cannot append to '$target': $!");
    binmode($from);
    binmode($to);
    my $buffer = '';
    while (1) {
        my $got = read($from, $buffer, $CHUNK_BYTES);
        die("ERROR: cannot read '$source': $!") unless defined $got;
        last if $got == 0;
        print {$to} $buffer or die("ERROR: cannot append to '$target': $!");
    }
    # Buffered write errors (e.g. disk full) only surface at close.
    close($to) or die("ERROR: cannot append to '$target': $!");
    close($from);
    return;
}