Welcome to mirror list, hosted at ThFree Co, Russian Federation.

extract-factors.pl « generic « scripts - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 56c71905119e5339e9f217b29ca3b8652f32916a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#!/usr/bin/env perl 

# $Id$
#extract-factors.pl: extract only the desired factors from a factored corpus
#usage: extract-factors.pl corpusfile factor-index factor-index ... > outfile
#factor indices start at 0
#factor indices beyond the last factor of a token are ignored
#selected factors are printed in corpus order, not in command-line order

use warnings;
use strict;

# Command line: the corpus file, followed by the 0-based indices of the
# factors to keep in every token.
my ($filename, @factors) = @ARGV;
die "usage: $0 corpusfile factor-index factor-index ... > outfile\n"
	unless defined $filename;

# Set of wanted factor positions, used for membership tests below.
my %indices = map {$_ => 1} @factors;

# Three-argument open with a lexical handle, so characters in the file name
# are never interpreted as an open mode or a pipe. The two-argument form
# treated '-' as standard input; that case is kept explicitly.
my $infile;
if($filename eq '-')
{
	$infile = \*STDIN;
}
else
{
	open($infile, '<', $filename) or die "couldn't open '$filename' for read: $!\n";
}

while(my $line = <$infile>)
{
	# chomp strips only a trailing newline; chop would also remove the last
	# real character of a final line that has no newline.
	chomp $line;

	# Tokens are separated by whitespace, factors within a token by '|'.
	my @reduced;
	foreach my $token (split(/\s+/, $line))
	{
		# $i is the position of the current factor within the token; only
		# positions present in %indices survive, in their original order.
		my $i = 0;
		push @reduced, join('|', grep { $indices{$i++} } split(/\|/, $token));
	}
	print join(' ', @reduced) . "\n";
}
close($infile);