Welcome to mirror list, hosted at ThFree Co, Russian Federation.

lopar2pos.pl « generic « scripts - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: c7506913546c64c396aed11f4d2a783cace85beb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#!/usr/bin/env perl 

# $Id$
#lopar2pos: extract POSs from LOPAR output
#usage: lopar2pos.pl CORPUS.lopar > CORPUS.pos

use strict;
use warnings;

# Return the POS tag of one LOPAR token.
# The tag is the run of capital letters that directly follows the first
# underscore in the token (the pattern is /^[^_]*_([A-Z]+)/).
# Returns the empty string when the token does not match, so the caller
# still gets exactly one output field per input token and never reads a
# capture variable left over from an unsuccessful match.
sub extract_tag
{
	my ($token) = @_;
	return $token =~ /^[^_]*_([A-Z]+)/ ? $1 : '';
}

my $infilename = shift @ARGV;
defined $infilename or die "usage: lopar2pos.pl CORPUS.lopar > CORPUS.pos\n";

# Three-argument open with a lexical handle: the filename is taken
# literally, so whitespace or mode characters in it cannot change how
# the file is opened.
open(my $infile, '<', $infilename) or die "couldn't open '$infilename' for read: $!\n";
while(my $line = <$infile>)
{
	# split ' ' ignores leading whitespace, so an indented line does not
	# produce an empty first token (and thus an empty first tag).
	my @tags = map { extract_tag($_) } split(' ', $line);

	# One output line per input line, tags separated by single spaces.
	print join(' ', @tags) . "\n";
}
close($infile);