Welcome to mirror list, hosted at ThFree Co, Russian Federation.

extract-singletons.perl « OSM « scripts - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: faa4e8dd625378f922871bf3a0e8dd259939e426 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/env perl 

use strict;
use warnings;

use Getopt::Std;

# extract-singletons.perl [-q] target source align
#
# Reads three files in lock-step, one line per sentence:
#   target - whitespace-separated target-side words
#   source - whitespace-separated source-side words
#   align  - whitespace-separated 0-based indices read in pairs; the first
#            index of each pair addresses a target word, the second a
#            source word
#
# Prints, sorted and one per line, every source word that takes part in
# exactly one alignment link in the whole input, where that link is
# one-to-one and joins two identical words.
#
# Options:
#   -q   do not print the running sentence count to STDERR

# Run only when executed as a script, so the subs below can also be loaded
# (require/do) without side effects.
run(@ARGV) unless caller;

# Entry point: parse the command line, walk the three files in parallel and
# print the surviving words to the currently selected output handle.
# Dies with the usage text when fewer than three file names are given, or
# with an "unable to open" message when a file cannot be read.
sub run {
    local @ARGV = @_;    # getopts() always works on @ARGV

    my %opts;
    getopts('q', \%opts);

    # Count the arguments instead of testing their truth, so that a file
    # literally named "0" is accepted.
    die "\nUsage: extract-singletons.perl target source align\n\n"
        if @ARGV < 3;
    my ($target, $source, $align) = @ARGV;

    # Three-argument open: the names are always treated as plain file names,
    # never as a mode prefix or a command to run.
    open(my $target_fh, '<', $target)
        or die "Error: unable to open target file \"$target\"!\n";
    open(my $source_fh, '<', $source)
        or die "Error: unable to open source file \"$source\"!\n";
    open(my $align_fh, '<', $align)
        or die "Error: unable to open alignment file \"$align\"!\n";

    my %count_of;          # source word => accumulated score (1 == print it)
    my $sentences = 0;

    # The target file drives the loop; the other two are read one line per
    # target line.  If either runs out early its lines count as empty.
    while (my $target_line = <$target_fh>) {
        $sentences++;
        print STDERR "\r$sentences"
            if !$opts{q} && $sentences % 1000 == 0;

        my $source_line = <$source_fh>;
        my $align_line  = <$align_fh>;

        _tally_sentence(
            \%count_of,
            [ _tokens($target_line) ],
            [ _tokens($source_line) ],
            [ _tokens($align_line) ],
        );
    }

    close($target_fh);
    close($source_fh);
    close($align_fh);

    for my $word (sort keys %count_of) {
        print "$word\n" if $count_of{$word} == 1;
    }
    return;
}

# Split a line into whitespace-separated tokens (leading whitespace ignored).
# An undefined line yields an empty list.
sub _tokens {
    my ($line) = @_;
    return () unless defined $line;
    return split ' ', $line;
}

# Update the word scores in %$count_of for one sentence.
#
#   $count_of     - hash ref, source word => score, modified in place
#   $target_words - array ref of target-side words
#   $source_words - array ref of source-side words
#   $links        - array ref of indices: target, source, target, source, ...
#
# A link adds 1 to its source word's score when both of its ends have no
# other link in this sentence and the two words are identical; any other
# link adds 2.  A word therefore ends with a score of exactly 1 only if it
# was seen in a single, one-to-one, identical-word link.
sub _tally_sentence {
    my ($count_of, $target_words, $source_words, $links) = @_;

    my (@target_links, @source_links);    # links per position
    my @pairs;

    # A trailing index without a partner is not a link and is ignored.
    for (my $i = 0; $i + 1 < @$links; $i += 2) {
        my ($t, $s) = ($links->[$i], $links->[ $i + 1 ]);
        $target_links[$t]++;
        $source_links[$s]++;
        push @pairs, [ $t, $s ];
    }

    for my $pair (@pairs) {
        my ($t, $s) = @$pair;

        # A source index past the end of the sentence names no word, so
        # there is nothing to score (and no empty "word" to print later).
        my $source_word = $source_words->[$s];
        next unless defined $source_word;

        my $target_word = $target_words->[$t];
        if (   $target_links[$t] == 1
            && $source_links[$s] == 1
            && defined $target_word
            && $target_word eq $source_word)
        {
            $count_of->{$source_word}++;       # print if this stays at 1
        }
        else {
            $count_of->{$source_word} += 2;    # never printed
        }
    }
    return;
}