Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Hieu Hoang <hieuhoang@gmail.com> 2015-04-28 11:29:58 +0300
committer Hieu Hoang <hieuhoang@gmail.com> 2015-04-28 11:29:58 +0300
commit b7792b227a337c36d97d3c0979d11e6955ba368c (patch)
tree 8160d9e979d28c159eba0cfc5b5bd83b9c658c2b /scripts
parent 8adad4fc2e1ae609ffbd8fe76261540cac19a125 (diff)
script to convert arabic to bw, and vice versa
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/other/buckwalter.perl 33
1 files changed, 33 insertions, 0 deletions
diff --git a/scripts/other/buckwalter.perl b/scripts/other/buckwalter.perl
new file mode 100755
index 000000000..62544e212
--- /dev/null
+++ b/scripts/other/buckwalter.perl
@@ -0,0 +1,33 @@
+#!/usr/bin/env perl
+# Transliterate STDIN to STDOUT, line by line, between UTF-8 Arabic and
+# Buckwalter: --direction 1 = arabic->bw, --direction 2 = bw->arabic.
+
+use strict;
+use warnings;
+use Encode::Arabic::Buckwalter;
+use Getopt::Long "GetOptions";
+
+my $direction;
+GetOptions('direction=i' => \$direction)
+  or exit(1);
+
+die("ERROR: need to set direction") unless defined($direction);
+
+# Reject a bad direction here, before any input is read, so the script
+# fails loudly even when STDIN is empty.
+die("Unknown direction: $direction") unless $direction == 1 || $direction == 2;
+
+# Source and target encodings for the chosen direction.
+my ($from, $to) =
+  $direction == 1 ? ('utf8', 'buckwalter') : ('buckwalter', 'utf8');
+
+while (my $line = <STDIN>) {
+  # Strip the newline before converting; it is re-added on output.
+  chomp($line);
+
+  # Decode the input into Perl characters, then encode those characters
+  # into the target representation.
+  my $lineOut = encode($to, decode($from, $line));
+  print "$lineOut\n";
+}
+