Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mgiza.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'mgizapp/scripts/merge_alignment.py')
-rw-r--r-- mgizapp/scripts/merge_alignment.py 80
1 files changed, 0 insertions, 80 deletions
diff --git a/mgizapp/scripts/merge_alignment.py b/mgizapp/scripts/merge_alignment.py
deleted file mode 100644
index 626bc68..0000000
--- a/mgizapp/scripts/merge_alignment.py
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/usr/bin/env python
-# Author : Qin Gao
-# Date : Dec 31, 2007
-# Purpose: Combine multiple alignment files into a single one. The files are
-# produced by MGIZA, contain sentence IDs, and each file is ordered
-# internally.
-
-import sys
-import re
-
-if len(sys.argv)<2:
- sys.stderr.write("Provide me the file names (at least 2)\n");
- sys.exit();
-
-sent_id = 0;
-
-files = [];
-ids = [];
-
-sents = [];
-done = [];
-
-for i in range(1,len(sys.argv)):
- files.append(open(sys.argv[i],"r"));
- ids.append(0);
- sents.append("");
- done.append(False);
-
-r = re.compile("\\((\\d+)\\)");
-i = 0;
-while i< len(files):
- st1 = files[i].readline();
- st2 = files[i].readline();
- st3 = files[i].readline();
- if len(st1)==0 or len(st2)==0 or len(st3)==0:
- done[i] = True;
- else:
- mt = r.search(st1);
- id = int(mt.group(1));
- ids[i] = id;
- sents[i] = (st1, st2, st3);
- i += 1
-
-cont = True;
-while (cont):
- sent_id += 1;
- writeOne = False;
-# Now try to read more sentences
- i = 0;
- cont = False;
- while i < len(files):
- if done[i]:
- i+=1
- continue;
- cont = True;
- if ids[i] == sent_id:
- sys.stdout.write("%s%s%s"%(sents[i][0],sents[i][1],sents[i][2]));
- writeOne = True;
- st1 = files[i].readline();
- st2 = files[i].readline();
- st3 = files[i].readline();
- if len(st1)==0 or len(st2)==0 or len(st3)==0:
- done[i] = True;
- else:
- mt = r.search(st1);
- id = int(mt.group(1));
- ids[i] = id;
- sents[i] = (st1, st2, st3);
- cont = True;
- break;
- elif ids[i] < sent_id:
- sys.stderr.write("ERROR! DUPLICATED ENTRY %d\n" % ids[i]);
- sys.exit();
- else:
- cont = True;
- i+=1;
- if (not writeOne) and cont:
- sys.stderr.write("ERROR! MISSING ENTRy %d\n" % sent_id);
- #sys.exit();
-sys.stderr.write("Combined %d files, totally %d sents \n" %(len(files),sent_id-1));