Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mgiza.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'mgizapp/scripts/plain2snt-hasvcb.py')
-rw-r--r--mgizapp/scripts/plain2snt-hasvcb.py93
1 files changed, 0 insertions, 93 deletions
diff --git a/mgizapp/scripts/plain2snt-hasvcb.py b/mgizapp/scripts/plain2snt-hasvcb.py
deleted file mode 100644
index 490c493..0000000
--- a/mgizapp/scripts/plain2snt-hasvcb.py
+++ /dev/null
@@ -1,93 +0,0 @@
-#!/usr/bin/env python
-
-from sys import *
-
-def loadvcb(fname,out):
- dict={};
- df = open(fname,"r");
- for line in df:
- out.write(line);
- ws = line.strip().split();
- id = int(ws[0]);
- wd = ws[1];
- dict[wd]=id;
- return dict;
-
-if len(argv)<9:
- stderr.write("Error, the input should be \n");
- stderr.write("%s evcb fvcb etxt ftxt esnt(out) fsnt(out) evcbx(out) fvcbx(out)\n" % argv[0]);
- stderr.write("You should concatenate the evcbx and fvcbx to existing vcb files\n");
- exit();
-
-ein = open(argv[3],"r");
-fin = open(argv[4],"r");
-
-eout = open(argv[5],"w");
-fout = open(argv[6],"w");
-
-evcbx = open(argv[7],"w");
-fvcbx = open(argv[8],"w");
-evcb = loadvcb(argv[1],evcbx);
-fvcb = loadvcb(argv[2],fvcbx);
-
-i=0
-while True:
- i+=1;
- eline=ein.readline();
- fline=fin.readline();
- if len(eline)==0 or len(fline)==0:
- break;
- ewords = eline.strip().split();
- fwords = fline.strip().split();
- el = [];
- fl = [];
- j=0;
- for w in ewords:
- j+=1
- if evcb.has_key(w):
- el.append(evcb[w]);
- else:
- if evcb.has_key(w.lower()):
- el.append(evcb[w.lower()]);
- else:
- ##stdout.write("#E %d %d %s\n" % (i,j,w))
- #el.append(1);
- nid = len(evcb)+1;
- evcb[w.lower()] = nid;
- evcbx.write("%d %s 1\n" % (nid, w));
- el.append(nid);
-
- j=0;
- for w in fwords:
- j+=1
- if fvcb.has_key(w):
- fl.append(fvcb[w]);
- else:
- if fvcb.has_key(w.lower()):
- fl.append(fvcb[w.lower()]);
- else:
- #stdout.write("#F %d %d %s\n" % (i,j,w))
- nid = len(fvcb)+1;
- fvcb[w.lower()] = nid;
- fvcbx.write("%d %s 1\n" % (nid, w));
- fl.append(nid);
- #fl.append(1);
- eout.write("1\n");
- fout.write("1\n");
- for I in el:
- eout.write("%d " % I);
- eout.write("\n");
- for I in fl:
- eout.write("%d " % I);
- fout.write("%d " % I);
- eout.write("\n");
- fout.write("\n");
- for I in el:
- fout.write("%d " % I);
- fout.write("\n");
-
-fout.close();
-eout.close();
-fvcbx.close();
-evcbx.close();
-