diff options
author | Antoni Oliver <aoliverg@uoc.edu> | 2021-04-10 23:44:48 +0300 |
---|---|---|
committer | Antoni Oliver <aoliverg@uoc.edu> | 2021-04-10 23:44:48 +0300 |
commit | d77e589b4ac409f633856842eb4e2b15020465ef (patch) | |
tree | 0cfd05c76079398a26d867bf03e9f1bc76c29ede | |
parent | 2e304b345d72c8d7cbefdca10063823ef8000ca7 (diff) |
revision
-rwxr-xr-x | TBXTools.py | 73 |
1 files changed, 38 insertions, 35 deletions
diff --git a/TBXTools.py b/TBXTools.py index 3599524..cc6c6e6 100755 --- a/TBXTools.py +++ b/TBXTools.py @@ -1234,26 +1234,29 @@ class TBXTools: self.source=self.camps[0].strip() self.trad=self.camps[1].strip() self.probs=self.camps[2].split(" ") - if not self.trad[0] in self.punctuation and not self.source[0] in self.punctuation and not self.trad[-1] in self.punctuation and not self.source[-1] in self.punctuation: - #Currently, four different phrase translation scores are computed: - #0 inverse phrase translation probability φ(f|e) - #1 inverse lexical weighting lex(f|e) - #2 direct phrase translation probability φ(e|f) - #3 direct lexical weighting lex(e|f) - #self.probtrad=float(self.probs[1]) - self.probtrad=(float(self.probs[2])*float(self.probs[3])) - #print(self.source,self.trad,self.probtrad) - self.record=[] - self.record.append(self.source) - self.record.append(self.trad) - self.record.append(self.probtrad) - self.data.append(self.record) - self.continserts+=1 - if self.continserts==self.maxinserts: - self.cur.executemany("INSERT INTO index_pt (source, target, probability) VALUES (?,?,?)",self.data) - self.data=[] - self.continserts=0 - self.conn.commit() + try: + if not self.trad[0] in self.punctuation and not self.source[0] in self.punctuation and not self.trad[-1] in self.punctuation and not self.source[-1] in self.punctuation: + #Currently, four different phrase translation scores are computed: + #0 inverse phrase translation probability φ(f|e) + #1 inverse lexical weighting lex(f|e) + #2 direct phrase translation probability φ(e|f) + #3 direct lexical weighting lex(e|f) + #self.probtrad=float(self.probs[1]) + self.probtrad=(float(self.probs[2])*float(self.probs[3])) + #print(self.source,self.trad,self.probtrad) + self.record=[] + self.record.append(self.source) + self.record.append(self.trad) + self.record.append(self.probtrad) + self.data.append(self.record) + self.continserts+=1 + if self.continserts==self.maxinserts: + self.cur.executemany("INSERT INTO index_pt (source, target, probability) VALUES (?,?,?)",self.data) + self.data=[] + self.continserts=0 + self.conn.commit() + except: + pass with self.conn: self.cur.executemany("INSERT INTO index_pt (source, target, probability) VALUES (?,?,?)",self.data) self.conn.commit() @@ -1281,36 +1284,36 @@ class TBXTools: - def start_freeling_api(self,freelingpath, DATApath, LANG): + def start_freeling_api(self,freelingpath, LANG): - + if not freelingpath.endswith("/"):freelingpath=freelingpath+"/" try: - sys.path.append(freelingpath) + sys.path.append(freelingpath+"APIs/python3/") import pyfreeling except: #pass - print("No Freeling API available. Verify Freeling PATH: "+freelingpath) + print("No Freeling API available. Verify Freeling PATH: "+freelingpath+"freeling/APIs/python3/") pyfreeling.util_init_locale("default"); # create language analyzer - self.la1=pyfreeling.lang_ident(DATApath+"common/lang_ident/ident.dat"); + self.la1=pyfreeling.lang_ident(freelingpath+"common/lang_ident/ident.dat"); # create options set for maco analyzer. Default values are Ok, except for data files. 
self.op1= pyfreeling.maco_options(LANG); self.op1.set_data_files( "", - DATApath + "common/punct.dat", - DATApath+ LANG + "/dicc.src", - DATApath + LANG + "/afixos.dat", + freelingpath + "common/punct.dat", + freelingpath+ LANG + "/dicc.src", + freelingpath + LANG + "/afixos.dat", "", - DATApath + LANG + "/locucions.dat", - DATApath + LANG + "/np.dat", - DATApath + LANG + "/quantities.dat", - DATApath + LANG + "/probabilitats.dat"); + freelingpath + LANG + "/locucions.dat", + freelingpath + LANG + "/np.dat", + freelingpath + LANG + "/quantities.dat", + freelingpath + LANG + "/probabilitats.dat"); # create analyzers - self.tk1=pyfreeling.tokenizer(DATApath+LANG+"/tokenizer.dat"); - self.sp1=pyfreeling.splitter(DATApath+LANG+"/splitter.dat"); + self.tk1=pyfreeling.tokenizer(freelingpath+LANG+"/tokenizer.dat"); + self.sp1=pyfreeling.splitter(freelingpath+LANG+"/splitter.dat"); self.sid1=self.sp1.open_session(); self.mf1=pyfreeling.maco(self.op1); @@ -1320,7 +1323,7 @@ class TBXTools: True, False, True, True ); # default: all created submodules are used # create tagger, sense anotator, and parsers - self.tg1=pyfreeling.hmm_tagger(DATApath+LANG+"/tagger.dat",True,2); + self.tg1=pyfreeling.hmm_tagger(freelingpath+LANG+"/tagger.dat",True,2); def tag_freeling_api(self,corpus="source"): with self.conn: |