Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/aoliverg/TBXTools.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAntoni Oliver <aoliverg@uoc.edu>2021-04-10 23:44:48 +0300
committerAntoni Oliver <aoliverg@uoc.edu>2021-04-10 23:44:48 +0300
commitd77e589b4ac409f633856842eb4e2b15020465ef (patch)
tree0cfd05c76079398a26d867bf03e9f1bc76c29ede
parent2e304b345d72c8d7cbefdca10063823ef8000ca7 (diff)
revision
-rwxr-xr-xTBXTools.py73
1 file changed, 38 insertions, 35 deletions
diff --git a/TBXTools.py b/TBXTools.py
index 3599524..cc6c6e6 100755
--- a/TBXTools.py
+++ b/TBXTools.py
@@ -1234,26 +1234,29 @@ class TBXTools:
self.source=self.camps[0].strip()
self.trad=self.camps[1].strip()
self.probs=self.camps[2].split(" ")
- if not self.trad[0] in self.punctuation and not self.source[0] in self.punctuation and not self.trad[-1] in self.punctuation and not self.source[-1] in self.punctuation:
- #Currently, four different phrase translation scores are computed:
- #0 inverse phrase translation probability φ(f|e)
- #1 inverse lexical weighting lex(f|e)
- #2 direct phrase translation probability φ(e|f)
- #3 direct lexical weighting lex(e|f)
- #self.probtrad=float(self.probs[1])
- self.probtrad=(float(self.probs[2])*float(self.probs[3]))
- #print(self.source,self.trad,self.probtrad)
- self.record=[]
- self.record.append(self.source)
- self.record.append(self.trad)
- self.record.append(self.probtrad)
- self.data.append(self.record)
- self.continserts+=1
- if self.continserts==self.maxinserts:
- self.cur.executemany("INSERT INTO index_pt (source, target, probability) VALUES (?,?,?)",self.data)
- self.data=[]
- self.continserts=0
- self.conn.commit()
+ try:
+ if not self.trad[0] in self.punctuation and not self.source[0] in self.punctuation and not self.trad[-1] in self.punctuation and not self.source[-1] in self.punctuation:
+ #Currently, four different phrase translation scores are computed:
+ #0 inverse phrase translation probability φ(f|e)
+ #1 inverse lexical weighting lex(f|e)
+ #2 direct phrase translation probability φ(e|f)
+ #3 direct lexical weighting lex(e|f)
+ #self.probtrad=float(self.probs[1])
+ self.probtrad=(float(self.probs[2])*float(self.probs[3]))
+ #print(self.source,self.trad,self.probtrad)
+ self.record=[]
+ self.record.append(self.source)
+ self.record.append(self.trad)
+ self.record.append(self.probtrad)
+ self.data.append(self.record)
+ self.continserts+=1
+ if self.continserts==self.maxinserts:
+ self.cur.executemany("INSERT INTO index_pt (source, target, probability) VALUES (?,?,?)",self.data)
+ self.data=[]
+ self.continserts=0
+ self.conn.commit()
+ except:
+ pass
with self.conn:
self.cur.executemany("INSERT INTO index_pt (source, target, probability) VALUES (?,?,?)",self.data)
self.conn.commit()
@@ -1281,36 +1284,36 @@ class TBXTools:
- def start_freeling_api(self,freelingpath, DATApath, LANG):
+ def start_freeling_api(self,freelingpath, LANG):
-
+ if not freelingpath.endswith("/"):freelingpath=freelingpath+"/"
try:
- sys.path.append(freelingpath)
+ sys.path.append(freelingpath+"APIs/python3/")
import pyfreeling
except:
#pass
- print("No Freeling API available. Verify Freeling PATH: "+freelingpath)
+ print("No Freeling API available. Verify Freeling PATH: "+freelingpath+"freeling/APIs/python3/")
pyfreeling.util_init_locale("default");
# create language analyzer
- self.la1=pyfreeling.lang_ident(DATApath+"common/lang_ident/ident.dat");
+ self.la1=pyfreeling.lang_ident(freelingpath+"common/lang_ident/ident.dat");
# create options set for maco analyzer. Default values are Ok, except for data files.
self.op1= pyfreeling.maco_options(LANG);
self.op1.set_data_files( "",
- DATApath + "common/punct.dat",
- DATApath+ LANG + "/dicc.src",
- DATApath + LANG + "/afixos.dat",
+ freelingpath + "common/punct.dat",
+ freelingpath+ LANG + "/dicc.src",
+ freelingpath + LANG + "/afixos.dat",
"",
- DATApath + LANG + "/locucions.dat",
- DATApath + LANG + "/np.dat",
- DATApath + LANG + "/quantities.dat",
- DATApath + LANG + "/probabilitats.dat");
+ freelingpath + LANG + "/locucions.dat",
+ freelingpath + LANG + "/np.dat",
+ freelingpath + LANG + "/quantities.dat",
+ freelingpath + LANG + "/probabilitats.dat");
# create analyzers
- self.tk1=pyfreeling.tokenizer(DATApath+LANG+"/tokenizer.dat");
- self.sp1=pyfreeling.splitter(DATApath+LANG+"/splitter.dat");
+ self.tk1=pyfreeling.tokenizer(freelingpath+LANG+"/tokenizer.dat");
+ self.sp1=pyfreeling.splitter(freelingpath+LANG+"/splitter.dat");
self.sid1=self.sp1.open_session();
self.mf1=pyfreeling.maco(self.op1);
@@ -1320,7 +1323,7 @@ class TBXTools:
True, False, True, True ); # default: all created submodules are used
# create tagger, sense anotator, and parsers
- self.tg1=pyfreeling.hmm_tagger(DATApath+LANG+"/tagger.dat",True,2);
+ self.tg1=pyfreeling.hmm_tagger(freelingpath+LANG+"/tagger.dat",True,2);
def tag_freeling_api(self,corpus="source"):
with self.conn: