/////////////////////////////////////////////////////////////////////////////// // // // This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. // // // // ModelBlocks is free software: you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation, either version 3 of the License, or // // (at your option) any later version. // // // // ModelBlocks is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with ModelBlocks. If not, see <http://www.gnu.org/licenses/>. // // // // ModelBlocks developers designate this particular file as subject to // // the "Moses" exception as provided by ModelBlocks developers in // // the LICENSE file that accompanies this code. 
// // //
///////////////////////////////////////////////////////////////////////////////

// NOTE(review): this listing appears to have been through a markup-stripping
// step.  Template argument lists are absent (e.g. `template G ( const
// G::ArrayIterator & it )`, `pair operator>>`, and the unbalanced `>` in the
// base class of S), and three function bodies below are missing text between
// a `<` and a later `>`.  The tokens are reproduced as found; the missing
// pieces must be restored from the authoritative source before this compiles.

#include "nl-cpt.h"
#include "TextObsVars.h"

// Delimiter strings; psSemi and psX are used below as arguments to the
// Delimited* random-variable templates.
char psX[]="";
char psSlash[]="/";
char psComma[]=",";
char psSemi[]=";";
char psSemiSemi[]=";;";
char psDashDiamondDash[]="-<>-";
char psTilde[]="~";
//char psBar[]="|";
char psLBrace[]="{";
char psRBrace[]="}";
char psLangle[]="<";
char psRangle[]=">";
char psLbrack[]="[";
char psRbrack[]="]";

// String forms of the initial and final states.
const char* BEG_STATE = "-/-;-/-;-/-;-/-;-";
const char* END_STATE = "eos/eos;-/-;-/-;-/-;-";

////////////////////////////////////////////////////////////////////////////////
//
//  Random Variables
//
////////////////////////////////////////////////////////////////////////////////

//////////////////////////////////////// Simple Variables

//// B: boolean
DiscreteDomain domB;
// Boolean-valued random variable; the two values used in this file are
// B_0 ("0") and B_1 ("1").
class B : public DiscreteDomainRV {
 public:
  B ( ) : DiscreteDomainRV ( ) { }
  B ( const char* ps ) : DiscreteDomainRV ( ps ) { }
};
const B B_0 ("0");
const B B_1 ("1");

//// D: depth (input only, to HHMM models)...
DiscreteDomain domD;
// Depth variable; constructible from an int or from a string.
class D : public DiscreteDomainRV {
 public:
  D ( ) : DiscreteDomainRV ( ) { }
  D ( int i ) : DiscreteDomainRV ( i ) { }
  D ( const char* ps ) : DiscreteDomainRV ( ps ) { }
};
const D D_0("0");
const D D_1("1");
const D D_2("2");
const D D_3("3");
const D D_4("4");
const D D_5("5");

//// G: grammatical constituent category
DiscreteDomain domG;
// Category variable.  Each value constructed from a string (or read from a
// StringInput) gets an entry in the static table hIsTerm.
class G : public DiscreteDomainRV {
 private:
  static SimpleHash hIsTerm;
  // Fills hIsTerm for this value if it has no entry yet: B_0 when the string
  // begins with an ASCII uppercase letter or contains '_', otherwise B_1.
  void calcDetModels ( string s ) {
    if (!hIsTerm.contains(*this)) {
      hIsTerm.set(*this) = (('A'<=s.c_str()[0] && s.c_str()[0]<='Z') || s.find('_')!=string::npos) ? B_0 : B_1;
    }
  }
 public:
  G ( ) : DiscreteDomainRV ( ) { }
  // Copies the value from an array iterator (does not call calcDetModels).
  template G ( const G::ArrayIterator & it ) { setVal(it); }
  G ( const char* ps ) : DiscreteDomainRV ( ps ) { calcDetModels(ps); }
  // Returns the hIsTerm entry recorded for this value.
  B isTerm ( ) const { return hIsTerm.get(*this); }
  // Two-step extraction: `si >> g` only pairs the input with the target; the
  // following `>> delimiter` performs the read through the base-class
  // operator and then calls calcDetModels on the value that was read.
  friend pair operator>> ( StringInput si, G& g ) { return pair(si,&g); }
  friend StringInput operator>> ( pair si_g, const char* psD ) {
    if ( si_g.first == NULL ) return NULL;
    StringInput si=si_g.first>>(DiscreteDomainRV&)*si_g.second>>psD;
    si_g.second->calcDetModels(si_g.second->getString());
    return si;
  }
};
SimpleHash G::hIsTerm;
const G G_NIL("-");
const G G_SUB("-"); // G_SUB = G_NIL
const G G_TOP("DISC");
const G G_RST("REST");
typedef G C;

//// A: added feature tags for underspec cats
DiscreteDomain domA;
class A : public DiscreteDomainRV {
 public:
  A ( ) : DiscreteDomainRV ( ) { }
  A ( const char* ps ) : DiscreteDomainRV ( ps ) { }
};
const A A_NIL ("-");

//////////////////////////////////////// Formally Joint Variables Implemented as Simple Variables

//// Rd: final-state (=FGA)...
DiscreteDomain domRd;
// A value whose string form is "<b>,<g>".  Three static tables map an Rd to
// its B part, to its G part, and (for strings starting with '1') a G back to
// the Rd.
class Rd : public DiscreteDomainRV {
 private:
  static SimpleHash hToB;
  static SimpleHash hToG;
  static SimpleHash hFromG;
  // Splits s at the first ',' (asserting that one exists) and records the
  // prefix as a B and the suffix as a G, each only if no entry exists yet.
  // The G -> Rd entry is written only inside the hToG branch and only when
  // the first character of s is '1'.
  void calcDetModels ( string s ) {
    if (!hToB.contains(*this)) {
      size_t i=s.find(',');
      assert(i!=string::npos);
      hToB.set(*this) = B(s.substr(0,i).c_str());
    }
    if (!hToG.contains(*this)) {
      size_t i=s.find(',');
      assert(i!=string::npos);
      hToG.set(*this) = G(s.substr(i+1).c_str());
      if ( '1'==s[0] ) hFromG.set(G(s.substr(i+1).c_str())) = *this;
    }
  }
 public:
  Rd ( ) : DiscreteDomainRV ( ) { }
  Rd ( const DiscreteDomainRV& rv ) : DiscreteDomainRV ( rv ) { }
  Rd ( const char* ps ) : DiscreteDomainRV ( ps ) { calcDetModels(ps); }
  // Looks the value up in hFromG; what get() yields for a G with no entry is
  // decided by SimpleHash, which is not visible here.
  Rd ( const G& g ) { *this = hFromG.get(g); }
  B getB ( ) const { return hToB.get(*this); }
  G getG ( ) const { return hToG.get(*this); }
  static Rd getRd ( G g ) { return hFromG.get(g); }
  // Two-step extraction, same scheme as for G: pair up, then read and
  // refresh the lookup tables.
  friend pair operator>> ( StringInput si, Rd& m ) { return pair(si,&m); }
  friend StringInput operator>> ( pair si_m, const char* psD ) {
    if ( si_m.first == NULL ) return NULL;
    StringInput si=si_m.first>>(DiscreteDomainRV&)*si_m.second>>psD;
    si_m.second->calcDetModels(si_m.second->getString());
    return si;
  }
};
SimpleHash Rd::hToB;
SimpleHash Rd::hToG;
SimpleHash Rd::hFromG;
const Rd Rd_INC("0,-"); // BOT
const Rd Rd_SUB("1,-"); // TOP

//////////////////////////////////////// Formally and Implementationally Joint Variables

//// Sd: store element...
// Pair of categories: getAct() returns `first`, getAwa() returns `second`.
class Sd : public DelimitedJoint2DRV {
  typedef DelimitedJoint2DRV Parent;
 public:
  Sd ( ) : Parent() { }
  template Sd ( const Sd::ArrayIterator & it ) { setVal(it); }
  Sd ( const G& gia, const G& giw ) : Parent(gia,giw) { }
  const G& getAct ( ) const { return first; }
  const G& getAwa ( ) const { return second; }
  // Iterator wrapper exposing the parent iterator's two components by name.
  template class ArrayIterator : public Parent::ArrayIterator {
   public:
    G::ArrayIterator & setAct ( ) { return Parent::ArrayIterator ::first; }
    G::ArrayIterator & setAwa ( ) { return Parent::ArrayIterator ::second; }
  };
  // Two-step extraction; the read is delegated to the Parent operator.
  friend pair operator>> ( StringInput si, Sd& sd ) { return pair(si,&sd); }
  friend StringInput operator>> ( pair si_sd, const char* psD ) {
    if ( si_sd.first == NULL ) return NULL;
    StringInput si = si_sd.first>>*(Parent*)si_sd.second>>psD;
    return si;
  }
};
const Sd Sd_TOP(G_TOP,G_RST);
const Sd Sd_SUB(G_SUB,G_SUB);

//// R: collection of syntactic variables at all depths in each `reduce' phase...
typedef DelimitedJointArrayRV<4,psSemi,Rd> R;

//// S: collection of syntactic variables at all depths in each `shift' phase...
// NOTE(review): the base-class argument list is visibly truncated (stray `>`).
class S : public DelimitedJoint2DRV,psSemi,G,psX> {
 public:
  // Returns `second` if it differs from G_SUB; otherwise the `second` of the
  // highest-indexed element of `first` (3, 2, 1) that differs from Sd_SUB,
  // falling back to element 0 unconditionally.
  operator G() const { return ( ( (second != G_SUB) ? second :
                                  (first.get(3)!=Sd_SUB) ? first.get(3).second :
                                  (first.get(2)!=Sd_SUB) ? first.get(2).second :
                                  (first.get(1)!=Sd_SUB) ? first.get(1).second : first.get(0).second ) ); }
  // Plain equality on the whole variable.
  bool compareFinal ( const S& s ) const { return(*this==s); }
};

//// Y: the set of all (marginalized) reduce and (modeled) shift variables in the HHMM...
class Y : public DelimitedJoint2DRV {
 public:
  operator R() const {return first;}
  operator S() const {return second;}
};

////////////////////////////////////////////////////////////////////////////////
//
//  Models
//
////////////////////////////////////////////////////////////////////////////////

//////////////////////////////////////// "Wrapper" models for individual RVs...

//// Model of Rd given D and Rd and Sd (from above) and Sd (from previous)
class RdModel {
 private:
  HidVarCPT4DModel mRd; // Reduction model: F giv D, G (active cat from prev), G (awaited cat from above) (assume prev awa = reduc)
  static const HidVarCPT1DModel mRd_INC; // Fixed Rd_INC model.
  static const HidVarCPT1DModel mRd_SUB; // Fixed Rd_SUB model.
 public:
  //static bool Rd_ROOT_OBS;
  // Sets the iterator `rd` and returns its probability.  Visible branches:
  //  - rdD is Rd_SUB and the previous awaited category is G_SUB: fixed
  //    mRd_SUB model;
  //  - rdD is Rd_SUB and the previous awaited category has isTerm()==B_1:
  //    learned mRd model conditioned on d, sdU.getAwa(), sdP.getAct().
  // NOTE(review): the remainder of this function is incomplete in this
  // listing (see the notes below); any further branches cannot be described
  // from here.
  LogProb setIterProb ( Rd::ArrayIterator& rd, const D& d, const Rd& rdD, const Sd& sdP, const Sd& sdU, bool b1, int& vctr ) const {
    LogProb pr;
    if ( rdD==Rd_SUB && (sdP.getAwa()==G_SUB) ) {
      // _/sub 1,sub (bottom) case...
      pr = mRd_SUB.setIterProb(rd,vctr);
    }
    else if ( rdD==Rd_SUB && sdP.getAwa().isTerm()==B_1 ) {
      // _/term 1,sub (middle) case...
      pr = mRd.setIterProb(rd,d,sdU.getAwa(),sdP.getAct(),vctr);
      // NOTE(review): text is missing between `"<` and `=-1` on the next line
      // (rest of the error message, any remaining branches, and the start of
      // a later `if`).  Reproduced as found.
      if ( vctr<-1 && pr==LogProb() ) cerr<<"\nERROR: no condition F "<=-1 && d==D_1 && b1!=(Rd(rd).getB()==B_1) ) pr=LogProb();
      //cerr<<" Rd "<
      // NOTE(review): text is missing here as well (end of the commented-out
      // trace, the end of setIterProb, and the start of the first friend
      // operator>> declaration).  Reproduced as found.
      operator>> ( StringInput si, RdModel& m ) { return pair(si,&m); }
  // Reads one model line: the tag "F ", then mRd, then the delimiter psD.
  // Yields the resulting StringInput, or StringInput(NULL) if the read failed.
  friend StringInput operator>> ( pair si_m, const char* psD ) {
    StringInput si;
    return ( (si=si_m.first>>"F ">>si_m.second->mRd>>psD)!=NULL ) ? si : StringInput(NULL);
  }
};
const HidVarCPT1DModel RdModel::mRd_INC(Rd_INC);
const HidVarCPT1DModel RdModel::mRd_SUB(Rd_SUB);

//// Model of Sd given D and Rd and Rd and Sd(from prev) and Sd(from above)
class SdModel {
 public:
  HidVarCPT3DModel mGe; // Expansion model of G given D, G (from above)
 private:
  HidVarCPT4DModel mGtaa; // Active transition model of G (active) given D, G (above awa), G (from reduction)
  HidVarCPT4DModel mGtaw; // Active transition model of G (awaited) given D, G (prev act), G (from reduction)
  HidVarCPT4DModel mGtww; // Awaited transition model of G (awaited) given D, G (prev awa), G (reduction below)
  static const HidVarCPT1DModel mG_SUB; // Fixed G_SUB model
  static HidVarCPT2DModel mG_CPY; // Cached G_CPY model -- WARNING: STATIC NON-CONST is not thread safe!
 public:
  // Sets the iterator `sd` (active and awaited parts) and returns its
  // probability.  Visible branches, all under rdD.getB()!=B_0, rd not
  // ("0", G_NIL), and rd.getB()==B_1:
  //  - the awaited category from above has isTerm()==B_1, or sdU is Sd_SUB:
  //    both parts come from the fixed mG_SUB model;
  //  - otherwise the active part comes from the expansion model mGe.
  // NOTE(review): everything after the "no condition Ge" message is missing
  // from this listing; the other branches cannot be described from here.
  LogProb setIterProb ( Sd::ArrayIterator& sd, const D& d, const Rd& rdD, const Rd& rd, const Sd& sdP, const Sd& sdU, int& vctr ) const {
    LogProb pr,p;
    if (rdD.getB()!=B_0) {
      if (rd.getB()!=B_0 || rd.getG()!=G_NIL) { //if (rd!=Rd_INC) {
        if (rd.getB()==B_1) {
          if (sdU.getAwa().isTerm()==B_1 || sdU==Sd_SUB) {
            // 1,g 1,g (expansion to sub) case:
            pr = mG_SUB.setIterProb(sd.setAct(),vctr);
            pr *= mG_SUB.setIterProb(sd.setAwa(),vctr);
          }
          else {
            // 1,g 1,g (expansion) case:
            pr = p = mGe.setIterProb(sd.setAct() ,d,sdU.getAwa(),vctr);
            // NOTE(review): text is missing after `"<` on the next line (rest
            // of this function and the start of the first friend operator>>
            // declaration).  Reproduced as found.
            if ( vctr<-1 && p==LogProb() ) cerr<<"\nERROR: no condition Ge "<
            operator>> ( StringInput si, SdModel& m ) { return pair(si,&m); }
  // Reads one model line, trying the tags "Ge ", "Gtaa ", "Gtaw ", "Gtww " in
  // that order (short-circuiting on the first that succeeds) and loading the
  // matching sub-model.  Yields StringInput(NULL) if none matched.
  friend StringInput operator>> ( pair si_m, const char* psD ) {
    StringInput si;
    return ( (si=si_m.first>>"Ge " >>si_m.second->mGe >>psD)!=NULL ||
             (si=si_m.first>>"Gtaa ">>si_m.second->mGtaa>>psD)!=NULL ||
             (si=si_m.first>>"Gtaw ">>si_m.second->mGtaw>>psD)!=NULL ||
             (si=si_m.first>>"Gtww ">>si_m.second->mGtww>>psD)!=NULL ) ? si : StringInput(NULL);
  }
};
const HidVarCPT1DModel SdModel::mG_SUB(G_SUB);
HidVarCPT2DModel SdModel::mG_CPY;

//////////////////////////////////////// Joint models...

//////////////////// Reduce phase...

//// Model of R given S
class RModel : public SingleFactoredModel {
 public:
  // Fills r from index 3 down to index 0 and returns the product of the four
  // per-depth probabilities.  Each call is conditioned on the Rd just set at
  // the next-higher index (for index 3: an Rd built from sP.second), on the
  // previous store element at the same index, and on the previous store
  // element one index lower (Sd_TOP for index 0).
  LogProb setIterProb ( R::ArrayIterator& r, const S& sP, bool b1, int& vctr ) const {
    const RdModel& mRd = getM1();
    LogProb pr;
    pr  = mRd.setIterProb ( r.set(4-1), 4, Rd(sP.second) , sP.first.get(4-1), sP.first.get(3-1), b1, vctr );
    pr *= mRd.setIterProb ( r.set(3-1), 3, Rd(r.get(4-1)), sP.first.get(3-1), sP.first.get(2-1), b1, vctr );
    pr *= mRd.setIterProb ( r.set(2-1), 2, Rd(r.get(3-1)), sP.first.get(2-1), sP.first.get(1-1), b1, vctr );
    pr *= mRd.setIterProb ( r.set(1-1), 1, Rd(r.get(2-1)), sP.first.get(1-1), Sd_TOP , b1, vctr );
    return pr;
  }
};

//////////////////// Shift phase...
//// Model of S given R and S class SModel : public SingleFactoredModel { private: static const HidVarCPT1DModel mG_SUB; public: LogProb setIterProb ( S::ArrayIterator& s, const R::ArrayIterator& r, const S& sP, int& vctr ) const { const SdModel& mSd = getM1(); LogProb pr,p; pr = mSd.setIterProb ( s.first.set(1-1), 1, Rd(r.get(2-1)), Rd(r.get(1-1)), sP.first.get(1-1), Sd_TOP , vctr ); pr *= mSd.setIterProb ( s.first.set(2-1), 2, Rd(r.get(3-1)), Rd(r.get(2-1)), sP.first.get(2-1), Sd(s.first.set(1-1)), vctr ); pr *= mSd.setIterProb ( s.first.set(3-1), 3, Rd(r.get(4-1)), Rd(r.get(3-1)), sP.first.get(3-1), Sd(s.first.set(2-1)), vctr ); pr *= mSd.setIterProb ( s.first.set(4-1), 4, Rd(sP.second) , Rd(r.get(4-1)), sP.first.get(4-1), Sd(s.first.set(3-1)), vctr ); if ( G(s.first.set(4-1).second)!=G_SUB && G(s.first.set(4-1).second).isTerm()!=B_1 ) { pr *= p = mSd.mGe.setIterProb (s.second, 5, G(s.first.set(4-1).second), vctr ); if ( vctr<-1 && p==LogProb() ) cerr<<"\nERROR: no condition Ge 5 "< SModel::mG_SUB(G_SUB); //////////////////// Overall... //// Model of Y=R,S given S class YModel : public DoubleFactoredModel { public: typedef Y::ArrayIterator IterVal; S& setTrellDat ( S& s, const Y::ArrayIterator& y ) const { s.setVal(y.second); return s; } R setBackDat ( const Y::ArrayIterator& y ) const { R r; for(int i=0;i<4;i++) r.set(i)=Rd(y.first.get(i)); return r; } LogProb setIterProb ( Y::ArrayIterator& y, const S& sP, const X& x, bool b1, int& vctr ) const { const RModel& mR = getM1(); const SModel& mS = getM2(); LogProb pr; pr = mR.setIterProb ( y.first, sP, b1, vctr ); if ( LogProb()==pr ) return pr; pr *= mS.setIterProb ( y.second, y.first, sP, vctr ); return pr; } void update ( ) const { } };