Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mgiza.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'mgizapp/src/NTables.h')
-rw-r--r--mgizapp/src/NTables.h209
1 files changed, 107 insertions, 102 deletions
diff --git a/mgizapp/src/NTables.h b/mgizapp/src/NTables.h
index 698a470..c2d9614 100644
--- a/mgizapp/src/NTables.h
+++ b/mgizapp/src/NTables.h
@@ -8,14 +8,14 @@
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
- This program is distributed in the hope that it will be useful,
+ This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -32,113 +32,118 @@
extern double NTablesFactorGraphemes, NTablesFactorGeneral;
-template<class VALTYPE> class nmodel {
+template<class VALTYPE> class nmodel
+{
private:
- Array2<VALTYPE, Vector<VALTYPE> > ntab;
+ Array2<VALTYPE, Vector<VALTYPE> > ntab;
public:
- nmodel(int maxw, int maxn) :
- ntab(maxw, maxn, 0.0) {
- }
- VALTYPE getValue(int w, unsigned int n) const {
- massert(w!=0);
- if (n>=ntab.getLen2())
- return 0.0;
- else
- return max(ntab(w, n), VALTYPE(PROB_SMOOTH));
- }
+ nmodel(int maxw, int maxn) :
+ ntab(maxw, maxn, 0.0) {
+ }
+ VALTYPE getValue(int w, unsigned int n) const {
+ massert(w!=0);
+ if (n>=ntab.getLen2())
+ return 0.0;
+ else
+ return max(ntab(w, n), VALTYPE(PROB_SMOOTH));
+ }
protected:
- inline VALTYPE&getRef(int w, int n) {
- //massert(w!=0);
- return ntab(w, n);
- };
- Mutex lock;
+ inline VALTYPE&getRef(int w, int n) {
+ //massert(w!=0);
+ return ntab(w, n);
+ };
+ Mutex lock;
public:
- inline void addValue(int w , int n,const VALTYPE& t){lock.lock();ntab(w,n)+=t;lock.unlock();};
+ inline void addValue(int w , int n,const VALTYPE& t) {
+ lock.lock();
+ ntab(w,n)+=t;
+ lock.unlock();
+ };
public:
- template<class COUNT> void normalize(nmodel<COUNT>&write,
- const Vector<WordEntry>* _evlist) const {
- int h1=ntab.getLen1(), h2=ntab.getLen2();
- int nParams=0;
- if (_evlist&&(NTablesFactorGraphemes||NTablesFactorGeneral)) {
- size_t maxlen=0;
- const Vector<WordEntry>&evlist=*_evlist;
- for (unsigned int i=1; i<evlist.size(); i++)
- maxlen=max(maxlen, evlist[i].word.length());
- Array2<COUNT,Vector<COUNT> > counts(maxlen+1, MAX_FERTILITY+1, 0.0);
- Vector<COUNT> nprob_general(MAX_FERTILITY+1,0.0);
- for (unsigned int i=1; i<min((unsigned int)h1,
- (unsigned int)evlist.size()); i++) {
- int l=evlist[i].word.length();
- for (int k=0; k<h2; k++) {
- counts(l, k)+=getValue(i, k);
- nprob_general[k]+=getValue(i, k);
- }
- }
- COUNT sum2=0;
- for (unsigned int i=1; i<maxlen+1; i++) {
- COUNT sum=0.0;
- for (int k=0; k<h2; k++)
- sum+=counts(i, k);
- sum2+=sum;
- if (sum) {
- double average=0.0;
- //cerr << "l: " << i << " " << sum << " ";
- for (int k=0; k<h2; k++) {
- counts(i, k)/=sum;
- //cerr << counts(i,k) << ' ';
- average+=k*counts(i, k);
- }
- //cerr << "avg: " << average << endl;
- //cerr << '\n';
- }
- }
- for (unsigned int k=0; k<nprob_general.size(); k++)
- nprob_general[k]/=sum2;
+ template<class COUNT> void normalize(nmodel<COUNT>&write,
+ const Vector<WordEntry>* _evlist) const {
+ int h1=ntab.getLen1(), h2=ntab.getLen2();
+ int nParams=0;
+ if (_evlist&&(NTablesFactorGraphemes||NTablesFactorGeneral)) {
+ size_t maxlen=0;
+ const Vector<WordEntry>&evlist=*_evlist;
+ for (unsigned int i=1; i<evlist.size(); i++)
+ maxlen=max(maxlen, evlist[i].word.length());
+ Array2<COUNT,Vector<COUNT> > counts(maxlen+1, MAX_FERTILITY+1, 0.0);
+ Vector<COUNT> nprob_general(MAX_FERTILITY+1,0.0);
+ for (unsigned int i=1; i<min((unsigned int)h1,
+ (unsigned int)evlist.size()); i++) {
+ int l=evlist[i].word.length();
+ for (int k=0; k<h2; k++) {
+ counts(l, k)+=getValue(i, k);
+ nprob_general[k]+=getValue(i, k);
+ }
+ }
+ COUNT sum2=0;
+ for (unsigned int i=1; i<maxlen+1; i++) {
+ COUNT sum=0.0;
+ for (int k=0; k<h2; k++)
+ sum+=counts(i, k);
+ sum2+=sum;
+ if (sum) {
+ double average=0.0;
+ //cerr << "l: " << i << " " << sum << " ";
+ for (int k=0; k<h2; k++) {
+ counts(i, k)/=sum;
+ //cerr << counts(i,k) << ' ';
+ average+=k*counts(i, k);
+ }
+ //cerr << "avg: " << average << endl;
+ //cerr << '\n';
+ }
+ }
+ for (unsigned int k=0; k<nprob_general.size(); k++)
+ nprob_general[k]/=sum2;
- for (int i=1; i<h1; i++) {
- int l=-1;
- if ((unsigned int)i<evlist.size())
- l=evlist[i].word.length();
- COUNT sum=0.0;
- for (int k=0; k<h2; k++)
- sum+=getValue(i, k)+((l==-1) ? 0.0 : (counts(l, k)
- *NTablesFactorGraphemes)) + NTablesFactorGeneral
- *nprob_general[k];
- assert(sum);
- for (int k=0; k<h2; k++) {
- write.getRef(i, k)=(getValue(i, k)+((l==-1) ? 0.0
- : (counts(l, k)*NTablesFactorGraphemes)))/sum
- + NTablesFactorGeneral*nprob_general[k];
- nParams++;
- }
- }
- } else
- for (int i=1; i<h1; i++) {
- COUNT sum=0.0;
- for (int k=0; k<h2; k++)
- sum+=getValue(i, k);
- assert(sum);
- for (int k=0; k<h2; k++) {
- write.getRef(i, k)=getValue(i, k)/sum;
- nParams++;
- }
- }
- cerr << "NTable contains " << nParams << " parameter.\n";
- }
+ for (int i=1; i<h1; i++) {
+ int l=-1;
+ if ((unsigned int)i<evlist.size())
+ l=evlist[i].word.length();
+ COUNT sum=0.0;
+ for (int k=0; k<h2; k++)
+ sum+=getValue(i, k)+((l==-1) ? 0.0 : (counts(l, k)
+ *NTablesFactorGraphemes)) + NTablesFactorGeneral
+ *nprob_general[k];
+ assert(sum);
+ for (int k=0; k<h2; k++) {
+ write.getRef(i, k)=(getValue(i, k)+((l==-1) ? 0.0
+ : (counts(l, k)*NTablesFactorGraphemes)))/sum
+ + NTablesFactorGeneral*nprob_general[k];
+ nParams++;
+ }
+ }
+ } else
+ for (int i=1; i<h1; i++) {
+ COUNT sum=0.0;
+ for (int k=0; k<h2; k++)
+ sum+=getValue(i, k);
+ assert(sum);
+ for (int k=0; k<h2; k++) {
+ write.getRef(i, k)=getValue(i, k)/sum;
+ nParams++;
+ }
+ }
+ cerr << "NTable contains " << nParams << " parameter.\n";
+ }
- bool merge(nmodel<VALTYPE>& n, int noEW, const Vector<WordEntry>& evlist);
- void clear() {
- int h1=ntab.getLen1(), h2=ntab.getLen2();
- for (int i=0; i<h1; i++)
- for (int k=0; k<h2; k++)
- ntab(i, k)=0;
- }
- void printNTable(int noEW, const char* filename,
- const Vector<WordEntry>& evlist, bool) const;
- void printRealNTable(int noEW, const char* filename,
- const Vector<WordEntry>& evlist, bool) const;
- bool readAugNTable(const char *filename);
- bool readNTable(const char *filename);
+ bool merge(nmodel<VALTYPE>& n, int noEW, const Vector<WordEntry>& evlist);
+ void clear() {
+ int h1=ntab.getLen1(), h2=ntab.getLen2();
+ for (int i=0; i<h1; i++)
+ for (int k=0; k<h2; k++)
+ ntab(i, k)=0;
+ }
+ void printNTable(int noEW, const char* filename,
+ const Vector<WordEntry>& evlist, bool) const;
+ void printRealNTable(int noEW, const char* filename,
+ const Vector<WordEntry>& evlist, bool) const;
+ bool readAugNTable(const char *filename);
+ bool readNTable(const char *filename);
};