/* Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI) This file is part of GIZA++ ( extension of GIZA ). This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #ifndef NO_TRAINING #include "ForwardBackward.h" #include "Globals.h" #include "myassert.h" #include "HMMTables.h" #include "mymath.h" double ForwardBackwardTraining(const HMMNetwork&net, Array&g, Array< Array2 >&E) { const int I = net.size1(), J = net.size2(), N = I * J; Array alpha(N, 0), beta(N, 0), sum(J); for (int i = 0; i < I; i++) beta[N - I + i] = net.getBetainit(i); double * cur_beta = conv (beta.begin()) + N - I - 1; for (int j = J - 2; j >= 0; --j) for (int ti = I - 1; ti >= 0; --ti, --cur_beta) { const double *next_beta = conv (beta.begin()) + (j + 1) * I; const double *alprob = &net.outProb(j, ti, 0), *next_node = &net.nodeProb(0, j + 1); for (int ni = 0; ni < I; ++ni, (next_node += J)) { massert(cur_beta (alpha.begin()) + I; cur_beta = conv (beta.begin()) + I; for (int j = 1; j < J; j++) { Array2&e = E[(E.size() == 1) ? 0 : (j - 1)]; if ((E.size() != 1) || j == 1) { e.resize(I, I); fill(e.begin(), e.end(), 0.0); } for (int ti = 0; ti < I; ++ti, ++cur_alpha, ++cur_beta) { const double * prev_alpha = conv (alpha.begin()) + I * (j - 1); double *cur_e = &e(ti, 0); double this_node = net.nodeProb(ti, j); const double* alprob = &net.outProb(j - 1, 0, ti); for (int pi = 0; pi < I; ++pi, ++prev_alpha, (alprob += I)) { massert(prev_alpha ()); double bsum = 0, esum = 0, esum2; for (int i = 0; i < I; i++) bsum += beta[i] * net.nodeProb(i, 0) * net.getAlphainit(i); for (unsigned int j = 0; j < (unsigned int) E.size(); j++) { Array2&e = E[j]; const double *epe = e.end(); for (const double*ep = e.begin(); ep != epe; ++ep) esum += *ep; } if (J > 1) esum2 = esum / (J - 1); else esum2 = 0.0; if (!(esum2 == 0.0 || mfabs(esum2 - bsum) / bsum < 1e-3 * I)) cout << "ERROR2: " << esum2 << " " << bsum << " " << esum << net << endl; double * sumptr = conv (sum.begin()); double* ge = conv (g.end()); for (double* gp = conv (g.begin()); gp != ge; gp += I) { *sumptr++ = normalize_if_possible(gp, gp + I); if (bsum && !(mfabs((*(sumptr - 1) - bsum) / bsum) < 1e-3 * I)) cout << "ERROR: " << *(sumptr - 1) << " " << bsum << " " << mfabs( (*(sumptr - 1) - bsum) / bsum) << ' ' << I << ' ' << J << endl; } for (unsigned int j = 0; j < (unsigned int) E.size(); j++) { Array2&e = E[j]; double* epe = e.end(); if (esum) for (double*ep = e.begin(); ep != epe; ++ep) *ep /= esum; else for (double*ep = e.begin(); ep != epe; ++ep) *ep /= 1.0 / (max(I * I, I * I * (J - 1))); } if (sum.size()) return sum[0]; else return 1.0; } void HMMViterbi(const HMMNetwork&net, Array&vit) { const int I = net.size1(), J = net.size2(); vit.resize(J); Array g; Array > e(1); ForwardBackwardTraining(net, g, e); for (int j = 0; j < J; j++) { double * begin = conv (g.begin()) + I * j; vit[j] = max_element(begin, begin + I) - begin; } } void HMMViterbi(const HMMNetwork&net, Array&g, Array&vit) { const int I = net.size1(), J = net.size2(); vit.resize(J); for (int j = 0; j < J; j++) { double* begin = conv (g.begin()) + I * j; vit[j] = max_element(begin, begin + I) - begin; } } double HMMRealViterbi(const HMMNetwork&net, Array&vitar, int pegi, int pegj, bool verbose) { const int I = net.size1(), J = net.size2(), N = I * J; Array alpha(N, -1); Array bp(N, (double*) 0); vitar.resize(J); if (J == 0) return 1.0; for (int i = 0; i < I; i++) { alpha[i] = net.getAlphainit(i) * net.nodeProb(i, 0); if (i > I / 2) alpha[i] = 0; // only first empty word can be chosen bp[i] = 0; } double *cur_alpha = conv (alpha.begin()) + I; double **cur_bp = conv (bp.begin()) + I; for (int j = 1; j < J; j++) { if (pegj + 1 == j) for (int ti = 0; ti < I; ti++) if ((pegi != -1 && ti != pegi) || (pegi == -1 && ti < I / 2)) (cur_alpha - I)[ti] = 0.0; for (int ti = 0; ti < I; ++ti, ++cur_alpha, ++cur_bp) { double* prev_alpha = conv (alpha.begin()) + I * (j - 1); double this_node = net.nodeProb(ti, j); const double *alprob = &net.outProb(j - 1, 0, ti); for (int pi = 0; pi < I; ++pi, ++prev_alpha, (alprob += I)) { massert(prev_alpha *cur_alpha) { (*cur_alpha) = alpha_increment; (*cur_bp) = prev_alpha; } } } } for (int i = 0; i < I; i++) alpha[N - I + i] *= net.getBetainit(i); if (pegj == J - 1) for (int ti = 0; ti < I; ti++) if ((pegi != -1 && ti != pegi) || (pegi == -1 && ti < I / 2)) (alpha)[N - I + ti] = 0.0; int j = J - 1; cur_alpha = conv (alpha.begin()) + j * I; vitar[J - 1] = max_element(cur_alpha, cur_alpha + I) - cur_alpha; double ret = *max_element(cur_alpha, cur_alpha + I); while (bp[vitar[j] + j * I]) { cur_alpha -= I; vitar[j - 1] = bp[vitar[j] + j * I] - cur_alpha; massert(vitar[j-1]=0); j--; } massert(j==0); if (verbose) { cout << "VERB:PEG: " << pegi << ' ' << pegj << endl; for (int j = 0; j < J; j++) cout << "NP " << net.nodeProb(vitar[j], j) << ' ' << "AP " << ((j == 0) ? net.getAlphainit(vitar[j]) : net.outProb(j - 1, vitar[j - 1], vitar[j])) << " j:" << j << " i:" << vitar[j] << "; "; cout << endl; } return ret; } double MaximumTraining(const HMMNetwork&net, Array&g, Array >&E) { Array vitar; double ret = HMMRealViterbi(net, vitar); const int I = net.size1(), J = net.size2(); if (E.size() == 1) { Array2&e = E[0]; e.resize(I, I); g.resize(I * J); fill(g.begin(), g.end(), 0.0); fill(e.begin(), e.end(), 0.0); for (int i = 0; i < J; ++i) { g[i * I + vitar[i]] = 1.0; if (i > 0) e(vitar[i], vitar[i - 1])++; } } else { g.resize(I * J); fill(g.begin(), g.end(), 0.0); for (int i = 0; i < J; ++i) { g[i * I + vitar[i]] = 1.0; if (i > 0) { Array2&e = E[i - 1]; e.resize(I, I); fill(e.begin(), e.end(), 0.0); e(vitar[i], vitar[i - 1])++; } } } return ret; } #endif