Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mgiza.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'experimental/dual-model/MGIZA/src/ForwardBackward.cpp')
-rw-r--r--experimental/dual-model/MGIZA/src/ForwardBackward.cpp240
1 files changed, 240 insertions, 0 deletions
diff --git a/experimental/dual-model/MGIZA/src/ForwardBackward.cpp b/experimental/dual-model/MGIZA/src/ForwardBackward.cpp
new file mode 100644
index 0000000..186ff41
--- /dev/null
+++ b/experimental/dual-model/MGIZA/src/ForwardBackward.cpp
@@ -0,0 +1,240 @@
+/*
+
+ Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
+
+ This file is part of GIZA++ ( extension of GIZA ).
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ USA.
+
+ */
+#ifndef NO_TRAINING
+#include "ForwardBackward.h"
+#include "Globals.h"
+#include "myassert.h"
+#include "HMMTables.h"
+#include "mymath.h"
+
+double ForwardBackwardTraining(const HMMNetwork&net, Array<double>&g, Array<
+ Array2<double> >&E) {
+ const int I = net.size1(), J = net.size2(), N = I * J;
+ Array<double> alpha(N, 0), beta(N, 0), sum(J);
+ for (int i = 0; i < I; i++)
+ beta[N - I + i] = net.getBetainit(i);
+ double * cur_beta = conv<double> (beta.begin()) + N - I - 1;
+ for (int j = J - 2; j >= 0; --j)
+ for (int ti = I - 1; ti >= 0; --ti, --cur_beta) {
+ const double *next_beta = conv<double> (beta.begin()) + (j + 1) * I;
+ const double *alprob = &net.outProb(j, ti, 0), *next_node =
+ &net.nodeProb(0, j + 1);
+ for (int ni = 0; ni < I; ++ni, (next_node += J)) {
+ massert(cur_beta<next_beta&& &net.outProb(j,ti,ni)==alprob);
+ massert(next_node == &net.nodeProb(ni,j+1));
+ /* if( VERB&&(*next_beta)*(*alprob)*(*next_node) )
+ cout << "B= " << (int)(cur_beta-beta.begin()) << " += " << (*next_beta) << "("
+ << next_beta-beta.begin() << ") alprob:" << (*alprob) << " lexprob:" << (*next_node) << endl;*/
+ (*cur_beta) += (*next_beta++) * (*alprob++) * (*next_node);
+ }
+ }
+ for (int i = 0; i < I; i++)
+ alpha[i] = net.getAlphainit(i) * net.nodeProb(i, 0);
+ double* cur_alpha = conv<double> (alpha.begin()) + I;
+ cur_beta = conv<double> (beta.begin()) + I;
+ for (int j = 1; j < J; j++) {
+ Array2<double>&e = E[(E.size() == 1) ? 0 : (j - 1)];
+ if ((E.size() != 1) || j == 1) {
+ e.resize(I, I);
+ fill(e.begin(), e.end(), 0.0);
+ }
+
+ for (int ti = 0; ti < I; ++ti, ++cur_alpha, ++cur_beta) {
+ const double * prev_alpha = conv<double> (alpha.begin()) + I * (j
+ - 1);
+ double *cur_e = &e(ti, 0);
+ double this_node = net.nodeProb(ti, j);
+ const double* alprob = &net.outProb(j - 1, 0, ti);
+ for (int pi = 0; pi < I; ++pi, ++prev_alpha, (alprob += I)) {
+ massert(prev_alpha<cur_alpha&& &net.outProb(j-1,pi,ti)==alprob);
+ massert(&e(ti,pi)==cur_e);
+ const double alpha_increment = *prev_alpha * (*alprob)
+ * this_node;
+ (*cur_alpha) += alpha_increment;
+ (*cur_e++) += alpha_increment * (*cur_beta);
+ }
+ }
+ }
+ g.resize(N);
+ transform(alpha.begin(), alpha.end(), beta.begin(), g.begin(), multiplies<
+ double> ());
+ double bsum = 0, esum = 0, esum2;
+ for (int i = 0; i < I; i++)
+ bsum += beta[i] * net.nodeProb(i, 0) * net.getAlphainit(i);
+ for (unsigned int j = 0; j < (unsigned int) E.size(); j++) {
+ Array2<double>&e = E[j];
+ const double *epe = e.end();
+ for (const double*ep = e.begin(); ep != epe; ++ep)
+ esum += *ep;
+ }
+ if (J > 1)
+ esum2 = esum / (J - 1);
+ else
+ esum2 = 0.0;
+ if (!(esum2 == 0.0 || mfabs(esum2 - bsum) / bsum < 1e-3 * I))
+ cout << "ERROR2: " << esum2 << " " << bsum << " " << esum << net
+ << endl;
+ double * sumptr = conv<double> (sum.begin());
+ double* ge = conv<double> (g.end());
+ for (double* gp = conv<double> (g.begin()); gp != ge; gp += I) {
+ *sumptr++ = normalize_if_possible(gp, gp + I);
+ if (bsum && !(mfabs((*(sumptr - 1) - bsum) / bsum) < 1e-3 * I))
+ cout << "ERROR: " << *(sumptr - 1) << " " << bsum << " " << mfabs(
+ (*(sumptr - 1) - bsum) / bsum) << ' ' << I << ' ' << J
+ << endl;
+ }
+ for (unsigned int j = 0; j < (unsigned int) E.size(); j++) {
+ Array2<double>&e = E[j];
+ double* epe = e.end();
+ if (esum)
+ for (double*ep = e.begin(); ep != epe; ++ep)
+ *ep /= esum;
+ else
+ for (double*ep = e.begin(); ep != epe; ++ep)
+ *ep /= 1.0 / (max(I * I, I * I * (J - 1)));
+ }
+ if (sum.size())
+ return sum[0];
+ else
+ return 1.0;
+}
+void HMMViterbi(const HMMNetwork&net, Array<int>&vit) {
+ const int I = net.size1(), J = net.size2();
+ vit.resize(J);
+ Array<double> g;
+ Array<Array2<double> > e(1);
+ ForwardBackwardTraining(net, g, e);
+ for (int j = 0; j < J; j++) {
+ double * begin = conv<double> (g.begin()) + I * j;
+ vit[j] = max_element(begin, begin + I) - begin;
+ }
+}
+void HMMViterbi(const HMMNetwork&net, Array<double>&g, Array<int>&vit) {
+ const int I = net.size1(), J = net.size2();
+ vit.resize(J);
+ for (int j = 0; j < J; j++) {
+ double* begin = conv<double> (g.begin()) + I * j;
+ vit[j] = max_element(begin, begin + I) - begin;
+ }
+}
+
+double HMMRealViterbi(const HMMNetwork&net, Array<int>&vitar, int pegi,
+ int pegj, bool verbose) {
+ const int I = net.size1(), J = net.size2(), N = I * J;
+ Array<double> alpha(N, -1);
+ Array<double*> bp(N, (double*) 0);
+ vitar.resize(J);
+ if (J == 0)
+ return 1.0;
+ for (int i = 0; i < I; i++) {
+ alpha[i] = net.getAlphainit(i) * net.nodeProb(i, 0);
+ if (i > I / 2)
+ alpha[i] = 0; // only first empty word can be chosen
+ bp[i] = 0;
+ }
+ double *cur_alpha = conv<double> (alpha.begin()) + I;
+ double **cur_bp = conv<double*> (bp.begin()) + I;
+ for (int j = 1; j < J; j++) {
+ if (pegj + 1 == j)
+ for (int ti = 0; ti < I; ti++)
+ if ((pegi != -1 && ti != pegi) || (pegi == -1 && ti < I / 2))
+ (cur_alpha - I)[ti] = 0.0;
+ for (int ti = 0; ti < I; ++ti, ++cur_alpha, ++cur_bp) {
+ double* prev_alpha = conv<double> (alpha.begin()) + I * (j - 1);
+ double this_node = net.nodeProb(ti, j);
+ const double *alprob = &net.outProb(j - 1, 0, ti);
+ for (int pi = 0; pi < I; ++pi, ++prev_alpha, (alprob += I)) {
+ massert(prev_alpha<cur_alpha&& &net.outProb(j-1,pi,ti)==alprob);
+ const double alpha_increment = *prev_alpha * (*alprob)
+ * this_node;
+ if (alpha_increment > *cur_alpha) {
+ (*cur_alpha) = alpha_increment;
+ (*cur_bp) = prev_alpha;
+ }
+ }
+ }
+ }
+ for (int i = 0; i < I; i++)
+ alpha[N - I + i] *= net.getBetainit(i);
+ if (pegj == J - 1)
+ for (int ti = 0; ti < I; ti++)
+ if ((pegi != -1 && ti != pegi) || (pegi == -1 && ti < I / 2))
+ (alpha)[N - I + ti] = 0.0;
+
+ int j = J - 1;
+ cur_alpha = conv<double> (alpha.begin()) + j * I;
+ vitar[J - 1] = max_element(cur_alpha, cur_alpha + I) - cur_alpha;
+ double ret = *max_element(cur_alpha, cur_alpha + I);
+ while (bp[vitar[j] + j * I]) {
+ cur_alpha -= I;
+ vitar[j - 1] = bp[vitar[j] + j * I] - cur_alpha;
+ massert(vitar[j-1]<I&&vitar[j-1]>=0);
+ j--;
+ }
+ massert(j==0);
+ if (verbose) {
+ cout << "VERB:PEG: " << pegi << ' ' << pegj << endl;
+ for (int j = 0; j < J; j++)
+ cout << "NP " << net.nodeProb(vitar[j], j) << ' ' << "AP " << ((j
+ == 0) ? net.getAlphainit(vitar[j]) : net.outProb(j - 1,
+ vitar[j - 1], vitar[j])) << " j:" << j << " i:" << vitar[j]
+ << "; ";
+ cout << endl;
+ }
+ return ret;
+}
+
+double MaximumTraining(const HMMNetwork&net, Array<double>&g, Array<Array2<
+ double> >&E) {
+ Array<int> vitar;
+ double ret = HMMRealViterbi(net, vitar);
+ const int I = net.size1(), J = net.size2();
+ if (E.size() == 1) {
+ Array2<double>&e = E[0];
+ e.resize(I, I);
+ g.resize(I * J);
+ fill(g.begin(), g.end(), 0.0);
+ fill(e.begin(), e.end(), 0.0);
+ for (int i = 0; i < J; ++i) {
+ g[i * I + vitar[i]] = 1.0;
+ if (i > 0)
+ e(vitar[i], vitar[i - 1])++;
+ }
+ } else {
+ g.resize(I * J);
+ fill(g.begin(), g.end(), 0.0);
+ for (int i = 0; i < J; ++i) {
+ g[i * I + vitar[i]] = 1.0;
+ if (i > 0) {
+ Array2<double>&e = E[i - 1];
+ e.resize(I, I);
+ fill(e.begin(), e.end(), 0.0);
+ e(vitar[i], vitar[i - 1])++;
+ }
+ }
+ }
+ return ret;
+}
+
+#endif
+