Merge branch 'master' of https://github.com/hieuhoang/marian

author: Hieu Hoang <hieuhoang@gmail.com> 2018-01-16 19:25:36 +0300
committer: Hieu Hoang <hieuhoang@gmail.com> 2018-01-16 19:25:36 +0300
commit: 8003afcdac90fbd508b19b078e33ed793d97c020 (patch)
tree: 0008d49b62937bc4442811ed5ca16b9359222b9f
parent: ed9ca006fd2dd4427b9b4bb884e9bc50846f6c8b (diff)
parent: cf5a42d1e70954a13eb11145bc2b8801ba6dc21d (diff)
1 file changed, 13 insertions, 12 deletions
diff --git a/src/amun/gpu/mblas/matrix_functions.cu b/src/amun/gpu/mblas/matrix_functions.cu
index 457039f3..75ab0dcd 100644
--- a/src/amun/gpu/mblas/matrix_functions.cu
+++ b/src/amun/gpu/mblas/matrix_functions.cu
@@ -1330,13 +1330,14 @@ void LogSoftmaxAndNBest(mblas::Vector<NthOutBatch> &nBest,
   cerr << "beamSizeSum=" << beamSizeSum << endl;
   cerr << "batchSize=" << batchSize << endl;
   cerr << "candidateInd=" << candidateInd << endl;
-  cerr << "hypo2BeamSize=" << Debug(hypo2BeamSize, 0) << endl;
-  cerr << "hypo2Candidate=" << Debug(hypo2Candidate, 0) << endl;
-  cerr << "batch2Hypo=" << Debug(batch2Hypo, 0) << endl;
-  cerr << "nBest=" << Debug(nBest, 0) << endl;
-  cerr << "nBestCandidates=" << Debug(nBestCandidates, 0) << endl;
+  cerr << "hypo2BeamSize=" << hypo2BeamSize.Debug(0) << endl;
+  cerr << "hypo2Candidate=" << hypo2Candidate.Debug(0) << endl;
+  cerr << "batch2Hypo=" << batch2Hypo.Debug(0) << endl;
+  cerr << "nBest=" << nBest.Debug(0) << endl;
+  cerr << "nBestCandidates=" << nBestCandidates.Debug(0) << endl;
   cerr << endl;
   */
+
   MatrixWrapper<float> inWrap(in);
   MatrixWrapper<float> b4Wrap(b4);
   VectorWrapper<unsigned> hypo2BeamSizeWrap(hypo2BeamSize);
@@ -1349,12 +1350,6 @@ void LogSoftmaxAndNBest(mblas::Vector<NthOutBatch> &nBest,
   VectorWrapper<unsigned> beamSizesWrap(d_beamSizes);
 
   //PAUSE_TIMER("LogSoftmax excl kernels");
-
-  int blocks = std::min(MAX_BLOCKS, (int)in.dim(0));
-  int threads = std::min(MAX_THREADS, (int)in.dim(1));
-  int shared = sizeof(NthOutBatch) * threads * maxBeamSize
-             + sizeof(float) * threads;
-
   //HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream()));
       
   //BEGIN_TIMER("gBeamSizeInit");
@@ -1376,6 +1371,11 @@ void LogSoftmaxAndNBest(mblas::Vector<NthOutBatch> &nBest,
   */
   //HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream()));
     
+  unsigned blocks = std::min((unsigned) MAX_BLOCKS, in.dim(0));
+  unsigned threads = std::min((unsigned)MAX_THREADS, in.dim(1));
+  unsigned shared = sizeof(NthOutBatch) * threads * maxBeamSize
+             + sizeof(float) * threads;
+
   //BEGIN_TIMER("gLogSoftMax");
   gLogSoftMax<<<blocks, threads, shared, CudaStreamHandler::GetStream()>>>
     (nBestCandidatesWrap,
@@ -1388,7 +1388,8 @@ void LogSoftmaxAndNBest(mblas::Vector<NthOutBatch> &nBest,
   //PAUSE_TIMER("gLogSoftMax");
   
   //HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream()));
-  
+  blocks = std::min((unsigned) MAX_BLOCKS, batchSize);
+
   //BEGIN_TIMER("gNBestPerBatch");
   gNBestPerBatch<<<blocks, 1, 0, CudaStreamHandler::GetStream()>>>
     (nBestWrap,
author	Hieu Hoang <hieuhoang@gmail.com>	2018-01-16 19:25:36 +0300
committer	Hieu Hoang <hieuhoang@gmail.com>	2018-01-16 19:25:36 +0300
commit	8003afcdac90fbd508b19b078e33ed793d97c020 (patch)
tree	0008d49b62937bc4442811ed5ca16b9359222b9f
parent	ed9ca006fd2dd4427b9b4bb884e9bc50846f6c8b (diff)
parent	cf5a42d1e70954a13eb11145bc2b8801ba6dc21d (diff)