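Don't copy if no rows are specified: skip the gCopyRows kernel launch in CopyRows when inRows is empty. Also enables debug stream synchronization and cerr tracing in TopupBatch and CopyRows.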

author: Hieu Hoang <hieuhoang@gmail.com> 2018-01-11 23:36:49 +0300
committer: Hieu Hoang <hieuhoang@gmail.com> 2018-01-11 23:36:49 +0300
commit: 6414036d1acba950e51860fd5c290a69df6f1ca9 (patch)
tree: 067c1af477f4527ef314fdcb3329be3fe12a0124
parent: 712eb1b7205e44739a2e9c52ea57459697b975cb (diff)
2 files changed, 19 insertions, 10 deletions
diff --git a/src/amun/gpu/decoder/encoder_decoder.cu b/src/amun/gpu/decoder/encoder_decoder.cu
index a9d8b64f..c696916c 100644
--- a/src/amun/gpu/decoder/encoder_decoder.cu
+++ b/src/amun/gpu/decoder/encoder_decoder.cu
@@ -386,11 +386,11 @@ void EncoderDecoder::TopupBatch(Histories &histories,
     //cerr << "maxLength=" << maxLength << endl;
 
     UpdateSentenceLengths(d_newBatchIds, d_newSentenceLengths, sentenceLengths);
-    //HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream()));
-    //cerr << "TopupBatch8" << endl;
+    HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream()));
+    cerr << "TopupBatch8" << endl;
 
     // source context
-    //cerr << "1sourceContext=" << sourceContext.Debug() << endl;
+    cerr << "1sourceContext=" << sourceContext.Debug() << endl;
     ResizeMatrix3(sourceContext, {0, maxLength}, d_oldBatchIds);
     //HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream()));
     //cerr << "2sourceContext=" << sourceContext.Debug() << endl;
diff --git a/src/amun/gpu/mblas/matrix_functions.cu b/src/amun/gpu/mblas/matrix_functions.cu
index f6b9338b..b24e20d6 100644
--- a/src/amun/gpu/mblas/matrix_functions.cu
+++ b/src/amun/gpu/mblas/matrix_functions.cu
@@ -318,15 +318,24 @@ Matrix& CopyRows(Matrix& out,
   assert(out.dim(2) == 1);
   assert(out.dim(3) == 1);
 
-  Shape shape(inRows.size(), out.dim(1), 1, 1);
-  unsigned size = shape.size();
-  //cerr << "size=" << size << endl;
+  if (inRows.size()) {
+    Shape shape(inRows.size(), out.dim(1), 1, 1);
+    unsigned size = shape.size();
+    cerr << "size=" << size << endl;
+    HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream()));
+    cerr << "CopyRows1" << endl;
 
-  unsigned threads = std::min(MAX_THREADS, size);
-  unsigned blocks = size / threads + ((size % threads == 0) ?  0 : 1);
+    unsigned threads = std::min(MAX_THREADS, size);
+    unsigned blocks = size / threads + ((size % threads == 0) ?  0 : 1);
 
-  gCopyRows<<<blocks, threads, 0, CudaStreamHandler::GetStream()>>>
-    (out, in, inRows, outRows, shape);
+    HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream()));
+    cerr << "CopyRows2" << endl;
+
+    gCopyRows<<<blocks, threads, 0, CudaStreamHandler::GetStream()>>>
+      (out, in, inRows, outRows, shape);
+    HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream()));
+    cerr << "CopyRows3" << endl;
+  }
 
   return out;
author	Hieu Hoang <hieuhoang@gmail.com>	2018-01-11 23:36:49 +0300
committer	Hieu Hoang <hieuhoang@gmail.com>	2018-01-11 23:36:49 +0300
commit	6414036d1acba950e51860fd5c290a69df6f1ca9 (patch)
tree	067c1af477f4527ef314fdcb3329be3fe12a0124
parent	712eb1b7205e44739a2e9c52ea57459697b975cb (diff)