diff options
author | Hieu Hoang <hieuhoang@gmail.com> | 2018-01-11 23:36:49 +0300 |
---|---|---|
committer | Hieu Hoang <hieuhoang@gmail.com> | 2018-01-11 23:36:49 +0300 |
commit | 6414036d1acba950e51860fd5c290a69df6f1ca9 (patch) | |
tree | 067c1af477f4527ef314fdcb3329be3fe12a0124 | |
parent | 712eb1b7205e44739a2e9c52ea57459697b975cb (diff) |
don't copy if no rows specified
-rw-r--r-- | src/amun/gpu/decoder/encoder_decoder.cu | 6 |
-rw-r--r-- | src/amun/gpu/mblas/matrix_functions.cu | 23 |
2 files changed, 19 insertions, 10 deletions
diff --git a/src/amun/gpu/decoder/encoder_decoder.cu b/src/amun/gpu/decoder/encoder_decoder.cu
index a9d8b64f..c696916c 100644
--- a/src/amun/gpu/decoder/encoder_decoder.cu
+++ b/src/amun/gpu/decoder/encoder_decoder.cu
@@ -386,11 +386,11 @@ void EncoderDecoder::TopupBatch(Histories &histories,
  //cerr << "maxLength=" << maxLength << endl;
  UpdateSentenceLengths(d_newBatchIds, d_newSentenceLengths, sentenceLengths);
- //HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream()));
- //cerr << "TopupBatch8" << endl;
+ HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream()));
+ cerr << "TopupBatch8" << endl;
  // source context
- //cerr << "1sourceContext=" << sourceContext.Debug() << endl;
+ cerr << "1sourceContext=" << sourceContext.Debug() << endl;
  ResizeMatrix3(sourceContext, {0, maxLength}, d_oldBatchIds);
  //HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream()));
  //cerr << "2sourceContext=" << sourceContext.Debug() << endl;
diff --git a/src/amun/gpu/mblas/matrix_functions.cu b/src/amun/gpu/mblas/matrix_functions.cu
index f6b9338b..b24e20d6 100644
--- a/src/amun/gpu/mblas/matrix_functions.cu
+++ b/src/amun/gpu/mblas/matrix_functions.cu
@@ -318,15 +318,24 @@ Matrix& CopyRows(Matrix& out,
  assert(out.dim(2) == 1);
  assert(out.dim(3) == 1);
- Shape shape(inRows.size(), out.dim(1), 1, 1);
- unsigned size = shape.size();
- //cerr << "size=" << size << endl;
+ if (inRows.size()) {
+ Shape shape(inRows.size(), out.dim(1), 1, 1);
+ unsigned size = shape.size();
+ cerr << "size=" << size << endl;
+ HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream()));
+ cerr << "CopyRows1" << endl;
- unsigned threads = std::min(MAX_THREADS, size);
- unsigned blocks = size / threads + ((size % threads == 0) ? 0 : 1);
+ unsigned threads = std::min(MAX_THREADS, size);
+ unsigned blocks = size / threads + ((size % threads == 0) ? 0 : 1);
- gCopyRows<<<blocks, threads, 0, CudaStreamHandler::GetStream()>>>
- (out, in, inRows, outRows, shape);
+ HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream()));
+ cerr << "CopyRows2" << endl;
+
+ gCopyRows<<<blocks, threads, 0, CudaStreamHandler::GetStream()>>>
+ (out, in, inRows, outRows, shape);
+ HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream()));
+ cerr << "CopyRows3" << endl;
+ }
  return out;