Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Hieu Hoang <hieuhoang@gmail.com> 2018-01-11 23:36:49 +0300
committer Hieu Hoang <hieuhoang@gmail.com> 2018-01-11 23:36:49 +0300
commit 6414036d1acba950e51860fd5c290a69df6f1ca9 (patch)
tree 067c1af477f4527ef314fdcb3329be3fe12a0124
parent 712eb1b7205e44739a2e9c52ea57459697b975cb (diff)
don't copy if no rows specified
-rw-r--r-- src/amun/gpu/decoder/encoder_decoder.cu 6
-rw-r--r-- src/amun/gpu/mblas/matrix_functions.cu 23
2 files changed, 19 insertions, 10 deletions
diff --git a/src/amun/gpu/decoder/encoder_decoder.cu b/src/amun/gpu/decoder/encoder_decoder.cu
index a9d8b64f..c696916c 100644
--- a/src/amun/gpu/decoder/encoder_decoder.cu
+++ b/src/amun/gpu/decoder/encoder_decoder.cu
@@ -386,11 +386,11 @@ void EncoderDecoder::TopupBatch(Histories &histories,
//cerr << "maxLength=" << maxLength << endl;
UpdateSentenceLengths(d_newBatchIds, d_newSentenceLengths, sentenceLengths);
- //HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream()));
- //cerr << "TopupBatch8" << endl;
+ HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream()));
+ cerr << "TopupBatch8" << endl;
// source context
- //cerr << "1sourceContext=" << sourceContext.Debug() << endl;
+ cerr << "1sourceContext=" << sourceContext.Debug() << endl;
ResizeMatrix3(sourceContext, {0, maxLength}, d_oldBatchIds);
//HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream()));
//cerr << "2sourceContext=" << sourceContext.Debug() << endl;
diff --git a/src/amun/gpu/mblas/matrix_functions.cu b/src/amun/gpu/mblas/matrix_functions.cu
index f6b9338b..b24e20d6 100644
--- a/src/amun/gpu/mblas/matrix_functions.cu
+++ b/src/amun/gpu/mblas/matrix_functions.cu
@@ -318,15 +318,24 @@ Matrix& CopyRows(Matrix& out,
assert(out.dim(2) == 1);
assert(out.dim(3) == 1);
- Shape shape(inRows.size(), out.dim(1), 1, 1);
- unsigned size = shape.size();
- //cerr << "size=" << size << endl;
+ if (inRows.size()) {
+ Shape shape(inRows.size(), out.dim(1), 1, 1);
+ unsigned size = shape.size();
+ cerr << "size=" << size << endl;
+ HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream()));
+ cerr << "CopyRows1" << endl;
- unsigned threads = std::min(MAX_THREADS, size);
- unsigned blocks = size / threads + ((size % threads == 0) ? 0 : 1);
+ unsigned threads = std::min(MAX_THREADS, size);
+ unsigned blocks = size / threads + ((size % threads == 0) ? 0 : 1);
- gCopyRows<<<blocks, threads, 0, CudaStreamHandler::GetStream()>>>
- (out, in, inRows, outRows, shape);
+ HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream()));
+ cerr << "CopyRows2" << endl;
+
+ gCopyRows<<<blocks, threads, 0, CudaStreamHandler::GetStream()>>>
+ (out, in, inRows, outRows, shape);
+ HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream()));
+ cerr << "CopyRows3" << endl;
+ }
return out;