diff options
author | Hieu Hoang <hieuhoang@gmail.com> | 2017-12-01 20:35:00 +0300 |
---|---|---|
committer | Hieu Hoang <hieuhoang@gmail.com> | 2017-12-01 20:35:00 +0300 |
commit | 29d8a51d03741063c05ea0d4b517a3284c13634f (patch) | |
tree | e8c3c49167362073137bb2f03095ca678248faa2 | |
parent | ceef6dc34631e2ce69562484420a98a5ff1c6256 (diff) |
timers on softmax
-rw-r--r-- | src/amun/half/mblas/matrix_functions.cu | 16 |
1 files changed, 8 insertions, 8 deletions
diff --git a/src/amun/half/mblas/matrix_functions.cu b/src/amun/half/mblas/matrix_functions.cu index bc277e69..38cea099 100644 --- a/src/amun/half/mblas/matrix_functions.cu +++ b/src/amun/half/mblas/matrix_functions.cu @@ -1322,7 +1322,7 @@ void LogSoftmaxAndNBest(mblas::Vector<NthOutBatch> &nBest, uint beamSizeSum, bool isFirst) { - //BEGIN_TIMER("LogSoftmax excl kernels"); + BEGIN_TIMER("LogSoftmax excl kernels"); //cerr << "in=" << in.Debug(0) << endl; //cerr << "beamSizes=" << beamSizes.size() << endl; @@ -1376,7 +1376,7 @@ void LogSoftmaxAndNBest(mblas::Vector<NthOutBatch> &nBest, VectorWrapper<uint> beamSizesWrap(d_beamSizes); - //PAUSE_TIMER("LogSoftmax excl kernels"); + PAUSE_TIMER("LogSoftmax excl kernels"); int blocks = std::min(MAX_BLOCKS, (int)in.dim(0)); int threads = std::min(MAX_THREADS, (int)in.dim(1)); @@ -1386,7 +1386,7 @@ void LogSoftmaxAndNBest(mblas::Vector<NthOutBatch> &nBest, //HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream())); //cerr << "step0" << endl; - //BEGIN_TIMER("gBeamSizeInit"); + BEGIN_TIMER("gBeamSizeInit"); gBeamSizeInit<<<1, 1, 0, CudaStreamHandler::GetStream()>>> (hypo2BeamSizeWrap, batch2HypoWrap, @@ -1395,7 +1395,7 @@ void LogSoftmaxAndNBest(mblas::Vector<NthOutBatch> &nBest, beamSizeSum, beamSizesWrap ); - //PAUSE_TIMER("gBeamSizeInit"); + PAUSE_TIMER("gBeamSizeInit"); /* cerr << "hypo2BeamSize=" << Debug(hypo2BeamSize, 2) << endl; @@ -1406,7 +1406,7 @@ void LogSoftmaxAndNBest(mblas::Vector<NthOutBatch> &nBest, //HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream())); //cerr << "step1" << endl; - //BEGIN_TIMER("gLogSoftMax"); + BEGIN_TIMER("gLogSoftMax"); gLogSoftMax<<<blocks, threads, shared, CudaStreamHandler::GetStream()>>> (nBestCandidatesWrap, inWrap, @@ -1415,12 +1415,12 @@ void LogSoftmaxAndNBest(mblas::Vector<NthOutBatch> &nBest, forbidUNK, hypo2BeamSizeWrap, hypo2CandidateWrap); - //PAUSE_TIMER("gLogSoftMax"); + PAUSE_TIMER("gLogSoftMax"); //HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream())); //cerr << "step2" << endl; - //BEGIN_TIMER("gNBestPerBatch"); + BEGIN_TIMER("gNBestPerBatch"); gNBestPerBatch<<<blocks, 1, 0, CudaStreamHandler::GetStream()>>> (nBestWrap, nBestCandidatesWrap, @@ -1432,7 +1432,7 @@ void LogSoftmaxAndNBest(mblas::Vector<NthOutBatch> &nBest, hypo2BeamSizeWrap, batch2HypoWrap, hypo2CandidateWrap); - //PAUSE_TIMER("gNBestPerBatch"); + PAUSE_TIMER("gNBestPerBatch"); //HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream())); //cerr << "step3" << endl; |