diff options
author | Hieu Hoang <hieuhoang@gmail.com> | 2018-01-20 19:44:39 +0300 |
---|---|---|
committer | Hieu Hoang <hieuhoang@gmail.com> | 2018-01-20 19:44:39 +0300 |
commit | 1549edefa8ee594876ff4255a0135aaf50ce3a2d (patch) | |
tree | ef795474e38edebd5aff4531021bb6dd4004eb20 | |
parent | d88cbf7f823eee5b10df98ed98c3681ffd0a7c3b (diff) |
Don't pass Shape to gBroadcast for now (use flat indexing instead), but leave the shape-based code in, commented out
-rw-r--r-- | src/amun/gpu/mblas/matrix_functions.h | 29 |
1 file changed, 15 insertions, 14 deletions
diff --git a/src/amun/gpu/mblas/matrix_functions.h b/src/amun/gpu/mblas/matrix_functions.h index 02eee41d..eadca839 100644 --- a/src/amun/gpu/mblas/matrix_functions.h +++ b/src/amun/gpu/mblas/matrix_functions.h @@ -128,12 +128,12 @@ __global__ void gBroadcast(Functor functor, MatrixWrapper<float> out, const MatrixWrapper<float> in1, const MatrixWrapper<float> in2, - const VectorWrapper<unsigned> hypo2Batch, - const Shape shape) + const VectorWrapper<unsigned> hypo2Batch) { + const Shape &shape = out.GetShape(); unsigned id = threadIdx.x + blockIdx.x * blockDim.x; if (id < shape.size()) { - ///* + /* unsigned indices[SHAPE_SIZE]; shape.id2Indices(id, indices); @@ -141,8 +141,8 @@ __global__ void gBroadcast(Functor functor, unsigned stateIdx = indices[1]; unsigned beamIdx = indices[2]; //assert(0 == indices[3]); - //*/ - /* + */ + ///* unsigned cols = in1.GetShape().dim(1); unsigned srcSize = out.GetShape().dim(0); @@ -150,19 +150,19 @@ __global__ void gBroadcast(Functor functor, unsigned stateIdx = id % cols; unsigned beamIdx = row / srcSize; unsigned srcId = row % srcSize; - */ + //*/ unsigned batchIdx = hypo2Batch[ beamIdx ]; assert(srcId < out.GetShape().dim(0)); assert(srcId < in1.GetShape().dim(0)); assert(beamIdx < in2.GetShape().dim(0)); assert(batchIdx < in1.GetShape().dim(3)); - //out[id] = functor(in1[(batchIdx * srcSize + srcId) * cols + stateIdx], - // in2[beamIdx * cols + stateIdx]); + out[id] = functor(in1[(batchIdx * srcSize + srcId) * cols + stateIdx], + in2[beamIdx * cols + stateIdx]); //out[id] = functor(in1(indices[0], indices[1], 0, batchIdx), // in2(indices[2], indices[1], 0, 0)); - out(srcId, stateIdx, beamIdx) = functor(in1(srcId, stateIdx, 0, batchIdx), - in2(beamIdx, stateIdx)); + //out(srcId, stateIdx, beamIdx) = functor(in1(srcId, stateIdx, 0, batchIdx), + // in2(beamIdx, stateIdx)); } } @@ -188,12 +188,13 @@ Matrix& Broadcast(Functor functor, std::cerr << "in1=" << in1.Debug(0) << std::endl; std::cerr << "in2=" << in2.Debug(0) << 
std::endl; std::cerr << "hypo2Batch=" << hypo2Batch.Debug(0) << std::endl; - */ std::cerr << "srcSize=" << srcSize << " " << activeBatchMaxLength << std::endl; - //std::cerr << std::endl; + std::cerr << std::endl; Shape shape(activeBatchMaxLength, cols, sumOfBeamSizes, 1); - unsigned size = shape.size(); + */ + + unsigned size = out.size(); unsigned threads = std::min(MAX_THREADS, size); unsigned blocks = (size / threads) + ((size % threads == 0) ? 0 : 1); /* @@ -208,7 +209,7 @@ Matrix& Broadcast(Functor functor, std::cerr << std::endl; */ gBroadcast<<<blocks, threads, 0, CudaStreamHandler::GetStream()>>> - (functor, out, in1, in2, hypo2Batch, shape); + (functor, out, in1, in2, hypo2Batch); HANDLE_ERROR(cudaGetLastError()); PAUSE_TIMER("Broadcast"); |