Don't pass Shape to gBroadcast for now (read it from `out` instead), but leave the shape-based indexing code in, commented out

author: Hieu Hoang <hieuhoang@gmail.com> 2018-01-20 19:44:39 +0300
committer: Hieu Hoang <hieuhoang@gmail.com> 2018-01-20 19:44:39 +0300
commit: 1549edefa8ee594876ff4255a0135aaf50ce3a2d (patch)
tree: ef795474e38edebd5aff4531021bb6dd4004eb20
parent: d88cbf7f823eee5b10df98ed98c3681ffd0a7c3b (diff)
1 files changed, 15 insertions, 14 deletions
diff --git a/src/amun/gpu/mblas/matrix_functions.h b/src/amun/gpu/mblas/matrix_functions.h
index 02eee41d..eadca839 100644
--- a/src/amun/gpu/mblas/matrix_functions.h
+++ b/src/amun/gpu/mblas/matrix_functions.h
@@ -128,12 +128,12 @@ __global__ void gBroadcast(Functor functor,
                            MatrixWrapper<float> out,
                            const MatrixWrapper<float> in1,
                            const MatrixWrapper<float> in2,
-                           const VectorWrapper<unsigned> hypo2Batch,
-                           const Shape shape)
+                           const VectorWrapper<unsigned> hypo2Batch)
 {
+  const Shape &shape = out.GetShape();
   unsigned id = threadIdx.x + blockIdx.x * blockDim.x;
   if (id < shape.size()) {
-    ///*
+    /*
     unsigned indices[SHAPE_SIZE];
     shape.id2Indices(id, indices);
 
@@ -141,8 +141,8 @@ __global__ void gBroadcast(Functor functor,
     unsigned stateIdx = indices[1];
     unsigned beamIdx = indices[2];
     //assert(0 == indices[3]);
-    //*/
-    /*
+    */
+    ///*
     unsigned cols  = in1.GetShape().dim(1);
     unsigned srcSize = out.GetShape().dim(0);
 
@@ -150,19 +150,19 @@ __global__ void gBroadcast(Functor functor,
     unsigned stateIdx = id % cols;
     unsigned beamIdx = row / srcSize;
     unsigned srcId = row % srcSize;
-    */
+    //*/
     unsigned batchIdx = hypo2Batch[ beamIdx ];
 
     assert(srcId < out.GetShape().dim(0));
     assert(srcId < in1.GetShape().dim(0));
     assert(beamIdx < in2.GetShape().dim(0));
     assert(batchIdx < in1.GetShape().dim(3));
-    //out[id] = functor(in1[(batchIdx * srcSize + srcId) * cols + stateIdx],
-    //                      in2[beamIdx * cols + stateIdx]);
+    out[id] = functor(in1[(batchIdx * srcSize + srcId) * cols + stateIdx],
+                          in2[beamIdx * cols + stateIdx]);
     //out[id] = functor(in1(indices[0], indices[1], 0, batchIdx),
     //                      in2(indices[2], indices[1], 0, 0));
-    out(srcId, stateIdx, beamIdx) = functor(in1(srcId, stateIdx, 0, batchIdx),
-                                                  in2(beamIdx, stateIdx));
+    //out(srcId, stateIdx, beamIdx) = functor(in1(srcId, stateIdx, 0, batchIdx),
+    //                                              in2(beamIdx, stateIdx));
   }
 }
 
@@ -188,12 +188,13 @@ Matrix& Broadcast(Functor functor,
   std::cerr << "in1=" << in1.Debug(0) << std::endl;
   std::cerr << "in2=" << in2.Debug(0) << std::endl;
   std::cerr << "hypo2Batch=" << hypo2Batch.Debug(0) << std::endl;
-  */
   std::cerr << "srcSize=" << srcSize << " " << activeBatchMaxLength << std::endl;
-  //std::cerr << std::endl;
+  std::cerr << std::endl;
 
   Shape shape(activeBatchMaxLength, cols, sumOfBeamSizes, 1);
-  unsigned size = shape.size();
+  */
+
+  unsigned size = out.size();
   unsigned threads = std::min(MAX_THREADS, size);
   unsigned blocks  = (size / threads) + ((size % threads == 0) ?  0 : 1);
   /*
@@ -208,7 +209,7 @@ Matrix& Broadcast(Functor functor,
   std::cerr << std::endl;
   */
   gBroadcast<<<blocks, threads, 0, CudaStreamHandler::GetStream()>>>
-    (functor, out, in1, in2, hypo2Batch, shape);
+    (functor, out, in1, in2, hypo2Batch);
   HANDLE_ERROR(cudaGetLastError());
 
   PAUSE_TIMER("Broadcast");
author	Hieu Hoang <hieuhoang@gmail.com>	2018-01-20 19:44:39 +0300
committer	Hieu Hoang <hieuhoang@gmail.com>	2018-01-20 19:44:39 +0300
commit	1549edefa8ee594876ff4255a0135aaf50ce3a2d (patch)
tree	ef795474e38edebd5aff4531021bb6dd4004eb20
parent	d88cbf7f823eee5b10df98ed98c3681ffd0a7c3b (diff)