Move pooling code to cudnn files

author: Tomasz Dwojak <t.dwojak@amu.edu.pl> 2017-11-10 16:11:07 +0300
committer: Tomasz Dwojak <t.dwojak@amu.edu.pl> 2017-11-13 11:40:38 +0300
commit: 9b24f9c6ac23d50ca698daa7ca2f13ecc1e96ead (patch)
tree: 884348ae51a39fef93fc5b6eca1d5ddb0d89f1e5 /src
parent: 72761ed08425cc1d20a9f522c619052bef6f5dd2 (diff)
5 files changed, 209 insertions, 151 deletions
diff --git a/src/graph/expression_operators.cu b/src/graph/expression_operators.cu
index 98570862..b271d971 100644
--- a/src/graph/expression_operators.cu
+++ b/src/graph/expression_operators.cu
@@ -303,33 +303,43 @@ Expr convolution(Expr x, Expr filters, Expr bias) {
   return Expression<ConvolutionOp>(nodes);
 }
 
-// clang-format off
 Expr avg_pooling(
     Expr x,
-    int height, int width,
-    int padHeight, int padWidth,
-    int strideHeight, int strideWidth)
+    int height,
+    int width,
+    int padHeight,
+    int padWidth,
+    int strideHeight,
+    int strideWidth)
 {
   return Expression<PoolingOp>(x,
-      height, width,
-      padHeight, padWidth,
-      strideHeight, strideWidth,
-      PoolingOp::Mode::AVERAGE_POOLING);
+      height,
+      width,
+      padHeight,
+      padWidth,
+      strideHeight,
+      strideWidth,
+      "avg");
 }
 
 Expr max_pooling(
     Expr x,
-    int height, int width,
-    int padHeight, int padWidth,
-    int strideHeight, int strideWidth)
+    int height,
+    int width,
+    int padHeight,
+    int padWidth,
+    int strideHeight,
+    int strideWidth)
 {
   return Expression<PoolingOp>(x,
-      height, width,
-      padHeight, padWidth,
-      strideHeight, strideWidth,
-      PoolingOp::Mode::MAX_POOLING);
+      height,
+      width,
+      padHeight,
+      padWidth,
+      strideHeight,
+      strideWidth,
+      "max");
 }
-// clang-format on
 
 #endif
 }
diff --git a/src/graph/node_operators_binary.h b/src/graph/node_operators_binary.h
index 4b8dd70e..a3d24179 100644
--- a/src/graph/node_operators_binary.h
+++ b/src/graph/node_operators_binary.h
@@ -712,18 +712,23 @@ class ConvolutionOp : public NaryNodeOp {
 public:
   ConvolutionOp(const std::vector<Expr>& nodes)
     : NaryNodeOp(nodes),
-      conv_(nodes[1]->val(), nodes[2]->val()) {
-    conv_.getOutputShape(nodes[0]->val(), shape_);
+      conv_(nodes[1]->shape(), nodes[2]->shape()) {
+    conv_.getOutputShape(nodes[0]->shape(), shape_);
   }
 
   NodeOps forwardOps() {
-    return {NodeOp(conv_.forward(child(0)->val(), val_))};
+    return {NodeOp(conv_.forward(
+          child(0)->val(),
+          child(1)->val(),
+          child(2)->val(),
+          val_))};
   }
 
   NodeOps backwardOps() {
     return {NodeOp(conv_.backward(
           child(0)->val(),
           child(0)->grad(),
+          child(1)->val(),
           child(1)->grad(),
           child(2)->grad(),
           adj_))};
diff --git a/src/graph/node_operators_unary.h b/src/graph/node_operators_unary.h
index 6554c4f0..90995ddf 100644
--- a/src/graph/node_operators_unary.h
+++ b/src/graph/node_operators_unary.h
@@ -6,30 +6,8 @@
 #include "kernels/tensor_operators.h"
 #include "tensors/tensor.h"
 #include "functional/functional.h"
+#include "kernels/cudnn_wrappers.h"
 
-#ifdef CUDNN
-
-#include <cudnn.h>
-
-#define CUDA_CALL(x)                                  \
-  do {                                                \
-    if((x) != cudaSuccess) {                          \
-      printf("Error at %s:%d\n", __FILE__, __LINE__); \
-      return EXIT_FAILURE;                            \
-    }                                                 \
-  } while(0)
-
-#define CUDNN_CALL(x)                 \
-  do {                                \
-    if((x) != CUDNN_STATUS_SUCCESS) { \
-      printf("Error (%s) at %s:%d\n", \
-             cudnnGetErrorString(x),  \
-             __FILE__,                \
-             __LINE__);               \
-    }                                 \
-  } while(0)
-
-#endif
 
 namespace marian {
 
@@ -1034,12 +1012,8 @@ struct ShiftNodeOp : public UnaryNodeOp {
 //  Ptr<sparse::CSR> lf_;
 //};
 
-#ifdef CUDNN
-
 class PoolingOp : public UnaryNodeOp {
 public:
-  enum class Mode { MAX_POOLING, AVERAGE_POOLING };
-
   PoolingOp(Expr x,
             int height,
             int width,
@@ -1047,119 +1021,34 @@ public:
             int padWidth,
             int strideHeight,
             int strideWidth,
-            Mode mode = Mode::AVERAGE_POOLING)
-      : UnaryNodeOp(x) {
-    CUDNN_CALL(cudnnCreate(&cudnnHandle_));
-
-    CUDNN_CALL(cudnnCreateTensorDescriptor(&xDesc_));
-    CUDNN_CALL(cudnnSetTensor4dDescriptor(xDesc_,
-                                          CUDNN_TENSOR_NCHW,
-                                          CUDNN_DATA_FLOAT,
-                                          x->shape()[0],
-                                          x->shape()[1],
-                                          x->shape()[2],
-                                          x->shape()[3]));
-
-    cudnnPoolingMode_t cudnnPoolingMode;
-    switch(mode) {
-      case Mode::MAX_POOLING: cudnnPoolingMode = CUDNN_POOLING_MAX; break;
-      case Mode::AVERAGE_POOLING:
-        cudnnPoolingMode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
-        break;
-      default: break;
-    };
-
-    height = std::min(height, x->shape()[2]);
-    strideHeight = std::min(strideHeight, x->shape()[2]);
-
-    CUDNN_CALL(cudnnCreatePoolingDescriptor(&poolingDesc_));
-    CUDNN_CALL(cudnnSetPooling2dDescriptor(poolingDesc_,
-                                           cudnnPoolingMode,
-                                           CUDNN_NOT_PROPAGATE_NAN,
-                                           height,
-                                           width,
-                                           padHeight,
-                                           padWidth,
-                                           strideHeight,
-                                           strideWidth));
-   /* @TODO: does not compile
-    CUDNN_CALL(cudnnGetPooling2dForwardOutputDim(poolingDesc_,
-                                                 xDesc_,
-                                                 shape_.begin(),
-                                                 shape_.begin() + 1,
-                                                 shape_.begin() + 2,
-                                                 shape_.begin() + 3));
-*/
-    CUDNN_CALL(cudnnCreateTensorDescriptor(&yDesc_));
-    CUDNN_CALL(cudnnSetTensor4dDescriptor(yDesc_,
-                                          CUDNN_TENSOR_NCHW,
-                                          CUDNN_DATA_FLOAT,
-                                          shape_[0],
-                                          shape_[1],
-                                          shape_[2],
-                                          shape_[3]));
-    CUDNN_CALL(cudnnCreateTensorDescriptor(&adjDesc_));
-    CUDNN_CALL(cudnnSetTensor4dDescriptor(adjDesc_,
-                                          CUDNN_TENSOR_NCHW,
-                                          CUDNN_DATA_FLOAT,
-                                          shape_[0],
-                                          shape_[1],
-                                          shape_[2],
-                                          shape_[3]));
+            std::string mode)
+      : UnaryNodeOp(x),
+        pooling_(height,
+                 width,
+                 padHeight,
+                 padWidth,
+                 strideHeight,
+                 strideWidth,
+                 mode) {
   }
 
   NodeOps forwardOps() {
-    const float alpha = 1.0f;
-    const float beta = 0.0f;
-
-    cudaSetDevice(val_->getDevice());
-
-    return {NodeOp(CUDNN_CALL(cudnnPoolingForward(cudnnHandle_,
-                                                  poolingDesc_,
-                                                  &alpha,
-                                                  xDesc_,
-                                                  children_[0]->val()->data(),
-                                                  &beta,
-                                                  yDesc_,
-                                                  val_->data())))};
+    return {NodeOp(pooling_.forward(child(0)->val(), val_))};
   }
 
   NodeOps backwardOps() {
-    cudaSetDevice(adj_->getDevice());
-    const float alpha = 1.0f;
-    const float beta = 1.0f;
-    return {
-        NodeOp(CUDNN_CALL(cudnnPoolingBackward(cudnnHandle_,
-                                               poolingDesc_,
-                                               &alpha,
-                                               yDesc_,
-                                               val_->data(),
-                                               adjDesc_,
-                                               adj_->data(),
-                                               xDesc_,
-                                               children_[0]->val()->data(),
-                                               &beta,
-                                               xDesc_,
-                                               children_[0]->grad()->data())))};
-  }
-
-  const std::string type() { return "layer_max_pooling"; }
-
-  virtual ~PoolingOp() {
-    CUDNN_CALL(cudnnDestroy(cudnnHandle_));
-    CUDNN_CALL(cudnnDestroyPoolingDescriptor(poolingDesc_));
-    CUDNN_CALL(cudnnDestroyTensorDescriptor(xDesc_));
-    CUDNN_CALL(cudnnDestroyTensorDescriptor(yDesc_));
-    CUDNN_CALL(cudnnDestroyTensorDescriptor(adjDesc_));
+    return {NodeOp(pooling_.backward(
+          child(0)->val(),
+          child(0)->grad(),
+          val_,
+          adj_))};
   }
 
+  const std::string type() { return "layer_pooling"; }
+
+
 protected:
-  cudnnHandle_t cudnnHandle_;
-  cudnnPoolingDescriptor_t poolingDesc_;
-  cudnnTensorDescriptor_t xDesc_;
-  cudnnTensorDescriptor_t yDesc_;
-  cudnnTensorDescriptor_t adjDesc_;
+  PoolingWrapper pooling_;
 };
 
-#endif
 }
diff --git a/src/kernels/cudnn_wrappers.cu b/src/kernels/cudnn_wrappers.cu
index fbc7e9ea..2cd994c1 100644
--- a/src/kernels/cudnn_wrappers.cu
+++ b/src/kernels/cudnn_wrappers.cu
@@ -76,13 +76,14 @@ void ConvolutionWrapper::forward(
     Tensor kernels,
     Tensor bias,
     Tensor y) {
+  cudaSetDevice(x->getDevice());
+
   cudnnTensorDescriptor_t xDesc, yDesc;
   setCudnnTensor(xDesc, x);
   setCudnnTensor(yDesc, y);
 
   const float alpha = 1.0f;
   const float beta = 0.0f;
-  cudaSetDevice(x->getDevice());
 
   CUDNN_CALL(cudnnConvolutionForward(
         cudnnHandle_,
@@ -219,6 +220,125 @@ void ConvolutionWrapper::setKernelDescriptor(const Shape& shape) {
         kernelW));
 }
 
+
+/******************************************************************************
+ * PoolingWrapper
+ *****************************************************************************/
+
+PoolingWrapper::PoolingWrapper(
+    int height,
+    int width,
+    int padHeight,
+    int padWidth,
+    int strideHeight,
+    int strideWidth,
+    std::string mode) {
+  if (mode == "max") {
+    poolingMode_ = CUDNN_POOLING_MAX;
+  } else if (mode == "avg") {
+    poolingMode_ = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
+  } else {
+      ABORT("Unknown pooling mode.");
+  }
+
+  setPoolingDescriptor(height, width,
+      padHeight, padWidth,
+      strideHeight, strideWidth);
+}
+
+void PoolingWrapper::getOutputShape(
+    const Shape& xShape,
+    Shape& shape) {
+  cudnnTensorDescriptor_t xDesc;
+  setCudnnTensor(xDesc, xShape);
+  shape.resize(4);
+  CUDNN_CALL(cudnnGetPooling2dForwardOutputDim(
+        poolingDesc_,
+        xDesc,
+        shape.data(),
+        shape.data() + 1,
+        shape.data() + 2,
+        shape.data() + 3));
+  cudnnDestroyTensorDescriptor(xDesc);
+}
+
+void PoolingWrapper::forward(Tensor x, Tensor y) {
+  cudaSetDevice(x->getDevice());
+
+  cudnnTensorDescriptor_t xDesc, yDesc;
+  setCudnnTensor(xDesc, x);
+  setCudnnTensor(yDesc, y);
+
+  const float alpha = 1.0f;
+  const float beta = 0.0f;
+
+  CUDNN_CALL(cudnnPoolingForward(
+        cudnnHandle_,
+        poolingDesc_,
+        &alpha,
+        xDesc,
+        x->data(),
+        &beta,
+        yDesc,
+        y->data()));
+  cudnnDestroyTensorDescriptor(xDesc);
+  cudnnDestroyTensorDescriptor(yDesc);
+}
+
+void PoolingWrapper::backward(
+    Tensor x,
+    Tensor xGrad,
+    Tensor y,
+    Tensor yGrad) {
+  cudaSetDevice(x->getDevice());
+
+  cudnnTensorDescriptor_t xDesc, yDesc;
+  setCudnnTensor(xDesc, x);
+  setCudnnTensor(yDesc, y);
+
+  const float alpha = 1.0f;
+  const float beta = 1.0f;
+
+  CUDNN_CALL(cudnnPoolingBackward(
+        cudnnHandle_,
+        poolingDesc_,
+        &alpha,
+        yDesc,
+        y->data(),
+        yDesc,
+        yGrad->data(),
+        xDesc,
+        x->data(),
+        &beta,
+        xDesc,
+        xGrad->data()));
+  cudnnDestroyTensorDescriptor(xDesc);
+  cudnnDestroyTensorDescriptor(yDesc);
+}
+
+void PoolingWrapper::setPoolingDescriptor(
+    int height,
+    int width,
+    int padHeight,
+    int padWidth,
+    int strideHeight,
+    int strideWidth) {
+  CUDNN_CALL(cudnnCreatePoolingDescriptor(&poolingDesc_));
+  CUDNN_CALL(cudnnSetPooling2dDescriptor(poolingDesc_,
+                                          poolingMode_,
+                                          CUDNN_NOT_PROPAGATE_NAN,
+                                          height,
+                                          width,
+                                          padHeight,
+                                          padWidth,
+                                          strideHeight,
+                                          strideWidth));
+}
+
+PoolingWrapper::~PoolingWrapper() {
+  CUDNN_CALL(cudnnDestroyPoolingDescriptor(poolingDesc_));
+}
+
 }
 
 #endif
diff --git a/src/kernels/cudnn_wrappers.h b/src/kernels/cudnn_wrappers.h
index e4870083..b26ebcaf 100644
--- a/src/kernels/cudnn_wrappers.h
+++ b/src/kernels/cudnn_wrappers.h
@@ -59,5 +59,39 @@ protected:
 
 };
 
+class PoolingWrapper : public CUDNNWrapper {
+public:
+  PoolingWrapper(
+      int height,
+      int width,
+      int padHeight,
+      int padWidth,
+      int strideHeight,
+      int strideWidth,
+      std::string mode);
+
+  void getOutputShape(const Shape& xShape, Shape& shape);
+
+  void forward(Tensor x, Tensor y);
+
+  void backward(Tensor x, Tensor xGrad, Tensor y, Tensor yGrad);
+
+  virtual ~PoolingWrapper();
+
+protected:
+  void setPoolingDescriptor(
+      int height,
+      int width,
+      int padHeight,
+      int padWidth,
+      int strideHeight,
+      int strideWidth);
+
+protected:
+  cudnnPoolingDescriptor_t poolingDesc_;
+  cudnnPoolingMode_t poolingMode_;
+
+};
+
 }
author	Tomasz Dwojak <t.dwojak@amu.edu.pl>	2017-11-10 16:11:07 +0300
committer	Tomasz Dwojak <t.dwojak@amu.edu.pl>	2017-11-13 11:40:38 +0300
commit	9b24f9c6ac23d50ca698daa7ca2f13ecc1e96ead (patch)
tree	884348ae51a39fef93fc5b6eca1d5ddb0d89f1e5 /src
parent	72761ed08425cc1d20a9f522c619052bef6f5dd2 (diff)