diff options
author | Marcin Junczys-Dowmunt <junczys@amu.edu.pl> | 2018-07-27 20:14:21 +0300 |
---|---|---|
committer | Marcin Junczys-Dowmunt <junczys@amu.edu.pl> | 2018-07-27 20:14:21 +0300 |
commit | dceb7185d86ed8fd1994e86dc3e3c0e03740ec4a (patch) | |
tree | 3514f87aa2da28313043959ebd0381b3ba7de233 /src/graph | |
parent | 5cc8674d974bb5cae7bc8f25a51472166164a579 (diff) | |
parent | 8b0e2f951b5ce09a622fa7239b2e1e5bd8344fe4 (diff) |
fix merge
Diffstat (limited to 'src/graph')
-rw-r--r-- | src/graph/expression_graph.h | 7 | ||||
-rw-r--r-- | src/graph/expression_operators.cpp | 13 | ||||
-rw-r--r-- | src/graph/expression_operators.h | 1 | ||||
-rw-r--r-- | src/graph/node_operators_binary.h | 62 | ||||
-rw-r--r-- | src/graph/node_operators_unary.h | 12 |
5 files changed, 72 insertions, 23 deletions
diff --git a/src/graph/expression_graph.h b/src/graph/expression_graph.h index d901000c..199994d0 100644 --- a/src/graph/expression_graph.h +++ b/src/graph/expression_graph.h @@ -244,12 +244,13 @@ public: } } - void backward() { + void backward(bool zero = true) { ABORT_IF(topNodes_.size() > 1, "There are more than one top most node for backward step"); params_->allocateBackward(); - params_->set_zero_adjoint(); + if(zero) + params_->set_zero_adjoint(); for(auto&& v : topNodes_) v->init_dependent(); @@ -264,7 +265,7 @@ public: nodesBackward_.pop_back(); for(auto&& child : v->children()) { - if(child->trainable()) + if(child->trainable() && child->type() != "param") child->set_zero_adjoint(); } diff --git a/src/graph/expression_operators.cpp b/src/graph/expression_operators.cpp index 1666357a..ea8077fa 100644 --- a/src/graph/expression_operators.cpp +++ b/src/graph/expression_operators.cpp @@ -313,7 +313,9 @@ Expr affine(Expr a, Expr b, Expr bias, bool transA, bool transB, float scale) { if(bc != b) bc = rec2(bc); - std::vector<Expr> nodes = {ac, bc, bias}; + int rows = ac->shape().elements() / ac->shape()[-1]; + Expr ones = ac->graph()->ones({rows, 1}); + std::vector<Expr> nodes = {ac, bc, bias, ones}; return rec2(Expression<AffineNodeOp>(nodes, transA, transB, scale), true); }; @@ -333,13 +335,16 @@ Expr affine(Expr a, Expr b, Expr bias, bool transA, bool transB, float scale) { } else { // general version, MKL, CBlas or CUDA + // if clipValue > 0, the inputs will be clipped to range [-clipValue, clipValue] // This is meant to keep values at the same range as used during training when // optimizing for 8-bit integer products. Likely to be removed in the future // when we explore better ways to handle this. - std::vector<Expr> nodes = {clip(a, clipValue), clip(b, clipValue), bias}; - return Expression<AffineNodeOp>(nodes, transA, transB, scale); + int rows = a->shape().elements() / a->shape()[-1]; + Expr ones = a->graph()->ones({rows, 1}); + std::vector<Expr> nodes = {clip(a, clipValue), clip(b, clipValue), bias, ones}; + return Expression<AffineNodeOp>(nodes, transA, transB, scale); } } @@ -462,6 +467,7 @@ Expr shift(Expr a, Shape shift, float padValue) { //} #ifdef CUDA_FOUND +#ifdef CUDNN Expr avg_pooling(Expr x, int height, @@ -526,4 +532,5 @@ Expr pooling_with_masking(Expr x, Expr mask, int width, bool isEven) { } #endif +#endif } diff --git a/src/graph/expression_operators.h b/src/graph/expression_operators.h index cc07dafb..53cf5966 100644 --- a/src/graph/expression_operators.h +++ b/src/graph/expression_operators.h @@ -106,7 +106,6 @@ Expr flatten_2d(Expr a); Expr rows(Expr a, const std::vector<size_t>& indices); Expr cols(Expr a, const std::vector<size_t>& indices); - Expr select(Expr a, int axis, const std::vector<size_t>& indices); /*********************************************************/ diff --git a/src/graph/node_operators_binary.h b/src/graph/node_operators_binary.h index 5b1f9865..ea2a3dfe 100644 --- a/src/graph/node_operators_binary.h +++ b/src/graph/node_operators_binary.h @@ -4,9 +4,12 @@ #include "functional/functional.h" #include "graph/node.h" -#include "tensors/gpu/cudnn_wrappers.h" #include "tensors/tensor_operators.h" +#ifdef CUDNN +#include "tensors/gpu/cudnn_wrappers.h" +#endif + namespace marian { class DotNodeOp : public NaryNodeOp { @@ -167,15 +170,17 @@ public: NodeOps forwardOps() { using namespace functional; + return { - NodeOp(ProdWithBias(val_, - child(0)->val(), - child(1)->val(), - child(2)->val(), - transA_, - transB_, - 0.f, - scalar_)) + NodeOp(Prod(val_, + child(0)->val(), + child(1)->val(), + transA_, transB_, 0.f, scalar_); + Prod(val_, + child(3)->val(), + child(2)->val(), + false, false, 1.f, 1.f) + ) }; } @@ -202,7 +207,12 @@ public: false, 1.0, scalar_)), - NodeOp(Add(_1, child(2)->grad(), adj_))}; + NodeOp(Prod(child(2)->grad(), + child(3)->val(), adj_, + true, false, + 0.f, 1.f)) + //NodeOp(Add(_1, child(2)->grad(), adj_)) + }; if(transA_ && !transB_) return {NodeOp(Prod(child(0)->grad(), @@ -219,7 +229,12 @@ public: false, 1.0, scalar_)), - NodeOp(Add(_1, child(2)->grad(), adj_))}; + NodeOp(Prod(child(2)->grad(), + child(3)->val(), adj_, + true, false, + 0.f, 1.f)) + //NodeOp(Add(_1, child(2)->grad(), adj_)) + }; if(transA_ && transB_) return {NodeOp(Prod(child(0)->grad(), @@ -236,7 +251,12 @@ public: true, 1.0, scalar_)), - NodeOp(Add(_1, child(2)->grad(), adj_))}; + NodeOp(Prod(child(2)->grad(), + child(3)->val(), adj_, + true, false, + 0.f, 1.f)) + //NodeOp(Add(_1, child(2)->grad(), adj_)) + }; return {NodeOp(Prod(child(0)->grad(), adj_, @@ -252,7 +272,12 @@ public: false, 1.0, scalar_)), - NodeOp(Add(_1, child(2)->grad(), adj_))}; + NodeOp(Prod(child(2)->grad(), + child(3)->val(), adj_, + true, false, + 0.f, 1.f)) + //NodeOp(Add(_1, child(2)->grad(), adj_)) + }; } const std::string type() { return "affine"; } @@ -294,6 +319,7 @@ public: NodeOps forwardOps() { // C = alpha * dot(op(A), op(B)) return {NodeOp(ProdBatched(val_, + graph()->allocator(), child(0)->val(), child(1)->val(), transA_, @@ -311,6 +337,7 @@ public: if(!transA_ && transB_) return {NodeOp(ProdBatched(child(0)->grad(), + graph()->allocator(), adj_, child(1)->val(), false, @@ -318,6 +345,7 @@ public: 1.0, scalar_)), NodeOp(ProdBatched(child(1)->grad(), + graph()->allocator(), adj_, child(0)->val(), true, @@ -327,6 +355,7 @@ public: if(transA_ && !transB_) return {NodeOp(ProdBatched(child(0)->grad(), + graph()->allocator(), child(1)->val(), adj_, false, @@ -334,6 +363,7 @@ public: 1.0, scalar_)), NodeOp(ProdBatched(child(1)->grad(), + graph()->allocator(), child(0)->val(), adj_, false, @@ -343,6 +373,7 @@ public: if(transA_ && transB_) return {NodeOp(ProdBatched(child(0)->grad(), + graph()->allocator(), child(1)->val(), adj_, true, @@ -350,6 +381,7 @@ public: 1.0, scalar_)), NodeOp(ProdBatched(child(1)->grad(), + graph()->allocator(), adj_, child(0)->val(), true, @@ -358,6 +390,7 @@ public: scalar_))}; return {NodeOp(ProdBatched(child(0)->grad(), + graph()->allocator(), adj_, child(1)->val(), false, @@ -365,6 +398,7 @@ public: 1.0, scalar_)), NodeOp(ProdBatched(child(1)->grad(), + graph()->allocator(), child(0)->val(), adj_, true, @@ -766,6 +800,7 @@ struct HighwayNodeOp : public NaryNodeOp { const std::string type() { return "highway"; } }; +#ifdef CUDNN class ConvolutionOp : public NaryNodeOp { public: ConvolutionOp(const std::vector<Expr>& nodes, @@ -802,4 +837,5 @@ public: protected: ConvolutionWrapper conv_; }; +#endif } diff --git a/src/graph/node_operators_unary.h b/src/graph/node_operators_unary.h index fa6d25c7..d7ef751d 100644 --- a/src/graph/node_operators_unary.h +++ b/src/graph/node_operators_unary.h @@ -7,7 +7,9 @@ #include "graph/node.h" #include "tensors/tensor_operators.h" -//#include "tensors/gpu/cudnn_wrappers.h" +#ifdef CUDNN +#include "tensors/gpu/cudnn_wrappers.h" +#endif namespace marian { @@ -815,7 +817,7 @@ struct TransposeNodeOp : public UnaryNodeOp { } NodeOps backwardOps() { - return {NodeOp(TransposeND(child(0)->grad(), adj_, axes_))}; + return {NodeOp(TransposeNDGrad(child(0)->grad(), adj_, axes_))}; } template <class... Args> @@ -1009,7 +1011,9 @@ struct ShiftNodeOp : public UnaryNodeOp { } NodeOps backwardOps() { - return {NodeOp(Shift(child(0)->grad(), adj_, shift_, /*padValue=*/0.f, /*invert=*/true))}; + // last parameter beta=1 says to use += (out = in + beta * out) + // @TODO: check need for padValue_ + return {NodeOp(ShiftGrad(child(0)->grad(), adj_, shift_, true))}; } const std::string type() { return "shift"; } @@ -1076,6 +1080,7 @@ struct ShiftNodeOp : public UnaryNodeOp { // Ptr<sparse::CSR> lf_; //}; +#ifdef CUDNN class PoolingOp : public UnaryNodeOp { public: PoolingOp(Expr x, @@ -1109,6 +1114,7 @@ public: protected: PoolingWrapper pooling_; }; +#endif class PoolingWithMaskingOp : public UnaryNodeOp { public: |