github.com/marian-nmt/marian.git
path: root/src/graph
author    Marcin Junczys-Dowmunt <junczys@amu.edu.pl>  2018-07-27 20:14:21 +0300
committer Marcin Junczys-Dowmunt <junczys@amu.edu.pl>  2018-07-27 20:14:21 +0300
commit    dceb7185d86ed8fd1994e86dc3e3c0e03740ec4a (patch)
tree      3514f87aa2da28313043959ebd0381b3ba7de233 /src/graph
parent    5cc8674d974bb5cae7bc8f25a51472166164a579 (diff)
parent    8b0e2f951b5ce09a622fa7239b2e1e5bd8344fe4 (diff)
fix merge
Diffstat (limited to 'src/graph')
-rw-r--r--  src/graph/expression_graph.h        |  7
-rw-r--r--  src/graph/expression_operators.cpp  | 13
-rw-r--r--  src/graph/expression_operators.h    |  1
-rw-r--r--  src/graph/node_operators_binary.h   | 62
-rw-r--r--  src/graph/node_operators_unary.h    | 12
5 files changed, 72 insertions(+), 23 deletions(-)
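
In short: backward() gains a zero flag so parameter gradients can accumulate across passes; the bias term of affine is reformulated as a second GEMM against a ones vector in both the forward and backward passes; ProdBatched now takes the graph allocator for its workspace; and the cuDNN-dependent convolution and pooling operators are fenced behind #ifdef CUDNN.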
diff --git a/src/graph/expression_graph.h b/src/graph/expression_graph.h
index d901000c..199994d0 100644
--- a/src/graph/expression_graph.h
+++ b/src/graph/expression_graph.h
@@ -244,12 +244,13 @@ public:
}
}
- void backward() {
+ void backward(bool zero = true) {
ABORT_IF(topNodes_.size() > 1,
"There are more than one top most node for backward step");
params_->allocateBackward();
- params_->set_zero_adjoint();
+ if(zero)
+ params_->set_zero_adjoint();
for(auto&& v : topNodes_)
v->init_dependent();
@@ -264,7 +265,7 @@ public:
nodesBackward_.pop_back();
for(auto&& child : v->children()) {
- if(child->trainable())
+ if(child->trainable() && child->type() != "param")
child->set_zero_adjoint();
}
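
Note on the change above: backward() can now skip zeroing the parameter adjoints, and the per-node zeroing loop leaves "param" nodes alone, so gradients accumulate across repeated backward passes. A minimal usage sketch, assuming a graph built with the API shown in this diff; the surrounding training-loop code is illustrative, not part of this commit:

    graph->forward();
    graph->backward();       // zero == true (default): clear parameter adjoints first
    graph->forward();
    graph->backward(false);  // zero == false: add into the existing adjoints
    // ... apply one optimizer step using the summed gradients ...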
diff --git a/src/graph/expression_operators.cpp b/src/graph/expression_operators.cpp
index 1666357a..ea8077fa 100644
--- a/src/graph/expression_operators.cpp
+++ b/src/graph/expression_operators.cpp
@@ -313,7 +313,9 @@ Expr affine(Expr a, Expr b, Expr bias, bool transA, bool transB, float scale) {
if(bc != b)
bc = rec2(bc);
- std::vector<Expr> nodes = {ac, bc, bias};
+ int rows = ac->shape().elements() / ac->shape()[-1];
+ Expr ones = ac->graph()->ones({rows, 1});
+ std::vector<Expr> nodes = {ac, bc, bias, ones};
return rec2(Expression<AffineNodeOp>(nodes, transA, transB, scale),
true);
};
@@ -333,13 +335,16 @@ Expr affine(Expr a, Expr b, Expr bias, bool transA, bool transB, float scale) {
}
else {
// general version, MKL, CBlas or CUDA
+
// if clipValue > 0, the inputs will be clipped to range [-clipValue, clipValue]
// This is meant to keep values at the same range as used during training when
// optimizing for 8-bit integer products. Likely to be removed in the future
// when we explore better ways to handle this.
- std::vector<Expr> nodes = {clip(a, clipValue), clip(b, clipValue), bias};
- return Expression<AffineNodeOp>(nodes, transA, transB, scale);
+ int rows = a->shape().elements() / a->shape()[-1];
+ Expr ones = a->graph()->ones({rows, 1});
+ std::vector<Expr> nodes = {clip(a, clipValue), clip(b, clipValue), bias, ones};
+ return Expression<AffineNodeOp>(nodes, transA, transB, scale);
}
}
@@ -462,6 +467,7 @@ Expr shift(Expr a, Shape shift, float padValue) {
//}
#ifdef CUDA_FOUND
+#ifdef CUDNN
Expr avg_pooling(Expr x,
int height,
@@ -526,4 +532,5 @@ Expr pooling_with_masking(Expr x, Expr mask, int width, bool isEven) {
}
#endif
+#endif
}
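
Note on the affine changes above: the bias addition is folded into a second GEMM. A {rows, 1} column of ones is appended as a fourth child so that the forward pass can compute C = op(A) op(B) and then C += ones * bias, broadcasting the bias row over all rows of the product (see the matching forwardOps change in node_operators_binary.h below). A self-contained reference sketch of what the two products compute for row-major matrices; plain C++, not Marian code, all names illustrative:

    // affine_ref: C(rows x cols) = A(rows x inner) * B(inner x cols)
    //                              + ones(rows x 1) * bias(1 x cols)
    void affine_ref(float* C, const float* A, const float* B, const float* bias,
                    int rows, int inner, int cols) {
      for(int i = 0; i < rows; ++i)
        for(int j = 0; j < cols; ++j) {
          float sum = 0.f;
          for(int k = 0; k < inner; ++k)
            sum += A[i * inner + k] * B[k * cols + j];
          // the ones * bias product contributes bias[j] to every row i
          C[i * cols + j] = sum + bias[j];
        }
    }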
diff --git a/src/graph/expression_operators.h b/src/graph/expression_operators.h
index cc07dafb..53cf5966 100644
--- a/src/graph/expression_operators.h
+++ b/src/graph/expression_operators.h
@@ -106,7 +106,6 @@ Expr flatten_2d(Expr a);
Expr rows(Expr a, const std::vector<size_t>& indices);
Expr cols(Expr a, const std::vector<size_t>& indices);
-
Expr select(Expr a, int axis, const std::vector<size_t>& indices);
/*********************************************************/
diff --git a/src/graph/node_operators_binary.h b/src/graph/node_operators_binary.h
index 5b1f9865..ea2a3dfe 100644
--- a/src/graph/node_operators_binary.h
+++ b/src/graph/node_operators_binary.h
@@ -4,9 +4,12 @@
#include "functional/functional.h"
#include "graph/node.h"
-#include "tensors/gpu/cudnn_wrappers.h"
#include "tensors/tensor_operators.h"
+#ifdef CUDNN
+#include "tensors/gpu/cudnn_wrappers.h"
+#endif
+
namespace marian {
class DotNodeOp : public NaryNodeOp {
@@ -167,15 +170,17 @@ public:
NodeOps forwardOps() {
using namespace functional;
+
return {
- NodeOp(ProdWithBias(val_,
- child(0)->val(),
- child(1)->val(),
- child(2)->val(),
- transA_,
- transB_,
- 0.f,
- scalar_))
+ NodeOp(Prod(val_,
+ child(0)->val(),
+ child(1)->val(),
+ transA_, transB_, 0.f, scalar_);
+ Prod(val_,
+ child(3)->val(),
+ child(2)->val(),
+ false, false, 1.f, 1.f)
+ )
};
}
@@ -202,7 +207,12 @@ public:
false,
1.0,
scalar_)),
- NodeOp(Add(_1, child(2)->grad(), adj_))};
+ NodeOp(Prod(child(2)->grad(),
+ child(3)->val(), adj_,
+ true, false,
+ 0.f, 1.f))
+ //NodeOp(Add(_1, child(2)->grad(), adj_))
+ };
if(transA_ && !transB_)
return {NodeOp(Prod(child(0)->grad(),
@@ -219,7 +229,12 @@ public:
false,
1.0,
scalar_)),
- NodeOp(Add(_1, child(2)->grad(), adj_))};
+ NodeOp(Prod(child(2)->grad(),
+ child(3)->val(), adj_,
+ true, false,
+ 0.f, 1.f))
+ //NodeOp(Add(_1, child(2)->grad(), adj_))
+ };
if(transA_ && transB_)
return {NodeOp(Prod(child(0)->grad(),
@@ -236,7 +251,12 @@ public:
true,
1.0,
scalar_)),
- NodeOp(Add(_1, child(2)->grad(), adj_))};
+ NodeOp(Prod(child(2)->grad(),
+ child(3)->val(), adj_,
+ true, false,
+ 0.f, 1.f))
+ //NodeOp(Add(_1, child(2)->grad(), adj_))
+ };
return {NodeOp(Prod(child(0)->grad(),
adj_,
@@ -252,7 +272,12 @@ public:
false,
1.0,
scalar_)),
- NodeOp(Add(_1, child(2)->grad(), adj_))};
+ NodeOp(Prod(child(2)->grad(),
+ child(3)->val(), adj_,
+ true, false,
+ 0.f, 1.f))
+ //NodeOp(Add(_1, child(2)->grad(), adj_))
+ };
}
const std::string type() { return "affine"; }
@@ -294,6 +319,7 @@ public:
NodeOps forwardOps() {
// C = alpha * dot(op(A), op(B))
return {NodeOp(ProdBatched(val_,
+ graph()->allocator(),
child(0)->val(),
child(1)->val(),
transA_,
@@ -311,6 +337,7 @@ public:
if(!transA_ && transB_)
return {NodeOp(ProdBatched(child(0)->grad(),
+ graph()->allocator(),
adj_,
child(1)->val(),
false,
@@ -318,6 +345,7 @@ public:
1.0,
scalar_)),
NodeOp(ProdBatched(child(1)->grad(),
+ graph()->allocator(),
adj_,
child(0)->val(),
true,
@@ -327,6 +355,7 @@ public:
if(transA_ && !transB_)
return {NodeOp(ProdBatched(child(0)->grad(),
+ graph()->allocator(),
child(1)->val(),
adj_,
false,
@@ -334,6 +363,7 @@ public:
1.0,
scalar_)),
NodeOp(ProdBatched(child(1)->grad(),
+ graph()->allocator(),
child(0)->val(),
adj_,
false,
@@ -343,6 +373,7 @@ public:
if(transA_ && transB_)
return {NodeOp(ProdBatched(child(0)->grad(),
+ graph()->allocator(),
child(1)->val(),
adj_,
true,
@@ -350,6 +381,7 @@ public:
1.0,
scalar_)),
NodeOp(ProdBatched(child(1)->grad(),
+ graph()->allocator(),
adj_,
child(0)->val(),
true,
@@ -358,6 +390,7 @@ public:
scalar_))};
return {NodeOp(ProdBatched(child(0)->grad(),
+ graph()->allocator(),
adj_,
child(1)->val(),
false,
@@ -365,6 +398,7 @@ public:
1.0,
scalar_)),
NodeOp(ProdBatched(child(1)->grad(),
+ graph()->allocator(),
child(0)->val(),
adj_,
true,
@@ -766,6 +800,7 @@ struct HighwayNodeOp : public NaryNodeOp {
const std::string type() { return "highway"; }
};
+#ifdef CUDNN
class ConvolutionOp : public NaryNodeOp {
public:
ConvolutionOp(const std::vector<Expr>& nodes,
@@ -802,4 +837,5 @@ public:
protected:
ConvolutionWrapper conv_;
};
+#endif
}
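
Two notes on node_operators_binary.h above. First, AffineNodeOp's bias gradient is now itself a GEMM, grad(bias) = ones^T * adj, which sums the adjoint over the row axis; this is the same reduction the commented-out elementwise Add performed. Note the beta argument of 0.f in the new Prod call, i.e. the bias gradient is written, not accumulated. Second, every ProdBatched call gains a graph()->allocator() argument so the batched product can draw temporary workspace (for example, the per-matrix pointer arrays a batched GEMM typically needs) from the graph's allocator rather than allocating on its own. A plain C++ sketch of the bias-gradient reduction; illustrative, not the Marian kernel:

    // bias_grad_ref: gbias(1 x cols) = ones(1 x rows) * adj(rows x cols)
    void bias_grad_ref(float* gbias, const float* adj, int rows, int cols) {
      for(int j = 0; j < cols; ++j) {
        float sum = 0.f;
        for(int i = 0; i < rows; ++i)
          sum += adj[i * cols + j];  // column sum == ones^T * adj
        gbias[j] = sum;              // beta == 0.f in the diff: overwrite
      }
    }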
diff --git a/src/graph/node_operators_unary.h b/src/graph/node_operators_unary.h
index fa6d25c7..d7ef751d 100644
--- a/src/graph/node_operators_unary.h
+++ b/src/graph/node_operators_unary.h
@@ -7,7 +7,9 @@
#include "graph/node.h"
#include "tensors/tensor_operators.h"
-//#include "tensors/gpu/cudnn_wrappers.h"
+#ifdef CUDNN
+#include "tensors/gpu/cudnn_wrappers.h"
+#endif
namespace marian {
@@ -815,7 +817,7 @@ struct TransposeNodeOp : public UnaryNodeOp {
}
NodeOps backwardOps() {
- return {NodeOp(TransposeND(child(0)->grad(), adj_, axes_))};
+ return {NodeOp(TransposeNDGrad(child(0)->grad(), adj_, axes_))};
}
template <class... Args>
@@ -1009,7 +1011,9 @@ struct ShiftNodeOp : public UnaryNodeOp {
}
NodeOps backwardOps() {
- return {NodeOp(Shift(child(0)->grad(), adj_, shift_, /*padValue=*/0.f, /*invert=*/true))};
+ // last parameter beta=1 says to use += (out = in + beta * out)
+ // @TODO: check need for padValue_
+ return {NodeOp(ShiftGrad(child(0)->grad(), adj_, shift_, true))};
}
const std::string type() { return "shift"; }
@@ -1076,6 +1080,7 @@ struct ShiftNodeOp : public UnaryNodeOp {
// Ptr<sparse::CSR> lf_;
//};
+#ifdef CUDNN
class PoolingOp : public UnaryNodeOp {
public:
PoolingOp(Expr x,
@@ -1109,6 +1114,7 @@ public:
protected:
PoolingWrapper pooling_;
};
+#endif
class PoolingWithMaskingOp : public UnaryNodeOp {
public:
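
Note on the unary-op changes above: TransposeNodeOp and ShiftNodeOp now call dedicated gradient kernels (TransposeNDGrad, ShiftGrad) that accumulate into the existing gradient rather than overwriting it, per the in-diff comment about beta = 1 (out = in + beta * out). A plain C++ sketch of an inverted shift with accumulation for a 1-D tensor; illustrative only, the real kernel handles N-dimensional shapes and padding:

    // shift_grad_ref: grad[i] += adj[i + shift]; adjoints that fall outside
    // the tensor came from padded positions and are dropped.
    void shift_grad_ref(float* grad, const float* adj, int n, int shift) {
      for(int i = 0; i < n; ++i) {
        int j = i + shift;   // invert the forward shift
        if(j >= 0 && j < n)
          grad[i] += adj[j]; // beta == 1: accumulate
      }
    }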