Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/src/graph
diff options
context:
space:
mode:
author: Marcin Junczys-Dowmunt <Marcin.JunczysDowmunt@microsoft.com> 2019-01-29 20:35:53 +0300
committer: Marcin Junczys-Dowmunt <Marcin.JunczysDowmunt@microsoft.com> 2019-01-29 20:35:53 +0300
commit: 49c139d950184c777d02417ed2586349d853e862 (patch)
tree: 6aed8d19d6cd6d6629be208c1befff97d2a267ce /src/graph
parent: 6a663f60baa868fa1659a0cea014df26f89623c4 (diff)
remove debug artifacts
Diffstat (limited to 'src/graph')
-rw-r--r-- src/graph/chainable.h 2
-rw-r--r-- src/graph/expression_graph.cpp 148
-rw-r--r-- src/graph/expression_graph.h 81
-rw-r--r-- src/graph/node.cpp 19
-rw-r--r-- src/graph/node.h 2
5 files changed, 78 insertions, 174 deletions
diff --git a/src/graph/chainable.h b/src/graph/chainable.h
index 3aa81d3c..2679843e 100644
--- a/src/graph/chainable.h
+++ b/src/graph/chainable.h
@@ -96,8 +96,6 @@ public:
virtual const std::string& name() const = 0;
virtual void debug(const std::string& message) = 0;
- virtual void dump(const std::string& filename) = 0;
-
virtual bool marked_for_debug() = 0;
virtual const std::string& debug_message() = 0;
diff --git a/src/graph/expression_graph.cpp b/src/graph/expression_graph.cpp
index 5b4df339..b3c237d1 100644
--- a/src/graph/expression_graph.cpp
+++ b/src/graph/expression_graph.cpp
@@ -24,153 +24,11 @@ Expr ExpressionGraph::dropout(float prob, const Shape& shape) {
return constant(shape, inits::dropout(prob));
}
-void ExpressionGraph::checkNan(Tensor t, bool& isNan, bool& isInf, bool zero) {
- IsNan(t, allocator(), isNan, isInf, zero);
+void ExpressionGraph::checkNan(Tensor t) {
+ ABORT_IF(throwNaN_, "Not implemented"); t;
+ // ABORT_IF(throwNaN_ && IsNan(t), "Tensor has NaN");
}
-io::Item itemFromTensor(Tensor t, const std::string name, Ptr<Backend> backend) {
- io::Item item;
- item.name = name;
- item.shape = t->shape();
- item.type = t->type();
-
- size_t bytesWithoutPadding = t->shape().elements() * sizeOf(t->type());
- item.bytes.resize(bytesWithoutPadding);
- copy(backend,
- (char*)t->data(),
- (char*)t->data() + bytesWithoutPadding,
- item.bytes.data());
- return item;
-}
-
-void recChildren(Expr node, const std::string& parent, std::vector<io::Item>& items, Ptr<Backend> backend) {
- std::string name = node->type() + "_" + std::to_string(node->getId()) + "_p:" + parent;
- items.push_back(itemFromTensor(node->val(), name, backend));
- for(auto&& child : node->children())
- recChildren(child, std::to_string(node->getId()), items, backend);
-}
-
-void ExpressionGraph::forwardNext() {
- // @TODO: check if allocation works properly
- tensors_->clearShorttermMemory();
-
- while(!nodesForward_.empty()) {
- auto v = nodesForward_.front();
- v->allocate();
- v->init();
- v->forward();
-
- if(v->trainable() && throwNan_) {
- bool isNan = false, isInf = false;
- checkNan(v->val(), isNan, isInf);
- if(isNan || isInf) {
- LOG(critical, "Detected NaN ({}) or Inf ({}) in value (forward pass)", isNan, isInf);
- LOG(critical, "\tType: {}, Shape: {}, Name: {}, Id: {}, Hash: {}",
- v->type(), v->shape(), v->name(), v->getId(), v->hash());
- LOG(critical, "Value debug {}", v->val()->debug());
-
- LOG(critical, "Children: {}", v->children().size());
- for(auto&& child : v->children()) {
- LOG(critical, "\tType: {}, Shape: {}, Name: {}, Id: {}, Hash: {}",
- child->type(), child->shape(), child->name(), child->getId(), child->hash());
- LOG(critical, "Value debug {}", child->val()->debug());
- }
-
- std::vector<io::Item> ioItems;
- recChildren(v, "root", ioItems, backend_);
- io::saveItems("dump-for-nans.npz", ioItems);
-
- ABORT("Aborting");
- }
- }
-
- if(v->marked_for_debug()) {
- LOG(info, "Debug: {} op={}", v->debug_message(), v->type());
- LOG(info, v->val()->debug());
- }
-
- if(inferenceOnly_)
- v->children().clear();
- nodesForward_.pop_front();
- }
-}
-
-void ExpressionGraph::backward(bool zero, float clipValue) {
- if(topNodes_.size() > 1) {
- LOG(critical, "There are more ({}) than one top most nodes for backward pass:", topNodes_.size());
- for(auto node : topNodes_) {
- LOG(critical,
- "\tType: {}, Shape: {}, Name: {}, Id: {}, Hash: {}",
- node->type(),
- node->shape(),
- node->name(),
- node->getId(),
- node->hash());
- }
- ABORT("Aborting");
- }
-
- params_->allocateBackward();
- if(zero)
- params_->set_zero_adjoint();
-
- for(auto&& v : topNodes_)
- v->init_dependent();
-
- // named_.clear();
- topNodes_.clear();
-
- tensors_->clearShorttermMemory();
-
- while(!nodesBackward_.empty()) {
- auto v = nodesBackward_.back();
- nodesBackward_.pop_back();
-
- for(auto&& child : v->children()) {
- if(child->trainable() && child->type() != "param")
- child->set_zero_adjoint();
- }
-
- if(v->trainable()) {
- v->backward();
- if(clipValue != 0) {
- using namespace functional;
- Element(_1 = clip(_1, clipValue), v->grad());
- }
- }
-
-
- if(throwNan_) {
- for(auto&& child : v->children()) {
- if(child->trainable()) {
- bool isNan = false, isInf = false;
- checkNan(child->grad(), isNan, isInf);
- if(isNan || isInf) {
- LOG(critical, "Detected NaN ({}) or Inf ({}) in gradient (backward pass) of child node", isNan, isInf);
- LOG(critical, "Child - Type: {}, Shape: {}, Name: {}, Id: {}, Hash: {}",
- child->type(), child->shape(), child->name(), child->getId(), child->hash());
- LOG(critical, "Value debug: {}", child->val()->debug());
- LOG(critical, "Grad debug: {}", child->grad()->debug());
- LOG(critical, "Parent - Type: {}, Shape: {}, Name: {}, Id: {}, Hash: {}",
- v->type(), v->shape(), v->name(), v->getId(), v->hash());
- LOG(critical, "Value debug: {}", v->val()->debug());
- LOG(critical, "Grad debug: {}", v->grad()->debug());
- ABORT("Aborting");
- }
- }
- }
- }
-
- if(v->trainable() && v->marked_for_debug()) {
- LOG(info, "Debug Grad: {} op={}", v->debug_message(), v->type());
- LOG(info, v->grad()->debug());
- }
-
- v->children().clear();
- }
-}
-
-
void ExpressionGraph::save(std::vector<io::Item>& ioItems) {
for(auto p : params()->getMap()) {
std::string pName = p.first;
diff --git a/src/graph/expression_graph.h b/src/graph/expression_graph.h
index 389c6e3e..fe836161 100644
--- a/src/graph/expression_graph.h
+++ b/src/graph/expression_graph.h
@@ -136,7 +136,7 @@ private:
bool reloaded_{false};
std::string namespace_;
- bool throwNan_{false};
+ bool throwNaN_{false};
protected:
// Delete, copy and move constructors
@@ -217,11 +217,81 @@ public:
forwardNext();
}
- void checkNan(Tensor t, bool& isNan, bool& isInf, bool zero = false);
+ void checkNan(Tensor t);
- void forwardNext();
+ void forwardNext() {
+ // @TODO: check if allocation works properly
+ tensors_->clearShorttermMemory();
- void backward(bool zero = true, float clipValue = 0.f);
+ while(!nodesForward_.empty()) {
+ auto v = nodesForward_.front();
+ v->allocate();
+ v->init();
+ v->forward();
+
+ checkNan(v->val());
+
+ if(v->marked_for_debug()) {
+ std::cerr << "Debug: " << v->debug_message() << " op=" << v->type()
+ << std::endl;
+ std::cerr << v->val()->debug() << std::endl;
+ }
+
+ if(inferenceOnly_)
+ v->children().clear();
+ nodesForward_.pop_front();
+ }
+ }
+
+ void backward(bool zero = true) {
+ if(topNodes_.size() > 1) {
+ LOG(critical, "There are more ({}) than one top most node for backward step:", topNodes_.size());
+ for(auto node : topNodes_) {
+ LOG(critical,
+ "\tType: {}, Shape: {}, Name: {}, Id: {}, Hash: {}",
+ node->type(),
+ node->shape(),
+ node->name(),
+ node->getId(),
+ node->hash());
+ }
+ ABORT("Aborting");
+ }
+
+ params_->allocateBackward();
+ if(zero)
+ params_->set_zero_adjoint();
+
+ for(auto&& v : topNodes_)
+ v->init_dependent();
+
+ // named_.clear();
+ topNodes_.clear();
+
+ tensors_->clearShorttermMemory();
+
+ while(!nodesBackward_.empty()) {
+ auto v = nodesBackward_.back();
+ nodesBackward_.pop_back();
+
+ for(auto&& child : v->children()) {
+ if(child->trainable() && child->type() != "param")
+ child->set_zero_adjoint();
+ }
+
+ if(v->trainable())
+ v->backward();
+
+ checkNan(v->grad());
+
+ if(v->trainable() && v->marked_for_debug()) {
+ std::cerr << "Debug Grad: " << v->debug_message() << std::endl;
+ std::cerr << v->grad()->debug() << std::endl;
+ }
+
+ v->children().clear();
+ }
+ }
std::string graphviz() {
std::stringstream ss;
@@ -390,8 +460,7 @@ public:
void setReloaded(bool reloaded) { reloaded_ = reloaded; }
- void setThrowNan(bool throwNan) { throwNan_ = throwNan; }
- bool getThrowNan() { return throwNan_; }
+ void setThrowNaN(bool throwNaN) { throwNaN_ = throwNaN; }
public:
// convert all parameters into an array of IoItem elements, for loading
diff --git a/src/graph/node.cpp b/src/graph/node.cpp
index bdb50116..c11531da 100644
--- a/src/graph/node.cpp
+++ b/src/graph/node.cpp
@@ -2,8 +2,6 @@
#include "graph/auto_tuner.h"
#include "graph/expression_graph.h"
#include "tensors/backend.h"
-#include "tensors/tensor_operators.h"
-#include "common/io.h"
namespace marian {
@@ -85,21 +83,4 @@ void Node::record(Ptr<AutoTunerRecorder> recorder,
recorderHash_ = recorderHash;
recorderStop_ = stop;
}
-
-void Node::dump(const std::string& filename) {
- io::Item item;
- item.name = "dump";
- item.shape = val_->shape();
- item.type = val_->type();
-
- size_t bytesWithoutPadding = val_->shape().elements() * sizeOf(val_->type());
- item.bytes.resize(bytesWithoutPadding);
- copy(graph()->getBackend(),
- (char*)val_->data(),
- (char*)val_->data() + bytesWithoutPadding,
- item.bytes.data());
-
- std::vector<io::Item> items({item});
- io::saveItems(filename, items);
-}
} // namespace marian
diff --git a/src/graph/node.h b/src/graph/node.h
index defefd5b..1397e74b 100644
--- a/src/graph/node.h
+++ b/src/graph/node.h
@@ -100,8 +100,6 @@ public:
virtual bool marked_for_debug() override { return markedForDebug_; }
virtual const std::string& debug_message() override { return debugMessage_; }
- virtual void dump(const std::string& filename) override;
-
virtual size_t allocate() override;
virtual void free() override;