diff options
author | Marcin Junczys-Dowmunt <Marcin.JunczysDowmunt@microsoft.com> | 2019-01-29 20:35:53 +0300 |
---|---|---|
committer | Marcin Junczys-Dowmunt <Marcin.JunczysDowmunt@microsoft.com> | 2019-01-29 20:35:53 +0300 |
commit | 49c139d950184c777d02417ed2586349d853e862 (patch) | |
tree | 6aed8d19d6cd6d6629be208c1befff97d2a267ce /src/graph | |
parent | 6a663f60baa868fa1659a0cea014df26f89623c4 (diff) |
remove debug artifacts
Diffstat (limited to 'src/graph')
-rw-r--r-- | src/graph/chainable.h | 2 | ||||
-rw-r--r-- | src/graph/expression_graph.cpp | 148 | ||||
-rw-r--r-- | src/graph/expression_graph.h | 81 | ||||
-rw-r--r-- | src/graph/node.cpp | 19 | ||||
-rw-r--r-- | src/graph/node.h | 2 |
5 files changed, 78 insertions, 174 deletions
diff --git a/src/graph/chainable.h b/src/graph/chainable.h index 3aa81d3c..2679843e 100644 --- a/src/graph/chainable.h +++ b/src/graph/chainable.h @@ -96,8 +96,6 @@ public: virtual const std::string& name() const = 0; virtual void debug(const std::string& message) = 0; - virtual void dump(const std::string& filename) = 0; - virtual bool marked_for_debug() = 0; virtual const std::string& debug_message() = 0; diff --git a/src/graph/expression_graph.cpp b/src/graph/expression_graph.cpp index 5b4df339..b3c237d1 100644 --- a/src/graph/expression_graph.cpp +++ b/src/graph/expression_graph.cpp @@ -24,153 +24,11 @@ Expr ExpressionGraph::dropout(float prob, const Shape& shape) { return constant(shape, inits::dropout(prob)); } -void ExpressionGraph::checkNan(Tensor t, bool& isNan, bool& isInf, bool zero) { - IsNan(t, allocator(), isNan, isInf, zero); +void ExpressionGraph::checkNan(Tensor t) { + ABORT_IF(throwNaN_, "Not implemented"); t; + // ABORT_IF(throwNaN_ && IsNan(t), "Tensor has NaN"); } -io::Item itemFromTensor(Tensor t, const std::string name, Ptr<Backend> backend) { - io::Item item; - item.name = name; - item.shape = t->shape(); - item.type = t->type(); - - size_t bytesWithoutPadding = t->shape().elements() * sizeOf(t->type()); - item.bytes.resize(bytesWithoutPadding); - copy(backend, - (char*)t->data(), - (char*)t->data() + bytesWithoutPadding, - item.bytes.data()); - return item; -} - -void recChildren(Expr node, const std::string& parent, std::vector<io::Item>& items, Ptr<Backend> backend) { - std::string name = node->type() + "_" + std::to_string(node->getId()) + "_p:" + parent; - items.push_back(itemFromTensor(node->val(), name, backend)); - for(auto&& child : node->children()) - recChildren(child, std::to_string(node->getId()), items, backend); -} - -void ExpressionGraph::forwardNext() { - // @TODO: check if allocation works properly - tensors_->clearShorttermMemory(); - - while(!nodesForward_.empty()) { - auto v = nodesForward_.front(); - v->allocate(); - v->init(); - v->forward(); - - if(v->trainable() && throwNan_) { - bool isNan = false, isInf = false; - checkNan(v->val(), isNan, isInf); - if(isNan || isInf) { - LOG(critical, "Detected NaN ({}) or Inf ({}) in value (forward pass)", isNan, isInf); - LOG(critical, "\tType: {}, Shape: {}, Name: {}, Id: {}, Hash: {}", - v->type(), v->shape(), v->name(), v->getId(), v->hash()); - LOG(critical, "Value debug {}", v->val()->debug()); - - LOG(critical, "Children: {}", v->children().size()); - for(auto&& child : v->children()) { - LOG(critical, "\tType: {}, Shape: {}, Name: {}, Id: {}, Hash: {}", - child->type(), child->shape(), child->name(), child->getId(), child->hash()); - LOG(critical, "Value debug {}", child->val()->debug()); - } - - std::vector<io::Item> ioItems; - recChildren(v, "root", ioItems, backend_); - io::saveItems("dump-for-nans.npz", ioItems); - - ABORT("Aborting"); - } - } - - if(v->marked_for_debug()) { - LOG(info, "Debug: {} op={}", v->debug_message(), v->type()); - LOG(info, v->val()->debug()); - } - - if(inferenceOnly_) - v->children().clear(); - nodesForward_.pop_front(); - } -} - -void ExpressionGraph::backward(bool zero, float clipValue) { - if(topNodes_.size() > 1) { - LOG(critical, "There are more ({}) than one top most nodes for backward pass:", topNodes_.size()); - for(auto node : topNodes_) { - LOG(critical, - "\tType: {}, Shape: {}, Name: {}, Id: {}, Hash: {}", - node->type(), - node->shape(), - node->name(), - node->getId(), - node->hash()); - } - ABORT("Aborting"); - } - - params_->allocateBackward(); - if(zero) - params_->set_zero_adjoint(); - - for(auto&& v : topNodes_) - v->init_dependent(); - - // named_.clear(); - topNodes_.clear(); - - tensors_->clearShorttermMemory(); - - while(!nodesBackward_.empty()) { - auto v = nodesBackward_.back(); - nodesBackward_.pop_back(); - - for(auto&& child : v->children()) { - if(child->trainable() && child->type() != "param") - child->set_zero_adjoint(); - } - - if(v->trainable()) { - v->backward(); - if(clipValue != 0) { - using namespace functional; - Element(_1 = clip(_1, clipValue), v->grad()); - } - } - - - if(throwNan_) { - for(auto&& child : v->children()) { - if(child->trainable()) { - bool isNan = false, isInf = false; - checkNan(child->grad(), isNan, isInf); - if(isNan || isInf) { - LOG(critical, "Detected NaN ({}) or Inf ({}) in gradient (backward pass) of child node", isNan, isInf); - LOG(critical, "Child - Type: {}, Shape: {}, Name: {}, Id: {}, Hash: {}", - child->type(), child->shape(), child->name(), child->getId(), child->hash()); - LOG(critical, "Value debug: {}", child->val()->debug()); - LOG(critical, "Grad debug: {}", child->grad()->debug()); - LOG(critical, "Parent - Type: {}, Shape: {}, Name: {}, Id: {}, Hash: {}", - v->type(), v->shape(), v->name(), v->getId(), v->hash()); - LOG(critical, "Value debug: {}", v->val()->debug()); - LOG(critical, "Grad debug: {}", v->grad()->debug()); - ABORT("Aborting"); - } - } - } - } - - if(v->trainable() && v->marked_for_debug()) { - LOG(info, "Debug Grad: {} op={}", v->debug_message(), v->type()); - LOG(info, v->grad()->debug()); - } - - v->children().clear(); - } -} - - void ExpressionGraph::save(std::vector<io::Item>& ioItems) { for(auto p : params()->getMap()) { std::string pName = p.first; diff --git a/src/graph/expression_graph.h b/src/graph/expression_graph.h index 389c6e3e..fe836161 100644 --- a/src/graph/expression_graph.h +++ b/src/graph/expression_graph.h @@ -136,7 +136,7 @@ private: bool reloaded_{false}; std::string namespace_; - bool throwNan_{false}; + bool throwNaN_{false}; protected: // Delete, copy and move constructors @@ -217,11 +217,81 @@ public: forwardNext(); } - void checkNan(Tensor t, bool& isNan, bool& isInf, bool zero = false); + void checkNan(Tensor t); - void forwardNext(); + void forwardNext() { + // @TODO: check if allocation works properly + tensors_->clearShorttermMemory(); - void backward(bool zero = true, float clipValue = 0.f); + while(!nodesForward_.empty()) { + auto v = nodesForward_.front(); + v->allocate(); + v->init(); + v->forward(); + + checkNan(v->val()); + + if(v->marked_for_debug()) { + std::cerr << "Debug: " << v->debug_message() << " op=" << v->type() + << std::endl; + std::cerr << v->val()->debug() << std::endl; + } + + if(inferenceOnly_) + v->children().clear(); + nodesForward_.pop_front(); + } + } + + void backward(bool zero = true) { + if(topNodes_.size() > 1) { + LOG(critical, "There are more ({}) than one top most node for backward step:", topNodes_.size()); + for(auto node : topNodes_) { + LOG(critical, + "\tType: {}, Shape: {}, Name: {}, Id: {}, Hash: {}", + node->type(), + node->shape(), + node->name(), + node->getId(), + node->hash()); + } + ABORT("Aborting"); + } + + params_->allocateBackward(); + if(zero) + params_->set_zero_adjoint(); + + for(auto&& v : topNodes_) + v->init_dependent(); + + // named_.clear(); + topNodes_.clear(); + + tensors_->clearShorttermMemory(); + + while(!nodesBackward_.empty()) { + auto v = nodesBackward_.back(); + nodesBackward_.pop_back(); + + for(auto&& child : v->children()) { + if(child->trainable() && child->type() != "param") + child->set_zero_adjoint(); + } + + if(v->trainable()) + v->backward(); + + checkNan(v->grad()); + + if(v->trainable() && v->marked_for_debug()) { + std::cerr << "Debug Grad: " << v->debug_message() << std::endl; + std::cerr << v->grad()->debug() << std::endl; + } + + v->children().clear(); + } + } std::string graphviz() { std::stringstream ss; @@ -390,8 +460,7 @@ public: void setReloaded(bool reloaded) { reloaded_ = reloaded; } - void setThrowNan(bool throwNan) { throwNan_ = throwNan; } - bool getThrowNan() { return throwNan_; } + void setThrowNaN(bool throwNaN) { throwNaN_ = throwNaN; } public: // convert all parameters into an array of IoItem elements, for loading diff --git a/src/graph/node.cpp b/src/graph/node.cpp index bdb50116..c11531da 100644 --- a/src/graph/node.cpp +++ b/src/graph/node.cpp @@ -2,8 +2,6 @@ #include "graph/auto_tuner.h" #include "graph/expression_graph.h" #include "tensors/backend.h" -#include "tensors/tensor_operators.h" -#include "common/io.h" namespace marian { @@ -85,21 +83,4 @@ void Node::record(Ptr<AutoTunerRecorder> recorder, recorderHash_ = recorderHash; recorderStop_ = stop; } - -void Node::dump(const std::string& filename) { - io::Item item; - item.name = "dump"; - item.shape = val_->shape(); - item.type = val_->type(); - - size_t bytesWithoutPadding = val_->shape().elements() * sizeOf(val_->type()); - item.bytes.resize(bytesWithoutPadding); - copy(graph()->getBackend(), - (char*)val_->data(), - (char*)val_->data() + bytesWithoutPadding, - item.bytes.data()); - - std::vector<io::Item> items({item}); - io::saveItems(filename, items); -} } // namespace marian diff --git a/src/graph/node.h b/src/graph/node.h index defefd5b..1397e74b 100644 --- a/src/graph/node.h +++ b/src/graph/node.h @@ -100,8 +100,6 @@ public: virtual bool marked_for_debug() override { return markedForDebug_; } virtual const std::string& debug_message() override { return debugMessage_; } - virtual void dump(const std::string& filename) override; - virtual size_t allocate() override; virtual void free() override; |