author     Marcin Junczys-Dowmunt <junczys@amu.edu.pl>   2017-10-29 17:42:01 +0300
committer  Marcin Junczys-Dowmunt <junczys@amu.edu.pl>   2017-10-29 17:42:01 +0300
commit     bc95140cfb7afc51fa104f4ecab3e5453bd706ef (patch)
tree       8d57999c8c556cae36d7b9f8c992826cd0804445 /src
parent     2e16934080d4bf41d0ab7557836732bedb635efd (diff)
parent     46433253735e79e03613fcbd28e64ff393f72451 (diff)
merge rnn_test.cpp
Diffstat (limited to 'src')
53 files changed, 601 insertions, 254 deletions
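Editor's note: the core of this change set is src/common/logging.h, which introduces ABORT and ABORT_IF and retires the stream-based UTIL_THROW2/UTIL_THROW_IF2 macros everywhere else in the tree. Below is a minimal, self-contained sketch of the pattern. The committed macros route the message through checkedLog() and spdlog's "{}" formatting; the fprintf stand-in (with printf-style placeholders) and the vocabSize example variable are assumptions made only so the sketch compiles on its own.

#include <cstdio>
#include <cstdlib>

// Stand-in for the macros added in src/common/logging.h. The committed
// versions call checkedLog("general", "critical", ...) and use spdlog's
// "{}" placeholders; plain fprintf is used here so the sketch needs no
// third-party headers.
#define ABORT(...)                     \
  do {                                 \
    std::fprintf(stderr, __VA_ARGS__); \
    std::fprintf(stderr, "\n");        \
    std::abort();                      \
  } while(0)

// Aborts with a critical message if the condition holds -- the shape used
// by every UTIL_THROW_IF2 -> ABORT_IF replacement in this diff.
#define ABORT_IF(condition, ...) \
  do {                           \
    if(condition) {              \
      ABORT(__VA_ARGS__);        \
    }                            \
  } while(0)

int main() {
  int vocabSize = 0;  // hypothetical value, chosen to trigger the abort
  // Old style (removed): UTIL_THROW_IF2(vocabSize == 0,
  //                                     "Empty vocabulary " << path);
  // New style: condition first, then a format string plus arguments.
  ABORT_IF(vocabSize == 0, "Empty vocabulary: %d entries", vocabSize);
}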
diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index 406dcf65..013a635a 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -137,52 +137,46 @@ void ConfigParser::validateOptions() const { if(mode_ == ConfigMode::translating) return; - UTIL_THROW_IF2( + ABORT_IF( !has("train-sets") || get<std::vector<std::string>>("train-sets").empty(), "No train sets given in config file or on command line"); - UTIL_THROW_IF2( - has("vocabs") - && get<std::vector<std::string>>("vocabs").size() - != get<std::vector<std::string>>("train-sets").size(), - "There should be as many vocabularies as training sets"); - UTIL_THROW_IF2( - has("embedding-vectors") - && get<std::vector<std::string>>("embedding-vectors").size() - != get<std::vector<std::string>>("train-sets").size(), - "There should be as many files with embedding vectors as " - "training sets"); + ABORT_IF(has("vocabs") + && get<std::vector<std::string>>("vocabs").size() + != get<std::vector<std::string>>("train-sets").size(), + "There should be as many vocabularies as training sets"); + ABORT_IF(has("embedding-vectors") + && get<std::vector<std::string>>("embedding-vectors").size() + != get<std::vector<std::string>>("train-sets").size(), + "There should be as many files with embedding vectors as " + "training sets"); if(mode_ == ConfigMode::rescoring) return; boost::filesystem::path modelPath(get<std::string>("model")); auto modelDir = modelPath.parent_path(); - UTIL_THROW_IF2( - !modelDir.empty() && !boost::filesystem::is_directory(modelDir), - "Model directory does not exist"); + ABORT_IF(!modelDir.empty() && !boost::filesystem::is_directory(modelDir), + "Model directory does not exist"); - UTIL_THROW_IF2( - has("valid-sets") - && get<std::vector<std::string>>("valid-sets").size() - != get<std::vector<std::string>>("train-sets").size(), - "There should be as many validation sets as training sets"); + ABORT_IF(has("valid-sets") + && get<std::vector<std::string>>("valid-sets").size() + != get<std::vector<std::string>>("train-sets").size(), + "There should be as many validation sets as training sets"); // validations for learning rate decaying - UTIL_THROW_IF2(get<double>("lr-decay") > 1.0, - "Learning rate decay factor greater than 1.0 is unusual"); - UTIL_THROW_IF2( - (get<std::string>("lr-decay-strategy") == "epoch+batches" - || get<std::string>("lr-decay-strategy") == "epoch+stalled") - && get<std::vector<size_t>>("lr-decay-start").size() != 2, - "Decay strategies 'epoch+batches' and 'epoch+stalled' require two " - "values specified with --lr-decay-start options"); - UTIL_THROW_IF2( - (get<std::string>("lr-decay-strategy") == "epoch" - || get<std::string>("lr-decay-strategy") == "batches" - || get<std::string>("lr-decay-strategy") == "stalled") - && get<std::vector<size_t>>("lr-decay-start").size() != 1, - "Single decay strategies require only one value specified with " - "--lr-decay-start option"); + ABORT_IF(get<double>("lr-decay") > 1.0, + "Learning rate decay factor greater than 1.0 is unusual"); + ABORT_IF((get<std::string>("lr-decay-strategy") == "epoch+batches" + || get<std::string>("lr-decay-strategy") == "epoch+stalled") + && get<std::vector<size_t>>("lr-decay-start").size() != 2, + "Decay strategies 'epoch+batches' and 'epoch+stalled' require two " + "values specified with --lr-decay-start options"); + ABORT_IF((get<std::string>("lr-decay-strategy") == "epoch" + || get<std::string>("lr-decay-strategy") == "batches" + || get<std::string>("lr-decay-strategy") == "stalled") + && 
get<std::vector<size_t>>("lr-decay-start").size() != 1, + "Single decay strategies require only one value specified with " + "--lr-decay-start option"); } void ConfigParser::addOptionsCommon(po::options_description& desc) { diff --git a/src/common/file_stream.h b/src/common/file_stream.h index 6ccf78c3..53d41faf 100644 --- a/src/common/file_stream.h +++ b/src/common/file_stream.h @@ -9,7 +9,8 @@ #include <sys/stat.h> -#include "exception.h" +#include "3rd_party/exception.h" +#include "common/logging.h" namespace io = boost::iostreams; @@ -22,7 +23,7 @@ private: int mkstemp_and_unlink(char* tmpl) { int ret = mkstemp(tmpl); if(unlink_ && ret != -1) { - UTIL_THROW_IF2(unlink(tmpl), "while deleting " << tmpl); + ABORT_IF(unlink(tmpl), "Error while deleting '{}'", tmpl); } return ret; } @@ -32,8 +33,9 @@ private: name += "marian.XXXXXX"; name.push_back(0); int ret; - UTIL_THROW_IF2(-1 == (ret = mkstemp_and_unlink(&name[0])), - "while making a temporary based on " << base); + ABORT_IF(-1 == (ret = mkstemp_and_unlink(&name[0])), + "Error while making a temporary based on '{}'", + base); name_ = name; return ret; } @@ -61,7 +63,7 @@ public: ~TemporaryFile() { if(fd_ != -1 && !unlink_) { - UTIL_THROW_IF2(unlink(name_.c_str()), "while deleting " << name_); + ABORT_IF(unlink(name_.c_str()), "Error while deleting '{}'", name_); } if(fd_ != -1 && close(fd_)) { std::cerr << "Could not close file " << fd_ << std::endl; @@ -77,8 +79,8 @@ public: class InputFileStream { public: InputFileStream(const std::string& file) : file_(file), ifstream_(file_) { - UTIL_THROW_IF2(!boost::filesystem::exists(file_), - "File " << file << " does not exist"); + ABORT_IF( + !boost::filesystem::exists(file_), "File '{}' does not exist", file); if(file_.extension() == ".gz") istream_.push(io::gzip_decompressor()); @@ -117,8 +119,8 @@ private: class OutputFileStream { public: OutputFileStream(const std::string& file) : file_(file), ofstream_(file_) { - UTIL_THROW_IF2(!boost::filesystem::exists(file_), - "File " << file << " does not exist"); + ABORT_IF( + !boost::filesystem::exists(file_), "File '{}' does not exist", file); if(file_.extension() == ".gz") ostream_.push(io::gzip_compressor()); diff --git a/src/common/logging.h b/src/common/logging.h index 9564b33c..127a9700 100644 --- a/src/common/logging.h +++ b/src/common/logging.h @@ -3,21 +3,20 @@ #include "spdlog/spdlog.h" /** - * @brief Prints logging message into stderr and a file specified with `--log` - * option. + * Prints logging message into stderr and a file specified with `--log` option. * * Example usage: `LOG(info, "[data] Vocab size: {}", vocabSize)` * - * A good practise is to put `[namespace]` at the beginning of your message. + * A good practice is to put `[namespace]` at the beginning of the message. * * @param level Logging level: trace, debug, info, warn, error, critical - * @param ... + * @param ... Message text and variables */ #define LOG(level, ...) checkedLog("general", #level, __VA_ARGS__) /** - * @brief Prints logging message regarding validation into stderr and a file - * specified with `--valid-log` option. + * Prints logging message regarding validation into stderr and a file specified + * with `--valid-log` option. * * The message is automatically preceded by "[valid] ". * @@ -25,6 +24,34 @@ */ #define LOG_VALID(level, ...) checkedLog("valid", #level, __VA_ARGS__) +/** + * Prints critical error message and causes abnormal program termination by + * calling std::abort(). + * + * @param ... Message text and variables + */ +#define ABORT(...) 
\ + do { \ + checkedLog("general", "critical", __VA_ARGS__); \ + std::abort(); \ + } while(0) + +/** + * Prints critical error message and causes abnormal program termination if + * conditions is true. + * + * @param condition Condition expression + * @param ... Message text and variables + * + * @see \def ABORT(...) + */ +#define ABORT_IF(condition, ...) \ + do { \ + if(condition) { \ + ABORT(__VA_ARGS__); \ + } \ + } while(0) + typedef std::shared_ptr<spdlog::logger> Logger; Logger stderrLogger(const std::string&, const std::string&, @@ -38,8 +65,13 @@ class Config; template <class... Args> void checkedLog(std::string logger, std::string level, Args... args) { Logger log = spdlog::get(logger); - if(!log) + if(!log) { + if(level == "critical") { + auto stderr = stderrLogger("error", "Error: %v - aborting"); + stderr->critical(args...); + } return; + } if(level == "trace") log->trace(args...); diff --git a/src/common/options.h b/src/common/options.h index 0d68d543..1737a661 100644 --- a/src/common/options.h +++ b/src/common/options.h @@ -62,8 +62,7 @@ public: template <typename T> T get(const std::string& key) { - UTIL_THROW_IF2(!has(key), - "Required option \"" << key << "\" has not been set"); + ABORT_IF(!has(key), "Required option '{}' has not been set", key); return options_[key].as<T>(); } diff --git a/src/common/utils.cpp b/src/common/utils.cpp index f4bd5790..a13241cd 100644 --- a/src/common/utils.cpp +++ b/src/common/utils.cpp @@ -2,7 +2,8 @@ #include <sstream> #include "3rd_party/exception.h" -#include "utils.h" +#include "common/logging.h" +#include "common/utils.h" void Trim(std::string& s) { boost::trim_if(s, boost::is_any_of(" \t\n")); @@ -46,7 +47,7 @@ std::string Exec(const std::string& cmd) { std::string result; std::shared_ptr<std::FILE> pipe(popen(cmd.c_str(), "r"), pclose); if(!pipe) - UTIL_THROW2("popen() failed!"); + ABORT("popen() failed!"); while(!std::feof(pipe.get())) { if(std::fgets(buffer.data(), 128, pipe.get()) != NULL) diff --git a/src/data/batch_generator.h b/src/data/batch_generator.h index ceea5493..62040861 100644 --- a/src/data/batch_generator.h +++ b/src/data/batch_generator.h @@ -138,8 +138,7 @@ public: operator bool() const { return !bufferedBatches_.empty(); } BatchPtr next() { - UTIL_THROW_IF2(bufferedBatches_.empty(), - "No batches to fetch, run prepare()"); + ABORT_IF(bufferedBatches_.empty(), "No batches to fetch, run prepare()"); currentBatch_ = bufferedBatches_.front(); bufferedBatches_.pop_front(); diff --git a/src/data/batch_stats.h b/src/data/batch_stats.h index 2ee65cce..70bdbba1 100644 --- a/src/data/batch_stats.h +++ b/src/data/batch_stats.h @@ -22,7 +22,7 @@ public: while(it != map_.end() && it->first[i] < lengths[i]) it++; - UTIL_THROW_IF2(it == map_.end(), "Missing batch statistics"); + ABORT_IF(it == map_.end(), "Missing batch statistics"); return it->second; } diff --git a/src/data/corpus.cpp b/src/data/corpus.cpp index a93d6fe9..b5df6aef 100644 --- a/src/data/corpus.cpp +++ b/src/data/corpus.cpp @@ -44,8 +44,8 @@ Corpus::Corpus(Ptr<Config> options, bool translate) vocabPaths = options_->get<std::vector<std::string>>("vocabs"); if(training) { - UTIL_THROW_IF2(!vocabPaths.empty() && paths_.size() != vocabPaths.size(), - "Number of corpus files and vocab files does not agree"); + ABORT_IF(!vocabPaths.empty() && paths_.size() != vocabPaths.size(), + "Number of corpus files and vocab files does not agree"); } std::vector<int> maxVocabs = options_->get<std::vector<int>>("dim-vocabs"); @@ -61,7 +61,10 @@ Corpus::Corpus(Ptr<Config> options, 
bool translate) for(size_t i = 0; i < paths_.size(); ++i) { Ptr<Vocab> vocab = New<Vocab>(); int vocSize = vocab->loadOrCreate("", paths_[i], maxVocabs[i]); - LOG(info, "[data] Setting vocabulary size for input {} to {}", i, vocSize); + LOG(info, + "[data] Setting vocabulary size for input {} to {}", + i, + vocSize); options_->get()["dim-vocabs"][i] = vocSize; options_->get()["vocabs"].push_back(paths_[i] + ".yml"); @@ -76,15 +79,17 @@ Corpus::Corpus(Ptr<Config> options, bool translate) Ptr<Vocab> vocab = New<Vocab>(); int vocSize = vocab->loadOrCreate(vocabPaths[i], paths_[i], maxVocabs[i]); - LOG(info, "[data] Setting vocabulary size for input {} to {}", i, vocSize); + LOG(info, + "[data] Setting vocabulary size for input {} to {}", + i, + vocSize); options_->get()["dim-vocabs"][i] = vocSize; vocabs_.emplace_back(vocab); } } } else { // i.e., if translating - UTIL_THROW_IF2(vocabPaths.empty(), - "translating but vocabularies are missing!"); + ABORT_IF(vocabPaths.empty(), "Translating but vocabularies are missing!"); if(maxVocabs.size() < vocabPaths.size()) maxVocabs.resize(paths_.size(), 0); @@ -107,7 +112,7 @@ Corpus::Corpus(Ptr<Config> options, bool translate) files_.emplace_back(new InputFileStream(std::cin)); else { files_.emplace_back(new InputFileStream(path)); - UTIL_THROW_IF2(files_.back()->empty(), "File " << path << " is empty"); + ABORT_IF(files_.back()->empty(), "File '{}' is empty", path); } } } @@ -120,8 +125,8 @@ Corpus::Corpus(std::vector<std::string> paths, options_(options), vocabs_(vocabs), maxLength_(maxLength ? maxLength : options_->get<size_t>("max-length")) { - UTIL_THROW_IF2(paths_.size() != vocabs_.size(), - "Number of corpus files and vocab files does not agree"); + ABORT_IF(paths_.size() != vocabs_.size(), + "Number of corpus files and vocab files does not agree"); for(auto path : paths_) { files_.emplace_back(new InputFileStream(path)); diff --git a/src/data/dataset.h b/src/data/dataset.h index 17a146d0..d019b3b3 100644 --- a/src/data/dataset.h +++ b/src/data/dataset.h @@ -77,9 +77,7 @@ public: void push_back(Input input) { inputs_.push_back(input); } - virtual std::vector<Ptr<Batch>> split(size_t n) { - UTIL_THROW2("Not implemented"); - } + virtual std::vector<Ptr<Batch>> split(size_t n) { ABORT("Not implemented"); } Data& features() { return inputs_[0].data(); } diff --git a/src/data/filter.h b/src/data/filter.h index 5c11ef25..e40e8794 100644 --- a/src/data/filter.h +++ b/src/data/filter.h @@ -89,7 +89,7 @@ public: std::vector<std::string> vals = options_->get<std::vector<std::string>>("filter"); - UTIL_THROW_IF2(vals.empty(), "No path to filter path given"); + ABORT_IF(vals.empty(), "No path to filter path given"); std::string fname = vals[0]; firstNum_ = vals.size() > 1 ? std::stoi(vals[1]) : 100; diff --git a/src/data/types.h b/src/data/types.h index f3224999..3e36c454 100644 --- a/src/data/types.h +++ b/src/data/types.h @@ -27,10 +27,16 @@ const std::string DEL_STR = "<d>"; const std::string RPL_STR = "<r>"; const std::unordered_map<std::string, Word> SPEC2SYM = { - {STP_STR, STP_ID}, {CPY_STR, CPY_ID}, {DEL_STR, DEL_ID}, {RPL_STR, RPL_ID}, + {STP_STR, STP_ID}, + {CPY_STR, CPY_ID}, + {DEL_STR, DEL_ID}, + {RPL_STR, RPL_ID}, }; const std::unordered_map<Word, std::string> SYM2SPEC = { - {STP_ID, STP_STR}, {CPY_ID, CPY_STR}, {DEL_ID, DEL_STR}, {RPL_ID, RPL_STR}, + {STP_ID, STP_STR}, + {CPY_ID, CPY_STR}, + {DEL_ID, DEL_STR}, + {RPL_ID, RPL_STR}, }; }
\ No newline at end of file diff --git a/src/data/vocab.cpp b/src/data/vocab.cpp index 18b81d21..0e534ee5 100644 --- a/src/data/vocab.cpp +++ b/src/data/vocab.cpp @@ -52,7 +52,7 @@ std::vector<std::string> Vocab::operator()(const Words& sentence, } const std::string& Vocab::operator[](size_t id) const { - UTIL_THROW_IF2(id >= id2str_.size(), "Unknown word id: " << id); + ABORT_IF(id >= id2str_.size(), "Unknown word id: ", id); return id2str_[id]; } @@ -100,7 +100,7 @@ int Vocab::load(const std::string& vocabPath, int max) { id2str_[id] = str; } } - UTIL_THROW_IF2(id2str_.empty(), "Empty vocabulary " << vocabPath); + ABORT_IF(id2str_.empty(), "Empty vocabulary: ", vocabPath); id2str_[EOS_ID] = EOS_STR; id2str_[UNK_ID] = UNK_STR; @@ -126,8 +126,9 @@ public: void Vocab::create(const std::string& vocabPath, const std::string& trainPath) { LOG(info, "[data] Creating vocabulary {} from {}", vocabPath, trainPath); - UTIL_THROW_IF2(boost::filesystem::exists(vocabPath), - "Vocab file " << vocabPath << " exists. Not overwriting"); + ABORT_IF(boost::filesystem::exists(vocabPath), + "Vocab file '{}' exists. Not overwriting", + vocabPath); InputFileStream trainStrm(trainPath); diff --git a/src/examples/mnist/dataset.h b/src/examples/mnist/dataset.h index 3a60b659..391bf926 100644 --- a/src/examples/mnist/dataset.h +++ b/src/examples/mnist/dataset.h @@ -31,13 +31,11 @@ public: } void loadData() { - UTIL_THROW_IF2(paths_.size() != 2, - "Paths to MNIST data files are not specified"); + ABORT_IF(paths_.size() != 2, "Paths to MNIST data files are not specified"); auto features = ReadImages(paths_[0]); auto labels = ReadLabels(paths_[1]); - UTIL_THROW_IF2(features.size() != labels.size(), - "Features do not match labels"); + ABORT_IF(features.size() != labels.size(), "Features do not match labels"); for(size_t i = 0; i < features.size(); ++i) { Example ex = {features[i], labels[i]}; @@ -57,14 +55,13 @@ private: std::vector<Data> ReadImages(const std::string &full_path) { std::ifstream file(full_path); - UTIL_THROW_IF2(!file.is_open(), "Cannot open file `" + full_path + "`!"); + ABORT_IF(!file.is_open(), "Cannot open file `" + full_path + "`!"); int magic_number = 0; file.read((char *)&magic_number, sizeof(magic_number)); magic_number = reverseInt(magic_number); - UTIL_THROW_IF2(magic_number != IMAGE_MAGIC_NUMBER, - "Invalid MNIST image file!"); + ABORT_IF(magic_number != IMAGE_MAGIC_NUMBER, "Invalid MNIST image file!"); int number_of_images; int n_rows = 0; diff --git a/src/graph/expression_graph.cu b/src/graph/expression_graph.cu index fb720ecc..829af085 100644 --- a/src/graph/expression_graph.cu +++ b/src/graph/expression_graph.cu @@ -49,6 +49,6 @@ Expr ExpressionGraph::gaussian(float mean, float stddev, Shape shape) { } void ExpressionGraph::checkNan(Tensor t) { - UTIL_THROW_IF2(throwNaN_ && IsNan(t), "Tensor has NaN"); + ABORT_IF(throwNaN_ && IsNan(t), "Tensor has NaN"); } } diff --git a/src/graph/expression_graph.h b/src/graph/expression_graph.h index 1ca0204a..67a5f5ef 100644 --- a/src/graph/expression_graph.h +++ b/src/graph/expression_graph.h @@ -178,8 +178,8 @@ public: * and that all backward pass computations have been performed. 
*/ void backward() { - UTIL_THROW_IF2(topNodes_.size() > 1, - "There are more than one top most node for backward step"); + ABORT_IF(topNodes_.size() > 1, + "There are more than one top most node for backward step"); params_->allocateBackward(); params_->set_zero_adjoint(); @@ -269,23 +269,22 @@ public: if(p) { // if yes add to tape and return - UTIL_THROW_IF2(shape != p->shape(), - "Requested shape for existing parameter " - << name - << " does not match original shape"); + ABORT_IF(shape != p->shape(), + "Requested shape for existing parameter '{}' does not match " + "original shape", + name); add(p); return p; } // if graph was reloaded do not allow creation of new parameters - UTIL_THROW_IF2(reloaded_, - "Graph was reloaded and parameter " << name - << " is newly created"); + ABORT_IF(reloaded_, + "Graph was reloaded and parameter '{}' is newly created", + name); // if not check if name is not taken by other node - UTIL_THROW_IF2(get(name), - "Non-parameter with name " << name << "already exists"); + ABORT_IF(get(name), "Non-parameter with name '{}' already exists", name); // create parameter node (adds to tape) p = Expression<ParamNode>( diff --git a/src/graph/expression_operators.cu b/src/graph/expression_operators.cu index af592625..10861c14 100644 --- a/src/graph/expression_operators.cu +++ b/src/graph/expression_operators.cu @@ -195,11 +195,11 @@ Expr affine(Expr a, Expr b, Expr c) { } Expr plus(const std::vector<Expr>&) { - UTIL_THROW2("Not implemented"); + ABORT("Not implemented"); } Expr swish(const std::vector<Expr>&) { - UTIL_THROW2("Not implemented"); + ABORT("Not implemented"); } Expr tanh(const std::vector<Expr>& nodes) { @@ -207,11 +207,11 @@ Expr tanh(const std::vector<Expr>& nodes) { } Expr logit(const std::vector<Expr>&) { - UTIL_THROW2("Not implemented"); + ABORT("Not implemented"); } Expr relu(const std::vector<Expr>&) { - UTIL_THROW2("Not implemented"); + ABORT("Not implemented"); } Expr sqrt(Expr a, float eps) { diff --git a/src/graph/expression_operators.h b/src/graph/expression_operators.h index 8824bb40..c99af41d 100644 --- a/src/graph/expression_operators.h +++ b/src/graph/expression_operators.h @@ -109,7 +109,7 @@ Expr dropout(Expr x, Args... 
args) { auto mask = Get(keywords::mask, nullptr, args...); float dropout_prob = Get(keywords::dropout_prob, 0.0f, args...); - UTIL_THROW_IF2(!mask && !dropout_prob, "Neither mask nor dropout prob given"); + ABORT_IF(!mask && !dropout_prob, "Neither mask nor dropout prob given"); if(!mask) { auto graph = x->graph(); mask = graph->dropout(dropout_prob, x->shape()); diff --git a/src/graph/node_operators.h b/src/graph/node_operators.h index cec27bfe..1cb112dd 100644 --- a/src/graph/node_operators.h +++ b/src/graph/node_operators.h @@ -11,8 +11,7 @@ struct ConstantNode : public Node { : Node(args...), init_(Get(keywords::init, [](Tensor) {})), initialized_(false) { - UTIL_THROW_IF2(!Has(keywords::shape), - "Constant items require shape information"); + ABORT_IF(!Has(keywords::shape), "Constant items require shape information"); setTrainable(false); } @@ -47,8 +46,7 @@ struct ParamNode : public Node { : Node(args...), init_(Get(keywords::init, [](Tensor) {})), initialized_(false) { - UTIL_THROW_IF2(!Has(keywords::shape), - "Param items require shape information"); + ABORT_IF(!Has(keywords::shape), "Param items require shape information"); setTrainable(!Get(keywords::fixed, false)); } diff --git a/src/graph/node_operators_binary.h b/src/graph/node_operators_binary.h index 74da4b14..4ec5c092 100644 --- a/src/graph/node_operators_binary.h +++ b/src/graph/node_operators_binary.h @@ -41,10 +41,15 @@ private: public: template <typename... Args> - DotNodeOp( - Expr a, Expr b, bool transA, bool transB, float scalar, Args... args) - : NaryNodeOp( - {a, b}, keywords::shape = newShape(a, b, transA, transB), args...), + DotNodeOp(Expr a, + Expr b, + bool transA, + bool transB, + float scalar, + Args... args) + : NaryNodeOp({a, b}, + keywords::shape = newShape(a, b, transA, transB), + args...), transA_(transA), transB_(transB), scalar_(scalar) {} @@ -240,10 +245,15 @@ private: public: template <typename... Args> - DotBatchedNodeOp( - Expr a, Expr b, bool transA, bool transB, float scalar, Args... args) - : NaryNodeOp( - {a, b}, keywords::shape = newShape(a, b, transA, transB), args...), + DotBatchedNodeOp(Expr a, + Expr b, + bool transA, + bool transB, + float scalar, + Args... 
args) + : NaryNodeOp({a, b}, + keywords::shape = newShape(a, b, transA, transB), + args...), transA_(transA), transB_(transB), scalar_(scalar) {} @@ -263,8 +273,8 @@ public: Shape outShape = shapeA; outShape.set(1, shapeB[1]); - UTIL_THROW_IF2(shapeA[1] != shapeB[0], - "matrix product requires dimensions to match"); + ABORT_IF(shapeA[1] != shapeB[0], + "matrix product requires dimensions to match"); return outShape; } @@ -425,8 +435,8 @@ struct ElementBinaryNodeOp : public NaryNodeOp { Shape shape1 = a->shape(); Shape shape2 = b->shape(); for(int i = 0; i < shape1.size(); ++i) { - UTIL_THROW_IF2(shape1[i] != shape2[i] && shape1[i] != 1 && shape2[i] != 1, - "Shapes cannot be broadcasted"); + ABORT_IF(shape1[i] != shape2[i] && shape1[i] != 1 && shape2[i] != 1, + "Shapes cannot be broadcasted"); shape1.set(i, std::max(shape1[i], shape2[i])); } return shape1; @@ -625,7 +635,7 @@ struct TanhPlus3NodeOp : public NaryNodeOp { for(int n = 1; n < nodes.size(); ++n) { Shape shapen = nodes[n]->shape(); for(int i = 0; i < shapen.size(); ++i) { - UTIL_THROW_IF2(shape[i] != shapen[i] && shape[i] != 1 && shapen[i] != 1, + ABORT_IF(shape[i] != shapen[i] && shape[i] != 1 && shapen[i] != 1, "Shapes cannot be broadcasted"); shape.set(i, std::max(shape[i], shapen[i])); } diff --git a/src/graph/node_operators_unary.h b/src/graph/node_operators_unary.h index 9881357c..a3f60366 100644 --- a/src/graph/node_operators_unary.h +++ b/src/graph/node_operators_unary.h @@ -146,8 +146,8 @@ struct TanhNodeOp : public NaryNodeOp { for(int n = 1; n < nodes.size(); ++n) { Shape shapen = nodes[n]->shape(); for(int i = 0; i < shapen.size(); ++i) { - UTIL_THROW_IF2(shape[i] != shapen[i] && shape[i] != 1 && shapen[i] != 1, - "Shapes cannot be broadcasted"); + ABORT_IF(shape[i] != shapen[i] && shape[i] != 1 && shapen[i] != 1, + "Shapes cannot be broadcasted"); shape.set(i, std::max(shape[i], shapen[i])); } } @@ -237,8 +237,11 @@ struct SwishNodeOp : public UnaryNodeOp { } NodeOps backwardOps() { - return {NodeOp( - Add(_1 * (_3 + Sigma(_2) * (1.f - _3)), child(0)->grad(), adj_, child(0)->val(), val_))}; + return {NodeOp(Add(_1 * (_3 + Sigma(_2) * (1.f - _3)), + child(0)->grad(), + adj_, + child(0)->val(), + val_))}; } const std::string type() { return "swish"; } diff --git a/src/graph/parameters.h b/src/graph/parameters.h index bbe02f36..df73dbd5 100644 --- a/src/graph/parameters.h +++ b/src/graph/parameters.h @@ -51,8 +51,7 @@ public: void add(Expr p, const std::string& name) { params_.push_back(p); - UTIL_THROW_IF2(named_.count(name), - "Parameter " << name << "already exists"); + ABORT_IF(named_.count(name), "Parameter '{}' already exists", name); named_[name] = p; } diff --git a/src/kernels/sparse.cu b/src/kernels/sparse.cu index 911d29ce..1d104474 100644 --- a/src/kernels/sparse.cu +++ b/src/kernels/sparse.cu @@ -7,8 +7,11 @@ namespace marian { namespace sparse { -void multiply( - Ptr<CSR> C, const Ptr<CSR> A, const Ptr<CSR> B, bool transA, bool transB) { +void multiply(Ptr<CSR> C, + const Ptr<CSR> A, + const Ptr<CSR> B, + bool transA, + bool transB) { cudaSetDevice(C->getDevice()); int nnzTotal; C->allocRowIndices(A->rows()); @@ -101,8 +104,8 @@ void LfaForward(Tensor out, Tensor logits, Tensor att, Ptr<CSR> sparseLf) { for(size_t i = 0; i < nonzeros; ++i) { int r = (i % batch) + (i / (srcWords * batch)) * batch; int c = i % (srcWords * batch); - UTIL_THROW_IF2(r >= trgWords * batch, "Row index too large"); - UTIL_THROW_IF2(c >= srcWords * batch, "Column index too large"); + ABORT_IF(r >= trgWords * batch, "Row index too 
large"); + ABORT_IF(c >= srcWords * batch, "Column index too large"); coo.emplace_back(r, c, values[i]); } std::sort(coo.begin(), coo.end()); @@ -130,8 +133,11 @@ void LfaForward(Tensor out, Tensor logits, Tensor att, Ptr<CSR> sparseLf) { sparseLfa->toTensor(out); } -__global__ void gCollapseAtt( - float* out, const float* in, int batch, int srcWords, int nonzeros) { +__global__ void gCollapseAtt(float* out, + const float* in, + int batch, + int srcWords, + int nonzeros) { for(int bid = 0; bid < nonzeros; bid += blockDim.x * gridDim.x) { int index = bid + blockDim.x * blockIdx.x + threadIdx.x; if(index < nonzeros) { diff --git a/src/kernels/sparse.h b/src/kernels/sparse.h index 447d48de..625ebe4b 100644 --- a/src/kernels/sparse.h +++ b/src/kernels/sparse.h @@ -130,7 +130,7 @@ public: void toTensor(Tensor dense) { cudaSetDevice(device_); - UTIL_THROW_IF2(dense->size() != rows_ * cols_, "Matrix sizes do not match"); + ABORT_IF(dense->size() != rows_ * cols_, "Matrix sizes do not match"); cusparseScsc2dense(handle_, cols_, @@ -197,8 +197,11 @@ public: } }; -void multiply( - Ptr<CSR>, const Ptr<CSR>, const Ptr<CSR>, bool = false, bool = false); +void multiply(Ptr<CSR>, + const Ptr<CSR>, + const Ptr<CSR>, + bool = false, + bool = false); void LfaForward(Tensor out, Tensor logits, Tensor att, Ptr<CSR> sparseLf); diff --git a/src/kernels/tensor_operators.cu b/src/kernels/tensor_operators.cu index cacb5316..177f98da 100644 --- a/src/kernels/tensor_operators.cu +++ b/src/kernels/tensor_operators.cu @@ -17,8 +17,7 @@ __device__ inline float stableLogit(float x) { if(x >= 0) { float z = expf(-x); return 1.0 / (1.0 + z); - } - else { + } else { float z = expf(x); return z / (1.0 + z); } @@ -162,7 +161,6 @@ void Deconcatenate(std::vector<Tensor>& outputs, const Tensor in, int ax) { SplitCont(outputs, in, ax); } - __global__ void gTranspose4D(float* out, ShapeGPU outShape, const float* in, @@ -953,7 +951,6 @@ __global__ void gGRUFastForward(float* out, for(int tid = 0; tid < cols; tid += blockDim.x) { int i = tid + threadIdx.x; if(i < cols) { - float r = stableLogit(xWrow[i] + sUrow[i] + b[i]); int k = i + cols; @@ -1527,8 +1524,11 @@ __global__ void gLNormalization(float* out, } } -void LayerNormalization( - Tensor out, Tensor in, Tensor gamma, Tensor beta, float eps) { +void LayerNormalization(Tensor out, + Tensor in, + Tensor gamma, + Tensor beta, + float eps) { cudaSetDevice(out->getDevice()); int rows = in->shape().elements() / in->shape().back(); diff --git a/src/kernels/tensor_operators.h b/src/kernels/tensor_operators.h index ea1ca9ba..e178eeac 100644 --- a/src/kernels/tensor_operators.h +++ b/src/kernels/tensor_operators.h @@ -389,8 +389,11 @@ __global__ void gAdd1R3(Functor functor, } template <class Functor> -void Add( - Functor functor, Tensor out, Tensor in1, Tensor in2, float scale = 1.0) { +void Add(Functor functor, + Tensor out, + Tensor in1, + Tensor in2, + float scale = 1.0) { cudaSetDevice(out->getDevice()); UTIL_THROW_IF2(out->shape().size() != in1->shape().size(), @@ -454,8 +457,11 @@ void Add( } template <class Functor> -void Reduce( - Functor functor, Tensor out, Tensor in1, Tensor in2, float scale = 1.0) { +void Reduce(Functor functor, + Tensor out, + Tensor in1, + Tensor in2, + float scale = 1.0) { out->set(0); Add(functor, out, in1, in2, scale); } @@ -934,8 +940,11 @@ void AttBack(Tensor gva, Tensor coverage, Tensor adj); -void LayerNormalization( - Tensor out, Tensor in, Tensor gamma, Tensor beta, float eps = 1e-9); +void LayerNormalization(Tensor out, + Tensor in, + Tensor 
gamma, + Tensor beta, + float eps = 1e-9); void LayerNormalizationGrad(Tensor gradX, Tensor gradGamma, Tensor gradBeta, diff --git a/src/kernels/thrust_functions.h b/src/kernels/thrust_functions.h index b49f0983..67f37a13 100644 --- a/src/kernels/thrust_functions.h +++ b/src/kernels/thrust_functions.h @@ -37,8 +37,7 @@ struct unary_sigma : public thrust::unary_function<T, T> { if(x >= 0) { float z = expf(-x); return 1.0 / (1.0 + z); - } - else { + } else { float z = expf(x); return z / (1.0 + z); } diff --git a/src/layers/generic.h b/src/layers/generic.h index 1577b076..64ebab4f 100644 --- a/src/layers/generic.h +++ b/src/layers/generic.h @@ -59,7 +59,7 @@ public: } Expr apply(const std::vector<Expr>& inputs) { - UTIL_THROW_IF2(inputs.empty(), "No inputs"); + ABORT_IF(inputs.empty(), "No inputs"); if(inputs.size() == 1) return apply(inputs[0]); diff --git a/src/layers/guided_alignment.cpp b/src/layers/guided_alignment.cpp index 10875716..8da75924 100644 --- a/src/layers/guided_alignment.cpp +++ b/src/layers/guided_alignment.cpp @@ -30,7 +30,7 @@ Expr guidedAlignmentCost(Ptr<ExpressionGraph> graph, } else if(guidedCostType == "ce") { alnCost = -sum(flatten(aln * log(att + eps))) / dimBatch; } else { - UTIL_THROW2("Unknown alignment cost type"); + ABORT("Unknown alignment cost type"); } float guidedScalar = options->get<float>("guided-alignment-weight"); diff --git a/src/layers/param_initializers.cu b/src/layers/param_initializers.cu index dea0bc79..3b3cc2a4 100644 --- a/src/layers/param_initializers.cu +++ b/src/layers/param_initializers.cu @@ -89,8 +89,8 @@ void svd(std::vector<float>& vec, Shape shape) { int n = std::min(rows, cols); int m = std::max(rows, cols); - UTIL_THROW_IF2(m % n != 0, - "Matrix dimensions must be equal or multiples of each other"); + ABORT_IF(m % n != 0, + "Matrix dimensions must be equal or multiples of each other"); for(int i = 0; i < shape.elements(); i += n * n) { std::vector<float> t1(n); diff --git a/src/layers/word2vec_reader.h b/src/layers/word2vec_reader.h index ff77cea1..4e0b7fbb 100644 --- a/src/layers/word2vec_reader.h +++ b/src/layers/word2vec_reader.h @@ -21,8 +21,8 @@ public: LOG(info, "[data] Loading embedding vectors from {}", fileName); std::ifstream embFile(fileName); - UTIL_THROW_IF2(!embFile.is_open(), - "Unable to open file with embeddings: " + fileName); + ABORT_IF(!embFile.is_open(), + "Unable to open file with embeddings: " + fileName); std::string line; std::vector<std::string> values; @@ -32,10 +32,10 @@ public: // vocabulary and the length of embedding vectors std::getline(embFile, line); Split(line, values); - UTIL_THROW_IF2(values.size() != 2, - "Unexpected format of the first line in embedding file"); - UTIL_THROW_IF2(stoi(values[1]) != dimEmb, - "Unexpected length of embedding vectors"); + ABORT_IF(values.size() != 2, + "Unexpected format of the first line in embedding file"); + ABORT_IF(stoi(values[1]) != dimEmb, + "Unexpected length of embedding vectors"); // Read embedding vectors into a map std::unordered_map<Word, std::vector<float>> word2vec; diff --git a/src/models/amun.h b/src/models/amun.h index 1d27dff6..33e3f225 100644 --- a/src/models/amun.h +++ b/src/models/amun.h @@ -9,32 +9,30 @@ namespace marian { class Amun : public EncoderDecoder { public: Amun(Ptr<Options> options) : EncoderDecoder(options) { - UTIL_THROW_IF2(options_->get<int>("enc-depth") > 1, - "--type amun does not currently support multiple encoder " - "layers, use --type s2s"); - UTIL_THROW_IF2(options_->get<int>("enc-cell-depth") > 1, - "--type amun does not 
currently support stacked encoder " - "cells, use --type s2s"); - UTIL_THROW_IF2(options_->get<bool>("skip"), - "--type amun does not currently support skip connections, " - "use --type s2s"); - UTIL_THROW_IF2(options_->get<int>("dec-depth") > 1, - "--type amun does not currently support multiple decoder " - "layers, use --type s2s"); - UTIL_THROW_IF2(options_->get<int>("dec-cell-base-depth") != 2, - "--type amun does not currently support multiple decoder " - "base cells, use --type s2s"); - UTIL_THROW_IF2(options_->get<int>("dec-cell-high-depth") > 1, - "--type amun does not currently support multiple decoder " - "high cells, use --type s2s"); - UTIL_THROW_IF2( - options_->get<std::string>("enc-cell") != "gru", - "--type amun does not currently support other rnn cells than gru, " - "use --type s2s"); - UTIL_THROW_IF2( - options_->get<std::string>("dec-cell") != "gru", - "--type amun does not currently support other rnn cells than gru, " - "use --type s2s"); + ABORT_IF(options_->get<int>("enc-depth") > 1, + "--type amun does not currently support multiple encoder " + "layers, use --type s2s"); + ABORT_IF(options_->get<int>("enc-cell-depth") > 1, + "--type amun does not currently support stacked encoder " + "cells, use --type s2s"); + ABORT_IF(options_->get<bool>("skip"), + "--type amun does not currently support skip connections, " + "use --type s2s"); + ABORT_IF(options_->get<int>("dec-depth") > 1, + "--type amun does not currently support multiple decoder " + "layers, use --type s2s"); + ABORT_IF(options_->get<int>("dec-cell-base-depth") != 2, + "--type amun does not currently support multiple decoder " + "base cells, use --type s2s"); + ABORT_IF(options_->get<int>("dec-cell-high-depth") > 1, + "--type amun does not currently support multiple decoder " + "high cells, use --type s2s"); + ABORT_IF(options_->get<std::string>("enc-cell") != "gru", + "--type amun does not currently support other rnn cells than gru, " + "use --type s2s"); + ABORT_IF(options_->get<std::string>("dec-cell") != "gru", + "--type amun does not currently support other rnn cells than gru, " + "use --type s2s"); } void load(Ptr<ExpressionGraph> graph, const std::string& name) { diff --git a/src/models/encdec.h b/src/models/encdec.h index 27da58b2..6a83a721 100644 --- a/src/models/encdec.h +++ b/src/models/encdec.h @@ -327,8 +327,8 @@ public: if(options_->has("guided-alignment") && !inference_) { auto alignments = decoders_[0]->getAlignments(); - UTIL_THROW_IF2(alignments.empty(), - "Model does not seem to support alignments"); + ABORT_IF(alignments.empty(), "Model does not seem to support alignments"); + auto att = concatenate(alignments, axis = 3); return cost + guidedAlignmentCost(graph, batch, options_, att); } else { diff --git a/src/models/experimental/lex_probs.h b/src/models/experimental/lex_probs.h index 034c5d9e..1b5b62f0 100644 --- a/src/models/experimental/lex_probs.h +++ b/src/models/experimental/lex_probs.h @@ -107,7 +107,7 @@ public: std::vector<int> colIndices(rows); for(size_t i = 0; i < rows; ++i) { - UTIL_THROW_IF2(indices[i] >= srcDim_, "column index to large"); + ABORT_IF(indices[i] >= srcDim_, "Column index to large"); values[i] = 1; rowIndices[i] = i; colIndices[i] = indices[i]; diff --git a/src/models/hardatt.h b/src/models/hardatt.h index 2b82d486..20b6c028 100644 --- a/src/models/hardatt.h +++ b/src/models/hardatt.h @@ -31,7 +31,7 @@ public: } virtual std::vector<size_t>& getAttentionIndices() { - UTIL_THROW_IF2(attentionIndices_.empty(), "Empty attention indices"); + 
ABORT_IF(attentionIndices_.empty(), "Empty attention indices"); return attentionIndices_; } diff --git a/src/models/model_factory.cpp b/src/models/model_factory.cpp index dbb6ef27..0416cfea 100644 --- a/src/models/model_factory.cpp +++ b/src/models/model_factory.cpp @@ -21,7 +21,7 @@ Ptr<EncoderBase> EncoderFactory::construct() { if(options_->get<std::string>("type") == "transformer") return New<EncoderTransformer>(options_); - UTIL_THROW2("Unknown encoder type"); + ABORT("Unknown encoder type"); } Ptr<DecoderBase> DecoderFactory::construct() { @@ -34,7 +34,7 @@ Ptr<DecoderBase> DecoderFactory::construct() { if(options_->get<std::string>("type") == "hard-soft-att") return New<DecoderHardAtt>(options_); - UTIL_THROW2("Unknown decoder type"); + ABORT("Unknown decoder type"); } Ptr<EncoderDecoder> EncoderDecoderFactory::construct() { @@ -174,7 +174,7 @@ Ptr<ModelBase> by_type(std::string type, Ptr<Options> options) { } #endif - UTIL_THROW2("Unknown model type: " + type); + ABORT("Unknown model type: {}", type); } Ptr<ModelBase> from_options(Ptr<Options> options) { diff --git a/src/models/nematus.h b/src/models/nematus.h index c6fa6561..cd81e83c 100644 --- a/src/models/nematus.h +++ b/src/models/nematus.h @@ -11,29 +11,28 @@ public: template <class... Args> Nematus(Ptr<Options> options) : EncoderDecoder(options), nameMap_(createNameMap()) { - UTIL_THROW_IF2(options_->get<std::string>("enc-type") != "bidirectional", - "--type nematus does not currently support other encoder " - "type than bidirectional, use --type s2s"); - UTIL_THROW_IF2(options_->get<int>("enc-depth") > 1, - "--type nematus does not currently support multiple encoder " - "layers, use --type s2s"); - UTIL_THROW_IF2( - options_->get<bool>("skip"), - "--type nematus does not currently support skip connections, " - "use --type s2s"); - UTIL_THROW_IF2(options_->get<int>("dec-depth") > 1, - "--type nematus does not currently support multiple decoder " - "layers, use --type s2s"); - UTIL_THROW_IF2(options_->get<int>("dec-cell-high-depth") > 1, - "--type nematus does not currently support multiple decoder " - "high cells, use --type s2s"); - - UTIL_THROW_IF2(options_->get<std::string>("enc-cell") != "gru-nematus", - "--type nematus does not currently support other rnn cells " - "than gru-nematus, use --type s2s"); - UTIL_THROW_IF2(options_->get<std::string>("dec-cell") != "gru-nematus", - "--type nematus does not currently support other rnn cells " - "than gru-nematus, use --type s2s"); + ABORT_IF(options_->get<std::string>("enc-type") != "bidirectional", + "--type nematus does not currently support other encoder " + "type than bidirectional, use --type s2s"); + ABORT_IF(options_->get<int>("enc-depth") > 1, + "--type nematus does not currently support multiple encoder " + "layers, use --type s2s"); + ABORT_IF(options_->get<bool>("skip"), + "--type nematus does not currently support skip connections, " + "use --type s2s"); + ABORT_IF(options_->get<int>("dec-depth") > 1, + "--type nematus does not currently support multiple decoder " + "layers, use --type s2s"); + ABORT_IF(options_->get<int>("dec-cell-high-depth") > 1, + "--type nematus does not currently support multiple decoder " + "high cells, use --type s2s"); + + ABORT_IF(options_->get<std::string>("enc-cell") != "gru-nematus", + "--type nematus does not currently support other rnn cells " + "than gru-nematus, use --type s2s"); + ABORT_IF(options_->get<std::string>("dec-cell") != "gru-nematus", + "--type nematus does not currently support other rnn cells " + "than gru-nematus, use 
--type s2s"); } void load(Ptr<ExpressionGraph> graph, const std::string& name) { diff --git a/src/models/transformer.h b/src/models/transformer.h index d5b1c1f2..d3c9f406 100644 --- a/src/models/transformer.h +++ b/src/models/transformer.h @@ -173,8 +173,8 @@ public: int dimBeamQ = q->shape()[3]; int dimBeamK = k->shape()[3]; if(dimBeamQ != dimBeamK) { - k = concatenate(std::vector<Expr>(dimBeamQ, k), axis=3); - v = concatenate(std::vector<Expr>(dimBeamQ, v), axis=3); + k = concatenate(std::vector<Expr>(dimBeamQ, k), axis = 3); + v = concatenate(std::vector<Expr>(dimBeamQ, v), axis = 3); } auto weights = softmax(bdot(q, k, false, true, scale) + mask); @@ -245,7 +245,7 @@ public: Expr output; if(outputs.size() > 1) - output = concatenate(outputs, axis=1); + output = concatenate(outputs, axis = 1); else output = outputs.front(); @@ -550,7 +550,8 @@ public: for(int i = 1; i <= opt<int>("dec-depth"); ++i) { auto values = query; if(prevDecoderStates.size() > 0) - values = concatenate({prevDecoderStates[i - 1].output, query}, axis=0); + values + = concatenate({prevDecoderStates[i - 1].output, query}, axis = 0); decoderStates.push_back({values, nullptr}); @@ -595,7 +596,7 @@ public: inference_); } } else { - UTIL_THROW2("Unknown value for transformer-multi-encoder: " << comb); + ABORT("Unknown value for transformer-multi-encoder: {}", comb); } } diff --git a/src/optimizers/optimizers.cu b/src/optimizers/optimizers.cu index 146cfd01..dc5ed976 100644 --- a/src/optimizers/optimizers.cu +++ b/src/optimizers/optimizers.cu @@ -97,7 +97,7 @@ Ptr<OptimizerBase> Optimizer(Ptr<Config> options) { } else if(opt == "adam") { return Optimizer<Adam>(lrate, clipper, params); } else { - UTIL_THROW2("Unknown optimizer: " << opt); + ABORT("Unknown optimizer: {}", opt); } } } diff --git a/src/rnn/attention.cu b/src/rnn/attention.cu index 8e80be76..3adf817b 100644 --- a/src/rnn/attention.cu +++ b/src/rnn/attention.cu @@ -19,13 +19,13 @@ struct AttentionNodeOp : public NaryNodeOp { Shape stateShape = nodes[2]->shape(); for(int i = 0; i < stateShape.size(); ++i) { - UTIL_THROW_IF2(ctxShape[i] != stateShape[i] && ctxShape[i] != 1 - && stateShape[i] != 1, - "Shapes cannot be broadcasted"); + ABORT_IF(ctxShape[i] != stateShape[i] && ctxShape[i] != 1 + && stateShape[i] != 1, + "Shapes cannot be broadcasted"); shape.set(i, std::max(ctxShape[i], stateShape[i])); } - UTIL_THROW_IF2(vaShape[0] != shape[1] || vaShape[1] != 1, "Wrong size"); + ABORT_IF(vaShape[0] != shape[1] || vaShape[1] != 1, "Wrong size"); shape.set(1, 1); return shape; @@ -76,4 +76,4 @@ Expr attOps(Expr va, Expr context, Expr state, Expr coverage) { {dimWords, dimBatch, 1, dimBeam}); } } -}
\ No newline at end of file +} diff --git a/src/rnn/cells.h b/src/rnn/cells.h index 8f678cf3..057a7c3c 100644 --- a/src/rnn/cells.h +++ b/src/rnn/cells.h @@ -643,7 +643,7 @@ public: } virtual std::vector<Expr> applyInput(std::vector<Expr> inputs) { - UTIL_THROW_IF2(inputs.empty(), "Multiplicative LSTM expects input"); + ABORT_IF(inputs.empty(), "Multiplicative LSTM expects input"); Expr input; if(inputs.size() > 1) @@ -738,7 +738,7 @@ public: } std::vector<Expr> applyInput(std::vector<Expr> inputs) { - UTIL_THROW_IF2(inputs.empty(), "Slow LSTM expects input"); + ABORT_IF(inputs.empty(), "Slow LSTM expects input"); Expr input; if(inputs.size() > 1) @@ -836,7 +836,7 @@ public: } std::vector<Expr> applyInput(std::vector<Expr> inputs) { - UTIL_THROW_IF2(inputs.empty(), "Test LSTM expects input"); + ABORT_IF(inputs.empty(), "Test LSTM expects input"); Expr input; if(inputs.size() > 1) diff --git a/src/rnn/constructors.h b/src/rnn/constructors.h index cbe981f0..88c52314 100644 --- a/src/rnn/constructors.h +++ b/src/rnn/constructors.h @@ -63,7 +63,7 @@ public: cell->setLazyInputs(inputs_); return cell; } else { - UTIL_THROW2("Unknown RNN cell type"); + ABORT("Unknown RNN cell type"); } } @@ -140,7 +140,7 @@ public: AttentionFactory(Ptr<ExpressionGraph> graph) : InputFactory(graph) {} Ptr<CellInput> construct() { - UTIL_THROW_IF2(!state_, "EncoderState not set"); + ABORT_IF(!state_, "EncoderState not set"); return New<Attention>(graph_, options_, state_); } @@ -150,7 +150,7 @@ public: } int dimAttended() { - UTIL_THROW_IF2(!state_, "EncoderState not set"); + ABORT_IF(!state_, "EncoderState not set"); return state_->getAttended()->shape()[1]; } }; diff --git a/src/rnn/rnn.h b/src/rnn/rnn.h index ad82a614..67947db6 100644 --- a/src/rnn/rnn.h +++ b/src/rnn/rnn.h @@ -135,7 +135,7 @@ public: void push_back(Ptr<Cell> cell) { cell_ = cell; } virtual Ptr<Cell> at(int i) { - UTIL_THROW_IF2(i > 0, "SingleRNN only has one cell"); + ABORT_IF(i > 0, "SingleRNN only has one cell"); return cell_; } }; @@ -160,7 +160,7 @@ public: } Expr transduce(Expr input, Expr mask = nullptr) { - UTIL_THROW_IF2(rnns_.empty(), "0 layers in RNN"); + ABORT_IF(rnns_.empty(), "0 layers in RNN"); Expr output; Expr layerInput = input; @@ -187,7 +187,7 @@ public: } Expr transduce(Expr input, States states, Expr mask = nullptr) { - UTIL_THROW_IF2(rnns_.empty(), "0 layers in RNN"); + ABORT_IF(rnns_.empty(), "0 layers in RNN"); Expr output; Expr layerInput = input; @@ -216,7 +216,7 @@ public: } Expr transduce(Expr input, State state, Expr mask = nullptr) { - UTIL_THROW_IF2(rnns_.empty(), "0 layers in RNN"); + ABORT_IF(rnns_.empty(), "0 layers in RNN"); Expr output; Expr layerInput = input; diff --git a/src/rnn/types.h b/src/rnn/types.h index 02931533..9e288d5a 100644 --- a/src/rnn/types.h +++ b/src/rnn/types.h @@ -242,15 +242,15 @@ public: } virtual std::vector<Expr> getLazyInputs(Ptr<rnn::RNN> parent) { - UTIL_THROW_IF2(!stackables_[0]->is<Cell>(), - "First stackable should be of type Cell"); + ABORT_IF(!stackables_[0]->is<Cell>(), + "First stackable should be of type Cell"); return stackables_[0]->as<Cell>()->getLazyInputs(parent); } virtual void setLazyInputs( std::vector<std::function<Expr(Ptr<rnn::RNN>)>> lazy) { - UTIL_THROW_IF2(!stackables_[0]->is<Cell>(), - "First stackable should be of type Cell"); + ABORT_IF(!stackables_[0]->is<Cell>(), + "First stackable should be of type Cell"); stackables_[0]->as<Cell>()->setLazyInputs(lazy); } }; diff --git a/src/tensors/allocator.h b/src/tensors/allocator.h index 73ca04dd..8a84e4d7 100644 
--- a/src/tensors/allocator.h +++ b/src/tensors/allocator.h @@ -186,7 +186,7 @@ public: bool free(uint8_t* ptr, size_t bytes) { bytes = align(bytes); - UTIL_THROW_IF2(ptr == 0, "Double free?"); + ABORT_IF(ptr == 0, "Double free?"); if(!ptr) return false; diff --git a/src/tensors/device_gpu.cu b/src/tensors/device_gpu.cu index c4312c26..19ecacc2 100644 --- a/src/tensors/device_gpu.cu +++ b/src/tensors/device_gpu.cu @@ -18,7 +18,7 @@ void DeviceGPU::reserve(size_t size) { size = align(size); cudaSetDevice(device_); - UTIL_THROW_IF2(size < size_, "New size must be larger than old size"); + ABORT_IF(size < size_, "New size must be larger than old size"); if(data_) { // Allocate memory by going through host memory @@ -34,4 +34,4 @@ void DeviceGPU::reserve(size_t size) { size_ = size; } -}
\ No newline at end of file +} diff --git a/src/tensors/tensor.h b/src/tensors/tensor.h index 6dcd7293..a8da4ac4 100644 --- a/src/tensors/tensor.h +++ b/src/tensors/tensor.h @@ -35,7 +35,7 @@ public: virtual size_t size() { return shape_.elements(); } virtual float scalar() { - UTIL_THROW_IF2(size() != 1, "Tensor is not a scalar"); + ABORT_IF(size() != 1, "Tensor is not a scalar"); return get(0); } diff --git a/src/tensors/tensor_cpu.h b/src/tensors/tensor_cpu.h index 7e322a1c..18725b3d 100644 --- a/src/tensors/tensor_cpu.h +++ b/src/tensors/tensor_cpu.h @@ -63,7 +63,7 @@ private: typedef TensorCPU tensor_type; void reserve(size_t size) { - UTIL_THROW_IF2(size < size_, "New size must be larger than old size"); + ABORT_IF(size < size_, "New size must be larger than old size"); float* temp = new float[size]; if(data_) { diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 7e23225a..ed53e784 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -2,6 +2,7 @@ set(TEST_SOURCES graph_tests.cpp operator_tests.cpp + rnn_tests.cpp ) add_executable(run_tests run_tests.cpp ${TEST_SOURCES}) diff --git a/src/tests/allocator_test.cpp b/src/tests/allocator_test.cpp index b9146cf4..a3d0fbda 100644 --- a/src/tests/allocator_test.cpp +++ b/src/tests/allocator_test.cpp @@ -25,7 +25,7 @@ public: void reserve(size_t size) { size = align(size); - UTIL_THROW_IF2(size < size_, "New size must be larger than old size"); + ABORT_IF(size < size_, "New size must be larger than old size"); if(data_) { // Allocate memory by going through host memory diff --git a/src/tests/rnn_tests.cpp b/src/tests/rnn_tests.cpp new file mode 100644 index 00000000..96c1c120 --- /dev/null +++ b/src/tests/rnn_tests.cpp @@ -0,0 +1,277 @@ +#include "catch.hpp" +#include "marian.h" + +using namespace marian; + +TEST_CASE("Model components, RNN etc.", "[model]") { + + auto floatApprox = [](float x, float y) { return x == Approx(y); }; + + std::vector<size_t> vWords = { + 43, 2, 83, 78, + 6, 38, 80, 40, + 40, 70, 26, 60, + 106, 13, 111, 32, + 126, 62, 115, 72, + 127, 82, 55, 0, + 86, 0, 124, 0, + 0, 0, 0, 0 + }; + + std::vector<size_t> vMask = { + 1, 1, 1, 1, + 1, 1, 1, 1, + 1, 1, 1, 1, + 1, 1, 1, 1, + 1, 1, 1, 1, + 1, 1, 1, 1, + 1, 1, 1, 0, + 1, 0, 1, 0, + }; + + SECTION("Simple RNN") { + Config::seed = 1234; + + auto graph = New<ExpressionGraph>(); + graph->setDevice(0); + graph->reserveWorkspaceMB(16); + + std::vector<float> values; + + auto input = graph->constant({1, 4, 4}, + keywords::init=inits::glorot_uniform); + + auto rnn = rnn::rnn(graph) // + ("prefix", "rnntest") // + ("type", "tanh") // + ("dimInput", 4) // + ("dimState", 4) // + .push_back(rnn::cell(graph)) // + .construct(); + + auto output = rnn->transduce(input); + + graph->forward(); + + CHECK(output->shape() == Shape({1, 4, 4})); + + std::vector<float> vOutput({ + 0.108774, 0.237905, -0.819769, -0.212601, + -0.684652, 0.455977, 0.504662, -0.184837, + 0.769393, 0.28449, -0.200995, -0.260122, + -0.324909, -0.337419, -0.959819, 0.559088 + }); + + output->val()->get(values); + CHECK( std::equal(values.begin(), values.end(), + vOutput.begin(), floatApprox) ); + } + + SECTION("S2S-style encoder") { + Config::seed = 1234; + + auto graph = New<ExpressionGraph>(); + graph->setDevice(0); + graph->reserveWorkspaceMB(16); + + std::vector<float> values; + + auto buildRnn = [&graph] (std::string prefix, + Expr input, Expr mask, + int dimRnn=32, + int depth=1, + int cellDepth=1, + std::string type="bidirectional", + std::string cellType="gru", + bool 
layerNorm=false, + bool skip=false) { + + int dimEmb = input->shape()[1]; + + int first, second; + if(type == "bidirectional" || type == "alternating") { + // build two separate stacks, concatenate top output + first = depth; + second = 0; + } else { + // build 1-layer bidirectional stack, concatenate, + // build n-1 layer unidirectional stack + first = 1; + second = depth - first; + } + + auto forward = type == "alternating" ? rnn::dir::alternating_forward + : rnn::dir::forward; + + auto backward = type == "alternating" ? rnn::dir::alternating_backward + : rnn::dir::backward; + + using namespace keywords; + + auto rnnFw = rnn::rnn(graph) // + ("type", cellType) // + ("direction", forward) // + ("dimInput", dimEmb) // + ("dimState", dimRnn) // + ("layer-normalization", layerNorm) // + ("skip", skip); + + for(int i = 1; i <= first; ++i) { + auto stacked = rnn::stacked_cell(graph); + for(int j = 1; j <= cellDepth; ++j) { + std::string paramPrefix = prefix + "_bi"; + if(i > 1) + paramPrefix += "_l" + std::to_string(i); + if(i > 1 || j > 1) + paramPrefix += "_cell" + std::to_string(j); + + stacked.push_back(rnn::cell(graph)("prefix", paramPrefix)); + } + rnnFw.push_back(stacked); + } + + auto rnnBw = rnn::rnn(graph) // + ("type", cellType) // + ("direction", backward) // + ("dimInput", dimEmb) // + ("dimState", dimRnn) // + ("layer-normalization", layerNorm) // + ("skip", skip); + + for(int i = 1; i <= first; ++i) { + auto stacked = rnn::stacked_cell(graph); + for(int j = 1; j <= cellDepth; ++j) { + std::string paramPrefix = prefix + "_bi_r"; + if(i > 1) + paramPrefix += "_l" + std::to_string(i); + if(i > 1 || j > 1) + paramPrefix += "_cell" + std::to_string(j); + + stacked.push_back(rnn::cell(graph)("prefix", paramPrefix)); + } + rnnBw.push_back(stacked); + } + + auto context = concatenate({rnnFw->transduce(input, mask), + rnnBw->transduce(input, mask)}, + axis = 1); + + if(second > 0) { + // add more layers (unidirectional) by transducing the output of the + // previous bidirectional RNN through multiple layers + + // construct RNN first + auto rnnUni = rnn::rnn(graph) // + ("type", cellType) // + ("dimInput", 2 * dimRnn) // + ("dimState", dimRnn) // + ("layer-normalization", layerNorm) // + ("skip", skip); + + for(int i = first + 1; i <= second + first; ++i) { + auto stacked = rnn::stacked_cell(graph); + for(int j = 1; j <= cellDepth; ++j) { + std::string paramPrefix = prefix + "_l" + std::to_string(i) + "_cell" + + std::to_string(j); + stacked.push_back(rnn::cell(graph)("prefix", paramPrefix)); + } + rnnUni.push_back(stacked); + } + + // transduce context to new context + context = rnnUni->transduce(context); + } + return context; + }; + + int dimEmb = 16; + int dimBatch = 4; + int dimTime = 8; + + auto emb = graph->param("Embeddings", + {128, dimEmb}, + keywords::init=inits::glorot_uniform); + + auto input = reshape(rows(emb, vWords), {dimBatch, dimEmb, dimTime}); + auto mask = graph->constant({dimBatch, 1, dimTime}, + keywords::init=inits::from_vector(vMask)); + + int dimRnn = 32; + auto context1 = buildRnn("enc1", input, mask, dimRnn); + auto contextSum1 = sum(context1, keywords::axis=1); + + auto context2 = buildRnn("enc2", input, mask, dimRnn, 2, 2); + auto contextSum2 = sum(context2, keywords::axis=1); + + // @TODO: why is this numerically instable on different machines? 
+ //auto context3 = buildRnn("enc3", input, mask, + // dimRnn, 4, 4, + // "alternating", "lstm", + // true, true); + //auto contextSum3 = sum(context3, keywords::axis=1); + + graph->forward(); + + CHECK(context1->shape() == Shape({dimBatch, 2 * dimRnn, dimTime})); + CHECK(contextSum1->shape() == Shape({dimBatch, 1, dimTime})); + + std::vector<float> vContextSum1({ + 0.14076, -0.102, 0.22832, -0.42283, + -0.15911, 0.33222, 0.74858, -0.59844, + -0.70797, -0.12694, -0.14322, 0.25016, + -0.91476, 0.39106, -0.75152, -0.02236, + -0.59753, 0.17417, -0.24941, -0.36464, + -0.62975, 0.35372, 0.12781, -0.79948, + -0.33747, -0.54613, 0.32809, -0.63282, + -0.78209, -0.37947, -0.50397, -0.63282 + }); + + contextSum1->val()->get(values); + CHECK( std::equal(values.begin(), values.end(), + vContextSum1.begin(), floatApprox) ); + + CHECK(context2->shape() == Shape({dimBatch, 2 * dimRnn, dimTime})); + CHECK(contextSum2->shape() == Shape({dimBatch, 1, dimTime})); + + std::vector<float> vContextSum2({ + -0.0168112, -0.0524664, -0.0196701, -0.0118004, + 0.00975164, -0.0470996, -0.014982, -0.0248614, + -0.0110038, 0.00297422, -0.00327533, 0.0175996, + 0.0319444, 0.0196884, -0.0436654, -0.0257596, + 0.0131209, -0.0533302, -0.058655, 0.0666001, + 0.00823802, 0.0133473, -0.00715647, 0.119427, + 0.0282871, 0.104641, -0.0271743, 0.0658893, + 0.0687114, 0.0511032, 0.0673459, 0.0658893 + }); + + contextSum2->val()->get(values); + CHECK( std::equal(values.begin(), values.end(), + vContextSum2.begin(), floatApprox) ); + + //CHECK(context3->shape() == Shape({dimBatch, 2 * dimRnn, dimTime})); + //CHECK(contextSum3->shape() == Shape({dimBatch, 1, dimTime})); + // + //std::vector<float> vContextSum3({ + // 4.79443, 1.52788, 2.32984, 2.59648, + // -1.04159, -4.89242, 4.13013, -1.42554, + // 2.59088, 0.165236, -4.05358, -2.30649, + // 3.6943, -2.13945, -4.50602, 2.39471, + // -2.17873, 0.994103, -3.78782, 0.549939, + // -0.830426, -3.83337, -7.88747, 0.757133, + // -12.4974, -1.73116, -4.51886, 0.336533, + // -1.92069, -1.91202, 0.468423, 0.336285 + //}); + // + //contextSum3->val()->get(values); + // + ////for(int i = 0; i < values.size(); ++i) { + //// if(i && i % 4 == 0) + //// std::cout << std::endl; + //// + //// std::cout << values[i] << ", "; + ////} + // + //CHECK( std::equal(values.begin(), values.end(), + // vContextSum3.begin(), floatApprox) ); + } +} diff --git a/src/training/dropper.h b/src/training/dropper.h index 2b6a4ab1..0c19d327 100644 --- a/src/training/dropper.h +++ b/src/training/dropper.h @@ -12,8 +12,11 @@ namespace marian { -__global__ void grad_drop( - float* data, float* tmp, float* errors, float cut_off, int max_size) { +__global__ void grad_drop(float* data, + float* tmp, + float* errors, + float cut_off, + int max_size) { int idx = blockDim.x * blockIdx.x + threadIdx.x; if(idx >= max_size) return; @@ -63,8 +66,11 @@ __global__ void buildIndices(float* denseData, } } -__global__ void randomSampling( - float* originalData, float* data, int size, int scale, int fullSize) { +__global__ void randomSampling(float* originalData, + float* data, + int size, + int scale, + int fullSize) { int idx = blockDim.x * blockIdx.x + threadIdx.x; if(idx >= size) return; @@ -78,8 +84,11 @@ class GradientDropBase { int step; int _device; - void grad_drop_do( - float* data, float* errors, float* tmp, int len, float rate) { + void grad_drop_do(float* data, + float* errors, + float* tmp, + int len, + float rate) { int threads = 512; int blocks = 1 + len / threads; cudaSetDevice(_device); diff --git 
a/src/training/validator.cpp b/src/training/validator.cpp index 4fbd8672..60ce60cd 100644 --- a/src/training/validator.cpp +++ b/src/training/validator.cpp @@ -3,7 +3,8 @@ namespace marian { std::vector<Ptr<Validator<data::Corpus>>> Validators( - std::vector<Ptr<Vocab>> vocabs, Ptr<Config> config) { + std::vector<Ptr<Vocab>> vocabs, + Ptr<Config> config) { std::vector<Ptr<Validator<data::Corpus>>> validators; auto validMetrics = config->get<std::vector<std::string>>("valid-metrics"); diff --git a/src/training/validator.h b/src/training/validator.h index 837734bc..ec803d5d 100644 --- a/src/training/validator.h +++ b/src/training/validator.h @@ -154,8 +154,8 @@ public: opts->set("inference", true); builder_ = models::from_options(opts); - UTIL_THROW_IF2(!options_->has("valid-script-path"), - "valid-script metric but no script given"); + ABORT_IF(!options_->has("valid-script-path"), + "valid-script metric but no script given"); } virtual float validate(Ptr<ExpressionGraph> graph) { @@ -305,5 +305,6 @@ protected: * @return Vector of validator objects */ std::vector<Ptr<Validator<data::Corpus>>> Validators( - std::vector<Ptr<Vocab>> vocabs, Ptr<Config> config); + std::vector<Ptr<Vocab>> vocabs, + Ptr<Config> config); }
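Editor's note: the newly added src/tests/rnn_tests.cpp pins down the fluent builder API for RNN components. The condensed sketch below is lifted from the "Simple RNN" section of that test; the main() wrapper is mine (the test itself runs under Catch), and it assumes, as the test does, the marian.h umbrella header and GPU device 0 being available.

#include "marian.h"

using namespace marian;

int main() {
  Config::seed = 1234;  // fixed seed so glorot_uniform init is reproducible

  auto graph = New<ExpressionGraph>();
  graph->setDevice(0);            // the test assumes GPU device 0
  graph->reserveWorkspaceMB(16);

  auto input = graph->constant({1, 4, 4},
                               keywords::init = inits::glorot_uniform);

  // Fluent builder: options are set via operator(), cells are stacked via
  // push_back(), and construct() yields the runnable RNN.
  auto rnn = rnn::rnn(graph)        //
      ("prefix", "rnntest")         //
      ("type", "tanh")              //
      ("dimInput", 4)               //
      ("dimState", 4)               //
      .push_back(rnn::cell(graph))  //
      .construct();

  auto output = rnn->transduce(input);
  graph->forward();

  // The test then checks output->shape() == Shape({1, 4, 4}) and compares
  // the values element-wise against a stored reference vector.
  return 0;
}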