
github.com/marian-nmt/marian.git
author     Marcin Junczys-Dowmunt <junczys@amu.edu.pl>  2017-10-29 17:42:01 +0300
committer  Marcin Junczys-Dowmunt <junczys@amu.edu.pl>  2017-10-29 17:42:01 +0300
commit     bc95140cfb7afc51fa104f4ecab3e5453bd706ef (patch)
tree       8d57999c8c556cae36d7b9f8c992826cd0804445 /src
parent     2e16934080d4bf41d0ab7557836732bedb635efd (diff)
parent     46433253735e79e03613fcbd28e64ff393f72451 (diff)

merge rnn_test.cpp
Diffstat (limited to 'src')
 src/common/config_parser.cpp        |  64
 src/common/file_stream.h            |  20
 src/common/logging.h                |  46
 src/common/options.h                |   3
 src/common/utils.cpp                |   5
 src/data/batch_generator.h          |   3
 src/data/batch_stats.h              |   2
 src/data/corpus.cpp                 |  23
 src/data/dataset.h                  |   4
 src/data/filter.h                   |   2
 src/data/types.h                    |  10
 src/data/vocab.cpp                  |   9
 src/examples/mnist/dataset.h        |  11
 src/graph/expression_graph.cu       |   2
 src/graph/expression_graph.h        |  21
 src/graph/expression_operators.cu   |   8
 src/graph/expression_operators.h    |   2
 src/graph/node_operators.h          |   6
 src/graph/node_operators_binary.h   |  36
 src/graph/node_operators_unary.h    |  11
 src/graph/parameters.h              |   3
 src/kernels/sparse.cu               |  18
 src/kernels/sparse.h                |   9
 src/kernels/tensor_operators.cu     |  12
 src/kernels/tensor_operators.h      |  21
 src/kernels/thrust_functions.h      |   3
 src/layers/generic.h                |   2
 src/layers/guided_alignment.cpp     |   2
 src/layers/param_initializers.cu    |   4
 src/layers/word2vec_reader.h        |  12
 src/models/amun.h                   |  50
 src/models/encdec.h                 |   4
 src/models/experimental/lex_probs.h |   2
 src/models/hardatt.h                |   2
 src/models/model_factory.cpp        |   6
 src/models/nematus.h                |  45
 src/models/transformer.h            |  11
 src/optimizers/optimizers.cu        |   2
 src/rnn/attention.cu                |  10
 src/rnn/cells.h                     |   6
 src/rnn/constructors.h              |   6
 src/rnn/rnn.h                       |   8
 src/rnn/types.h                     |   8
 src/tensors/allocator.h             |   2
 src/tensors/device_gpu.cu           |   4
 src/tensors/tensor.h                |   2
 src/tensors/tensor_cpu.h            |   2
 src/tests/CMakeLists.txt            |   1
 src/tests/allocator_test.cpp        |   2
 src/tests/rnn_tests.cpp             | 277
 src/training/dropper.h              |  21
 src/training/validator.cpp          |   3
 src/training/validator.h            |   7
 53 files changed, 601 insertions(+), 254 deletions(-)
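
The bulk of this commit mechanically replaces the exception-style UTIL_THROW_IF2 / UTIL_THROW2 macros with the spdlog-backed ABORT_IF / ABORT macros added to src/common/logging.h, switching error messages from iostream-style '<<' concatenation to fmt-style '{}' placeholders. The sketch below illustrates the migration pattern on one representative check; it is a minimal example assuming the Marian headers are on the include path, and the function and variable names are hypothetical rather than taken from the diff.

    #include <string>
    #include "common/logging.h"  // provides ABORT and ABORT_IF after this commit

    // Hypothetical helper contrasting the old and new error-checking style.
    void requireOption(bool hasOption, const std::string& name) {
      // Old style (removed in this commit):
      //   UTIL_THROW_IF2(!hasOption, "Required option \"" << name << "\" has not been set");
      // New style: the condition stays the same, the message becomes an fmt-style
      // format string whose arguments are forwarded to spdlog before std::abort().
      ABORT_IF(!hasOption, "Required option '{}' has not been set", name);
    }
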
diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp
index 406dcf65..013a635a 100644
--- a/src/common/config_parser.cpp
+++ b/src/common/config_parser.cpp
@@ -137,52 +137,46 @@ void ConfigParser::validateOptions() const {
if(mode_ == ConfigMode::translating)
return;
- UTIL_THROW_IF2(
+ ABORT_IF(
!has("train-sets") || get<std::vector<std::string>>("train-sets").empty(),
"No train sets given in config file or on command line");
- UTIL_THROW_IF2(
- has("vocabs")
- && get<std::vector<std::string>>("vocabs").size()
- != get<std::vector<std::string>>("train-sets").size(),
- "There should be as many vocabularies as training sets");
- UTIL_THROW_IF2(
- has("embedding-vectors")
- && get<std::vector<std::string>>("embedding-vectors").size()
- != get<std::vector<std::string>>("train-sets").size(),
- "There should be as many files with embedding vectors as "
- "training sets");
+ ABORT_IF(has("vocabs")
+ && get<std::vector<std::string>>("vocabs").size()
+ != get<std::vector<std::string>>("train-sets").size(),
+ "There should be as many vocabularies as training sets");
+ ABORT_IF(has("embedding-vectors")
+ && get<std::vector<std::string>>("embedding-vectors").size()
+ != get<std::vector<std::string>>("train-sets").size(),
+ "There should be as many files with embedding vectors as "
+ "training sets");
if(mode_ == ConfigMode::rescoring)
return;
boost::filesystem::path modelPath(get<std::string>("model"));
auto modelDir = modelPath.parent_path();
- UTIL_THROW_IF2(
- !modelDir.empty() && !boost::filesystem::is_directory(modelDir),
- "Model directory does not exist");
+ ABORT_IF(!modelDir.empty() && !boost::filesystem::is_directory(modelDir),
+ "Model directory does not exist");
- UTIL_THROW_IF2(
- has("valid-sets")
- && get<std::vector<std::string>>("valid-sets").size()
- != get<std::vector<std::string>>("train-sets").size(),
- "There should be as many validation sets as training sets");
+ ABORT_IF(has("valid-sets")
+ && get<std::vector<std::string>>("valid-sets").size()
+ != get<std::vector<std::string>>("train-sets").size(),
+ "There should be as many validation sets as training sets");
// validations for learning rate decaying
- UTIL_THROW_IF2(get<double>("lr-decay") > 1.0,
- "Learning rate decay factor greater than 1.0 is unusual");
- UTIL_THROW_IF2(
- (get<std::string>("lr-decay-strategy") == "epoch+batches"
- || get<std::string>("lr-decay-strategy") == "epoch+stalled")
- && get<std::vector<size_t>>("lr-decay-start").size() != 2,
- "Decay strategies 'epoch+batches' and 'epoch+stalled' require two "
- "values specified with --lr-decay-start options");
- UTIL_THROW_IF2(
- (get<std::string>("lr-decay-strategy") == "epoch"
- || get<std::string>("lr-decay-strategy") == "batches"
- || get<std::string>("lr-decay-strategy") == "stalled")
- && get<std::vector<size_t>>("lr-decay-start").size() != 1,
- "Single decay strategies require only one value specified with "
- "--lr-decay-start option");
+ ABORT_IF(get<double>("lr-decay") > 1.0,
+ "Learning rate decay factor greater than 1.0 is unusual");
+ ABORT_IF((get<std::string>("lr-decay-strategy") == "epoch+batches"
+ || get<std::string>("lr-decay-strategy") == "epoch+stalled")
+ && get<std::vector<size_t>>("lr-decay-start").size() != 2,
+ "Decay strategies 'epoch+batches' and 'epoch+stalled' require two "
+ "values specified with --lr-decay-start options");
+ ABORT_IF((get<std::string>("lr-decay-strategy") == "epoch"
+ || get<std::string>("lr-decay-strategy") == "batches"
+ || get<std::string>("lr-decay-strategy") == "stalled")
+ && get<std::vector<size_t>>("lr-decay-start").size() != 1,
+ "Single decay strategies require only one value specified with "
+ "--lr-decay-start option");
}
void ConfigParser::addOptionsCommon(po::options_description& desc) {
diff --git a/src/common/file_stream.h b/src/common/file_stream.h
index 6ccf78c3..53d41faf 100644
--- a/src/common/file_stream.h
+++ b/src/common/file_stream.h
@@ -9,7 +9,8 @@
#include <sys/stat.h>
-#include "exception.h"
+#include "3rd_party/exception.h"
+#include "common/logging.h"
namespace io = boost::iostreams;
@@ -22,7 +23,7 @@ private:
int mkstemp_and_unlink(char* tmpl) {
int ret = mkstemp(tmpl);
if(unlink_ && ret != -1) {
- UTIL_THROW_IF2(unlink(tmpl), "while deleting " << tmpl);
+ ABORT_IF(unlink(tmpl), "Error while deleting '{}'", tmpl);
}
return ret;
}
@@ -32,8 +33,9 @@ private:
name += "marian.XXXXXX";
name.push_back(0);
int ret;
- UTIL_THROW_IF2(-1 == (ret = mkstemp_and_unlink(&name[0])),
- "while making a temporary based on " << base);
+ ABORT_IF(-1 == (ret = mkstemp_and_unlink(&name[0])),
+ "Error while making a temporary based on '{}'",
+ base);
name_ = name;
return ret;
}
@@ -61,7 +63,7 @@ public:
~TemporaryFile() {
if(fd_ != -1 && !unlink_) {
- UTIL_THROW_IF2(unlink(name_.c_str()), "while deleting " << name_);
+ ABORT_IF(unlink(name_.c_str()), "Error while deleting '{}'", name_);
}
if(fd_ != -1 && close(fd_)) {
std::cerr << "Could not close file " << fd_ << std::endl;
@@ -77,8 +79,8 @@ public:
class InputFileStream {
public:
InputFileStream(const std::string& file) : file_(file), ifstream_(file_) {
- UTIL_THROW_IF2(!boost::filesystem::exists(file_),
- "File " << file << " does not exist");
+ ABORT_IF(
+ !boost::filesystem::exists(file_), "File '{}' does not exist", file);
if(file_.extension() == ".gz")
istream_.push(io::gzip_decompressor());
@@ -117,8 +119,8 @@ private:
class OutputFileStream {
public:
OutputFileStream(const std::string& file) : file_(file), ofstream_(file_) {
- UTIL_THROW_IF2(!boost::filesystem::exists(file_),
- "File " << file << " does not exist");
+ ABORT_IF(
+ !boost::filesystem::exists(file_), "File '{}' does not exist", file);
if(file_.extension() == ".gz")
ostream_.push(io::gzip_compressor());
diff --git a/src/common/logging.h b/src/common/logging.h
index 9564b33c..127a9700 100644
--- a/src/common/logging.h
+++ b/src/common/logging.h
@@ -3,21 +3,20 @@
#include "spdlog/spdlog.h"
/**
- * @brief Prints logging message into stderr and a file specified with `--log`
- * option.
+ * Prints logging message into stderr and a file specified with `--log` option.
*
* Example usage: `LOG(info, "[data] Vocab size: {}", vocabSize)`
*
- * A good practise is to put `[namespace]` at the beginning of your message.
+ * A good practice is to put `[namespace]` at the beginning of the message.
*
* @param level Logging level: trace, debug, info, warn, error, critical
- * @param ...
+ * @param ... Message text and variables
*/
#define LOG(level, ...) checkedLog("general", #level, __VA_ARGS__)
/**
- * @brief Prints logging message regarding validation into stderr and a file
- * specified with `--valid-log` option.
+ * Prints logging message regarding validation into stderr and a file specified
+ * with `--valid-log` option.
*
* The message is automatically preceded by "[valid] ".
*
@@ -25,6 +24,34 @@
*/
#define LOG_VALID(level, ...) checkedLog("valid", #level, __VA_ARGS__)
+/**
+ * Prints critical error message and causes abnormal program termination by
+ * calling std::abort().
+ *
+ * @param ... Message text and variables
+ */
+#define ABORT(...) \
+ do { \
+ checkedLog("general", "critical", __VA_ARGS__); \
+ std::abort(); \
+ } while(0)
+
+/**
+ * Prints critical error message and causes abnormal program termination if
+ * conditions is true.
+ *
+ * @param condition Condition expression
+ * @param ... Message text and variables
+ *
+ * @see \def ABORT(...)
+ */
+#define ABORT_IF(condition, ...) \
+ do { \
+ if(condition) { \
+ ABORT(__VA_ARGS__); \
+ } \
+ } while(0)
+
typedef std::shared_ptr<spdlog::logger> Logger;
Logger stderrLogger(const std::string&,
const std::string&,
@@ -38,8 +65,13 @@ class Config;
template <class... Args>
void checkedLog(std::string logger, std::string level, Args... args) {
Logger log = spdlog::get(logger);
- if(!log)
+ if(!log) {
+ if(level == "critical") {
+ auto stderr = stderrLogger("error", "Error: %v - aborting");
+ stderr->critical(args...);
+ }
return;
+ }
if(level == "trace")
log->trace(args...);
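
One behavioural detail of the checkedLog change above is worth spelling out: non-critical messages are silently dropped when the named logger has not been registered yet, while critical messages (i.e. ABORT / ABORT_IF) now fall back to a freshly created stderr logger, so aborts stay visible even before logging is configured. A minimal sketch of the intended usage, assuming the Marian headers are available; the command-line handling is illustrative only.

    #include "common/logging.h"

    int main(int argc, char** argv) {
      // No logger has been registered with spdlog at this point, so LOG(info, ...)
      // would be dropped by checkedLog, while a critical message still reaches
      // stderr through the fallback added in this commit and the process aborts.
      ABORT_IF(argc < 2, "Usage: {} CONFIG.yml", argv[0]);

      // Once the general/valid loggers have been set up from the configuration,
      // LOG works as documented in logging.h:
      LOG(info, "[config] Loading configuration from {}", argv[1]);
      return 0;
    }
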
diff --git a/src/common/options.h b/src/common/options.h
index 0d68d543..1737a661 100644
--- a/src/common/options.h
+++ b/src/common/options.h
@@ -62,8 +62,7 @@ public:
template <typename T>
T get(const std::string& key) {
- UTIL_THROW_IF2(!has(key),
- "Required option \"" << key << "\" has not been set");
+ ABORT_IF(!has(key), "Required option '{}' has not been set", key);
return options_[key].as<T>();
}
diff --git a/src/common/utils.cpp b/src/common/utils.cpp
index f4bd5790..a13241cd 100644
--- a/src/common/utils.cpp
+++ b/src/common/utils.cpp
@@ -2,7 +2,8 @@
#include <sstream>
#include "3rd_party/exception.h"
-#include "utils.h"
+#include "common/logging.h"
+#include "common/utils.h"
void Trim(std::string& s) {
boost::trim_if(s, boost::is_any_of(" \t\n"));
@@ -46,7 +47,7 @@ std::string Exec(const std::string& cmd) {
std::string result;
std::shared_ptr<std::FILE> pipe(popen(cmd.c_str(), "r"), pclose);
if(!pipe)
- UTIL_THROW2("popen() failed!");
+ ABORT("popen() failed!");
while(!std::feof(pipe.get())) {
if(std::fgets(buffer.data(), 128, pipe.get()) != NULL)
diff --git a/src/data/batch_generator.h b/src/data/batch_generator.h
index ceea5493..62040861 100644
--- a/src/data/batch_generator.h
+++ b/src/data/batch_generator.h
@@ -138,8 +138,7 @@ public:
operator bool() const { return !bufferedBatches_.empty(); }
BatchPtr next() {
- UTIL_THROW_IF2(bufferedBatches_.empty(),
- "No batches to fetch, run prepare()");
+ ABORT_IF(bufferedBatches_.empty(), "No batches to fetch, run prepare()");
currentBatch_ = bufferedBatches_.front();
bufferedBatches_.pop_front();
diff --git a/src/data/batch_stats.h b/src/data/batch_stats.h
index 2ee65cce..70bdbba1 100644
--- a/src/data/batch_stats.h
+++ b/src/data/batch_stats.h
@@ -22,7 +22,7 @@ public:
while(it != map_.end() && it->first[i] < lengths[i])
it++;
- UTIL_THROW_IF2(it == map_.end(), "Missing batch statistics");
+ ABORT_IF(it == map_.end(), "Missing batch statistics");
return it->second;
}
diff --git a/src/data/corpus.cpp b/src/data/corpus.cpp
index a93d6fe9..b5df6aef 100644
--- a/src/data/corpus.cpp
+++ b/src/data/corpus.cpp
@@ -44,8 +44,8 @@ Corpus::Corpus(Ptr<Config> options, bool translate)
vocabPaths = options_->get<std::vector<std::string>>("vocabs");
if(training) {
- UTIL_THROW_IF2(!vocabPaths.empty() && paths_.size() != vocabPaths.size(),
- "Number of corpus files and vocab files does not agree");
+ ABORT_IF(!vocabPaths.empty() && paths_.size() != vocabPaths.size(),
+ "Number of corpus files and vocab files does not agree");
}
std::vector<int> maxVocabs = options_->get<std::vector<int>>("dim-vocabs");
@@ -61,7 +61,10 @@ Corpus::Corpus(Ptr<Config> options, bool translate)
for(size_t i = 0; i < paths_.size(); ++i) {
Ptr<Vocab> vocab = New<Vocab>();
int vocSize = vocab->loadOrCreate("", paths_[i], maxVocabs[i]);
- LOG(info, "[data] Setting vocabulary size for input {} to {}", i, vocSize);
+ LOG(info,
+ "[data] Setting vocabulary size for input {} to {}",
+ i,
+ vocSize);
options_->get()["dim-vocabs"][i] = vocSize;
options_->get()["vocabs"].push_back(paths_[i] + ".yml");
@@ -76,15 +79,17 @@ Corpus::Corpus(Ptr<Config> options, bool translate)
Ptr<Vocab> vocab = New<Vocab>();
int vocSize
= vocab->loadOrCreate(vocabPaths[i], paths_[i], maxVocabs[i]);
- LOG(info, "[data] Setting vocabulary size for input {} to {}", i, vocSize);
+ LOG(info,
+ "[data] Setting vocabulary size for input {} to {}",
+ i,
+ vocSize);
options_->get()["dim-vocabs"][i] = vocSize;
vocabs_.emplace_back(vocab);
}
}
} else { // i.e., if translating
- UTIL_THROW_IF2(vocabPaths.empty(),
- "translating but vocabularies are missing!");
+ ABORT_IF(vocabPaths.empty(), "Translating but vocabularies are missing!");
if(maxVocabs.size() < vocabPaths.size())
maxVocabs.resize(paths_.size(), 0);
@@ -107,7 +112,7 @@ Corpus::Corpus(Ptr<Config> options, bool translate)
files_.emplace_back(new InputFileStream(std::cin));
else {
files_.emplace_back(new InputFileStream(path));
- UTIL_THROW_IF2(files_.back()->empty(), "File " << path << " is empty");
+ ABORT_IF(files_.back()->empty(), "File '{}' is empty", path);
}
}
}
@@ -120,8 +125,8 @@ Corpus::Corpus(std::vector<std::string> paths,
options_(options),
vocabs_(vocabs),
maxLength_(maxLength ? maxLength : options_->get<size_t>("max-length")) {
- UTIL_THROW_IF2(paths_.size() != vocabs_.size(),
- "Number of corpus files and vocab files does not agree");
+ ABORT_IF(paths_.size() != vocabs_.size(),
+ "Number of corpus files and vocab files does not agree");
for(auto path : paths_) {
files_.emplace_back(new InputFileStream(path));
diff --git a/src/data/dataset.h b/src/data/dataset.h
index 17a146d0..d019b3b3 100644
--- a/src/data/dataset.h
+++ b/src/data/dataset.h
@@ -77,9 +77,7 @@ public:
void push_back(Input input) { inputs_.push_back(input); }
- virtual std::vector<Ptr<Batch>> split(size_t n) {
- UTIL_THROW2("Not implemented");
- }
+ virtual std::vector<Ptr<Batch>> split(size_t n) { ABORT("Not implemented"); }
Data& features() { return inputs_[0].data(); }
diff --git a/src/data/filter.h b/src/data/filter.h
index 5c11ef25..e40e8794 100644
--- a/src/data/filter.h
+++ b/src/data/filter.h
@@ -89,7 +89,7 @@ public:
std::vector<std::string> vals
= options_->get<std::vector<std::string>>("filter");
- UTIL_THROW_IF2(vals.empty(), "No path to filter path given");
+ ABORT_IF(vals.empty(), "No path to filter path given");
std::string fname = vals[0];
firstNum_ = vals.size() > 1 ? std::stoi(vals[1]) : 100;
diff --git a/src/data/types.h b/src/data/types.h
index f3224999..3e36c454 100644
--- a/src/data/types.h
+++ b/src/data/types.h
@@ -27,10 +27,16 @@ const std::string DEL_STR = "<d>";
const std::string RPL_STR = "<r>";
const std::unordered_map<std::string, Word> SPEC2SYM = {
- {STP_STR, STP_ID}, {CPY_STR, CPY_ID}, {DEL_STR, DEL_ID}, {RPL_STR, RPL_ID},
+ {STP_STR, STP_ID},
+ {CPY_STR, CPY_ID},
+ {DEL_STR, DEL_ID},
+ {RPL_STR, RPL_ID},
};
const std::unordered_map<Word, std::string> SYM2SPEC = {
- {STP_ID, STP_STR}, {CPY_ID, CPY_STR}, {DEL_ID, DEL_STR}, {RPL_ID, RPL_STR},
+ {STP_ID, STP_STR},
+ {CPY_ID, CPY_STR},
+ {DEL_ID, DEL_STR},
+ {RPL_ID, RPL_STR},
};
}
\ No newline at end of file
diff --git a/src/data/vocab.cpp b/src/data/vocab.cpp
index 18b81d21..0e534ee5 100644
--- a/src/data/vocab.cpp
+++ b/src/data/vocab.cpp
@@ -52,7 +52,7 @@ std::vector<std::string> Vocab::operator()(const Words& sentence,
}
const std::string& Vocab::operator[](size_t id) const {
- UTIL_THROW_IF2(id >= id2str_.size(), "Unknown word id: " << id);
+ ABORT_IF(id >= id2str_.size(), "Unknown word id: ", id);
return id2str_[id];
}
@@ -100,7 +100,7 @@ int Vocab::load(const std::string& vocabPath, int max) {
id2str_[id] = str;
}
}
- UTIL_THROW_IF2(id2str_.empty(), "Empty vocabulary " << vocabPath);
+ ABORT_IF(id2str_.empty(), "Empty vocabulary: ", vocabPath);
id2str_[EOS_ID] = EOS_STR;
id2str_[UNK_ID] = UNK_STR;
@@ -126,8 +126,9 @@ public:
void Vocab::create(const std::string& vocabPath, const std::string& trainPath) {
LOG(info, "[data] Creating vocabulary {} from {}", vocabPath, trainPath);
- UTIL_THROW_IF2(boost::filesystem::exists(vocabPath),
- "Vocab file " << vocabPath << " exists. Not overwriting");
+ ABORT_IF(boost::filesystem::exists(vocabPath),
+ "Vocab file '{}' exists. Not overwriting",
+ vocabPath);
InputFileStream trainStrm(trainPath);
diff --git a/src/examples/mnist/dataset.h b/src/examples/mnist/dataset.h
index 3a60b659..391bf926 100644
--- a/src/examples/mnist/dataset.h
+++ b/src/examples/mnist/dataset.h
@@ -31,13 +31,11 @@ public:
}
void loadData() {
- UTIL_THROW_IF2(paths_.size() != 2,
- "Paths to MNIST data files are not specified");
+ ABORT_IF(paths_.size() != 2, "Paths to MNIST data files are not specified");
auto features = ReadImages(paths_[0]);
auto labels = ReadLabels(paths_[1]);
- UTIL_THROW_IF2(features.size() != labels.size(),
- "Features do not match labels");
+ ABORT_IF(features.size() != labels.size(), "Features do not match labels");
for(size_t i = 0; i < features.size(); ++i) {
Example ex = {features[i], labels[i]};
@@ -57,14 +55,13 @@ private:
std::vector<Data> ReadImages(const std::string &full_path) {
std::ifstream file(full_path);
- UTIL_THROW_IF2(!file.is_open(), "Cannot open file `" + full_path + "`!");
+ ABORT_IF(!file.is_open(), "Cannot open file `" + full_path + "`!");
int magic_number = 0;
file.read((char *)&magic_number, sizeof(magic_number));
magic_number = reverseInt(magic_number);
- UTIL_THROW_IF2(magic_number != IMAGE_MAGIC_NUMBER,
- "Invalid MNIST image file!");
+ ABORT_IF(magic_number != IMAGE_MAGIC_NUMBER, "Invalid MNIST image file!");
int number_of_images;
int n_rows = 0;
diff --git a/src/graph/expression_graph.cu b/src/graph/expression_graph.cu
index fb720ecc..829af085 100644
--- a/src/graph/expression_graph.cu
+++ b/src/graph/expression_graph.cu
@@ -49,6 +49,6 @@ Expr ExpressionGraph::gaussian(float mean, float stddev, Shape shape) {
}
void ExpressionGraph::checkNan(Tensor t) {
- UTIL_THROW_IF2(throwNaN_ && IsNan(t), "Tensor has NaN");
+ ABORT_IF(throwNaN_ && IsNan(t), "Tensor has NaN");
}
}
diff --git a/src/graph/expression_graph.h b/src/graph/expression_graph.h
index 1ca0204a..67a5f5ef 100644
--- a/src/graph/expression_graph.h
+++ b/src/graph/expression_graph.h
@@ -178,8 +178,8 @@ public:
* and that all backward pass computations have been performed.
*/
void backward() {
- UTIL_THROW_IF2(topNodes_.size() > 1,
- "There are more than one top most node for backward step");
+ ABORT_IF(topNodes_.size() > 1,
+ "There are more than one top most node for backward step");
params_->allocateBackward();
params_->set_zero_adjoint();
@@ -269,23 +269,22 @@ public:
if(p) {
// if yes add to tape and return
- UTIL_THROW_IF2(shape != p->shape(),
- "Requested shape for existing parameter "
- << name
- << " does not match original shape");
+ ABORT_IF(shape != p->shape(),
+ "Requested shape for existing parameter '{}' does not match "
+ "original shape",
+ name);
add(p);
return p;
}
// if graph was reloaded do not allow creation of new parameters
- UTIL_THROW_IF2(reloaded_,
- "Graph was reloaded and parameter " << name
- << " is newly created");
+ ABORT_IF(reloaded_,
+ "Graph was reloaded and parameter '{}' is newly created",
+ name);
// if not check if name is not taken by other node
- UTIL_THROW_IF2(get(name),
- "Non-parameter with name " << name << "already exists");
+ ABORT_IF(get(name), "Non-parameter with name '{}' already exists", name);
// create parameter node (adds to tape)
p = Expression<ParamNode>(
diff --git a/src/graph/expression_operators.cu b/src/graph/expression_operators.cu
index af592625..10861c14 100644
--- a/src/graph/expression_operators.cu
+++ b/src/graph/expression_operators.cu
@@ -195,11 +195,11 @@ Expr affine(Expr a, Expr b, Expr c) {
}
Expr plus(const std::vector<Expr>&) {
- UTIL_THROW2("Not implemented");
+ ABORT("Not implemented");
}
Expr swish(const std::vector<Expr>&) {
- UTIL_THROW2("Not implemented");
+ ABORT("Not implemented");
}
Expr tanh(const std::vector<Expr>& nodes) {
@@ -207,11 +207,11 @@ Expr tanh(const std::vector<Expr>& nodes) {
}
Expr logit(const std::vector<Expr>&) {
- UTIL_THROW2("Not implemented");
+ ABORT("Not implemented");
}
Expr relu(const std::vector<Expr>&) {
- UTIL_THROW2("Not implemented");
+ ABORT("Not implemented");
}
Expr sqrt(Expr a, float eps) {
diff --git a/src/graph/expression_operators.h b/src/graph/expression_operators.h
index 8824bb40..c99af41d 100644
--- a/src/graph/expression_operators.h
+++ b/src/graph/expression_operators.h
@@ -109,7 +109,7 @@ Expr dropout(Expr x, Args... args) {
auto mask = Get(keywords::mask, nullptr, args...);
float dropout_prob = Get(keywords::dropout_prob, 0.0f, args...);
- UTIL_THROW_IF2(!mask && !dropout_prob, "Neither mask nor dropout prob given");
+ ABORT_IF(!mask && !dropout_prob, "Neither mask nor dropout prob given");
if(!mask) {
auto graph = x->graph();
mask = graph->dropout(dropout_prob, x->shape());
diff --git a/src/graph/node_operators.h b/src/graph/node_operators.h
index cec27bfe..1cb112dd 100644
--- a/src/graph/node_operators.h
+++ b/src/graph/node_operators.h
@@ -11,8 +11,7 @@ struct ConstantNode : public Node {
: Node(args...),
init_(Get(keywords::init, [](Tensor) {})),
initialized_(false) {
- UTIL_THROW_IF2(!Has(keywords::shape),
- "Constant items require shape information");
+ ABORT_IF(!Has(keywords::shape), "Constant items require shape information");
setTrainable(false);
}
@@ -47,8 +46,7 @@ struct ParamNode : public Node {
: Node(args...),
init_(Get(keywords::init, [](Tensor) {})),
initialized_(false) {
- UTIL_THROW_IF2(!Has(keywords::shape),
- "Param items require shape information");
+ ABORT_IF(!Has(keywords::shape), "Param items require shape information");
setTrainable(!Get(keywords::fixed, false));
}
diff --git a/src/graph/node_operators_binary.h b/src/graph/node_operators_binary.h
index 74da4b14..4ec5c092 100644
--- a/src/graph/node_operators_binary.h
+++ b/src/graph/node_operators_binary.h
@@ -41,10 +41,15 @@ private:
public:
template <typename... Args>
- DotNodeOp(
- Expr a, Expr b, bool transA, bool transB, float scalar, Args... args)
- : NaryNodeOp(
- {a, b}, keywords::shape = newShape(a, b, transA, transB), args...),
+ DotNodeOp(Expr a,
+ Expr b,
+ bool transA,
+ bool transB,
+ float scalar,
+ Args... args)
+ : NaryNodeOp({a, b},
+ keywords::shape = newShape(a, b, transA, transB),
+ args...),
transA_(transA),
transB_(transB),
scalar_(scalar) {}
@@ -240,10 +245,15 @@ private:
public:
template <typename... Args>
- DotBatchedNodeOp(
- Expr a, Expr b, bool transA, bool transB, float scalar, Args... args)
- : NaryNodeOp(
- {a, b}, keywords::shape = newShape(a, b, transA, transB), args...),
+ DotBatchedNodeOp(Expr a,
+ Expr b,
+ bool transA,
+ bool transB,
+ float scalar,
+ Args... args)
+ : NaryNodeOp({a, b},
+ keywords::shape = newShape(a, b, transA, transB),
+ args...),
transA_(transA),
transB_(transB),
scalar_(scalar) {}
@@ -263,8 +273,8 @@ public:
Shape outShape = shapeA;
outShape.set(1, shapeB[1]);
- UTIL_THROW_IF2(shapeA[1] != shapeB[0],
- "matrix product requires dimensions to match");
+ ABORT_IF(shapeA[1] != shapeB[0],
+ "matrix product requires dimensions to match");
return outShape;
}
@@ -425,8 +435,8 @@ struct ElementBinaryNodeOp : public NaryNodeOp {
Shape shape1 = a->shape();
Shape shape2 = b->shape();
for(int i = 0; i < shape1.size(); ++i) {
- UTIL_THROW_IF2(shape1[i] != shape2[i] && shape1[i] != 1 && shape2[i] != 1,
- "Shapes cannot be broadcasted");
+ ABORT_IF(shape1[i] != shape2[i] && shape1[i] != 1 && shape2[i] != 1,
+ "Shapes cannot be broadcasted");
shape1.set(i, std::max(shape1[i], shape2[i]));
}
return shape1;
@@ -625,7 +635,7 @@ struct TanhPlus3NodeOp : public NaryNodeOp {
for(int n = 1; n < nodes.size(); ++n) {
Shape shapen = nodes[n]->shape();
for(int i = 0; i < shapen.size(); ++i) {
- UTIL_THROW_IF2(shape[i] != shapen[i] && shape[i] != 1 && shapen[i] != 1,
+ ABORT_IF(shape[i] != shapen[i] && shape[i] != 1 && shapen[i] != 1,
"Shapes cannot be broadcasted");
shape.set(i, std::max(shape[i], shapen[i]));
}
diff --git a/src/graph/node_operators_unary.h b/src/graph/node_operators_unary.h
index 9881357c..a3f60366 100644
--- a/src/graph/node_operators_unary.h
+++ b/src/graph/node_operators_unary.h
@@ -146,8 +146,8 @@ struct TanhNodeOp : public NaryNodeOp {
for(int n = 1; n < nodes.size(); ++n) {
Shape shapen = nodes[n]->shape();
for(int i = 0; i < shapen.size(); ++i) {
- UTIL_THROW_IF2(shape[i] != shapen[i] && shape[i] != 1 && shapen[i] != 1,
- "Shapes cannot be broadcasted");
+ ABORT_IF(shape[i] != shapen[i] && shape[i] != 1 && shapen[i] != 1,
+ "Shapes cannot be broadcasted");
shape.set(i, std::max(shape[i], shapen[i]));
}
}
@@ -237,8 +237,11 @@ struct SwishNodeOp : public UnaryNodeOp {
}
NodeOps backwardOps() {
- return {NodeOp(
- Add(_1 * (_3 + Sigma(_2) * (1.f - _3)), child(0)->grad(), adj_, child(0)->val(), val_))};
+ return {NodeOp(Add(_1 * (_3 + Sigma(_2) * (1.f - _3)),
+ child(0)->grad(),
+ adj_,
+ child(0)->val(),
+ val_))};
}
const std::string type() { return "swish"; }
diff --git a/src/graph/parameters.h b/src/graph/parameters.h
index bbe02f36..df73dbd5 100644
--- a/src/graph/parameters.h
+++ b/src/graph/parameters.h
@@ -51,8 +51,7 @@ public:
void add(Expr p, const std::string& name) {
params_.push_back(p);
- UTIL_THROW_IF2(named_.count(name),
- "Parameter " << name << "already exists");
+ ABORT_IF(named_.count(name), "Parameter '{}' already exists", name);
named_[name] = p;
}
diff --git a/src/kernels/sparse.cu b/src/kernels/sparse.cu
index 911d29ce..1d104474 100644
--- a/src/kernels/sparse.cu
+++ b/src/kernels/sparse.cu
@@ -7,8 +7,11 @@ namespace marian {
namespace sparse {
-void multiply(
- Ptr<CSR> C, const Ptr<CSR> A, const Ptr<CSR> B, bool transA, bool transB) {
+void multiply(Ptr<CSR> C,
+ const Ptr<CSR> A,
+ const Ptr<CSR> B,
+ bool transA,
+ bool transB) {
cudaSetDevice(C->getDevice());
int nnzTotal;
C->allocRowIndices(A->rows());
@@ -101,8 +104,8 @@ void LfaForward(Tensor out, Tensor logits, Tensor att, Ptr<CSR> sparseLf) {
for(size_t i = 0; i < nonzeros; ++i) {
int r = (i % batch) + (i / (srcWords * batch)) * batch;
int c = i % (srcWords * batch);
- UTIL_THROW_IF2(r >= trgWords * batch, "Row index too large");
- UTIL_THROW_IF2(c >= srcWords * batch, "Column index too large");
+ ABORT_IF(r >= trgWords * batch, "Row index too large");
+ ABORT_IF(c >= srcWords * batch, "Column index too large");
coo.emplace_back(r, c, values[i]);
}
std::sort(coo.begin(), coo.end());
@@ -130,8 +133,11 @@ void LfaForward(Tensor out, Tensor logits, Tensor att, Ptr<CSR> sparseLf) {
sparseLfa->toTensor(out);
}
-__global__ void gCollapseAtt(
- float* out, const float* in, int batch, int srcWords, int nonzeros) {
+__global__ void gCollapseAtt(float* out,
+ const float* in,
+ int batch,
+ int srcWords,
+ int nonzeros) {
for(int bid = 0; bid < nonzeros; bid += blockDim.x * gridDim.x) {
int index = bid + blockDim.x * blockIdx.x + threadIdx.x;
if(index < nonzeros) {
diff --git a/src/kernels/sparse.h b/src/kernels/sparse.h
index 447d48de..625ebe4b 100644
--- a/src/kernels/sparse.h
+++ b/src/kernels/sparse.h
@@ -130,7 +130,7 @@ public:
void toTensor(Tensor dense) {
cudaSetDevice(device_);
- UTIL_THROW_IF2(dense->size() != rows_ * cols_, "Matrix sizes do not match");
+ ABORT_IF(dense->size() != rows_ * cols_, "Matrix sizes do not match");
cusparseScsc2dense(handle_,
cols_,
@@ -197,8 +197,11 @@ public:
}
};
-void multiply(
- Ptr<CSR>, const Ptr<CSR>, const Ptr<CSR>, bool = false, bool = false);
+void multiply(Ptr<CSR>,
+ const Ptr<CSR>,
+ const Ptr<CSR>,
+ bool = false,
+ bool = false);
void LfaForward(Tensor out, Tensor logits, Tensor att, Ptr<CSR> sparseLf);
diff --git a/src/kernels/tensor_operators.cu b/src/kernels/tensor_operators.cu
index cacb5316..177f98da 100644
--- a/src/kernels/tensor_operators.cu
+++ b/src/kernels/tensor_operators.cu
@@ -17,8 +17,7 @@ __device__ inline float stableLogit(float x) {
if(x >= 0) {
float z = expf(-x);
return 1.0 / (1.0 + z);
- }
- else {
+ } else {
float z = expf(x);
return z / (1.0 + z);
}
@@ -162,7 +161,6 @@ void Deconcatenate(std::vector<Tensor>& outputs, const Tensor in, int ax) {
SplitCont(outputs, in, ax);
}
-
__global__ void gTranspose4D(float* out,
ShapeGPU outShape,
const float* in,
@@ -953,7 +951,6 @@ __global__ void gGRUFastForward(float* out,
for(int tid = 0; tid < cols; tid += blockDim.x) {
int i = tid + threadIdx.x;
if(i < cols) {
-
float r = stableLogit(xWrow[i] + sUrow[i] + b[i]);
int k = i + cols;
@@ -1527,8 +1524,11 @@ __global__ void gLNormalization(float* out,
}
}
-void LayerNormalization(
- Tensor out, Tensor in, Tensor gamma, Tensor beta, float eps) {
+void LayerNormalization(Tensor out,
+ Tensor in,
+ Tensor gamma,
+ Tensor beta,
+ float eps) {
cudaSetDevice(out->getDevice());
int rows = in->shape().elements() / in->shape().back();
diff --git a/src/kernels/tensor_operators.h b/src/kernels/tensor_operators.h
index ea1ca9ba..e178eeac 100644
--- a/src/kernels/tensor_operators.h
+++ b/src/kernels/tensor_operators.h
@@ -389,8 +389,11 @@ __global__ void gAdd1R3(Functor functor,
}
template <class Functor>
-void Add(
- Functor functor, Tensor out, Tensor in1, Tensor in2, float scale = 1.0) {
+void Add(Functor functor,
+ Tensor out,
+ Tensor in1,
+ Tensor in2,
+ float scale = 1.0) {
cudaSetDevice(out->getDevice());
UTIL_THROW_IF2(out->shape().size() != in1->shape().size(),
@@ -454,8 +457,11 @@ void Add(
}
template <class Functor>
-void Reduce(
- Functor functor, Tensor out, Tensor in1, Tensor in2, float scale = 1.0) {
+void Reduce(Functor functor,
+ Tensor out,
+ Tensor in1,
+ Tensor in2,
+ float scale = 1.0) {
out->set(0);
Add(functor, out, in1, in2, scale);
}
@@ -934,8 +940,11 @@ void AttBack(Tensor gva,
Tensor coverage,
Tensor adj);
-void LayerNormalization(
- Tensor out, Tensor in, Tensor gamma, Tensor beta, float eps = 1e-9);
+void LayerNormalization(Tensor out,
+ Tensor in,
+ Tensor gamma,
+ Tensor beta,
+ float eps = 1e-9);
void LayerNormalizationGrad(Tensor gradX,
Tensor gradGamma,
Tensor gradBeta,
diff --git a/src/kernels/thrust_functions.h b/src/kernels/thrust_functions.h
index b49f0983..67f37a13 100644
--- a/src/kernels/thrust_functions.h
+++ b/src/kernels/thrust_functions.h
@@ -37,8 +37,7 @@ struct unary_sigma : public thrust::unary_function<T, T> {
if(x >= 0) {
float z = expf(-x);
return 1.0 / (1.0 + z);
- }
- else {
+ } else {
float z = expf(x);
return z / (1.0 + z);
}
diff --git a/src/layers/generic.h b/src/layers/generic.h
index 1577b076..64ebab4f 100644
--- a/src/layers/generic.h
+++ b/src/layers/generic.h
@@ -59,7 +59,7 @@ public:
}
Expr apply(const std::vector<Expr>& inputs) {
- UTIL_THROW_IF2(inputs.empty(), "No inputs");
+ ABORT_IF(inputs.empty(), "No inputs");
if(inputs.size() == 1)
return apply(inputs[0]);
diff --git a/src/layers/guided_alignment.cpp b/src/layers/guided_alignment.cpp
index 10875716..8da75924 100644
--- a/src/layers/guided_alignment.cpp
+++ b/src/layers/guided_alignment.cpp
@@ -30,7 +30,7 @@ Expr guidedAlignmentCost(Ptr<ExpressionGraph> graph,
} else if(guidedCostType == "ce") {
alnCost = -sum(flatten(aln * log(att + eps))) / dimBatch;
} else {
- UTIL_THROW2("Unknown alignment cost type");
+ ABORT("Unknown alignment cost type");
}
float guidedScalar = options->get<float>("guided-alignment-weight");
diff --git a/src/layers/param_initializers.cu b/src/layers/param_initializers.cu
index dea0bc79..3b3cc2a4 100644
--- a/src/layers/param_initializers.cu
+++ b/src/layers/param_initializers.cu
@@ -89,8 +89,8 @@ void svd(std::vector<float>& vec, Shape shape) {
int n = std::min(rows, cols);
int m = std::max(rows, cols);
- UTIL_THROW_IF2(m % n != 0,
- "Matrix dimensions must be equal or multiples of each other");
+ ABORT_IF(m % n != 0,
+ "Matrix dimensions must be equal or multiples of each other");
for(int i = 0; i < shape.elements(); i += n * n) {
std::vector<float> t1(n);
diff --git a/src/layers/word2vec_reader.h b/src/layers/word2vec_reader.h
index ff77cea1..4e0b7fbb 100644
--- a/src/layers/word2vec_reader.h
+++ b/src/layers/word2vec_reader.h
@@ -21,8 +21,8 @@ public:
LOG(info, "[data] Loading embedding vectors from {}", fileName);
std::ifstream embFile(fileName);
- UTIL_THROW_IF2(!embFile.is_open(),
- "Unable to open file with embeddings: " + fileName);
+ ABORT_IF(!embFile.is_open(),
+ "Unable to open file with embeddings: " + fileName);
std::string line;
std::vector<std::string> values;
@@ -32,10 +32,10 @@ public:
// vocabulary and the length of embedding vectors
std::getline(embFile, line);
Split(line, values);
- UTIL_THROW_IF2(values.size() != 2,
- "Unexpected format of the first line in embedding file");
- UTIL_THROW_IF2(stoi(values[1]) != dimEmb,
- "Unexpected length of embedding vectors");
+ ABORT_IF(values.size() != 2,
+ "Unexpected format of the first line in embedding file");
+ ABORT_IF(stoi(values[1]) != dimEmb,
+ "Unexpected length of embedding vectors");
// Read embedding vectors into a map
std::unordered_map<Word, std::vector<float>> word2vec;
diff --git a/src/models/amun.h b/src/models/amun.h
index 1d27dff6..33e3f225 100644
--- a/src/models/amun.h
+++ b/src/models/amun.h
@@ -9,32 +9,30 @@ namespace marian {
class Amun : public EncoderDecoder {
public:
Amun(Ptr<Options> options) : EncoderDecoder(options) {
- UTIL_THROW_IF2(options_->get<int>("enc-depth") > 1,
- "--type amun does not currently support multiple encoder "
- "layers, use --type s2s");
- UTIL_THROW_IF2(options_->get<int>("enc-cell-depth") > 1,
- "--type amun does not currently support stacked encoder "
- "cells, use --type s2s");
- UTIL_THROW_IF2(options_->get<bool>("skip"),
- "--type amun does not currently support skip connections, "
- "use --type s2s");
- UTIL_THROW_IF2(options_->get<int>("dec-depth") > 1,
- "--type amun does not currently support multiple decoder "
- "layers, use --type s2s");
- UTIL_THROW_IF2(options_->get<int>("dec-cell-base-depth") != 2,
- "--type amun does not currently support multiple decoder "
- "base cells, use --type s2s");
- UTIL_THROW_IF2(options_->get<int>("dec-cell-high-depth") > 1,
- "--type amun does not currently support multiple decoder "
- "high cells, use --type s2s");
- UTIL_THROW_IF2(
- options_->get<std::string>("enc-cell") != "gru",
- "--type amun does not currently support other rnn cells than gru, "
- "use --type s2s");
- UTIL_THROW_IF2(
- options_->get<std::string>("dec-cell") != "gru",
- "--type amun does not currently support other rnn cells than gru, "
- "use --type s2s");
+ ABORT_IF(options_->get<int>("enc-depth") > 1,
+ "--type amun does not currently support multiple encoder "
+ "layers, use --type s2s");
+ ABORT_IF(options_->get<int>("enc-cell-depth") > 1,
+ "--type amun does not currently support stacked encoder "
+ "cells, use --type s2s");
+ ABORT_IF(options_->get<bool>("skip"),
+ "--type amun does not currently support skip connections, "
+ "use --type s2s");
+ ABORT_IF(options_->get<int>("dec-depth") > 1,
+ "--type amun does not currently support multiple decoder "
+ "layers, use --type s2s");
+ ABORT_IF(options_->get<int>("dec-cell-base-depth") != 2,
+ "--type amun does not currently support multiple decoder "
+ "base cells, use --type s2s");
+ ABORT_IF(options_->get<int>("dec-cell-high-depth") > 1,
+ "--type amun does not currently support multiple decoder "
+ "high cells, use --type s2s");
+ ABORT_IF(options_->get<std::string>("enc-cell") != "gru",
+ "--type amun does not currently support other rnn cells than gru, "
+ "use --type s2s");
+ ABORT_IF(options_->get<std::string>("dec-cell") != "gru",
+ "--type amun does not currently support other rnn cells than gru, "
+ "use --type s2s");
}
void load(Ptr<ExpressionGraph> graph, const std::string& name) {
diff --git a/src/models/encdec.h b/src/models/encdec.h
index 27da58b2..6a83a721 100644
--- a/src/models/encdec.h
+++ b/src/models/encdec.h
@@ -327,8 +327,8 @@ public:
if(options_->has("guided-alignment") && !inference_) {
auto alignments = decoders_[0]->getAlignments();
- UTIL_THROW_IF2(alignments.empty(),
- "Model does not seem to support alignments");
+ ABORT_IF(alignments.empty(), "Model does not seem to support alignments");
+
auto att = concatenate(alignments, axis = 3);
return cost + guidedAlignmentCost(graph, batch, options_, att);
} else {
diff --git a/src/models/experimental/lex_probs.h b/src/models/experimental/lex_probs.h
index 034c5d9e..1b5b62f0 100644
--- a/src/models/experimental/lex_probs.h
+++ b/src/models/experimental/lex_probs.h
@@ -107,7 +107,7 @@ public:
std::vector<int> colIndices(rows);
for(size_t i = 0; i < rows; ++i) {
- UTIL_THROW_IF2(indices[i] >= srcDim_, "column index to large");
+ ABORT_IF(indices[i] >= srcDim_, "Column index to large");
values[i] = 1;
rowIndices[i] = i;
colIndices[i] = indices[i];
diff --git a/src/models/hardatt.h b/src/models/hardatt.h
index 2b82d486..20b6c028 100644
--- a/src/models/hardatt.h
+++ b/src/models/hardatt.h
@@ -31,7 +31,7 @@ public:
}
virtual std::vector<size_t>& getAttentionIndices() {
- UTIL_THROW_IF2(attentionIndices_.empty(), "Empty attention indices");
+ ABORT_IF(attentionIndices_.empty(), "Empty attention indices");
return attentionIndices_;
}
diff --git a/src/models/model_factory.cpp b/src/models/model_factory.cpp
index dbb6ef27..0416cfea 100644
--- a/src/models/model_factory.cpp
+++ b/src/models/model_factory.cpp
@@ -21,7 +21,7 @@ Ptr<EncoderBase> EncoderFactory::construct() {
if(options_->get<std::string>("type") == "transformer")
return New<EncoderTransformer>(options_);
- UTIL_THROW2("Unknown encoder type");
+ ABORT("Unknown encoder type");
}
Ptr<DecoderBase> DecoderFactory::construct() {
@@ -34,7 +34,7 @@ Ptr<DecoderBase> DecoderFactory::construct() {
if(options_->get<std::string>("type") == "hard-soft-att")
return New<DecoderHardAtt>(options_);
- UTIL_THROW2("Unknown decoder type");
+ ABORT("Unknown decoder type");
}
Ptr<EncoderDecoder> EncoderDecoderFactory::construct() {
@@ -174,7 +174,7 @@ Ptr<ModelBase> by_type(std::string type, Ptr<Options> options) {
}
#endif
- UTIL_THROW2("Unknown model type: " + type);
+ ABORT("Unknown model type: {}", type);
}
Ptr<ModelBase> from_options(Ptr<Options> options) {
diff --git a/src/models/nematus.h b/src/models/nematus.h
index c6fa6561..cd81e83c 100644
--- a/src/models/nematus.h
+++ b/src/models/nematus.h
@@ -11,29 +11,28 @@ public:
template <class... Args>
Nematus(Ptr<Options> options)
: EncoderDecoder(options), nameMap_(createNameMap()) {
- UTIL_THROW_IF2(options_->get<std::string>("enc-type") != "bidirectional",
- "--type nematus does not currently support other encoder "
- "type than bidirectional, use --type s2s");
- UTIL_THROW_IF2(options_->get<int>("enc-depth") > 1,
- "--type nematus does not currently support multiple encoder "
- "layers, use --type s2s");
- UTIL_THROW_IF2(
- options_->get<bool>("skip"),
- "--type nematus does not currently support skip connections, "
- "use --type s2s");
- UTIL_THROW_IF2(options_->get<int>("dec-depth") > 1,
- "--type nematus does not currently support multiple decoder "
- "layers, use --type s2s");
- UTIL_THROW_IF2(options_->get<int>("dec-cell-high-depth") > 1,
- "--type nematus does not currently support multiple decoder "
- "high cells, use --type s2s");
-
- UTIL_THROW_IF2(options_->get<std::string>("enc-cell") != "gru-nematus",
- "--type nematus does not currently support other rnn cells "
- "than gru-nematus, use --type s2s");
- UTIL_THROW_IF2(options_->get<std::string>("dec-cell") != "gru-nematus",
- "--type nematus does not currently support other rnn cells "
- "than gru-nematus, use --type s2s");
+ ABORT_IF(options_->get<std::string>("enc-type") != "bidirectional",
+ "--type nematus does not currently support other encoder "
+ "type than bidirectional, use --type s2s");
+ ABORT_IF(options_->get<int>("enc-depth") > 1,
+ "--type nematus does not currently support multiple encoder "
+ "layers, use --type s2s");
+ ABORT_IF(options_->get<bool>("skip"),
+ "--type nematus does not currently support skip connections, "
+ "use --type s2s");
+ ABORT_IF(options_->get<int>("dec-depth") > 1,
+ "--type nematus does not currently support multiple decoder "
+ "layers, use --type s2s");
+ ABORT_IF(options_->get<int>("dec-cell-high-depth") > 1,
+ "--type nematus does not currently support multiple decoder "
+ "high cells, use --type s2s");
+
+ ABORT_IF(options_->get<std::string>("enc-cell") != "gru-nematus",
+ "--type nematus does not currently support other rnn cells "
+ "than gru-nematus, use --type s2s");
+ ABORT_IF(options_->get<std::string>("dec-cell") != "gru-nematus",
+ "--type nematus does not currently support other rnn cells "
+ "than gru-nematus, use --type s2s");
}
void load(Ptr<ExpressionGraph> graph, const std::string& name) {
diff --git a/src/models/transformer.h b/src/models/transformer.h
index d5b1c1f2..d3c9f406 100644
--- a/src/models/transformer.h
+++ b/src/models/transformer.h
@@ -173,8 +173,8 @@ public:
int dimBeamQ = q->shape()[3];
int dimBeamK = k->shape()[3];
if(dimBeamQ != dimBeamK) {
- k = concatenate(std::vector<Expr>(dimBeamQ, k), axis=3);
- v = concatenate(std::vector<Expr>(dimBeamQ, v), axis=3);
+ k = concatenate(std::vector<Expr>(dimBeamQ, k), axis = 3);
+ v = concatenate(std::vector<Expr>(dimBeamQ, v), axis = 3);
}
auto weights = softmax(bdot(q, k, false, true, scale) + mask);
@@ -245,7 +245,7 @@ public:
Expr output;
if(outputs.size() > 1)
- output = concatenate(outputs, axis=1);
+ output = concatenate(outputs, axis = 1);
else
output = outputs.front();
@@ -550,7 +550,8 @@ public:
for(int i = 1; i <= opt<int>("dec-depth"); ++i) {
auto values = query;
if(prevDecoderStates.size() > 0)
- values = concatenate({prevDecoderStates[i - 1].output, query}, axis=0);
+ values
+ = concatenate({prevDecoderStates[i - 1].output, query}, axis = 0);
decoderStates.push_back({values, nullptr});
@@ -595,7 +596,7 @@ public:
inference_);
}
} else {
- UTIL_THROW2("Unknown value for transformer-multi-encoder: " << comb);
+ ABORT("Unknown value for transformer-multi-encoder: {}", comb);
}
}
diff --git a/src/optimizers/optimizers.cu b/src/optimizers/optimizers.cu
index 146cfd01..dc5ed976 100644
--- a/src/optimizers/optimizers.cu
+++ b/src/optimizers/optimizers.cu
@@ -97,7 +97,7 @@ Ptr<OptimizerBase> Optimizer(Ptr<Config> options) {
} else if(opt == "adam") {
return Optimizer<Adam>(lrate, clipper, params);
} else {
- UTIL_THROW2("Unknown optimizer: " << opt);
+ ABORT("Unknown optimizer: {}", opt);
}
}
}
diff --git a/src/rnn/attention.cu b/src/rnn/attention.cu
index 8e80be76..3adf817b 100644
--- a/src/rnn/attention.cu
+++ b/src/rnn/attention.cu
@@ -19,13 +19,13 @@ struct AttentionNodeOp : public NaryNodeOp {
Shape stateShape = nodes[2]->shape();
for(int i = 0; i < stateShape.size(); ++i) {
- UTIL_THROW_IF2(ctxShape[i] != stateShape[i] && ctxShape[i] != 1
- && stateShape[i] != 1,
- "Shapes cannot be broadcasted");
+ ABORT_IF(ctxShape[i] != stateShape[i] && ctxShape[i] != 1
+ && stateShape[i] != 1,
+ "Shapes cannot be broadcasted");
shape.set(i, std::max(ctxShape[i], stateShape[i]));
}
- UTIL_THROW_IF2(vaShape[0] != shape[1] || vaShape[1] != 1, "Wrong size");
+ ABORT_IF(vaShape[0] != shape[1] || vaShape[1] != 1, "Wrong size");
shape.set(1, 1);
return shape;
@@ -76,4 +76,4 @@ Expr attOps(Expr va, Expr context, Expr state, Expr coverage) {
{dimWords, dimBatch, 1, dimBeam});
}
}
-}
\ No newline at end of file
+}
diff --git a/src/rnn/cells.h b/src/rnn/cells.h
index 8f678cf3..057a7c3c 100644
--- a/src/rnn/cells.h
+++ b/src/rnn/cells.h
@@ -643,7 +643,7 @@ public:
}
virtual std::vector<Expr> applyInput(std::vector<Expr> inputs) {
- UTIL_THROW_IF2(inputs.empty(), "Multiplicative LSTM expects input");
+ ABORT_IF(inputs.empty(), "Multiplicative LSTM expects input");
Expr input;
if(inputs.size() > 1)
@@ -738,7 +738,7 @@ public:
}
std::vector<Expr> applyInput(std::vector<Expr> inputs) {
- UTIL_THROW_IF2(inputs.empty(), "Slow LSTM expects input");
+ ABORT_IF(inputs.empty(), "Slow LSTM expects input");
Expr input;
if(inputs.size() > 1)
@@ -836,7 +836,7 @@ public:
}
std::vector<Expr> applyInput(std::vector<Expr> inputs) {
- UTIL_THROW_IF2(inputs.empty(), "Test LSTM expects input");
+ ABORT_IF(inputs.empty(), "Test LSTM expects input");
Expr input;
if(inputs.size() > 1)
diff --git a/src/rnn/constructors.h b/src/rnn/constructors.h
index cbe981f0..88c52314 100644
--- a/src/rnn/constructors.h
+++ b/src/rnn/constructors.h
@@ -63,7 +63,7 @@ public:
cell->setLazyInputs(inputs_);
return cell;
} else {
- UTIL_THROW2("Unknown RNN cell type");
+ ABORT("Unknown RNN cell type");
}
}
@@ -140,7 +140,7 @@ public:
AttentionFactory(Ptr<ExpressionGraph> graph) : InputFactory(graph) {}
Ptr<CellInput> construct() {
- UTIL_THROW_IF2(!state_, "EncoderState not set");
+ ABORT_IF(!state_, "EncoderState not set");
return New<Attention>(graph_, options_, state_);
}
@@ -150,7 +150,7 @@ public:
}
int dimAttended() {
- UTIL_THROW_IF2(!state_, "EncoderState not set");
+ ABORT_IF(!state_, "EncoderState not set");
return state_->getAttended()->shape()[1];
}
};
diff --git a/src/rnn/rnn.h b/src/rnn/rnn.h
index ad82a614..67947db6 100644
--- a/src/rnn/rnn.h
+++ b/src/rnn/rnn.h
@@ -135,7 +135,7 @@ public:
void push_back(Ptr<Cell> cell) { cell_ = cell; }
virtual Ptr<Cell> at(int i) {
- UTIL_THROW_IF2(i > 0, "SingleRNN only has one cell");
+ ABORT_IF(i > 0, "SingleRNN only has one cell");
return cell_;
}
};
@@ -160,7 +160,7 @@ public:
}
Expr transduce(Expr input, Expr mask = nullptr) {
- UTIL_THROW_IF2(rnns_.empty(), "0 layers in RNN");
+ ABORT_IF(rnns_.empty(), "0 layers in RNN");
Expr output;
Expr layerInput = input;
@@ -187,7 +187,7 @@ public:
}
Expr transduce(Expr input, States states, Expr mask = nullptr) {
- UTIL_THROW_IF2(rnns_.empty(), "0 layers in RNN");
+ ABORT_IF(rnns_.empty(), "0 layers in RNN");
Expr output;
Expr layerInput = input;
@@ -216,7 +216,7 @@ public:
}
Expr transduce(Expr input, State state, Expr mask = nullptr) {
- UTIL_THROW_IF2(rnns_.empty(), "0 layers in RNN");
+ ABORT_IF(rnns_.empty(), "0 layers in RNN");
Expr output;
Expr layerInput = input;
diff --git a/src/rnn/types.h b/src/rnn/types.h
index 02931533..9e288d5a 100644
--- a/src/rnn/types.h
+++ b/src/rnn/types.h
@@ -242,15 +242,15 @@ public:
}
virtual std::vector<Expr> getLazyInputs(Ptr<rnn::RNN> parent) {
- UTIL_THROW_IF2(!stackables_[0]->is<Cell>(),
- "First stackable should be of type Cell");
+ ABORT_IF(!stackables_[0]->is<Cell>(),
+ "First stackable should be of type Cell");
return stackables_[0]->as<Cell>()->getLazyInputs(parent);
}
virtual void setLazyInputs(
std::vector<std::function<Expr(Ptr<rnn::RNN>)>> lazy) {
- UTIL_THROW_IF2(!stackables_[0]->is<Cell>(),
- "First stackable should be of type Cell");
+ ABORT_IF(!stackables_[0]->is<Cell>(),
+ "First stackable should be of type Cell");
stackables_[0]->as<Cell>()->setLazyInputs(lazy);
}
};
diff --git a/src/tensors/allocator.h b/src/tensors/allocator.h
index 73ca04dd..8a84e4d7 100644
--- a/src/tensors/allocator.h
+++ b/src/tensors/allocator.h
@@ -186,7 +186,7 @@ public:
bool free(uint8_t* ptr, size_t bytes) {
bytes = align(bytes);
- UTIL_THROW_IF2(ptr == 0, "Double free?");
+ ABORT_IF(ptr == 0, "Double free?");
if(!ptr)
return false;
diff --git a/src/tensors/device_gpu.cu b/src/tensors/device_gpu.cu
index c4312c26..19ecacc2 100644
--- a/src/tensors/device_gpu.cu
+++ b/src/tensors/device_gpu.cu
@@ -18,7 +18,7 @@ void DeviceGPU::reserve(size_t size) {
size = align(size);
cudaSetDevice(device_);
- UTIL_THROW_IF2(size < size_, "New size must be larger than old size");
+ ABORT_IF(size < size_, "New size must be larger than old size");
if(data_) {
// Allocate memory by going through host memory
@@ -34,4 +34,4 @@ void DeviceGPU::reserve(size_t size) {
size_ = size;
}
-}
\ No newline at end of file
+}
diff --git a/src/tensors/tensor.h b/src/tensors/tensor.h
index 6dcd7293..a8da4ac4 100644
--- a/src/tensors/tensor.h
+++ b/src/tensors/tensor.h
@@ -35,7 +35,7 @@ public:
virtual size_t size() { return shape_.elements(); }
virtual float scalar() {
- UTIL_THROW_IF2(size() != 1, "Tensor is not a scalar");
+ ABORT_IF(size() != 1, "Tensor is not a scalar");
return get(0);
}
diff --git a/src/tensors/tensor_cpu.h b/src/tensors/tensor_cpu.h
index 7e322a1c..18725b3d 100644
--- a/src/tensors/tensor_cpu.h
+++ b/src/tensors/tensor_cpu.h
@@ -63,7 +63,7 @@ private:
typedef TensorCPU tensor_type;
void reserve(size_t size) {
- UTIL_THROW_IF2(size < size_, "New size must be larger than old size");
+ ABORT_IF(size < size_, "New size must be larger than old size");
float* temp = new float[size];
if(data_) {
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 7e23225a..ed53e784 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -2,6 +2,7 @@
set(TEST_SOURCES
graph_tests.cpp
operator_tests.cpp
+ rnn_tests.cpp
)
add_executable(run_tests run_tests.cpp ${TEST_SOURCES})
diff --git a/src/tests/allocator_test.cpp b/src/tests/allocator_test.cpp
index b9146cf4..a3d0fbda 100644
--- a/src/tests/allocator_test.cpp
+++ b/src/tests/allocator_test.cpp
@@ -25,7 +25,7 @@ public:
void reserve(size_t size) {
size = align(size);
- UTIL_THROW_IF2(size < size_, "New size must be larger than old size");
+ ABORT_IF(size < size_, "New size must be larger than old size");
if(data_) {
// Allocate memory by going through host memory
diff --git a/src/tests/rnn_tests.cpp b/src/tests/rnn_tests.cpp
new file mode 100644
index 00000000..96c1c120
--- /dev/null
+++ b/src/tests/rnn_tests.cpp
@@ -0,0 +1,277 @@
+#include "catch.hpp"
+#include "marian.h"
+
+using namespace marian;
+
+TEST_CASE("Model components, RNN etc.", "[model]") {
+
+ auto floatApprox = [](float x, float y) { return x == Approx(y); };
+
+ std::vector<size_t> vWords = {
+ 43, 2, 83, 78,
+ 6, 38, 80, 40,
+ 40, 70, 26, 60,
+ 106, 13, 111, 32,
+ 126, 62, 115, 72,
+ 127, 82, 55, 0,
+ 86, 0, 124, 0,
+ 0, 0, 0, 0
+ };
+
+ std::vector<size_t> vMask = {
+ 1, 1, 1, 1,
+ 1, 1, 1, 1,
+ 1, 1, 1, 1,
+ 1, 1, 1, 1,
+ 1, 1, 1, 1,
+ 1, 1, 1, 1,
+ 1, 1, 1, 0,
+ 1, 0, 1, 0,
+ };
+
+ SECTION("Simple RNN") {
+ Config::seed = 1234;
+
+ auto graph = New<ExpressionGraph>();
+ graph->setDevice(0);
+ graph->reserveWorkspaceMB(16);
+
+ std::vector<float> values;
+
+ auto input = graph->constant({1, 4, 4},
+ keywords::init=inits::glorot_uniform);
+
+ auto rnn = rnn::rnn(graph) //
+ ("prefix", "rnntest") //
+ ("type", "tanh") //
+ ("dimInput", 4) //
+ ("dimState", 4) //
+ .push_back(rnn::cell(graph)) //
+ .construct();
+
+ auto output = rnn->transduce(input);
+
+ graph->forward();
+
+ CHECK(output->shape() == Shape({1, 4, 4}));
+
+ std::vector<float> vOutput({
+ 0.108774, 0.237905, -0.819769, -0.212601,
+ -0.684652, 0.455977, 0.504662, -0.184837,
+ 0.769393, 0.28449, -0.200995, -0.260122,
+ -0.324909, -0.337419, -0.959819, 0.559088
+ });
+
+ output->val()->get(values);
+ CHECK( std::equal(values.begin(), values.end(),
+ vOutput.begin(), floatApprox) );
+ }
+
+ SECTION("S2S-style encoder") {
+ Config::seed = 1234;
+
+ auto graph = New<ExpressionGraph>();
+ graph->setDevice(0);
+ graph->reserveWorkspaceMB(16);
+
+ std::vector<float> values;
+
+ auto buildRnn = [&graph] (std::string prefix,
+ Expr input, Expr mask,
+ int dimRnn=32,
+ int depth=1,
+ int cellDepth=1,
+ std::string type="bidirectional",
+ std::string cellType="gru",
+ bool layerNorm=false,
+ bool skip=false) {
+
+ int dimEmb = input->shape()[1];
+
+ int first, second;
+ if(type == "bidirectional" || type == "alternating") {
+ // build two separate stacks, concatenate top output
+ first = depth;
+ second = 0;
+ } else {
+ // build 1-layer bidirectional stack, concatenate,
+ // build n-1 layer unidirectional stack
+ first = 1;
+ second = depth - first;
+ }
+
+ auto forward = type == "alternating" ? rnn::dir::alternating_forward
+ : rnn::dir::forward;
+
+ auto backward = type == "alternating" ? rnn::dir::alternating_backward
+ : rnn::dir::backward;
+
+ using namespace keywords;
+
+ auto rnnFw = rnn::rnn(graph) //
+ ("type", cellType) //
+ ("direction", forward) //
+ ("dimInput", dimEmb) //
+ ("dimState", dimRnn) //
+ ("layer-normalization", layerNorm) //
+ ("skip", skip);
+
+ for(int i = 1; i <= first; ++i) {
+ auto stacked = rnn::stacked_cell(graph);
+ for(int j = 1; j <= cellDepth; ++j) {
+ std::string paramPrefix = prefix + "_bi";
+ if(i > 1)
+ paramPrefix += "_l" + std::to_string(i);
+ if(i > 1 || j > 1)
+ paramPrefix += "_cell" + std::to_string(j);
+
+ stacked.push_back(rnn::cell(graph)("prefix", paramPrefix));
+ }
+ rnnFw.push_back(stacked);
+ }
+
+ auto rnnBw = rnn::rnn(graph) //
+ ("type", cellType) //
+ ("direction", backward) //
+ ("dimInput", dimEmb) //
+ ("dimState", dimRnn) //
+ ("layer-normalization", layerNorm) //
+ ("skip", skip);
+
+ for(int i = 1; i <= first; ++i) {
+ auto stacked = rnn::stacked_cell(graph);
+ for(int j = 1; j <= cellDepth; ++j) {
+ std::string paramPrefix = prefix + "_bi_r";
+ if(i > 1)
+ paramPrefix += "_l" + std::to_string(i);
+ if(i > 1 || j > 1)
+ paramPrefix += "_cell" + std::to_string(j);
+
+ stacked.push_back(rnn::cell(graph)("prefix", paramPrefix));
+ }
+ rnnBw.push_back(stacked);
+ }
+
+ auto context = concatenate({rnnFw->transduce(input, mask),
+ rnnBw->transduce(input, mask)},
+ axis = 1);
+
+ if(second > 0) {
+ // add more layers (unidirectional) by transducing the output of the
+ // previous bidirectional RNN through multiple layers
+
+ // construct RNN first
+ auto rnnUni = rnn::rnn(graph) //
+ ("type", cellType) //
+ ("dimInput", 2 * dimRnn) //
+ ("dimState", dimRnn) //
+ ("layer-normalization", layerNorm) //
+ ("skip", skip);
+
+ for(int i = first + 1; i <= second + first; ++i) {
+ auto stacked = rnn::stacked_cell(graph);
+ for(int j = 1; j <= cellDepth; ++j) {
+ std::string paramPrefix = prefix + "_l" + std::to_string(i) + "_cell"
+ + std::to_string(j);
+ stacked.push_back(rnn::cell(graph)("prefix", paramPrefix));
+ }
+ rnnUni.push_back(stacked);
+ }
+
+ // transduce context to new context
+ context = rnnUni->transduce(context);
+ }
+ return context;
+ };
+
+ int dimEmb = 16;
+ int dimBatch = 4;
+ int dimTime = 8;
+
+ auto emb = graph->param("Embeddings",
+ {128, dimEmb},
+ keywords::init=inits::glorot_uniform);
+
+ auto input = reshape(rows(emb, vWords), {dimBatch, dimEmb, dimTime});
+ auto mask = graph->constant({dimBatch, 1, dimTime},
+ keywords::init=inits::from_vector(vMask));
+
+ int dimRnn = 32;
+ auto context1 = buildRnn("enc1", input, mask, dimRnn);
+ auto contextSum1 = sum(context1, keywords::axis=1);
+
+ auto context2 = buildRnn("enc2", input, mask, dimRnn, 2, 2);
+ auto contextSum2 = sum(context2, keywords::axis=1);
+
+ // @TODO: why is this numerically instable on different machines?
+ //auto context3 = buildRnn("enc3", input, mask,
+ // dimRnn, 4, 4,
+ // "alternating", "lstm",
+ // true, true);
+ //auto contextSum3 = sum(context3, keywords::axis=1);
+
+ graph->forward();
+
+ CHECK(context1->shape() == Shape({dimBatch, 2 * dimRnn, dimTime}));
+ CHECK(contextSum1->shape() == Shape({dimBatch, 1, dimTime}));
+
+ std::vector<float> vContextSum1({
+ 0.14076, -0.102, 0.22832, -0.42283,
+ -0.15911, 0.33222, 0.74858, -0.59844,
+ -0.70797, -0.12694, -0.14322, 0.25016,
+ -0.91476, 0.39106, -0.75152, -0.02236,
+ -0.59753, 0.17417, -0.24941, -0.36464,
+ -0.62975, 0.35372, 0.12781, -0.79948,
+ -0.33747, -0.54613, 0.32809, -0.63282,
+ -0.78209, -0.37947, -0.50397, -0.63282
+ });
+
+ contextSum1->val()->get(values);
+ CHECK( std::equal(values.begin(), values.end(),
+ vContextSum1.begin(), floatApprox) );
+
+ CHECK(context2->shape() == Shape({dimBatch, 2 * dimRnn, dimTime}));
+ CHECK(contextSum2->shape() == Shape({dimBatch, 1, dimTime}));
+
+ std::vector<float> vContextSum2({
+ -0.0168112, -0.0524664, -0.0196701, -0.0118004,
+ 0.00975164, -0.0470996, -0.014982, -0.0248614,
+ -0.0110038, 0.00297422, -0.00327533, 0.0175996,
+ 0.0319444, 0.0196884, -0.0436654, -0.0257596,
+ 0.0131209, -0.0533302, -0.058655, 0.0666001,
+ 0.00823802, 0.0133473, -0.00715647, 0.119427,
+ 0.0282871, 0.104641, -0.0271743, 0.0658893,
+ 0.0687114, 0.0511032, 0.0673459, 0.0658893
+ });
+
+ contextSum2->val()->get(values);
+ CHECK( std::equal(values.begin(), values.end(),
+ vContextSum2.begin(), floatApprox) );
+
+ //CHECK(context3->shape() == Shape({dimBatch, 2 * dimRnn, dimTime}));
+ //CHECK(contextSum3->shape() == Shape({dimBatch, 1, dimTime}));
+ //
+ //std::vector<float> vContextSum3({
+ // 4.79443, 1.52788, 2.32984, 2.59648,
+ // -1.04159, -4.89242, 4.13013, -1.42554,
+ // 2.59088, 0.165236, -4.05358, -2.30649,
+ // 3.6943, -2.13945, -4.50602, 2.39471,
+ // -2.17873, 0.994103, -3.78782, 0.549939,
+ // -0.830426, -3.83337, -7.88747, 0.757133,
+ // -12.4974, -1.73116, -4.51886, 0.336533,
+ // -1.92069, -1.91202, 0.468423, 0.336285
+ //});
+ //
+ //contextSum3->val()->get(values);
+ //
+ ////for(int i = 0; i < values.size(); ++i) {
+ //// if(i && i % 4 == 0)
+ //// std::cout << std::endl;
+ ////
+ //// std::cout << values[i] << ", ";
+ ////}
+ //
+ //CHECK( std::equal(values.begin(), values.end(),
+ // vContextSum3.begin(), floatApprox) );
+ }
+}
diff --git a/src/training/dropper.h b/src/training/dropper.h
index 2b6a4ab1..0c19d327 100644
--- a/src/training/dropper.h
+++ b/src/training/dropper.h
@@ -12,8 +12,11 @@
namespace marian {
-__global__ void grad_drop(
- float* data, float* tmp, float* errors, float cut_off, int max_size) {
+__global__ void grad_drop(float* data,
+ float* tmp,
+ float* errors,
+ float cut_off,
+ int max_size) {
int idx = blockDim.x * blockIdx.x + threadIdx.x;
if(idx >= max_size)
return;
@@ -63,8 +66,11 @@ __global__ void buildIndices(float* denseData,
}
}
-__global__ void randomSampling(
- float* originalData, float* data, int size, int scale, int fullSize) {
+__global__ void randomSampling(float* originalData,
+ float* data,
+ int size,
+ int scale,
+ int fullSize) {
int idx = blockDim.x * blockIdx.x + threadIdx.x;
if(idx >= size)
return;
@@ -78,8 +84,11 @@ class GradientDropBase {
int step;
int _device;
- void grad_drop_do(
- float* data, float* errors, float* tmp, int len, float rate) {
+ void grad_drop_do(float* data,
+ float* errors,
+ float* tmp,
+ int len,
+ float rate) {
int threads = 512;
int blocks = 1 + len / threads;
cudaSetDevice(_device);
diff --git a/src/training/validator.cpp b/src/training/validator.cpp
index 4fbd8672..60ce60cd 100644
--- a/src/training/validator.cpp
+++ b/src/training/validator.cpp
@@ -3,7 +3,8 @@
namespace marian {
std::vector<Ptr<Validator<data::Corpus>>> Validators(
- std::vector<Ptr<Vocab>> vocabs, Ptr<Config> config) {
+ std::vector<Ptr<Vocab>> vocabs,
+ Ptr<Config> config) {
std::vector<Ptr<Validator<data::Corpus>>> validators;
auto validMetrics = config->get<std::vector<std::string>>("valid-metrics");
diff --git a/src/training/validator.h b/src/training/validator.h
index 837734bc..ec803d5d 100644
--- a/src/training/validator.h
+++ b/src/training/validator.h
@@ -154,8 +154,8 @@ public:
opts->set("inference", true);
builder_ = models::from_options(opts);
- UTIL_THROW_IF2(!options_->has("valid-script-path"),
- "valid-script metric but no script given");
+ ABORT_IF(!options_->has("valid-script-path"),
+ "valid-script metric but no script given");
}
virtual float validate(Ptr<ExpressionGraph> graph) {
@@ -305,5 +305,6 @@ protected:
* @return Vector of validator objects
*/
std::vector<Ptr<Validator<data::Corpus>>> Validators(
- std::vector<Ptr<Vocab>> vocabs, Ptr<Config> config);
+ std::vector<Ptr<Vocab>> vocabs,
+ Ptr<Config> config);
}