github.com/marian-nmt/marian.git

author    Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk>  2018-03-12 23:34:10 +0300
committer Roman Grundkiewicz <rgrundki@exseed.ed.ac.uk>  2018-03-12 23:34:10 +0300
commit    6d0c75cf48bab913e2c9c52f1c4c6cd0d656005d
tree      717342edade369af33a771f00a7dd05354ea8afb
parent    5f2eedc6e505eecf5bdef474be3e4f7066702fa7
Autoformat files
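
The reformatting in this commit follows typical clang-format conventions: an 80-column limit, two-space indentation, access specifiers flushed to column 0, left-aligned pointers, alphabetically sorted includes, and one-argument-per-line wrapping of long calls (note the explicit "// clang-format off/on" guards added in src/functional/floats.h and src/graph/expression_operators.cpp). A minimal .clang-format sketch that would produce a style like this is shown below; the option values are assumptions for illustration, not the repository's recorded configuration.

    # Hypothetical .clang-format sketch -- illustrative only, not taken from this commit
    BasedOnStyle: Google
    ColumnLimit: 80
    IndentWidth: 2
    AccessModifierOffset: -2
    PointerAlignment: Left
    DerivePointerAlignment: false
    BinPackArguments: false
    BinPackParameters: false
    SortIncludes: true
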
Diffstat (limited to 'src')
-rw-r--r--  src/command/marian.cpp | 3
-rw-r--r--  src/common/config.cpp | 1
-rw-r--r--  src/common/config.h | 8
-rw-r--r--  src/common/config_parser.cpp | 43
-rw-r--r--  src/common/definitions.h | 3
-rw-r--r--  src/common/shape.h | 336
-rw-r--r--  src/data/batch_generator.h | 31
-rw-r--r--  src/data/corpus_base.h | 1
-rw-r--r--  src/data/corpus_nbest.cpp | 16
-rw-r--r--  src/data/corpus_sqlite.cpp | 1
-rw-r--r--  src/data/corpus_sqlite.h | 1
-rw-r--r--  src/data/vocab.cpp | 5
-rw-r--r--  src/functional/array.h | 2
-rw-r--r--  src/functional/floats.h | 175
-rw-r--r--  src/functional/functional.h | 36
-rw-r--r--  src/functional/operands.h | 100
-rw-r--r--  src/functional/predicates.h | 453
-rw-r--r--  src/functional/shape.h | 22
-rw-r--r--  src/functional/tensor.h | 14
-rw-r--r--  src/functional/tmp.h | 99
-rw-r--r--  src/graph/expression_graph.cpp | 11
-rw-r--r--  src/graph/expression_graph.h | 10
-rw-r--r--  src/graph/expression_operators.cpp | 87
-rw-r--r--  src/graph/expression_operators.h | 4
-rw-r--r--  src/graph/node.cpp | 1
-rw-r--r--  src/graph/node.h | 5
-rw-r--r--  src/graph/node_initializers.cpp | 11
-rw-r--r--  src/graph/node_initializers.h | 6
-rw-r--r--  src/graph/node_operators.h | 11
-rw-r--r--  src/graph/node_operators_binary.h | 248
-rw-r--r--  src/graph/node_operators_unary.h | 126
-rw-r--r--  src/layers/constructors.h | 17
-rw-r--r--  src/layers/convolution.cpp | 26
-rw-r--r--  src/layers/factory.h | 1
-rw-r--r--  src/layers/generic.h | 76
-rw-r--r--  src/layers/guided_alignment.h | 14
-rw-r--r--  src/models/char_s2s.h | 30
-rw-r--r--  src/models/encdec.h | 50
-rw-r--r--  src/models/hardatt.h | 25
-rw-r--r--  src/models/model_base.h | 2
-rw-r--r--  src/models/model_factory.cpp | 9
-rw-r--r--  src/models/model_factory.h | 3
-rw-r--r--  src/models/s2s.h | 3
-rw-r--r--  src/models/states.h | 6
-rw-r--r--  src/models/transformer.h | 55
-rw-r--r--  src/optimizers/clippers.cpp | 1
-rw-r--r--  src/optimizers/optimizers.cpp | 9
-rw-r--r--  src/optimizers/optimizers.h | 2
-rw-r--r--  src/rescorer/rescorer.h | 27
-rw-r--r--  src/rescorer/score_collector.h | 18
-rw-r--r--  src/rnn/attention_constructors.h | 5
-rw-r--r--  src/rnn/cells.cpp | 6
-rw-r--r--  src/rnn/cells.h | 6
-rw-r--r--  src/rnn/constructors.h | 2
-rw-r--r--  src/rnn/rnn.h | 11
-rw-r--r--  src/tensors/allocator.h | 11
-rw-r--r--  src/tensors/backend.cpp | 1
-rw-r--r--  src/tensors/backend.h | 4
-rw-r--r--  src/tensors/cpu/add.h | 20
-rw-r--r--  src/tensors/cpu/backend.h | 10
-rw-r--r--  src/tensors/cpu/device.cpp | 38
-rw-r--r--  src/tensors/cpu/dropout.cpp | 23
-rw-r--r--  src/tensors/cpu/element.h | 8
-rw-r--r--  src/tensors/cpu/prod.cpp | 82
-rw-r--r--  src/tensors/cpu/tensor_operators.cpp | 457
-rw-r--r--  src/tensors/device.h | 30
-rw-r--r--  src/tensors/dispatch.h | 372
-rw-r--r--  src/tensors/gpu/add.cu | 17
-rw-r--r--  src/tensors/gpu/add.h | 8
-rw-r--r--  src/tensors/gpu/algorithm.cu | 78
-rw-r--r--  src/tensors/gpu/algorithm.h | 19
-rw-r--r--  src/tensors/gpu/backend.h | 11
-rw-r--r--  src/tensors/gpu/cuda_helpers.h | 7
-rw-r--r--  src/tensors/gpu/device.cu | 50
-rw-r--r--  src/tensors/gpu/dropout.cu | 40
-rw-r--r--  src/tensors/gpu/element.cu | 17
-rw-r--r--  src/tensors/gpu/element.h | 5
-rw-r--r--  src/tensors/gpu/prod.cu | 16
-rw-r--r--  src/tensors/gpu/prod.h | 1
-rw-r--r--  src/tensors/gpu/sparse.h | 2
-rw-r--r--  src/tensors/gpu/tensor_operators.cu | 124
-rw-r--r--  src/tensors/tensor.h | 20
-rw-r--r--  src/tensors/tensor_operators.h | 259
-rw-r--r--  src/training/graph_group_async.cpp | 12
-rw-r--r--  src/training/graph_group_async.h | 1
-rw-r--r--  src/training/graph_group_multinode.cpp | 9
-rw-r--r--  src/training/graph_group_singleton.cpp | 8
-rw-r--r--  src/training/graph_group_singleton.h | 1
-rw-r--r--  src/training/graph_group_sync.cpp | 11
-rw-r--r--  src/training/graph_group_sync.h | 1
-rw-r--r--  src/training/scheduler.h | 3
-rw-r--r--  src/training/sparse_tensor.cu | 22
-rw-r--r--  src/training/training.h | 1
-rw-r--r--  src/training/validator.h | 12
-rw-r--r--  src/translator/beam_search.h | 40
-rw-r--r--  src/translator/helpers.cpp | 4
-rw-r--r--  src/translator/helpers.cu | 1
-rw-r--r--  src/translator/helpers.h | 3
-rw-r--r--  src/translator/history.h | 9
-rw-r--r--  src/translator/nth_element.cpp | 49
-rw-r--r--  src/translator/nth_element.cu | 22
-rw-r--r--  src/translator/nth_element.h | 20
-rw-r--r--  src/translator/printer.h | 9
-rw-r--r--  src/translator/scorers.h | 22
-rw-r--r--  src/translator/translator.h | 6
105 files changed, 2111 insertions, 2159 deletions
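
For reference, a tree-wide reformat like the one summarized above is typically reproduced by running clang-format in place over the C++ and CUDA sources; the exact command behind this commit is not recorded here, so the line below is an assumed reconstruction only.

    # Assumed reconstruction of the reformatting step -- not the recorded command
    find src -name '*.cpp' -o -name '*.h' -o -name '*.cu' | xargs clang-format -i
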
diff --git a/src/command/marian.cpp b/src/command/marian.cpp
index 637506e5..92b7be7a 100644
--- a/src/command/marian.cpp
+++ b/src/command/marian.cpp
@@ -10,7 +10,6 @@
#include "training/graph_group_async_drop.h"
#endif
-
bool configureMPI(int, char**);
int main(int argc, char** argv) {
@@ -18,7 +17,7 @@ int main(int argc, char** argv) {
auto options = New<Config>(argc, argv);
auto devices = options->getDevices();
-
+
if(options->get<bool>("multi-node")) {
ABORT_IF(!configureMPI(argc, argv), "MPI not found.");
diff --git a/src/common/config.cpp b/src/common/config.cpp
index 9eca1f10..76b84950 100644
--- a/src/common/config.cpp
+++ b/src/common/config.cpp
@@ -65,5 +65,4 @@ void Config::AddYamlToNpz(const YAML::Node& yaml,
unsigned shape = out.size() + 1;
cnpy::npz_save(fName, varName, out.c_str(), &shape, 1, "a");
}
-
}
diff --git a/src/common/config.h b/src/common/config.h
index d6749478..68b61a58 100644
--- a/src/common/config.h
+++ b/src/common/config.h
@@ -76,7 +76,9 @@ public:
log();
if(has("version"))
- LOG(info, "[config] Model created with Marian {}", get("version").as<std::string>());
+ LOG(info,
+ "[config] Model created with Marian {}",
+ get("version").as<std::string>());
}
Config(const Config& other) : config_(YAML::Clone(other.config_)) {}
@@ -110,9 +112,7 @@ public:
YAML::Node getModelParameters();
void loadModelParameters(const std::string& name);
- const std::vector<DeviceId>& getDevices() {
- return devices_;
- }
+ const std::vector<DeviceId>& getDevices() { return devices_; }
void save(const std::string& name) {
OutputFileStream out(name);
diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp
index 31e76f92..4f5b6701 100644
--- a/src/common/config_parser.cpp
+++ b/src/common/config_parser.cpp
@@ -2,8 +2,8 @@
#include <boost/algorithm/string.hpp>
#include <boost/regex.hpp>
#include <set>
-#include <string>
#include <stdexcept>
+#include <string>
#if MKL_FOUND
//#include <omp.h>
@@ -15,9 +15,9 @@
#endif
#endif
-
#include "3rd_party/cnpy/cnpy.h"
#include "common/definitions.h"
+
#include "common/config.h"
#include "common/config_parser.h"
#include "common/file_stream.h"
@@ -146,13 +146,14 @@ bool ConfigParser::has(const std::string& key) const {
void ConfigParser::validateOptions() const {
if(mode_ == ConfigMode::translating) {
- UTIL_THROW_IF2(!has("vocabs") || get<std::vector<std::string>>("vocabs").empty(),
+ UTIL_THROW_IF2(
+ !has("vocabs") || get<std::vector<std::string>>("vocabs").empty(),
"Translating, but vocabularies are not given!");
for(const auto& modelFile : get<std::vector<std::string>>("models")) {
boost::filesystem::path modelPath(modelFile);
UTIL_THROW_IF2(!boost::filesystem::exists(modelPath),
- "Model file does not exist: " + modelFile);
+ "Model file does not exist: " + modelFile);
}
return;
@@ -177,9 +178,10 @@ void ConfigParser::validateOptions() const {
if(mode_ == ConfigMode::rescoring) {
UTIL_THROW_IF2(!boost::filesystem::exists(modelPath),
- "Model file does not exist: " + modelPath.string());
+ "Model file does not exist: " + modelPath.string());
- UTIL_THROW_IF2(!has("vocabs") || get<std::vector<std::string>>("vocabs").empty(),
+ UTIL_THROW_IF2(
+ !has("vocabs") || get<std::vector<std::string>>("vocabs").empty(),
"Scoring, but vocabularies are not given!");
return;
@@ -193,8 +195,9 @@ void ConfigParser::validateOptions() const {
!modelDir.empty() && !boost::filesystem::is_directory(modelDir),
"Model directory does not exist");
- UTIL_THROW_IF2(!modelDir.empty() && !(boost::filesystem::status(modelDir).permissions()
- & boost::filesystem::owner_write),
+ UTIL_THROW_IF2(!modelDir.empty()
+ && !(boost::filesystem::status(modelDir).permissions()
+ & boost::filesystem::owner_write),
"No write permission in model directory");
UTIL_THROW_IF2(
@@ -835,7 +838,6 @@ void ConfigParser::parseOptions(int argc, char** argv, bool doValidate) {
SET_OPTION("transformer-dim-ffn", int);
SET_OPTION("transformer-ffn-activation", std::string);
-
#ifdef CUDNN
SET_OPTION("char-stride", int);
SET_OPTION("char-highway", int);
@@ -976,7 +978,7 @@ void ConfigParser::parseOptions(int argc, char** argv, bool doValidate) {
SET_OPTION("relative-paths", bool);
SET_OPTION("devices", std::vector<std::string>);
SET_OPTION("cpu-threads", size_t);
- //SET_OPTION("omp-threads", size_t);
+ // SET_OPTION("omp-threads", size_t);
SET_OPTION("mini-batch", int);
SET_OPTION("maxi-batch", int);
@@ -1021,24 +1023,22 @@ void ConfigParser::parseOptions(int argc, char** argv, bool doValidate) {
exit(0);
}
-// @TODO: this should probably be in processOptionDevices()
-//#ifdef BLAS_FOUND
-// //omp_set_num_threads(vm_["omp-threads"].as<size_t>());
-//#ifdef MKL_FOUND
-// mkl_set_num_threads(vm_["omp-threads"].as<size_t>());
-//#endif
-//#endif
+ // @TODO: this should probably be in processOptionDevices()
+ //#ifdef BLAS_FOUND
+ // //omp_set_num_threads(vm_["omp-threads"].as<size_t>());
+ //#ifdef MKL_FOUND
+ // mkl_set_num_threads(vm_["omp-threads"].as<size_t>());
+ //#endif
+ //#endif
}
std::vector<DeviceId> ConfigParser::getDevices() {
std::vector<DeviceId> devices;
try {
-
std::string devicesStr
= Join(config_["devices"].as<std::vector<std::string>>());
-
if(mode_ == ConfigMode::training && get<bool>("multi-node")) {
auto parts = Split(devicesStr, ":");
for(size_t i = 1; i < parts.size(); ++i) {
@@ -1061,11 +1061,10 @@ std::vector<DeviceId> ConfigParser::getDevices() {
if(config_["cpu-threads"].as<size_t>() > 0) {
devices.clear();
for(size_t i = 0; i < config_["cpu-threads"].as<size_t>(); ++i)
- devices.push_back({i, DeviceType::cpu});
+ devices.push_back({i, DeviceType::cpu});
}
- }
- catch(...) {
+ } catch(...) {
ABORT("Problem parsing devices, please report an issue on github");
}
diff --git a/src/common/definitions.h b/src/common/definitions.h
index 8fb3bbb6..d7975dc4 100644
--- a/src/common/definitions.h
+++ b/src/common/definitions.h
@@ -1,10 +1,10 @@
#pragma once
#include <functional>
+#include <iostream>
#include <memory>
#include <string>
#include <vector>
-#include <iostream>
#include "common/logging.h"
#include "shape.h"
@@ -57,7 +57,6 @@ struct DeviceId {
friend bool operator==(DeviceId id1, DeviceId id2) {
return id1.no == id2.no && id1.type == id2.type;
}
-
};
class TensorBase;
diff --git a/src/common/shape.h b/src/common/shape.h
index da98a3b9..5cab14fb 100644
--- a/src/common/shape.h
+++ b/src/common/shape.h
@@ -1,195 +1,191 @@
#pragma once
+#include <algorithm>
#include <cstdint>
#include <iostream>
-#include <string>
#include <sstream>
+#include <string>
#include <vector>
-#include <algorithm>
#include "common/logging.h"
namespace marian {
struct Shape {
- public:
- std::vector<int> shape_;
-
- public:
- Shape() : shape_{1} {}
-
- Shape(std::initializer_list<int> il) : Shape() {
- shape_.resize(il.size());
- std::copy(il.begin(), il.end(), begin());
- }
-
- void resize(size_t n) {
- shape_.resize(n, 1);
- }
-
- const int* data() const {
- return shape_.data();
- }
-
- int* data() {
- return shape_.data();
- }
-
- Shape(const Shape& shape) : Shape() {
- shape_.resize(shape.size());
- std::copy(shape.begin(), shape.end(), begin());
- }
-
- inline void set(int i, int val) {
- dim(i) = val;
- }
-
- inline int& dim(int i) {
- if(i >= 0) {
- ABORT_IF(i >= size(),
- "Index {} is out of bounds, shape has {} dimension", i, size());
- return shape_[i];
- }
- else {
- ABORT_IF((int)size() + i < 0,
- "Negative index {} is out of bounds, shape has {} dimension", i, size());
- return shape_[size() + i];
- }
- }
-
- inline const int& dim(int i) const { return const_cast<Shape&>(*this).dim(i); }
-
- inline int operator[](int i) { return dim(i); }
-
- inline int operator[](int i) const { return dim(i); }
+public:
+ std::vector<int> shape_;
- inline int& back() { return shape_.back(); }
+public:
+ Shape() : shape_{1} {}
- inline int stride(int i) const {
- std::vector<int> stride(shape_.size(), 1);
- for(int j = shape_.size() - 2; j >= 0; --j)
- stride[j] = stride[j + 1] * shape_[j + 1];
+ Shape(std::initializer_list<int> il) : Shape() {
+ shape_.resize(il.size());
+ std::copy(il.begin(), il.end(), begin());
+ }
- if(i >= 0)
- return stride[i];
- else
- return stride[size() + i];
- }
-
- inline size_t size() const { return shape_.size(); }
-
- inline int elements() const {
- int el = 1;
- for(auto s : shape_)
- el *= s;
- return el;
- }
-
- inline void dims(int i, std::vector<int>& d) const {
- d.resize(shape_.size());
-
- std::vector<int> stride(shape_.size(), 1);
- for(int j = shape_.size() - 2; j >= 0; --j)
- stride[j] = stride[j + 1] * shape_[j + 1];
-
- for(int j = 0; j < d.size(); ++j)
- d[j] = (i / stride[j]) % shape_[j];
- }
-
- auto begin() -> decltype(shape_.begin()) { return shape_.begin(); }
- auto begin() const -> decltype(shape_.begin()) { return shape_.begin(); }
-
- auto end() -> decltype(shape_.end()) { return shape_.end(); }
- auto end() const -> decltype(shape_.end()) { return shape_.end(); }
-
- auto rbegin() -> decltype(shape_.rbegin()) { return shape_.rbegin(); }
- auto rbegin() const -> decltype(shape_.rbegin()) { return shape_.rbegin(); }
-
- auto rend() -> decltype(shape_.rend()) { return shape_.rend(); }
- auto rend() const -> decltype(shape_.rend()) { return shape_.rend(); }
+ void resize(size_t n) { shape_.resize(n, 1); }
- bool operator==(const Shape& other) const {
- return size() == other.size() && std::equal(begin(), end(), other.begin());
- }
-
- bool operator!=(const Shape& other) const { return !(*this == other); }
-
- std::string toString() const {
- std::stringstream strm;
- strm << "shape=" << (*this)[0];
- for(int i = 1; i < size(); ++i)
- strm << "x" << (*this)[i];
- strm << " size=" << elements() << " ("
- << elements() * sizeof(float) << "B)";
- return strm.str();
- }
-
- friend std::ostream& operator<<(std::ostream& strm, const Shape& shape) {
- strm << shape.toString();
- return strm;
- }
+ const int* data() const { return shape_.data(); }
- operator std::string() const {
- std::stringstream ss;
- ss << *this;
- return ss.str();
- }
+ int* data() { return shape_.data(); }
- int axis(int ax) {
- if(ax < 0)
- return size() + ax;
- else
- return ax;
- }
+ Shape(const Shape& shape) : Shape() {
+ shape_.resize(shape.size());
+ std::copy(shape.begin(), shape.end(), begin());
+ }
- static Shape broadcast(const std::vector<Shape>& shapes) {
- int maxDims = 0;
- for(auto& s : shapes)
- if(s.size() > maxDims)
- maxDims = s.size();
-
- Shape shape;
- shape.resize(maxDims);
-
- for(auto& s : shapes) {
- for(int i = 0; i < s.size(); ++i) {
- ABORT_IF(shape[-i] != s[-i] && shape[-i] != 1 && s[-i] != 1,
- "Shapes {} and {} cannot be broadcasted",
- (std::string)shape,
- (std::string)s);
- shape.set(-i, std::max(shape[-i], s[-i]));
- }
+ inline void set(int i, int val) { dim(i) = val; }
+
+ inline int& dim(int i) {
+ if(i >= 0) {
+ ABORT_IF(i >= size(),
+ "Index {} is out of bounds, shape has {} dimension",
+ i,
+ size());
+ return shape_[i];
+ } else {
+ ABORT_IF((int)size() + i < 0,
+ "Negative index {} is out of bounds, shape has {} dimension",
+ i,
+ size());
+ return shape_[size() + i];
+ }
+ }
+
+ inline const int& dim(int i) const {
+ return const_cast<Shape&>(*this).dim(i);
+ }
+
+ inline int operator[](int i) { return dim(i); }
+
+ inline int operator[](int i) const { return dim(i); }
+
+ inline int& back() { return shape_.back(); }
+
+ inline int stride(int i) const {
+ std::vector<int> stride(shape_.size(), 1);
+ for(int j = shape_.size() - 2; j >= 0; --j)
+ stride[j] = stride[j + 1] * shape_[j + 1];
+
+ if(i >= 0)
+ return stride[i];
+ else
+ return stride[size() + i];
+ }
+
+ inline size_t size() const { return shape_.size(); }
+
+ inline int elements() const {
+ int el = 1;
+ for(auto s : shape_)
+ el *= s;
+ return el;
+ }
+
+ inline void dims(int i, std::vector<int>& d) const {
+ d.resize(shape_.size());
+
+ std::vector<int> stride(shape_.size(), 1);
+ for(int j = shape_.size() - 2; j >= 0; --j)
+ stride[j] = stride[j + 1] * shape_[j + 1];
+
+ for(int j = 0; j < d.size(); ++j)
+ d[j] = (i / stride[j]) % shape_[j];
+ }
+
+ auto begin() -> decltype(shape_.begin()) { return shape_.begin(); }
+ auto begin() const -> decltype(shape_.begin()) { return shape_.begin(); }
+
+ auto end() -> decltype(shape_.end()) { return shape_.end(); }
+ auto end() const -> decltype(shape_.end()) { return shape_.end(); }
+
+ auto rbegin() -> decltype(shape_.rbegin()) { return shape_.rbegin(); }
+ auto rbegin() const -> decltype(shape_.rbegin()) { return shape_.rbegin(); }
+
+ auto rend() -> decltype(shape_.rend()) { return shape_.rend(); }
+ auto rend() const -> decltype(shape_.rend()) { return shape_.rend(); }
+
+ bool operator==(const Shape& other) const {
+ return size() == other.size() && std::equal(begin(), end(), other.begin());
+ }
+
+ bool operator!=(const Shape& other) const { return !(*this == other); }
+
+ std::string toString() const {
+ std::stringstream strm;
+ strm << "shape=" << (*this)[0];
+ for(int i = 1; i < size(); ++i)
+ strm << "x" << (*this)[i];
+ strm << " size=" << elements() << " (" << elements() * sizeof(float)
+ << "B)";
+ return strm.str();
+ }
+
+ friend std::ostream& operator<<(std::ostream& strm, const Shape& shape) {
+ strm << shape.toString();
+ return strm;
+ }
+
+ operator std::string() const {
+ std::stringstream ss;
+ ss << *this;
+ return ss.str();
+ }
+
+ int axis(int ax) {
+ if(ax < 0)
+ return size() + ax;
+ else
+ return ax;
+ }
+
+ static Shape broadcast(const std::vector<Shape>& shapes) {
+ int maxDims = 0;
+ for(auto& s : shapes)
+ if(s.size() > maxDims)
+ maxDims = s.size();
+
+ Shape shape;
+ shape.resize(maxDims);
+
+ for(auto& s : shapes) {
+ for(int i = 0; i < s.size(); ++i) {
+ ABORT_IF(shape[-i] != s[-i] && shape[-i] != 1 && s[-i] != 1,
+ "Shapes {} and {} cannot be broadcasted",
+ (std::string)shape,
+ (std::string)s);
+ shape.set(-i, std::max(shape[-i], s[-i]));
}
- return shape;
- }
-
- template <typename T>
- static Shape broadcast(const std::initializer_list<T>& il) {
- return broadcast(std::vector<T>(il));
}
-
- template <typename T>
- static Shape broadcast(const std::vector<T>& nodes) {
- int maxDims = 0;
- for(auto& n : nodes)
- if(n->shape().size() > maxDims)
- maxDims = n->shape().size();
-
- Shape shape;
- shape.resize(maxDims);
-
- for(auto& node : nodes) {
- const Shape& shapen = node->shape();
- for(int i = 1; i <= shapen.size(); ++i) {
- ABORT_IF(shape[-i] != shapen[-i] && shape[-i] != 1 && shapen[-i] != 1,
- "Shapes {} and {} cannot be broadcasted",
- (std::string)shape,
- (std::string)shapen);
- shape.set(-i, std::max(shape[-i], shapen[-i]));
- }
+ return shape;
+ }
+
+ template <typename T>
+ static Shape broadcast(const std::initializer_list<T>& il) {
+ return broadcast(std::vector<T>(il));
+ }
+
+ template <typename T>
+ static Shape broadcast(const std::vector<T>& nodes) {
+ int maxDims = 0;
+ for(auto& n : nodes)
+ if(n->shape().size() > maxDims)
+ maxDims = n->shape().size();
+
+ Shape shape;
+ shape.resize(maxDims);
+
+ for(auto& node : nodes) {
+ const Shape& shapen = node->shape();
+ for(int i = 1; i <= shapen.size(); ++i) {
+ ABORT_IF(shape[-i] != shapen[-i] && shape[-i] != 1 && shapen[-i] != 1,
+ "Shapes {} and {} cannot be broadcasted",
+ (std::string)shape,
+ (std::string)shapen);
+ shape.set(-i, std::max(shape[-i], shapen[-i]));
}
- return shape;
}
+ return shape;
+ }
};
-
}
diff --git a/src/data/batch_generator.h b/src/data/batch_generator.h
index 0a6f801d..3ddf8752 100644
--- a/src/data/batch_generator.h
+++ b/src/data/batch_generator.h
@@ -1,11 +1,11 @@
#pragma once
+#include <boost/timer/timer.hpp>
+#include <condition_variable>
#include <deque>
#include <functional>
-#include <queue>
#include <mutex>
-#include <condition_variable>
-#include <boost/timer/timer.hpp>
+#include <queue>
#include "common/config.h"
#include "data/batch_stats.h"
@@ -47,16 +47,17 @@ private:
void fillBatches(bool shuffle = true) {
typedef typename sample::value_type Item;
- auto itemCmp = [](const Item& sa, const Item& sb) {
- return sa.size() < sb.size();
- };
+ auto itemCmp
+ = [](const Item& sa, const Item& sb) { return sa.size() < sb.size(); };
auto cmpSrc = [itemCmp](const sample& a, const sample& b) {
- return std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end(), itemCmp);
+ return std::lexicographical_compare(
+ a.begin(), a.end(), b.begin(), b.end(), itemCmp);
};
auto cmpTrg = [itemCmp](const sample& a, const sample& b) {
- return std::lexicographical_compare(a.rbegin(), a.rend(), b.rbegin(), b.rend(), itemCmp);
+ return std::lexicographical_compare(
+ a.rbegin(), a.rend(), b.rbegin(), b.rend(), itemCmp);
};
auto cmpNone = [](const sample& a, const sample& b) { return &a < &b; };
@@ -168,9 +169,8 @@ public:
operator bool() const {
// wait if empty but loading
std::unique_lock<std::mutex> lock(loadMutex_);
- loadCondition_.wait(lock, [this]{
- return loadReady_ || !bufferedBatches_.empty();
- });
+ loadCondition_.wait(
+ lock, [this] { return loadReady_ || !bufferedBatches_.empty(); });
return !bufferedBatches_.empty();
}
@@ -178,15 +178,16 @@ public:
BatchPtr next() {
{
std::unique_lock<std::mutex> lock(loadMutex_);
- loadCondition_.wait(lock, [this]{
- return loadReady_ || !bufferedBatches_.empty();
- });
+ loadCondition_.wait(
+ lock, [this] { return loadReady_ || !bufferedBatches_.empty(); });
}
ABORT_IF(bufferedBatches_.empty(), "No batches to fetch, run prepare()");
currentBatch_ = bufferedBatches_.front();
- if(loadReady_ && bufferedBatches_.size() <= std::max(options_->get<int>("maxi-batch") / 5, 1)) {
+ if(loadReady_
+ && bufferedBatches_.size()
+ <= std::max(options_->get<int>("maxi-batch") / 5, 1)) {
{
std::unique_lock<std::mutex> lock(loadMutex_);
loadReady_ = false;
diff --git a/src/data/corpus_base.h b/src/data/corpus_base.h
index ebd157bc..cb2a1518 100644
--- a/src/data/corpus_base.h
+++ b/src/data/corpus_base.h
@@ -400,7 +400,6 @@ public:
std::cerr << std::endl;
}
}
-
};
class CorpusIterator;
diff --git a/src/data/corpus_nbest.cpp b/src/data/corpus_nbest.cpp
index 9eefe898..105b5acd 100644
--- a/src/data/corpus_nbest.cpp
+++ b/src/data/corpus_nbest.cpp
@@ -1,7 +1,7 @@
#include <random>
-#include "data/corpus_nbest.h"
#include "common/utils.h"
+#include "data/corpus_nbest.h"
namespace marian {
namespace data {
@@ -10,8 +10,8 @@ CorpusNBest::CorpusNBest(Ptr<Config> options, bool translate /*= false*/)
: CorpusBase(options, translate) {}
CorpusNBest::CorpusNBest(std::vector<std::string> paths,
- std::vector<Ptr<Vocab>> vocabs,
- Ptr<Config> options)
+ std::vector<Ptr<Vocab>> vocabs,
+ Ptr<Config> options)
: CorpusBase(paths, vocabs, options) {}
int numFromNbest(const std::string& line) {
@@ -19,7 +19,8 @@ int numFromNbest(const std::string& line) {
Split(line, fields, " ||| ", true);
ABORT_IF(fields.size() < 4,
"Too few fields ({}) in line \"{}\", is this a correct n-best list?",
- fields.size(), line);
+ fields.size(),
+ line);
return std::stoi(fields[0]);
}
@@ -28,7 +29,8 @@ std::string lineFromNbest(const std::string& line) {
Split(line, fields, " ||| ", true);
ABORT_IF(fields.size() < 4,
"Too few fields ({}) in line \"{}\", is this a correct n-best list?",
- fields.size(), line);
+ fields.size(),
+ line);
return fields[1];
}
@@ -56,7 +58,8 @@ SentenceTuple CorpusNBest::next() {
for(size_t i = 0; i < last; ++i) {
if(curr_num > lastNum_) {
ABORT_IF(!std::getline((std::istream&)*files_[i], lastLines_[i]),
- "Too few lines in input {}", i);
+ "Too few lines in input {}",
+ i);
}
addWordsToSentenceTuple(lastLines_[i], i, tup);
}
@@ -90,6 +93,5 @@ void CorpusNBest::reset() {
files_.emplace_back(new InputFileStream(path));
}
}
-
}
}
diff --git a/src/data/corpus_sqlite.cpp b/src/data/corpus_sqlite.cpp
index 04cbc1b9..9f2d95bb 100644
--- a/src/data/corpus_sqlite.cpp
+++ b/src/data/corpus_sqlite.cpp
@@ -154,6 +154,5 @@ void CorpusSQLite::restore(Ptr<TrainingState> ts) {
reset();
}
}
-
}
}
diff --git a/src/data/corpus_sqlite.h b/src/data/corpus_sqlite.h
index c604edab..2e871805 100644
--- a/src/data/corpus_sqlite.h
+++ b/src/data/corpus_sqlite.h
@@ -19,7 +19,6 @@
#include <SQLiteCpp/SQLiteCpp.h>
#include <SQLiteCpp/sqlite3/sqlite3.h>
-
static void SQLiteRandomSeed(sqlite3_context* context,
int argc,
sqlite3_value** argv) {
diff --git a/src/data/vocab.cpp b/src/data/vocab.cpp
index 07d16c33..196c147c 100644
--- a/src/data/vocab.cpp
+++ b/src/data/vocab.cpp
@@ -138,8 +138,9 @@ void Vocab::create(const std::string& vocabPath, const std::string& trainPath) {
"Specified vocab directory {} does not exist",
dir);
- ABORT_IF(!dir.empty() && !(boost::filesystem::status(dir).permissions()
- & boost::filesystem::owner_write),
+ ABORT_IF(!dir.empty()
+ && !(boost::filesystem::status(dir).permissions()
+ & boost::filesystem::owner_write),
"No write permission in vocab directory {}",
dir);
diff --git a/src/functional/array.h b/src/functional/array.h
index 7e415abb..9d187999 100644
--- a/src/functional/array.h
+++ b/src/functional/array.h
@@ -32,7 +32,5 @@ struct Array {
data_[i] = val;
}
};
-
}
-
}
diff --git a/src/functional/floats.h b/src/functional/floats.h
index 679c4750..33478843 100644
--- a/src/functional/floats.h
+++ b/src/functional/floats.h
@@ -4,93 +4,90 @@
#include "functional/operands.h"
namespace marian {
- namespace functional {
-
- namespace float2unsigned {
- constexpr float abs(float x) { return x < 0 ? -x : x; }
-
- constexpr int exponent(float x) {
- return abs(x) >= 2 ? exponent(x / 2) + 1 :
- abs(x) < 1 ? exponent(x * 2) - 1 :
- 0;
- }
-
- constexpr float scalbn(float value, int exponent) {
- return exponent == 0 ? value :
- exponent > 0 ? scalbn(value * 2, exponent - 1) :
- scalbn(value / 2, exponent + 1);
- }
-
- constexpr unsigned mantissa(float x, int exp) {
- // remove hidden 1 and bias the exponent to get integer
- return abs(x) < std::numeric_limits<float>::infinity() ?
- scalbn(scalbn(abs(x), -exp) - 1, 23) : 0;
- }
-
- constexpr unsigned to_binary(float x, unsigned sign, int exp) {
- return sign * (1u << 31)
- + (exp + 127) * (1u << 23)
- + mantissa(x, exp);
- }
-
- constexpr unsigned to_binary(float x) {
- return x == 0 ? 0 : to_binary(x, x < 0, exponent(x));
- }
-
- }
-
- namespace unsigned2float {
-
- constexpr float sign(unsigned i) {
- return (i & (1u << 31)) ? -1.f : 1.f;
- }
-
- constexpr int exponent(unsigned i) {
- return int((i >> 23) & 255u) - 127;
- }
-
- constexpr float sig(unsigned i, unsigned shift) {
- return ((i >> shift) & 1u) * 1.f / (1u << (23 - shift))
- + (shift > 0 ? sig(i, shift - 1) : 0);
- }
-
- constexpr float powr(int exp) {
- return exp > 0 ? 2.f * powr(exp - 1) : 1.f;
- }
-
- constexpr float pow(int exp) {
- return exp < 0 ? 1.f / powr(-exp) : powr(exp);
- }
-
- constexpr float from_binary(unsigned i) {
- return (1.f + sig(i, 22u))
- * pow(exponent(i))
- * sign(i);
- }
- }
-
- constexpr unsigned f2i(float x) {
- return float2unsigned::to_binary(x);
- }
-
- constexpr float i2f(float x) {
- return unsigned2float::from_binary(x);
- }
-
- template <unsigned V>
- struct F {
- static constexpr auto value = i2f(V);
- static constexpr auto binary = V;
-
- template <typename ...Args>
- __HDI__ constexpr float operator()(Args&&... args) const {
- return value;
- }
-
- std::string to_string() {
- return "F<" + std::to_string(value) + ">";
- }
- };
-
+namespace functional {
+
+namespace float2unsigned {
+constexpr float abs(float x) {
+ return x < 0 ? -x : x;
+}
+
+// clang-format off
+constexpr int exponent(float x) {
+ return abs(x) >= 2 ? exponent(x / 2) + 1 :
+ abs(x) < 1 ? exponent(x * 2) - 1 :
+ 0;
+}
+
+constexpr float scalbn(float value, int exponent) {
+ return exponent == 0 ? value :
+ exponent > 0 ? scalbn(value * 2, exponent - 1) :
+ scalbn(value / 2, exponent + 1);
+}
+// clang-format on
+
+constexpr unsigned mantissa(float x, int exp) {
+ // remove hidden 1 and bias the exponent to get integer
+ return abs(x) < std::numeric_limits<float>::infinity()
+ ? scalbn(scalbn(abs(x), -exp) - 1, 23)
+ : 0;
+}
+
+constexpr unsigned to_binary(float x, unsigned sign, int exp) {
+ return sign * (1u << 31) + (exp + 127) * (1u << 23) + mantissa(x, exp);
+}
+
+constexpr unsigned to_binary(float x) {
+ return x == 0 ? 0 : to_binary(x, x < 0, exponent(x));
+}
+}
+
+namespace unsigned2float {
+
+constexpr float sign(unsigned i) {
+ return (i & (1u << 31)) ? -1.f : 1.f;
+}
+
+constexpr int exponent(unsigned i) {
+ return int((i >> 23) & 255u) - 127;
+}
+
+constexpr float sig(unsigned i, unsigned shift) {
+ return ((i >> shift) & 1u) * 1.f / (1u << (23 - shift))
+ + (shift > 0 ? sig(i, shift - 1) : 0);
+}
+
+constexpr float powr(int exp) {
+ return exp > 0 ? 2.f * powr(exp - 1) : 1.f;
+}
+
+constexpr float pow(int exp) {
+ return exp < 0 ? 1.f / powr(-exp) : powr(exp);
+}
+
+constexpr float from_binary(unsigned i) {
+ return (1.f + sig(i, 22u)) * pow(exponent(i)) * sign(i);
+}
+}
+
+constexpr unsigned f2i(float x) {
+ return float2unsigned::to_binary(x);
+}
+
+constexpr float i2f(float x) {
+ return unsigned2float::from_binary(x);
+}
+
+template <unsigned V>
+struct F {
+ static constexpr auto value = i2f(V);
+ static constexpr auto binary = V;
+
+ template <typename... Args>
+ __HDI__ constexpr float operator()(Args&&... args) const {
+ return value;
}
-} \ No newline at end of file
+
+ std::string to_string() { return "F<" + std::to_string(value) + ">"; }
+};
+}
+}
diff --git a/src/functional/functional.h b/src/functional/functional.h
index c5a174e8..42ebcdeb 100644
--- a/src/functional/functional.h
+++ b/src/functional/functional.h
@@ -4,25 +4,25 @@
#include "functional/predicates.h"
namespace marian {
- namespace functional {
+namespace functional {
- template <int N>
- using ref = Assignee<N>;
+template <int N>
+using ref = Assignee<N>;
- static ref<1> _1;
- static ref<2> _2;
- static ref<3> _3;
- static ref<4> _4;
- static ref<5> _5;
- static ref<6> _6;
- static ref<7> _7;
- static ref<8> _8;
- static ref<9> _9;
+static ref<1> _1;
+static ref<2> _2;
+static ref<3> _3;
+static ref<4> _4;
+static ref<5> _5;
+static ref<6> _6;
+static ref<7> _7;
+static ref<8> _8;
+static ref<9> _9;
- static C<0> _0c;
- static C<1> _1c;
- static C<2> _2c;
- static C<-1> _1cneg;
- static C<-2> _2cneg;
- }
+static C<0> _0c;
+static C<1> _1c;
+static C<2> _2c;
+static C<-1> _1cneg;
+static C<-2> _2cneg;
+}
} \ No newline at end of file
diff --git a/src/functional/operands.h b/src/functional/operands.h
index f3f3c22e..573770d1 100644
--- a/src/functional/operands.h
+++ b/src/functional/operands.h
@@ -5,71 +5,69 @@
#include "functional/defs.h"
namespace marian {
- namespace functional {
-
- template <class C>
- using IsClass = typename std::enable_if<std::is_class<C>::value, C>::type;
-
- template <int N>
- struct Select {
- template <typename T, typename ...Args>
- __HDI__ static auto apply(T&& arg, Args&&... args) -> decltype(Select<N-1>::apply(args...)) {
- return Select<N-1>::apply(args...);
- }
- };
-
- template <>
- struct Select<0> {
- template <typename T, typename ...Args>
- __HDI__ static T apply(T&& arg, Args&&... args) {
- return arg;
- }
- };
+namespace functional {
+
+template <class C>
+using IsClass = typename std::enable_if<std::is_class<C>::value, C>::type;
+
+template <int N>
+struct Select {
+ template <typename T, typename... Args>
+ __HDI__ static auto apply(T&& arg, Args&&... args)
+ -> decltype(Select<N - 1>::apply(args...)) {
+ return Select<N - 1>::apply(args...);
+ }
+};
+
+template <>
+struct Select<0> {
+ template <typename T, typename... Args>
+ __HDI__ static T apply(T&& arg, Args&&... args) {
+ return arg;
+ }
+};
/******************************************************************************/
- template <int V>
- struct C {
- static constexpr auto value = V;
+template <int V>
+struct C {
+ static constexpr auto value = V;
- template <typename ...Args>
- __HDI__ float operator()(Args&&... args) { return V; }
+ template <typename... Args>
+ __HDI__ float operator()(Args&&... args) {
+ return V;
+ }
- std::string to_string() {
- return "C<" + std::to_string(V) + ">";
- }
- };
+ std::string to_string() { return "C<" + std::to_string(V) + ">"; }
+};
/******************************************************************************/
- struct Capture {
- float value;
+struct Capture {
+ float value;
- Capture(float val) : value(val) {};
+ Capture(float val) : value(val){};
- template <typename ...Args>
- __HDI__ float operator()(Args&&... args) { return value; }
+ template <typename... Args>
+ __HDI__ float operator()(Args&&... args) {
+ return value;
+ }
- std::string to_string() {
- return "Cap(" + std::to_string(value) + ")";
- }
- };
+ std::string to_string() { return "Cap(" + std::to_string(value) + ")"; }
+};
/******************************************************************************/
- template <int N>
- struct Var {
- static constexpr auto index = N;
-
- template <typename ...Args>
- __HDI__ float& operator()(Args&&... args) {
- return Select<N-1>::apply(args...);
- }
-
- std::string to_string() {
- return "Var<" + std::to_string(N) + ">";
- }
- };
+template <int N>
+struct Var {
+ static constexpr auto index = N;
+ template <typename... Args>
+ __HDI__ float& operator()(Args&&... args) {
+ return Select<N - 1>::apply(args...);
}
+
+ std::string to_string() { return "Var<" + std::to_string(N) + ">"; }
+};
+}
}
diff --git a/src/functional/predicates.h b/src/functional/predicates.h
index eb091d43..41a741bb 100644
--- a/src/functional/predicates.h
+++ b/src/functional/predicates.h
@@ -4,237 +4,230 @@
#include "functional/operands.h"
namespace marian {
- namespace functional {
-
- template <typename Function, typename X>
- struct UnaryFunctor {
- X x;
-
- template <class Arg>
- UnaryFunctor(Arg a) : x(a) {}
-
- template <typename ...Args>
- __HDI__ float operator()(Args&&... args) {
- return Function::apply(x(args...));
- }
-
- std::string to_string() {
- return Function::n() + "<" + x.to_string() + ">";
- }
- };
-
- template <class Function, class X, class Y>
- struct BinaryFunctor {
- X x;
- Y y;
-
- template <class Arg1, class Arg2>
- BinaryFunctor(Arg1 arg1, Arg2 arg2) : x(arg1), y(arg2) {}
-
- template <typename ...Args>
- __HDI__ float operator()(Args&&... args) {
- return Function::apply(x(args...), y(args...));
- }
-
- std::string to_string() {
- return Function::n() +
- "<" + x.to_string() +
- "," + y.to_string() + ">";
- }
- };
-
- #define UNARY(name, name2, func) \
- namespace elem { \
- struct name { \
- __HDI__ static float apply(float x) { return func; } \
- static std::string n() { return #name; }\
- }; \
- }\
- template <class X> using name = UnaryFunctor<elem::name, X>;\
- template <typename X>\
- name<IsClass<X>> name2(X x) {\
- return name<X>(x);\
- }\
- static name<Capture> name2(Capture x) {\
- return name<Capture>(x);\
- }
-
- #define BINARY(name, name2, func) \
- namespace elem { \
- struct name { \
- __HDI__ static float apply(float x, float y) { return func; } \
- static std::string n() { return #name; }\
- }; \
- }\
- template <class X, class Y> using name = BinaryFunctor<elem::name, X, Y>;\
- template <class X, class Y>\
- name<IsClass<X>, IsClass<Y>> name2(X x, Y y) {\
- return name<X, Y>(x, y);\
- }\
- template <class Y>\
- name<Capture, IsClass<Y>> name2(Capture x, Y y) {\
- return name<Capture, Y>(x, y);\
- }\
- template <class X>\
- name<IsClass<X>, Capture> name2(X x, Capture y) {\
- return name<X, Capture>(x, y);\
- }
-
- UNARY(Tanh, tanh, tanhf(x));
- UNARY(Sin, sin, sinf(x));
- UNARY(Cos, cos, cosf(x));
- UNARY(Tan, tan, tanf(x));
- UNARY(Log, log, logf(x));
- UNARY(Exp, exp, expf(x));
- UNARY(Abs, abs, fabs(x));
- UNARY(Sqrt, sqrt, sqrtf(x));
- UNARY(Neg, operator-, -x);
- UNARY(Logit, logit, x > 0 ? (1.f / (1.f + expf(-x))) : (expf(x) / (1.f + expf(x))));
-
- BINARY(Plus, operator+, x + y);
- BINARY(Minus, operator-, x - y);
- BINARY(Mult, operator*, x * y);
- BINARY(Div, operator/, x / y);
-
- UNARY(Negate, operator!, !x);
- BINARY(Eq, operator==, x == y);
- BINARY(NEq, operator!=, x != y);
- BINARY(Gt, operator>, x > y);
- BINARY(Lt, operator<, x < y);
- BINARY(Geq, operator>=, x >= y);
- BINARY(Leq, operator<=, x <= y);
- BINARY(And, operator&&, x && y);
- BINARY(Or, operator||, x || y);
-
- template <typename T>
- __HDI__ T sgn(T val) {
- return (float(0) < val) - (val < float(0));
- }
-
- UNARY(Sgn, sgn, sgn(x));
-
- BINARY(Pow, pow, pow(x, y));
-
- BINARY(Clip, clip, fabs(x) >= y ? sgn(x) * y : x);
-
- UNARY(sReLU, ReLU, x > 0.f ? x : 0.f);
- UNARY(sReLUBack, ReLUback, x > 0.f ? 1.f : 0.f);
- BINARY(sPReLU, PReLU, x > 0.f ? x : x * y);
- BINARY(sPReLUBack, PReLUback, x > 0.f ? 1.f : y);
-
- template <class Function, class X, class Y, class Z>
- struct TernaryFunctor {
- X x;
- Y y;
- Z z;
-
- template <class Arg1, class Arg2, class Arg3>
- TernaryFunctor(Arg1 arg1, Arg2 arg2, Arg3 arg3)
- : x(arg1), y(arg2), z(arg3) {}
-
- template <typename ...Args>
- __HDI__ float operator()(Args&&... args) {
- return Function::apply(x(args...), y(args...), z(args...));
- }
- };
-
- #define TERNARY(name, name2, func) \
- namespace elem { \
- struct name { \
- __HDI__ static float apply(float x, float y, float z) { return func; } \
- }; \
- }\
- template <class X, class Y, class Z> using name = TernaryFunctor<elem::name, X, Y, Z>;\
- template <typename X, typename Y, typename Z>\
- name<IsClass<X>, IsClass<Y>, IsClass<Z>> name2(X x, Y y, Z z) {\
- return name<X, Y, Z>(x, y, z);\
- }\
- template <typename X, typename Z>\
- name<IsClass<X>, Capture, IsClass<Z>> name2(X x, Capture y, Z z) {\
- return name<X, Capture, Z>(x, y, z);\
- }\
- template <typename Y, typename Z>\
- name<Capture, IsClass<Y>, IsClass<Z>> name2(Capture x, Y y, Z z) {\
- return name<Capture, Y, Z>(x, y, z);\
- }\
- template <typename X>\
- name<IsClass<X>, Capture, Capture> name2(X x, Capture y, Capture z) {\
- return name<X, Capture, Capture>(x, y, z);\
- }\
- template <typename Y>\
- name<Capture, IsClass<Y>, Capture> name2(Capture x, Y y, Capture z) {\
- return name<Capture, Y, Capture>(x, y, z);\
- }\
- template <typename Z>\
- name<Capture, Capture, IsClass<Z>> name2(Capture x, Capture y, Z z) {\
- return name<Capture, Capture, Z>(x, y, z);\
- }
-
- TERNARY(IfThenElse, if_then_else, x ? y : z);
-
-
-
- template <class X, class Y>
- struct Assign {
- X x;
- Y y;
-
- template <class Arg1, class Arg2>
- Assign(Arg1 arg1, Arg2 arg2) : x(arg1), y(arg2) {}
-
- template <typename ...Args>
- __HDI__ float operator()(Args&&... args) {
- return x(args...) = y(args...);
- }
- };
-
- template <int N>
- struct Assignee {
- Var<N> var;
-
- Assignee() {}
- Assignee(Var<N> v) : var(v) {}
-
- template <typename ...Args>
- __HDI__ float& operator()(Args&&... args) {
- return var(args...);
- }
-
- template <class X>
- Assign<Var<N>, IsClass<X>> operator=(X x) {
- return Assign<Var<N>, X>(var, x);
- }
-
- Assign<Var<N>, Capture> operator=(Capture x) {
- return Assign<Var<N>, Capture>(var, x);
- }
-
- template <class X>
- auto operator+=(X x)->decltype(*this = *this + x) {
- return *this = *this + x;
- }
-
- template <class X>
- auto operator-=(X x)->decltype(*this = *this - x) {
- return *this = *this - x;
- }
-
- template <class X>
- auto operator*=(X x)->decltype(*this = *this * x) {
- return *this = *this * x;
- }
-
- template <class X>
- auto operator/=(X x)->decltype(*this = *this / x) {
- return *this = *this / x;
- }
-
- std::string to_string() {
- return var.to_string();
- }
- };
+namespace functional {
-/******************************************************************************/
+template <typename Function, typename X>
+struct UnaryFunctor {
+ X x;
+
+ template <class Arg>
+ UnaryFunctor(Arg a) : x(a) {}
+
+ template <typename... Args>
+ __HDI__ float operator()(Args&&... args) {
+ return Function::apply(x(args...));
+ }
+
+ std::string to_string() { return Function::n() + "<" + x.to_string() + ">"; }
+};
+
+template <class Function, class X, class Y>
+struct BinaryFunctor {
+ X x;
+ Y y;
+
+ template <class Arg1, class Arg2>
+ BinaryFunctor(Arg1 arg1, Arg2 arg2) : x(arg1), y(arg2) {}
+
+ template <typename... Args>
+ __HDI__ float operator()(Args&&... args) {
+ return Function::apply(x(args...), y(args...));
+ }
+
+ std::string to_string() {
+ return Function::n() + "<" + x.to_string() + "," + y.to_string() + ">";
+ }
+};
+
+#define UNARY(name, name2, func) \
+ namespace elem { \
+ struct name { \
+ __HDI__ static float apply(float x) { return func; } \
+ static std::string n() { return #name; } \
+ }; \
+ } \
+ template <class X> \
+ using name = UnaryFunctor<elem::name, X>; \
+ template <typename X> \
+ name<IsClass<X>> name2(X x) { \
+ return name<X>(x); \
+ } \
+ static name<Capture> name2(Capture x) { return name<Capture>(x); }
+
+#define BINARY(name, name2, func) \
+ namespace elem { \
+ struct name { \
+ __HDI__ static float apply(float x, float y) { return func; } \
+ static std::string n() { return #name; } \
+ }; \
+ } \
+ template <class X, class Y> \
+ using name = BinaryFunctor<elem::name, X, Y>; \
+ template <class X, class Y> \
+ name<IsClass<X>, IsClass<Y>> name2(X x, Y y) { \
+ return name<X, Y>(x, y); \
+ } \
+ template <class Y> \
+ name<Capture, IsClass<Y>> name2(Capture x, Y y) { \
+ return name<Capture, Y>(x, y); \
+ } \
+ template <class X> \
+ name<IsClass<X>, Capture> name2(X x, Capture y) { \
+ return name<X, Capture>(x, y); \
+ }
+
+UNARY(Tanh, tanh, tanhf(x));
+UNARY(Sin, sin, sinf(x));
+UNARY(Cos, cos, cosf(x));
+UNARY(Tan, tan, tanf(x));
+UNARY(Log, log, logf(x));
+UNARY(Exp, exp, expf(x));
+UNARY(Abs, abs, fabs(x));
+UNARY(Sqrt, sqrt, sqrtf(x));
+UNARY(Neg, operator-, -x);
+UNARY(Logit,
+ logit,
+ x > 0 ? (1.f / (1.f + expf(-x))) : (expf(x) / (1.f + expf(x))));
+
+BINARY(Plus, operator+, x + y);
+BINARY(Minus, operator-, x - y);
+BINARY(Mult, operator*, x* y);
+BINARY(Div, operator/, x / y);
+
+UNARY(Negate, operator!, !x);
+BINARY(Eq, operator==, x == y);
+BINARY(NEq, operator!=, x != y);
+BINARY(Gt, operator>, x> y);
+BINARY(Lt, operator<, x<y);
+BINARY(Geq, operator>=, x >= y);
+BINARY(Leq, operator<=, x <= y);
+BINARY(And, operator&&, x&& y);
+BINARY(Or, operator||, x || y);
+
+template <typename T>
+__HDI__ T sgn(T val) {
+ return (float(0) < val) - (val < float(0));
+}
+
+UNARY(Sgn, sgn, sgn(x));
+
+BINARY(Pow, pow, pow(x, y));
+
+BINARY(Clip, clip, fabs(x) >= y ? sgn(x) * y : x);
+
+UNARY(sReLU, ReLU, x > 0.f ? x : 0.f);
+UNARY(sReLUBack, ReLUback, x > 0.f ? 1.f : 0.f);
+BINARY(sPReLU, PReLU, x > 0.f ? x : x * y);
+BINARY(sPReLUBack, PReLUback, x > 0.f ? 1.f : y);
+
+template <class Function, class X, class Y, class Z>
+struct TernaryFunctor {
+ X x;
+ Y y;
+ Z z;
+
+ template <class Arg1, class Arg2, class Arg3>
+ TernaryFunctor(Arg1 arg1, Arg2 arg2, Arg3 arg3) : x(arg1), y(arg2), z(arg3) {}
+
+ template <typename... Args>
+ __HDI__ float operator()(Args&&... args) {
+ return Function::apply(x(args...), y(args...), z(args...));
+ }
+};
+
+#define TERNARY(name, name2, func) \
+ namespace elem { \
+ struct name { \
+ __HDI__ static float apply(float x, float y, float z) { return func; } \
+ }; \
+ } \
+ template <class X, class Y, class Z> \
+ using name = TernaryFunctor<elem::name, X, Y, Z>; \
+ template <typename X, typename Y, typename Z> \
+ name<IsClass<X>, IsClass<Y>, IsClass<Z>> name2(X x, Y y, Z z) { \
+ return name<X, Y, Z>(x, y, z); \
+ } \
+ template <typename X, typename Z> \
+ name<IsClass<X>, Capture, IsClass<Z>> name2(X x, Capture y, Z z) { \
+ return name<X, Capture, Z>(x, y, z); \
+ } \
+ template <typename Y, typename Z> \
+ name<Capture, IsClass<Y>, IsClass<Z>> name2(Capture x, Y y, Z z) { \
+ return name<Capture, Y, Z>(x, y, z); \
+ } \
+ template <typename X> \
+ name<IsClass<X>, Capture, Capture> name2(X x, Capture y, Capture z) { \
+ return name<X, Capture, Capture>(x, y, z); \
+ } \
+ template <typename Y> \
+ name<Capture, IsClass<Y>, Capture> name2(Capture x, Y y, Capture z) { \
+ return name<Capture, Y, Capture>(x, y, z); \
+ } \
+ template <typename Z> \
+ name<Capture, Capture, IsClass<Z>> name2(Capture x, Capture y, Z z) { \
+ return name<Capture, Capture, Z>(x, y, z); \
+ }
+TERNARY(IfThenElse, if_then_else, x ? y : z);
+
+template <class X, class Y>
+struct Assign {
+ X x;
+ Y y;
+
+ template <class Arg1, class Arg2>
+ Assign(Arg1 arg1, Arg2 arg2) : x(arg1), y(arg2) {}
+
+ template <typename... Args>
+ __HDI__ float operator()(Args&&... args) {
+ return x(args...) = y(args...);
+ }
+};
+
+template <int N>
+struct Assignee {
+ Var<N> var;
+
+ Assignee() {}
+ Assignee(Var<N> v) : var(v) {}
+
+ template <typename... Args>
+ __HDI__ float& operator()(Args&&... args) {
+ return var(args...);
+ }
+
+ template <class X>
+ Assign<Var<N>, IsClass<X>> operator=(X x) {
+ return Assign<Var<N>, X>(var, x);
}
+
+ Assign<Var<N>, Capture> operator=(Capture x) {
+ return Assign<Var<N>, Capture>(var, x);
+ }
+
+ template <class X>
+ auto operator+=(X x) -> decltype(*this = *this + x) {
+ return *this = *this + x;
+ }
+
+ template <class X>
+ auto operator-=(X x) -> decltype(*this = *this - x) {
+ return *this = *this - x;
+ }
+
+ template <class X>
+ auto operator*=(X x) -> decltype(*this = *this * x) {
+ return *this = *this * x;
+ }
+
+ template <class X>
+ auto operator/=(X x) -> decltype(*this = *this / x) {
+ return *this = *this / x;
+ }
+
+ std::string to_string() { return var.to_string(); }
+};
+
+/******************************************************************************/
+}
}
diff --git a/src/functional/shape.h b/src/functional/shape.h
index e8781531..3212a3ed 100644
--- a/src/functional/shape.h
+++ b/src/functional/shape.h
@@ -17,7 +17,6 @@ namespace functional {
* @brief Represents the size of each dimension in a tensor.
*/
-
template <const int N>
struct ConstantShape {
Array<int, N> shape_;
@@ -32,10 +31,10 @@ struct ConstantShape {
}
__HD__ ConstantShape(const ConstantShape& shape)
- : shape_(shape.shape_),
- stride_(shape.stride_),
- bstride_(shape.bstride_),
- elements_(shape.elements_) {}
+ : shape_(shape.shape_),
+ stride_(shape.stride_),
+ bstride_(shape.bstride_),
+ elements_(shape.elements_) {}
ConstantShape(const Shape& shape) {
size_t filled = shape.size();
@@ -43,7 +42,8 @@ struct ConstantShape {
ABORT_IF(filled > N,
"Recompile with CONST_SHAPE_DIMS >= " + std::to_string(filled));
- std::copy(shape.shape_.begin(), shape.shape_.end(), shape_.begin() + N - filled);
+ std::copy(
+ shape.shape_.begin(), shape.shape_.end(), shape_.begin() + N - filled);
if(N - filled)
std::fill_n(shape_.begin(), N - filled, 1);
updateStrides();
@@ -51,7 +51,6 @@ struct ConstantShape {
}
__HDI__ void updateStrides() {
-
stride_[N - 1] = 1;
bstride_[N - 1] = shape_[N - 1] == 1 ? 0 : stride_[N - 1];
@@ -73,7 +72,6 @@ struct ConstantShape {
updateElements();
}
-
__HDI__ int dim(int i) { return shape_[i]; }
__HDI__ int dim(int i) const {
@@ -92,9 +90,7 @@ struct ConstantShape {
__HDI__ static constexpr size_t size() { return N; }
- __HDI__ int elements() const {
- return elements_;
- }
+ __HDI__ int elements() const { return elements_; }
__HDI__ int index(const Array<int, N>& d) const {
int i = 0;
@@ -113,7 +109,7 @@ struct ConstantShape {
__HDI__ void dims(int i, Array<int, N>& d) const {
for(int j = 0; j < N; ++j)
d[j] = (i / stride_[j]) % shape_[j];
- }
+ }
__HDI__ bool operator==(const ConstantShape& other) const {
for(int i = 0; i < N; ++i)
@@ -128,7 +124,5 @@ struct ConstantShape {
};
typedef ConstantShape<CONST_SHAPE_DIMS> Shape;
-
}
-
}
diff --git a/src/functional/tensor.h b/src/functional/tensor.h
index 8c48c11e..43fb8fe0 100644
--- a/src/functional/tensor.h
+++ b/src/functional/tensor.h
@@ -7,7 +7,7 @@
namespace marian {
namespace functional {
-template<typename T>
+template <typename T>
struct Tensor {
T* data_;
functional::Shape shape_;
@@ -15,19 +15,20 @@ struct Tensor {
__HD__ Tensor() {}
__HD__ Tensor(T* ptr, const functional::Shape& shape)
- : data_(ptr), shape_(shape) {}
+ : data_(ptr), shape_(shape) {}
- __H__ Tensor(marian::Tensor t)
- : data_(t->data()), shape_(t->shape()) {}
+ __H__ Tensor(marian::Tensor t) : data_(t->data()), shape_(t->shape()) {}
__HDI__ float& operator[](size_t i) { return data_[i]; }
__HDI__ const float& operator[](size_t i) const { return data_[i]; }
- __HDI__ float& operator[](const functional::Array<int, functional::Shape::size()>& indices) {
+ __HDI__ float& operator[](
+ const functional::Array<int, functional::Shape::size()>& indices) {
return data_[shape_.index(indices)];
}
- __HDI__ const float& operator[](const functional::Array<int, functional::Shape::size()>& indices) const {
+ __HDI__ const float& operator[](
+ const functional::Array<int, functional::Shape::size()>& indices) const {
return data_[shape_.index(indices)];
}
@@ -37,6 +38,5 @@ struct Tensor {
__HDI__ Shape& shape() { return shape_; }
__HDI__ const Shape& shape() const { return shape_; }
};
-
}
} \ No newline at end of file
diff --git a/src/functional/tmp.h b/src/functional/tmp.h
index 9d155fa0..65cb49d8 100644
--- a/src/functional/tmp.h
+++ b/src/functional/tmp.h
@@ -12,82 +12,86 @@ struct FApply {};
template <class Functor>
struct FApply<1, Functor> {
- __HDI__ static float apply(Functor functor,
- functional::Array<functional::Tensor<float>, 1>& in,
- const functional::Array<int, 1>& indices) {
+ __HDI__ static float apply(
+ Functor functor,
+ functional::Array<functional::Tensor<float>, 1>& in,
+ const functional::Array<int, 1>& indices) {
return functor(in[0][indices[0]]);
}
- __HDI__ static float apply(Functor functor,
- functional::Array<functional::Tensor<float>, 1>& in,
- int index) {
+ __HDI__ static float apply(
+ Functor functor,
+ functional::Array<functional::Tensor<float>, 1>& in,
+ int index) {
return functor(in[0][index]);
}
};
template <class Functor>
struct FApply<2, Functor> {
- __HDI__ static float apply(Functor functor,
- functional::Array<functional::Tensor<float>, 2>& in,
- const functional::Array<int, 2>& indices) {
+ __HDI__ static float apply(
+ Functor functor,
+ functional::Array<functional::Tensor<float>, 2>& in,
+ const functional::Array<int, 2>& indices) {
return functor(in[0][indices[0]], in[1][indices[1]]);
}
- __HDI__ static float apply(Functor functor,
- functional::Array<functional::Tensor<float>, 2>& in,
- int index) {
+ __HDI__ static float apply(
+ Functor functor,
+ functional::Array<functional::Tensor<float>, 2>& in,
+ int index) {
return functor(in[0][index], in[1][index]);
}
};
template <class Functor>
struct FApply<3, Functor> {
- __HDI__ static float apply(Functor functor,
- functional::Array<functional::Tensor<float>, 3>& in,
- const functional::Array<int, 3>& indices) {
+ __HDI__ static float apply(
+ Functor functor,
+ functional::Array<functional::Tensor<float>, 3>& in,
+ const functional::Array<int, 3>& indices) {
return functor(in[0][indices[0]], in[1][indices[1]], in[2][indices[2]]);
}
- __HDI__ static float apply(Functor functor,
- functional::Array<functional::Tensor<float>, 3>& in,
- int index) {
+ __HDI__ static float apply(
+ Functor functor,
+ functional::Array<functional::Tensor<float>, 3>& in,
+ int index) {
return functor(in[0][index], in[1][index], in[2][index]);
}
};
-
template <class Functor>
struct FApply<4, Functor> {
- __HDI__ static float apply(Functor functor,
- functional::Array<functional::Tensor<float>, 4>& in,
- const functional::Array<int, 4>& indices) {
+ __HDI__ static float apply(
+ Functor functor,
+ functional::Array<functional::Tensor<float>, 4>& in,
+ const functional::Array<int, 4>& indices) {
return functor(in[0][indices[0]],
in[1][indices[1]],
in[2][indices[2]],
in[3][indices[3]]);
}
- __HDI__ static float apply(Functor functor,
- functional::Array<functional::Tensor<float>, 4>& in,
- int index) {
- return functor(in[0][index],
- in[1][index],
- in[2][index],
- in[3][index]);
+ __HDI__ static float apply(
+ Functor functor,
+ functional::Array<functional::Tensor<float>, 4>& in,
+ int index) {
+ return functor(in[0][index], in[1][index], in[2][index], in[3][index]);
}
};
template <size_t K, class Functor>
- __HDI__ float apply(Functor functor,
- functional::Array<functional::Tensor<float>, K>& in,
- const functional::Array<int, K>& indices) {
+__HDI__ float apply(Functor functor,
+ functional::Array<functional::Tensor<float>, K>& in,
+ const functional::Array<int, K>& indices) {
return FApply<K, Functor>::apply(functor, in, indices);
}
template <size_t K, class Functor>
- __HDI__ float apply(Functor functor,
- functional::Array<functional::Tensor<float>, K>& in,
- int index) {
+__HDI__ float apply(Functor functor,
+ functional::Array<functional::Tensor<float>, K>& in,
+ int index) {
return FApply<K, Functor>::apply(functor, in, index);
}
@@ -96,11 +100,12 @@ template <size_t K, class Functor>
template <size_t n, size_t N, size_t K>
struct Loop {
template <class Functor>
- __HDI__ static float result(Functor functor,
- functional::Array<functional::Tensor<float>, K>& in,
- const functional::Array<int, K>& pAcc,
- const functional::Array<int, N>& length,
- const functional::Array<int, N>& dim) {
+ __HDI__ static float result(
+ Functor functor,
+ functional::Array<functional::Tensor<float>, K>& in,
+ const functional::Array<int, K>& pAcc,
+ const functional::Array<int, N>& length,
+ const functional::Array<int, N>& dim) {
float sum = 0;
functional::Array<int, K> acc;
for(int i = 0; i < length[N - n]; ++i) {
@@ -116,11 +121,12 @@ struct Loop {
template <size_t N, size_t K>
struct Loop<1, N, K> {
template <class Functor>
- __HDI__ static float result(Functor functor,
- functional::Array<functional::Tensor<float>, K>& in,
- const functional::Array<int, K>& pAcc,
- const functional::Array<int, N>& length,
- const functional::Array<int, N>& dim) {
+ __HDI__ static float result(
+ Functor functor,
+ functional::Array<functional::Tensor<float>, K>& in,
+ const functional::Array<int, K>& pAcc,
+ const functional::Array<int, N>& length,
+ const functional::Array<int, N>& dim) {
float sum = 0;
functional::Array<int, K> acc;
for(int i = 0; i < length[N - 1]; ++i) {
@@ -141,6 +147,5 @@ __HDI__ float loops(Functor functor,
functional::Array<int, K> acc = {0};
return Loop<N, N, K>::result(functor, in, acc, length, dim);
}
-
}
-} \ No newline at end of file
+}
diff --git a/src/graph/expression_graph.cpp b/src/graph/expression_graph.cpp
index f0ae1ffa..4a0edb34 100644
--- a/src/graph/expression_graph.cpp
+++ b/src/graph/expression_graph.cpp
@@ -1,5 +1,5 @@
-#include <sstream>
#include "graph/expression_graph.h"
+#include <sstream>
#include "tensors/tensor_operators.h"
@@ -18,15 +18,12 @@ void ExpressionGraph::setDevice(DeviceId deviceId) {
}
Expr ExpressionGraph::dropout(float prob, const Shape& shape) {
- return Expression<ConstantNode>(shared_from_this(),
- shape,
- [prob, this](Tensor t) {
- Dropout(t, prob);
- });
+ return Expression<ConstantNode>(
+ shared_from_this(), shape, [prob, this](Tensor t) { Dropout(t, prob); });
}
void ExpressionGraph::checkNan(Tensor t) {
ABORT_IF(throwNaN_, "Not implemented");
- //ABORT_IF(throwNaN_ && IsNan(t), "Tensor has NaN");
+ // ABORT_IF(throwNaN_ && IsNan(t), "Tensor has NaN");
}
}
diff --git a/src/graph/expression_graph.h b/src/graph/expression_graph.h
index ea1645ec..c6cd4558 100644
--- a/src/graph/expression_graph.h
+++ b/src/graph/expression_graph.h
@@ -215,7 +215,9 @@ public:
ABORT_IF(shape != p->shape(),
"Requested shape {} for existing parameter '{}' does not match "
"original shape {}",
- shape, name, p->shape());
+ shape,
+ name,
+ p->shape());
p->setTrainable(!fixed);
add(p);
@@ -239,10 +241,8 @@ public:
return p;
}
- Expr constant(const Shape& shape,
- const NodeInitializer& init) {
- return Expression<ConstantNode>(
- shared_from_this(), shape, init);
+ Expr constant(const Shape& shape, const NodeInitializer& init) {
+ return Expression<ConstantNode>(shared_from_this(), shape, init);
}
Expr ones(const Shape& shape) {
diff --git a/src/graph/expression_operators.cpp b/src/graph/expression_operators.cpp
index a1c9faa4..a4a8b079 100644
--- a/src/graph/expression_operators.cpp
+++ b/src/graph/expression_operators.cpp
@@ -126,7 +126,6 @@ Expr repeat(Expr a, size_t repeats, keywords::axis_k ax) {
return concatenate(std::vector<Expr>(repeats, a), ax);
}
-
Expr reshape(Expr a, Shape shape) {
return Expression<ReshapeNodeOp>(a, shape);
}
@@ -165,10 +164,7 @@ Expr flatten(Expr a) {
}
Expr flatten_2d(Expr a) {
- Shape shape = {
- a->shape().elements() / a->shape()[-1],
- a->shape()[-1]
- };
+ Shape shape = {a->shape().elements() / a->shape()[-1], a->shape()[-1]};
return Expression<ReshapeNodeOp>(a, shape);
}
@@ -232,17 +228,16 @@ Expr step(Expr a, int step, int axis) {
}
Expr cross_entropy(Expr a, Expr b) {
- //auto sOrig = a->shape();
- //auto sOut = a->shape();
- //Shape sTemp({sOrig[0] * sOrig[2] * sOrig[3], sOrig[1], 1, 1});
- //sOut.set(1, 1);
- //return reshape(Expression<CrossEntropyNodeOp>(reshape(a, sTemp), b), sOut);
+ // auto sOrig = a->shape();
+ // auto sOut = a->shape();
+ // Shape sTemp({sOrig[0] * sOrig[2] * sOrig[3], sOrig[1], 1, 1});
+ // sOut.set(1, 1);
+ // return reshape(Expression<CrossEntropyNodeOp>(reshape(a, sTemp), b), sOut);
return Expression<CrossEntropyNodeOp>(a, b);
}
-Expr affine(Expr a, Expr b, Expr c,
- bool transA, bool transB, float scalar) {
+Expr affine(Expr a, Expr b, Expr c, bool transA, bool transB, float scalar) {
std::vector<Expr> nodes = {a, b, c};
return Expression<AffineNodeOp>(nodes, transA, transB, scalar);
}
@@ -299,6 +294,7 @@ Expr highway(Expr y, Expr x, Expr t) {
}
Expr highway(const std::string prefix, Expr x) {
+ // clang-format off
size_t outDim = x->shape()[-1];
auto g = mlp::dense(x->graph())
("prefix", prefix + "_highway_d1")
@@ -311,6 +307,7 @@ Expr highway(const std::string prefix, Expr x) {
("activation", mlp::act::ReLU)
.construct()->apply(x);
return (g * relued) + ((1 - g) * x);
+ // clang-format on
}
// Expr batch_norm(Expr x, Expr gamma, Expr beta) {
@@ -334,41 +331,26 @@ Expr shift(Expr a, Shape shift) {
#ifdef CUDA_FOUND
-Expr avg_pooling(
- Expr x,
- int height,
- int width,
- int padHeight,
- int padWidth,
- int strideHeight,
- int strideWidth) {
- return Expression<PoolingOp>(x,
- height,
- width,
- padHeight,
- padWidth,
- strideHeight,
- strideWidth,
- "avg");
-}
-
-Expr max_pooling(
- Expr x,
- int height,
- int width,
- int padHeight,
- int padWidth,
- int strideHeight,
- int strideWidth)
-{
- return Expression<PoolingOp>(x,
- height,
- width,
- padHeight,
- padWidth,
- strideHeight,
- strideWidth,
- "max");
+Expr avg_pooling(Expr x,
+ int height,
+ int width,
+ int padHeight,
+ int padWidth,
+ int strideHeight,
+ int strideWidth) {
+ return Expression<PoolingOp>(
+ x, height, width, padHeight, padWidth, strideHeight, strideWidth, "avg");
+}
+
+Expr max_pooling(Expr x,
+ int height,
+ int width,
+ int padHeight,
+ int padWidth,
+ int strideHeight,
+ int strideWidth) {
+ return Expression<PoolingOp>(
+ x, height, width, padHeight, padWidth, strideHeight, strideWidth, "max");
}
Expr convert2cudnnFormat(Expr x) {
@@ -377,13 +359,13 @@ Expr convert2cudnnFormat(Expr x) {
int embSize = x->shape()[2];
std::vector<size_t> newIndeces;
- for (int b = 0; b < numExamples; ++b) {
- for (int t = 0; t < numWords; ++t) {
+ for(int b = 0; b < numExamples; ++b) {
+ for(int t = 0; t < numWords; ++t) {
newIndeces.push_back((t * numExamples) + b);
}
}
- auto xRows = reshape(x, {x->shape()[0] * x ->shape()[1], x->shape()[2]});
+ auto xRows = reshape(x, {x->shape()[0] * x->shape()[1], x->shape()[2]});
Shape outShape({numExamples, 1, numWords, embSize});
return reshape(rows(xRows, newIndeces), outShape);
@@ -397,8 +379,8 @@ Expr convertFromcudnnFormat(Expr x) {
auto reshapedX = reshape(x, {batchDim * sentenceDim, embSize});
std::vector<size_t> newIndeces;
- for (int t = 0; t < sentenceDim; ++t) {
- for (int b = 0; b < batchDim; ++b) {
+ for(int t = 0; t < sentenceDim; ++t) {
+ for(int b = 0; b < batchDim; ++b) {
newIndeces.push_back(b * sentenceDim + t);
}
}
@@ -412,5 +394,4 @@ Expr pooling_with_masking(Expr x, Expr mask, int width, bool isEven) {
}
#endif
-
}
diff --git a/src/graph/expression_operators.h b/src/graph/expression_operators.h
index 1145be3c..c637105f 100644
--- a/src/graph/expression_operators.h
+++ b/src/graph/expression_operators.h
@@ -110,7 +110,6 @@ Expr mean(Expr a, keywords::axis_k ax = 0);
Expr cross_entropy(Expr a, Expr b);
-
Expr scalar_product(Expr a, Expr b, keywords::axis_k ax = 0);
Expr weighted_average(Expr in, Expr weights, keywords::axis_k ax = 0);
@@ -161,6 +160,5 @@ Expr max_pooling(Expr x,
int strideHeight = 1,
int strideWidth = 1);
-Expr pooling_with_masking(Expr x, Expr mask, int width, bool isEven=false);
-
+Expr pooling_with_masking(Expr x, Expr mask, int width, bool isEven = false);
}
diff --git a/src/graph/node.cpp b/src/graph/node.cpp
index 1c93683c..721cb30f 100644
--- a/src/graph/node.cpp
+++ b/src/graph/node.cpp
@@ -1,4 +1,5 @@
#include "tensors/backend.h"
+
#include "graph/expression_graph.h"
#include "graph/node.h"
diff --git a/src/graph/node.h b/src/graph/node.h
index 74af5771..15f223aa 100644
--- a/src/graph/node.h
+++ b/src/graph/node.h
@@ -33,8 +33,7 @@ protected:
public:
Node(Ptr<ExpressionGraph> graph, Shape shape)
- : graph_(graph),
- shape_(shape) {}
+ : graph_(graph), shape_(shape) {}
virtual ~Node() {
if(destroy_) {
@@ -152,7 +151,7 @@ struct NaryNodeOp : public Node {
}
NaryNodeOp(const std::vector<Expr>& nodes)
- : NaryNodeOp(nodes, nodes[0]->shape()) {}
+ : NaryNodeOp(nodes, nodes[0]->shape()) {}
virtual ~NaryNodeOp() {}
diff --git a/src/graph/node_initializers.cpp b/src/graph/node_initializers.cpp
index 0d131c61..6650ede6 100644
--- a/src/graph/node_initializers.cpp
+++ b/src/graph/node_initializers.cpp
@@ -109,9 +109,8 @@ void ortho(Tensor t) {
NodeInitializer from_vector(const std::vector<float>& v) {
auto vPtr = New<std::vector<float>>(v.begin(), v.end());
- return [vPtr](Tensor t) {
- t->set(vPtr->data(), vPtr->data() + vPtr->size());
- };
+ return
+ [vPtr](Tensor t) { t->set(vPtr->data(), vPtr->data() + vPtr->size()); };
}
NodeInitializer from_vector(const std::vector<size_t>& v) {
@@ -138,9 +137,9 @@ NodeInitializer from_numpy(const cnpy::NpyArrayPtr& np) {
// move this somewhere else
NodeInitializer from_word2vec(const std::string& file,
- int dimVoc,
- int dimEmb,
- bool normalize /*= false*/) {
+ int dimVoc,
+ int dimEmb,
+ bool normalize /*= false*/) {
return [file, dimVoc, dimEmb, normalize](Tensor t) {
auto embs = Word2VecReader().read(file, dimVoc, dimEmb);
diff --git a/src/graph/node_initializers.h b/src/graph/node_initializers.h
index 5b069657..bd74c6c4 100644
--- a/src/graph/node_initializers.h
+++ b/src/graph/node_initializers.h
@@ -70,9 +70,9 @@ NodeInitializer from_sparse_vector(
NodeInitializer from_numpy(const cnpy::NpyArrayPtr& np);
NodeInitializer from_word2vec(const std::string& file,
- int dimVoc,
- int dimEmb,
- bool normalize = false);
+ int dimVoc,
+ int dimEmb,
+ bool normalize = false);
}
} // namespace marian
diff --git a/src/graph/node_operators.h b/src/graph/node_operators.h
index 8720d0bb..4e97fff3 100644
--- a/src/graph/node_operators.h
+++ b/src/graph/node_operators.h
@@ -7,11 +7,12 @@
namespace marian {
struct ConstantNode : public Node {
- ConstantNode(Ptr<ExpressionGraph> graph, const Shape& shape, const NodeInitializer& init)
+ ConstantNode(Ptr<ExpressionGraph> graph,
+ const Shape& shape,
+ const NodeInitializer& init)
: Node(graph, shape),
init_(new NodeInitializer(init)),
initialized_(false) {
-
setTrainable(false);
}
@@ -41,11 +42,13 @@ private:
};
struct ParamNode : public Node {
- ParamNode(Ptr<ExpressionGraph> graph, const Shape& shape, const NodeInitializer& init, bool fixed = false)
+ ParamNode(Ptr<ExpressionGraph> graph,
+ const Shape& shape,
+ const NodeInitializer& init,
+ bool fixed = false)
: Node(graph, shape),
init_(new NodeInitializer(init)),
initialized_(false) {
-
setTrainable(!fixed);
}
diff --git a/src/graph/node_operators_binary.h b/src/graph/node_operators_binary.h
index c9e67cd7..6fc08690 100644
--- a/src/graph/node_operators_binary.h
+++ b/src/graph/node_operators_binary.h
@@ -16,13 +16,8 @@ private:
float scalar_;
public:
- DotNodeOp(Expr a,
- Expr b,
- bool transA,
- bool transB,
- float scalar)
- : NaryNodeOp({a, b},
- newShape(a, b, transA, transB)),
+ DotNodeOp(Expr a, Expr b, bool transA, bool transB, float scalar)
+ : NaryNodeOp({a, b}, newShape(a, b, transA, transB)),
transA_(transA),
transB_(transB),
scalar_(scalar) {}
@@ -49,14 +44,13 @@ public:
NodeOps forwardOps() {
// C = alpha * dot(op(A), op(B))
- return {NodeOp(Prod(
- val_,
- child(0)->val(),
- child(1)->val(),
- transA_,
- transB_,
- 0.f,
- scalar_))};
+ return {NodeOp(Prod(val_,
+ child(0)->val(),
+ child(1)->val(),
+ transA_,
+ transB_,
+ 0.f,
+ scalar_))};
}
NodeOps backwardOps() {
@@ -149,7 +143,7 @@ public:
: NaryNodeOp(nodes, newShape(nodes[0], nodes[1], transA, transB)),
transA_(transA),
transB_(transB),
- scalar_(scalar){}
+ scalar_(scalar) {}
Shape newShape(Expr a, Expr b, bool transA, bool transB) {
auto shapeA = a->shape();
@@ -171,19 +165,17 @@ public:
return outShape;
}
-
NodeOps forwardOps() {
using namespace functional;
return {
- NodeOp(Prod(
- val_,
- child(0)->val(),
- child(1)->val(),
- transA_,
- transB_,
- 0.f,
- scalar_);
- Add(_1, val_, child(2)->val()))
+ NodeOp(Prod(val_,
+ child(0)->val(),
+ child(1)->val(),
+ transA_,
+ transB_,
+ 0.f,
+ scalar_);
+ Add(_1, val_, child(2)->val()))
};
}
@@ -266,7 +258,6 @@ public:
const std::string type() { return "affine"; }
};
-
class DotBatchedNodeOp : public NaryNodeOp {
private:
bool transA_;
@@ -274,13 +265,8 @@ private:
float scalar_;
public:
- DotBatchedNodeOp(Expr a,
- Expr b,
- bool transA,
- bool transB,
- float scalar)
- : NaryNodeOp({a, b},
- newShape(a, b, transA, transB)),
+ DotBatchedNodeOp(Expr a, Expr b, bool transA, bool transB, float scalar)
+ : NaryNodeOp({a, b}, newShape(a, b, transA, transB)),
transA_(transA),
transB_(transB),
scalar_(scalar) {}
@@ -307,14 +293,13 @@ public:
NodeOps forwardOps() {
// C = alpha * dot(op(A), op(B))
- return {NodeOp(ProdBatched(
- val_,
- child(0)->val(),
- child(1)->val(),
- transA_,
- transB_,
- 0.f,
- scalar_))};
+ return {NodeOp(ProdBatched(val_,
+ child(0)->val(),
+ child(1)->val(),
+ transA_,
+ transB_,
+ 0.f,
+ scalar_))};
}
NodeOps backwardOps() {
@@ -325,71 +310,67 @@ public:
// to sum gradients from different graph parts
if(!transA_ && transB_)
- return {
- NodeOp(ProdBatched(child(0)->grad(),
- adj_,
- child(1)->val(),
- false,
- false,
- 1.0,
- scalar_)),
- NodeOp(ProdBatched(child(1)->grad(),
- adj_,
- child(0)->val(),
- true,
- false,
- 1.0,
- scalar_))};
+ return {NodeOp(ProdBatched(child(0)->grad(),
+ adj_,
+ child(1)->val(),
+ false,
+ false,
+ 1.0,
+ scalar_)),
+ NodeOp(ProdBatched(child(1)->grad(),
+ adj_,
+ child(0)->val(),
+ true,
+ false,
+ 1.0,
+ scalar_))};
if(transA_ && !transB_)
- return {
- NodeOp(ProdBatched(child(0)->grad(),
- child(1)->val(),
- adj_,
- false,
- true,
- 1.0,
- scalar_)),
- NodeOp(ProdBatched(child(1)->grad(),
- child(0)->val(),
- adj_,
- false,
- false,
- 1.0,
- scalar_))};
+ return {NodeOp(ProdBatched(child(0)->grad(),
+ child(1)->val(),
+ adj_,
+ false,
+ true,
+ 1.0,
+ scalar_)),
+ NodeOp(ProdBatched(child(1)->grad(),
+ child(0)->val(),
+ adj_,
+ false,
+ false,
+ 1.0,
+ scalar_))};
if(transA_ && transB_)
- return {
- NodeOp(ProdBatched(child(0)->grad(),
- child(1)->val(),
- adj_,
- true,
- true,
- 1.0,
- scalar_)),
- NodeOp(ProdBatched(child(1)->grad(),
- adj_,
- child(0)->val(),
- true,
- true,
- 1.0,
- scalar_))};
-
- return {
- NodeOp(ProdBatched(child(0)->grad(),
- adj_,
- child(1)->val(),
- false,
- true,
- 1.0,
- scalar_)),
- NodeOp(ProdBatched(child(1)->grad(),
- child(0)->val(),
- adj_,
- true,
- false,
- 1.0,
- scalar_))};
+ return {NodeOp(ProdBatched(child(0)->grad(),
+ child(1)->val(),
+ adj_,
+ true,
+ true,
+ 1.0,
+ scalar_)),
+ NodeOp(ProdBatched(child(1)->grad(),
+ adj_,
+ child(0)->val(),
+ true,
+ true,
+ 1.0,
+ scalar_))};
+
+ return {NodeOp(ProdBatched(child(0)->grad(),
+ adj_,
+ child(1)->val(),
+ false,
+ true,
+ 1.0,
+ scalar_)),
+ NodeOp(ProdBatched(child(1)->grad(),
+ child(0)->val(),
+ adj_,
+ true,
+ false,
+ 1.0,
+ scalar_))};
}
const std::string type() { return "•"; }
@@ -400,8 +381,7 @@ public:
struct ScalarProductNodeOp : public NaryNodeOp {
template <typename... Args>
ScalarProductNodeOp(Expr a, Expr b, Args... args)
- : NaryNodeOp({a, b}, newShape(a, b, args...)) {
- }
+ : NaryNodeOp({a, b}, newShape(a, b, args...)) {}
template <typename... Args>
Shape newShape(Expr a, Expr b, Args... args) {
@@ -433,12 +413,9 @@ struct ScalarProductNodeOp : public NaryNodeOp {
};
struct ElementBinaryNodeOp : public NaryNodeOp {
- ElementBinaryNodeOp(Expr a, Expr b)
- : NaryNodeOp({a, b}, newShape(a, b)) {}
+ ElementBinaryNodeOp(Expr a, Expr b) : NaryNodeOp({a, b}, newShape(a, b)) {}
- Shape newShape(Expr a, Expr b) {
- return Shape::broadcast({a, b});
- }
+ Shape newShape(Expr a, Expr b) { return Shape::broadcast({a, b}); }
const std::string color() { return "yellow"; }
};
@@ -553,8 +530,7 @@ struct DivNodeOp : public ElementBinaryNodeOp {
// Cross-entropy node. It computes -b*log(softmax(a)), summing rowwise.
struct CrossEntropyNodeOp : public NaryNodeOp {
- CrossEntropyNodeOp(Expr a, Expr b)
- : NaryNodeOp({a, b}, newShape(a)) {}
+ CrossEntropyNodeOp(Expr a, Expr b) : NaryNodeOp({a, b}, newShape(a)) {}
Shape newShape(Expr a) {
Shape shape1 = a->shape();
@@ -578,7 +554,9 @@ struct CrossEntropyNodeOp : public NaryNodeOp {
struct ConcatenateNodeOp : public NaryNodeOp {
template <typename... Args>
ConcatenateNodeOp(const std::vector<Expr>& nodes, Args... args)
- : NaryNodeOp(nodes, newShape(nodes, keywords::Get(keywords::axis, 0, args...))) {}
+ : NaryNodeOp(nodes,
+ newShape(nodes, keywords::Get(keywords::axis, 0, args...))) {
+ }
Shape newShape(const std::vector<Expr>& nodes, int ax) {
Shape shape = nodes.back()->shape();
@@ -730,38 +708,33 @@ struct HighwayNodeOp : public NaryNodeOp {
class ConvolutionOp : public NaryNodeOp {
public:
- ConvolutionOp(
- const std::vector<Expr>& nodes,
- int hPad = 0,
- int wPad = 0,
- int hStride = 1,
- int wStride = 1)
- : NaryNodeOp(nodes),
- conv_(nodes[1]->shape(),
- nodes[2]->shape(),
- hPad,
- wPad,
- hStride,
- wStride) {
+ ConvolutionOp(const std::vector<Expr>& nodes,
+ int hPad = 0,
+ int wPad = 0,
+ int hStride = 1,
+ int wStride = 1)
+ : NaryNodeOp(nodes),
+ conv_(nodes[1]->shape(),
+ nodes[2]->shape(),
+ hPad,
+ wPad,
+ hStride,
+ wStride) {
conv_.getOutputShape(nodes[0]->shape(), shape_);
}
NodeOps forwardOps() {
return {NodeOp(conv_.forward(
- child(0)->val(),
- child(1)->val(),
- child(2)->val(),
- val_))};
+ child(0)->val(), child(1)->val(), child(2)->val(), val_))};
}
NodeOps backwardOps() {
- return {NodeOp(conv_.backward(
- child(0)->val(),
- child(0)->grad(),
- child(1)->val(),
- child(1)->grad(),
- child(2)->grad(),
- adj_))};
+ return {NodeOp(conv_.backward(child(0)->val(),
+ child(0)->grad(),
+ child(1)->val(),
+ child(1)->grad(),
+ child(2)->grad(),
+ adj_))};
}
const std::string type() { return "layer_convolution"; }
@@ -769,5 +742,4 @@ public:
protected:
ConvolutionWrapper conv_;
};
-
}
diff --git a/src/graph/node_operators_unary.h b/src/graph/node_operators_unary.h
index 0ca2c2a2..8d81a63a 100644
--- a/src/graph/node_operators_unary.h
+++ b/src/graph/node_operators_unary.h
@@ -12,11 +12,9 @@
namespace marian {
struct UnaryNodeOp : public NaryNodeOp {
- UnaryNodeOp(Expr a, Shape shape)
- : NaryNodeOp({a}, shape) {}
+ UnaryNodeOp(Expr a, Shape shape) : NaryNodeOp({a}, shape) {}
- UnaryNodeOp(Expr a)
- : NaryNodeOp({a}, a->shape()) {}
+ UnaryNodeOp(Expr a) : NaryNodeOp({a}, a->shape()) {}
const std::string color() { return "yellow"; }
};
@@ -26,9 +24,7 @@ private:
float scalar_{0};
public:
- ScalarAddNodeOp(Expr a, float scalar)
- : UnaryNodeOp(a),
- scalar_{scalar} {}
+ ScalarAddNodeOp(Expr a, float scalar) : UnaryNodeOp(a), scalar_{scalar} {}
NodeOps forwardOps() {
using namespace functional;
@@ -67,8 +63,7 @@ private:
float scalar_{0};
public:
- ScalarMultNodeOp(Expr a, float scalar)
- : UnaryNodeOp(a), scalar_{scalar} {}
+ ScalarMultNodeOp(Expr a, float scalar) : UnaryNodeOp(a), scalar_{scalar} {}
NodeOps forwardOps() {
using namespace functional;
@@ -210,7 +205,6 @@ struct TanhNodeOp : public NaryNodeOp {
const std::string type() { return "tanh"; }
};
-
struct ReLUNodeOp : public UnaryNodeOp {
ReLUNodeOp(Expr a) : UnaryNodeOp(a) {}
@@ -262,8 +256,7 @@ struct ReLUNodeOp : public UnaryNodeOp {
* \f]
*/
struct PReLUNodeOp : public UnaryNodeOp {
- PReLUNodeOp(float alpha, Expr a)
- : UnaryNodeOp(a), alpha_(alpha) {}
+ PReLUNodeOp(float alpha, Expr a) : UnaryNodeOp(a), alpha_(alpha) {}
NodeOps forwardOps() {
using namespace functional;
@@ -334,11 +327,9 @@ struct SwishNodeOp : public UnaryNodeOp {
};
struct SoftmaxNodeOp : public UnaryNodeOp {
- SoftmaxNodeOp(Expr a)
- : UnaryNodeOp(a), mask_(nullptr) {}
+ SoftmaxNodeOp(Expr a) : UnaryNodeOp(a), mask_(nullptr) {}
- SoftmaxNodeOp(Expr a, Expr mask)
- : UnaryNodeOp(a), mask_(mask) {}
+ SoftmaxNodeOp(Expr a, Expr mask) : UnaryNodeOp(a), mask_(mask) {}
Expr mask_;
@@ -407,17 +398,18 @@ struct SumNodeOp : public UnaryNodeOp {
int ax_;
template <typename... Args>
- SumNodeOp(Expr a, Args... args)
- : UnaryNodeOp(a, newShape(a, args...)) {}
+ SumNodeOp(Expr a, Args... args) : UnaryNodeOp(a, newShape(a, args...)) {}
NodeOps forwardOps() {
using namespace functional;
- return {NodeOp(Reduce(_1, val_, child(0)->val()))}; }
+ return {NodeOp(Reduce(_1, val_, child(0)->val()))};
+ }
NodeOps backwardOps() {
using namespace functional;
- return {NodeOp(Add(_1, child(0)->grad(), adj_))}; }
+ return {NodeOp(Add(_1, child(0)->grad(), adj_))};
+ }
template <class... Args>
Shape newShape(Expr a, Args... args) {
@@ -456,8 +448,7 @@ struct MeanNodeOp : public UnaryNodeOp {
int ax_;
template <typename... Args>
- MeanNodeOp(Expr a, Args... args)
- : UnaryNodeOp(a, newShape(a, args...)) {}
+ MeanNodeOp(Expr a, Args... args) : UnaryNodeOp(a, newShape(a, args...)) {}
NodeOps forwardOps() {
using namespace functional;
@@ -543,8 +534,7 @@ struct ExpNodeOp : public UnaryNodeOp {
struct SqrtNodeOp : public UnaryNodeOp {
float epsilon_;
- SqrtNodeOp(Expr a, float epsilon)
- : UnaryNodeOp(a), epsilon_(epsilon) {}
+ SqrtNodeOp(Expr a, float epsilon) : UnaryNodeOp(a), epsilon_(epsilon) {}
NodeOps forwardOps() {
using namespace functional;
@@ -614,8 +604,7 @@ struct NegNodeOp : public UnaryNodeOp {
struct RowsNodeOp : public UnaryNodeOp {
RowsNodeOp(Expr a, const std::vector<size_t>& indeces)
- : UnaryNodeOp(a, newShape(a, indeces)),
- indices_(indeces) {}
+ : UnaryNodeOp(a, newShape(a, indeces)), indices_(indeces) {}
NodeOps forwardOps() {
// @TODO: solve this with a tensor!
@@ -666,8 +655,7 @@ struct RowsNodeOp : public UnaryNodeOp {
struct ColsNodeOp : public UnaryNodeOp {
ColsNodeOp(Expr a, const std::vector<size_t>& indeces)
- : UnaryNodeOp(a, newShape(a, indeces)),
- indices_(indeces) {}
+ : UnaryNodeOp(a, newShape(a, indeces)), indices_(indeces) {}
NodeOps forwardOps() {
// @TODO: solve this with a tensor!
@@ -716,8 +704,7 @@ struct ColsNodeOp : public UnaryNodeOp {
struct SelectNodeOp : public UnaryNodeOp {
SelectNodeOp(Expr a, int axis, const std::vector<size_t>& indeces)
- : UnaryNodeOp(a, newShape(a, axis, indeces)),
- indices_(indeces) {}
+ : UnaryNodeOp(a, newShape(a, axis, indeces)), indices_(indeces) {}
NodeOps forwardOps() {
return {NodeOp(
@@ -772,8 +759,7 @@ struct TransposeNodeOp : public UnaryNodeOp {
std::vector<int> axes_;
TransposeNodeOp(Expr a, const std::vector<int>& axes)
- : UnaryNodeOp(a, newShape(a, axes)),
- axes_{axes} {}
+ : UnaryNodeOp(a, newShape(a, axes)), axes_{axes} {}
NodeOps forwardOps() {
return {NodeOp(TransposeND(val_, child(0)->val(), axes_))};
@@ -788,7 +774,7 @@ struct TransposeNodeOp : public UnaryNodeOp {
Shape shape = a->shape();
ABORT_IF(shape.size() != axes.size(),
- "Shape and transpose axes have different number of dimensions");
+ "Shape and transpose axes have different number of dimensions");
for(int i = 0; i < shape.size(); ++i)
shape.set(i, a->shape()[axes[i]]);
@@ -829,8 +815,7 @@ private:
public:
template <typename... Args>
- ReshapeNodeOp(Expr a, Shape shape)
- : UnaryNodeOp(a, shape), reshapee_(a) {
+ ReshapeNodeOp(Expr a, Shape shape) : UnaryNodeOp(a, shape), reshapee_(a) {
Node::destroy_ = false;
}
@@ -894,9 +879,7 @@ private:
public:
StepNodeOp(Expr a, int step, int axis)
- : UnaryNodeOp(a, newShape(a, axis)),
- stepNode_(a),
- step_(step) {
+ : UnaryNodeOp(a, newShape(a, axis)), stepNode_(a), step_(step) {
Node::destroy_ = false;
}
@@ -1056,67 +1039,54 @@ public:
padWidth,
strideHeight,
strideWidth,
- mode) {
- }
+ mode) {}
NodeOps forwardOps() {
return {NodeOp(pooling_.forward(child(0)->val(), val_))};
}
NodeOps backwardOps() {
- return {NodeOp(pooling_.backward(
- child(0)->val(),
- child(0)->grad(),
- val_,
- adj_))};
+ return {NodeOp(
+ pooling_.backward(child(0)->val(), child(0)->grad(), val_, adj_))};
}
const std::string type() { return "layer_pooling"; }
-
protected:
PoolingWrapper pooling_;
};
class PoolingWithMaskingOp : public UnaryNodeOp {
- public:
- PoolingWithMaskingOp( Expr x, Expr mask, int width, bool isEven=false)
- : UnaryNodeOp(x),
- mask_(mask),
- width_(width),
- isEven_(isEven)
- {
- auto xShape = x->shape();
- int dimBatch = xShape[0];
- int dimWord = xShape[1];
- int cols = (isEven_) ? xShape[2] - 1 : xShape[2];
- int dimSentence = (cols / width_) + (cols % width_ != 0);
- shape_ = {dimBatch, dimWord, dimSentence};
- }
+public:
+ PoolingWithMaskingOp(Expr x, Expr mask, int width, bool isEven = false)
+ : UnaryNodeOp(x), mask_(mask), width_(width), isEven_(isEven) {
+ auto xShape = x->shape();
+ int dimBatch = xShape[0];
+ int dimWord = xShape[1];
+ int cols = (isEven_) ? xShape[2] - 1 : xShape[2];
+ int dimSentence = (cols / width_) + (cols % width_ != 0);
+ shape_ = {dimBatch, dimWord, dimSentence};
+ }
- NodeOps forwardOps() {
- return {NodeOp(PoolingWithMaskingForward(val_,
+ NodeOps forwardOps() {
+ return {NodeOp(PoolingWithMaskingForward(
+ val_, child(0)->val(), mask_->val(), width_, isEven_))};
+ }
+
+ NodeOps backwardOps() {
+ return {NodeOp(PoolingWithMaskingBackward(adj_,
+ child(0)->grad(),
child(0)->val(),
mask_->val(),
width_,
isEven_))};
- }
-
- NodeOps backwardOps() {
- return {NodeOp(PoolingWithMaskingBackward(adj_,
- child(0)->grad(),
- child(0)->val(),
- mask_->val(),
- width_,
- isEven_))};
- }
+ }
- const std::string type() {return "layer_pooling";}
+ const std::string type() { return "layer_pooling"; }
- protected:
- Expr mask_;
- int width_;
- bool isEven_;
+protected:
+ Expr mask_;
+ int width_;
+ bool isEven_;
};
-
}
diff --git a/src/layers/constructors.h b/src/layers/constructors.h
index 4f76c36c..a95d6253 100644
--- a/src/layers/constructors.h
+++ b/src/layers/constructors.h
@@ -23,18 +23,18 @@ struct LayerFactory : public Factory {
return as<Cast>() != nullptr;
}
- virtual Ptr<Layer> construct() = 0;
+ virtual Ptr<Layer> construct() = 0;
};
class DenseFactory : public LayerFactory {
protected:
- //std::vector<std::pair<std::string, std::string>> tiedParams_;
+ // std::vector<std::pair<std::string, std::string>> tiedParams_;
std::vector<std::pair<std::string, std::string>> tiedParamsTransposed_;
public:
DenseFactory(Ptr<ExpressionGraph> graph) : LayerFactory(graph) {}
- //Accumulator<DenseFactory> tie(const std::string& param,
+ // Accumulator<DenseFactory> tie(const std::string& param,
// const std::string& tied) {
// tiedParams_.push_back({param, tied});
// return Accumulator<DenseFactory>(*this);
@@ -48,21 +48,20 @@ public:
Ptr<Layer> construct() {
auto dense = New<Dense>(graph_, options_);
- //for(auto& p : tiedParams_)
+ // for(auto& p : tiedParams_)
// dense->tie(p.first, p.second);
for(auto& p : tiedParamsTransposed_)
dense->tie_transposed(p.first, p.second);
return dense;
}
-
+
DenseFactory clone() {
DenseFactory aClone(graph_);
aClone.options_->merge(options_);
- //aClone.tiedParams_ = tiedParams_;
+ // aClone.tiedParams_ = tiedParams_;
aClone.tiedParamsTransposed_ = tiedParamsTransposed_;
return aClone;
}
-
};
typedef Accumulator<DenseFactory> dense;
@@ -95,7 +94,6 @@ public:
}
void push_back(Ptr<Layer> layer) { layers_.push_back(layer); }
-
};
class MLPFactory : public Factory {
@@ -121,7 +119,7 @@ public:
layers_.push_back(New<LF>(lf));
return Accumulator<MLPFactory>(*this);
}
-
+
MLPFactory clone() {
MLPFactory aClone(graph_);
aClone.options_->merge(options_);
@@ -129,7 +127,6 @@ public:
aClone.push_back(lf->clone());
return aClone;
}
-
};
typedef Accumulator<MLPFactory> mlp;
diff --git a/src/layers/convolution.cpp b/src/layers/convolution.cpp
index 064abedf..eb1b0554 100644
--- a/src/layers/convolution.cpp
+++ b/src/layers/convolution.cpp
@@ -2,8 +2,7 @@
#include "graph/node_operators_binary.h"
namespace marian {
-Convolution::Convolution(Ptr<ExpressionGraph> graph)
- : Factory(graph) {}
+Convolution::Convolution(Ptr<ExpressionGraph> graph) : Factory(graph) {}
Expr Convolution::apply(Expr x) {
auto prefix = opt<std::string>("prefix");
@@ -13,28 +12,21 @@ Expr Convolution::apply(Expr x) {
auto strides = opt<std::pair<int, int>>("strides", std::make_pair(1, 1));
int layerIn = x->shape()[1];
- auto kernel = graph_->param(prefix + "_conv_kernels",
- {layerIn,
- kernelNum,
- kernelDims.first,
- kernelDims.second},
- inits::glorot_uniform);
+ auto kernel
+ = graph_->param(prefix + "_conv_kernels",
+ {layerIn, kernelNum, kernelDims.first, kernelDims.second},
+ inits::glorot_uniform);
- auto bias = graph_->param(prefix + "_conv_bias",
- {1, kernelNum, 1, 1},
- inits::zeros);
+ auto bias = graph_->param(
+ prefix + "_conv_bias", {1, kernelNum, 1, 1}, inits::zeros);
std::vector<Expr> nodes = {x, kernel, bias};
- return Expression<ConvolutionOp>(nodes,
- paddings.first,
- paddings.second,
- strides.first,
- strides.second);
+ return Expression<ConvolutionOp>(
+ nodes, paddings.first, paddings.second, strides.first, strides.second);
}
Expr Convolution::apply(const std::vector<Expr>&) {
ABORT("Can't apply convolution on many inputs at once");
return nullptr;
}
-
}
diff --git a/src/layers/factory.h b/src/layers/factory.h
index 32e725c6..62976b68 100644
--- a/src/layers/factory.h
+++ b/src/layers/factory.h
@@ -33,6 +33,7 @@ public:
template <class BaseFactory>
class Accumulator : public BaseFactory {
typedef BaseFactory Factory;
+
public:
Accumulator() : Factory(nullptr) {}
Accumulator(Ptr<ExpressionGraph> graph) : Factory(graph) {}
diff --git a/src/layers/generic.h b/src/layers/generic.h
index b9c1d100..dbff3a10 100644
--- a/src/layers/generic.h
+++ b/src/layers/generic.h
@@ -1,6 +1,7 @@
#pragma once
#include "marian.h"
+
#include "layers/factory.h"
namespace marian {
@@ -75,11 +76,9 @@ public:
if(tiedParams_.count(nameW)) {
W = tiedParams_[nameW];
transposeW = true;
- }
- else {
- W = g->param(name + "_" + nameW,
- {in->shape()[-1], dim},
- inits::glorot_uniform);
+ } else {
+ W = g->param(
+ name + "_" + nameW, {in->shape()[-1], dim}, inits::glorot_uniform);
}
Expr b;
@@ -87,8 +86,7 @@ public:
if(tiedParams_.count(nameB))
b = tiedParams_[nameB];
else
- b = g->param(
- name + "_" + nameB, {1, dim}, inits::zeros);
+ b = g->param(name + "_" + nameB, {1, dim}, inits::zeros);
params_.push_back(W);
params_.push_back(b);
@@ -98,19 +96,19 @@ public:
auto ln_s = g->param(name + "_ln_s" + std::to_string(i),
{1, dim},
inits::from_value(1.f));
- auto ln_b = g->param(name + "_ln_b" + std::to_string(i),
- {1, dim},
- inits::zeros);
+ auto ln_b = g->param(
+ name + "_ln_b" + std::to_string(i), {1, dim}, inits::zeros);
- outputs.push_back(
- layer_norm(affine(in, W, b, false, transposeW), ln_s, ln_b, NEMATUS_LN_EPS));
+ outputs.push_back(layer_norm(
+ affine(in, W, b, false, transposeW), ln_s, ln_b, NEMATUS_LN_EPS));
} else {
auto gamma = g->param(name + "_gamma" + std::to_string(i),
{1, dim},
inits::from_value(1.0));
params_.push_back(gamma);
- outputs.push_back(layer_norm(dot(in, W, false, transposeW), gamma, b));
+ outputs.push_back(
+ layer_norm(dot(in, W, false, transposeW), gamma, b));
}
} else {
@@ -147,11 +145,9 @@ public:
if(tiedParams_.count(nameW)) {
transposeW = true;
W = tiedParams_[nameW];
- }
- else {
- W = g->param(name + "_" + nameW,
- {input->shape()[-1], dim},
- inits::glorot_uniform);
+ } else {
+ W = g->param(
+ name + "_" + nameW, {input->shape()[-1], dim}, inits::glorot_uniform);
}
Expr b;
std::string nameB = "b";
@@ -165,16 +161,14 @@ public:
Expr out;
if(layerNorm) {
if(nematusNorm) {
- auto ln_s = g->param(
- name + "_ln_s", {1, dim}, inits::from_value(1.f));
- auto ln_b
- = g->param(name + "_ln_b", {1, dim}, inits::zeros);
+ auto ln_s = g->param(name + "_ln_s", {1, dim}, inits::from_value(1.f));
+ auto ln_b = g->param(name + "_ln_b", {1, dim}, inits::zeros);
- out = layer_norm(affine(input, W, b, false, transposeW),
- ln_s, ln_b, NEMATUS_LN_EPS);
+ out = layer_norm(
+ affine(input, W, b, false, transposeW), ln_s, ln_b, NEMATUS_LN_EPS);
} else {
- auto gamma = g->param(
- name + "_gamma", {1, dim}, inits::from_value(1.0));
+ auto gamma
+ = g->param(name + "_gamma", {1, dim}, inits::from_value(1.0));
params_.push_back(gamma);
out = layer_norm(dot(input, W, false, transposeW), gamma, b);
@@ -217,22 +211,18 @@ struct EmbeddingFactory : public Factory {
}
}
- return graph_->param(name,
- {dimVoc, dimEmb},
- initFunc,
- fixed);
+ return graph_->param(name, {dimVoc, dimEmb}, initFunc, fixed);
}
};
typedef Accumulator<EmbeddingFactory> embedding;
-static inline
-Expr Cost(Expr logits,
- Expr indices,
- Expr mask,
- std::string costType = "cross-entropy",
- float smoothing = 0,
- Expr weights = nullptr) {
+static inline Expr Cost(Expr logits,
+ Expr indices,
+ Expr mask,
+ std::string costType = "cross-entropy",
+ float smoothing = 0,
+ Expr weights = nullptr) {
using namespace keywords;
auto ce = cross_entropy(logits, indices);
@@ -255,15 +245,17 @@ Expr Cost(Expr logits,
// axes:
// - time axis (words): -3
// - batch axis (sentences): -2
- if(costType == "ce-mean" || costType == "cross-entropy") { // sum over words; average over sentences
+ if(costType == "ce-mean"
+ || costType
+ == "cross-entropy") { // sum over words; average over sentences
cost = mean(costSum, axis = -2);
- } else if(costType == "ce-mean-words") { // average over target tokens
+ } else if(costType == "ce-mean-words") { // average over target tokens
cost = sum(costSum, axis = -2) / sum(sum(mask, axis = -3), axis = -2);
- } else if(costType == "ce-sum") { // sum over target tokens
+ } else if(costType == "ce-sum") { // sum over target tokens
cost = sum(costSum, axis = -2);
- } else if(costType == "perplexity") { // ==exp('ce-mean-words')
+ } else if(costType == "perplexity") { // ==exp('ce-mean-words')
cost = exp(sum(costSum, axis = -2) / sum(sum(mask, axis = -3), axis = -2));
- } else if(costType == "ce-rescore") { // sum over words, keep batch axis
+ } else if(costType == "ce-rescore") { // sum over words, keep batch axis
cost = -costSum;
} else { // same as ce-mean
cost = mean(costSum, axis = -2);
diff --git a/src/layers/guided_alignment.h b/src/layers/guided_alignment.h
index c353f649..c5dec742 100644
--- a/src/layers/guided_alignment.h
+++ b/src/layers/guided_alignment.h
@@ -4,20 +4,18 @@
namespace marian {
-static inline
-Expr guidedAlignmentCost(Ptr<ExpressionGraph> graph,
- Ptr<data::CorpusBatch> batch,
- Ptr<Options> options,
- Expr att) {
+static inline Expr guidedAlignmentCost(Ptr<ExpressionGraph> graph,
+ Ptr<data::CorpusBatch> batch,
+ Ptr<Options> options,
+ Expr att) {
using namespace keywords;
int dimBatch = att->shape()[0];
int dimSrc = att->shape()[2];
int dimTrg = att->shape()[3];
- auto aln = graph->constant(
- {dimBatch, 1, dimSrc, dimTrg},
- inits::from_vector(batch->getGuidedAlignment()));
+ auto aln = graph->constant({dimBatch, 1, dimSrc, dimTrg},
+ inits::from_vector(batch->getGuidedAlignment()));
std::string guidedCostType
= options->get<std::string>("guided-alignment-cost");
diff --git a/src/models/char_s2s.h b/src/models/char_s2s.h
index 64d1585f..f0c5de5c 100644
--- a/src/models/char_s2s.h
+++ b/src/models/char_s2s.h
@@ -2,6 +2,7 @@
#include "marian.h"
#include "models/s2s.h"
+
#include "layers/convolution.h"
namespace marian {
@@ -29,21 +30,18 @@ public:
int dimEmb = opt<int>("dim-emb");
auto convSizes = options_->get<std::vector<int>>("char-conv-filters-num");
- auto convWidths = options_->get<std::vector<int>>("char-conv-filters-widths");
+ auto convWidths
+ = options_->get<std::vector<int>>("char-conv-filters-widths");
int stride = opt<int>("char-stride");
int highwayNum = opt<int>("char-highway");
auto conved = CharConvPooling(
- prefix_ + "conv_pooling",
- dimEmb,
- convWidths,
- convSizes,
- stride)
- (batchEmbeddings, batchMask);
+ prefix_ + "conv_pooling", dimEmb, convWidths, convSizes, stride)(
+ batchEmbeddings, batchMask);
auto inHighway = conved;
- for (int i = 0; i < highwayNum; ++i) {
- inHighway = highway(prefix_ +"_" + std::to_string(i), inHighway);
+ for(int i = 0; i < highwayNum; ++i) {
+ inHighway = highway(prefix_ + "_" + std::to_string(i), inHighway);
}
Expr stridedMask = getStridedMask(graph, batch, stride);
@@ -52,24 +50,26 @@ public:
return New<EncoderState>(context, stridedMask, batch);
}
+
protected:
- Expr getStridedMask(Ptr<ExpressionGraph> graph, Ptr<data::CorpusBatch> batch,
+ Expr getStridedMask(Ptr<ExpressionGraph> graph,
+ Ptr<data::CorpusBatch> batch,
int stride) {
auto subBatch = (*batch)[batchIndex_];
int dimBatch = subBatch->batchSize();
std::vector<float> strided;
- for (size_t wordIdx = 0; wordIdx < subBatch->mask().size(); wordIdx += stride * dimBatch) {
- for (size_t j = wordIdx; j < wordIdx + dimBatch; ++j) {
+ for(size_t wordIdx = 0; wordIdx < subBatch->mask().size();
+ wordIdx += stride * dimBatch) {
+ for(size_t j = wordIdx; j < wordIdx + dimBatch; ++j) {
strided.push_back(subBatch->mask()[j]);
}
}
int dimWords = strided.size() / dimBatch;
- auto stridedMask = graph->constant({dimWords, dimBatch, 1},
- inits::from_vector(strided));
+ auto stridedMask
+ = graph->constant({dimWords, dimBatch, 1}, inits::from_vector(strided));
return stridedMask;
}
};
-
}
diff --git a/src/models/encdec.h b/src/models/encdec.h
index 39229953..dc38fef1 100644
--- a/src/models/encdec.h
+++ b/src/models/encdec.h
@@ -1,6 +1,7 @@
#pragma once
#include "marian.h"
+
#include "layers/generic.h"
#include "layers/guided_alignment.h"
#include "model_base.h"
@@ -30,8 +31,8 @@ protected:
auto batchEmbeddings
= reshape(chosenEmbeddings, {dimWords, dimBatch, dimEmb});
- auto batchMask = graph->constant(
- {dimWords, dimBatch, 1}, inits::from_vector(subBatch->mask()));
+ auto batchMask = graph->constant({dimWords, dimBatch, 1},
+ inits::from_vector(subBatch->mask()));
return std::make_tuple(batchEmbeddings, batchMask);
}
@@ -129,7 +130,8 @@ public:
virtual void selectEmbeddings(Ptr<ExpressionGraph> graph,
Ptr<DecoderState> state,
const std::vector<size_t>& embIdx,
- int dimBatch, int dimBeam) {
+ int dimBatch,
+ int dimBeam) {
using namespace keywords;
int dimTrgEmb = opt<int>("dim-emb");
@@ -149,12 +151,10 @@ public:
Expr selectedEmbs;
if(embIdx.empty()) {
- selectedEmbs = graph->constant({1, 1, dimBatch, dimTrgEmb},
- inits::zeros);
+ selectedEmbs = graph->constant({1, 1, dimBatch, dimTrgEmb}, inits::zeros);
} else {
selectedEmbs = rows(yEmb, embIdx);
- selectedEmbs
- = reshape(selectedEmbs, {dimBeam, 1, dimBatch, dimTrgEmb});
+ selectedEmbs = reshape(selectedEmbs, {dimBeam, 1, dimBatch, dimTrgEmb});
}
state->setTargetEmbeddings(selectedEmbs);
}
@@ -174,14 +174,16 @@ public:
virtual void selectEmbeddings(Ptr<ExpressionGraph> graph,
Ptr<DecoderState> state,
const std::vector<size_t>&,
- int dimBatch, int beamSize)
+ int dimBatch,
+ int beamSize)
= 0;
virtual Ptr<DecoderState> step(Ptr<ExpressionGraph> graph,
Ptr<DecoderState>,
const std::vector<size_t>&,
const std::vector<size_t>&,
- int dimBatch, int beamSize)
+ int dimBatch,
+ int beamSize)
= 0;
virtual Ptr<DecoderState> step(Ptr<ExpressionGraph>, Ptr<DecoderState>) = 0;
@@ -224,7 +226,8 @@ protected:
decoder["mini-batch"] = opt<size_t>("valid-mini-batch");
decoder["maxi-batch"] = opt<size_t>("valid-mini-batch") > 1 ? 100 : 1;
- decoder["maxi-batch-sort"] = opt<size_t>("valid-mini-batch") > 1 ? "trg" : "none";
+ decoder["maxi-batch-sort"]
+ = opt<size_t>("valid-mini-batch") > 1 ? "trg" : "none";
decoder["relative-paths"] = false;
@@ -320,8 +323,10 @@ public:
Ptr<DecoderState> state,
const std::vector<size_t>& hypIndices,
const std::vector<size_t>& embIndices,
- int dimBatch, int beamSize) {
- auto selectedState = hypIndices.empty() ? state : state->select(hypIndices, beamSize);
+ int dimBatch,
+ int beamSize) {
+ auto selectedState
+ = hypIndices.empty() ? state : state->select(hypIndices, beamSize);
selectEmbeddings(graph, selectedState, embIndices, dimBatch, beamSize);
selectedState->setSingleStep(true);
auto nextState = step(graph, selectedState);
@@ -332,7 +337,8 @@ public:
virtual void selectEmbeddings(Ptr<ExpressionGraph> graph,
Ptr<DecoderState> state,
const std::vector<size_t>& embIdx,
- int dimBatch, int beamSize) {
+ int dimBatch,
+ int beamSize) {
decoders_[0]->selectEmbeddings(graph, state, embIdx, dimBatch, beamSize);
}
@@ -366,17 +372,12 @@ public:
int dimBatch = batch->size();
int dimWords = sentenceWeighting ? 1 : batch->back()->batchWidth();
- weights = graph->constant(
- {1, dimWords, dimBatch, 1},
- inits::from_vector(batch->getDataWeights()));
+ weights = graph->constant({1, dimWords, dimBatch, 1},
+ inits::from_vector(batch->getDataWeights()));
}
- auto cost = Cost(nextState->getProbs(),
- trgIdx,
- trgMask,
- costType,
- ls,
- weights);
+ auto cost
+ = Cost(nextState->getProbs(), trgIdx, trgMask, costType, ls, weights);
if(options_->has("guided-alignment") && !inference_) {
auto alignments = decoders_[0]->getAlignments();
@@ -428,7 +429,7 @@ public:
do {
size_t current = (start + end) / 2;
- //std::cerr << i << " " << current << std::endl;
+ // std::cerr << i << " " << current << std::endl;
auto batch = data::CorpusBatch::fakeBatch(lengths, current, options_);
build(graph, batch);
fits = graph->fits();
@@ -436,8 +437,7 @@ public:
if(fits) {
stats->add(batch, multiplier);
start = current + 1;
- }
- else {
+ } else {
end = current - 1;
}
} while(end - start > step);
diff --git a/src/models/hardatt.h b/src/models/hardatt.h
index 85aa12a2..ff49ab05 100644
--- a/src/models/hardatt.h
+++ b/src/models/hardatt.h
@@ -1,9 +1,10 @@
#pragma once
#include "marian.h"
+
#include "layers/generic.h"
-#include "rnn/types.h"
#include "rnn/attention_constructors.h"
+#include "rnn/types.h"
#include <numeric>
@@ -21,13 +22,16 @@ public:
: DecoderState(states, probs, encStates),
attentionIndices_(attentionIndices) {}
- virtual Ptr<DecoderState> select(const std::vector<size_t>& selIdx, int beamSize) {
+ virtual Ptr<DecoderState> select(const std::vector<size_t>& selIdx,
+ int beamSize) {
std::vector<size_t> selectedAttentionIndices;
for(auto i : selIdx)
selectedAttentionIndices.push_back(attentionIndices_[i]);
- return New<DecoderStateHardAtt>(
- states_.select(selIdx, beamSize), probs_, encStates_, selectedAttentionIndices);
+ return New<DecoderStateHardAtt>(states_.select(selIdx, beamSize),
+ probs_,
+ encStates_,
+ selectedAttentionIndices);
}
virtual void setAttentionIndices(
@@ -85,11 +89,11 @@ public:
Expr start;
if(!meanContexts.empty()) {
// apply single layer network to mean to map into decoder space
- auto mlp = mlp::mlp(graph) //
- .push_back(mlp::dense(graph) //
- ("prefix", prefix_ + "_ff_state") //
- ("dim", opt<int>("dim-rnn")) //
- ("activation", (int)mlp::act::tanh)//
+ auto mlp = mlp::mlp(graph) //
+ .push_back(mlp::dense(graph) //
+ ("prefix", prefix_ + "_ff_state") //
+ ("dim", opt<int>("dim-rnn")) //
+ ("activation", (int)mlp::act::tanh) //
("layer-normalization",
opt<bool>("layer-normalization")));
start = mlp->apply(meanContexts);
@@ -133,7 +137,8 @@ public:
int dimBeam = trgEmbeddings->shape()[-4];
if(dropoutTrg) {
- trgEmbeddings = dropout(trgEmbeddings, dropoutTrg, {dimTrgWords, dimBatch, 1});
+ trgEmbeddings
+ = dropout(trgEmbeddings, dropoutTrg, {dimTrgWords, dimBatch, 1});
}
auto flatContext = reshape(context, {dimBatch * dimSrcWords, dimContext});
diff --git a/src/models/model_base.h b/src/models/model_base.h
index 3f077df7..b000af27 100644
--- a/src/models/model_base.h
+++ b/src/models/model_base.h
@@ -1,7 +1,7 @@
#pragma once
-#include "marian.h"
#include <string>
+#include "marian.h"
namespace marian {
namespace models {
diff --git a/src/models/model_factory.cpp b/src/models/model_factory.cpp
index 2cd8b631..ea7f809b 100644
--- a/src/models/model_factory.cpp
+++ b/src/models/model_factory.cpp
@@ -1,12 +1,13 @@
#include "marian.h"
+
#include "models/model_factory.h"
-#include "models/s2s.h"
-#include "models/transformer.h"
-#include "models/hardatt.h"
#include "models/amun.h"
-#include "models/nematus.h"
#include "models/encdec.h"
+#include "models/hardatt.h"
+#include "models/nematus.h"
+#include "models/s2s.h"
+#include "models/transformer.h"
#ifdef CUDNN
#include "models/char_s2s.h"
diff --git a/src/models/model_factory.h b/src/models/model_factory.h
index 409afe3d..0d3f4c89 100644
--- a/src/models/model_factory.h
+++ b/src/models/model_factory.h
@@ -1,8 +1,9 @@
#pragma once
#include "marian.h"
-#include "layers/factory.h"
+
#include "encdec.h"
+#include "layers/factory.h"
namespace marian {
diff --git a/src/models/s2s.h b/src/models/s2s.h
index a9528b28..4b9da7bf 100644
--- a/src/models/s2s.h
+++ b/src/models/s2s.h
@@ -1,9 +1,10 @@
#pragma once
#include "marian.h"
+
#include "layers/constructors.h"
-#include "rnn/constructors.h"
#include "rnn/attention_constructors.h"
+#include "rnn/constructors.h"
namespace marian {
diff --git a/src/models/states.h b/src/models/states.h
index f1f3c704..e2635c87 100644
--- a/src/models/states.h
+++ b/src/models/states.h
@@ -48,8 +48,10 @@ public:
virtual Expr getProbs() { return probs_; }
virtual void setProbs(Expr probs) { probs_ = probs; }
- virtual Ptr<DecoderState> select(const std::vector<size_t>& selIdx, int beamSize) {
- return New<DecoderState>(states_.select(selIdx, beamSize), probs_, encStates_);
+ virtual Ptr<DecoderState> select(const std::vector<size_t>& selIdx,
+ int beamSize) {
+ return New<DecoderState>(
+ states_.select(selIdx, beamSize), probs_, encStates_);
}
virtual const rnn::States& getStates() { return states_; }
diff --git a/src/models/transformer.h b/src/models/transformer.h
index 02478caa..ac20228c 100644
--- a/src/models/transformer.h
+++ b/src/models/transformer.h
@@ -1,11 +1,12 @@
#pragma once
#include "marian.h"
-#include "layers/factory.h"
+
+#include "encdec.h"
#include "layers/constructors.h"
+#include "layers/factory.h"
#include "model_base.h"
#include "model_factory.h"
-#include "encdec.h"
namespace marian {
@@ -34,8 +35,8 @@ public:
}
// shared across batch entries
- auto signal = graph->constant({dimWords, 1, dimEmb},
- inits::from_vector(vPos));
+ auto signal
+ = graph->constant({dimWords, 1, dimEmb}, inits::from_vector(vPos));
return input + signal;
}
@@ -47,15 +48,14 @@ public:
for(int i = 0; i < length; ++i)
for(int j = 0; j <= i; ++j)
vMask[i * length + j] = 1.f;
- return graph->constant({1, length, length},
- inits::from_vector(vMask));
+ return graph->constant({1, length, length}, inits::from_vector(vMask));
}
Expr InverseMask(Expr mask) {
// convert 0/1 mask to transformer style -inf mask
auto ms = mask->shape();
mask = (1 - mask) * -99999999.f;
- return reshape(mask, {ms[-3], 1, ms[-2], ms[-1]}) ;
+ return reshape(mask, {ms[-3], 1, ms[-2], ms[-1]});
}
Expr SplitHeads(Expr input, int dimHeads) {
@@ -135,18 +135,17 @@ public:
if(op == 'h') {
auto Wh = graph->param(
prefix + "_Wh", {dimModel, dimModel}, inits::glorot_uniform);
- auto bh
- = graph->param(prefix + "_bh", {1, dimModel}, inits::zeros);
+ auto bh = graph->param(prefix + "_bh", {1, dimModel}, inits::zeros);
auto t = affine(prevInput, Wh, bh);
output = highway(output, prevInput, t);
}
// layer normalization
if(op == 'n') {
- auto scale = graph->param(
- prefix + "_ln_scale", {1, dimModel}, inits::ones);
- auto bias = graph->param(
- prefix + "_ln_bias", {1, dimModel}, inits::zeros);
+ auto scale
+ = graph->param(prefix + "_ln_scale", {1, dimModel}, inits::ones);
+ auto bias
+ = graph->param(prefix + "_ln_bias", {1, dimModel}, inits::zeros);
output = layer_norm(output, scale, bias, 1e-6);
}
}
@@ -219,17 +218,13 @@ public:
if(i > 0)
prefixProj += "_enc" + std::to_string(i + 1);
- auto Wk = graph->param(prefixProj + "_Wk",
- {dimModel, dimModel},
- inits::glorot_uniform);
- auto bk = graph->param(
- prefixProj + "_bk", {1, dimModel}, inits::zeros);
+ auto Wk = graph->param(
+ prefixProj + "_Wk", {dimModel, dimModel}, inits::glorot_uniform);
+ auto bk = graph->param(prefixProj + "_bk", {1, dimModel}, inits::zeros);
- auto Wv = graph->param(prefixProj + "_Wv",
- {dimModel, dimModel},
- inits::glorot_uniform);
- auto bv = graph->param(
- prefixProj + "_bv", {1, dimModel}, inits::zeros);
+ auto Wv = graph->param(
+ prefixProj + "_Wv", {dimModel, dimModel}, inits::glorot_uniform);
+ auto bv = graph->param(prefixProj + "_bv", {1, dimModel}, inits::zeros);
auto kh = affine(keys[i], Wk, bk);
auto vh = affine(values[i], Wv, bv);
@@ -254,8 +249,8 @@ public:
int dimAtt = output->shape()[-1];
- auto Wo = graph->param(
- prefix + "_Wo", {dimAtt, dimOut}, inits::glorot_uniform);
+ auto Wo
+ = graph->param(prefix + "_Wo", {dimAtt, dimOut}, inits::glorot_uniform);
auto bo = graph->param(prefix + "_bo", {1, dimOut}, inits::zeros);
output = affine(output, Wo, bo);
@@ -468,11 +463,12 @@ public:
std::vector<Ptr<EncoderState>> &encStates)
: DecoderState(states, probs, encStates) {}
- virtual Ptr<DecoderState> select(const std::vector<size_t> &selIdx, int beamSize) {
+ virtual Ptr<DecoderState> select(const std::vector<size_t> &selIdx,
+ int beamSize) {
rnn::States selectedStates;
int dimDepth = states_[0].output->shape()[-1];
- int dimTime = states_[0].output->shape()[-2];
+ int dimTime = states_[0].output->shape()[-2];
int dimBatch = selIdx.size() / beamSize;
std::vector<size_t> selIdx2;
@@ -553,7 +549,7 @@ public:
decoderMask = reshape(TransposeTimeBatch(decoderMask),
{1, dimBatch, 1, dimTrgWords});
selfMask = selfMask * decoderMask;
- //if(dimBeam > 1)
+ // if(dimBeam > 1)
// selfMask = repeat(selfMask, dimBeam, axis = -4);
}
@@ -586,7 +582,8 @@ public:
for(int i = 1; i <= opt<int>("dec-depth"); ++i) {
auto values = query;
if(prevDecoderStates.size() > 0)
- values = concatenate({prevDecoderStates[i - 1].output, query}, axis = -2);
+ values
+ = concatenate({prevDecoderStates[i - 1].output, query}, axis = -2);
decoderStates.push_back({values, nullptr});
diff --git a/src/optimizers/clippers.cpp b/src/optimizers/clippers.cpp
index ea2a9b30..0ed21f91 100644
--- a/src/optimizers/clippers.cpp
+++ b/src/optimizers/clippers.cpp
@@ -15,5 +15,4 @@ void Norm::clip(Tensor t) {
if(l2Norm >= c_)
Element(_1 = (c_ / l2Norm) * _1, t);
}
-
}
diff --git a/src/optimizers/optimizers.cpp b/src/optimizers/optimizers.cpp
index 3841cbcc..d532d2c9 100644
--- a/src/optimizers/optimizers.cpp
+++ b/src/optimizers/optimizers.cpp
@@ -58,7 +58,8 @@ void Adagrad::load(const std::string& name,
// extract data into vectors
if(name == "adagrad_gt") {
vGt.resize(totalSize);
- std::copy((float*)np->data(), (float*)np->data() + totalSize, vGt.begin());
+ std::copy(
+ (float*)np->data(), (float*)np->data() + totalSize, vGt.begin());
}
}
@@ -178,11 +179,13 @@ void Adam::load(const std::string& name,
// extract data into vectors
if(name == "adam_mt") {
vMt.resize(totalSize);
- std::copy((float*)np->data(), (float*)np->data() + totalSize, vMt.begin());
+ std::copy(
+ (float*)np->data(), (float*)np->data() + totalSize, vMt.begin());
}
if(name == "adam_vt") {
vVt.resize(totalSize);
- std::copy((float*)np->data(), (float*)np->data() + totalSize, vVt.begin());
+ std::copy(
+ (float*)np->data(), (float*)np->data() + totalSize, vVt.begin());
}
}
diff --git a/src/optimizers/optimizers.h b/src/optimizers/optimizers.h
index ea51c625..41fe2404 100644
--- a/src/optimizers/optimizers.h
+++ b/src/optimizers/optimizers.h
@@ -1,8 +1,8 @@
#pragma once
+#include <algorithm>
#include <map>
#include <memory>
-#include <algorithm>
#include "common/config.h"
#include "graph/expression_graph.h"
diff --git a/src/rescorer/rescorer.h b/src/rescorer/rescorer.h
index 5ba409f3..8e9cd95e 100644
--- a/src/rescorer/rescorer.h
+++ b/src/rescorer/rescorer.h
@@ -42,9 +42,11 @@ private:
public:
Rescore(Ptr<Config> options)
: options_(options),
- corpus_(options_->get<bool>("n-best") ?
- std::static_pointer_cast<CorpusBase>(New<CorpusNBest>(options_)) :
- std::static_pointer_cast<CorpusBase>(New<Corpus>(options_))) {
+ corpus_(
+ options_->get<bool>("n-best")
+ ? std::static_pointer_cast<CorpusBase>(
+ New<CorpusNBest>(options_))
+ : std::static_pointer_cast<CorpusBase>(New<Corpus>(options_))) {
corpus_->prepare();
auto devices = options_->getDevices();
@@ -66,12 +68,12 @@ public:
models_.resize(graphs_.size());
ThreadPool pool(graphs_.size(), graphs_.size());
for(int i = 0; i < graphs_.size(); ++i) {
-
- pool.enqueue([=](int j) {
- models_[j] = New<Model>(temp);
- models_[j]->load(graphs_[j], modelFile);
- }, i);
-
+ pool.enqueue(
+ [=](int j) {
+ models_[j] = New<Model>(temp);
+ models_[j]->load(graphs_[j], modelFile);
+ },
+ i);
}
}
@@ -81,9 +83,10 @@ public:
auto batchGenerator = New<BatchGenerator<CorpusBase>>(corpus_, options_);
batchGenerator->prepare(false);
- Ptr<ScoreCollector> output = options_->get<bool>("n-best") ?
- std::static_pointer_cast<ScoreCollector>(New<ScoreCollectorNBest>(options_)) :
- New<ScoreCollector>();
+ Ptr<ScoreCollector> output = options_->get<bool>("n-best")
+ ? std::static_pointer_cast<ScoreCollector>(
+ New<ScoreCollectorNBest>(options_))
+ : New<ScoreCollector>();
bool summarize = options_->has("summary");
std::string summary
diff --git a/src/rescorer/score_collector.h b/src/rescorer/score_collector.h
index e828f18f..3144e3fb 100644
--- a/src/rescorer/score_collector.h
+++ b/src/rescorer/score_collector.h
@@ -13,8 +13,7 @@ namespace marian {
class ScoreCollector {
public:
- ScoreCollector()
- : nextId_(0), outStrm_(new OutputFileStream(std::cout)) {};
+ ScoreCollector() : nextId_(0), outStrm_(new OutputFileStream(std::cout)){};
virtual void Write(long id, const std::string& message) {
boost::mutex::scoped_lock lock(mutex_);
@@ -52,9 +51,7 @@ public:
}
}
- virtual void Write(long id, float value) {
- Write(id, std::to_string(value));
- }
+ virtual void Write(long id, float value) { Write(id, std::to_string(value)); }
protected:
long nextId_{0};
@@ -86,7 +83,9 @@ public:
ScoreCollectorNBest(const ScoreCollectorNBest&) = delete;
- std::string addToNBest(const std::string nbest, const std::string feature, float score) {
+ std::string addToNBest(const std::string nbest,
+ const std::string feature,
+ float score) {
std::vector<std::string> fields;
Split(nbest, fields, "|||");
std::stringstream ss;
@@ -96,13 +95,15 @@ public:
}
virtual void Write(long id, float score) {
-
std::string line;
{
boost::mutex::scoped_lock lock(mutex_);
auto iter = buffer_.find(id);
if(iter == buffer_.end()) {
- ABORT_IF(lastRead_ >= id, "Entry {} < {} already read but not in buffer", id, lastRead_);
+ ABORT_IF(lastRead_ >= id,
+ "Entry {} < {} already read but not in buffer",
+ id,
+ lastRead_);
std::string line;
while(lastRead_ < id && std::getline((std::istream&)*file_, line)) {
lastRead_++;
@@ -116,6 +117,5 @@ public:
ScoreCollector::Write(id, addToNBest(line, fname_, score));
}
-
};
}
diff --git a/src/rnn/attention_constructors.h b/src/rnn/attention_constructors.h
index 30e93481..046e8ce8 100644
--- a/src/rnn/attention_constructors.h
+++ b/src/rnn/attention_constructors.h
@@ -1,10 +1,11 @@
#pragma once
#include "marian.h"
+
#include "layers/factory.h"
-#include "rnn/types.h"
-#include "rnn/constructors.h"
#include "rnn/attention.h"
+#include "rnn/constructors.h"
+#include "rnn/types.h"
namespace marian {
namespace rnn {
diff --git a/src/rnn/cells.cpp b/src/rnn/cells.cpp
index 8b38780f..26cd3e75 100644
--- a/src/rnn/cells.cpp
+++ b/src/rnn/cells.cpp
@@ -52,8 +52,7 @@ Expr gruOps(const std::vector<Expr>& nodes, bool final) {
/******************************************************************************/
struct LSTMCellNodeOp : public NaryNodeOp {
- LSTMCellNodeOp(const std::vector<Expr>& nodes)
- : NaryNodeOp(nodes) {}
+ LSTMCellNodeOp(const std::vector<Expr>& nodes) : NaryNodeOp(nodes) {}
NodeOps forwardOps() {
std::vector<Tensor> inputs;
@@ -89,8 +88,7 @@ struct LSTMCellNodeOp : public NaryNodeOp {
};
struct LSTMOutputNodeOp : public NaryNodeOp {
- LSTMOutputNodeOp(const std::vector<Expr>& nodes)
- : NaryNodeOp(nodes) {}
+ LSTMOutputNodeOp(const std::vector<Expr>& nodes) : NaryNodeOp(nodes) {}
NodeOps forwardOps() {
std::vector<Tensor> inputs;
diff --git a/src/rnn/cells.h b/src/rnn/cells.h
index 5750bda4..bcfa4a90 100644
--- a/src/rnn/cells.h
+++ b/src/rnn/cells.h
@@ -43,8 +43,7 @@ public:
{dimInput, dimState},
inits::glorot_uniform);
- b_ = graph->param(
- prefix + "_b", {1, dimState}, inits::zeros);
+ b_ = graph->param(prefix + "_b", {1, dimState}, inits::zeros);
if(dropout_ > 0.0f) {
if(dimInput)
@@ -520,8 +519,7 @@ public:
{dimInput, 4 * dimState},
inits::glorot_uniform);
- b_ = graph->param(
- prefix + "_b", {1, 4 * dimState}, inits::zeros);
+ b_ = graph->param(prefix + "_b", {1, 4 * dimState}, inits::zeros);
if(dropout_ > 0.0f) {
if(dimInput)
diff --git a/src/rnn/constructors.h b/src/rnn/constructors.h
index 41879892..638464c8 100644
--- a/src/rnn/constructors.h
+++ b/src/rnn/constructors.h
@@ -1,7 +1,7 @@
#pragma once
-#include "marian.h"
#include "layers/factory.h"
+#include "marian.h"
#include "rnn/rnn.h"
namespace marian {
diff --git a/src/rnn/rnn.h b/src/rnn/rnn.h
index 4f1be340..6664b48f 100644
--- a/src/rnn/rnn.h
+++ b/src/rnn/rnn.h
@@ -1,9 +1,9 @@
#pragma once
-#include "marian.h"
#include "layers/generic.h"
-#include "rnn/types.h"
+#include "marian.h"
#include "rnn/cells.h"
+#include "rnn/types.h"
#include <algorithm>
#include <chrono>
@@ -74,10 +74,9 @@ private:
j = timeSteps - i - 1;
std::vector<Expr> steps(xWs.size());
- std::transform(xWs.begin(),
- xWs.end(),
- steps.begin(),
- [j](Expr e) { return step(e, j, -3); });
+ std::transform(xWs.begin(), xWs.end(), steps.begin(), [j](Expr e) {
+ return step(e, j, -3);
+ });
if(mask)
state = cell_->applyState(steps, state, step(mask, j, -3));
diff --git a/src/tensors/allocator.h b/src/tensors/allocator.h
index 6781afa7..df23fd06 100644
--- a/src/tensors/allocator.h
+++ b/src/tensors/allocator.h
@@ -9,8 +9,8 @@
#include <vector>
#include "common/definitions.h"
-#include "tensors/memory_piece.h"
#include "tensors/device.h"
+#include "tensors/memory_piece.h"
namespace marian {
@@ -92,8 +92,8 @@ private:
gaps_.swap(oldGaps);
for(auto gap : oldGaps)
- gaps_.insert(
- Gap(device_->data() + std::distance(oldData, gap.data()), gap.size()));
+ gaps_.insert(Gap(device_->data() + std::distance(oldData, gap.data()),
+ gap.size()));
insertGap(Gap(device_->data() + oldSize, add));
std::unordered_map<uint8_t*, Ptr<MemoryPiece>> oldAllocated;
@@ -141,7 +141,10 @@ private:
}
public:
- Allocator(DeviceId deviceId, size_t bytes, size_t step, size_t alignment = 256)
+ Allocator(DeviceId deviceId,
+ size_t bytes,
+ size_t step,
+ size_t alignment = 256)
: device_(DispatchDevice(deviceId, alignment)),
step_(step),
available_(0),
diff --git a/src/tensors/backend.cpp b/src/tensors/backend.cpp
index 05a70b18..e0165cbb 100644
--- a/src/tensors/backend.cpp
+++ b/src/tensors/backend.cpp
@@ -16,5 +16,4 @@ Ptr<Backend> BackendByDevice(DeviceId deviceId, size_t seed) {
#endif
return New<cpu::Backend>(deviceId, seed);
}
-
}
diff --git a/src/tensors/backend.h b/src/tensors/backend.h
index c69c3c09..3cd51ce1 100644
--- a/src/tensors/backend.h
+++ b/src/tensors/backend.h
@@ -10,8 +10,7 @@ protected:
size_t seed_;
public:
- Backend(DeviceId deviceId, size_t seed)
- : deviceId_(deviceId), seed_(seed) {}
+ Backend(DeviceId deviceId, size_t seed) : deviceId_(deviceId), seed_(seed) {}
virtual DeviceId getDevice() { return deviceId_; };
virtual void setDevice() = 0;
@@ -19,5 +18,4 @@ public:
};
Ptr<Backend> BackendByDevice(DeviceId deviceId, size_t seed);
-
}
diff --git a/src/tensors/cpu/add.h b/src/tensors/cpu/add.h
index f3c93294..228edf89 100644
--- a/src/tensors/cpu/add.h
+++ b/src/tensors/cpu/add.h
@@ -5,24 +5,22 @@
#pragma once
-#include "tensors/tensor.h"
#include "functional/functional.h"
#include "functional/shape.h"
-#include "functional/tmp.h"
#include "functional/tensor.h"
+#include "functional/tmp.h"
+#include "tensors/tensor.h"
namespace marian {
namespace cpu {
-
template <size_t K, class Functor>
void gAddGeneric(Functor functor,
const functional::Shape full,
functional::Tensor<float> out,
functional::Array<functional::Tensor<float>, K> ins,
float scale = 1.0) {
-
int outLength = out.shape().elements();
bool same = outLength == full.elements();
for(int i = 0; i < K; ++i)
@@ -73,7 +71,6 @@ void gAddReduce(Functor functor,
functional::Tensor<float> out,
functional::Array<functional::Tensor<float>, K> ins,
float scale = 1.0) {
-
int rows = full.elements() / full.back();
int cols = full.back();
@@ -100,12 +97,8 @@ void gAddReduce(Functor functor,
}
}
-template <class Functor, class ...Tensors>
-void Add(Functor functor,
- float scale,
- marian::Tensor out,
- Tensors... tensors) {
-
+template <class Functor, class... Tensors>
+void Add(Functor functor, float scale, marian::Tensor out, Tensors... tensors) {
auto full = marian::Shape::broadcast({out, tensors...});
int length = out->shape().elements();
@@ -113,7 +106,7 @@ void Add(Functor functor,
constexpr size_t K = sizeof...(Tensors);
functional::Tensor<float> gOut = out;
- functional::Array<functional::Tensor<float>, K> gIns = {tensors ...};
+ functional::Array<functional::Tensor<float>, K> gIns = {tensors...};
if(full.back() != 1 && out->shape().back() == 1) {
size_t m = full.elements() / length;
@@ -128,8 +121,5 @@ void Add(Functor functor,
cpu::gAddGeneric(functor, full, gOut, gIns, scale);
}
}
-
-
}
-
}
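
Add above is variadic: the tensor parameter pack is collected into a functional::Array and the functor is applied elementwise, with broadcasting and reduction handled by gAddGeneric/gAddReduce. A minimal sketch of the pack-expansion part only, using std::vector in place of marian::Tensor and ignoring shapes and broadcasting; all names are illustrative:

#include <cstddef>
#include <iostream>
#include <vector>

// sketch: apply functor elementwise over the expanded pack, accumulate scaled
template <class Functor, class... Tensors>
void Add(Functor functor, float scale, std::vector<float>& out,
         const Tensors&... tensors) {
  for(std::size_t i = 0; i < out.size(); ++i)
    out[i] += functor(tensors[i]...) * scale;  // pack expands to one value per input
}

int main() {
  std::vector<float> out(3, 0.f), a{1, 2, 3}, b{10, 20, 30};
  Add([](float x, float y) { return x + y; }, 0.5f, out, a, b);
  for(float v : out) std::cout << v << " ";    // 5.5 11 16.5
  std::cout << "\n";
}
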
diff --git a/src/tensors/cpu/backend.h b/src/tensors/cpu/backend.h
index 0c667820..9b55ad83 100644
--- a/src/tensors/cpu/backend.h
+++ b/src/tensors/cpu/backend.h
@@ -15,17 +15,13 @@ private:
public:
Backend(DeviceId deviceId, size_t seed)
- : marian::Backend(deviceId, seed),
- gen_(seed_) {}
+ : marian::Backend(deviceId, seed), gen_(seed_) {}
- void setDevice() { }
+ void setDevice() {}
void synchronize() {}
- std::default_random_engine& getRandomGenerator() {
- return gen_;
- }
+ std::default_random_engine& getRandomGenerator() { return gen_; }
};
-
}
}
diff --git a/src/tensors/cpu/device.cpp b/src/tensors/cpu/device.cpp
index 95f09815..985b00f5 100644
--- a/src/tensors/cpu/device.cpp
+++ b/src/tensors/cpu/device.cpp
@@ -1,31 +1,31 @@
-#include <iostream>
#include "tensors/device.h"
+#include <iostream>
#include <stdlib.h>
namespace marian {
namespace cpu {
- Device::~Device() {
- free(data_);
- data_ = nullptr;
- size_ = 0;
- }
+Device::~Device() {
+ free(data_);
+ data_ = nullptr;
+ size_ = 0;
+}
- void Device::reserve(size_t size) {
- size = align(size);
- ABORT_IF(size < size_ || size == 0, "New size must be larger than old size and larger than 0");
+void Device::reserve(size_t size) {
+ size = align(size);
+ ABORT_IF(size < size_ || size == 0,
+ "New size must be larger than old size and larger than 0");
- if(data_) {
- uint8_t *temp = static_cast<uint8_t*>(aligned_alloc(alignment_, size));
- std::copy(data_, data_ + size_, temp);
- free(data_);
- data_ = temp;
- } else {
- data_ = static_cast<uint8_t*>(aligned_alloc(alignment_, size));
- }
- size_ = size;
+ if(data_) {
+ uint8_t *temp = static_cast<uint8_t *>(aligned_alloc(alignment_, size));
+ std::copy(data_, data_ + size_, temp);
+ free(data_);
+ data_ = temp;
+ } else {
+ data_ = static_cast<uint8_t *>(aligned_alloc(alignment_, size));
}
-
+ size_ = size;
+}
}
}
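
cpu::Device::reserve grows the arena by allocating a larger aligned block, copying the old contents and freeing the old block; shrinking is rejected by the ABORT_IF. A self-contained sketch of the same grow-by-copy pattern, assuming C++17's std::aligned_alloc and a hypothetical Arena type rather than Marian's Device:

#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <iostream>

struct Arena {
  std::uint8_t* data = nullptr;
  std::size_t size = 0;
  std::size_t alignment = 256;

  // round a request up to a multiple of the alignment
  std::size_t align(std::size_t n) const {
    return ((n + alignment - 1) / alignment) * alignment;
  }

  void reserve(std::size_t n) {
    n = align(n);
    if(n <= size) return;  // the arena only ever grows
    auto* fresh = static_cast<std::uint8_t*>(std::aligned_alloc(alignment, n));
    if(data) {
      std::copy(data, data + size, fresh);  // keep old contents
      std::free(data);
    }
    data = fresh;
    size = n;
  }

  ~Arena() { std::free(data); }
};

int main() {
  Arena a;
  a.reserve(1000);
  std::cout << a.size << "\n";  // 1024, rounded up to the 256-byte alignment
  a.reserve(5000);
  std::cout << a.size << "\n";  // 5120
}
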
diff --git a/src/tensors/cpu/dropout.cpp b/src/tensors/cpu/dropout.cpp
index 6187cf3d..478eefc9 100644
--- a/src/tensors/cpu/dropout.cpp
+++ b/src/tensors/cpu/dropout.cpp
@@ -1,19 +1,20 @@
#include <algorithm>
#include <random>
-#include "tensors/tensor_operators.h"
#include "tensors/cpu/backend.h"
+#include "tensors/tensor_operators.h"
namespace marian {
- namespace cpu {
-
- void Dropout(Tensor tensor, float p) {
- auto cpuBackend = std::static_pointer_cast<cpu::Backend>(tensor->getBackend());
- auto &gen = cpuBackend->getRandomGenerator();
- std::bernoulli_distribution dist(1.f - p);
- std::generate(tensor->data(), tensor->data() + tensor->size(),
- [&]() { return dist(gen) / (1.f - p); });
- }
+namespace cpu {
- }
+void Dropout(Tensor tensor, float p) {
+ auto cpuBackend
+ = std::static_pointer_cast<cpu::Backend>(tensor->getBackend());
+ auto &gen = cpuBackend->getRandomGenerator();
+ std::bernoulli_distribution dist(1.f - p);
+ std::generate(tensor->data(), tensor->data() + tensor->size(), [&]() {
+ return dist(gen) / (1.f - p);
+ });
+}
+}
}
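
The CPU Dropout fills the tensor with an inverted-dropout mask: each element becomes 0 with probability p and 1/(1-p) otherwise, so the mask has expectation 1 and no rescaling is needed at inference time. A standalone sketch of the same fill, without Marian's Tensor or Backend types:

#include <algorithm>
#include <iostream>
#include <random>
#include <vector>

int main() {
  float p = 0.3f;                             // drop probability
  std::default_random_engine gen(1234);
  std::bernoulli_distribution dist(1.f - p);  // keep with probability 1-p

  std::vector<float> mask(10);
  std::generate(mask.begin(), mask.end(),
                [&]() { return dist(gen) / (1.f - p); });

  for(float v : mask) std::cout << v << " ";  // each value is 0 or ~1.43
  std::cout << "\n";
}
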
diff --git a/src/tensors/cpu/element.h b/src/tensors/cpu/element.h
index 1f38bf00..210b9a6c 100644
--- a/src/tensors/cpu/element.h
+++ b/src/tensors/cpu/element.h
@@ -13,12 +13,11 @@ namespace cpu {
template <size_t K, bool broadcast, class Functor>
void gElement(Functor functor,
functional::Array<functional::Tensor<float>, K> tensors) {
-
int length = tensors[0].shape().elements();
functional::Array<int, functional::Shape::size()> dims;
functional::Array<int, K> indices;
- #pragma omp parallel for simd
+#pragma omp parallel for simd
for(int index = 0; index < length; ++index) {
indices.fill(index);
if(broadcast) {
@@ -30,8 +29,8 @@ void gElement(Functor functor,
}
}
-template <class Functor, class ...Tensors>
-void Element(Functor functor, marian::Tensor out, Tensors ...tensors) {
+template <class Functor, class... Tensors>
+void Element(Functor functor, marian::Tensor out, Tensors... tensors) {
constexpr size_t K = sizeof...(tensors) + 1;
functional::Array<functional::Tensor<float>, K> gTensors = {out, tensors...};
@@ -46,6 +45,5 @@ void Element(Functor functor, marian::Tensor out, Tensors ...tensors) {
else
cpu::gElement<K, false>(functor, gTensors);
}
-
}
}
diff --git a/src/tensors/cpu/prod.cpp b/src/tensors/cpu/prod.cpp
index 927d8048..5766e640 100644
--- a/src/tensors/cpu/prod.cpp
+++ b/src/tensors/cpu/prod.cpp
@@ -1,15 +1,15 @@
-/* All or part of this file was contributed by Intel under license:
- * Copyright (C) 2017-2018 Intel Corporation
- * SPDX-License-Identifier: MIT
- */
+/* All or part of this file was contributed by Intel under license:
+ * Copyright (C) 2017-2018 Intel Corporation
+ * SPDX-License-Identifier: MIT
+ */
-#include "tensors/tensor.h"
#include "tensors/cpu/backend.h"
+#include "tensors/tensor.h"
#if MKL_FOUND
#include <mkl.h>
#else
-#if BLAS_FOUND
+#if BLAS_FOUND
#include <cblas.h>
#endif
#endif
@@ -25,7 +25,6 @@ void Prod(marian::Tensor C,
bool transB,
float beta,
float scalar) {
-
#if BLAS_FOUND
float alpha = scalar;
@@ -46,19 +45,20 @@ void Prod(marian::Tensor C,
if(transB)
ldc = B->shape().elements() / B->shape()[-1];
- cblas_sgemm(
- CblasColMajor,
- transB ? CblasTrans : CblasNoTrans,
- transA ? CblasTrans : CblasNoTrans,
- n, m, k,
- alpha,
- B->data(),
- ldb,
- A->data(),
- lda,
- beta,
- C->data(),
- ldc);
+ cblas_sgemm(CblasColMajor,
+ transB ? CblasTrans : CblasNoTrans,
+ transA ? CblasTrans : CblasNoTrans,
+ n,
+ m,
+ k,
+ alpha,
+ B->data(),
+ ldb,
+ A->data(),
+ lda,
+ beta,
+ C->data(),
+ ldc);
#else
ABORT("Not implemented!");
#endif
@@ -73,7 +73,7 @@ void ProdBatched(marian::Tensor C,
float scalar) {
#if BLAS_FOUND
float alpha = scalar;
-
+
size_t batchA = A->shape().elements() / (A->shape()[-1] * A->shape()[-2]);
size_t batchB = B->shape().elements() / (B->shape()[-1] * B->shape()[-2]);
@@ -95,33 +95,34 @@ void ProdBatched(marian::Tensor C,
ldc = B->shape()[-2];
auto opA = transA ? CblasTrans : CblasNoTrans;
- auto opB = transB ? CblasTrans : CblasNoTrans;
-
+ auto opB = transB ? CblasTrans : CblasNoTrans;
+
auto strideB = batchB == 1 ? 0 : n * k;
auto strideA = batchA == 1 ? 0 : m * k;
auto strideC = n * m;
-
+
int steps = std::max(batchA, batchB);
-
+
int offsetA = 0;
int offsetB = 0;
int offsetC = 0;
-
+
for(int i = 0; i < steps; ++i) {
- cblas_sgemm(
- CblasColMajor,
- opB,
- opA,
- n, m, k,
- alpha,
- B->data() + offsetB,
- ldb,
- A->data() + offsetA,
- lda,
- beta,
- C->data() + offsetC,
- ldc);
-
+ cblas_sgemm(CblasColMajor,
+ opB,
+ opA,
+ n,
+ m,
+ k,
+ alpha,
+ B->data() + offsetB,
+ ldb,
+ A->data() + offsetA,
+ lda,
+ beta,
+ C->data() + offsetC,
+ ldc);
+
offsetA += strideA;
offsetB += strideB;
offsetC += strideC;
@@ -130,6 +131,5 @@ void ProdBatched(marian::Tensor C,
ABORT("Not implemented!");
#endif
}
-
}
}
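
Prod passes B before A and the dimensions as (n, m, k) because cblas_sgemm is column-major while Marian's tensors are row-major: the row-major memory of C = A*B is exactly the column-major memory of C^T = B^T * A^T. A BLAS-free sketch that checks this identity with a toy column-major gemm (illustrative code, not the Marian wrapper):

#include <iostream>
#include <vector>

// naive column-major gemm: C(n x m) = X(n x k) * Y(k x m), no transposes
void colMajorGemm(int n, int m, int k, const float* X, const float* Y, float* C) {
  for(int col = 0; col < m; ++col)
    for(int row = 0; row < n; ++row) {
      float sum = 0.f;
      for(int t = 0; t < k; ++t)
        sum += X[t * n + row] * Y[col * k + t];
      C[col * n + row] = sum;
    }
}

int main() {
  int m = 2, k = 3, n = 2;                 // A is m x k, B is k x n, C is m x n (row-major)
  std::vector<float> A{1, 2, 3, 4, 5, 6};  // [[1,2,3],[4,5,6]]
  std::vector<float> B{1, 0, 0, 1, 1, 1};  // [[1,0],[0,1],[1,1]]
  std::vector<float> C(m * n);

  // pass B first, A second, and the dimensions as (n, m, k)
  colMajorGemm(n, m, k, B.data(), A.data(), C.data());

  for(float v : C) std::cout << v << " ";  // 4 5 10 11  ==  A*B in row-major order
  std::cout << "\n";
}
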
diff --git a/src/tensors/cpu/tensor_operators.cpp b/src/tensors/cpu/tensor_operators.cpp
index 087ad022..14097085 100644
--- a/src/tensors/cpu/tensor_operators.cpp
+++ b/src/tensors/cpu/tensor_operators.cpp
@@ -44,13 +44,13 @@ void ConcatCont(Tensor out, const std::vector<Tensor>& inputs, int axis) {
}
inline void gInsertCols(float* out,
- const float* in,
- size_t rows,
- size_t cols,
- size_t cols_out,
- size_t cols_in,
- size_t offset_out,
- size_t offset_in) {
+ const float* in,
+ size_t rows,
+ size_t cols,
+ size_t cols_out,
+ size_t cols_in,
+ size_t offset_out,
+ size_t offset_in) {
for(int j = 0; j < rows; ++j) {
float* rowOut = out + j * cols_out + offset_out;
const float* rowIn = in + j * cols_in + offset_in;
@@ -68,9 +68,10 @@ void Concatenate1(Tensor out, const std::vector<Tensor>& inputs) {
for(auto in : inputs) {
ABORT_IF(rows != in->shape().elements() / in->shape().back(),
- "First dimension must be equal");
+ "First dimension must be equal");
int cols_in = in->shape().back();
- cpu::gInsertCols(out->data(), in->data(), rows, cols_in, cols_out, cols_in, offset, 0);
+ cpu::gInsertCols(
+ out->data(), in->data(), rows, cols_in, cols_out, cols_in, offset, 0);
offset += cols_in;
}
}
@@ -88,11 +89,10 @@ void Split1(std::vector<Tensor>& outputs, const Tensor in) {
int cols_in = in->shape().back();
for(auto out : outputs) {
ABORT_IF(rows != out->shape().elements() / out->shape().back(),
- "First dimension must be equal");
+ "First dimension must be equal");
int cols_out = out->shape().back();
- cpu::gInsertCols(out->data(), in->data(),
- rows, cols_out, cols_out, cols_in,
- 0, offset);
+ cpu::gInsertCols(
+ out->data(), in->data(), rows, cols_out, cols_out, cols_in, 0, offset);
offset += cols_out;
}
}
@@ -158,24 +158,24 @@ void Softmax(Tensor out_, Tensor in_, Tensor mask_) {
int rows = out_->shape().elements() / out_->shape().back();
int cols = out_->shape().back();
- for (int j = 0; j < rows; ++j) {
- float* so = out + j*cols;
- const float* sp = in + j*cols;
- const float* mp = mask ? mask + j*cols : nullptr;
+ for(int j = 0; j < rows; ++j) {
+ float* so = out + j * cols;
+ const float* sp = in + j * cols;
+ const float* mp = mask ? mask + j * cols : nullptr;
float max = sp[0];
- for (int i = 1; i < cols; ++i) {
+ for(int i = 1; i < cols; ++i) {
max = std::max(max, sp[i]);
}
float sum = 0.f;
- for (int i = 0; i < cols; ++i) {
+ for(int i = 0; i < cols; ++i) {
float ex = !mask || mp[i] ? std::exp(sp[i] - max) : 0.f;
so[i] = ex;
sum += ex;
}
- for (int i = 0; i < cols; ++i) {
+ for(int i = 0; i < cols; ++i) {
so[i] /= sum;
}
}
@@ -188,24 +188,24 @@ void LogSoftmax(Tensor out_, Tensor in_) {
int rows = out_->shape().elements() / out_->shape().back();
int cols = out_->shape().back();
- for (int j = 0; j < rows; ++j) {
+ for(int j = 0; j < rows; ++j) {
float* so = out + j * cols;
- const float* sp = in + j*cols;
+ const float* sp = in + j * cols;
float max = sp[0];
- for (int i = 1; i < cols; ++i) {
+ for(int i = 1; i < cols; ++i) {
max = std::max(max, sp[i]);
}
float sum = 0.f;
- for (int i = 0; i < cols; ++i) {
+ for(int i = 0; i < cols; ++i) {
float sm = sp[i] - max;
float ex = std::exp(sm);
so[i] = sm;
sum += ex;
}
- for (int i = 0; i < cols; ++i) {
+ for(int i = 0; i < cols; ++i) {
so[i] -= std::log(sum);
}
}
@@ -219,17 +219,17 @@ void SoftmaxGrad(Tensor grad_, Tensor adj_, Tensor val_) {
const float* adj = adj_->data();
const float* val = val_->data();
- for (size_t j = 0; j < rows; ++j) {
- float* gradRow = grad + j*cols;
- const float* adjRow = adj + j*cols;
- const float* valRow = val + j*cols;
+ for(size_t j = 0; j < rows; ++j) {
+ float* gradRow = grad + j * cols;
+ const float* adjRow = adj + j * cols;
+ const float* valRow = val + j * cols;
float sum = 0.f;
- for (size_t i = 0; i < cols; ++i) {
+ for(size_t i = 0; i < cols; ++i) {
sum += valRow[i] * adjRow[i];
}
- for (size_t i = 0; i < cols; ++i) {
+ for(size_t i = 0; i < cols; ++i) {
gradRow[i] += valRow[i] * (adjRow[i] - sum);
}
}
@@ -243,62 +243,68 @@ void LogSoftmaxGrad(Tensor grad_, Tensor adj_, Tensor val_) {
const float* adj = adj_->data();
const float* val = val_->data();
- for (int j = 0; j < rows; ++j) {
- float* gradRow = grad + j*cols;
- const float* adjRow = adj + j*cols;
- const float* valRow = val + j*cols;
+ for(int j = 0; j < rows; ++j) {
+ float* gradRow = grad + j * cols;
+ const float* adjRow = adj + j * cols;
+ const float* valRow = val + j * cols;
float sum = 0.f;
- for (int i = 0; i < cols; ++i) {
+ for(int i = 0; i < cols; ++i) {
sum += adjRow[i];
}
- for (int i = 0; i < cols; ++i) {
- gradRow[i] += adjRow[i] - sum*std::exp(valRow[i]);
+ for(int i = 0; i < cols; ++i) {
+ gradRow[i] += adjRow[i] - sum * std::exp(valRow[i]);
}
}
}
-void CopyRows(Tensor out_, const Tensor in_, const std::vector<size_t>& indices) {
+void CopyRows(Tensor out_,
+ const Tensor in_,
+ const std::vector<size_t>& indices) {
size_t cols = in_->shape()[1];
size_t rows = indices.size();
float* out = out_->data();
const float* in = in_->data();
- #pragma omp parallel for
- for (int j = 0; j < rows; ++j) {
+#pragma omp parallel for
+ for(int j = 0; j < rows; ++j) {
size_t dst = j;
size_t src = indices[j];
- float* rowOut = out + dst*cols;
- const float* rowIn = in + src*cols;
+ float* rowOut = out + dst * cols;
+ const float* rowIn = in + src * cols;
std::copy(rowIn, rowIn + cols, rowOut);
}
}
-void PasteRows(Tensor out_, const Tensor in_, const std::vector<size_t>& indices) {
+void PasteRows(Tensor out_,
+ const Tensor in_,
+ const std::vector<size_t>& indices) {
size_t cols = in_->shape()[-1];
size_t rows = indices.size();
float* out = out_->data();
const float* in = in_->data();
- for (int j = 0; j < rows; ++j) {
- size_t dst = indices[j]; // not a permutation - may alias, unlike PasteCols
+ for(int j = 0; j < rows; ++j) {
+ size_t dst = indices[j]; // not a permutation - may alias, unlike PasteCols
size_t src = j;
- float* rowOut = out + dst*cols;
- const float* rowIn = in + src*cols;
+ float* rowOut = out + dst * cols;
+ const float* rowIn = in + src * cols;
- for (int i = 0; i < cols; ++i) {
+ for(int i = 0; i < cols; ++i) {
rowOut[i] += rowIn[i];
}
}
}
-void CopyCols(Tensor out_, const Tensor in_, const std::vector<size_t>& indices) {
+void CopyCols(Tensor out_,
+ const Tensor in_,
+ const std::vector<size_t>& indices) {
size_t rows = in_->shape().elements() / in_->shape()[-1];
size_t colsIn = in_->shape()[-1];
size_t colsOut = indices.size();
@@ -306,18 +312,20 @@ void CopyCols(Tensor out_, const Tensor in_, const std::vector<size_t>& indices)
float* out = out_->data();
const float* in = in_->data();
- #pragma omp parallel for
- for (int j = 0; j < rows; ++j) {
- const float* rowIn = in + j*colsIn;
- float* rowOut = out + j*colsOut;
+#pragma omp parallel for
+ for(int j = 0; j < rows; ++j) {
+ const float* rowIn = in + j * colsIn;
+ float* rowOut = out + j * colsOut;
- for (int i = 0; i < colsOut; ++i) {
+ for(int i = 0; i < colsOut; ++i) {
rowOut[i] = rowIn[indices[i]];
}
}
}
-void PasteCols(Tensor out_, const Tensor in_, const std::vector<size_t>& indices) {
+void PasteCols(Tensor out_,
+ const Tensor in_,
+ const std::vector<size_t>& indices) {
size_t rows = out_->shape().elements() / out_->shape()[-1];
size_t colsOut = out_->shape()[-1];
size_t colsIn = indices.size();
@@ -328,12 +336,12 @@ void PasteCols(Tensor out_, const Tensor in_, const std::vector<size_t>& indices
/* n.b. Unlike PasteRows, currently appears safe to assume indices[i] is a
* permutation i.e. no racy aliases, and no need to sum vs. just assign.
*/
- for (int j = 0; j < rows; ++j) {
- const float* rowIn = in + j*colsIn;
- float* rowOut = out + j*colsOut;
+ for(int j = 0; j < rows; ++j) {
+ const float* rowIn = in + j * colsIn;
+ float* rowOut = out + j * colsOut;
// @TODO: should this be a sum?
- for (int i = 0; i < colsIn; ++i) {
+ for(int i = 0; i < colsIn; ++i) {
rowOut[indices[i]] = rowIn[i];
}
}
@@ -367,8 +375,8 @@ void GRUFastForward(Tensor out_, std::vector<Tensor> inputs, bool final) {
const float* b = inputs[3]->data();
const float* mask = inputs.size() > 4 ? inputs[4]->data() : nullptr;
- #pragma omp parallel for
- for (int j = 0; j < rows; ++j) {
+#pragma omp parallel for
+ for(int j = 0; j < rows; ++j) {
float m = !mask || mask[j];
float* rowOut = out + j * cols;
const float* rowState = state + j * cols;
@@ -376,8 +384,8 @@ void GRUFastForward(Tensor out_, std::vector<Tensor> inputs, bool final) {
const float* xWrow = xW + j * cols * 3;
const float* sUrow = sU + j * cols * 3;
- #pragma omp simd
- for (int i = 0; i < cols; ++i) {
+#pragma omp simd
+ for(int i = 0; i < cols; ++i) {
// @TODO: stable logit
float r = stableLogit(xWrow[i] + sUrow[i] + b[i]);
@@ -417,8 +425,8 @@ void GRUFastBackward(std::vector<Tensor> outputs,
const float* mask = inputs.size() > 4 ? inputs[4]->data() : 0;
const float* adj = adj_->data();
- #pragma omp parallel
- for (int j = 0; j < rows; ++j) {
+#pragma omp parallel
+ for(int j = 0; j < rows; ++j) {
float m = !mask || mask[j];
float* rowOutState = outState + j * cols;
@@ -430,8 +438,8 @@ void GRUFastBackward(std::vector<Tensor> outputs,
const float* rowSU = sU + j * cols * 3;
const float* rowAdj = adj + j * cols;
- #pragma omp for simd nowait
- for (int i = 0; i < cols; ++i) {
+#pragma omp for simd nowait
+ for(int i = 0; i < cols; ++i) {
int k = i + cols;
int l = i + 2 * cols;
@@ -446,10 +454,11 @@ void GRUFastBackward(std::vector<Tensor> outputs,
float adj = rowAdj[i];
- float t = (1-z)*(1-h*h);
+ float t = (1 - z) * (1 - h * h);
// df/ds
- if(outState) rowOutState[i] += (m * z - m + 1) * adj;
+ if(outState)
+ rowOutState[i] += (m * z - m + 1) * adj;
// df/d(xW_r) ...
float dfdxW_r = m * r * (1 - r) * t * adj;
@@ -457,20 +466,28 @@ void GRUFastBackward(std::vector<Tensor> outputs,
dfdxW_r *= rowSU[l] + b[l];
else
dfdxW_r *= rowSU[l];
- if(outXW) rowOutXW[i] += dfdxW_r;
- if(outSU) rowOutSU[i] += dfdxW_r;
- if(outB) outB[i] += dfdxW_r;
+ if(outXW)
+ rowOutXW[i] += dfdxW_r;
+ if(outSU)
+ rowOutSU[i] += dfdxW_r;
+ if(outB)
+ outB[i] += dfdxW_r;
// df/d(xW_z) ...
float dfdxW_z = m * (1 - z) * z * (rowState[i] - h) * adj;
- if(outXW) rowOutXW[k] += dfdxW_z;
- if(outSU) rowOutSU[k] += dfdxW_z;
- if(outB) outB[k] += dfdxW_z;
+ if(outXW)
+ rowOutXW[k] += dfdxW_z;
+ if(outSU)
+ rowOutSU[k] += dfdxW_z;
+ if(outB)
+ outB[k] += dfdxW_z;
// df/d(xW_x) ...
float dfdxW_x = m * t * adj;
- if(outXW) rowOutXW[l] += dfdxW_x;
- if(outSU) rowOutSU[l] += dfdxW_x * r;
+ if(outXW)
+ rowOutXW[l] += dfdxW_x;
+ if(outSU)
+ rowOutSU[l] += dfdxW_x * r;
if(outB)
if(final)
outB[l] += dfdxW_x * r;
@@ -490,18 +507,18 @@ void CrossEntropyPick(Tensor out_, Tensor in_, Tensor pick_) {
int rows = inShape.elements() / inShape.back();
int cols = inShape.back();
- #pragma omp parallel for
- for (int j = 0; j < rows; ++j) {
- const float* sp = in + j*cols;
+#pragma omp parallel for
+ for(int j = 0; j < rows; ++j) {
+ const float* sp = in + j * cols;
float max = sp[0];
- #pragma omp simd reduction(max:max)
- for (int i = 1; i < cols; ++i) {
+#pragma omp simd reduction(max : max)
+ for(int i = 1; i < cols; ++i) {
max = std::max(max, sp[i]);
}
float sum = 0.f;
- #pragma omp simd reduction(+:sum)
- for (int i = 0; i < cols; ++i) {
+#pragma omp simd reduction(+ : sum)
+ for(int i = 0; i < cols; ++i) {
sum += std::exp(sp[i] - max);
}
@@ -512,7 +529,10 @@ void CrossEntropyPick(Tensor out_, Tensor in_, Tensor pick_) {
}
}
-void CrossEntropyPickBackward(Tensor out_, Tensor adj_, Tensor a, Tensor pick_) {
+void CrossEntropyPickBackward(Tensor out_,
+ Tensor adj_,
+ Tensor a,
+ Tensor pick_) {
float* out = out_->data();
Shape& outShape = out_->shape();
const float* adj = adj_->data();
@@ -522,23 +542,23 @@ void CrossEntropyPickBackward(Tensor out_, Tensor adj_, Tensor a, Tensor pick_)
int rows = outShape.elements() / outShape.back();
int cols = outShape.back();
- #pragma omp parallel for
- for (int j = 0; j < rows; ++j) {
- const float* sp = in + j*cols;
- float* so = out + j*cols;
+#pragma omp parallel for
+ for(int j = 0; j < rows; ++j) {
+ const float* sp = in + j * cols;
+ float* so = out + j * cols;
float max = sp[0];
- for (int i = 1; i < cols; ++i) {
+ for(int i = 1; i < cols; ++i) {
max = std::max(max, sp[i]);
}
float sum = 0.f;
- for (int i = 0; i < cols; ++i) {
+ for(int i = 0; i < cols; ++i) {
sum += std::exp(sp[i] - max);
}
// cross-entropy
- for (int i = 0; i < cols; ++i) {
+ for(int i = 0; i < cols; ++i) {
float sub = (float)(i == (int)pick[j]);
so[i] += adj[j] * (std::exp(sp[i] - max) / sum - sub);
}
@@ -549,8 +569,8 @@ float L2Norm(Tensor in) {
float sum = 0.f;
size_t size = in->size();
const float* data = in->data();
- #pragma omp parallel for simd reduction(+:sum)
- for (size_t i = 0; i < size; ++i) {
+#pragma omp parallel for simd reduction(+ : sum)
+ for(size_t i = 0; i < size; ++i) {
sum += data[i] * data[i];
}
return std::sqrt(sum);
@@ -570,15 +590,15 @@ void Att(Tensor out_, Tensor va_, Tensor context_, Tensor state_) {
int rows = m;
int cols = k;
- #pragma omp parallel for
- for (size_t j = 0; j < rows; ++j) {
+#pragma omp parallel for
+ for(size_t j = 0; j < rows; ++j) {
const float* vaRow = va;
const float* ctxRow = ctx + (j % (b * t)) * cols;
const float* stateRow = state + ((j / (b * t)) * b + j % b) * cols;
float sum = 0.f;
- #pragma omp simd reduction(+:sum)
- for (size_t i = 0; i < cols; ++i) {
+#pragma omp simd reduction(+ : sum)
+ for(size_t i = 0; i < cols; ++i) {
float z = ctxRow[i] + stateRow[i];
sum += std::tanh(z) * vaRow[i];
}
@@ -587,8 +607,12 @@ void Att(Tensor out_, Tensor va_, Tensor context_, Tensor state_) {
}
}
-void AttBack(Tensor gVa_, Tensor gContext_, Tensor gState_,
- Tensor va_, Tensor context_, Tensor state_,
+void AttBack(Tensor gVa_,
+ Tensor gContext_,
+ Tensor gState_,
+ Tensor va_,
+ Tensor context_,
+ Tensor state_,
Tensor adj_) {
float* gVa = gVa_->data();
float* gContext = gContext_->data();
@@ -603,8 +627,8 @@ void AttBack(Tensor gVa_, Tensor gContext_, Tensor gState_,
size_t k = context_->shape()[-1];
size_t n = context_->shape()[-2];
- #pragma omp parallel for reduction(+:gState[:n*k], gVa[:k])
- for (size_t j = 0; j < m; ++j) {
+#pragma omp parallel for reduction(+ : gState[ : n* k], gVa[ : k])
+ for(size_t j = 0; j < m; ++j) {
float* gcRow = gContext + j * k;
float* gsRow = gState + (j % n) * k;
@@ -613,8 +637,8 @@ void AttBack(Tensor gVa_, Tensor gContext_, Tensor gState_,
float adj_j = adj[j];
- #pragma omp simd
- for (size_t i = 0; i < k; ++i) {
+#pragma omp simd
+ for(size_t i = 0; i < k; ++i) {
float z = cRow[i] + sRow[i];
float t = std::tanh(z);
@@ -642,31 +666,31 @@ void LayerNormalization(Tensor out_,
int rows = in_->shape().elements() / in_->shape().back();
int cols = in_->shape().back();
- #pragma omp parallel for
- for (int j = 0; j < rows; ++j) {
- float* so = out + j*cols;
- const float* sp = in + j*cols;
+#pragma omp parallel for
+ for(int j = 0; j < rows; ++j) {
+ float* so = out + j * cols;
+ const float* sp = in + j * cols;
float sum = 0.f;
- #pragma omp simd reduction(+:sum)
- for (int i = 0; i < cols; ++i) {
+#pragma omp simd reduction(+ : sum)
+ for(int i = 0; i < cols; ++i) {
sum += sp[i];
}
float mean = sum / cols;
float sqSum = 0.f;
- #pragma omp simd reduction(+:sqSum)
- for (int i = 0; i < cols; ++i) {
+#pragma omp simd reduction(+ : sqSum)
+ for(int i = 0; i < cols; ++i) {
float ex = sp[i] - mean;
- sqSum += ex*ex;
+ sqSum += ex * ex;
}
float sigma = std::sqrt(eps + sqSum / cols);
- #pragma omp simd
- for (int i = 0; i < cols; ++i) {
+#pragma omp simd
+ for(int i = 0; i < cols; ++i) {
float t = alpha[i] * ((sp[i] - mean) / sigma);
- if (beta != nullptr) {
+ if(beta != nullptr) {
t += beta[i];
}
@@ -696,36 +720,36 @@ void LayerNormalizationGrad(Tensor gradX_,
size_t rows = y_->shape().elements() / y_->shape()[-1];
size_t cols = y_->shape()[-1];
- if (beta) {
- #pragma omp parallel for reduction(+:gradGamma[:cols], gradBeta[:cols])
- for (size_t j = 0; j < rows; ++j) {
- const float* xRow = x + j*cols;
- const float* yRow = y + j*cols;
- const float* adjRow = adj + j*cols;
- float* gradXRow = gradX + j*cols;
+ if(beta) {
+#pragma omp parallel for reduction(+ : gradGamma[ : cols], gradBeta[ : cols])
+ for(size_t j = 0; j < rows; ++j) {
+ const float* xRow = x + j * cols;
+ const float* yRow = y + j * cols;
+ const float* adjRow = adj + j * cols;
+ float* gradXRow = gradX + j * cols;
float sum_x = 0.f;
float sum_adj = 0.f;
float sum_adj_x = 0.f;
float sum_sqr = 0.f;
- #pragma omp simd reduction(+:sum_x, sum_adj_x, sum_adj)
- for (size_t i = 0; i < cols; ++i) {
+#pragma omp simd reduction(+ : sum_x, sum_adj_x, sum_adj)
+ for(size_t i = 0; i < cols; ++i) {
sum_x += xRow[i];
sum_adj_x += adjRow[i] * (yRow[i] - (beta ? beta[i] : 0.f)) / gamma[i];
sum_adj += adjRow[i];
}
float mean = sum_x / cols;
- #pragma omp simd reduction(+:sum_sqr)
- for (size_t i = 0; i < cols; ++i) {
+#pragma omp simd reduction(+ : sum_sqr)
+ for(size_t i = 0; i < cols; ++i) {
float ex = xRow[i] - mean;
- sum_sqr += ex*ex;
+ sum_sqr += ex * ex;
}
float sigma = std::sqrt(eps + sum_sqr / cols);
- #pragma omp simd
- for (size_t i = 0; i < cols; ++i) {
+#pragma omp simd
+ for(size_t i = 0; i < cols; ++i) {
float grad_x = 0.f;
float x_hat = (yRow[i] - beta[i]) / gamma[i];
grad_x += cols * adjRow[i];
@@ -739,35 +763,35 @@ void LayerNormalizationGrad(Tensor gradX_,
}
}
} else {
- #pragma omp parallel for reduction(+:gradGamma[:cols])
- for (size_t j = 0; j < rows; ++j) {
- const float* xRow = x + j*cols;
- const float* yRow = y + j*cols;
- const float* adjRow = adj + j*cols;
- float* gradXRow = gradX + j*cols;
+#pragma omp parallel for reduction(+ : gradGamma[ : cols])
+ for(size_t j = 0; j < rows; ++j) {
+ const float* xRow = x + j * cols;
+ const float* yRow = y + j * cols;
+ const float* adjRow = adj + j * cols;
+ float* gradXRow = gradX + j * cols;
float sum_x = 0.f;
float sum_adj = 0.f;
float sum_adj_x = 0.f;
float sum_sqr = 0.f;
- #pragma omp simd reduction(+:sum_x, sum_adj_x, sum_adj)
- for (size_t i = 0; i < cols; ++i) {
+#pragma omp simd reduction(+ : sum_x, sum_adj_x, sum_adj)
+ for(size_t i = 0; i < cols; ++i) {
sum_x += xRow[i];
sum_adj_x += adjRow[i] * (yRow[i] - (beta ? beta[i] : 0.f)) / gamma[i];
sum_adj += adjRow[i];
}
float mean = sum_x / cols;
- #pragma omp simd reduction(+:sum_sqr)
- for (size_t i = 0; i < cols; ++i) {
+#pragma omp simd reduction(+ : sum_sqr)
+ for(size_t i = 0; i < cols; ++i) {
float ex = xRow[i] - mean;
- sum_sqr += ex*ex;
+ sum_sqr += ex * ex;
}
float sigma = std::sqrt(eps + sum_sqr / cols);
- #pragma omp simd
- for (size_t i = 0; i < cols; ++i) {
+#pragma omp simd
+ for(size_t i = 0; i < cols; ++i) {
float grad_x = 0.f;
float x_hat = yRow[i] / gamma[i];
grad_x += cols * adjRow[i];
@@ -794,9 +818,9 @@ void Shift(Tensor out_, Tensor in_, marian::Shape shift, bool invert) {
const float* in = in_->data();
int length = out_->shape().elements();
- #pragma omp parallel for
- for (int i = 0; i < length; ++i) {
- if (i - offset < 0 || i - offset >= length) {
+#pragma omp parallel for
+ for(int i = 0; i < length; ++i) {
+ if(i - offset < 0 || i - offset >= length) {
out[i] = 0.f;
} else {
out[i] = in[i - offset];
@@ -808,7 +832,7 @@ void SetSparse(float* out,
const std::vector<size_t>& indices,
const std::vector<float>& values) {
int length = indices.size();
- for (int index = 0; index < length; ++index) {
+ for(int index = 0; index < length; ++index) {
out[indices[index]] = values[index];
}
}
@@ -824,26 +848,26 @@ void LSTMCellForward(Tensor out_, std::vector<Tensor> inputs) {
const float* b = inputs[3]->data();
const float* mask = inputs.size() > 4 ? inputs[4]->data() : nullptr;
- for (int j = 0; j < rows; ++j) {
+ for(int j = 0; j < rows; ++j) {
float m = !mask || mask[j];
- float* rowOut = out + j*cols;
- const float* rowCell = cell + j*cols;
+ float* rowOut = out + j * cols;
+ const float* rowCell = cell + j * cols;
- const float* xWrow = xW + j*cols*4;
- const float* sUrow = sU + j*cols*4;
+ const float* xWrow = xW + j * cols * 4;
+ const float* sUrow = sU + j * cols * 4;
- for (int i = 0; i < cols; ++i) {
+ for(int i = 0; i < cols; ++i) {
float gf = stableLogit(xWrow[i] + sUrow[i] + b[i]);
int k = i + cols;
float gi = stableLogit(xWrow[k] + sUrow[k] + b[k]);
- int l = i + 2*cols;
+ int l = i + 2 * cols;
float gc = std::tanh(xWrow[l] + sUrow[l] + b[l]);
- float cout = gf*rowCell[i] + gi*gc;
- rowOut[i] = m*cout + (1-m)*rowCell[i];
+ float cout = gf * rowCell[i] + gi * gc;
+ rowOut[i] = m * cout + (1 - m) * rowCell[i];
}
}
}
@@ -858,15 +882,15 @@ void LSTMOutputForward(Tensor out_, std::vector<Tensor> inputs) {
const float* sU = inputs[2]->data();
const float* b = inputs[3]->data();
- for (int j = 0; j <rows; ++j) {
- float* rowOut = out + j*cols;
- const float* rowCell = cell + j*cols;
+ for(int j = 0; j < rows; ++j) {
+ float* rowOut = out + j * cols;
+ const float* rowCell = cell + j * cols;
- const float* xWrow = xW + j*cols*4;
- const float* sUrow = sU + j*cols*4;
+ const float* xWrow = xW + j * cols * 4;
+ const float* sUrow = sU + j * cols * 4;
- for (int i = 0; i < cols; ++i) {
- int k = i + 3*cols;
+ for(int i = 0; i < cols; ++i) {
+ int k = i + 3 * cols;
float go = stableLogit(xWrow[k] + sUrow[k] + b[k]);
rowOut[i] = go * std::tanh(rowCell[i]);
@@ -893,52 +917,70 @@ void LSTMCellBackward(std::vector<Tensor> outputs,
const float* mask = inputs.size() > 4 ? inputs[4]->data() : nullptr;
const float* adj = adj_->data();
- for (int j = 0; j <rows; ++j) {
+ for(int j = 0; j < rows; ++j) {
float m = !mask || mask[j];
- float* rowOutCell = outCell + j*cols;
- float* rowOutXW = outXW + j*cols*4;
- float* rowOutSU = outSU + j*cols*4;
+ float* rowOutCell = outCell + j * cols;
+ float* rowOutXW = outXW + j * cols * 4;
+ float* rowOutSU = outSU + j * cols * 4;
- const float* rowCell = cell + j*cols;
- const float* xWrow = xW + j*cols*4;
- const float* sUrow = sU + j*cols*4;
+ const float* rowCell = cell + j * cols;
+ const float* xWrow = xW + j * cols * 4;
+ const float* sUrow = sU + j * cols * 4;
- const float* rowAdj = adj + j*cols;
+ const float* rowAdj = adj + j * cols;
- for (int i = 0; i < cols; ++i) {
+ for(int i = 0; i < cols; ++i) {
float gf = stableLogit(xWrow[i] + sUrow[i] + b[i]);
int k = i + cols;
float gi = stableLogit(xWrow[k] + sUrow[k] + b[k]);
- int l = i + 2*cols;
+ int l = i + 2 * cols;
float gc = std::tanh(xWrow[l] + sUrow[l] + b[l]);
float adj = rowAdj[i];
// dc/dx_{t-1}
- if (outCell) {
- rowOutCell[i] += (m*gf - m + 1)*adj;
+ if(outCell) {
+ rowOutCell[i] += (m * gf - m + 1) * adj;
}
// dc/d(b_f) = dc/d(xW_f) ...
- float dcdxf = m*rowCell[i] * gf*(1-gf) * adj;
- if (outXW) { rowOutXW[i] += dcdxf; }
- if (outSU) { rowOutSU[i] += dcdxf; }
- if (outB) { outB[i] += dcdxf; }
+ float dcdxf = m * rowCell[i] * gf * (1 - gf) * adj;
+ if(outXW) {
+ rowOutXW[i] += dcdxf;
+ }
+ if(outSU) {
+ rowOutSU[i] += dcdxf;
+ }
+ if(outB) {
+ outB[i] += dcdxf;
+ }
// dc/d(b_i) ...
- float dcdb_i = m * gc * gi*(1-gi) * adj;
- if (outXW) { rowOutXW[k] += dcdb_i; }
- if (outSU) { rowOutSU[k] += dcdb_i; }
- if (outB) { outB[k] += dcdb_i; }
+ float dcdb_i = m * gc * gi * (1 - gi) * adj;
+ if(outXW) {
+ rowOutXW[k] += dcdb_i;
+ }
+ if(outSU) {
+ rowOutSU[k] += dcdb_i;
+ }
+ if(outB) {
+ outB[k] += dcdb_i;
+ }
// dc/d(b_c) ...
- float dcdxc = m * gi * (1 - gc*gc) * adj;
- if (outXW) { rowOutXW[l] += dcdxc; }
- if (outSU) { rowOutSU[l] += dcdxc; }
- if (outB) { outB[l] += dcdxc; }
+ float dcdxc = m * gi * (1 - gc * gc) * adj;
+ if(outXW) {
+ rowOutXW[l] += dcdxc;
+ }
+ if(outSU) {
+ rowOutSU[l] += dcdxc;
+ }
+ if(outB) {
+ outB[l] += dcdxc;
+ }
}
}
}
@@ -961,19 +1003,19 @@ void LSTMOutputBackward(std::vector<Tensor> outputs,
const float* adj = adj_->data();
- for (int j = 0; j < rows; ++j) {
- float* rowOutCell = outCell + j*cols;
- float* rowOutXW = outXW + j*cols*4;
- float* rowOutSU = outSU + j*cols*4;
+ for(int j = 0; j < rows; ++j) {
+ float* rowOutCell = outCell + j * cols;
+ float* rowOutXW = outXW + j * cols * 4;
+ float* rowOutSU = outSU + j * cols * 4;
- const float* rowCell = cell + j*cols;
- const float* xWrow = xW + j*cols*4;
- const float* sUrow = sU + j*cols*4;
+ const float* rowCell = cell + j * cols;
+ const float* xWrow = xW + j * cols * 4;
+ const float* sUrow = sU + j * cols * 4;
- const float* rowAdj = adj + j*cols;
+ const float* rowAdj = adj + j * cols;
- for (int i = 0; i < cols; ++i) {
- int k = i + 3*cols;
+ for(int i = 0; i < cols; ++i) {
+ int k = i + 3 * cols;
float go = stableLogit(xWrow[k] + sUrow[k] + b[k]);
float t = std::tanh(rowCell[i]);
@@ -981,15 +1023,21 @@ void LSTMOutputBackward(std::vector<Tensor> outputs,
float adj = rowAdj[i];
// dc/dc_{t-1}
- if (outCell) {
- rowOutCell[i] += go * (1 - t*t) * adj;
+ if(outCell) {
+ rowOutCell[i] += go * (1 - t * t) * adj;
}
// dc/d(b_o) = dc/d(xW_f) ...
- float dcdxo = t * go*(1-go) * adj;
- if (outXW) { rowOutXW[k] += dcdxo; }
- if (outSU) { rowOutSU[k] += dcdxo; }
- if (outB) { outB[k] += dcdxo; }
+ float dcdxo = t * go * (1 - go) * adj;
+ if(outXW) {
+ rowOutXW[k] += dcdxo;
+ }
+ if(outSU) {
+ rowOutSU[k] += dcdxo;
+ }
+ if(outB) {
+ outB[k] += dcdxo;
+ }
}
}
}
@@ -1027,6 +1075,5 @@ void PoolingWithMaskingBackward(Tensor adj,
bool isEven) {
ABORT("Not implemented!");
}
-
}
} // namespace marian
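
Several of the kernels above (Softmax, LogSoftmax, CrossEntropyPick) rely on the max-subtraction trick: subtracting the row maximum before exponentiating keeps exp() in range without changing the result, because the shift cancels in the normalizer. A standalone sketch for a single row, with plain pointers instead of Marian tensors:

#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

void softmaxRow(const float* in, float* out, int cols) {
  float max = *std::max_element(in, in + cols);
  float sum = 0.f;
  for(int i = 0; i < cols; ++i) {
    out[i] = std::exp(in[i] - max);  // shifted exponentials never overflow
    sum += out[i];
  }
  for(int i = 0; i < cols; ++i)
    out[i] /= sum;                   // normalize to a probability distribution
}

int main() {
  std::vector<float> in{1000.f, 1001.f, 1002.f};  // naive exp() would overflow here
  std::vector<float> out(3);
  softmaxRow(in.data(), out.data(), 3);
  for(float v : out) std::cout << v << " ";       // ~0.09 0.245 0.665
  std::cout << "\n";
}
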
diff --git a/src/tensors/device.h b/src/tensors/device.h
index 74d68b3d..f8707634 100644
--- a/src/tensors/device.h
+++ b/src/tensors/device.h
@@ -23,7 +23,7 @@ public:
Device(DeviceId deviceId, size_t alignment = 256)
: deviceId_(deviceId), data_(0), size_(0), alignment_(alignment) {}
- virtual ~Device() {};
+ virtual ~Device(){};
virtual void reserve(size_t size) = 0;
@@ -35,30 +35,31 @@ public:
};
namespace gpu {
- class Device : public marian::Device {
- public:
- Device(DeviceId deviceId, size_t alignment = 256)
+class Device : public marian::Device {
+public:
+ Device(DeviceId deviceId, size_t alignment = 256)
: marian::Device(deviceId, alignment) {}
- ~Device();
+ ~Device();
- void reserve(size_t size);
- };
+ void reserve(size_t size);
+};
}
namespace cpu {
- class Device : public marian::Device {
- public:
- Device(DeviceId deviceId, size_t alignment = 256)
+class Device : public marian::Device {
+public:
+ Device(DeviceId deviceId, size_t alignment = 256)
: marian::Device(deviceId, alignment) {}
- ~Device();
+ ~Device();
- void reserve(size_t size);
- };
+ void reserve(size_t size);
+};
}
-static inline Ptr<Device> DispatchDevice(DeviceId deviceId, size_t alignment = 256) {
+static inline Ptr<Device> DispatchDevice(DeviceId deviceId,
+ size_t alignment = 256) {
#ifdef CUDA_FOUND
if(deviceId.type == DeviceType::gpu)
return New<gpu::Device>(deviceId, alignment);
@@ -71,5 +72,4 @@ static inline Ptr<Device> DispatchDevice(DeviceId deviceId, size_t alignment = 2
return New<cpu::Device>(deviceId, alignment);
#endif
}
-
}
diff --git a/src/tensors/dispatch.h b/src/tensors/dispatch.h
index 750cfff5..7c120e6f 100644
--- a/src/tensors/dispatch.h
+++ b/src/tensors/dispatch.h
@@ -2,204 +2,250 @@
#ifdef CUDA_FOUND
-#define DISPATCH1(Function, Arg1) \
- namespace gpu { \
- void Function(Arg1); \
- } \
- namespace cpu { \
- void Function(Arg1); \
- } \
- void Function(Arg1 arg1) { \
+#define DISPATCH1(Function, Arg1) \
+ namespace gpu { \
+ void Function(Arg1); \
+ } \
+ namespace cpu { \
+ void Function(Arg1); \
+ } \
+ void Function(Arg1 arg1) { \
if(arg1->getBackend()->getDevice().type == DeviceType::gpu) \
- gpu::Function(arg1); \
- else \
- cpu::Function(arg1); \
- }
-
-#define DISPATCH2(Function, Arg1, Arg2) \
- namespace gpu { \
- void Function(Arg1, Arg2); \
- } \
- namespace cpu { \
- void Function(Arg1, Arg2); \
- } \
- static inline void Function(Arg1 arg1, Arg2 arg2) { \
+ gpu::Function(arg1); \
+ else \
+ cpu::Function(arg1); \
+ }
+
+#define DISPATCH2(Function, Arg1, Arg2) \
+ namespace gpu { \
+ void Function(Arg1, Arg2); \
+ } \
+ namespace cpu { \
+ void Function(Arg1, Arg2); \
+ } \
+ static inline void Function(Arg1 arg1, Arg2 arg2) { \
if(arg1->getBackend()->getDevice().type == DeviceType::gpu) \
- gpu::Function(arg1, arg2); \
- else \
- cpu::Function(arg1, arg2); \
- }
-
-#define DISPATCH3(Function, Arg1, Arg2, Arg3) \
- namespace gpu { \
- void Function(Arg1, Arg2, Arg3); \
- } \
- namespace cpu { \
- void Function(Arg1, Arg2, Arg3); \
- } \
+ gpu::Function(arg1, arg2); \
+ else \
+ cpu::Function(arg1, arg2); \
+ }
+
+#define DISPATCH3(Function, Arg1, Arg2, Arg3) \
+ namespace gpu { \
+ void Function(Arg1, Arg2, Arg3); \
+ } \
+ namespace cpu { \
+ void Function(Arg1, Arg2, Arg3); \
+ } \
static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3) { \
- if(arg1->getBackend()->getDevice().type == DeviceType::gpu) \
- gpu::Function(arg1, arg2, arg3); \
- else \
- cpu::Function(arg1, arg2, arg3); \
- }
-
-#define DISPATCH4(Function, Arg1, Arg2, Arg3, Arg4) \
- namespace gpu { \
- void Function(Arg1, Arg2, Arg3, Arg4); \
- } \
- namespace cpu { \
- void Function(Arg1, Arg2, Arg3, Arg4); \
- } \
+ if(arg1->getBackend()->getDevice().type == DeviceType::gpu) \
+ gpu::Function(arg1, arg2, arg3); \
+ else \
+ cpu::Function(arg1, arg2, arg3); \
+ }
+
+#define DISPATCH4(Function, Arg1, Arg2, Arg3, Arg4) \
+ namespace gpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4); \
+ } \
+ namespace cpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4); \
+ } \
static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4) { \
+ if(arg1->getBackend()->getDevice().type == DeviceType::gpu) \
+ gpu::Function(arg1, arg2, arg3, arg4); \
+ else \
+ cpu::Function(arg1, arg2, arg3, arg4); \
+ }
+
+#define DISPATCH5(Function, Arg1, Arg2, Arg3, Arg4, Arg5) \
+ namespace gpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4, Arg5); \
+ } \
+ namespace cpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4, Arg5); \
+ } \
+ static inline void Function( \
+ Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5) { \
if(arg1->getBackend()->getDevice().type == DeviceType::gpu) \
- gpu::Function(arg1, arg2, arg3, arg4); \
- else \
- cpu::Function(arg1, arg2, arg3, arg4); \
- }
-
-#define DISPATCH5(Function, Arg1, Arg2, Arg3, Arg4, Arg5) \
- namespace gpu { \
- void Function(Arg1, Arg2, Arg3, Arg4, Arg5); \
- } \
- namespace cpu { \
- void Function(Arg1, Arg2, Arg3, Arg4, Arg5); \
- } \
- static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5) { \
- if(arg1->getBackend()->getDevice().type == DeviceType::gpu) \
- gpu::Function(arg1, arg2, arg3, arg4, arg5); \
- else \
- cpu::Function(arg1, arg2, arg3, arg4, arg5); \
- }
-
-#define DISPATCH6(Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6) \
- namespace gpu { \
- void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6); \
- } \
- namespace cpu { \
- void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6); \
- } \
- static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5, Arg6 arg6) { \
- if(arg1->getBackend()->getDevice().type == DeviceType::gpu) \
- gpu::Function(arg1, arg2, arg3, arg4, arg5, arg6); \
- else \
- cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6); \
+ gpu::Function(arg1, arg2, arg3, arg4, arg5); \
+ else \
+ cpu::Function(arg1, arg2, arg3, arg4, arg5); \
+ }
+
+#define DISPATCH6(Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6) \
+ namespace gpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6); \
+ } \
+ namespace cpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6); \
+ } \
+ static inline void Function( \
+ Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5, Arg6 arg6) { \
+ if(arg1->getBackend()->getDevice().type == DeviceType::gpu) \
+ gpu::Function(arg1, arg2, arg3, arg4, arg5, arg6); \
+ else \
+ cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6); \
}
#define DISPATCH7(Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7) \
- namespace gpu { \
- void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7); \
- } \
- namespace cpu { \
- void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7); \
- } \
- static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5, Arg6 arg6, Arg7 arg7) { \
- if(arg1->getBackend()->getDevice().type == DeviceType::gpu) \
- gpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7); \
- else \
- cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7); \
+ namespace gpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7); \
+ } \
+ namespace cpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7); \
+ } \
+ static inline void Function(Arg1 arg1, \
+ Arg2 arg2, \
+ Arg3 arg3, \
+ Arg4 arg4, \
+ Arg5 arg5, \
+ Arg6 arg6, \
+ Arg7 arg7) { \
+ if(arg1->getBackend()->getDevice().type == DeviceType::gpu) \
+ gpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7); \
+ else \
+ cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7); \
}
#define DISPATCH8(Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8) \
- namespace gpu { \
- void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8); \
- } \
- namespace cpu { \
- void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8); \
- } \
- static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5, Arg6 arg6, Arg7 arg7, Arg8 arg8) { \
- if(arg1->getBackend()->getDevice().type == DeviceType::gpu) \
- gpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8); \
- else \
- cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8); \
- }
-
-#define DISPATCH9(Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9) \
- namespace gpu { \
- void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9); \
- } \
- namespace cpu { \
- void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9); \
- } \
- static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5, Arg6 arg6, Arg7 arg7, Arg8 arg8, Arg9 arg9) { \
- if(arg1->getBackend()->getDevice().type == DeviceType::gpu) \
+ namespace gpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8); \
+ } \
+ namespace cpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8); \
+ } \
+ static inline void Function(Arg1 arg1, \
+ Arg2 arg2, \
+ Arg3 arg3, \
+ Arg4 arg4, \
+ Arg5 arg5, \
+ Arg6 arg6, \
+ Arg7 arg7, \
+ Arg8 arg8) { \
+ if(arg1->getBackend()->getDevice().type == DeviceType::gpu) \
+ gpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8); \
+ else \
+ cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8); \
+ }
+
+#define DISPATCH9( \
+ Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9) \
+ namespace gpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9); \
+ } \
+ namespace cpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9); \
+ } \
+ static inline void Function(Arg1 arg1, \
+ Arg2 arg2, \
+ Arg3 arg3, \
+ Arg4 arg4, \
+ Arg5 arg5, \
+ Arg6 arg6, \
+ Arg7 arg7, \
+ Arg8 arg8, \
+ Arg9 arg9) { \
+ if(arg1->getBackend()->getDevice().type == DeviceType::gpu) \
gpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9); \
- else \
+ else \
cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9); \
}
#else
#define DISPATCH1(Function, Arg1) \
- namespace cpu { \
- void Function(Arg1); \
- } \
- void Function(Arg1 arg1) { \
- cpu::Function(arg1); \
- }
-
-#define DISPATCH2(Function, Arg1, Arg2) \
- namespace cpu { \
- void Function(Arg1, Arg2); \
- } \
+ namespace cpu { \
+ void Function(Arg1); \
+ } \
+ void Function(Arg1 arg1) { cpu::Function(arg1); }
+
+#define DISPATCH2(Function, Arg1, Arg2) \
+ namespace cpu { \
+ void Function(Arg1, Arg2); \
+ } \
static inline void Function(Arg1 arg1, Arg2 arg2) { \
- cpu::Function(arg1, arg2); \
+ cpu::Function(arg1, arg2); \
}
-#define DISPATCH3(Function, Arg1, Arg2, Arg3) \
- namespace cpu { \
- void Function(Arg1, Arg2, Arg3); \
- } \
+#define DISPATCH3(Function, Arg1, Arg2, Arg3) \
+ namespace cpu { \
+ void Function(Arg1, Arg2, Arg3); \
+ } \
static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3) { \
- cpu::Function(arg1, arg2, arg3); \
+ cpu::Function(arg1, arg2, arg3); \
}
-#define DISPATCH4(Function, Arg1, Arg2, Arg3, Arg4) \
- namespace cpu { \
- void Function(Arg1, Arg2, Arg3, Arg4); \
- } \
+#define DISPATCH4(Function, Arg1, Arg2, Arg3, Arg4) \
+ namespace cpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4); \
+ } \
static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4) { \
- cpu::Function(arg1, arg2, arg3, arg4); \
+ cpu::Function(arg1, arg2, arg3, arg4); \
}
-#define DISPATCH5(Function, Arg1, Arg2, Arg3, Arg4, Arg5) \
- namespace cpu { \
- void Function(Arg1, Arg2, Arg3, Arg4, Arg5); \
- } \
- static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5) { \
- cpu::Function(arg1, arg2, arg3, arg4, arg5); \
+#define DISPATCH5(Function, Arg1, Arg2, Arg3, Arg4, Arg5) \
+ namespace cpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4, Arg5); \
+ } \
+ static inline void Function( \
+ Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5) { \
+ cpu::Function(arg1, arg2, arg3, arg4, arg5); \
}
-#define DISPATCH6(Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6) \
- namespace cpu { \
- void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6); \
- } \
- static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5, Arg6 arg6) { \
- cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6); \
+#define DISPATCH6(Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6) \
+ namespace cpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6); \
+ } \
+ static inline void Function( \
+ Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5, Arg6 arg6) { \
+ cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6); \
}
#define DISPATCH7(Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7) \
- namespace cpu { \
- void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7); \
- } \
- static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5, Arg6 arg6, Arg7 arg7) { \
- cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7); \
+ namespace cpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7); \
+ } \
+ static inline void Function(Arg1 arg1, \
+ Arg2 arg2, \
+ Arg3 arg3, \
+ Arg4 arg4, \
+ Arg5 arg5, \
+ Arg6 arg6, \
+ Arg7 arg7) { \
+ cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7); \
}
#define DISPATCH8(Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8) \
- namespace cpu { \
- void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8); \
- } \
- static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5, Arg6 arg6, Arg7 arg7, Arg8 arg8) { \
- cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8); \
- }
-
-#define DISPATCH9(Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9) \
- namespace cpu { \
- void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9); \
- } \
- static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5, Arg6 arg6, Arg7 arg7, Arg8 arg8, Arg9 arg9) { \
- cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9); \
+ namespace cpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8); \
+ } \
+ static inline void Function(Arg1 arg1, \
+ Arg2 arg2, \
+ Arg3 arg3, \
+ Arg4 arg4, \
+ Arg5 arg5, \
+ Arg6 arg6, \
+ Arg7 arg7, \
+ Arg8 arg8) { \
+ cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8); \
+ }
+
+#define DISPATCH9( \
+ Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9) \
+ namespace cpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9); \
+ } \
+ static inline void Function(Arg1 arg1, \
+ Arg2 arg2, \
+ Arg3 arg3, \
+ Arg4 arg4, \
+ Arg5 arg5, \
+ Arg6 arg6, \
+ Arg7 arg7, \
+ Arg8 arg8, \
+ Arg9 arg9) { \
+ cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9); \
}
#endif
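
Each DISPATCHn macro above declares the operator once per device namespace and emits a thin wrapper that routes on the first argument's backend. A hand-expanded sketch of what the two-argument CUDA_FOUND variant boils down to, with simplified stand-in types (Tensor, DeviceType) and a hypothetical Copy operator in place of the real tensor functions:

#include <iostream>

enum class DeviceType { cpu, gpu };

struct Tensor {
  DeviceType device;  // stands in for getBackend()->getDevice().type
};

namespace gpu { inline void Copy(Tensor&, const Tensor&) { std::cout << "gpu::Copy\n"; } }
namespace cpu { inline void Copy(Tensor&, const Tensor&) { std::cout << "cpu::Copy\n"; } }

// roughly what DISPATCH2(Copy, Tensor&, const Tensor&) expands to
static inline void Copy(Tensor& out, const Tensor& in) {
  if(out.device == DeviceType::gpu)
    gpu::Copy(out, in);
  else
    cpu::Copy(out, in);
}

int main() {
  Tensor a{DeviceType::cpu}, b{DeviceType::cpu};
  Copy(a, b);  // prints cpu::Copy
}
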
diff --git a/src/tensors/gpu/add.cu b/src/tensors/gpu/add.cu
index 84679313..1acb5b54 100644
--- a/src/tensors/gpu/add.cu
+++ b/src/tensors/gpu/add.cu
@@ -9,8 +9,8 @@
#include "functional/functional.h"
#include "functional/shape.h"
-#include "functional/tmp.h"
#include "functional/tensor.h"
+#include "functional/tmp.h"
namespace marian {
@@ -22,7 +22,6 @@ __global__ void gAddGeneric(Functor functor,
functional::Tensor<float> out,
functional::Array<functional::Tensor<float>, K> ins,
float scale = 1.0) {
-
int outLength = out.shape().elements();
bool same = outLength == full.elements();
for(int i = 0; i < K; ++i)
@@ -37,14 +36,12 @@ __global__ void gAddGeneric(Functor functor,
for(int bid = 0; bid < outLength; bid += blockDim.x * gridDim.x) {
int index = bid + blockDim.x * blockIdx.x + threadIdx.x;
if(index < outLength) {
-
if(same) {
out[index] += functional::apply(functor, ins, index) * scale;
} else {
out.shape().dims(index, dims);
out[index] += functional::loops(functor, ins, len, dims) * scale;
}
-
}
}
}
@@ -81,7 +78,6 @@ __global__ void gAddReduce(Functor functor,
functional::Tensor<float> out,
functional::Array<functional::Tensor<float>, K> ins,
float scale = 1.0) {
-
int rows = full.elements() / full.back();
int cols = full.back();
@@ -133,12 +129,8 @@ __global__ void gAddReduce(Functor functor,
}
}
-template <class Functor, class ...Tensors>
-void Add(Functor functor,
- float scale,
- marian::Tensor out,
- Tensors... tensors) {
-
+template <class Functor, class... Tensors>
+void Add(Functor functor, float scale, marian::Tensor out, Tensors... tensors) {
cudaSetDevice(out->getDevice().no);
auto full = marian::Shape::broadcast({out, tensors...});
@@ -148,7 +140,7 @@ void Add(Functor functor,
constexpr size_t K = sizeof...(Tensors);
functional::Tensor<float> gOut = out;
- functional::Array<functional::Tensor<float>, K> gIns = {tensors ...};
+ functional::Array<functional::Tensor<float>, K> gIns = {tensors...};
if(full.back() != 1 && out->shape().back() == 1) {
size_t m = full.elements() / length;
@@ -180,6 +172,5 @@ void Add(Functor functor,
}
#include "tensors/gpu/add.inc"
-
}
}
diff --git a/src/tensors/gpu/add.h b/src/tensors/gpu/add.h
index 18ed60ad..d6ab4c4e 100644
--- a/src/tensors/gpu/add.h
+++ b/src/tensors/gpu/add.h
@@ -6,11 +6,7 @@ namespace marian {
namespace gpu {
-template <class Functor, class ...Tensors>
-void Add(Functor functor,
- float scale,
- marian::Tensor out,
- Tensors... tensors);
-
+template <class Functor, class... Tensors>
+void Add(Functor functor, float scale, marian::Tensor out, Tensors... tensors);
}
}
diff --git a/src/tensors/gpu/algorithm.cu b/src/tensors/gpu/algorithm.cu
index 4fe5f9f4..d5f5b3ec 100644
--- a/src/tensors/gpu/algorithm.cu
+++ b/src/tensors/gpu/algorithm.cu
@@ -4,45 +4,49 @@
#include "tensors/gpu/cuda_helpers.h"
namespace marian {
- namespace gpu {
- template <typename T>
- void copy(Ptr<Backend> backend, const T* begin, const T* end, T* dest) {
- CUDA_CHECK(cudaSetDevice(backend->getDevice().no));
- CudaCopy(begin, end, dest);
- CUDA_CHECK(cudaStreamSynchronize(0));
- }
-
- template void copy<float>(Ptr<Backend> backend, const float* begin, const float* end, float* dest);
- template void copy<int>(Ptr<Backend> backend, const int* begin, const int* end, int* dest);
-
-
- __global__ void gFill(float *d_in, int size, float val) {
- for(int bid = 0; bid < size; bid += blockDim.x * gridDim.x) {
- int index = bid + threadIdx.x + blockDim.x * blockIdx.x;
- if(index < size) {
- d_in[index] = val;
- }
- }
- }
+namespace gpu {
+template <typename T>
+void copy(Ptr<Backend> backend, const T* begin, const T* end, T* dest) {
+ CUDA_CHECK(cudaSetDevice(backend->getDevice().no));
+ CudaCopy(begin, end, dest);
+ CUDA_CHECK(cudaStreamSynchronize(0));
+}
- void fill(Ptr<Backend> backend, float* begin, float* end, float value) {
- CUDA_CHECK(cudaSetDevice(backend->getDevice().no));
- int size = end - begin;
- int threads = std::min(512, size);
- int blocks = (size / threads) + (size % threads != 0);
- gFill<<<blocks, threads>>>(begin, size, value);
- CUDA_CHECK(cudaStreamSynchronize(0));
+template void copy<float>(Ptr<Backend> backend,
+ const float* begin,
+ const float* end,
+ float* dest);
+template void copy<int>(Ptr<Backend> backend,
+ const int* begin,
+ const int* end,
+ int* dest);
+
+__global__ void gFill(float* d_in, int size, float val) {
+ for(int bid = 0; bid < size; bid += blockDim.x * gridDim.x) {
+ int index = bid + threadIdx.x + blockDim.x * blockIdx.x;
+ if(index < size) {
+ d_in[index] = val;
}
+ }
+}
- void setSparse(Ptr<Backend> backend,
- const std::vector<size_t>& keys,
- const std::vector<float>& values,
- float* data) {
- CUDA_CHECK(cudaSetDevice(backend->getDevice().no));
- ABORT("no SetSparse");
- //gpu::SetSparse(data, keys, values);
- CUDA_CHECK(cudaStreamSynchronize(0));
- }
+void fill(Ptr<Backend> backend, float* begin, float* end, float value) {
+ CUDA_CHECK(cudaSetDevice(backend->getDevice().no));
+ int size = end - begin;
+ int threads = std::min(512, size);
+ int blocks = (size / threads) + (size % threads != 0);
+ gFill<<<blocks, threads>>>(begin, size, value);
+ CUDA_CHECK(cudaStreamSynchronize(0));
+}
- }
+void setSparse(Ptr<Backend> backend,
+ const std::vector<size_t>& keys,
+ const std::vector<float>& values,
+ float* data) {
+ CUDA_CHECK(cudaSetDevice(backend->getDevice().no));
+ ABORT("no SetSparse");
+ // gpu::SetSparse(data, keys, values);
+ CUDA_CHECK(cudaStreamSynchronize(0));
+}
+}
}
diff --git a/src/tensors/gpu/algorithm.h b/src/tensors/gpu/algorithm.h
index 003189d1..ac7925d1 100644
--- a/src/tensors/gpu/algorithm.h
+++ b/src/tensors/gpu/algorithm.h
@@ -3,12 +3,15 @@
#include "tensors/backend.h"
namespace marian {
- namespace gpu {
- template <typename T>
- void copy(Ptr<Backend> backend, const T* begin, const T* end, T* dest);
-
- void fill(Ptr<Backend> backend, float* begin, float* end, float value);
-
- void setSparse(Ptr<Backend> backend, const std::vector<size_t>&, const std::vector<float>&, float*);
- }
+namespace gpu {
+template <typename T>
+void copy(Ptr<Backend> backend, const T* begin, const T* end, T* dest);
+
+void fill(Ptr<Backend> backend, float* begin, float* end, float value);
+
+void setSparse(Ptr<Backend> backend,
+ const std::vector<size_t>&,
+ const std::vector<float>&,
+ float*);
+}
}
diff --git a/src/tensors/gpu/backend.h b/src/tensors/gpu/backend.h
index 41b66f75..5e7e7036 100644
--- a/src/tensors/gpu/backend.h
+++ b/src/tensors/gpu/backend.h
@@ -25,13 +25,9 @@ public:
setHandles();
}
- void setDevice() {
- cudaSetDevice(deviceId_.no);
- }
+ void setDevice() { cudaSetDevice(deviceId_.no); }
- void synchronize() {
- cudaStreamSynchronize(0);
- }
+ void synchronize() { cudaStreamSynchronize(0); }
cublasHandle_t getCublasHandle() { return cublasHandle_; }
@@ -41,13 +37,11 @@ private:
cublasHandle_t cublasHandle_;
curandGenerator_t curandGenerator_;
-
void setHandles() {
cublasHandle_ = create_handle();
curandGenerator_ = createCurandGenerator();
}
-
curandGenerator_t createCurandGenerator() {
cudaSetDevice(deviceId_.no);
curandGenerator_t generator;
@@ -67,6 +61,5 @@ private:
return cublasHandle;
}
};
-
}
}
diff --git a/src/tensors/gpu/cuda_helpers.h b/src/tensors/gpu/cuda_helpers.h
index a5bef04b..d939b996 100644
--- a/src/tensors/gpu/cuda_helpers.h
+++ b/src/tensors/gpu/cuda_helpers.h
@@ -13,9 +13,8 @@ const int MAX_BLOCKS = 65535;
#define CUDA_CHECK(ans) \
{ gpuAssert((ans), __FILE__, __LINE__); }
-
inline void gpuAssert(cudaError_t code,
- const char *file,
+ const char* file,
int line,
bool abort = true) {
if(code != cudaSuccess) {
@@ -26,8 +25,8 @@ inline void gpuAssert(cudaError_t code,
template <typename T>
void CudaCopy(const T* start, const T* end, T* dest) {
- CUDA_CHECK(cudaMemcpy((void*)dest, (void*)start, (end - start) * sizeof(T),
- cudaMemcpyDefault));
+ CUDA_CHECK(cudaMemcpy(
+ (void*)dest, (void*)start, (end - start) * sizeof(T), cudaMemcpyDefault));
}
#define CUSPARSE_CHECK(x) \
diff --git a/src/tensors/gpu/device.cu b/src/tensors/gpu/device.cu
index e15b80d7..d66761e7 100644
--- a/src/tensors/gpu/device.cu
+++ b/src/tensors/gpu/device.cu
@@ -7,34 +7,34 @@
namespace marian {
namespace gpu {
- Device::~Device() {
- cudaSetDevice(deviceId_.no);
- if(data_) {
- CUDA_CHECK(cudaFree(data_));
- }
- cudaDeviceSynchronize();
+Device::~Device() {
+ cudaSetDevice(deviceId_.no);
+ if(data_) {
+ CUDA_CHECK(cudaFree(data_));
}
+ cudaDeviceSynchronize();
+}
- void Device::reserve(size_t size) {
- size = align(size);
- cudaSetDevice(deviceId_.no);
-
- ABORT_IF(size < size_ || size == 0, "New size must be larger than old size and larger than 0");
-
- if(data_) {
- // Allocate memory by going through host memory
- uint8_t *temp = new uint8_t[size_];
- CUDA_CHECK(cudaMemcpy(temp, data_, size_, cudaMemcpyDeviceToHost));
- CUDA_CHECK(cudaFree(data_));
- CUDA_CHECK(cudaMalloc(&data_, size));
- CUDA_CHECK(cudaMemcpy(data_, temp, size_, cudaMemcpyHostToDevice));
- delete[] temp;
- } else {
- CUDA_CHECK(cudaMalloc(&data_, size));
- }
-
- size_ = size;
+void Device::reserve(size_t size) {
+ size = align(size);
+ cudaSetDevice(deviceId_.no);
+
+ ABORT_IF(size < size_ || size == 0,
+ "New size must be larger than old size and larger than 0");
+
+ if(data_) {
+ // Allocate memory by going through host memory
+ uint8_t *temp = new uint8_t[size_];
+ CUDA_CHECK(cudaMemcpy(temp, data_, size_, cudaMemcpyDeviceToHost));
+ CUDA_CHECK(cudaFree(data_));
+ CUDA_CHECK(cudaMalloc(&data_, size));
+ CUDA_CHECK(cudaMemcpy(data_, temp, size_, cudaMemcpyHostToDevice));
+ delete[] temp;
+ } else {
+ CUDA_CHECK(cudaMalloc(&data_, size));
}
+ size_ = size;
+}
}
}
diff --git a/src/tensors/gpu/dropout.cu b/src/tensors/gpu/dropout.cu
index 02a38446..dc82e49d 100644
--- a/src/tensors/gpu/dropout.cu
+++ b/src/tensors/gpu/dropout.cu
@@ -22,31 +22,29 @@
} \
} while(0)
-
namespace marian {
- namespace gpu {
-
- __global__ void gScale(float* data, int n, float p) {
- int index = threadIdx.x + blockIdx.x * blockDim.x;
+namespace gpu {
- while(index < n) {
- data[index] = (data[index] < p) / p;
- index += gridDim.x * blockDim.x;
- }
- }
+__global__ void gScale(float* data, int n, float p) {
+ int index = threadIdx.x + blockIdx.x * blockDim.x;
- void Dropout(Tensor tensor, float p) {
- auto gpuBackend = std::static_pointer_cast<gpu::Backend>(tensor->getBackend());
- curandGenerator_t gen = gpuBackend->getCurandGenerator();
- int n = tensor->size();
- CURAND_CALL(curandGenerateUniform(gen, tensor->data(), n));
-
- int numThreads = std::min(n, 512);
- int numBlocks = n / numThreads + (n % numThreads != 0);
+ while(index < n) {
+ data[index] = (data[index] < p) / p;
+ index += gridDim.x * blockDim.x;
+ }
+}
- gScale<<<numBlocks, numThreads>>>(tensor->data(), n, 1.f - p);
- }
+void Dropout(Tensor tensor, float p) {
+ auto gpuBackend
+ = std::static_pointer_cast<gpu::Backend>(tensor->getBackend());
+ curandGenerator_t gen = gpuBackend->getCurandGenerator();
+ int n = tensor->size();
+ CURAND_CALL(curandGenerateUniform(gen, tensor->data(), n));
+ int numThreads = std::min(n, 512);
+ int numBlocks = n / numThreads + (n % numThreads != 0);
- }
+ gScale<<<numBlocks, numThreads>>>(tensor->data(), n, 1.f - p);
+}
+}
}
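
The dropout.cu hunk keeps the behaviour unchanged: the tensor is filled with uniform samples and gScale turns each sample x into a scaled keep-mask value (x < keepProb) / keepProb, with keepProb = 1.f - p as passed at the launch site. A CPU-only sketch of that arithmetic (illustrative; marian does this on the GPU via curandGenerateUniform and the gScale kernel):

#include <random>
#include <vector>

// Inverted dropout on a host vector: each element becomes either 0
// or 1/keepProb, so the mask has expected value 1.
void dropoutMask(std::vector<float>& data, float dropProb) {
  float keepProb = 1.f - dropProb;   // gScale is launched with 1.f - p
  std::mt19937 gen(1234);
  std::uniform_real_distribution<float> uniform(0.f, 1.f);
  for(auto& x : data) {
    x = uniform(gen);                // curandGenerateUniform equivalent
    x = (x < keepProb) / keepProb;   // same expression as in gScale
  }
}
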
diff --git a/src/tensors/gpu/element.cu b/src/tensors/gpu/element.cu
index b8d6cef9..f5c5fbe7 100644
--- a/src/tensors/gpu/element.cu
+++ b/src/tensors/gpu/element.cu
@@ -1,6 +1,5 @@
-
-
#include "tensors/gpu/element.h"
+
#include "tensors/gpu/cuda_helpers.h"
#include "functional/array.h"
#include "functional/tensor.h"
@@ -11,9 +10,9 @@ namespace marian {
namespace gpu {
template <size_t K, bool broadcast, class Functor>
-__global__ void gElement(Functor functor,
- functional::Array<functional::Tensor<float>, K> tensors) {
-
+__global__ void gElement(
+ Functor functor,
+ functional::Array<functional::Tensor<float>, K> tensors) {
int length = tensors[0].shape().elements();
functional::Array<int, functional::Shape::size()> dims;
functional::Array<int, K> indices;
@@ -21,7 +20,6 @@ __global__ void gElement(Functor functor,
for(int bid = 0; bid < length; bid += blockDim.x * gridDim.x) {
int index = bid + blockDim.x * blockIdx.x + threadIdx.x;
if(index < length) {
-
indices.fill(index);
if(broadcast) {
@@ -35,8 +33,8 @@ __global__ void gElement(Functor functor,
}
}
-template <class Functor, class ...Tensors>
-void Element(Functor functor, Tensor out, Tensors ...tensors) {
+template <class Functor, class... Tensors>
+void Element(Functor functor, Tensor out, Tensors... tensors) {
cudaSetDevice(out->getDevice().no);
constexpr size_t K = sizeof...(tensors) + 1;
@@ -57,8 +55,5 @@ void Element(Functor functor, Tensor out, Tensors ...tensors) {
}
#include "tensors/gpu/element.inc"
-
-
}
}
-
diff --git a/src/tensors/gpu/element.h b/src/tensors/gpu/element.h
index 0e7eb162..b13f625f 100644
--- a/src/tensors/gpu/element.h
+++ b/src/tensors/gpu/element.h
@@ -5,8 +5,7 @@
namespace marian {
namespace gpu {
-template <class Functor, class ...Tensors>
-void Element(Functor functor, Tensor out, Tensors ...tensors);
-
+template <class Functor, class... Tensors>
+void Element(Functor functor, Tensor out, Tensors... tensors);
}
}
diff --git a/src/tensors/gpu/prod.cu b/src/tensors/gpu/prod.cu
index 38278ec5..57ff2007 100644
--- a/src/tensors/gpu/prod.cu
+++ b/src/tensors/gpu/prod.cu
@@ -38,10 +38,11 @@ void Prod(marian::Tensor C,
cublasOperation_t opA = transA ? CUBLAS_OP_T : CUBLAS_OP_N;
cublasOperation_t opB = transB ? CUBLAS_OP_T : CUBLAS_OP_N;
- auto cublasHandle = std::static_pointer_cast<gpu::Backend>(C->getBackend())->getCublasHandle();
+ auto cublasHandle = std::static_pointer_cast<gpu::Backend>(C->getBackend())
+ ->getCublasHandle();
#if CUDA_VERSION >= 9000
- //cublasSetMathMode(cublasHandle, CUBLAS_TENSOR_OP_MATH);
+// cublasSetMathMode(cublasHandle, CUBLAS_TENSOR_OP_MATH);
#endif
cublasSgemm(cublasHandle,
@@ -59,7 +60,7 @@ void Prod(marian::Tensor C,
C->data(),
ldc);
#if CUDA_VERSION >= 9000
- //cublasSetMathMode(cublasHandle, CUBLAS_DEFAULT_MATH);
+// cublasSetMathMode(cublasHandle, CUBLAS_DEFAULT_MATH);
#endif
}
@@ -96,10 +97,11 @@ void ProdBatched(marian::Tensor C,
cublasOperation_t opA = transA ? CUBLAS_OP_T : CUBLAS_OP_N;
cublasOperation_t opB = transB ? CUBLAS_OP_T : CUBLAS_OP_N;
- auto cublasHandle = std::static_pointer_cast<gpu::Backend>(C->getBackend())->getCublasHandle();
+ auto cublasHandle = std::static_pointer_cast<gpu::Backend>(C->getBackend())
+ ->getCublasHandle();
#if CUDA_VERSION >= 9000
- //cublasSetMathMode(cublasHandle, CUBLAS_TENSOR_OP_MATH);
+// cublasSetMathMode(cublasHandle, CUBLAS_TENSOR_OP_MATH);
#endif
cublasSgemmStridedBatched(cublasHandle,
opB,
@@ -120,10 +122,8 @@ void ProdBatched(marian::Tensor C,
n * m,
std::max(batchA, batchB));
#if CUDA_VERSION >= 9000
- //cublasSetMathMode(cublasHandle, CUBLAS_DEFAULT_MATH);
+// cublasSetMathMode(cublasHandle, CUBLAS_DEFAULT_MATH);
#endif
}
-
-
}
}
diff --git a/src/tensors/gpu/prod.h b/src/tensors/gpu/prod.h
index db9b62d8..968e631f 100644
--- a/src/tensors/gpu/prod.h
+++ b/src/tensors/gpu/prod.h
@@ -21,6 +21,5 @@ void ProdBatched(marian::Tensor C,
bool transB,
float beta = 0,
float scalar = 1);
-
}
}
diff --git a/src/tensors/gpu/sparse.h b/src/tensors/gpu/sparse.h
index cffb398e..3dd30126 100644
--- a/src/tensors/gpu/sparse.h
+++ b/src/tensors/gpu/sparse.h
@@ -2,8 +2,8 @@
#include <cusparse_v2.h>
#include "common/definitions.h"
-#include "tensors/tensor.h"
#include "kernels/cuda_helpers.h"
+#include "tensors/tensor.h"
namespace marian {
diff --git a/src/tensors/gpu/tensor_operators.cu b/src/tensors/gpu/tensor_operators.cu
index 7e64c954..1160bdd1 100644
--- a/src/tensors/gpu/tensor_operators.cu
+++ b/src/tensors/gpu/tensor_operators.cu
@@ -28,11 +28,11 @@ __device__ inline float stableLogit(float x) {
}
bool IsNan(Tensor in) {
- //cudaSetDevice(in->getDevice().no);
- //thrust::device_ptr<float> begin = thrust::device_pointer_cast(in->data());
- //thrust::device_ptr<float> end
+ // cudaSetDevice(in->getDevice().no);
+ // thrust::device_ptr<float> begin = thrust::device_pointer_cast(in->data());
+ // thrust::device_ptr<float> end
// = thrust::device_pointer_cast(in->data() + in->size());
- //return thrust::transform_reduce(
+ // return thrust::transform_reduce(
// begin, end, isnan_test(), 0, thrust::plus<bool>());
return false;
}
@@ -93,10 +93,9 @@ void Concatenate1(Tensor out, const std::vector<Tensor>& inputs) {
for(auto in : inputs) {
ABORT_IF(rows != in->shape().elements() / in->shape().back(),
- "First dimension must be equal");
+ "First dimension must be equal");
int cols_in = in->shape().back();
-
int blocks = std::min(MAX_BLOCKS, rows);
int threads = std::min(MAX_THREADS, cols_in);
@@ -122,7 +121,7 @@ void Split1(std::vector<Tensor>& outputs, const Tensor in) {
int cols_in = in->shape().back();
for(auto out : outputs) {
ABORT_IF(rows != out->shape().elements() / out->shape().back(),
- "First dimension must be equal");
+ "First dimension must be equal");
int cols_out = out->shape().back();
int blocks = std::min(MAX_BLOCKS, rows);
@@ -166,10 +165,10 @@ void Deconcatenate(std::vector<Tensor>& outputs, const Tensor in, int ax) {
SplitCont(outputs, in, ax);
}
-__global__ void gTransposeND(functional::Tensor<float> out,
- const functional::Tensor<float> in,
- const functional::Array<int, functional::Shape::size()> permute) {
-
+__global__ void gTransposeND(
+ functional::Tensor<float> out,
+ const functional::Tensor<float> in,
+ const functional::Array<int, functional::Shape::size()> permute) {
constexpr size_t N = functional::Shape::size();
functional::Array<int, N> oDims;
functional::Array<int, N> pDims;
@@ -458,7 +457,6 @@ void SoftmaxGrad(Tensor grad, Tensor adj, Tensor val) {
int m = grad->shape().elements() / grad->shape().back();
int k = grad->shape().back();
-
int blocks = std::min(MAX_BLOCKS, m);
int threads = std::min(MAX_THREADS, k);
int shared = sizeof(float) * threads * 2;
@@ -784,7 +782,9 @@ void Select(Tensor out,
int blocks = std::min(MAX_BLOCKS, length / threads + (length % threads != 0));
auto mp_indices = allocator->alloc<size_t>(indices.size());
- CudaCopy(indices.data(), indices.data() + indices.size(), mp_indices->data<size_t>());
+ CudaCopy(indices.data(),
+ indices.data() + indices.size(),
+ mp_indices->data<size_t>());
int axisGPU = axis + functional::Shape::size() - out->shape().size();
gSelect<<<blocks, threads>>>(out->data(),
@@ -810,7 +810,9 @@ void Insert(Tensor out,
int blocks = std::min(MAX_BLOCKS, length / threads + (length % threads != 0));
auto mp_indices = allocator->alloc<size_t>(indices.size());
- CudaCopy(indices.data(), indices.data() + indices.size(), mp_indices->data<size_t>());
+ CudaCopy(indices.data(),
+ indices.data() + indices.size(),
+ mp_indices->data<size_t>());
int axisGPU = axis + functional::Shape::size() - out->shape().size();
gInsert<<<blocks, threads>>>(out->data(),
@@ -1174,19 +1176,18 @@ void CrossEntropyPickBackward(Tensor out, Tensor adj, Tensor a, Tensor pick) {
out->data(), out->shape(), adj->data(), a->data(), pick->data());
}
-
float L2Norm(Tensor in) {
-
cudaSetDevice(in->getDevice().no);
int size = in->shape().elements();
int threads = std::min(MAX_THREADS, size);
- int blocks = std::min(MAX_BLOCKS, size / threads + (size % threads != 0));
+ int blocks = std::min(MAX_BLOCKS, size / threads + (size % threads != 0));
uint8_t* data;
cudaMalloc(&data, blocks * sizeof(float));
- Tensor out(new TensorBase(
- New<MemoryPiece>(data, blocks * sizeof(float)), {1, blocks}, in->getBackend()));
+ Tensor out(new TensorBase(New<MemoryPiece>(data, blocks * sizeof(float)),
+ {1, blocks},
+ in->getBackend()));
using namespace functional;
ReduceAll(_1 * _1, out, in);
@@ -1203,7 +1204,7 @@ __global__ void gAtt(float* out,
int m, // total rows (batch x time x beam)
int k, // depth
int b, // batch size
- int t // time of ctx
+ int t // time of ctx
) {
int rows = m;
int cols = k;
@@ -1255,14 +1256,8 @@ void Att(Tensor out, Tensor va, Tensor context, Tensor state) {
int threads = std::min(MAX_THREADS, (int)k);
int shared = sizeof(float) * threads * 2;
- gAtt<<<blocks, threads, shared>>>(out->data(),
- va->data(),
- context->data(),
- state->data(),
- m,
- k,
- b,
- t);
+ gAtt<<<blocks, threads, shared>>>(
+ out->data(), va->data(), context->data(), state->data(), m, k, b, t);
}
__global__ void gAttBack(float* gVa,
@@ -1576,7 +1571,6 @@ __global__ void gShift(float* out, const float* in, int length, int offset) {
}
void Shift(Tensor out, Tensor in, marian::Shape shift, bool invert) {
-
ABORT_IF(in->shape().size() != shift.size(), "bad dimensions");
int offset = 0;
@@ -2006,21 +2000,22 @@ __global__ void gMaxPoolingForward(float* out,
int lastWidth) {
int tid = threadIdx.x + blockIdx.x * blockDim.x;
- if (tid >= outRows * outCols) return;
+ if(tid >= outRows * outCols)
+ return;
int rowId = tid / outRows;
int colId = tid % outRows;
float* b = in + (rowId * inCols) + (colId * width);
- float* localMask = mask + (rowId / numKernels) * maskCols + colId * width;
+ float* localMask = mask + (rowId / numKernels) * maskCols + colId * width;
- if (colId == outRows - 1) {
+ if(colId == outRows - 1) {
width = lastWidth;
}
float currentMax = b[0] * localMask[0];
- for (int i = 1; i < width; ++i) {
- if (b[i] * localMask[i] > currentMax) {
+ for(int i = 1; i < width; ++i) {
+ if(b[i] * localMask[i] > currentMax) {
currentMax = b[i] * localMask[i];
}
}
@@ -2045,15 +2040,20 @@ void PoolingWithMaskingForward(Tensor out,
int outRows = outShape[2];
int outCols = outShape[0] * outShape[1];
- int lastWidth = ((inCols - isEven) % width == 0)
- ? width
- : (inCols - isEven) % width;
+ int lastWidth
+ = ((inCols - isEven) % width == 0) ? width : (inCols - isEven) % width;
- gMaxPoolingForward<<<blocks, threads>>>(
- out->data(), outRows, outCols,
- in->data(), inRows, inCols,
- mask->data(), outShape[1], mask->shape()[2],
- width, lastWidth);
+ gMaxPoolingForward<<<blocks, threads>>>(out->data(),
+ outRows,
+ outCols,
+ in->data(),
+ inRows,
+ inCols,
+ mask->data(),
+ outShape[1],
+ mask->shape()[2],
+ width,
+ lastWidth);
}
__global__ void gMaxPoolingBackward(float* adj,
@@ -2067,30 +2067,31 @@ __global__ void gMaxPoolingBackward(float* adj,
int numKernels,
int maskCols,
int width,
- int lastWidth)
-{
+ int lastWidth) {
int tid = threadIdx.x + blockIdx.x * blockDim.x;
- if (tid >= adjRows * adjCols) return;
+ if(tid >= adjRows * adjCols)
+ return;
int rowId = tid / adjRows;
int colId = tid % adjRows;
float* b = in + (rowId * inCols) + (colId * width);
- if (colId == adjRows - 1) {
+ if(colId == adjRows - 1) {
width = lastWidth;
}
float* localMask = mask + (rowId / numKernels) * maskCols + colId * width;
size_t currentMaxIdx = 0;
- for (int i = 1; i < width; ++i) {
- if (b[i] * localMask[i] > b[currentMaxIdx] * localMask[currentMaxIdx]) {
+ for(int i = 1; i < width; ++i) {
+ if(b[i] * localMask[i] > b[currentMaxIdx] * localMask[currentMaxIdx]) {
currentMaxIdx = i;
}
}
- adjIn[(rowId * inCols) + (colId * width) + currentMaxIdx] += adj[rowId + (colId * adjCols)];
+ adjIn[(rowId * inCols) + (colId * width) + currentMaxIdx]
+ += adj[rowId + (colId * adjCols)];
}
void PoolingWithMaskingBackward(Tensor adj,
@@ -2111,16 +2112,21 @@ void PoolingWithMaskingBackward(Tensor adj,
int adjRows = adjShape[2];
int adjCols = adjShape[0] * adjShape[1];
- int lastWidth = ((inCols - isEven) % width == 0)
- ? width
- : (inCols - isEven) % width;
-
- gMaxPoolingBackward<<<blocks, threads>>>(
- adj->data(), adjRows, adjCols,
- in->data(), adjIn->data(), inRows, inCols,
- mask->data(), adjShape[1], mask->shape()[2],
- width, lastWidth);
+ int lastWidth
+ = ((inCols - isEven) % width == 0) ? width : (inCols - isEven) % width;
+
+ gMaxPoolingBackward<<<blocks, threads>>>(adj->data(),
+ adjRows,
+ adjCols,
+ in->data(),
+ adjIn->data(),
+ inRows,
+ inCols,
+ mask->data(),
+ adjShape[1],
+ mask->shape()[2],
+ width,
+ lastWidth);
}
-
}
} // namespace marian
diff --git a/src/tensors/tensor.h b/src/tensors/tensor.h
index c60caa91..9789428e 100644
--- a/src/tensors/tensor.h
+++ b/src/tensors/tensor.h
@@ -78,7 +78,7 @@ public:
#endif
}
- void get(std::vector<float> &v) {
+ void get(std::vector<float>& v) {
v.resize(size());
if(backend_->getDevice().type == DeviceType::cpu) {
std::copy(data(), data() + size(), v.data());
@@ -101,9 +101,7 @@ public:
#endif
}
- void set(const std::vector<float> &v) {
- set(v.data(), v.data() + v.size());
- }
+ void set(const std::vector<float>& v) { set(v.data(), v.data() + v.size()); }
void set(float value) {
if(backend_->getDevice().type == DeviceType::cpu) {
@@ -116,8 +114,7 @@ public:
#endif
}
- void setSparse(const std::vector<size_t> &k,
- const std::vector<float> &v) {
+ void setSparse(const std::vector<size_t>& k, const std::vector<float>& v) {
if(backend_->getDevice().type == DeviceType::cpu) {
for(int i = 0; i < k.size(); ++i)
data()[k[i]] = v[i];
@@ -130,8 +127,8 @@ public:
}
void copyFrom(Tensor in) {
- if(in->getBackend()->getDevice().type == DeviceType::cpu &&
- backend_->getDevice().type == DeviceType::cpu) {
+ if(in->getBackend()->getDevice().type == DeviceType::cpu
+ && backend_->getDevice().type == DeviceType::cpu) {
std::copy(in->data(), in->data() + in->size(), data());
}
#ifdef CUDA_FOUND
@@ -167,7 +164,6 @@ public:
disp = disp && (dims[j] < dispCols || dims[j] >= shape()[j] - dispCols);
if(disp) {
-
if(dims.back() == 0) {
bool par = true;
std::vector<std::string> p;
@@ -182,9 +178,7 @@ public:
strm << " ";
}
- strm << std::setw(12)
- << values[i]
- << " ";
+ strm << std::setw(12) << values[i] << " ";
if(dims.back() + 1 == shape().back()) {
for(int j = dims.size() - 1; j >= 0; --j) {
@@ -214,9 +208,7 @@ public:
strm << std::endl;
return strm.str();
}
-
};
typedef std::shared_ptr<TensorBase> Tensor;
-
}
diff --git a/src/tensors/tensor_operators.h b/src/tensors/tensor_operators.h
index 7086b97b..164fd12b 100644
--- a/src/tensors/tensor_operators.h
+++ b/src/tensors/tensor_operators.h
@@ -21,53 +21,47 @@
namespace marian {
- template <class Functor, class ...Tensors>
- void Element(Functor functor, marian::Tensor out, Tensors ...tensors) {
+template <class Functor, class... Tensors>
+void Element(Functor functor, marian::Tensor out, Tensors... tensors) {
#ifdef CUDA_FOUND
- if(out->getBackend()->getDevice().type == DeviceType::gpu)
- gpu::Element(functor, out, tensors...);
- else
+ if(out->getBackend()->getDevice().type == DeviceType::gpu)
+ gpu::Element(functor, out, tensors...);
+ else
#endif
- cpu::Element(functor, out, tensors...);
- }
-
- template <class Functor, class ...Tensors>
- void Add(Functor functor,
- float scale,
- marian::Tensor out,
- Tensors... tensors) {
+ cpu::Element(functor, out, tensors...);
+}
+
+template <class Functor, class... Tensors>
+void Add(Functor functor, float scale, marian::Tensor out, Tensors... tensors) {
#ifdef CUDA_FOUND
- if(out->getBackend()->getDevice().type == DeviceType::gpu)
- gpu::Add(functor, scale, out, tensors...);
- else
+ if(out->getBackend()->getDevice().type == DeviceType::gpu)
+ gpu::Add(functor, scale, out, tensors...);
+ else
#endif
- cpu::Add(functor, scale, out, tensors...);
- }
-
- template <class Functor, class ...Tensors>
- void Add(Functor functor,
- marian::Tensor out,
- Tensors... tensors) {
- Add(functor, 1, out, tensors...);
- }
-
- template <class Functor, class ...Tensors>
- void Reduce(Functor functor,
- float scale,
- marian::Tensor out,
- Tensors... tensors) {
- out->set(0);
- Add(functor, scale, out, tensors...);
- }
-
- template <class Functor, class ...Tensors>
- void Reduce(Functor functor,
- marian::Tensor out,
- Tensors... tensors) {
- out->set(0);
- Add(functor, out, tensors...);
- }
+ cpu::Add(functor, scale, out, tensors...);
+}
+
+template <class Functor, class... Tensors>
+void Add(Functor functor, marian::Tensor out, Tensors... tensors) {
+ Add(functor, 1, out, tensors...);
+}
+
+template <class Functor, class... Tensors>
+void Reduce(Functor functor,
+ float scale,
+ marian::Tensor out,
+ Tensors... tensors) {
+ out->set(0);
+ Add(functor, scale, out, tensors...);
+}
+
+template <class Functor, class... Tensors>
+void Reduce(Functor functor, marian::Tensor out, Tensors... tensors) {
+ out->set(0);
+ Add(functor, out, tensors...);
+}
+// clang-format off
DISPATCH7(Prod, marian::Tensor, const marian::Tensor, const marian::Tensor, bool, bool, float, float)
DISPATCH7(ProdBatched, marian::Tensor, const marian::Tensor, const marian::Tensor, bool, bool, float, float)
@@ -86,26 +80,34 @@ namespace marian {
DISPATCH4(Shift, marian::Tensor, marian::Tensor, marian::Shape, bool)
DISPATCH3(Concatenate, marian::Tensor, const std::vector<marian::Tensor>&, int)
+// clang-format on
#ifdef CUDA_FOUND
- namespace gpu {
- void Deconcatenate(std::vector<marian::Tensor>& outputs, const marian::Tensor in, int ax);
- }
+namespace gpu {
+void Deconcatenate(std::vector<marian::Tensor>& outputs,
+ const marian::Tensor in,
+ int ax);
+}
#endif
- namespace cpu {
- void Deconcatenate(std::vector<marian::Tensor>& outputs, const marian::Tensor in, int ax);
- }
+namespace cpu {
+void Deconcatenate(std::vector<marian::Tensor>& outputs,
+ const marian::Tensor in,
+ int ax);
+}
- static inline void Deconcatenate(std::vector<marian::Tensor>& outputs, const marian::Tensor in, int ax) {
+static inline void Deconcatenate(std::vector<marian::Tensor>& outputs,
+ const marian::Tensor in,
+ int ax) {
#ifdef CUDA_FOUND
- if(in->getBackend()->getDevice().type == DeviceType::gpu)
- gpu::Deconcatenate(outputs, in, ax);
- else
+ if(in->getBackend()->getDevice().type == DeviceType::gpu)
+ gpu::Deconcatenate(outputs, in, ax);
+ else
#endif
- cpu::Deconcatenate(outputs, in, ax);
- }
+ cpu::Deconcatenate(outputs, in, ax);
+}
+// clang-format off
DISPATCH5(LayerNormalization, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, float)
DISPATCH9(LayerNormalizationGrad, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, float)
@@ -120,113 +122,116 @@ namespace marian {
DISPATCH5(Select, marian::Tensor, marian::Tensor, int, const std::vector<size_t>&, Ptr<Allocator>)
DISPATCH5(Insert, marian::Tensor, marian::Tensor, int, const std::vector<size_t>&, Ptr<Allocator>)
-
DISPATCH2(LSTMCellForward, marian::Tensor, std::vector<marian::Tensor>)
DISPATCH2(LSTMOutputForward, marian::Tensor, std::vector<marian::Tensor>);
+// clang-format on
#ifdef CUDA_FOUND
- namespace gpu {
- void LSTMCellBackward(std::vector<marian::Tensor> outputs,
- std::vector<marian::Tensor> inputs,
- marian::Tensor adj);
- }
+namespace gpu {
+void LSTMCellBackward(std::vector<marian::Tensor> outputs,
+ std::vector<marian::Tensor> inputs,
+ marian::Tensor adj);
+}
#endif
- namespace cpu {
- void LSTMCellBackward(std::vector<marian::Tensor> outputs,
- std::vector<marian::Tensor> inputs,
- marian::Tensor adj);
- }
+namespace cpu {
+void LSTMCellBackward(std::vector<marian::Tensor> outputs,
+ std::vector<marian::Tensor> inputs,
+ marian::Tensor adj);
+}
- static inline void LSTMCellBackward(std::vector<marian::Tensor> outputs,
- std::vector<marian::Tensor> inputs,
- marian::Tensor adj) {
+static inline void LSTMCellBackward(std::vector<marian::Tensor> outputs,
+ std::vector<marian::Tensor> inputs,
+ marian::Tensor adj) {
#ifdef CUDA_FOUND
- if(adj->getBackend()->getDevice().type == DeviceType::gpu)
- gpu::LSTMCellBackward(outputs, inputs, adj);
- else
+ if(adj->getBackend()->getDevice().type == DeviceType::gpu)
+ gpu::LSTMCellBackward(outputs, inputs, adj);
+ else
#endif
- cpu::LSTMCellBackward(outputs, inputs, adj);
- }
+ cpu::LSTMCellBackward(outputs, inputs, adj);
+}
#ifdef CUDA_FOUND
- namespace gpu {
- void LSTMOutputBackward(std::vector<marian::Tensor> outputs,
- std::vector<marian::Tensor> inputs,
- marian::Tensor adj);
- }
+namespace gpu {
+void LSTMOutputBackward(std::vector<marian::Tensor> outputs,
+ std::vector<marian::Tensor> inputs,
+ marian::Tensor adj);
+}
#endif
- namespace cpu {
- void LSTMOutputBackward(std::vector<marian::Tensor> outputs,
- std::vector<marian::Tensor> inputs,
- marian::Tensor adj);
- }
+namespace cpu {
+void LSTMOutputBackward(std::vector<marian::Tensor> outputs,
+ std::vector<marian::Tensor> inputs,
+ marian::Tensor adj);
+}
- static inline void LSTMOutputBackward(std::vector<marian::Tensor> outputs,
- std::vector<marian::Tensor> inputs,
- marian::Tensor adj) {
+static inline void LSTMOutputBackward(std::vector<marian::Tensor> outputs,
+ std::vector<marian::Tensor> inputs,
+ marian::Tensor adj) {
#ifdef CUDA_FOUND
- if(adj->getBackend()->getDevice().type == DeviceType::gpu)
- gpu::LSTMOutputBackward(outputs, inputs, adj);
- else
+ if(adj->getBackend()->getDevice().type == DeviceType::gpu)
+ gpu::LSTMOutputBackward(outputs, inputs, adj);
+ else
#endif
- cpu::LSTMOutputBackward(outputs, inputs, adj);
- }
+ cpu::LSTMOutputBackward(outputs, inputs, adj);
+}
- DISPATCH3(GRUFastForward, marian::Tensor, std::vector<marian::Tensor>, bool)
+DISPATCH3(GRUFastForward, marian::Tensor, std::vector<marian::Tensor>, bool)
#ifdef CUDA_FOUND
- namespace gpu {
- void GRUFastBackward(std::vector<marian::Tensor> outputs,
- std::vector<marian::Tensor> inputs,
- marian::Tensor adj,
- bool final);
- }
+namespace gpu {
+void GRUFastBackward(std::vector<marian::Tensor> outputs,
+ std::vector<marian::Tensor> inputs,
+ marian::Tensor adj,
+ bool final);
+}
#endif
- namespace cpu {
- void GRUFastBackward(std::vector<marian::Tensor> outputs,
- std::vector<marian::Tensor> inputs,
- marian::Tensor adj,
- bool final);
- }
-
- static inline void GRUFastBackward(std::vector<marian::Tensor> outputs,
- std::vector<marian::Tensor> inputs,
- marian::Tensor adj,
- bool final = false) {
+namespace cpu {
+void GRUFastBackward(std::vector<marian::Tensor> outputs,
+ std::vector<marian::Tensor> inputs,
+ marian::Tensor adj,
+ bool final);
+}
+
+static inline void GRUFastBackward(std::vector<marian::Tensor> outputs,
+ std::vector<marian::Tensor> inputs,
+ marian::Tensor adj,
+ bool final = false) {
#ifdef CUDA_FOUND
- if(adj->getBackend()->getDevice().type == DeviceType::gpu)
- gpu::GRUFastBackward(outputs, inputs, adj, final);
- else
+ if(adj->getBackend()->getDevice().type == DeviceType::gpu)
+ gpu::GRUFastBackward(outputs, inputs, adj, final);
+ else
#endif
- cpu::GRUFastBackward(outputs, inputs, adj, final);
- }
+ cpu::GRUFastBackward(outputs, inputs, adj, final);
+}
+// clang-format off
DISPATCH4(Att, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor)
DISPATCH7(AttBack, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor)
+// clang-format on
#ifdef CUDA_FOUND
- namespace gpu {
- float L2Norm(marian::Tensor in);
- }
+namespace gpu {
+float L2Norm(marian::Tensor in);
+}
#endif
- namespace cpu {
- float L2Norm(marian::Tensor in);
- }
+namespace cpu {
+float L2Norm(marian::Tensor in);
+}
- static inline float L2Norm(marian::Tensor in) {
+static inline float L2Norm(marian::Tensor in) {
#ifdef CUDA_FOUND
- if(in->getBackend()->getDevice().type == DeviceType::gpu)
- return gpu::L2Norm(in);
- else
+ if(in->getBackend()->getDevice().type == DeviceType::gpu)
+ return gpu::L2Norm(in);
+ else
#endif
- return cpu::L2Norm(in);
- }
+ return cpu::L2Norm(in);
+}
+// clang-format off
DISPATCH5(PoolingWithMaskingForward, marian::Tensor, marian::Tensor, marian::Tensor, int, bool)
DISPATCH6(PoolingWithMaskingBackward, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, int, bool)
-
+// clang-format on
}
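
The tensor_operators.h hunk above is purely brace and whitespace reflow; the pattern being reformatted is a header-level dispatcher that forwards to cpu:: or gpu:: depending on the tensor's device type, with the GPU branch compiled only under CUDA_FOUND (the DISPATCHn macros generate the same shape). A stripped-down sketch of that pattern (everything except DeviceType and CUDA_FOUND is a made-up name for illustration):

#include <iostream>

enum class DeviceType { cpu, gpu };

namespace cpu {
inline void fill(float* data, int n, float value) {
  for(int i = 0; i < n; ++i)
    data[i] = value;
}
}

#ifdef CUDA_FOUND
namespace gpu {
void fill(float* data, int n, float value);  // defined in a .cu file
}
#endif

// Header-level dispatcher, same shape as Element/Add/Reduce above:
// use the gpu:: implementation when compiled in and requested,
// otherwise fall through to the cpu:: one.
inline void fill(DeviceType device, float* data, int n, float value) {
#ifdef CUDA_FOUND
  if(device == DeviceType::gpu)
    gpu::fill(data, n, value);
  else
#endif
    cpu::fill(data, n, value);
}

int main() {
  float buf[4];
  fill(DeviceType::cpu, buf, 4, 1.f);
  std::cout << buf[0] << "\n";  // prints 1
}
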
diff --git a/src/training/graph_group_async.cpp b/src/training/graph_group_async.cpp
index bee719ad..ee3b7d34 100644
--- a/src/training/graph_group_async.cpp
+++ b/src/training/graph_group_async.cpp
@@ -104,7 +104,8 @@ void AsyncGraphGroup::init(Ptr<data::Batch> batch) {
totalSize -= __size__;
Tensor param;
- Ptr<TensorAllocator> allocator = New<TensorAllocator>(graph->getBackend());
+ Ptr<TensorAllocator> allocator
+ = New<TensorAllocator>(graph->getBackend());
allocator->reserveExact(__size__ * sizeof(float));
allocator->allocate(param, {1, __size__});
paramsAlloc_.push_back(allocator);
@@ -122,7 +123,8 @@ void AsyncGraphGroup::init(Ptr<data::Batch> batch) {
int __size__ = std::min(shardSize_, totalSize);
totalSize -= __size__;
Tensor grad_;
- Ptr<TensorAllocator> allocator_ = New<TensorAllocator>(graph->getBackend());
+ Ptr<TensorAllocator> allocator_
+ = New<TensorAllocator>(graph->getBackend());
allocator_->reserveExact(__size__ * sizeof(float));
allocator_->allocate(grad_, {1, __size__});
@@ -139,7 +141,8 @@ void AsyncGraphGroup::init(Ptr<data::Batch> batch) {
int __size__ = std::min(shardSize_, totalSize);
totalSize -= __size__;
Tensor paramAvg;
- Ptr<TensorAllocator> allocator = New<TensorAllocator>(graph->getBackend());
+ Ptr<TensorAllocator> allocator
+ = New<TensorAllocator>(graph->getBackend());
allocator->reserveExact(__size__ * sizeof(float));
allocator->allocate(paramAvg, {1, __size__});
@@ -230,7 +233,8 @@ void AsyncGraphGroup::execute(Ptr<data::Batch> batch) {
scheduler_->update(cost, batch);
if(scheduler_->saving() || scheduler_->validating()) {
- // Wait with validation or saving until all other threads are done with update.
+ // Wait with validation or saving until all other threads are done with
+ // update.
// We want to reuse the graphs for validation, so they need to be in
// a safe state.
pool_->wait_for_others(lock);
diff --git a/src/training/graph_group_async.h b/src/training/graph_group_async.h
index 94291dee..af311798 100644
--- a/src/training/graph_group_async.h
+++ b/src/training/graph_group_async.h
@@ -70,7 +70,6 @@ public:
movingAvg_{options_->get<float>("exponential-smoothing") > 0},
mvDecay_{options_->get<float>("exponential-smoothing")},
tau_{options_->get<size_t>("optimizer-delay")} {
-
pool_.reset(new ThreadPool(devices_.size(), devices_.size()));
for(auto device : devices_) {
diff --git a/src/training/graph_group_multinode.cpp b/src/training/graph_group_multinode.cpp
index b80952a8..a09db862 100644
--- a/src/training/graph_group_multinode.cpp
+++ b/src/training/graph_group_multinode.cpp
@@ -150,7 +150,8 @@ void MultiNodeGraphGroup::initClientCommOverlapGpuTensors() {
size_t modelSize = clientGraphs_[0]->params()->vals()->size();
for(int client = 0; client < devices_.size(); client++) {
// Communication overlap buffer (for grads + params)
- Tensor commOverlapBuffer = newTensor(modelSize, clientGraphs_[client]->getBackend());
+ Tensor commOverlapBuffer
+ = newTensor(modelSize, clientGraphs_[client]->getBackend());
commOverlapBuffer->copyFrom(clientGraphs_[0]->params()->vals());
clientCommOverlapBuffersGPU_.push_back(commOverlapBuffer);
// Gradients local sum buffer
@@ -206,11 +207,13 @@ void MultiNodeGraphGroup::calculateShardSizes() {
void MultiNodeGraphGroup::initShardGpuTensors() {
size_t offset = 0;
for(int shard = 0; shard < devices_.size(); shard++) {
- Tensor gpuParams = newTensor(shardSizes_[shard], clientGraphs_[shard]->getBackend());
+ Tensor gpuParams
+ = newTensor(shardSizes_[shard], clientGraphs_[shard]->getBackend());
gpuParams->copyFrom(clientGraphs_[0]->params()->vals()->subtensor(
offset, shardSizes_[shard]));
shardParams_.push_back(gpuParams);
- shardGrads_.push_back(newTensor(shardSizes_[shard], clientGraphs_[shard]->getBackend()));
+ shardGrads_.push_back(
+ newTensor(shardSizes_[shard], clientGraphs_[shard]->getBackend()));
}
}
diff --git a/src/training/graph_group_singleton.cpp b/src/training/graph_group_singleton.cpp
index 509e50c6..528fff8f 100644
--- a/src/training/graph_group_singleton.cpp
+++ b/src/training/graph_group_singleton.cpp
@@ -1,5 +1,5 @@
-#include "tensors/tensor_operators.h"
#include "training/graph_group_singleton.h"
+#include "tensors/tensor_operators.h"
#include "functional/functional.h"
namespace marian {
@@ -15,7 +15,8 @@ void SingletonGraph::updateMovingAverage(Tensor mvAvgParams,
Tensor params,
size_t batches) {
using namespace functional;
- float decay = std::max(mvDecay_, 1.f - (float)(batches + 1) / (float)(batches + 10));
+ float decay
+ = std::max(mvDecay_, 1.f - (float)(batches + 1) / (float)(batches + 10));
Element(_1 = ((1.f - decay) * _1) + (decay * _2), mvAvgParams, params);
}
@@ -59,8 +60,7 @@ void SingletonGraph::execute(Ptr<data::Batch> batch) {
if(mvAvg_) {
mvAvgGraph_->reuseWorkspace(graph_);
scheduler_->validate({mvAvgGraph_});
- }
- else {
+ } else {
scheduler_->validate({graph_});
}
}
diff --git a/src/training/graph_group_singleton.h b/src/training/graph_group_singleton.h
index 5f658bfb..11b9cbc8 100644
--- a/src/training/graph_group_singleton.h
+++ b/src/training/graph_group_singleton.h
@@ -29,7 +29,6 @@ public:
: GraphGroup(options),
mvAvg_{options_->get<float>("exponential-smoothing") > 0},
mvDecay_{options_->get<float>("exponential-smoothing")} {
-
auto deviceId = options_->getDevices()[0];
graph_ = New<ExpressionGraph>();
graph_->setDevice(deviceId);
diff --git a/src/training/graph_group_sync.cpp b/src/training/graph_group_sync.cpp
index e169b6ed..685ba172 100644
--- a/src/training/graph_group_sync.cpp
+++ b/src/training/graph_group_sync.cpp
@@ -1,6 +1,6 @@
#include "training/graph_group_sync.h"
-#include "tensors/tensor_operators.h"
#include "functional/functional.h"
+#include "tensors/tensor_operators.h"
namespace marian {
@@ -17,7 +17,8 @@ void SyncGraphGroup::updateMovingAverage(Tensor paramsAvg,
Tensor params,
size_t batches) {
using namespace functional;
- float decay = std::max(mvDecay_, 1.f - (float)(batches + 1) / (float)(batches + 10));
+ float decay
+ = std::max(mvDecay_, 1.f - (float)(batches + 1) / (float)(batches + 10));
Element(_1 = ((1.f - decay) * _1) + (decay * _2), paramsAvg, params);
}
@@ -135,10 +136,10 @@ void SyncGraphGroup::execute(Ptr<data::Batch> batch) {
int size = params_[idx]->size();
int i = 0;
- float div = devices_.size(); // no. of GPUs
+ float div = devices_.size(); // no. of GPUs
// do not average gradients if cost type is sum.
- if (options_->get<std::string>("cost-type") == "ce-sum") {
+ if(options_->get<std::string>("cost-type") == "ce-sum") {
div = 1;
}
@@ -176,7 +177,7 @@ void SyncGraphGroup::execute(Ptr<data::Batch> batch) {
float cost = 0;
for(auto c : costs)
cost += c;
- if (options_->get<std::string>("cost-type") != "ce-sum") {
+ if(options_->get<std::string>("cost-type") != "ce-sum") {
cost = cost / costs.size();
}
diff --git a/src/training/graph_group_sync.h b/src/training/graph_group_sync.h
index 11c7d9f4..f4bc1ad4 100644
--- a/src/training/graph_group_sync.h
+++ b/src/training/graph_group_sync.h
@@ -43,7 +43,6 @@ public:
devices_{options_->getDevices()},
movingAvg_{options_->get<float>("exponential-smoothing") > 0},
mvDecay_{options_->get<float>("exponential-smoothing")} {
-
for(auto device : devices_) {
auto graph = New<ExpressionGraph>();
graph->setDevice(device);
diff --git a/src/training/scheduler.h b/src/training/scheduler.h
index ea2c3f01..5e817cc5 100644
--- a/src/training/scheduler.h
+++ b/src/training/scheduler.h
@@ -100,7 +100,8 @@ public:
return (state_->batches % options_->get<size_t>("save-freq") == 0);
}
- void validate(const std::vector<Ptr<ExpressionGraph>>& graphs, bool final = false) {
+ void validate(const std::vector<Ptr<ExpressionGraph>>& graphs,
+ bool final = false) {
if(state_->validated
|| (state_->batches % options_->get<size_t>("valid-freq") != 0
&& !final))
diff --git a/src/training/sparse_tensor.cu b/src/training/sparse_tensor.cu
index cea5b655..78da9668 100644
--- a/src/training/sparse_tensor.cu
+++ b/src/training/sparse_tensor.cu
@@ -7,8 +7,8 @@
#include "tensors/tensor.h"
#include "tensors/tensor_operators.h"
#include "training/sparse_tensor.h"
-#include "tensors/gpu/cuda_helpers.h"
+#include "tensors/gpu/cuda_helpers.h"
namespace marian {
@@ -48,10 +48,10 @@ __global__ void gFindSubtensor(int* indices,
}
SparseTensorBase::SparseTensorBase(int capacity, Ptr<Backend> backend)
-: backend_(backend), capacity_(capacity) {
+ : backend_(backend), capacity_(capacity) {
ABORT_IF(backend_->getDevice().type == DeviceType::cpu,
- "Gradient dropping is currently not implemented for CPU usage");
-
+ "Gradient dropping is currently not implemented for CPU usage");
+
cudaSetDevice(backend_->getDevice().no);
CUDA_CHECK(cudaMalloc(&data_, sizeof(float) * capacity));
CUDA_CHECK(cudaMalloc(&indices_, sizeof(int) * capacity));
@@ -64,7 +64,7 @@ SparseTensorBase::SparseTensorBase(float* data,
int* indices,
int size,
Ptr<Backend> backend)
-: backend_(backend) {
+ : backend_(backend) {
data_ = data;
indices_ = indices;
size_ = size;
@@ -98,10 +98,10 @@ void SparseTensorBase::copyFrom(float* data,
size_ = size;
if(size == 0)
return;
-
+
ABORT_IF(backend_->getDevice().type == DeviceType::cpu,
- "Gradient dropping is currently not implemented for CPU usage");
-
+ "Gradient dropping is currently not implemented for CPU usage");
+
cudaSetDevice(backend_->getDevice().no);
cudaMemcpy(data_, data, size * sizeof(float), cudaMemcpyDefault);
@@ -128,7 +128,7 @@ void SparseTensorBase::setSize(int size) {
void SparseTensorBase::toDense(Tensor t, int offset) {
ABORT_IF(backend_->getDevice().type == DeviceType::cpu,
"Gradient dropping is currently not implemented for CPU usage");
-
+
cudaSetDevice(backend_->getDevice().no);
int threads = 512;
int blocks = 1 + size_ / threads;
@@ -152,8 +152,8 @@ std::shared_ptr<SparseTensorBase> SparseTensorBase::subtensor(int pos,
int size,
int idx) {
ABORT_IF(backend_->getDevice().type == DeviceType::cpu,
- "Gradient dropping is currently not implemented for CPU usage");
-
+ "Gradient dropping is currently not implemented for CPU usage");
+
cudaSetDevice(backend_->getDevice().no);
cudaStreamSynchronize(0);
int* start = gstart_ + idx;
diff --git a/src/training/training.h b/src/training/training.h
index 7216c36f..31473f00 100644
--- a/src/training/training.h
+++ b/src/training/training.h
@@ -45,7 +45,6 @@ public:
if((options_->has("valid-sets") || options_->has("valid-script-path"))
&& options_->get<size_t>("valid-freq") > 0) {
-
for(auto validator : Validators(dataset->getVocabs(), options_))
scheduler->addValidator(validator);
}
diff --git a/src/training/validator.h b/src/training/validator.h
index 295bfa49..7e448e3d 100644
--- a/src/training/validator.h
+++ b/src/training/validator.h
@@ -4,9 +4,9 @@
#include <cstdlib>
#include <limits>
+#include "3rd_party/threadpool.h"
#include "common/config.h"
#include "common/utils.h"
-#include "3rd_party/threadpool.h"
#include "data/batch_generator.h"
#include "data/corpus.h"
#include "graph/expression_graph.h"
@@ -25,8 +25,7 @@ namespace marian {
class ValidatorBase : public TrainingObserver {
public:
ValidatorBase(bool lowerIsBetter)
- : lowerIsBetter_(lowerIsBetter),
- lastBest_{initScore()} {}
+ : lowerIsBetter_(lowerIsBetter), lastBest_{initScore()} {}
virtual float validate(const std::vector<Ptr<ExpressionGraph>>& graphs) = 0;
virtual std::string type() = 0;
@@ -98,7 +97,8 @@ protected:
Ptr<data::BatchGenerator<DataSet>>)
= 0;
- void updateStalled(const std::vector<Ptr<ExpressionGraph>>& graphs, float val) {
+ void updateStalled(const std::vector<Ptr<ExpressionGraph>>& graphs,
+ float val) {
if((lowerIsBetter_ && lastBest_ > val)
|| (!lowerIsBetter_ && lastBest_ < val)) {
stalled_ = 0;
@@ -226,7 +226,6 @@ public:
TranslationValidator(std::vector<Ptr<Vocab>> vocabs, Ptr<Config> options)
: Validator(vocabs, options, false),
quiet_(options_->get<bool>("quiet-translation")) {
-
Ptr<Options> opts = New<Options>();
opts->merge(options);
opts->set("inference", true);
@@ -314,7 +313,8 @@ public:
scorer = scorers[id % graphs.size()];
}
- auto search = New<BeamSearch>(options_, std::vector<Ptr<Scorer>>{scorer});
+ auto search
+ = New<BeamSearch>(options_, std::vector<Ptr<Scorer>>{scorer});
auto histories = search->search(graph, batch);
for(auto history : histories) {
diff --git a/src/translator/beam_search.h b/src/translator/beam_search.h
index cfe94f59..12df46dc 100644
--- a/src/translator/beam_search.h
+++ b/src/translator/beam_search.h
@@ -34,21 +34,20 @@ public:
std::vector<Ptr<ScorerState>>& states,
size_t beamSize,
bool first) {
-
Beams newBeams(beams.size());
for(int i = 0; i < keys.size(); ++i) {
- int embIdx = keys[i] % vocabSize;
- int beamIdx = i / beamSize;
+ int embIdx = keys[i] % vocabSize;
+ int beamIdx = i / beamSize;
if(newBeams[beamIdx].size() < beams[beamIdx].size()) {
auto& beam = beams[beamIdx];
auto& newBeam = newBeams[beamIdx];
int hypIdx = keys[i] / vocabSize;
- float cost = costs[i];
+ float cost = costs[i];
- int hypIdxTrans = (hypIdx / beamSize) +
- (hypIdx % beamSize) * beams.size();
+ int hypIdxTrans
+ = (hypIdx / beamSize) + (hypIdx % beamSize) * beams.size();
if(first)
hypIdxTrans = hypIdx;
@@ -78,7 +77,7 @@ public:
Beams pruneBeam(const Beams& beams) {
Beams newBeams;
- for(auto beam: beams) {
+ for(auto beam : beams) {
Beam newBeam;
for(auto hyp : beam) {
if(hyp->GetWord() > 0) {
@@ -90,9 +89,7 @@ public:
return newBeams;
}
- Histories search(Ptr<ExpressionGraph> graph,
- Ptr<data::CorpusBatch> batch) {
-
+ Histories search(Ptr<ExpressionGraph> graph, Ptr<data::CorpusBatch> batch) {
int dimBatch = batch->size();
Histories histories;
for(int i = 0; i < dimBatch; ++i) {
@@ -140,8 +137,7 @@ public:
Expr prevCosts;
if(first) {
// no cost
- prevCosts = graph->constant({1, 1, 1, 1},
- inits::from_value(0));
+ prevCosts = graph->constant({1, 1, 1, 1}, inits::from_value(0));
} else {
std::vector<float> beamCosts;
@@ -155,8 +151,7 @@ public:
hypIndices.push_back(hyp->GetPrevStateIndex());
embIndices.push_back(hyp->GetWord());
beamCosts.push_back(hyp->GetCost());
- }
- else {
+ } else {
hypIndices.push_back(0);
embIndices.push_back(0);
beamCosts.push_back(-9999);
@@ -164,9 +159,8 @@ public:
}
}
- prevCosts
- = graph->constant({(int)localBeamSize, 1, dimBatch, 1},
- inits::from_vector(beamCosts));
+ prevCosts = graph->constant({(int)localBeamSize, 1, dimBatch, 1},
+ inits::from_vector(beamCosts));
}
//**********************************************************************
@@ -174,10 +168,12 @@ public:
auto totalCosts = prevCosts;
for(int i = 0; i < scorers_.size(); ++i) {
- states[i] = scorers_[i]->step(graph, states[i], hypIndices, embIndices, dimBatch, localBeamSize);
+ states[i] = scorers_[i]->step(
+ graph, states[i], hypIndices, embIndices, dimBatch, localBeamSize);
if(scorers_[i]->getWeight() != 1.f)
- totalCosts = totalCosts + scorers_[i]->getWeight() * states[i]->getProbs();
+ totalCosts
+ = totalCosts + scorers_[i]->getWeight() * states[i]->getProbs();
else
totalCosts = totalCosts + states[i]->getProbs();
}
@@ -207,12 +203,14 @@ public:
nth->getNBestList(beamSizes, totalCosts->val(), outCosts, outKeys, first);
int dimTrgVoc = totalCosts->shape()[-1];
- beams = toHyps(outKeys, outCosts, dimTrgVoc, beams, states, localBeamSize, first);
+ beams = toHyps(
+ outKeys, outCosts, dimTrgVoc, beams, states, localBeamSize, first);
auto prunedBeams = pruneBeam(beams);
for(int i = 0; i < dimBatch; ++i) {
if(!beams[i].empty()) {
- final = final || histories[i]->size() >= 3 * batch->front()->batchWidth();
+ final = final
+ || histories[i]->size() >= 3 * batch->front()->batchWidth();
histories[i]->Add(beams[i], prunedBeams[i].empty() || final);
}
}
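
For orientation on the beam_search.h hunk: each flat key returned by the n-best search ranges over beamSize * vocabSize entries, so toHyps recovers the word id with a modulo and the hypothesis index with a division. A small standalone sketch of that decoding (the numeric values are chosen for illustration only):

#include <cstdio>

int main() {
  const int vocabSize = 50000;
  const int beamSize = 6;

  // A flat key over a [beamSize, vocabSize] score matrix.
  int key = 3 * vocabSize + 1234;   // hypothesis 3, word id 1234

  int embIdx = key % vocabSize;     // word id, as in toHyps above
  int hypIdx = key / vocabSize;     // index into the flattened beam

  printf("hyp %d emb %d (beam size %d)\n", hypIdx, embIdx, beamSize);
  return 0;
}
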
diff --git a/src/translator/helpers.cpp b/src/translator/helpers.cpp
index f112e405..6f48d218 100644
--- a/src/translator/helpers.cpp
+++ b/src/translator/helpers.cpp
@@ -18,7 +18,7 @@ void SetColumn(Tensor in_, size_t col, float value) {
int nColumns = in_->shape()[-1];
float* in = in_->data();
- for (int rowNumber = 0; rowNumber < nRows; ++rowNumber) {
+ for(int rowNumber = 0; rowNumber < nRows; ++rowNumber) {
int index = col + rowNumber * nColumns;
in[index] = value;
}
@@ -31,7 +31,6 @@ void suppressUnk(Expr probs) {
void suppressWord(Expr probs, Word id) {
SetColumn(probs->val(), id, std::numeric_limits<float>::lowest());
}
-
}
void suppressUnk(Expr probs) {
@@ -55,5 +54,4 @@ void suppressWord(Expr probs, Word id) {
}
#endif
}
-
}
diff --git a/src/translator/helpers.cu b/src/translator/helpers.cu
index 7672e515..8971db44 100644
--- a/src/translator/helpers.cu
+++ b/src/translator/helpers.cu
@@ -44,6 +44,5 @@ void suppressUnk(Expr probs) {
void suppressWord(Expr probs, Word id) {
SetColumn(probs->val(), id, std::numeric_limits<float>::lowest());
}
-
}
}
diff --git a/src/translator/helpers.h b/src/translator/helpers.h
index bee596f3..ef9118b7 100644
--- a/src/translator/helpers.h
+++ b/src/translator/helpers.h
@@ -14,7 +14,6 @@ namespace cpu {
void suppressUnk(Expr probs);
void suppressWord(Expr probs, Word id);
-
}
namespace gpu {
@@ -22,11 +21,9 @@ namespace gpu {
void suppressUnk(Expr probs);
void suppressWord(Expr probs, Word id);
-
}
void suppressUnk(Expr probs);
void suppressWord(Expr probs, Word id);
-
}
diff --git a/src/translator/history.h b/src/translator/history.h
index 0070b723..6ed0f019 100644
--- a/src/translator/history.h
+++ b/src/translator/history.h
@@ -27,7 +27,8 @@ public:
if(beam[j]->GetWord() == 0 || last) {
float cost = beam[j]->GetCost() / LengthPenalty(history_.size());
topHyps_.push({history_.size(), j, cost});
- //std::cerr << "Add " << history_.size() << " " << j << " " << cost << std::endl;
+ // std::cerr << "Add " << history_.size() << " " << j << " " << cost
+ // << std::endl;
}
}
history_.push_back(beam);
@@ -44,14 +45,14 @@ public:
size_t start = bestHypCoord.i;
size_t j = bestHypCoord.j;
- //float c = bestHypCoord.cost;
- //std::cerr << "h: " << start << " " << j << " " << c << std::endl;
+ // float c = bestHypCoord.cost;
+ // std::cerr << "h: " << start << " " << j << " " << c << std::endl;
Words targetWords;
Ptr<Hypothesis> bestHyp = history_[start][j];
while(bestHyp->GetPrevHyp() != nullptr) {
targetWords.push_back(bestHyp->GetWord());
- //std::cerr << bestHyp->GetWord() << " " << bestHyp << std::endl;
+ // std::cerr << bestHyp->GetWord() << " " << bestHyp << std::endl;
bestHyp = bestHyp->GetPrevHyp();
}
diff --git a/src/translator/nth_element.cpp b/src/translator/nth_element.cpp
index 2730adfb..9416ea0e 100644
--- a/src/translator/nth_element.cpp
+++ b/src/translator/nth_element.cpp
@@ -3,11 +3,11 @@
* SPDX-License-Identifier: MIT
*/
+#include "translator/nth_element.h"
#include <algorithm>
#include <iterator>
#include <limits>
#include <numeric>
-#include "translator/nth_element.h"
namespace marian {
@@ -18,8 +18,8 @@ NthElementCPU::NthElementCPU(size_t maxBeamSize, size_t maxBatchSize) {
}
void NthElementCPU::getNBestList(float* probs,
- const std::vector<int>& batchFirstElementIdxs,
- const std::vector<int>& cumulativeBeamSizes) {
+ const std::vector<int>& batchFirstElementIdxs,
+ const std::vector<int>& cumulativeBeamSizes) {
/* For each batch, select the top N elements, where N is the beam size for
* this batch. Locally record these elements (their current value and index
* in 'probs') before updating each element to a large negative value, such
@@ -31,16 +31,19 @@ void NthElementCPU::getNBestList(float* probs,
std::iota(idxs.begin(), idxs.end(), 0);
int numBatches = batchFirstElementIdxs.size() - 1;
- for (int batchIdx = 0; batchIdx < numBatches; ++batchIdx) {
+ for(int batchIdx = 0; batchIdx < numBatches; ++batchIdx) {
int pos = cumulativeBeamSizes[batchIdx];
- int beamSize = cumulativeBeamSizes[batchIdx+1] - pos;
+ int beamSize = cumulativeBeamSizes[batchIdx + 1] - pos;
- std::vector<int>::iterator begin = idxs.begin() + batchFirstElementIdxs[batchIdx];
+ std::vector<int>::iterator begin
+ = idxs.begin() + batchFirstElementIdxs[batchIdx];
std::vector<int>::iterator middle = begin + beamSize;
- std::vector<int>::iterator end = idxs.begin() + batchFirstElementIdxs[batchIdx+1];
- std::partial_sort(begin, middle, end, [=](int a, int b) { return probs[a] > probs[b]; });
+ std::vector<int>::iterator end
+ = idxs.begin() + batchFirstElementIdxs[batchIdx + 1];
+ std::partial_sort(
+ begin, middle, end, [=](int a, int b) { return probs[a] > probs[b]; });
- while (begin != middle) {
+ while(begin != middle) {
int idx = *begin++;
h_res_idx[pos] = idx;
h_res[pos] = probs[idx];
@@ -51,32 +54,38 @@ void NthElementCPU::getNBestList(float* probs,
}
void NthElementCPU::getNBestList(const std::vector<size_t>& beamSizes,
- Tensor probs, std::vector<float>& outCosts, std::vector<unsigned>& outKeys,
- const bool isFirst) {
+ Tensor probs,
+ std::vector<float>& outCosts,
+ std::vector<unsigned>& outKeys,
+ const bool isFirst) {
std::vector<int> cumulativeBeamSizes(beamSizes.size() + 1, 0);
std::vector<int> batchFirstElementIdxs(beamSizes.size() + 1, 0);
size_t vocabSize = probs->shape()[-1];
- for (size_t i = 0; i < beamSizes.size(); ++i) {
- cumulativeBeamSizes[i+1] = cumulativeBeamSizes[i] + beamSizes[i];
- batchFirstElementIdxs[i+1] += (isFirst ? i + 1 : cumulativeBeamSizes[i+1]) * vocabSize;
+ for(size_t i = 0; i < beamSizes.size(); ++i) {
+ cumulativeBeamSizes[i + 1] = cumulativeBeamSizes[i] + beamSizes[i];
+ batchFirstElementIdxs[i + 1]
+ += (isFirst ? i + 1 : cumulativeBeamSizes[i + 1]) * vocabSize;
}
getNBestList(probs->data(), batchFirstElementIdxs, cumulativeBeamSizes);
GetPairs(cumulativeBeamSizes.back(), outKeys, outCosts);
}
-void NthElementCPU::GetPairs(size_t number, std::vector<unsigned>& outKeys,
- std::vector<float>& outValues) {
- std::copy(h_res_idx.begin(), h_res_idx.begin() + number, std::back_inserter(outKeys));
- std::copy(h_res.begin(), h_res.begin() + number, std::back_inserter(outValues));
+void NthElementCPU::GetPairs(size_t number,
+ std::vector<unsigned>& outKeys,
+ std::vector<float>& outValues) {
+ std::copy(h_res_idx.begin(),
+ h_res_idx.begin() + number,
+ std::back_inserter(outKeys));
+ std::copy(
+ h_res.begin(), h_res.begin() + number, std::back_inserter(outValues));
lastN = number;
}
void NthElementCPU::getValueByKey(std::vector<float>& out, float* d_in) {
- for (size_t i = 0; i < lastN; ++i) {
+ for(size_t i = 0; i < lastN; ++i) {
out[i] = d_in[h_res_idx[i]];
}
}
-
}
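
The nth_element.cpp hunk above only re-wraps long lines; the selection itself is an index iota followed by std::partial_sort with a greater-than comparator over the probabilities, applied per batch segment. A self-contained sketch of that idiom (topK is a made-up helper name, not marian's):

#include <algorithm>
#include <cstdio>
#include <numeric>
#include <vector>

// Indices of the `beamSize` largest scores in `probs`, using the same
// std::partial_sort idiom as NthElementCPU::getNBestList above.
std::vector<int> topK(const std::vector<float>& probs, int beamSize) {
  std::vector<int> idxs(probs.size());
  std::iota(idxs.begin(), idxs.end(), 0);
  std::partial_sort(idxs.begin(),
                    idxs.begin() + beamSize,
                    idxs.end(),
                    [&](int a, int b) { return probs[a] > probs[b]; });
  idxs.resize(beamSize);
  return idxs;
}

int main() {
  std::vector<float> probs = {0.1f, 0.7f, 0.2f, 0.9f, 0.4f};
  for(int i : topK(probs, 2))
    printf("%d ", i);  // prints 3 1
  printf("\n");
  return 0;
}
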
diff --git a/src/translator/nth_element.cu b/src/translator/nth_element.cu
index f44a6290..fda5855f 100644
--- a/src/translator/nth_element.cu
+++ b/src/translator/nth_element.cu
@@ -271,8 +271,8 @@ __global__ void gGetValueByKey(float* d_in, float* d_out, int* indeces, int n) {
}
NthElementGPU::NthElementGPU(size_t maxBeamSize,
- size_t maxBatchSize,
- DeviceId deviceId)
+ size_t maxBatchSize,
+ DeviceId deviceId)
: deviceId_(deviceId),
NUM_BLOCKS(std::min(
500,
@@ -321,10 +321,9 @@ NthElementGPU::~NthElementGPU() {
CUDA_CHECK(cudaFree(d_cumBeamSizes));
}
-
void NthElementGPU::getNBestList(float* probs,
- const std::vector<int>& batchFirstElementIdxs,
- const std::vector<int>& cummulatedBeamSizes) {
+ const std::vector<int>& batchFirstElementIdxs,
+ const std::vector<int>& cummulatedBeamSizes) {
cudaSetDevice(deviceId_.no);
CUDA_CHECK(cudaMemcpyAsync(d_batchPosition,
batchFirstElementIdxs.data(),
@@ -359,10 +358,10 @@ void NthElementGPU::getNBestList(float* probs,
}
void NthElementGPU::getNBestList(const std::vector<size_t>& beamSizes,
- Tensor Probs,
- std::vector<float>& outCosts,
- std::vector<unsigned>& outKeys,
- const bool isFirst) {
+ Tensor Probs,
+ std::vector<float>& outCosts,
+ std::vector<unsigned>& outKeys,
+ const bool isFirst) {
cudaSetDevice(deviceId_.no);
std::vector<int> cummulatedBeamSizes(beamSizes.size() + 1, 0);
@@ -381,8 +380,8 @@ void NthElementGPU::getNBestList(const std::vector<size_t>& beamSizes,
}
void NthElementGPU::GetPairs(size_t number,
- std::vector<unsigned>& outKeys,
- std::vector<float>& outValues) {
+ std::vector<unsigned>& outKeys,
+ std::vector<float>& outValues) {
cudaSetDevice(deviceId_.no);
CUDA_CHECK(cudaMemcpyAsync(h_res,
d_res,
@@ -417,5 +416,4 @@ void NthElementGPU::getValueByKey(std::vector<float>& out, float* d_in) {
/* stream_ */ 0));
CUDA_CHECK(cudaStreamSynchronize(/* stream_ */ 0));
}
-
}
diff --git a/src/translator/nth_element.h b/src/translator/nth_element.h
index c65d1114..a2485021 100644
--- a/src/translator/nth_element.h
+++ b/src/translator/nth_element.h
@@ -16,18 +16,21 @@ struct NthElement {
virtual ~NthElement() {}
virtual void getNBestList(float* probs,
- const std::vector<int>& batchFirstElementIdxs,
- const std::vector<int>& cummulatedBeamSizes) = 0;
+ const std::vector<int>& batchFirstElementIdxs,
+ const std::vector<int>& cummulatedBeamSizes)
+ = 0;
virtual void getNBestList(const std::vector<size_t>& beamSizes,
- Tensor Probs,
- std::vector<float>& outCosts,
- std::vector<unsigned>& outKeys,
- const bool isFirst = false) = 0;
+ Tensor Probs,
+ std::vector<float>& outCosts,
+ std::vector<unsigned>& outKeys,
+ const bool isFirst = false)
+ = 0;
virtual void GetPairs(size_t number,
- std::vector<unsigned>& outKeys,
- std::vector<float>& outValues) = 0;
+ std::vector<unsigned>& outKeys,
+ std::vector<float>& outValues)
+ = 0;
virtual void getValueByKey(std::vector<float>& out, float* d_in) = 0;
};
@@ -105,5 +108,4 @@ private:
int* d_cumBeamSizes;
size_t lastN;
};
-
}
diff --git a/src/translator/printer.h b/src/translator/printer.h
index 969fe705..1f9ed787 100644
--- a/src/translator/printer.h
+++ b/src/translator/printer.h
@@ -14,12 +14,11 @@ void Printer(Ptr<Config> options,
Ptr<History> history,
OStream& best1,
OStream& bestn) {
-
bool reverse = options->get<bool>("right-left");
if(options->has("n-best") && options->get<bool>("n-best")) {
const auto& nbl = history->NBest(options->get<size_t>("beam-size"));
-
+
for(size_t i = 0; i < nbl.size(); ++i) {
const auto& result = nbl[i];
const auto& words = std::get<0>(result);
@@ -49,9 +48,9 @@ void Printer(Ptr<Config> options,
}
auto bestTranslation = history->Top();
-
- std::string translation = Join((*vocab)(std::get<0>(bestTranslation)),
- " ", reverse);
+
+ std::string translation
+ = Join((*vocab)(std::get<0>(bestTranslation)), " ", reverse);
best1 << translation << std::flush;
}
}
diff --git a/src/translator/scorers.h b/src/translator/scorers.h
index 94bda6e7..da7b20ad 100644
--- a/src/translator/scorers.h
+++ b/src/translator/scorers.h
@@ -34,7 +34,8 @@ public:
Ptr<ScorerState>,
const std::vector<size_t>&,
const std::vector<size_t>&,
- int dimBatch, int beamSize)
+ int dimBatch,
+ int beamSize)
= 0;
virtual void init(Ptr<ExpressionGraph> graph) {}
@@ -90,12 +91,13 @@ public:
Ptr<ScorerState> state,
const std::vector<size_t>& hypIndices,
const std::vector<size_t>& embIndices,
- int dimBatch, int beamSize) {
+ int dimBatch,
+ int beamSize) {
graph->switchParams(getName());
auto wrappedState
= std::dynamic_pointer_cast<ScorerWrapperState>(state)->getState();
- return New<ScorerWrapperState>(
- encdec_->step(graph, wrappedState, hypIndices, embIndices, dimBatch, beamSize));
+ return New<ScorerWrapperState>(encdec_->step(
+ graph, wrappedState, hypIndices, embIndices, dimBatch, beamSize));
}
};
@@ -132,8 +134,7 @@ public:
p[0] = 0;
p[2] = 0;
- penalties_ = graph->constant({1, dimVocab_},
- inits::from_vector(p));
+ penalties_ = graph->constant({1, dimVocab_}, inits::from_vector(p));
return New<WordPenaltyState>(dimVocab_, penalties_);
}
@@ -141,7 +142,8 @@ public:
Ptr<ScorerState> state,
const std::vector<size_t>& hypIndices,
const std::vector<size_t>& embIndices,
- int dimBatch, int beamSize) {
+ int dimBatch,
+ int beamSize) {
return state;
}
};
@@ -168,8 +170,7 @@ public:
p[i] = 0;
p[2] = 0;
- penalties_ = graph->constant({1, dimVocab_},
- inits::from_vector(p));
+ penalties_ = graph->constant({1, dimVocab_}, inits::from_vector(p));
return New<WordPenaltyState>(dimVocab_, penalties_);
}
@@ -177,7 +178,8 @@ public:
Ptr<ScorerState> state,
const std::vector<size_t>& hypIndices,
const std::vector<size_t>& embIndices,
- int dimBatch, int beamSize) {
+ int dimBatch,
+ int beamSize) {
return state;
}
};
diff --git a/src/translator/translator.h b/src/translator/translator.h
index 43dbf16a..0cb0da01 100644
--- a/src/translator/translator.h
+++ b/src/translator/translator.h
@@ -37,7 +37,7 @@ public:
ThreadPool threadPool(devices.size(), devices.size());
scorers_.resize(devices.size());
graphs_.resize(devices.size());
-
+
size_t id = 0;
for(auto device : devices) {
auto task = [&](DeviceId device, size_t id) {
@@ -61,13 +61,13 @@ public:
data::BatchGenerator<data::Corpus> bg(corpus_, options_);
auto devices = options_->getDevices();
-
+
ThreadPool threadPool(devices.size(), devices.size());
size_t batchId = 0;
auto collector = New<OutputCollector>();
if(options_->get<bool>("quiet-translation"))
- collector->setPrintingStrategy(New<QuietPrinting>());
+ collector->setPrintingStrategy(New<QuietPrinting>());
bg.prepare(false);