
github.com/marian-nmt/marian.git
path: root/src
author     Marcin Junczys-Dowmunt <junczys@amu.edu.pl>   2018-03-01 02:52:40 +0300
committer  Marcin Junczys-Dowmunt <junczys@amu.edu.pl>   2018-03-01 02:52:40 +0300
commit     fd208d841b0ec4fdbfdc81df601e86986528a051 (patch)
tree       efe85c4e634fa1885583e2d4192a8d7252a0e945 /src
parent     71f911940c91d2fd5c337ddf4d8e88108d5ed822 (diff)
simplified interface, slowly getting rid of keywords
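
The change replaces keyword-style arguments such as keywords::init = ... and keywords::shape = ... with plain positional parameters on the graph-building calls. A minimal before/after sketch of the effect (illustrative only, not part of the patch; graph, dimIn, dimOut, W and b are hypothetical names):

    // before this commit (keyword arguments):
    //   auto W = graph->param("W", {dimIn, dimOut}, keywords::init = inits::glorot_uniform);
    //   auto b = graph->constant({1, dimOut}, keywords::init = inits::zeros);
    // after this commit: the initializer is positional, and param() takes an
    // optional bool fixed in place of keywords::fixed
    auto W = graph->param("W", {dimIn, dimOut}, inits::glorot_uniform);
    auto b = graph->constant({1, dimOut}, inits::zeros);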
Diffstat (limited to 'src')
-rw-r--r--  src/3rd_party/cnpy/cnpy.cpp | 53
-rw-r--r--  src/3rd_party/cnpy/cnpy.h | 37
-rw-r--r--  src/CMakeLists.txt | 2
-rw-r--r--  src/common/config.cpp | 2
-rw-r--r--  src/common/definitions.h | 8
-rw-r--r--  src/examples/iris/iris.cpp | 12
-rw-r--r--  src/examples/mnist/model.h | 8
-rw-r--r--  src/examples/mnist/model_lenet.h | 8
-rw-r--r--  src/graph/expression_graph.cpp | 8
-rw-r--r--  src/graph/expression_graph.h | 47
-rw-r--r--  src/graph/node.h | 20
-rw-r--r--  src/graph/node_initializers.cpp | 10
-rw-r--r--  src/graph/node_initializers.h | 19
-rw-r--r--  src/graph/node_operators.cpp (renamed from src/graph/node_operators.cu) | 0
-rw-r--r--  src/graph/node_operators.h | 18
-rw-r--r--  src/graph/node_operators_binary.h | 46
-rw-r--r--  src/graph/node_operators_unary.h | 96
-rw-r--r--  src/layers/convolution.cu | 4
-rw-r--r--  src/layers/generic.h | 28
-rw-r--r--  src/layers/guided_alignment.h | 2
-rw-r--r--  src/models/amun.h | 34
-rw-r--r--  src/models/encdec.h | 10
-rw-r--r--  src/models/nematus.h | 14
-rw-r--r--  src/models/s2s.h | 2
-rw-r--r--  src/models/transformer.h | 40
-rw-r--r--  src/optimizers/optimizers.cu | 14
-rw-r--r--  src/rnn/attention.cu | 2
-rw-r--r--  src/rnn/attention.h | 22
-rw-r--r--  src/rnn/cells.cu | 15
-rw-r--r--  src/rnn/cells.h | 138
-rw-r--r--  src/rnn/rnn.h | 2
-rw-r--r--  src/tensors/tensor.h | 12
-rw-r--r--  src/translator/beam_search.h | 4
-rw-r--r--  src/translator/scorers.h | 4
34 files changed, 346 insertions, 395 deletions
diff --git a/src/3rd_party/cnpy/cnpy.cpp b/src/3rd_party/cnpy/cnpy.cpp
index 277ee7a5..f4df0418 100644
--- a/src/3rd_party/cnpy/cnpy.cpp
+++ b/src/3rd_party/cnpy/cnpy.cpp
@@ -59,9 +59,9 @@ template<> std::vector<char>& cnpy::operator+=(std::vector<char>& lhs, const cha
return lhs;
}
-void cnpy::parse_npy_header(FILE* fp, unsigned int& word_size, unsigned int*& shape, unsigned int& ndims, bool& fortran_order) {
+void cnpy::parse_npy_header(FILE* fp, unsigned int& word_size, unsigned int*& shape, unsigned int& ndims, bool& fortran_order) {
char buffer[256];
- size_t res = fread(buffer,sizeof(char),11,fp);
+ size_t res = fread(buffer,sizeof(char),11,fp);
if(res != 11)
throw std::runtime_error("parse_npy_header: failed fread");
std::string header = fgets(buffer,256,fp);
@@ -88,7 +88,7 @@ void cnpy::parse_npy_header(FILE* fp, unsigned int& word_size, unsigned int*& sh
}
//endian, word size, data type
- //byte order code | stands for not applicable.
+ //byte order code | stands for not applicable.
//not sure when this applies except for byte array
loc1 = header.find("descr")+9;
bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
@@ -125,7 +125,7 @@ void cnpy::parse_zip_footer(FILE* fp, unsigned short& nrecs, unsigned int& globa
assert(comment_len == 0);
}
-cnpy::NpyArray load_the_npy_file(FILE* fp) {
+cnpy::NpyArrayPtr load_the_npy_file(FILE* fp) {
unsigned int* shape;
unsigned int ndims, word_size;
bool fortran_order;
@@ -134,13 +134,13 @@ cnpy::NpyArray load_the_npy_file(FILE* fp) {
for(unsigned int i = 0; i < ndims; i++)
size *= shape[i];
- cnpy::NpyArray arr;
- arr.word_size = word_size;
- arr.shape = std::vector<unsigned int>(shape, shape+ndims);
+ auto arr = cnpy::NpyArrayPtr(new cnpy::NpyArray());
+ arr->word_size = word_size;
+ arr->shape = std::vector<unsigned int>(shape, shape+ndims);
delete[] shape;
- arr.resize(size*word_size);
- arr.fortran_order = fortran_order;
- size_t nread = fread(arr.data(), word_size, size,fp);
+ arr->resize(size*word_size);
+ arr->fortran_order = fortran_order;
+ size_t nread = fread(arr->data(), word_size, size,fp);
if(nread != size)
throw std::runtime_error("load_the_npy_file: failed fread");
return arr;
@@ -152,7 +152,7 @@ cnpy::npz_t cnpy::npz_load(std::string fname) {
if(!fp) printf("npz_load: Error! Unable to open file %s!\n",fname.c_str());
assert(fp);
- cnpy::npz_t arrays;
+ cnpy::npz_t arrays;
while(1) {
std::vector<char> local_header(30);
@@ -170,7 +170,7 @@ cnpy::npz_t cnpy::npz_load(std::string fname) {
if(vname_res != name_len)
throw std::runtime_error("npz_load: failed fread");
- //erase the lagging .npy
+ //erase the lagging .npy
varname.erase(varname.end()-4, varname.end());
//read in the extra field
@@ -186,16 +186,16 @@ cnpy::npz_t cnpy::npz_load(std::string fname) {
}
fclose(fp);
- return arrays;
+ return arrays;
}
-cnpy::NpyArray cnpy::npz_load(std::string fname, std::string varname) {
+cnpy::NpyArrayPtr cnpy::npz_load(std::string fname, std::string varname) {
FILE* fp = fopen(fname.c_str(),"rb");
if(!fp) {
printf("npz_load: Error! Unable to open file %s!\n",fname.c_str());
abort();
- }
+ }
while(1) {
std::vector<char> local_header(30);
@@ -209,7 +209,7 @@ cnpy::NpyArray cnpy::npz_load(std::string fname, std::string varname) {
//read in the variable name
unsigned short name_len = *(unsigned short*) &local_header[26];
std::string vname(name_len,' ');
- size_t vname_res = fread(&vname[0],sizeof(char),name_len,fp);
+ size_t vname_res = fread(&vname[0],sizeof(char),name_len,fp);
if(vname_res != name_len)
throw std::runtime_error("npz_load: failed fread");
vname.erase(vname.end()-4,vname.end()); //erase the lagging .npy
@@ -219,7 +219,7 @@ cnpy::NpyArray cnpy::npz_load(std::string fname, std::string varname) {
fseek(fp,extra_field_len,SEEK_CUR); //skip past the extra field
if(vname == varname) {
- NpyArray array = load_the_npy_file(fp);
+ auto array = load_the_npy_file(fp);
fclose(fp);
return array;
}
@@ -233,30 +233,27 @@ cnpy::NpyArray cnpy::npz_load(std::string fname, std::string varname) {
fclose(fp);
std::stringstream ss;
- ss << "npz_load: Error! Variable name "
- << varname
- << " not found in "
- << fname
- << "!"
+ ss << "npz_load: Error! Variable name "
+ << varname
+ << " not found in "
+ << fname
+ << "!"
<< std::endl;
throw std::runtime_error(ss.str());
}
-cnpy::NpyArray cnpy::npy_load(std::string fname) {
+cnpy::NpyArrayPtr cnpy::npy_load(std::string fname) {
FILE* fp = fopen(fname.c_str(), "rb");
if(!fp) {
printf("npy_load: Error! Unable to open file %s!\n",fname.c_str());
- abort();
+ abort();
}
- NpyArray arr = load_the_npy_file(fp);
+ auto arr = load_the_npy_file(fp);
fclose(fp);
return arr;
}
-
-
-
diff --git a/src/3rd_party/cnpy/cnpy.h b/src/3rd_party/cnpy/cnpy.h
index f78271a6..0cdd6dca 100644
--- a/src/3rd_party/cnpy/cnpy.h
+++ b/src/3rd_party/cnpy/cnpy.h
@@ -20,27 +20,28 @@
namespace cnpy {
struct NpyArray {
- std::shared_ptr<std::vector<char>> ptr;
+ std::vector<char> bytes;
std::vector<unsigned int> shape;
unsigned int word_size{1};
bool fortran_order{0};
-
- NpyArray() : ptr{new std::vector<char>()} {}
-
+
+ NpyArray() {}
+
void resize(size_t n) {
- return ptr->resize(n);
+ return bytes.resize(n);
}
-
+
char* data() {
- return ptr->data();
+ return bytes.data();
}
const char* data() const {
- return ptr->data();
+ return bytes.data();
}
};
-
- typedef std::map<std::string, NpyArray> npz_t;
+
+ typedef std::shared_ptr<NpyArray> NpyArrayPtr;
+ typedef std::map<std::string, NpyArrayPtr> npz_t;
char BigEndianTest();
char map_type(const std::type_info& t);
@@ -48,20 +49,20 @@ namespace cnpy {
void parse_npy_header(FILE* fp,unsigned int& word_size, unsigned int*& shape, unsigned int& ndims, bool& fortran_order);
void parse_zip_footer(FILE* fp, unsigned short& nrecs, unsigned int& global_header_size, unsigned int& global_header_offset);
npz_t npz_load(std::string fname);
- NpyArray npz_load(std::string fname, std::string varname);
- NpyArray npy_load(std::string fname);
+ NpyArrayPtr npz_load(std::string fname, std::string varname);
+ NpyArrayPtr npy_load(std::string fname);
template<typename T> std::vector<char>& operator+=(std::vector<char>& lhs, const T rhs) {
//write in little endian
for(char byte = 0; byte < sizeof(T); byte++) {
- char val = *((char*)&rhs+byte);
+ char val = *((char*)&rhs+byte);
lhs.push_back(val);
}
return lhs;
}
- template<> std::vector<char>& operator+=(std::vector<char>& lhs, const std::string rhs);
- template<> std::vector<char>& operator+=(std::vector<char>& lhs, const char* rhs);
+ template<> std::vector<char>& operator+=(std::vector<char>& lhs, const std::string rhs);
+ template<> std::vector<char>& operator+=(std::vector<char>& lhs, const char* rhs);
template<typename T> std::string tostring(T i, int pad = 0, char padval = ' ') {
@@ -136,7 +137,7 @@ namespace cnpy {
if(fp) {
//zip file exists. we need to add a new npy file to it.
//first read the footer. this gives us the offset and size of the global header
- //then read and store the global header.
+ //then read and store the global header.
//below, we will write the the new data at the start of the global header then append the global header and footer below it
unsigned int global_header_size;
parse_zip_footer(fp,nrecs,global_header_size,global_header_offset);
@@ -202,7 +203,7 @@ namespace cnpy {
footer += (unsigned int) (global_header_offset + nbytes + local_header.size()); //offset of start of global headers, since global header now starts after newly written array
footer += (unsigned short) 0; //zip file comment length
- //write everything
+ //write everything
fwrite(&local_header[0],sizeof(char),local_header.size(),fp);
fwrite(&npy_header[0],sizeof(char),npy_header.size(),fp);
fwrite(data,sizeof(T),nels,fp);
@@ -211,7 +212,7 @@ namespace cnpy {
fclose(fp);
}
- template<typename T> std::vector<char> create_npy_header(const T* data, const unsigned int* shape, const unsigned int ndims) {
+ template<typename T> std::vector<char> create_npy_header(const T* data, const unsigned int* shape, const unsigned int ndims) {
std::vector<char> dict;
dict += "{'descr': '";
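
A usage sketch of the new cnpy interface (illustrative only, not taken from the patch): npz_load and npy_load now return NpyArrayPtr, a std::shared_ptr<NpyArray>, so call sites switch from "." to "->" member access. Assuming a hypothetical file model.npz containing a variable named Wemb:

    auto numpy = cnpy::npz_load("model.npz");   // cnpy::npz_t, i.e. std::map<std::string, NpyArrayPtr>
    auto arr = numpy["Wemb"];                   // NpyArrayPtr, i.e. std::shared_ptr<NpyArray>
    size_t size = 1;
    for(size_t dim : arr->shape)                // '->' where the old value type used '.'
      size *= dim;
    float* data = (float*)arr->data();          // raw bytes viewed as floats, as from_numpy() does
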
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 409a84e1..38a69d9c 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -24,7 +24,7 @@ cuda_add_library(marian
graph/expression_graph.cpp
graph/expression_operators.cu
graph/node.cpp
- graph/node_operators.cu
+ graph/node_operators.cpp
graph/node_initializers.cpp
layers/convolution.cu
diff --git a/src/common/config.cpp b/src/common/config.cpp
index 1f297094..9eca1f10 100644
--- a/src/common/config.cpp
+++ b/src/common/config.cpp
@@ -54,7 +54,7 @@ void Config::loadModelParameters(const std::string& name) {
void Config::GetYamlFromNpz(YAML::Node& yaml,
const std::string& varName,
const std::string& fName) {
- yaml = YAML::Load(cnpy::npz_load(fName, varName).data());
+ yaml = YAML::Load(cnpy::npz_load(fName, varName)->data());
}
void Config::AddYamlToNpz(const YAML::Node& yaml,
diff --git a/src/common/definitions.h b/src/common/definitions.h
index 749f4fef..8fb3bbb6 100644
--- a/src/common/definitions.h
+++ b/src/common/definitions.h
@@ -45,19 +45,19 @@ enum class DeviceType : size_t { gpu = 0, cpu = 1 };
struct DeviceId {
size_t no{0};
DeviceType type{DeviceType::gpu};
-
+
DeviceId() : no{0}, type{DeviceType::gpu} {}
DeviceId(size_t no_, DeviceType type_) : no(no_), type(type_) {}
-
+
friend std::ostream& operator<<(std::ostream& out, DeviceId deviceId) {
out << (deviceId.type == DeviceType::gpu ? "gpu" : "cpu") << deviceId.no;
return out;
}
-
+
friend bool operator==(DeviceId id1, DeviceId id2) {
return id1.no == id2.no && id1.type == id2.type;
}
-
+
};
class TensorBase;
diff --git a/src/examples/iris/iris.cpp b/src/examples/iris/iris.cpp
index 9b1bb958..80a3a2a9 100644
--- a/src/examples/iris/iris.cpp
+++ b/src/examples/iris/iris.cpp
@@ -25,20 +25,20 @@ Expr buildIrisClassifier(Ptr<ExpressionGraph> graph,
// Define the input layer
auto x = graph->constant({N, NUM_FEATURES},
- init = inits::from_vector(inputData));
+ inits::from_vector(inputData));
// Define the hidden layer
- auto W1 = graph->param("W1", {NUM_FEATURES, 5}, init = inits::uniform());
- auto b1 = graph->param("b1", {1, 5}, init = inits::zeros);
+ auto W1 = graph->param("W1", {NUM_FEATURES, 5}, inits::uniform());
+ auto b1 = graph->param("b1", {1, 5}, inits::zeros);
auto h = tanh(affine(x, W1, b1));
// Define the output layer
- auto W2 = graph->param("W2", {5, NUM_LABELS}, init = inits::uniform());
- auto b2 = graph->param("b2", {1, NUM_LABELS}, init = inits::zeros);
+ auto W2 = graph->param("W2", {5, NUM_LABELS}, inits::uniform());
+ auto b2 = graph->param("b2", {1, NUM_LABELS}, inits::zeros);
auto o = affine(h, W2, b2);
if(train) {
- auto y = graph->constant({N}, init = inits::from_vector(outputData));
+ auto y = graph->constant({N}, inits::from_vector(outputData));
/* Define cross entropy cost on the output layer.
* It can be also defined directly as:
* -mean(sum(logsoftmax(o) * y, axis=1), axis=0)
diff --git a/src/examples/mnist/model.h b/src/examples/mnist/model.h
index 2ab5a3f5..62e5696a 100644
--- a/src/examples/mnist/model.h
+++ b/src/examples/mnist/model.h
@@ -79,7 +79,7 @@ protected:
auto features
= std::static_pointer_cast<data::DataBatch>(batch)->features();
auto x = g->constant({(int)batch->size(), dims[0]},
- init = inits::from_vector(features));
+ inits::from_vector(features));
// Construct hidden layers
std::vector<Expr> layers, weights, biases;
@@ -104,11 +104,11 @@ protected:
// Construct a weight node for the outgoing connections from layer i
weights.emplace_back(g->param(
- "W" + std::to_string(i), {in, out}, init = inits::uniform()));
+ "W" + std::to_string(i), {in, out}, inits::uniform()));
// Construct a bias node. These weights are initialized to zero
biases.emplace_back(
- g->param("b" + std::to_string(i), {1, out}, init = inits::zeros));
+ g->param("b" + std::to_string(i), {1, out}, inits::zeros));
}
// Perform matrix multiplication and addition for the last layer
@@ -119,7 +119,7 @@ protected:
// labels
auto labels = std::static_pointer_cast<data::DataBatch>(batch)->labels();
auto y = g->constant({(int)batch->size(), 1},
- init = inits::from_vector(labels));
+ inits::from_vector(labels));
// Define a top-level node for training
return mean(cross_entropy(last, y), axis = 0);
diff --git a/src/examples/mnist/model_lenet.h b/src/examples/mnist/model_lenet.h
index 968ceaf3..a91ef97d 100644
--- a/src/examples/mnist/model_lenet.h
+++ b/src/examples/mnist/model_lenet.h
@@ -29,7 +29,7 @@ protected:
auto features
= std::static_pointer_cast<data::DataBatch>(batch)->features();
auto x = g->constant({(int)batch->size(), 1, 28, 28},
- init = inits::from_vector(features));
+ inits::from_vector(features));
// Construct hidden layers
@@ -74,11 +74,11 @@ protected:
// Construct a weight node for the outgoing connections from layer i
weights.emplace_back(g->param(
- "W" + std::to_string(i), {in, out}, init = inits::uniform()));
+ "W" + std::to_string(i), {in, out}, inits::uniform()));
// Construct a bias node. These weights are initialized to zero
biases.emplace_back(
- g->param("b" + std::to_string(i), {1, out}, init = inits::zeros));
+ g->param("b" + std::to_string(i), {1, out}, inits::zeros));
}
// Perform matrix multiplication and addition for the last layer
@@ -91,7 +91,7 @@ protected:
// labels
auto labels = std::static_pointer_cast<data::DataBatch>(batch)->labels();
auto y = g->constant({(int)batch->size(), 1},
- init = inits::from_vector(labels));
+ inits::from_vector(labels));
// Define a top-level node for training
return mean(cross_entropy(last, y), axis = 0);
diff --git a/src/graph/expression_graph.cpp b/src/graph/expression_graph.cpp
index 520476ae..f0ae1ffa 100644
--- a/src/graph/expression_graph.cpp
+++ b/src/graph/expression_graph.cpp
@@ -17,12 +17,12 @@ void ExpressionGraph::setDevice(DeviceId deviceId) {
}
}
-Expr ExpressionGraph::dropout(float prob, Shape shape) {
+Expr ExpressionGraph::dropout(float prob, const Shape& shape) {
return Expression<ConstantNode>(shared_from_this(),
- keywords::init = [prob, this](Tensor t) {
+ shape,
+ [prob, this](Tensor t) {
Dropout(t, prob);
- },
- keywords::shape = shape);
+ });
}
void ExpressionGraph::checkNan(Tensor t) {
diff --git a/src/graph/expression_graph.h b/src/graph/expression_graph.h
index 5131daf6..ea1645ec 100644
--- a/src/graph/expression_graph.h
+++ b/src/graph/expression_graph.h
@@ -77,7 +77,7 @@ public:
void copyParams(Ptr<ExpressionGraph> graph) {
for(auto p : *graph->params())
- param(p->name(), p->shape());
+ param(p->name(), p->shape(), inits::dummy);
params()->allocateForward();
params()->vals()->copyFrom(graph->params()->vals());
}
@@ -200,8 +200,11 @@ public:
dot.close();
}
- template <typename... Args>
- Expr param(std::string name, Shape shape, Args... args) {
+ Expr param(const std::string& pname,
+ const Shape& shape,
+ const NodeInitializer& init,
+ bool fixed = false) {
+ std::string name = pname;
if(!namespace_.empty())
name = namespace_ + "::" + name;
@@ -214,7 +217,6 @@ public:
"original shape {}",
shape, name, p->shape());
- bool fixed = Get(keywords::fixed, false, args...);
p->setTrainable(!fixed);
add(p);
return p;
@@ -229,8 +231,7 @@ public:
ABORT_IF(get(name), "Non-parameter with name '{}' already exists", name);
// create parameter node (adds to tape)
- p = Expression<ParamNode>(
- shared_from_this(), keywords::shape = shape, args...);
+ p = Expression<ParamNode>(shared_from_this(), shape, init, fixed);
// add to list of parameters
p->set_name(name);
@@ -238,25 +239,21 @@ public:
return p;
}
- template <typename... Args>
- Expr constant(Shape shape, Args... args) {
+ Expr constant(const Shape& shape,
+ const NodeInitializer& init) {
return Expression<ConstantNode>(
- shared_from_this(), keywords::shape = shape, args...);
+ shared_from_this(), shape, init);
}
- template <typename... Args>
- Expr ones(Args... args) {
- return Expression<ConstantNode>(
- shared_from_this(), keywords::init = inits::ones, args...);
+ Expr ones(const Shape& shape) {
+ return Expression<ConstantNode>(shared_from_this(), shape, inits::ones);
}
- template <typename... Args>
- Expr zeros(Args... args) {
- return Expression<ConstantNode>(
- shared_from_this(), keywords::init = inits::zeros, args...);
+ Expr zeros(const Shape& shape) {
+ return Expression<ConstantNode>(shared_from_this(), shape, inits::zeros);
}
- Expr dropout(float prob, Shape shape);
+ Expr dropout(float prob, const Shape& shape);
Expr get(std::string name) {
if(!namespace_.empty())
@@ -340,17 +337,17 @@ public:
continue;
Shape shape;
- if(it.second.shape.size() == 1) {
+ if(it.second->shape.size() == 1) {
shape.resize(2);
shape.set(0, 1);
- shape.set(1, it.second.shape[0]);
+ shape.set(1, it.second->shape[0]);
} else {
- shape.resize(it.second.shape.size());
- for(int i = 0; i < it.second.shape.size(); ++i)
- shape.set(i, it.second.shape[i]);
+ shape.resize(it.second->shape.size());
+ for(int i = 0; i < it.second->shape.size(); ++i)
+ shape.set(i, it.second->shape[i]);
}
- param(name, shape, init = inits::from_numpy(it.second));
+ param(name, shape, inits::from_numpy(it.second));
}
if(markReloaded)
@@ -371,7 +368,7 @@ public:
}
std::vector<float> v;
- p.second->val() >> v;
+ p.second->val()->get(v);
auto& pShape = p.second->shape();
unsigned dim = pShape.size();
diff --git a/src/graph/node.h b/src/graph/node.h
index aa450000..74af5771 100644
--- a/src/graph/node.h
+++ b/src/graph/node.h
@@ -13,7 +13,6 @@
namespace marian {
class Node : public Chainable<Tensor>,
- public keywords::Keywords,
public std::enable_shared_from_this<Node> {
protected:
size_t id_{0};
@@ -33,11 +32,9 @@ protected:
std::string debugMessage_;
public:
- template <typename... Args>
- Node(Ptr<ExpressionGraph> graph, Args... args)
- : Keywords(args...),
- graph_(graph),
- shape_(Get(keywords::shape, {1, 1, 1, 1})) {}
+ Node(Ptr<ExpressionGraph> graph, Shape shape)
+ : graph_(graph),
+ shape_(shape) {}
virtual ~Node() {
if(destroy_) {
@@ -143,12 +140,8 @@ public:
struct NaryNodeOp : public Node {
size_t hash_{0};
- template <typename... Args>
- NaryNodeOp(const std::vector<Expr>& nodes, Args... args)
- : Node(nodes.front()->graph(),
- keywords::shape
- = keywords::Get(keywords::shape, nodes.front()->shape(), args...),
- args...) {
+ NaryNodeOp(const std::vector<Expr>& nodes, Shape shape)
+ : Node(nodes.front()->graph(), shape) {
children_.resize(nodes.size());
for(int i = 0; i < nodes.size(); ++i)
children_[i] = nodes[i];
@@ -158,6 +151,9 @@ struct NaryNodeOp : public Node {
remove_children_from_top_nodes();
}
+ NaryNodeOp(const std::vector<Expr>& nodes)
+ : NaryNodeOp(nodes, nodes[0]->shape()) {}
+
virtual ~NaryNodeOp() {}
std::vector<Expr>& children() { return children_; }
diff --git a/src/graph/node_initializers.cpp b/src/graph/node_initializers.cpp
index f82b6f45..0d131c61 100644
--- a/src/graph/node_initializers.cpp
+++ b/src/graph/node_initializers.cpp
@@ -72,7 +72,7 @@ void xorshift(Tensor t) {
std::vector<float> vals(t->size());
for(auto&& v : vals)
v = xor128();
- t << vals;
+ t->set(vals);
}
void glorot_normal(Tensor t) {
@@ -110,7 +110,7 @@ void ortho(Tensor t) {
NodeInitializer from_vector(const std::vector<float>& v) {
auto vPtr = New<std::vector<float>>(v.begin(), v.end());
return [vPtr](Tensor t) {
- t->set(vPtr->data(), vPtr->data() + vPtr->size());
+ t->set(vPtr->data(), vPtr->data() + vPtr->size());
};
}
@@ -127,12 +127,12 @@ NodeInitializer from_sparse_vector(
};
}
-NodeInitializer from_numpy(const cnpy::NpyArray& np) {
+NodeInitializer from_numpy(const cnpy::NpyArrayPtr& np) {
return [np](Tensor t) {
size_t size = 1;
- for(size_t dim : np.shape)
+ for(size_t dim : np->shape)
size *= dim;
- t->set((float*)np.data(), (float*)np.data() + size);
+ t->set((float*)np->data(), (float*)np->data() + size);
};
}
diff --git a/src/graph/node_initializers.h b/src/graph/node_initializers.h
index 6bd83c12..5b069657 100644
--- a/src/graph/node_initializers.h
+++ b/src/graph/node_initializers.h
@@ -27,25 +27,32 @@ NodeInitializer from_value(float v);
NodeInitializer diag(float val);
-template <class Distribution>
-void distribution(std::vector<float>& vals, float a, float b) {
+template <class Distribution, class Iterator>
+void distribution(Iterator begin, Iterator end, float a, float b) {
std::default_random_engine engine(Config::seed++);
Distribution dist(a, b);
auto gen = std::bind(dist, engine);
- std::generate(begin(vals), end(vals), gen);
+ std::generate(begin, end, gen);
+}
+
+template <class Distribution>
+void distribution(std::vector<float>& vals, float a, float b) {
+ distribution<Distribution>(vals.begin(), vals.end(), a, b);
}
template <class Distribution>
void distribution(Tensor t, float a, float b) {
std::vector<float> vals(t->size());
- distribution<Distribution>(vals, a, b);
- t << vals;
+ distribution<Distribution>(vals.begin(), vals.end(), a, b);
+ t->set(vals);
}
NodeInitializer normal(float scale = 0.1, bool ortho = true);
NodeInitializer uniform(float scale = 0.1);
+static inline void dummy(Tensor t) {}
+
void ortho(Tensor t);
void glorot_uniform(Tensor t);
@@ -60,7 +67,7 @@ NodeInitializer from_vector(const std::vector<size_t>& v);
NodeInitializer from_sparse_vector(
std::pair<std::vector<size_t>, std::vector<float>>& v);
-NodeInitializer from_numpy(const cnpy::NpyArray& np);
+NodeInitializer from_numpy(const cnpy::NpyArrayPtr& np);
NodeInitializer from_word2vec(const std::string& file,
int dimVoc,
diff --git a/src/graph/node_operators.cu b/src/graph/node_operators.cpp
index 76f0b1e3..76f0b1e3 100644
--- a/src/graph/node_operators.cu
+++ b/src/graph/node_operators.cpp
diff --git a/src/graph/node_operators.h b/src/graph/node_operators.h
index b785fade..8720d0bb 100644
--- a/src/graph/node_operators.h
+++ b/src/graph/node_operators.h
@@ -7,12 +7,11 @@
namespace marian {
struct ConstantNode : public Node {
- template <typename... Args>
- ConstantNode(Args... args)
- : Node(args...),
- init_(new NodeInitializer(Get(keywords::init, [](Tensor) {}))),
+ ConstantNode(Ptr<ExpressionGraph> graph, const Shape& shape, const NodeInitializer& init)
+ : Node(graph, shape),
+ init_(new NodeInitializer(init)),
initialized_(false) {
- ABORT_IF(!Has(keywords::shape), "Constant items require shape information");
+
setTrainable(false);
}
@@ -42,14 +41,11 @@ private:
};
struct ParamNode : public Node {
- template <typename... Args>
- ParamNode(Args... args)
- : Node(args...),
- init_(new NodeInitializer(Get(keywords::init, [](Tensor) {}))),
+ ParamNode(Ptr<ExpressionGraph> graph, const Shape& shape, const NodeInitializer& init, bool fixed = false)
+ : Node(graph, shape),
+ init_(new NodeInitializer(init)),
initialized_(false) {
- ABORT_IF(!Has(keywords::shape), "Param items require shape information");
- bool fixed = Get(keywords::fixed, false);
setTrainable(!fixed);
}
diff --git a/src/graph/node_operators_binary.h b/src/graph/node_operators_binary.h
index a2a47a61..c9e67cd7 100644
--- a/src/graph/node_operators_binary.h
+++ b/src/graph/node_operators_binary.h
@@ -16,16 +16,13 @@ private:
float scalar_;
public:
- template <typename... Args>
DotNodeOp(Expr a,
Expr b,
bool transA,
bool transB,
- float scalar,
- Args... args)
+ float scalar)
: NaryNodeOp({a, b},
- keywords::shape = newShape(a, b, transA, transB),
- args...),
+ newShape(a, b, transA, transB)),
transA_(transA),
transB_(transB),
scalar_(scalar) {}
@@ -149,8 +146,7 @@ public:
bool transA,
bool transB,
float scalar)
- : NaryNodeOp(nodes, keywords::shape = newShape(nodes[0], nodes[1],
- transA, transB)),
+ : NaryNodeOp(nodes, newShape(nodes[0], nodes[1], transA, transB)),
transA_(transA),
transB_(transB),
scalar_(scalar){}
@@ -278,16 +274,13 @@ private:
float scalar_;
public:
- template <typename... Args>
DotBatchedNodeOp(Expr a,
Expr b,
bool transA,
bool transB,
- float scalar,
- Args... args)
+ float scalar)
: NaryNodeOp({a, b},
- keywords::shape = newShape(a, b, transA, transB),
- args...),
+ newShape(a, b, transA, transB)),
transA_(transA),
transB_(transB),
scalar_(scalar) {}
@@ -407,7 +400,7 @@ public:
struct ScalarProductNodeOp : public NaryNodeOp {
template <typename... Args>
ScalarProductNodeOp(Expr a, Expr b, Args... args)
- : NaryNodeOp({a, b}, keywords::shape = newShape(a, b, args...), args...) {
+ : NaryNodeOp({a, b}, newShape(a, b, args...)) {
}
template <typename... Args>
@@ -440,9 +433,8 @@ struct ScalarProductNodeOp : public NaryNodeOp {
};
struct ElementBinaryNodeOp : public NaryNodeOp {
- template <typename... Args>
- ElementBinaryNodeOp(Expr a, Expr b, Args... args)
- : NaryNodeOp({a, b}, keywords::shape = newShape(a, b), args...) {}
+ ElementBinaryNodeOp(Expr a, Expr b)
+ : NaryNodeOp({a, b}, newShape(a, b)) {}
Shape newShape(Expr a, Expr b) {
return Shape::broadcast({a, b});
@@ -452,8 +444,7 @@ struct ElementBinaryNodeOp : public NaryNodeOp {
};
struct PlusNodeOp : public ElementBinaryNodeOp {
- template <typename... Args>
- PlusNodeOp(Args... args) : ElementBinaryNodeOp(args...) {}
+ PlusNodeOp(Expr a, Expr b) : ElementBinaryNodeOp(a, b) {}
NodeOps forwardOps() {
using namespace functional;
@@ -473,8 +464,7 @@ struct PlusNodeOp : public ElementBinaryNodeOp {
};
struct MinusNodeOp : public ElementBinaryNodeOp {
- template <typename... Args>
- MinusNodeOp(Args... args) : ElementBinaryNodeOp(args...) {}
+ MinusNodeOp(Expr a, Expr b) : ElementBinaryNodeOp(a, b) {}
NodeOps forwardOps() {
using namespace functional;
@@ -494,8 +484,7 @@ struct MinusNodeOp : public ElementBinaryNodeOp {
};
struct MultNodeOp : public ElementBinaryNodeOp {
- template <typename... Args>
- MultNodeOp(Args... args) : ElementBinaryNodeOp(args...) {}
+ MultNodeOp(Expr a, Expr b) : ElementBinaryNodeOp(a, b) {}
NodeOps forwardOps() {
using namespace functional;
@@ -515,8 +504,7 @@ struct MultNodeOp : public ElementBinaryNodeOp {
};
struct DivNodeOp : public ElementBinaryNodeOp {
- template <typename... Args>
- DivNodeOp(Args... args) : ElementBinaryNodeOp(args...) {}
+ DivNodeOp(Expr a, Expr b) : ElementBinaryNodeOp(a, b) {}
NodeOps forwardOps() {
using namespace functional;
@@ -565,9 +553,8 @@ struct DivNodeOp : public ElementBinaryNodeOp {
// Cross-entropy node. It computes -b*log(softmax(a)), summing rowwise.
struct CrossEntropyNodeOp : public NaryNodeOp {
- template <typename... Args>
- CrossEntropyNodeOp(Expr a, Expr b, Args... args)
- : NaryNodeOp({a, b}, keywords::shape = newShape(a), args...) {}
+ CrossEntropyNodeOp(Expr a, Expr b)
+ : NaryNodeOp({a, b}, newShape(a)) {}
Shape newShape(Expr a) {
Shape shape1 = a->shape();
@@ -591,10 +578,7 @@ struct CrossEntropyNodeOp : public NaryNodeOp {
struct ConcatenateNodeOp : public NaryNodeOp {
template <typename... Args>
ConcatenateNodeOp(const std::vector<Expr>& nodes, Args... args)
- : NaryNodeOp(nodes,
- keywords::shape
- = newShape(nodes, keywords::Get(keywords::axis, 0, args...)),
- args...) {}
+ : NaryNodeOp(nodes, newShape(nodes, keywords::Get(keywords::axis, 0, args...))) {}
Shape newShape(const std::vector<Expr>& nodes, int ax) {
Shape shape = nodes.back()->shape();
diff --git a/src/graph/node_operators_unary.h b/src/graph/node_operators_unary.h
index 0a76471b..e857e790 100644
--- a/src/graph/node_operators_unary.h
+++ b/src/graph/node_operators_unary.h
@@ -14,9 +14,11 @@
namespace marian {
struct UnaryNodeOp : public NaryNodeOp {
- template <typename... Args>
- UnaryNodeOp(Expr a, Args... args)
- : NaryNodeOp({a}, keywords::shape = a->shape(), args...) {}
+ UnaryNodeOp(Expr a, Shape shape)
+ : NaryNodeOp({a}, shape) {}
+
+ UnaryNodeOp(Expr a)
+ : NaryNodeOp({a}, a->shape()) {}
const std::string color() { return "yellow"; }
};
@@ -26,9 +28,9 @@ private:
float scalar_{0};
public:
- template <typename... Args>
- ScalarAddNodeOp(Expr a, float scalar, Args... args)
- : UnaryNodeOp(a, args...), scalar_{scalar} {}
+ ScalarAddNodeOp(Expr a, float scalar)
+ : UnaryNodeOp(a),
+ scalar_{scalar} {}
NodeOps forwardOps() {
using namespace functional;
@@ -67,9 +69,8 @@ private:
float scalar_{0};
public:
- template <typename... Args>
- ScalarMultNodeOp(Expr a, float scalar, Args... args)
- : UnaryNodeOp(a, args...), scalar_{scalar} {}
+ ScalarMultNodeOp(Expr a, float scalar)
+ : UnaryNodeOp(a), scalar_{scalar} {}
NodeOps forwardOps() {
using namespace functional;
@@ -104,8 +105,7 @@ public:
};
struct LogitNodeOp : public UnaryNodeOp {
- template <typename... Args>
- LogitNodeOp(Args... args) : UnaryNodeOp(args...) {}
+ LogitNodeOp(Expr a) : UnaryNodeOp(a) {}
NodeOps forwardOps() {
using namespace functional;
@@ -164,7 +164,7 @@ struct LogitNodeOp : public UnaryNodeOp {
struct TanhNodeOp : public NaryNodeOp {
TanhNodeOp(const std::vector<Expr>& nodes)
- : NaryNodeOp(nodes, keywords::shape = newShape(nodes)) {}
+ : NaryNodeOp(nodes, newShape(nodes)) {}
Shape newShape(const std::vector<Expr>& nodes) {
return Shape::broadcast(nodes);
@@ -214,8 +214,7 @@ struct TanhNodeOp : public NaryNodeOp {
struct ReLUNodeOp : public UnaryNodeOp {
- template <typename... Args>
- ReLUNodeOp(Args... args) : UnaryNodeOp(args...) {}
+ ReLUNodeOp(Expr a) : UnaryNodeOp(a) {}
NodeOps forwardOps() {
// f(x) = max(0, x)
@@ -265,9 +264,8 @@ struct ReLUNodeOp : public UnaryNodeOp {
* \f]
*/
struct PReLUNodeOp : public UnaryNodeOp {
- template <typename... Args>
- PReLUNodeOp(float alpha, Args... args)
- : UnaryNodeOp(args...), alpha_(alpha) {}
+ PReLUNodeOp(float alpha, Expr a)
+ : UnaryNodeOp(a), alpha_(alpha) {}
NodeOps forwardOps() {
using namespace functional;
@@ -316,8 +314,7 @@ private:
*
*/
struct SwishNodeOp : public UnaryNodeOp {
- template <typename... Args>
- SwishNodeOp(Args... args) : UnaryNodeOp(args...) {}
+ SwishNodeOp(Expr a) : UnaryNodeOp(a) {}
NodeOps forwardOps() {
using namespace functional;
@@ -338,14 +335,12 @@ struct SwishNodeOp : public UnaryNodeOp {
const std::string type() { return "swish"; }
};
-struct SoftmaxNodeOp : public NaryNodeOp {
- template <typename... Args>
- SoftmaxNodeOp(Expr a, Args... args)
- : NaryNodeOp(a, args...), mask_(nullptr) {}
+struct SoftmaxNodeOp : public UnaryNodeOp {
+ SoftmaxNodeOp(Expr a)
+ : UnaryNodeOp(a), mask_(nullptr) {}
- template <typename... Args>
- SoftmaxNodeOp(Expr a, Expr mask, Args... args)
- : NaryNodeOp({a}, args...), mask_(mask) {}
+ SoftmaxNodeOp(Expr a, Expr mask)
+ : UnaryNodeOp(a), mask_(mask) {}
Expr mask_;
@@ -396,8 +391,7 @@ struct SoftmaxNodeOp : public NaryNodeOp {
};
struct LogSoftmaxNodeOp : public UnaryNodeOp {
- template <typename... Args>
- LogSoftmaxNodeOp(Args... args) : UnaryNodeOp(args...) {}
+ LogSoftmaxNodeOp(Expr a) : UnaryNodeOp(a) {}
NodeOps forwardOps() { return {NodeOp(LogSoftmax(val_, child(0)->val()))}; }
@@ -416,7 +410,7 @@ struct SumNodeOp : public UnaryNodeOp {
template <typename... Args>
SumNodeOp(Expr a, Args... args)
- : UnaryNodeOp(a, keywords::shape = newShape(a, args...), args...) {}
+ : UnaryNodeOp(a, newShape(a, args...)) {}
NodeOps forwardOps() {
using namespace functional;
@@ -465,7 +459,7 @@ struct MeanNodeOp : public UnaryNodeOp {
template <typename... Args>
MeanNodeOp(Expr a, Args... args)
- : UnaryNodeOp(a, keywords::shape = newShape(a, args...), args...) {}
+ : UnaryNodeOp(a, newShape(a, args...)) {}
NodeOps forwardOps() {
using namespace functional;
@@ -516,8 +510,7 @@ struct MeanNodeOp : public UnaryNodeOp {
};
struct LogNodeOp : public UnaryNodeOp {
- template <typename... Args>
- LogNodeOp(Args... args) : UnaryNodeOp(args...) {}
+ LogNodeOp(Expr a) : UnaryNodeOp(a) {}
NodeOps forwardOps() {
using namespace functional;
@@ -534,8 +527,7 @@ struct LogNodeOp : public UnaryNodeOp {
};
struct ExpNodeOp : public UnaryNodeOp {
- template <typename... Args>
- ExpNodeOp(Args... args) : UnaryNodeOp(args...) {}
+ ExpNodeOp(Expr a) : UnaryNodeOp(a) {}
NodeOps forwardOps() {
using namespace functional;
@@ -553,9 +545,8 @@ struct ExpNodeOp : public UnaryNodeOp {
struct SqrtNodeOp : public UnaryNodeOp {
float epsilon_;
- template <typename... Args>
- SqrtNodeOp(Expr a, float epsilon, Args... args)
- : UnaryNodeOp(a, args...), epsilon_(epsilon) {}
+ SqrtNodeOp(Expr a, float epsilon)
+ : UnaryNodeOp(a), epsilon_(epsilon) {}
NodeOps forwardOps() {
using namespace functional;
@@ -591,8 +582,7 @@ struct SqrtNodeOp : public UnaryNodeOp {
};
struct SquareNodeOp : public UnaryNodeOp {
- template <typename... Args>
- SquareNodeOp(Args... args) : UnaryNodeOp(args...) {}
+ SquareNodeOp(Expr a) : UnaryNodeOp(a) {}
NodeOps forwardOps() {
using namespace functional;
@@ -609,8 +599,7 @@ struct SquareNodeOp : public UnaryNodeOp {
};
struct NegNodeOp : public UnaryNodeOp {
- template <typename... Args>
- NegNodeOp(Args... args) : UnaryNodeOp(args...) {}
+ NegNodeOp(Expr a) : UnaryNodeOp(a) {}
NodeOps forwardOps() {
using namespace functional;
@@ -626,9 +615,8 @@ struct NegNodeOp : public UnaryNodeOp {
};
struct RowsNodeOp : public UnaryNodeOp {
- template <typename... Args>
- RowsNodeOp(Expr a, const std::vector<size_t>& indeces, Args... args)
- : UnaryNodeOp(a, keywords::shape = newShape(a, indeces), args...),
+ RowsNodeOp(Expr a, const std::vector<size_t>& indeces)
+ : UnaryNodeOp(a, newShape(a, indeces)),
indices_(indeces) {}
NodeOps forwardOps() {
@@ -679,9 +667,8 @@ struct RowsNodeOp : public UnaryNodeOp {
};
struct ColsNodeOp : public UnaryNodeOp {
- template <typename... Args>
- ColsNodeOp(Expr a, const std::vector<size_t>& indeces, Args... args)
- : UnaryNodeOp(a, keywords::shape = newShape(a, indeces), args...),
+ ColsNodeOp(Expr a, const std::vector<size_t>& indeces)
+ : UnaryNodeOp(a, newShape(a, indeces)),
indices_(indeces) {}
NodeOps forwardOps() {
@@ -731,7 +718,7 @@ struct ColsNodeOp : public UnaryNodeOp {
struct SelectNodeOp : public UnaryNodeOp {
SelectNodeOp(Expr a, int axis, const std::vector<size_t>& indeces)
- : UnaryNodeOp(a, keywords::shape = newShape(a, axis, indeces)),
+ : UnaryNodeOp(a, newShape(a, axis, indeces)),
indices_(indeces) {}
NodeOps forwardOps() {
@@ -787,7 +774,7 @@ struct TransposeNodeOp : public UnaryNodeOp {
std::vector<int> axes_;
TransposeNodeOp(Expr a, const std::vector<int>& axes)
- : UnaryNodeOp(a, keywords::shape = newShape(a, axes)),
+ : UnaryNodeOp(a, newShape(a, axes)),
axes_{axes} {}
NodeOps forwardOps() {
@@ -844,8 +831,8 @@ private:
public:
template <typename... Args>
- ReshapeNodeOp(Expr a, Shape shape, Args... args)
- : UnaryNodeOp(a, keywords::shape = shape, args...), reshapee_(a) {
+ ReshapeNodeOp(Expr a, Shape shape)
+ : UnaryNodeOp(a, shape), reshapee_(a) {
Node::destroy_ = false;
}
@@ -909,7 +896,7 @@ private:
public:
StepNodeOp(Expr a, int step, int axis)
- : UnaryNodeOp(a, keywords::shape = newShape(a, axis)),
+ : UnaryNodeOp(a, newShape(a, axis)),
stepNode_(a),
step_(step) {
Node::destroy_ = false;
@@ -981,9 +968,8 @@ public:
};
struct ShiftNodeOp : public UnaryNodeOp {
- template <typename... Args>
- ShiftNodeOp(Expr a, Shape shift, Args... args)
- : UnaryNodeOp(a, keywords::shape = a->shape(), args...), shift_(shift) {}
+ ShiftNodeOp(Expr a, Shape shift)
+ : UnaryNodeOp(a, a->shape()), shift_(shift) {}
NodeOps forwardOps() {
return {NodeOp(Shift(val_, child(0)->val(), shift_, false))};
diff --git a/src/layers/convolution.cu b/src/layers/convolution.cu
index b0749450..064abedf 100644
--- a/src/layers/convolution.cu
+++ b/src/layers/convolution.cu
@@ -18,11 +18,11 @@ Expr Convolution::apply(Expr x) {
kernelNum,
kernelDims.first,
kernelDims.second},
- keywords::init=inits::glorot_uniform);
+ inits::glorot_uniform);
auto bias = graph_->param(prefix + "_conv_bias",
{1, kernelNum, 1, 1},
- keywords::init=inits::zeros);
+ inits::zeros);
std::vector<Expr> nodes = {x, kernel, bias};
return Expression<ConvolutionOp>(nodes,
diff --git a/src/layers/generic.h b/src/layers/generic.h
index 558280af..b9c1d100 100644
--- a/src/layers/generic.h
+++ b/src/layers/generic.h
@@ -79,7 +79,7 @@ public:
else {
W = g->param(name + "_" + nameW,
{in->shape()[-1], dim},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
}
Expr b;
@@ -88,7 +88,7 @@ public:
b = tiedParams_[nameB];
else
b = g->param(
- name + "_" + nameB, {1, dim}, keywords::init = inits::zeros);
+ name + "_" + nameB, {1, dim}, inits::zeros);
params_.push_back(W);
params_.push_back(b);
@@ -97,17 +97,17 @@ public:
if(nematusNorm) {
auto ln_s = g->param(name + "_ln_s" + std::to_string(i),
{1, dim},
- keywords::init = inits::from_value(1.f));
+ inits::from_value(1.f));
auto ln_b = g->param(name + "_ln_b" + std::to_string(i),
{1, dim},
- keywords::init = inits::zeros);
+ inits::zeros);
outputs.push_back(
layer_norm(affine(in, W, b, false, transposeW), ln_s, ln_b, NEMATUS_LN_EPS));
} else {
auto gamma = g->param(name + "_gamma" + std::to_string(i),
{1, dim},
- keywords::init = inits::from_value(1.0));
+ inits::from_value(1.0));
params_.push_back(gamma);
outputs.push_back(layer_norm(dot(in, W, false, transposeW), gamma, b));
@@ -151,14 +151,14 @@ public:
else {
W = g->param(name + "_" + nameW,
{input->shape()[-1], dim},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
}
Expr b;
std::string nameB = "b";
if(tiedParams_.count(nameB))
b = tiedParams_[nameB];
else
- b = g->param(name + "_" + nameB, {1, dim}, keywords::init = inits::zeros);
+ b = g->param(name + "_" + nameB, {1, dim}, inits::zeros);
params_ = {W, b};
@@ -166,15 +166,15 @@ public:
if(layerNorm) {
if(nematusNorm) {
auto ln_s = g->param(
- name + "_ln_s", {1, dim}, keywords::init = inits::from_value(1.f));
+ name + "_ln_s", {1, dim}, inits::from_value(1.f));
auto ln_b
- = g->param(name + "_ln_b", {1, dim}, keywords::init = inits::zeros);
+ = g->param(name + "_ln_b", {1, dim}, inits::zeros);
out = layer_norm(affine(input, W, b, false, transposeW),
ln_s, ln_b, NEMATUS_LN_EPS);
} else {
auto gamma = g->param(
- name + "_gamma", {1, dim}, keywords::init = inits::from_value(1.0));
+ name + "_gamma", {1, dim}, inits::from_value(1.0));
params_.push_back(gamma);
out = layer_norm(dot(input, W, false, transposeW), gamma, b);
@@ -219,8 +219,8 @@ struct EmbeddingFactory : public Factory {
return graph_->param(name,
{dimVoc, dimEmb},
- keywords::init = initFunc,
- keywords::fixed = fixed);
+ initFunc,
+ fixed);
}
};
@@ -239,7 +239,7 @@ Expr Cost(Expr logits,
if(weights)
ce = weights * ce;
-
+
if(smoothing > 0) {
// @TODO: add this to CE kernels instead
auto ceq = mean(logsoftmax(logits), axis = -1);
@@ -250,7 +250,7 @@ Expr Cost(Expr logits,
ce = ce * mask;
auto costSum = sum(ce, axis = -3);
-
+
Expr cost;
// axes:
// - time axis (words): -3
diff --git a/src/layers/guided_alignment.h b/src/layers/guided_alignment.h
index fb430ecc..c353f649 100644
--- a/src/layers/guided_alignment.h
+++ b/src/layers/guided_alignment.h
@@ -17,7 +17,7 @@ Expr guidedAlignmentCost(Ptr<ExpressionGraph> graph,
auto aln = graph->constant(
{dimBatch, 1, dimSrc, dimTrg},
- keywords::init = inits::from_vector(batch->getGuidedAlignment()));
+ inits::from_vector(batch->getGuidedAlignment()));
std::string guidedCostType
= options->get<std::string>("guided-alignment-cost");
diff --git a/src/models/amun.h b/src/models/amun.h
index fd4160a3..8382c863 100644
--- a/src/models/amun.h
+++ b/src/models/amun.h
@@ -41,9 +41,9 @@ public:
using namespace keywords;
LOG(info, "Loading model from {}", name);
-
+
auto numpy = cnpy::npz_load(name);
-
+
std::map<std::string, std::string> nameMap
= {{"decoder_U", "decoder_cell1_U"},
{"decoder_Ux", "decoder_cell1_Ux"},
@@ -91,38 +91,38 @@ public:
{"encoder_r_bx", "encoder_bi_r_bx"},
{"encoder_r_gamma1", "encoder_bi_r_gamma1"},
{"encoder_r_gamma2", "encoder_bi_r_gamma2"}};
-
+
if(opt<bool>("tied-embeddings-src") || opt<bool>("tied-embeddings-all"))
nameMap["Wemb"] = "Wemb";
-
+
graph->setReloaded(false);
-
+
for(auto it : numpy) {
auto name = it.first;
-
+
if(name == "decoder_c_tt")
continue;
if(name.substr(0, 8) == "special:")
continue;
-
+
Shape shape;
- if(numpy[name].shape.size() == 2) {
+ if(numpy[name]->shape.size() == 2) {
shape.resize(2);
- shape.set(0, numpy[name].shape[0]);
- shape.set(1, numpy[name].shape[1]);
- } else if(numpy[name].shape.size() == 1) {
+ shape.set(0, numpy[name]->shape[0]);
+ shape.set(1, numpy[name]->shape[1]);
+ } else if(numpy[name]->shape.size() == 1) {
shape.resize(2);
shape.set(0, 1);
- shape.set(1, numpy[name].shape[0]);
+ shape.set(1, numpy[name]->shape[0]);
}
-
+
std::string pName = name;
if(nameMap.count(name))
pName = nameMap[name];
-
- graph->param(pName, shape, init = inits::from_numpy(numpy[name]));
+
+ graph->param(pName, shape, inits::from_numpy(numpy[name]));
}
-
+
graph->setReloaded(true);
}
@@ -182,7 +182,7 @@ public:
for(auto p : graph->params()->getMap()) {
std::vector<float> v;
- p.second->val() >> v;
+ p.second->val()->get(v);
unsigned dim;
if(p.second->shape()[0] == 1) {
diff --git a/src/models/encdec.h b/src/models/encdec.h
index 071a9eb0..cfda0404 100644
--- a/src/models/encdec.h
+++ b/src/models/encdec.h
@@ -31,7 +31,7 @@ protected:
auto batchEmbeddings
= reshape(chosenEmbeddings, {dimWords, dimBatch, dimEmb});
auto batchMask = graph->constant(
- {dimWords, dimBatch, 1}, init = inits::from_vector(subBatch->mask()));
+ {dimWords, dimBatch, 1}, inits::from_vector(subBatch->mask()));
return std::make_tuple(batchEmbeddings, batchMask);
}
@@ -113,10 +113,10 @@ public:
= reshape(chosenEmbeddings, {dimWords, dimBatch, opt<int>("dim-emb")});
auto yMask = graph->constant({dimWords, dimBatch, 1},
- init = inits::from_vector(subBatch->mask()));
+ inits::from_vector(subBatch->mask()));
auto yData = graph->constant({(int)subBatch->data().size(), 1},
- init = inits::from_vector(subBatch->data()));
+ inits::from_vector(subBatch->data()));
auto yShifted = shift(y, {1, 0, 0});
@@ -150,7 +150,7 @@ public:
Expr selectedEmbs;
if(embIdx.empty()) {
selectedEmbs = graph->constant({1, 1, dimBatch, dimTrgEmb},
- init = inits::zeros);
+ inits::zeros);
} else {
selectedEmbs = rows(yEmb, embIdx);
selectedEmbs
@@ -367,7 +367,7 @@ public:
weights = graph->constant(
{1, dimWords, dimBatch, 1},
- keywords::init = inits::from_vector(batch->getDataWeights()));
+ inits::from_vector(batch->getDataWeights()));
}
auto cost = Cost(nextState->getProbs(),
diff --git a/src/models/nematus.h b/src/models/nematus.h
index 3b93ab52..82b77c68 100644
--- a/src/models/nematus.h
+++ b/src/models/nematus.h
@@ -42,21 +42,21 @@ public:
continue;
Shape shape;
- if(numpy[name].shape.size() == 2) {
+ if(numpy[name]->shape.size() == 2) {
shape.resize(2);
- shape.set(0, numpy[name].shape[0]);
- shape.set(1, numpy[name].shape[1]);
- } else if(numpy[name].shape.size() == 1) {
+ shape.set(0, numpy[name]->shape[0]);
+ shape.set(1, numpy[name]->shape[1]);
+ } else if(numpy[name]->shape.size() == 1) {
shape.resize(2);
shape.set(0, 1);
- shape.set(1, numpy[name].shape[0]);
+ shape.set(1, numpy[name]->shape[0]);
}
std::string pName = name;
if(nameMap_.count(name))
pName = nameMap_[name];
- graph->param(pName, shape, init = inits::from_numpy(numpy[name]));
+ graph->param(pName, shape, inits::from_numpy(numpy[name]));
}
graph->setReloaded(true);
@@ -76,7 +76,7 @@ public:
for(auto p : graph->params()->getMap()) {
std::vector<float> v;
- p.second->val() >> v;
+ p.second->val()->get(v);
unsigned dim;
if(p.second->shape()[0] == 1) {
diff --git a/src/models/s2s.h b/src/models/s2s.h
index 2d1ee281..164c86f5 100644
--- a/src/models/s2s.h
+++ b/src/models/s2s.h
@@ -275,7 +275,7 @@ public:
int dimBatch = batch->size();
int dimRnn = opt<int>("dim-rnn");
- start = graph->constant({dimBatch, dimRnn}, init = inits::zeros);
+ start = graph->constant({dimBatch, dimRnn}, inits::zeros);
}
rnn::States startStates(opt<size_t>("dec-depth"), {start, start});
diff --git a/src/models/transformer.h b/src/models/transformer.h
index c41453db..d8999263 100644
--- a/src/models/transformer.h
+++ b/src/models/transformer.h
@@ -35,7 +35,7 @@ public:
// shared across batch entries
auto signal = graph->constant({dimWords, 1, dimEmb},
- init = inits::from_vector(vPos));
+ inits::from_vector(vPos));
return input + signal;
}
@@ -48,7 +48,7 @@ public:
for(int j = 0; j <= i; ++j)
vMask[i * length + j] = 1.f;
return graph->constant({1, length, length},
- init = inits::from_vector(vMask));
+ inits::from_vector(vMask));
}
Expr InverseMask(Expr mask) {
@@ -104,9 +104,9 @@ public:
// layer normalization
if(op == 'n') {
auto scale = graph->param(
- prefix + "_ln_scale_pre", {1, dimModel}, init = inits::ones);
+ prefix + "_ln_scale_pre", {1, dimModel}, inits::ones);
auto bias = graph->param(
- prefix + "_ln_bias_pre", {1, dimModel}, init = inits::zeros);
+ prefix + "_ln_bias_pre", {1, dimModel}, inits::zeros);
output = layer_norm(output, scale, bias, 1e-6);
}
}
@@ -136,9 +136,9 @@ public:
// highway connection
if(op == 'h') {
auto Wh = graph->param(
- prefix + "_Wh", {dimModel, dimModel}, init = inits::glorot_uniform);
+ prefix + "_Wh", {dimModel, dimModel}, inits::glorot_uniform);
auto bh
- = graph->param(prefix + "_bh", {1, dimModel}, init = inits::zeros);
+ = graph->param(prefix + "_bh", {1, dimModel}, inits::zeros);
auto t = affine(prevInput, Wh, bh);
output = highway(output, prevInput, t);
@@ -146,9 +146,9 @@ public:
// layer normalization
if(op == 'n') {
auto scale = graph->param(
- prefix + "_ln_scale", {1, dimModel}, init = inits::ones);
+ prefix + "_ln_scale", {1, dimModel}, inits::ones);
auto bias = graph->param(
- prefix + "_ln_bias", {1, dimModel}, init = inits::zeros);
+ prefix + "_ln_bias", {1, dimModel}, inits::zeros);
output = layer_norm(output, scale, bias, 1e-6);
}
}
@@ -211,8 +211,8 @@ public:
int dimModel = q->shape()[-1];
auto Wq = graph->param(
- prefix + "_Wq", {dimModel, dimModel}, init = inits::glorot_uniform);
- auto bq = graph->param(prefix + "_bq", {1, dimModel}, init = inits::zeros);
+ prefix + "_Wq", {dimModel, dimModel}, inits::glorot_uniform);
+ auto bq = graph->param(prefix + "_bq", {1, dimModel}, inits::zeros);
auto qh = affine(q, Wq, bq);
qh = SplitHeads(qh, dimHeads);
@@ -224,15 +224,15 @@ public:
auto Wk = graph->param(prefixProj + "_Wk",
{dimModel, dimModel},
- init = inits::glorot_uniform);
+ inits::glorot_uniform);
auto bk = graph->param(
- prefixProj + "_bk", {1, dimModel}, init = inits::zeros);
+ prefixProj + "_bk", {1, dimModel}, inits::zeros);
auto Wv = graph->param(prefixProj + "_Wv",
{dimModel, dimModel},
- init = inits::glorot_uniform);
+ inits::glorot_uniform);
auto bv = graph->param(
- prefixProj + "_bv", {1, dimModel}, init = inits::zeros);
+ prefixProj + "_bv", {1, dimModel}, inits::zeros);
auto kh = affine(keys[i], Wk, bk);
auto vh = affine(values[i], Wv, bv);
@@ -258,8 +258,8 @@ public:
int dimAtt = output->shape()[-1];
auto Wo = graph->param(
- prefix + "_Wo", {dimAtt, dimOut}, init = inits::glorot_uniform);
- auto bo = graph->param(prefix + "_bo", {1, dimOut}, init = inits::zeros);
+ prefix + "_Wo", {dimAtt, dimOut}, inits::glorot_uniform);
+ auto bo = graph->param(prefix + "_bo", {1, dimOut}, inits::zeros);
output = affine(output, Wo, bo);
return output;
@@ -336,12 +336,12 @@ public:
int dimFfn = options->get<int>("transformer-dim-ffn");
auto W1 = graph->param(
- prefix + "_W1", {dimModel, dimFfn}, init = inits::glorot_uniform);
- auto b1 = graph->param(prefix + "_b1", {1, dimFfn}, init = inits::zeros);
+ prefix + "_W1", {dimModel, dimFfn}, inits::glorot_uniform);
+ auto b1 = graph->param(prefix + "_b1", {1, dimFfn}, inits::zeros);
auto W2 = graph->param(
- prefix + "_W2", {dimFfn, dimModel}, init = inits::glorot_uniform);
- auto b2 = graph->param(prefix + "_b2", {1, dimModel}, init = inits::zeros);
+ prefix + "_W2", {dimFfn, dimModel}, inits::glorot_uniform);
+ auto b2 = graph->param(prefix + "_b2", {1, dimModel}, inits::zeros);
output = affine(output, W1, b1);
output = swish(output);
diff --git a/src/optimizers/optimizers.cu b/src/optimizers/optimizers.cu
index 49c380e1..afec4708 100644
--- a/src/optimizers/optimizers.cu
+++ b/src/optimizers/optimizers.cu
@@ -50,15 +50,15 @@ void Adagrad::load(const std::string& name,
auto numpy = cnpy::npz_load(name);
for(auto it : numpy) {
auto name = it.first;
- cnpy::NpyArray& np = it.second;
+ auto np = it.second;
// get the size of gt_
- totalSize = np.shape[1];
+ totalSize = np->shape[1];
// extract data into vectors
if(name == "adagrad_gt") {
vGt.resize(totalSize);
- std::copy((float*)np.data(), (float*)np.data() + totalSize, vGt.begin());
+ std::copy((float*)np->data(), (float*)np->data() + totalSize, vGt.begin());
}
}
@@ -174,19 +174,19 @@ void Adam::load(const std::string& name,
auto numpy = cnpy::npz_load(name);
for(auto it : numpy) {
auto name = it.first;
- cnpy::NpyArray& np = it.second;
+ auto np = it.second;
// get the size of mt_ and vt_, they are the same
- totalSize = np.shape[1];
+ totalSize = np->shape[1];
// extract data into vectors
if(name == "adam_mt") {
vMt.resize(totalSize);
- std::copy((float*)np.data(), (float*)np.data() + totalSize, vMt.begin());
+ std::copy((float*)np->data(), (float*)np->data() + totalSize, vMt.begin());
}
if(name == "adam_vt") {
vVt.resize(totalSize);
- std::copy((float*)np.data(), (float*)np.data() + totalSize, vVt.begin());
+ std::copy((float*)np->data(), (float*)np->data() + totalSize, vVt.begin());
}
}
diff --git a/src/rnn/attention.cu b/src/rnn/attention.cu
index 2faa9d9a..d5e44a2f 100644
--- a/src/rnn/attention.cu
+++ b/src/rnn/attention.cu
@@ -9,7 +9,7 @@ namespace rnn {
struct AttentionNodeOp : public NaryNodeOp {
AttentionNodeOp(const std::vector<Expr>& nodes)
- : NaryNodeOp(nodes, keywords::shape = newShape(nodes)) {}
+ : NaryNodeOp(nodes, newShape(nodes)) {}
Shape newShape(const std::vector<Expr>& nodes) {
Shape shape = Shape::broadcast({nodes[1], nodes[2]});
diff --git a/src/rnn/attention.h b/src/rnn/attention.h
index faece60a..70337355 100644
--- a/src/rnn/attention.h
+++ b/src/rnn/attention.h
@@ -51,15 +51,15 @@ public:
Wa_ = graph->param(prefix + "_W_comb_att",
{dimDecState, dimEncState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
Ua_ = graph->param(prefix + "_Wc_att",
{dimEncState, dimEncState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
va_ = graph->param(prefix + "_U_att",
{dimEncState, 1},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
ba_ = graph->param(
- prefix + "_b_att", {1, dimEncState}, keywords::init = inits::zeros);
+ prefix + "_b_att", {1, dimEncState}, inits::zeros);
if(dropout_ > 0.0f) {
dropMaskContext_ = graph->dropout(dropout_, {1, dimEncState});
@@ -75,17 +75,17 @@ public:
// instead of gammaContext_
Wc_att_lns_ = graph->param(prefix + "_Wc_att_lns",
{1, dimEncState},
- keywords::init = inits::from_value(1.f));
+ inits::from_value(1.f));
Wc_att_lnb_ = graph->param(prefix + "_Wc_att_lnb",
{1, dimEncState},
- keywords::init = inits::zeros);
+ inits::zeros);
// instead of gammaState_
W_comb_att_lns_ = graph->param(prefix + "_W_comb_att_lns",
{1, dimEncState},
- keywords::init = inits::from_value(1.f));
+ inits::from_value(1.f));
W_comb_att_lnb_ = graph->param(prefix + "_W_comb_att_lnb",
{1, dimEncState},
- keywords::init = inits::zeros);
+ inits::zeros);
mappedContext_ = layer_norm(affine(contextDropped_, Ua_, ba_),
Wc_att_lns_,
@@ -94,10 +94,10 @@ public:
} else {
gammaContext_ = graph->param(prefix + "_att_gamma1",
{1, dimEncState},
- keywords::init = inits::from_value(1.0));
+ inits::from_value(1.0));
gammaState_ = graph->param(prefix + "_att_gamma2",
{1, dimEncState},
- keywords::init = inits::from_value(1.0));
+ inits::from_value(1.0));
mappedContext_
= layer_norm(dot(contextDropped_, Ua_), gammaContext_, ba_);
@@ -144,7 +144,7 @@ public:
auto alignedSource
= scalar_product(encState_->getAttended(), e, axis = -3);
-
+
contexts_.push_back(alignedSource);
alignments_.push_back(e);
return alignedSource;
diff --git a/src/rnn/cells.cu b/src/rnn/cells.cu
index 42373eab..8b38780f 100644
--- a/src/rnn/cells.cu
+++ b/src/rnn/cells.cu
@@ -9,9 +9,8 @@ namespace rnn {
struct GRUFastNodeOp : public NaryNodeOp {
bool final_;
- template <typename... Args>
- GRUFastNodeOp(const std::vector<Expr>& nodes, bool final, Args... args)
- : NaryNodeOp(nodes, args...), final_(final) {}
+ GRUFastNodeOp(const std::vector<Expr>& nodes, bool final)
+ : NaryNodeOp(nodes), final_(final) {}
NodeOps forwardOps() {
std::vector<Tensor> inputs;
@@ -53,9 +52,8 @@ Expr gruOps(const std::vector<Expr>& nodes, bool final) {
/******************************************************************************/
struct LSTMCellNodeOp : public NaryNodeOp {
- template <typename... Args>
- LSTMCellNodeOp(const std::vector<Expr>& nodes, Args... args)
- : NaryNodeOp(nodes, args...) {}
+ LSTMCellNodeOp(const std::vector<Expr>& nodes)
+ : NaryNodeOp(nodes) {}
NodeOps forwardOps() {
std::vector<Tensor> inputs;
@@ -91,9 +89,8 @@ struct LSTMCellNodeOp : public NaryNodeOp {
};
struct LSTMOutputNodeOp : public NaryNodeOp {
- template <typename... Args>
- LSTMOutputNodeOp(const std::vector<Expr>& nodes, Args... args)
- : NaryNodeOp(nodes, args...) {}
+ LSTMOutputNodeOp(const std::vector<Expr>& nodes)
+ : NaryNodeOp(nodes) {}
NodeOps forwardOps() {
std::vector<Tensor> inputs;
diff --git a/src/rnn/cells.h b/src/rnn/cells.h
index 36fda810..2eeed6fa 100644
--- a/src/rnn/cells.h
+++ b/src/rnn/cells.h
@@ -36,15 +36,15 @@ public:
U_ = graph->param(prefix + "_U",
{dimState, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
if(dimInput)
W_ = graph->param(prefix + "_W",
{dimInput, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
b_ = graph->param(
- prefix + "_b", {1, dimState}, keywords::init = inits::zeros);
+ prefix + "_b", {1, dimState}, inits::zeros);
if(dropout_ > 0.0f) {
if(dimInput)
@@ -56,10 +56,10 @@ public:
if(dimInput)
gamma1_ = graph->param(prefix + "_gamma1",
{1, 3 * dimState},
- keywords::init = inits::from_value(1.f));
+ inits::from_value(1.f));
gamma2_ = graph->param(prefix + "_gamma2",
{1, 3 * dimState},
- keywords::init = inits::from_value(1.f));
+ inits::from_value(1.f));
}
}
@@ -143,35 +143,35 @@ public:
auto U = graph->param(prefix + "_U",
{dimState, 2 * dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
auto Ux = graph->param(prefix + "_Ux",
{dimState, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
U_ = concatenate({U, Ux}, keywords::axis = -1);
if(dimInput > 0) {
auto W = graph->param(prefix + "_W",
{dimInput, 2 * dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
auto Wx = graph->param(prefix + "_Wx",
{dimInput, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
W_ = concatenate({W, Wx}, keywords::axis = -1);
}
auto b = graph->param(
- prefix + "_b", {1, 2 * dimState}, keywords::init = inits::zeros);
+ prefix + "_b", {1, 2 * dimState}, inits::zeros);
auto bx = graph->param(
- prefix + "_bx", {1, dimState}, keywords::init = inits::zeros);
+ prefix + "_bx", {1, dimState}, inits::zeros);
b_ = concatenate({b, bx}, keywords::axis = -1);
// @TODO use this and adjust Amun model type saving and loading
// U_ = graph->param(prefix + "_U", {dimState, 3 * dimState},
- // keywords::init=inits::glorot_uniform);
+  //                   inits::glorot_uniform);
// W_ = graph->param(prefix + "_W", {dimInput, 3 * dimState},
- // keywords::init=inits::glorot_uniform);
+ //   inits::glorot_uniform);
// b_ = graph->param(prefix + "_b", {1, 3 * dimState},
- // keywords::init=inits::zeros);
+ //   inits::zeros);
if(dropout_ > 0.0f) {
if(dimInput)
@@ -183,10 +183,10 @@ public:
if(dimInput)
gamma1_ = graph->param(prefix + "_gamma1",
{1, 3 * dimState},
- keywords::init = inits::from_value(1.f));
+ inits::from_value(1.f));
gamma2_ = graph->param(prefix + "_gamma2",
{1, 3 * dimState},
- keywords::init = inits::from_value(1.f));
+ inits::from_value(1.f));
}
}
@@ -231,7 +231,7 @@ public:
if(xWs.empty()) {
if(!fakeInput_ || fakeInput_->shape() != sU->shape())
fakeInput_
- = sU->graph()->constant(sU->shape(), keywords::init = inits::zeros);
+ = sU->graph()->constant(sU->shape(), inits::zeros);
xW = fakeInput_;
} else {
xW = xWs.front();
@@ -299,10 +299,10 @@ public:
auto U = graph->param(prefix + "_U",
{dimState, 2 * dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
auto Ux = graph->param(prefix + "_Ux",
{dimState, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
if(layerNorm_) {
U_ = U;
@@ -314,10 +314,10 @@ public:
if(dimInput > 0) {
auto W = graph->param(prefix + "_W",
{dimInput, 2 * dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
auto Wx = graph->param(prefix + "_Wx",
{dimInput, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
if(layerNorm_) {
W_ = W;
Wx_ = Wx;
@@ -327,9 +327,9 @@ public:
}
auto b = graph->param(
- prefix + "_b", {1, 2 * dimState}, keywords::init = inits::zeros);
+ prefix + "_b", {1, 2 * dimState}, inits::zeros);
auto bx = graph->param(
- prefix + "_bx", {1, dimState}, keywords::init = inits::zeros);
+ prefix + "_bx", {1, dimState}, inits::zeros);
if(layerNorm_) {
b_ = b;
@@ -338,11 +338,11 @@ public:
// in specific cases we need to pass bx to the kernel
if(encoder_ && transition_) {
auto b0
- = graph->constant({1, 2 * dimState}, keywords::init = inits::zeros);
+ = graph->constant({1, 2 * dimState}, inits::zeros);
bbx_ = concatenate({b0, bx}, keywords::axis = -1);
} else {
bbx_
- = graph->constant({1, 3 * dimState}, keywords::init = inits::zeros);
+ = graph->constant({1, 3 * dimState}, inits::zeros);
}
} else {
bbx_ = concatenate({b, bx}, keywords::axis = -1);
@@ -358,27 +358,27 @@ public:
if(dimInput) {
W_lns_ = graph->param(prefix + "_W_lns",
{1, 2 * dimState},
- keywords::init = inits::from_value(1.f));
+ inits::from_value(1.f));
W_lnb_ = graph->param(prefix + "_W_lnb",
{1, 2 * dimState},
- keywords::init = inits::zeros);
+ inits::zeros);
Wx_lns_ = graph->param(prefix + "_Wx_lns",
{1, 1 * dimState},
- keywords::init = inits::from_value(1.f));
+ inits::from_value(1.f));
Wx_lnb_ = graph->param(prefix + "_Wx_lnb",
{1, 1 * dimState},
- keywords::init = inits::zeros);
+ inits::zeros);
}
U_lns_ = graph->param(prefix + "_U_lns",
{1, 2 * dimState},
- keywords::init = inits::from_value(1.f));
+ inits::from_value(1.f));
U_lnb_ = graph->param(
- prefix + "_U_lnb", {1, 2 * dimState}, keywords::init = inits::zeros);
+ prefix + "_U_lnb", {1, 2 * dimState}, inits::zeros);
Ux_lns_ = graph->param(prefix + "_Ux_lns",
{1, 1 * dimState},
- keywords::init = inits::from_value(1.f));
+ inits::from_value(1.f));
Ux_lnb_ = graph->param(
- prefix + "_Ux_lnb", {1, 1 * dimState}, keywords::init = inits::zeros);
+ prefix + "_Ux_lnb", {1, 1 * dimState}, inits::zeros);
}
}
@@ -468,7 +468,7 @@ public:
if(transition_) {
if(!fakeInput_ || fakeInput_->shape() != sU->shape())
fakeInput_
- = sU->graph()->constant(sU->shape(), keywords::init = inits::zeros);
+ = sU->graph()->constant(sU->shape(), inits::zeros);
xW = fakeInput_;
} else {
xW = xWs.front();
@@ -514,14 +514,14 @@ public:
U_ = graph->param(prefix + "_U",
{dimState, 4 * dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
if(dimInput)
W_ = graph->param(prefix + "_W",
{dimInput, 4 * dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
b_ = graph->param(
- prefix + "_b", {1, 4 * dimState}, keywords::init = inits::zeros);
+ prefix + "_b", {1, 4 * dimState}, inits::zeros);
if(dropout_ > 0.0f) {
if(dimInput)
@@ -533,10 +533,10 @@ public:
if(dimInput)
gamma1_ = graph->param(prefix + "_gamma1",
{1, 4 * dimState},
- keywords::init = inits::from_value(1.f));
+ inits::from_value(1.f));
gamma2_ = graph->param(prefix + "_gamma2",
{1, 4 * dimState},
- keywords::init = inits::from_value(1.f));
+ inits::from_value(1.f));
}
}
@@ -586,7 +586,7 @@ public:
if(xWs.empty()) {
if(!fakeInput_ || fakeInput_->shape() != sU->shape())
fakeInput_
- = sU->graph()->constant(sU->shape(), keywords::init = inits::zeros);
+ = sU->graph()->constant(sU->shape(), inits::zeros);
xW = fakeInput_;
} else {
xW = xWs.front();
@@ -623,20 +623,20 @@ public:
Um_ = graph->param(prefix + "_Um",
{dimState, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
Wm_ = graph->param(prefix + "_Wm",
{dimInput, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
bm_ = graph->param(
- prefix + "_bm", {1, dimState}, keywords::init = inits::zeros);
+ prefix + "_bm", {1, dimState}, inits::zeros);
if(CellType::layerNorm_) {
gamma1m_ = graph->param(prefix + "_gamma1m",
{1, dimState},
- keywords::init = inits::from_value(1.f));
+ inits::from_value(1.f));
gamma2m_ = graph->param(prefix + "_gamma2m",
{1, dimState},
- keywords::init = inits::from_value(1.f));
+ inits::from_value(1.f));
}
}
@@ -697,39 +697,39 @@ public:
Uf_ = graph->param(prefix + "_Uf",
{dimState, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
Wf_ = graph->param(prefix + "_Wf",
{dimInput, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
bf_ = graph->param(
- prefix + "_bf", {1, dimState}, keywords::init = inits::zeros);
+ prefix + "_bf", {1, dimState}, inits::zeros);
Ui_ = graph->param(prefix + "_Ui",
{dimState, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
Wi_ = graph->param(prefix + "_Wi",
{dimInput, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
bi_ = graph->param(
- prefix + "_bi", {1, dimState}, keywords::init = inits::zeros);
+ prefix + "_bi", {1, dimState}, inits::zeros);
Uc_ = graph->param(prefix + "_Uc",
{dimState, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
Wc_ = graph->param(prefix + "_Wc",
{dimInput, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
bc_ = graph->param(
- prefix + "_bc", {1, dimState}, keywords::init = inits::zeros);
+ prefix + "_bc", {1, dimState}, inits::zeros);
Uo_ = graph->param(prefix + "_Uo",
{dimState, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
Wo_ = graph->param(prefix + "_Wo",
{dimInput, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
bo_ = graph->param(
- prefix + "_bo", {1, dimState}, keywords::init = inits::zeros);
+ prefix + "_bo", {1, dimState}, inits::zeros);
}
State apply(std::vector<Expr> inputs, State state, Expr mask = nullptr) {
@@ -791,39 +791,39 @@ public:
auto Uf = graph->param(prefix + "_Uf",
{dimState, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
auto Wf = graph->param(prefix + "_Wf",
{dimInput, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
auto bf = graph->param(
- prefix + "_bf", {1, dimState}, keywords::init = inits::zeros);
+ prefix + "_bf", {1, dimState}, inits::zeros);
auto Ui = graph->param(prefix + "_Ui",
{dimState, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
auto Wi = graph->param(prefix + "_Wi",
{dimInput, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
auto bi = graph->param(
- prefix + "_bi", {1, dimState}, keywords::init = inits::zeros);
+ prefix + "_bi", {1, dimState}, inits::zeros);
auto Uc = graph->param(prefix + "_Uc",
{dimState, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
auto Wc = graph->param(prefix + "_Wc",
{dimInput, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
auto bc = graph->param(
- prefix + "_bc", {1, dimState}, keywords::init = inits::zeros);
+ prefix + "_bc", {1, dimState}, inits::zeros);
auto Uo = graph->param(prefix + "_Uo",
{dimState, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
auto Wo = graph->param(prefix + "_Wo",
{dimInput, dimState},
- keywords::init = inits::glorot_uniform);
+ inits::glorot_uniform);
auto bo = graph->param(
- prefix + "_bo", {1, dimState}, keywords::init = inits::zeros);
+ prefix + "_bo", {1, dimState}, inits::zeros);
U_ = concatenate({Uf, Ui, Uc, Uo}, keywords::axis = -1);
W_ = concatenate({Wf, Wi, Wc, Wo}, keywords::axis = -1);
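
Throughout cells.h the keywords::init = marker disappears and the initializer becomes a plain third positional argument to graph->param(...) and graph->constant(...). Below is a self-contained sketch of that calling convention (illustration only, not part of this patch); param and the inits functors here are simplified stand-ins, not the actual marian signatures.

    #include <algorithm>
    #include <functional>
    #include <iostream>
    #include <string>
    #include <vector>

    using Init = std::function<void(std::vector<float>&)>;

    namespace inits {
    const Init zeros = [](std::vector<float>& v) { std::fill(v.begin(), v.end(), 0.f); };
    Init from_value(float x) {
      return [x](std::vector<float>& v) { std::fill(v.begin(), v.end(), x); };
    }
    }

    // param(name, shape, init): the initializer is an ordinary third positional
    // argument, so no keyword machinery is needed.
    std::vector<float> param(const std::string& name,
                             const std::vector<int>& shape,
                             const Init& init) {
      size_t size = 1;
      for(int d : shape) size *= d;
      std::vector<float> data(size);
      init(data);
      std::cout << name << ": " << size << " floats\n";
      return data;
    }

    int main() {
      int dimState = 4;
      auto b      = param("cell_b",      {1, 2 * dimState}, inits::zeros);
      auto gamma1 = param("cell_gamma1", {1, 3 * dimState}, inits::from_value(1.f));
      return (b.empty() || gamma1.empty()) ? 1 : 0;
    }
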
diff --git a/src/rnn/rnn.h b/src/rnn/rnn.h
index 7374ed2a..4f1be340 100644
--- a/src/rnn/rnn.h
+++ b/src/rnn/rnn.h
@@ -101,7 +101,7 @@ private:
int dimBatch = input->shape()[-2];
int dimState = cell_->getOptions()->get<int>("dimState");
- auto output = graph->zeros(keywords::shape = {1, dimBatch, dimState});
+ auto output = graph->zeros({1, dimBatch, dimState});
Expr cell = output;
State startState{output, cell};
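
In rnn.h the start state is built the same way: graph->zeros(...) now takes the shape positionally, without the keywords::shape = marker. A toy sketch of the pattern (illustration only, not the marian graph API):

    #include <vector>

    // Toy zeros(shape), standing in for graph->zeros(...): the shape is now the
    // only (positional) argument, replacing the former keywords::shape = {...}.
    std::vector<float> zeros(const std::vector<int>& shape) {
      size_t size = 1;
      for(int d : shape) size *= d;
      return std::vector<float>(size, 0.f);
    }

    int main() {
      int dimBatch = 8, dimState = 512;
      auto output = zeros({1, dimBatch, dimState});
      return output.size() == size_t(1) * dimBatch * dimState ? 0 : 1;
    }
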
diff --git a/src/tensors/tensor.h b/src/tensors/tensor.h
index d6924402..24ca0738 100644
--- a/src/tensors/tensor.h
+++ b/src/tensors/tensor.h
@@ -74,7 +74,7 @@ public:
else
std::copy(data(), data() + size(), v.data());
}
-
+
void set(const float* begin, const float* end) {
if(backend_->getDevice().type == DeviceType::gpu)
gpu::copy(backend_, begin, end, data());
@@ -189,14 +189,4 @@ public:
typedef std::shared_ptr<TensorBase> Tensor;
-static Tensor operator<<(Tensor t, const std::vector<float> &v) {
- t->set(v);
- return t;
-}
-
-static Tensor operator>>(Tensor t, std::vector<float> &v) {
- t->get(v);
- return t;
-}
-
}
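
With the stream-style operator<< and operator>> helpers removed from tensor.h, callers copy data through the member functions directly, i.e. t->set(v) and t->get(v). A standalone sketch of the equivalent usage (illustration only; the toy TensorBase below is host-only, whereas the real class dispatches between CPU and GPU backends):

    #include <cassert>
    #include <memory>
    #include <vector>

    // Toy stand-in for TensorBase: just a host-side float buffer.
    struct TensorBase {
      explicit TensorBase(size_t size) : data_(size, 0.f) {}
      void set(const std::vector<float>& v) { data_ = v; }   // host -> tensor
      void get(std::vector<float>& v) const { v = data_; }   // tensor -> host
      size_t size() const { return data_.size(); }
    private:
      std::vector<float> data_;
    };
    typedef std::shared_ptr<TensorBase> Tensor;

    int main() {
      Tensor t = std::make_shared<TensorBase>(4);
      std::vector<float> in = {1.f, 2.f, 3.f, 4.f};

      // Previously: t << in;   and   t >> out;
      // Now the member calls are used directly:
      t->set(in);
      std::vector<float> out;
      t->get(out);

      assert(out == in);
      return 0;
    }
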
diff --git a/src/translator/beam_search.h b/src/translator/beam_search.h
index 5c7cda26..20b7b628 100644
--- a/src/translator/beam_search.h
+++ b/src/translator/beam_search.h
@@ -139,7 +139,7 @@ public:
if(first) {
// no cost
prevCosts = graph->constant({1, 1, 1, 1},
- keywords::init = inits::from_value(0));
+ inits::from_value(0));
} else {
std::vector<float> beamCosts;
@@ -164,7 +164,7 @@ public:
prevCosts
= graph->constant({(int)localBeamSize, 1, dimBatch, 1},
- keywords::init = inits::from_vector(beamCosts));
+ inits::from_vector(beamCosts));
}
//**********************************************************************
diff --git a/src/translator/scorers.h b/src/translator/scorers.h
index 296431f2..94bda6e7 100644
--- a/src/translator/scorers.h
+++ b/src/translator/scorers.h
@@ -133,7 +133,7 @@ public:
p[2] = 0;
penalties_ = graph->constant({1, dimVocab_},
- keywords::init = inits::from_vector(p));
+ inits::from_vector(p));
return New<WordPenaltyState>(dimVocab_, penalties_);
}
@@ -169,7 +169,7 @@ public:
p[2] = 0;
penalties_ = graph->constant({1, dimVocab_},
- keywords::init = inits::from_vector(p));
+ inits::from_vector(p));
return New<WordPenaltyState>(dimVocab_, penalties_);
}
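
The beam-search and word-penalty constants above follow the same convention: the initializer, built from a host value or vector, is passed positionally to graph->constant(...). A minimal sketch of a from_vector-style initializer under the same simplifying assumptions (hypothetical helpers, not the marian implementation):

    #include <cassert>
    #include <functional>
    #include <vector>

    using Init = std::function<void(std::vector<float>&)>;

    // Hypothetical from_vector: copies a host vector into the constant's storage
    // (sizes are assumed to match).
    Init from_vector(std::vector<float> v) {
      return [v](std::vector<float>& data) { data = v; };
    }

    // Toy constant(shape, init), standing in for graph->constant(...).
    std::vector<float> constant(const std::vector<int>& shape, const Init& init) {
      size_t size = 1;
      for(int d : shape) size *= d;
      std::vector<float> data(size);
      init(data);
      return data;
    }

    int main() {
      int dimVocab = 3;
      std::vector<float> p(dimVocab, -1.f);  // loosely mirrors the penalty vector above
      p[0] = 0.f; p[1] = 0.f; p[2] = 0.f;
      auto penalties = constant({1, dimVocab}, from_vector(p));
      assert(penalties.size() == (size_t)dimVocab);
      return 0;
    }
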