34 files changed, 346 insertions, 395 deletions
diff --git a/src/3rd_party/cnpy/cnpy.cpp b/src/3rd_party/cnpy/cnpy.cpp index 277ee7a5..f4df0418 100644 --- a/src/3rd_party/cnpy/cnpy.cpp +++ b/src/3rd_party/cnpy/cnpy.cpp @@ -59,9 +59,9 @@ template<> std::vector<char>& cnpy::operator+=(std::vector<char>& lhs, const cha return lhs; } -void cnpy::parse_npy_header(FILE* fp, unsigned int& word_size, unsigned int*& shape, unsigned int& ndims, bool& fortran_order) { +void cnpy::parse_npy_header(FILE* fp, unsigned int& word_size, unsigned int*& shape, unsigned int& ndims, bool& fortran_order) { char buffer[256]; - size_t res = fread(buffer,sizeof(char),11,fp); + size_t res = fread(buffer,sizeof(char),11,fp); if(res != 11) throw std::runtime_error("parse_npy_header: failed fread"); std::string header = fgets(buffer,256,fp); @@ -88,7 +88,7 @@ void cnpy::parse_npy_header(FILE* fp, unsigned int& word_size, unsigned int*& sh } //endian, word size, data type - //byte order code | stands for not applicable. + //byte order code | stands for not applicable. //not sure when this applies except for byte array loc1 = header.find("descr")+9; bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false); @@ -125,7 +125,7 @@ void cnpy::parse_zip_footer(FILE* fp, unsigned short& nrecs, unsigned int& globa assert(comment_len == 0); } -cnpy::NpyArray load_the_npy_file(FILE* fp) { +cnpy::NpyArrayPtr load_the_npy_file(FILE* fp) { unsigned int* shape; unsigned int ndims, word_size; bool fortran_order; @@ -134,13 +134,13 @@ cnpy::NpyArray load_the_npy_file(FILE* fp) { for(unsigned int i = 0; i < ndims; i++) size *= shape[i]; - cnpy::NpyArray arr; - arr.word_size = word_size; - arr.shape = std::vector<unsigned int>(shape, shape+ndims); + auto arr = cnpy::NpyArrayPtr(new cnpy::NpyArray()); + arr->word_size = word_size; + arr->shape = std::vector<unsigned int>(shape, shape+ndims); delete[] shape; - arr.resize(size*word_size); - arr.fortran_order = fortran_order; - size_t nread = fread(arr.data(), word_size, size,fp); + arr->resize(size*word_size); + arr->fortran_order = fortran_order; + size_t nread = fread(arr->data(), word_size, size,fp); if(nread != size) throw std::runtime_error("load_the_npy_file: failed fread"); return arr; @@ -152,7 +152,7 @@ cnpy::npz_t cnpy::npz_load(std::string fname) { if(!fp) printf("npz_load: Error! Unable to open file %s!\n",fname.c_str()); assert(fp); - cnpy::npz_t arrays; + cnpy::npz_t arrays; while(1) { std::vector<char> local_header(30); @@ -170,7 +170,7 @@ cnpy::npz_t cnpy::npz_load(std::string fname) { if(vname_res != name_len) throw std::runtime_error("npz_load: failed fread"); - //erase the lagging .npy + //erase the lagging .npy varname.erase(varname.end()-4, varname.end()); //read in the extra field @@ -186,16 +186,16 @@ cnpy::npz_t cnpy::npz_load(std::string fname) { } fclose(fp); - return arrays; + return arrays; } -cnpy::NpyArray cnpy::npz_load(std::string fname, std::string varname) { +cnpy::NpyArrayPtr cnpy::npz_load(std::string fname, std::string varname) { FILE* fp = fopen(fname.c_str(),"rb"); if(!fp) { printf("npz_load: Error! 
Unable to open file %s!\n",fname.c_str()); abort(); - } + } while(1) { std::vector<char> local_header(30); @@ -209,7 +209,7 @@ cnpy::NpyArray cnpy::npz_load(std::string fname, std::string varname) { //read in the variable name unsigned short name_len = *(unsigned short*) &local_header[26]; std::string vname(name_len,' '); - size_t vname_res = fread(&vname[0],sizeof(char),name_len,fp); + size_t vname_res = fread(&vname[0],sizeof(char),name_len,fp); if(vname_res != name_len) throw std::runtime_error("npz_load: failed fread"); vname.erase(vname.end()-4,vname.end()); //erase the lagging .npy @@ -219,7 +219,7 @@ cnpy::NpyArray cnpy::npz_load(std::string fname, std::string varname) { fseek(fp,extra_field_len,SEEK_CUR); //skip past the extra field if(vname == varname) { - NpyArray array = load_the_npy_file(fp); + auto array = load_the_npy_file(fp); fclose(fp); return array; } @@ -233,30 +233,27 @@ cnpy::NpyArray cnpy::npz_load(std::string fname, std::string varname) { fclose(fp); std::stringstream ss; - ss << "npz_load: Error! Variable name " - << varname - << " not found in " - << fname - << "!" + ss << "npz_load: Error! Variable name " + << varname + << " not found in " + << fname + << "!" << std::endl; throw std::runtime_error(ss.str()); } -cnpy::NpyArray cnpy::npy_load(std::string fname) { +cnpy::NpyArrayPtr cnpy::npy_load(std::string fname) { FILE* fp = fopen(fname.c_str(), "rb"); if(!fp) { printf("npy_load: Error! Unable to open file %s!\n",fname.c_str()); - abort(); + abort(); } - NpyArray arr = load_the_npy_file(fp); + auto arr = load_the_npy_file(fp); fclose(fp); return arr; } - - - diff --git a/src/3rd_party/cnpy/cnpy.h b/src/3rd_party/cnpy/cnpy.h index f78271a6..0cdd6dca 100644 --- a/src/3rd_party/cnpy/cnpy.h +++ b/src/3rd_party/cnpy/cnpy.h @@ -20,27 +20,28 @@ namespace cnpy { struct NpyArray { - std::shared_ptr<std::vector<char>> ptr; + std::vector<char> bytes; std::vector<unsigned int> shape; unsigned int word_size{1}; bool fortran_order{0}; - - NpyArray() : ptr{new std::vector<char>()} {} - + + NpyArray() {} + void resize(size_t n) { - return ptr->resize(n); + return bytes.resize(n); } - + char* data() { - return ptr->data(); + return bytes.data(); } const char* data() const { - return ptr->data(); + return bytes.data(); } }; - - typedef std::map<std::string, NpyArray> npz_t; + + typedef std::shared_ptr<NpyArray> NpyArrayPtr; + typedef std::map<std::string, NpyArrayPtr> npz_t; char BigEndianTest(); char map_type(const std::type_info& t); @@ -48,20 +49,20 @@ namespace cnpy { void parse_npy_header(FILE* fp,unsigned int& word_size, unsigned int*& shape, unsigned int& ndims, bool& fortran_order); void parse_zip_footer(FILE* fp, unsigned short& nrecs, unsigned int& global_header_size, unsigned int& global_header_offset); npz_t npz_load(std::string fname); - NpyArray npz_load(std::string fname, std::string varname); - NpyArray npy_load(std::string fname); + NpyArrayPtr npz_load(std::string fname, std::string varname); + NpyArrayPtr npy_load(std::string fname); template<typename T> std::vector<char>& operator+=(std::vector<char>& lhs, const T rhs) { //write in little endian for(char byte = 0; byte < sizeof(T); byte++) { - char val = *((char*)&rhs+byte); + char val = *((char*)&rhs+byte); lhs.push_back(val); } return lhs; } - template<> std::vector<char>& operator+=(std::vector<char>& lhs, const std::string rhs); - template<> std::vector<char>& operator+=(std::vector<char>& lhs, const char* rhs); + template<> std::vector<char>& operator+=(std::vector<char>& lhs, const std::string rhs); + 
template<> std::vector<char>& operator+=(std::vector<char>& lhs, const char* rhs); template<typename T> std::string tostring(T i, int pad = 0, char padval = ' ') { @@ -136,7 +137,7 @@ namespace cnpy { if(fp) { //zip file exists. we need to add a new npy file to it. //first read the footer. this gives us the offset and size of the global header - //then read and store the global header. + //then read and store the global header. //below, we will write the the new data at the start of the global header then append the global header and footer below it unsigned int global_header_size; parse_zip_footer(fp,nrecs,global_header_size,global_header_offset); @@ -202,7 +203,7 @@ namespace cnpy { footer += (unsigned int) (global_header_offset + nbytes + local_header.size()); //offset of start of global headers, since global header now starts after newly written array footer += (unsigned short) 0; //zip file comment length - //write everything + //write everything fwrite(&local_header[0],sizeof(char),local_header.size(),fp); fwrite(&npy_header[0],sizeof(char),npy_header.size(),fp); fwrite(data,sizeof(T),nels,fp); @@ -211,7 +212,7 @@ namespace cnpy { fclose(fp); } - template<typename T> std::vector<char> create_npy_header(const T* data, const unsigned int* shape, const unsigned int ndims) { + template<typename T> std::vector<char> create_npy_header(const T* data, const unsigned int* shape, const unsigned int ndims) { std::vector<char> dict; dict += "{'descr': '"; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 409a84e1..38a69d9c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -24,7 +24,7 @@ cuda_add_library(marian graph/expression_graph.cpp graph/expression_operators.cu graph/node.cpp - graph/node_operators.cu + graph/node_operators.cpp graph/node_initializers.cpp layers/convolution.cu diff --git a/src/common/config.cpp b/src/common/config.cpp index 1f297094..9eca1f10 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -54,7 +54,7 @@ void Config::loadModelParameters(const std::string& name) { void Config::GetYamlFromNpz(YAML::Node& yaml, const std::string& varName, const std::string& fName) { - yaml = YAML::Load(cnpy::npz_load(fName, varName).data()); + yaml = YAML::Load(cnpy::npz_load(fName, varName)->data()); } void Config::AddYamlToNpz(const YAML::Node& yaml, diff --git a/src/common/definitions.h b/src/common/definitions.h index 749f4fef..8fb3bbb6 100644 --- a/src/common/definitions.h +++ b/src/common/definitions.h @@ -45,19 +45,19 @@ enum class DeviceType : size_t { gpu = 0, cpu = 1 }; struct DeviceId { size_t no{0}; DeviceType type{DeviceType::gpu}; - + DeviceId() : no{0}, type{DeviceType::gpu} {} DeviceId(size_t no_, DeviceType type_) : no(no_), type(type_) {} - + friend std::ostream& operator<<(std::ostream& out, DeviceId deviceId) { out << (deviceId.type == DeviceType::gpu ? 
"gpu" : "cpu") << deviceId.no; return out; } - + friend bool operator==(DeviceId id1, DeviceId id2) { return id1.no == id2.no && id1.type == id2.type; } - + }; class TensorBase; diff --git a/src/examples/iris/iris.cpp b/src/examples/iris/iris.cpp index 9b1bb958..80a3a2a9 100644 --- a/src/examples/iris/iris.cpp +++ b/src/examples/iris/iris.cpp @@ -25,20 +25,20 @@ Expr buildIrisClassifier(Ptr<ExpressionGraph> graph, // Define the input layer auto x = graph->constant({N, NUM_FEATURES}, - init = inits::from_vector(inputData)); + inits::from_vector(inputData)); // Define the hidden layer - auto W1 = graph->param("W1", {NUM_FEATURES, 5}, init = inits::uniform()); - auto b1 = graph->param("b1", {1, 5}, init = inits::zeros); + auto W1 = graph->param("W1", {NUM_FEATURES, 5}, inits::uniform()); + auto b1 = graph->param("b1", {1, 5}, inits::zeros); auto h = tanh(affine(x, W1, b1)); // Define the output layer - auto W2 = graph->param("W2", {5, NUM_LABELS}, init = inits::uniform()); - auto b2 = graph->param("b2", {1, NUM_LABELS}, init = inits::zeros); + auto W2 = graph->param("W2", {5, NUM_LABELS}, inits::uniform()); + auto b2 = graph->param("b2", {1, NUM_LABELS}, inits::zeros); auto o = affine(h, W2, b2); if(train) { - auto y = graph->constant({N}, init = inits::from_vector(outputData)); + auto y = graph->constant({N}, inits::from_vector(outputData)); /* Define cross entropy cost on the output layer. * It can be also defined directly as: * -mean(sum(logsoftmax(o) * y, axis=1), axis=0) diff --git a/src/examples/mnist/model.h b/src/examples/mnist/model.h index 2ab5a3f5..62e5696a 100644 --- a/src/examples/mnist/model.h +++ b/src/examples/mnist/model.h @@ -79,7 +79,7 @@ protected: auto features = std::static_pointer_cast<data::DataBatch>(batch)->features(); auto x = g->constant({(int)batch->size(), dims[0]}, - init = inits::from_vector(features)); + inits::from_vector(features)); // Construct hidden layers std::vector<Expr> layers, weights, biases; @@ -104,11 +104,11 @@ protected: // Construct a weight node for the outgoing connections from layer i weights.emplace_back(g->param( - "W" + std::to_string(i), {in, out}, init = inits::uniform())); + "W" + std::to_string(i), {in, out}, inits::uniform())); // Construct a bias node. These weights are initialized to zero biases.emplace_back( - g->param("b" + std::to_string(i), {1, out}, init = inits::zeros)); + g->param("b" + std::to_string(i), {1, out}, inits::zeros)); } // Perform matrix multiplication and addition for the last layer @@ -119,7 +119,7 @@ protected: // labels auto labels = std::static_pointer_cast<data::DataBatch>(batch)->labels(); auto y = g->constant({(int)batch->size(), 1}, - init = inits::from_vector(labels)); + inits::from_vector(labels)); // Define a top-level node for training return mean(cross_entropy(last, y), axis = 0); diff --git a/src/examples/mnist/model_lenet.h b/src/examples/mnist/model_lenet.h index 968ceaf3..a91ef97d 100644 --- a/src/examples/mnist/model_lenet.h +++ b/src/examples/mnist/model_lenet.h @@ -29,7 +29,7 @@ protected: auto features = std::static_pointer_cast<data::DataBatch>(batch)->features(); auto x = g->constant({(int)batch->size(), 1, 28, 28}, - init = inits::from_vector(features)); + inits::from_vector(features)); // Construct hidden layers @@ -74,11 +74,11 @@ protected: // Construct a weight node for the outgoing connections from layer i weights.emplace_back(g->param( - "W" + std::to_string(i), {in, out}, init = inits::uniform())); + "W" + std::to_string(i), {in, out}, inits::uniform())); // Construct a bias node. 
These weights are initialized to zero biases.emplace_back( - g->param("b" + std::to_string(i), {1, out}, init = inits::zeros)); + g->param("b" + std::to_string(i), {1, out}, inits::zeros)); } // Perform matrix multiplication and addition for the last layer @@ -91,7 +91,7 @@ protected: // labels auto labels = std::static_pointer_cast<data::DataBatch>(batch)->labels(); auto y = g->constant({(int)batch->size(), 1}, - init = inits::from_vector(labels)); + inits::from_vector(labels)); // Define a top-level node for training return mean(cross_entropy(last, y), axis = 0); diff --git a/src/graph/expression_graph.cpp b/src/graph/expression_graph.cpp index 520476ae..f0ae1ffa 100644 --- a/src/graph/expression_graph.cpp +++ b/src/graph/expression_graph.cpp @@ -17,12 +17,12 @@ void ExpressionGraph::setDevice(DeviceId deviceId) { } } -Expr ExpressionGraph::dropout(float prob, Shape shape) { +Expr ExpressionGraph::dropout(float prob, const Shape& shape) { return Expression<ConstantNode>(shared_from_this(), - keywords::init = [prob, this](Tensor t) { + shape, + [prob, this](Tensor t) { Dropout(t, prob); - }, - keywords::shape = shape); + }); } void ExpressionGraph::checkNan(Tensor t) { diff --git a/src/graph/expression_graph.h b/src/graph/expression_graph.h index 5131daf6..ea1645ec 100644 --- a/src/graph/expression_graph.h +++ b/src/graph/expression_graph.h @@ -77,7 +77,7 @@ public: void copyParams(Ptr<ExpressionGraph> graph) { for(auto p : *graph->params()) - param(p->name(), p->shape()); + param(p->name(), p->shape(), inits::dummy); params()->allocateForward(); params()->vals()->copyFrom(graph->params()->vals()); } @@ -200,8 +200,11 @@ public: dot.close(); } - template <typename... Args> - Expr param(std::string name, Shape shape, Args... args) { + Expr param(const std::string& pname, + const Shape& shape, + const NodeInitializer& init, + bool fixed = false) { + std::string name = pname; if(!namespace_.empty()) name = namespace_ + "::" + name; @@ -214,7 +217,6 @@ public: "original shape {}", shape, name, p->shape()); - bool fixed = Get(keywords::fixed, false, args...); p->setTrainable(!fixed); add(p); return p; @@ -229,8 +231,7 @@ public: ABORT_IF(get(name), "Non-parameter with name '{}' already exists", name); // create parameter node (adds to tape) - p = Expression<ParamNode>( - shared_from_this(), keywords::shape = shape, args...); + p = Expression<ParamNode>(shared_from_this(), shape, init, fixed); // add to list of parameters p->set_name(name); @@ -238,25 +239,21 @@ public: return p; } - template <typename... Args> - Expr constant(Shape shape, Args... args) { + Expr constant(const Shape& shape, + const NodeInitializer& init) { return Expression<ConstantNode>( - shared_from_this(), keywords::shape = shape, args...); + shared_from_this(), shape, init); } - template <typename... Args> - Expr ones(Args... args) { - return Expression<ConstantNode>( - shared_from_this(), keywords::init = inits::ones, args...); + Expr ones(const Shape& shape) { + return Expression<ConstantNode>(shared_from_this(), shape, inits::ones); } - template <typename... Args> - Expr zeros(Args... 
args) { - return Expression<ConstantNode>( - shared_from_this(), keywords::init = inits::zeros, args...); + Expr zeros(const Shape& shape) { + return Expression<ConstantNode>(shared_from_this(), shape, inits::zeros); } - Expr dropout(float prob, Shape shape); + Expr dropout(float prob, const Shape& shape); Expr get(std::string name) { if(!namespace_.empty()) @@ -340,17 +337,17 @@ public: continue; Shape shape; - if(it.second.shape.size() == 1) { + if(it.second->shape.size() == 1) { shape.resize(2); shape.set(0, 1); - shape.set(1, it.second.shape[0]); + shape.set(1, it.second->shape[0]); } else { - shape.resize(it.second.shape.size()); - for(int i = 0; i < it.second.shape.size(); ++i) - shape.set(i, it.second.shape[i]); + shape.resize(it.second->shape.size()); + for(int i = 0; i < it.second->shape.size(); ++i) + shape.set(i, it.second->shape[i]); } - param(name, shape, init = inits::from_numpy(it.second)); + param(name, shape, inits::from_numpy(it.second)); } if(markReloaded) @@ -371,7 +368,7 @@ public: } std::vector<float> v; - p.second->val() >> v; + p.second->val()->get(v); auto& pShape = p.second->shape(); unsigned dim = pShape.size(); diff --git a/src/graph/node.h b/src/graph/node.h index aa450000..74af5771 100644 --- a/src/graph/node.h +++ b/src/graph/node.h @@ -13,7 +13,6 @@ namespace marian { class Node : public Chainable<Tensor>, - public keywords::Keywords, public std::enable_shared_from_this<Node> { protected: size_t id_{0}; @@ -33,11 +32,9 @@ protected: std::string debugMessage_; public: - template <typename... Args> - Node(Ptr<ExpressionGraph> graph, Args... args) - : Keywords(args...), - graph_(graph), - shape_(Get(keywords::shape, {1, 1, 1, 1})) {} + Node(Ptr<ExpressionGraph> graph, Shape shape) + : graph_(graph), + shape_(shape) {} virtual ~Node() { if(destroy_) { @@ -143,12 +140,8 @@ public: struct NaryNodeOp : public Node { size_t hash_{0}; - template <typename... Args> - NaryNodeOp(const std::vector<Expr>& nodes, Args... args) - : Node(nodes.front()->graph(), - keywords::shape - = keywords::Get(keywords::shape, nodes.front()->shape(), args...), - args...) 
{ + NaryNodeOp(const std::vector<Expr>& nodes, Shape shape) + : Node(nodes.front()->graph(), shape) { children_.resize(nodes.size()); for(int i = 0; i < nodes.size(); ++i) children_[i] = nodes[i]; @@ -158,6 +151,9 @@ struct NaryNodeOp : public Node { remove_children_from_top_nodes(); } + NaryNodeOp(const std::vector<Expr>& nodes) + : NaryNodeOp(nodes, nodes[0]->shape()) {} + virtual ~NaryNodeOp() {} std::vector<Expr>& children() { return children_; } diff --git a/src/graph/node_initializers.cpp b/src/graph/node_initializers.cpp index f82b6f45..0d131c61 100644 --- a/src/graph/node_initializers.cpp +++ b/src/graph/node_initializers.cpp @@ -72,7 +72,7 @@ void xorshift(Tensor t) { std::vector<float> vals(t->size()); for(auto&& v : vals) v = xor128(); - t << vals; + t->set(vals); } void glorot_normal(Tensor t) { @@ -110,7 +110,7 @@ void ortho(Tensor t) { NodeInitializer from_vector(const std::vector<float>& v) { auto vPtr = New<std::vector<float>>(v.begin(), v.end()); return [vPtr](Tensor t) { - t->set(vPtr->data(), vPtr->data() + vPtr->size()); + t->set(vPtr->data(), vPtr->data() + vPtr->size()); }; } @@ -127,12 +127,12 @@ NodeInitializer from_sparse_vector( }; } -NodeInitializer from_numpy(const cnpy::NpyArray& np) { +NodeInitializer from_numpy(const cnpy::NpyArrayPtr& np) { return [np](Tensor t) { size_t size = 1; - for(size_t dim : np.shape) + for(size_t dim : np->shape) size *= dim; - t->set((float*)np.data(), (float*)np.data() + size); + t->set((float*)np->data(), (float*)np->data() + size); }; } diff --git a/src/graph/node_initializers.h b/src/graph/node_initializers.h index 6bd83c12..5b069657 100644 --- a/src/graph/node_initializers.h +++ b/src/graph/node_initializers.h @@ -27,25 +27,32 @@ NodeInitializer from_value(float v); NodeInitializer diag(float val); -template <class Distribution> -void distribution(std::vector<float>& vals, float a, float b) { +template <class Distribution, class Iterator> +void distribution(Iterator begin, Iterator end, float a, float b) { std::default_random_engine engine(Config::seed++); Distribution dist(a, b); auto gen = std::bind(dist, engine); - std::generate(begin(vals), end(vals), gen); + std::generate(begin, end, gen); +} + +template <class Distribution> +void distribution(std::vector<float>& vals, float a, float b) { + distribution<Distribution>(vals.begin(), vals.end(), a, b); } template <class Distribution> void distribution(Tensor t, float a, float b) { std::vector<float> vals(t->size()); - distribution<Distribution>(vals, a, b); - t << vals; + distribution<Distribution>(vals.begin(), vals.end(), a, b); + t->set(vals); } NodeInitializer normal(float scale = 0.1, bool ortho = true); NodeInitializer uniform(float scale = 0.1); +static inline void dummy(Tensor t) {} + void ortho(Tensor t); void glorot_uniform(Tensor t); @@ -60,7 +67,7 @@ NodeInitializer from_vector(const std::vector<size_t>& v); NodeInitializer from_sparse_vector( std::pair<std::vector<size_t>, std::vector<float>>& v); -NodeInitializer from_numpy(const cnpy::NpyArray& np); +NodeInitializer from_numpy(const cnpy::NpyArrayPtr& np); NodeInitializer from_word2vec(const std::string& file, int dimVoc, diff --git a/src/graph/node_operators.cu b/src/graph/node_operators.cpp index 76f0b1e3..76f0b1e3 100644 --- a/src/graph/node_operators.cu +++ b/src/graph/node_operators.cpp diff --git a/src/graph/node_operators.h b/src/graph/node_operators.h index b785fade..8720d0bb 100644 --- a/src/graph/node_operators.h +++ b/src/graph/node_operators.h @@ -7,12 +7,11 @@ namespace marian { struct 
ConstantNode : public Node { - template <typename... Args> - ConstantNode(Args... args) - : Node(args...), - init_(new NodeInitializer(Get(keywords::init, [](Tensor) {}))), + ConstantNode(Ptr<ExpressionGraph> graph, const Shape& shape, const NodeInitializer& init) + : Node(graph, shape), + init_(new NodeInitializer(init)), initialized_(false) { - ABORT_IF(!Has(keywords::shape), "Constant items require shape information"); + setTrainable(false); } @@ -42,14 +41,11 @@ private: }; struct ParamNode : public Node { - template <typename... Args> - ParamNode(Args... args) - : Node(args...), - init_(new NodeInitializer(Get(keywords::init, [](Tensor) {}))), + ParamNode(Ptr<ExpressionGraph> graph, const Shape& shape, const NodeInitializer& init, bool fixed = false) + : Node(graph, shape), + init_(new NodeInitializer(init)), initialized_(false) { - ABORT_IF(!Has(keywords::shape), "Param items require shape information"); - bool fixed = Get(keywords::fixed, false); setTrainable(!fixed); } diff --git a/src/graph/node_operators_binary.h b/src/graph/node_operators_binary.h index a2a47a61..c9e67cd7 100644 --- a/src/graph/node_operators_binary.h +++ b/src/graph/node_operators_binary.h @@ -16,16 +16,13 @@ private: float scalar_; public: - template <typename... Args> DotNodeOp(Expr a, Expr b, bool transA, bool transB, - float scalar, - Args... args) + float scalar) : NaryNodeOp({a, b}, - keywords::shape = newShape(a, b, transA, transB), - args...), + newShape(a, b, transA, transB)), transA_(transA), transB_(transB), scalar_(scalar) {} @@ -149,8 +146,7 @@ public: bool transA, bool transB, float scalar) - : NaryNodeOp(nodes, keywords::shape = newShape(nodes[0], nodes[1], - transA, transB)), + : NaryNodeOp(nodes, newShape(nodes[0], nodes[1], transA, transB)), transA_(transA), transB_(transB), scalar_(scalar){} @@ -278,16 +274,13 @@ private: float scalar_; public: - template <typename... Args> DotBatchedNodeOp(Expr a, Expr b, bool transA, bool transB, - float scalar, - Args... args) + float scalar) : NaryNodeOp({a, b}, - keywords::shape = newShape(a, b, transA, transB), - args...), + newShape(a, b, transA, transB)), transA_(transA), transB_(transB), scalar_(scalar) {} @@ -407,7 +400,7 @@ public: struct ScalarProductNodeOp : public NaryNodeOp { template <typename... Args> ScalarProductNodeOp(Expr a, Expr b, Args... args) - : NaryNodeOp({a, b}, keywords::shape = newShape(a, b, args...), args...) { + : NaryNodeOp({a, b}, newShape(a, b, args...)) { } template <typename... Args> @@ -440,9 +433,8 @@ struct ScalarProductNodeOp : public NaryNodeOp { }; struct ElementBinaryNodeOp : public NaryNodeOp { - template <typename... Args> - ElementBinaryNodeOp(Expr a, Expr b, Args... args) - : NaryNodeOp({a, b}, keywords::shape = newShape(a, b), args...) {} + ElementBinaryNodeOp(Expr a, Expr b) + : NaryNodeOp({a, b}, newShape(a, b)) {} Shape newShape(Expr a, Expr b) { return Shape::broadcast({a, b}); @@ -452,8 +444,7 @@ struct ElementBinaryNodeOp : public NaryNodeOp { }; struct PlusNodeOp : public ElementBinaryNodeOp { - template <typename... Args> - PlusNodeOp(Args... args) : ElementBinaryNodeOp(args...) {} + PlusNodeOp(Expr a, Expr b) : ElementBinaryNodeOp(a, b) {} NodeOps forwardOps() { using namespace functional; @@ -473,8 +464,7 @@ struct PlusNodeOp : public ElementBinaryNodeOp { }; struct MinusNodeOp : public ElementBinaryNodeOp { - template <typename... Args> - MinusNodeOp(Args... args) : ElementBinaryNodeOp(args...) 
{} + MinusNodeOp(Expr a, Expr b) : ElementBinaryNodeOp(a, b) {} NodeOps forwardOps() { using namespace functional; @@ -494,8 +484,7 @@ struct MinusNodeOp : public ElementBinaryNodeOp { }; struct MultNodeOp : public ElementBinaryNodeOp { - template <typename... Args> - MultNodeOp(Args... args) : ElementBinaryNodeOp(args...) {} + MultNodeOp(Expr a, Expr b) : ElementBinaryNodeOp(a, b) {} NodeOps forwardOps() { using namespace functional; @@ -515,8 +504,7 @@ struct MultNodeOp : public ElementBinaryNodeOp { }; struct DivNodeOp : public ElementBinaryNodeOp { - template <typename... Args> - DivNodeOp(Args... args) : ElementBinaryNodeOp(args...) {} + DivNodeOp(Expr a, Expr b) : ElementBinaryNodeOp(a, b) {} NodeOps forwardOps() { using namespace functional; @@ -565,9 +553,8 @@ struct DivNodeOp : public ElementBinaryNodeOp { // Cross-entropy node. It computes -b*log(softmax(a)), summing rowwise. struct CrossEntropyNodeOp : public NaryNodeOp { - template <typename... Args> - CrossEntropyNodeOp(Expr a, Expr b, Args... args) - : NaryNodeOp({a, b}, keywords::shape = newShape(a), args...) {} + CrossEntropyNodeOp(Expr a, Expr b) + : NaryNodeOp({a, b}, newShape(a)) {} Shape newShape(Expr a) { Shape shape1 = a->shape(); @@ -591,10 +578,7 @@ struct CrossEntropyNodeOp : public NaryNodeOp { struct ConcatenateNodeOp : public NaryNodeOp { template <typename... Args> ConcatenateNodeOp(const std::vector<Expr>& nodes, Args... args) - : NaryNodeOp(nodes, - keywords::shape - = newShape(nodes, keywords::Get(keywords::axis, 0, args...)), - args...) {} + : NaryNodeOp(nodes, newShape(nodes, keywords::Get(keywords::axis, 0, args...))) {} Shape newShape(const std::vector<Expr>& nodes, int ax) { Shape shape = nodes.back()->shape(); diff --git a/src/graph/node_operators_unary.h b/src/graph/node_operators_unary.h index 0a76471b..e857e790 100644 --- a/src/graph/node_operators_unary.h +++ b/src/graph/node_operators_unary.h @@ -14,9 +14,11 @@ namespace marian { struct UnaryNodeOp : public NaryNodeOp { - template <typename... Args> - UnaryNodeOp(Expr a, Args... args) - : NaryNodeOp({a}, keywords::shape = a->shape(), args...) {} + UnaryNodeOp(Expr a, Shape shape) + : NaryNodeOp({a}, shape) {} + + UnaryNodeOp(Expr a) + : NaryNodeOp({a}, a->shape()) {} const std::string color() { return "yellow"; } }; @@ -26,9 +28,9 @@ private: float scalar_{0}; public: - template <typename... Args> - ScalarAddNodeOp(Expr a, float scalar, Args... args) - : UnaryNodeOp(a, args...), scalar_{scalar} {} + ScalarAddNodeOp(Expr a, float scalar) + : UnaryNodeOp(a), + scalar_{scalar} {} NodeOps forwardOps() { using namespace functional; @@ -67,9 +69,8 @@ private: float scalar_{0}; public: - template <typename... Args> - ScalarMultNodeOp(Expr a, float scalar, Args... args) - : UnaryNodeOp(a, args...), scalar_{scalar} {} + ScalarMultNodeOp(Expr a, float scalar) + : UnaryNodeOp(a), scalar_{scalar} {} NodeOps forwardOps() { using namespace functional; @@ -104,8 +105,7 @@ public: }; struct LogitNodeOp : public UnaryNodeOp { - template <typename... Args> - LogitNodeOp(Args... args) : UnaryNodeOp(args...) 
{} + LogitNodeOp(Expr a) : UnaryNodeOp(a) {} NodeOps forwardOps() { using namespace functional; @@ -164,7 +164,7 @@ struct LogitNodeOp : public UnaryNodeOp { struct TanhNodeOp : public NaryNodeOp { TanhNodeOp(const std::vector<Expr>& nodes) - : NaryNodeOp(nodes, keywords::shape = newShape(nodes)) {} + : NaryNodeOp(nodes, newShape(nodes)) {} Shape newShape(const std::vector<Expr>& nodes) { return Shape::broadcast(nodes); @@ -214,8 +214,7 @@ struct TanhNodeOp : public NaryNodeOp { struct ReLUNodeOp : public UnaryNodeOp { - template <typename... Args> - ReLUNodeOp(Args... args) : UnaryNodeOp(args...) {} + ReLUNodeOp(Expr a) : UnaryNodeOp(a) {} NodeOps forwardOps() { // f(x) = max(0, x) @@ -265,9 +264,8 @@ struct ReLUNodeOp : public UnaryNodeOp { * \f] */ struct PReLUNodeOp : public UnaryNodeOp { - template <typename... Args> - PReLUNodeOp(float alpha, Args... args) - : UnaryNodeOp(args...), alpha_(alpha) {} + PReLUNodeOp(float alpha, Expr a) + : UnaryNodeOp(a), alpha_(alpha) {} NodeOps forwardOps() { using namespace functional; @@ -316,8 +314,7 @@ private: * */ struct SwishNodeOp : public UnaryNodeOp { - template <typename... Args> - SwishNodeOp(Args... args) : UnaryNodeOp(args...) {} + SwishNodeOp(Expr a) : UnaryNodeOp(a) {} NodeOps forwardOps() { using namespace functional; @@ -338,14 +335,12 @@ struct SwishNodeOp : public UnaryNodeOp { const std::string type() { return "swish"; } }; -struct SoftmaxNodeOp : public NaryNodeOp { - template <typename... Args> - SoftmaxNodeOp(Expr a, Args... args) - : NaryNodeOp(a, args...), mask_(nullptr) {} +struct SoftmaxNodeOp : public UnaryNodeOp { + SoftmaxNodeOp(Expr a) + : UnaryNodeOp(a), mask_(nullptr) {} - template <typename... Args> - SoftmaxNodeOp(Expr a, Expr mask, Args... args) - : NaryNodeOp({a}, args...), mask_(mask) {} + SoftmaxNodeOp(Expr a, Expr mask) + : UnaryNodeOp(a), mask_(mask) {} Expr mask_; @@ -396,8 +391,7 @@ struct SoftmaxNodeOp : public NaryNodeOp { }; struct LogSoftmaxNodeOp : public UnaryNodeOp { - template <typename... Args> - LogSoftmaxNodeOp(Args... args) : UnaryNodeOp(args...) {} + LogSoftmaxNodeOp(Expr a) : UnaryNodeOp(a) {} NodeOps forwardOps() { return {NodeOp(LogSoftmax(val_, child(0)->val()))}; } @@ -416,7 +410,7 @@ struct SumNodeOp : public UnaryNodeOp { template <typename... Args> SumNodeOp(Expr a, Args... args) - : UnaryNodeOp(a, keywords::shape = newShape(a, args...), args...) {} + : UnaryNodeOp(a, newShape(a, args...)) {} NodeOps forwardOps() { using namespace functional; @@ -465,7 +459,7 @@ struct MeanNodeOp : public UnaryNodeOp { template <typename... Args> MeanNodeOp(Expr a, Args... args) - : UnaryNodeOp(a, keywords::shape = newShape(a, args...), args...) {} + : UnaryNodeOp(a, newShape(a, args...)) {} NodeOps forwardOps() { using namespace functional; @@ -516,8 +510,7 @@ struct MeanNodeOp : public UnaryNodeOp { }; struct LogNodeOp : public UnaryNodeOp { - template <typename... Args> - LogNodeOp(Args... args) : UnaryNodeOp(args...) {} + LogNodeOp(Expr a) : UnaryNodeOp(a) {} NodeOps forwardOps() { using namespace functional; @@ -534,8 +527,7 @@ struct LogNodeOp : public UnaryNodeOp { }; struct ExpNodeOp : public UnaryNodeOp { - template <typename... Args> - ExpNodeOp(Args... args) : UnaryNodeOp(args...) {} + ExpNodeOp(Expr a) : UnaryNodeOp(a) {} NodeOps forwardOps() { using namespace functional; @@ -553,9 +545,8 @@ struct ExpNodeOp : public UnaryNodeOp { struct SqrtNodeOp : public UnaryNodeOp { float epsilon_; - template <typename... Args> - SqrtNodeOp(Expr a, float epsilon, Args... 
args) - : UnaryNodeOp(a, args...), epsilon_(epsilon) {} + SqrtNodeOp(Expr a, float epsilon) + : UnaryNodeOp(a), epsilon_(epsilon) {} NodeOps forwardOps() { using namespace functional; @@ -591,8 +582,7 @@ struct SqrtNodeOp : public UnaryNodeOp { }; struct SquareNodeOp : public UnaryNodeOp { - template <typename... Args> - SquareNodeOp(Args... args) : UnaryNodeOp(args...) {} + SquareNodeOp(Expr a) : UnaryNodeOp(a) {} NodeOps forwardOps() { using namespace functional; @@ -609,8 +599,7 @@ struct SquareNodeOp : public UnaryNodeOp { }; struct NegNodeOp : public UnaryNodeOp { - template <typename... Args> - NegNodeOp(Args... args) : UnaryNodeOp(args...) {} + NegNodeOp(Expr a) : UnaryNodeOp(a) {} NodeOps forwardOps() { using namespace functional; @@ -626,9 +615,8 @@ struct NegNodeOp : public UnaryNodeOp { }; struct RowsNodeOp : public UnaryNodeOp { - template <typename... Args> - RowsNodeOp(Expr a, const std::vector<size_t>& indeces, Args... args) - : UnaryNodeOp(a, keywords::shape = newShape(a, indeces), args...), + RowsNodeOp(Expr a, const std::vector<size_t>& indeces) + : UnaryNodeOp(a, newShape(a, indeces)), indices_(indeces) {} NodeOps forwardOps() { @@ -679,9 +667,8 @@ struct RowsNodeOp : public UnaryNodeOp { }; struct ColsNodeOp : public UnaryNodeOp { - template <typename... Args> - ColsNodeOp(Expr a, const std::vector<size_t>& indeces, Args... args) - : UnaryNodeOp(a, keywords::shape = newShape(a, indeces), args...), + ColsNodeOp(Expr a, const std::vector<size_t>& indeces) + : UnaryNodeOp(a, newShape(a, indeces)), indices_(indeces) {} NodeOps forwardOps() { @@ -731,7 +718,7 @@ struct ColsNodeOp : public UnaryNodeOp { struct SelectNodeOp : public UnaryNodeOp { SelectNodeOp(Expr a, int axis, const std::vector<size_t>& indeces) - : UnaryNodeOp(a, keywords::shape = newShape(a, axis, indeces)), + : UnaryNodeOp(a, newShape(a, axis, indeces)), indices_(indeces) {} NodeOps forwardOps() { @@ -787,7 +774,7 @@ struct TransposeNodeOp : public UnaryNodeOp { std::vector<int> axes_; TransposeNodeOp(Expr a, const std::vector<int>& axes) - : UnaryNodeOp(a, keywords::shape = newShape(a, axes)), + : UnaryNodeOp(a, newShape(a, axes)), axes_{axes} {} NodeOps forwardOps() { @@ -844,8 +831,8 @@ private: public: template <typename... Args> - ReshapeNodeOp(Expr a, Shape shape, Args... args) - : UnaryNodeOp(a, keywords::shape = shape, args...), reshapee_(a) { + ReshapeNodeOp(Expr a, Shape shape) + : UnaryNodeOp(a, shape), reshapee_(a) { Node::destroy_ = false; } @@ -909,7 +896,7 @@ private: public: StepNodeOp(Expr a, int step, int axis) - : UnaryNodeOp(a, keywords::shape = newShape(a, axis)), + : UnaryNodeOp(a, newShape(a, axis)), stepNode_(a), step_(step) { Node::destroy_ = false; @@ -981,9 +968,8 @@ public: }; struct ShiftNodeOp : public UnaryNodeOp { - template <typename... Args> - ShiftNodeOp(Expr a, Shape shift, Args... 
args) - : UnaryNodeOp(a, keywords::shape = a->shape(), args...), shift_(shift) {} + ShiftNodeOp(Expr a, Shape shift) + : UnaryNodeOp(a, a->shape()), shift_(shift) {} NodeOps forwardOps() { return {NodeOp(Shift(val_, child(0)->val(), shift_, false))}; diff --git a/src/layers/convolution.cu b/src/layers/convolution.cu index b0749450..064abedf 100644 --- a/src/layers/convolution.cu +++ b/src/layers/convolution.cu @@ -18,11 +18,11 @@ Expr Convolution::apply(Expr x) { kernelNum, kernelDims.first, kernelDims.second}, - keywords::init=inits::glorot_uniform); + inits::glorot_uniform); auto bias = graph_->param(prefix + "_conv_bias", {1, kernelNum, 1, 1}, - keywords::init=inits::zeros); + inits::zeros); std::vector<Expr> nodes = {x, kernel, bias}; return Expression<ConvolutionOp>(nodes, diff --git a/src/layers/generic.h b/src/layers/generic.h index 558280af..b9c1d100 100644 --- a/src/layers/generic.h +++ b/src/layers/generic.h @@ -79,7 +79,7 @@ public: else { W = g->param(name + "_" + nameW, {in->shape()[-1], dim}, - keywords::init = inits::glorot_uniform); + inits::glorot_uniform); } Expr b; @@ -88,7 +88,7 @@ public: b = tiedParams_[nameB]; else b = g->param( - name + "_" + nameB, {1, dim}, keywords::init = inits::zeros); + name + "_" + nameB, {1, dim}, inits::zeros); params_.push_back(W); params_.push_back(b); @@ -97,17 +97,17 @@ public: if(nematusNorm) { auto ln_s = g->param(name + "_ln_s" + std::to_string(i), {1, dim}, - keywords::init = inits::from_value(1.f)); + inits::from_value(1.f)); auto ln_b = g->param(name + "_ln_b" + std::to_string(i), {1, dim}, - keywords::init = inits::zeros); + inits::zeros); outputs.push_back( layer_norm(affine(in, W, b, false, transposeW), ln_s, ln_b, NEMATUS_LN_EPS)); } else { auto gamma = g->param(name + "_gamma" + std::to_string(i), {1, dim}, - keywords::init = inits::from_value(1.0)); + inits::from_value(1.0)); params_.push_back(gamma); outputs.push_back(layer_norm(dot(in, W, false, transposeW), gamma, b)); @@ -151,14 +151,14 @@ public: else { W = g->param(name + "_" + nameW, {input->shape()[-1], dim}, - keywords::init = inits::glorot_uniform); + inits::glorot_uniform); } Expr b; std::string nameB = "b"; if(tiedParams_.count(nameB)) b = tiedParams_[nameB]; else - b = g->param(name + "_" + nameB, {1, dim}, keywords::init = inits::zeros); + b = g->param(name + "_" + nameB, {1, dim}, inits::zeros); params_ = {W, b}; @@ -166,15 +166,15 @@ public: if(layerNorm) { if(nematusNorm) { auto ln_s = g->param( - name + "_ln_s", {1, dim}, keywords::init = inits::from_value(1.f)); + name + "_ln_s", {1, dim}, inits::from_value(1.f)); auto ln_b - = g->param(name + "_ln_b", {1, dim}, keywords::init = inits::zeros); + = g->param(name + "_ln_b", {1, dim}, inits::zeros); out = layer_norm(affine(input, W, b, false, transposeW), ln_s, ln_b, NEMATUS_LN_EPS); } else { auto gamma = g->param( - name + "_gamma", {1, dim}, keywords::init = inits::from_value(1.0)); + name + "_gamma", {1, dim}, inits::from_value(1.0)); params_.push_back(gamma); out = layer_norm(dot(input, W, false, transposeW), gamma, b); @@ -219,8 +219,8 @@ struct EmbeddingFactory : public Factory { return graph_->param(name, {dimVoc, dimEmb}, - keywords::init = initFunc, - keywords::fixed = fixed); + initFunc, + fixed); } }; @@ -239,7 +239,7 @@ Expr Cost(Expr logits, if(weights) ce = weights * ce; - + if(smoothing > 0) { // @TODO: add this to CE kernels instead auto ceq = mean(logsoftmax(logits), axis = -1); @@ -250,7 +250,7 @@ Expr Cost(Expr logits, ce = ce * mask; auto costSum = sum(ce, axis = -3); - + Expr cost; // 
axes: // - time axis (words): -3 diff --git a/src/layers/guided_alignment.h b/src/layers/guided_alignment.h index fb430ecc..c353f649 100644 --- a/src/layers/guided_alignment.h +++ b/src/layers/guided_alignment.h @@ -17,7 +17,7 @@ Expr guidedAlignmentCost(Ptr<ExpressionGraph> graph, auto aln = graph->constant( {dimBatch, 1, dimSrc, dimTrg}, - keywords::init = inits::from_vector(batch->getGuidedAlignment())); + inits::from_vector(batch->getGuidedAlignment())); std::string guidedCostType = options->get<std::string>("guided-alignment-cost"); diff --git a/src/models/amun.h b/src/models/amun.h index fd4160a3..8382c863 100644 --- a/src/models/amun.h +++ b/src/models/amun.h @@ -41,9 +41,9 @@ public: using namespace keywords; LOG(info, "Loading model from {}", name); - + auto numpy = cnpy::npz_load(name); - + std::map<std::string, std::string> nameMap = {{"decoder_U", "decoder_cell1_U"}, {"decoder_Ux", "decoder_cell1_Ux"}, @@ -91,38 +91,38 @@ public: {"encoder_r_bx", "encoder_bi_r_bx"}, {"encoder_r_gamma1", "encoder_bi_r_gamma1"}, {"encoder_r_gamma2", "encoder_bi_r_gamma2"}}; - + if(opt<bool>("tied-embeddings-src") || opt<bool>("tied-embeddings-all")) nameMap["Wemb"] = "Wemb"; - + graph->setReloaded(false); - + for(auto it : numpy) { auto name = it.first; - + if(name == "decoder_c_tt") continue; if(name.substr(0, 8) == "special:") continue; - + Shape shape; - if(numpy[name].shape.size() == 2) { + if(numpy[name]->shape.size() == 2) { shape.resize(2); - shape.set(0, numpy[name].shape[0]); - shape.set(1, numpy[name].shape[1]); - } else if(numpy[name].shape.size() == 1) { + shape.set(0, numpy[name]->shape[0]); + shape.set(1, numpy[name]->shape[1]); + } else if(numpy[name]->shape.size() == 1) { shape.resize(2); shape.set(0, 1); - shape.set(1, numpy[name].shape[0]); + shape.set(1, numpy[name]->shape[0]); } - + std::string pName = name; if(nameMap.count(name)) pName = nameMap[name]; - - graph->param(pName, shape, init = inits::from_numpy(numpy[name])); + + graph->param(pName, shape, inits::from_numpy(numpy[name])); } - + graph->setReloaded(true); } @@ -182,7 +182,7 @@ public: for(auto p : graph->params()->getMap()) { std::vector<float> v; - p.second->val() >> v; + p.second->val()->get(v); unsigned dim; if(p.second->shape()[0] == 1) { diff --git a/src/models/encdec.h b/src/models/encdec.h index 071a9eb0..cfda0404 100644 --- a/src/models/encdec.h +++ b/src/models/encdec.h @@ -31,7 +31,7 @@ protected: auto batchEmbeddings = reshape(chosenEmbeddings, {dimWords, dimBatch, dimEmb}); auto batchMask = graph->constant( - {dimWords, dimBatch, 1}, init = inits::from_vector(subBatch->mask())); + {dimWords, dimBatch, 1}, inits::from_vector(subBatch->mask())); return std::make_tuple(batchEmbeddings, batchMask); } @@ -113,10 +113,10 @@ public: = reshape(chosenEmbeddings, {dimWords, dimBatch, opt<int>("dim-emb")}); auto yMask = graph->constant({dimWords, dimBatch, 1}, - init = inits::from_vector(subBatch->mask())); + inits::from_vector(subBatch->mask())); auto yData = graph->constant({(int)subBatch->data().size(), 1}, - init = inits::from_vector(subBatch->data())); + inits::from_vector(subBatch->data())); auto yShifted = shift(y, {1, 0, 0}); @@ -150,7 +150,7 @@ public: Expr selectedEmbs; if(embIdx.empty()) { selectedEmbs = graph->constant({1, 1, dimBatch, dimTrgEmb}, - init = inits::zeros); + inits::zeros); } else { selectedEmbs = rows(yEmb, embIdx); selectedEmbs @@ -367,7 +367,7 @@ public: weights = graph->constant( {1, dimWords, dimBatch, 1}, - keywords::init = inits::from_vector(batch->getDataWeights())); + 
inits::from_vector(batch->getDataWeights())); } auto cost = Cost(nextState->getProbs(), diff --git a/src/models/nematus.h b/src/models/nematus.h index 3b93ab52..82b77c68 100644 --- a/src/models/nematus.h +++ b/src/models/nematus.h @@ -42,21 +42,21 @@ public: continue; Shape shape; - if(numpy[name].shape.size() == 2) { + if(numpy[name]->shape.size() == 2) { shape.resize(2); - shape.set(0, numpy[name].shape[0]); - shape.set(1, numpy[name].shape[1]); - } else if(numpy[name].shape.size() == 1) { + shape.set(0, numpy[name]->shape[0]); + shape.set(1, numpy[name]->shape[1]); + } else if(numpy[name]->shape.size() == 1) { shape.resize(2); shape.set(0, 1); - shape.set(1, numpy[name].shape[0]); + shape.set(1, numpy[name]->shape[0]); } std::string pName = name; if(nameMap_.count(name)) pName = nameMap_[name]; - graph->param(pName, shape, init = inits::from_numpy(numpy[name])); + graph->param(pName, shape, inits::from_numpy(numpy[name])); } graph->setReloaded(true); @@ -76,7 +76,7 @@ public: for(auto p : graph->params()->getMap()) { std::vector<float> v; - p.second->val() >> v; + p.second->val()->get(v); unsigned dim; if(p.second->shape()[0] == 1) { diff --git a/src/models/s2s.h b/src/models/s2s.h index 2d1ee281..164c86f5 100644 --- a/src/models/s2s.h +++ b/src/models/s2s.h @@ -275,7 +275,7 @@ public: int dimBatch = batch->size(); int dimRnn = opt<int>("dim-rnn"); - start = graph->constant({dimBatch, dimRnn}, init = inits::zeros); + start = graph->constant({dimBatch, dimRnn}, inits::zeros); } rnn::States startStates(opt<size_t>("dec-depth"), {start, start}); diff --git a/src/models/transformer.h b/src/models/transformer.h index c41453db..d8999263 100644 --- a/src/models/transformer.h +++ b/src/models/transformer.h @@ -35,7 +35,7 @@ public: // shared across batch entries auto signal = graph->constant({dimWords, 1, dimEmb}, - init = inits::from_vector(vPos)); + inits::from_vector(vPos)); return input + signal; } @@ -48,7 +48,7 @@ public: for(int j = 0; j <= i; ++j) vMask[i * length + j] = 1.f; return graph->constant({1, length, length}, - init = inits::from_vector(vMask)); + inits::from_vector(vMask)); } Expr InverseMask(Expr mask) { @@ -104,9 +104,9 @@ public: // layer normalization if(op == 'n') { auto scale = graph->param( - prefix + "_ln_scale_pre", {1, dimModel}, init = inits::ones); + prefix + "_ln_scale_pre", {1, dimModel}, inits::ones); auto bias = graph->param( - prefix + "_ln_bias_pre", {1, dimModel}, init = inits::zeros); + prefix + "_ln_bias_pre", {1, dimModel}, inits::zeros); output = layer_norm(output, scale, bias, 1e-6); } } @@ -136,9 +136,9 @@ public: // highway connection if(op == 'h') { auto Wh = graph->param( - prefix + "_Wh", {dimModel, dimModel}, init = inits::glorot_uniform); + prefix + "_Wh", {dimModel, dimModel}, inits::glorot_uniform); auto bh - = graph->param(prefix + "_bh", {1, dimModel}, init = inits::zeros); + = graph->param(prefix + "_bh", {1, dimModel}, inits::zeros); auto t = affine(prevInput, Wh, bh); output = highway(output, prevInput, t); @@ -146,9 +146,9 @@ public: // layer normalization if(op == 'n') { auto scale = graph->param( - prefix + "_ln_scale", {1, dimModel}, init = inits::ones); + prefix + "_ln_scale", {1, dimModel}, inits::ones); auto bias = graph->param( - prefix + "_ln_bias", {1, dimModel}, init = inits::zeros); + prefix + "_ln_bias", {1, dimModel}, inits::zeros); output = layer_norm(output, scale, bias, 1e-6); } } @@ -211,8 +211,8 @@ public: int dimModel = q->shape()[-1]; auto Wq = graph->param( - prefix + "_Wq", {dimModel, dimModel}, init = 
inits::glorot_uniform); - auto bq = graph->param(prefix + "_bq", {1, dimModel}, init = inits::zeros); + prefix + "_Wq", {dimModel, dimModel}, inits::glorot_uniform); + auto bq = graph->param(prefix + "_bq", {1, dimModel}, inits::zeros); auto qh = affine(q, Wq, bq); qh = SplitHeads(qh, dimHeads); @@ -224,15 +224,15 @@ public: auto Wk = graph->param(prefixProj + "_Wk", {dimModel, dimModel}, - init = inits::glorot_uniform); + inits::glorot_uniform); auto bk = graph->param( - prefixProj + "_bk", {1, dimModel}, init = inits::zeros); + prefixProj + "_bk", {1, dimModel}, inits::zeros); auto Wv = graph->param(prefixProj + "_Wv", {dimModel, dimModel}, - init = inits::glorot_uniform); + inits::glorot_uniform); auto bv = graph->param( - prefixProj + "_bv", {1, dimModel}, init = inits::zeros); + prefixProj + "_bv", {1, dimModel}, inits::zeros); auto kh = affine(keys[i], Wk, bk); auto vh = affine(values[i], Wv, bv); @@ -258,8 +258,8 @@ public: int dimAtt = output->shape()[-1]; auto Wo = graph->param( - prefix + "_Wo", {dimAtt, dimOut}, init = inits::glorot_uniform); - auto bo = graph->param(prefix + "_bo", {1, dimOut}, init = inits::zeros); + prefix + "_Wo", {dimAtt, dimOut}, inits::glorot_uniform); + auto bo = graph->param(prefix + "_bo", {1, dimOut}, inits::zeros); output = affine(output, Wo, bo); return output; @@ -336,12 +336,12 @@ public: int dimFfn = options->get<int>("transformer-dim-ffn"); auto W1 = graph->param( - prefix + "_W1", {dimModel, dimFfn}, init = inits::glorot_uniform); - auto b1 = graph->param(prefix + "_b1", {1, dimFfn}, init = inits::zeros); + prefix + "_W1", {dimModel, dimFfn}, inits::glorot_uniform); + auto b1 = graph->param(prefix + "_b1", {1, dimFfn}, inits::zeros); auto W2 = graph->param( - prefix + "_W2", {dimFfn, dimModel}, init = inits::glorot_uniform); - auto b2 = graph->param(prefix + "_b2", {1, dimModel}, init = inits::zeros); + prefix + "_W2", {dimFfn, dimModel}, inits::glorot_uniform); + auto b2 = graph->param(prefix + "_b2", {1, dimModel}, inits::zeros); output = affine(output, W1, b1); output = swish(output); diff --git a/src/optimizers/optimizers.cu b/src/optimizers/optimizers.cu index 49c380e1..afec4708 100644 --- a/src/optimizers/optimizers.cu +++ b/src/optimizers/optimizers.cu @@ -50,15 +50,15 @@ void Adagrad::load(const std::string& name, auto numpy = cnpy::npz_load(name); for(auto it : numpy) { auto name = it.first; - cnpy::NpyArray& np = it.second; + auto np = it.second; // get the size of gt_ - totalSize = np.shape[1]; + totalSize = np->shape[1]; // extract data into vectors if(name == "adagrad_gt") { vGt.resize(totalSize); - std::copy((float*)np.data(), (float*)np.data() + totalSize, vGt.begin()); + std::copy((float*)np->data(), (float*)np->data() + totalSize, vGt.begin()); } } @@ -174,19 +174,19 @@ void Adam::load(const std::string& name, auto numpy = cnpy::npz_load(name); for(auto it : numpy) { auto name = it.first; - cnpy::NpyArray& np = it.second; + auto np = it.second; // get the size of mt_ and vt_, they are the same - totalSize = np.shape[1]; + totalSize = np->shape[1]; // extract data into vectors if(name == "adam_mt") { vMt.resize(totalSize); - std::copy((float*)np.data(), (float*)np.data() + totalSize, vMt.begin()); + std::copy((float*)np->data(), (float*)np->data() + totalSize, vMt.begin()); } if(name == "adam_vt") { vVt.resize(totalSize); - std::copy((float*)np.data(), (float*)np.data() + totalSize, vVt.begin()); + std::copy((float*)np->data(), (float*)np->data() + totalSize, vVt.begin()); } } diff --git a/src/rnn/attention.cu 
b/src/rnn/attention.cu index 2faa9d9a..d5e44a2f 100644 --- a/src/rnn/attention.cu +++ b/src/rnn/attention.cu @@ -9,7 +9,7 @@ namespace rnn { struct AttentionNodeOp : public NaryNodeOp { AttentionNodeOp(const std::vector<Expr>& nodes) - : NaryNodeOp(nodes, keywords::shape = newShape(nodes)) {} + : NaryNodeOp(nodes, newShape(nodes)) {} Shape newShape(const std::vector<Expr>& nodes) { Shape shape = Shape::broadcast({nodes[1], nodes[2]}); diff --git a/src/rnn/attention.h b/src/rnn/attention.h index faece60a..70337355 100644 --- a/src/rnn/attention.h +++ b/src/rnn/attention.h @@ -51,15 +51,15 @@ public: Wa_ = graph->param(prefix + "_W_comb_att", {dimDecState, dimEncState}, - keywords::init = inits::glorot_uniform); + inits::glorot_uniform); Ua_ = graph->param(prefix + "_Wc_att", {dimEncState, dimEncState}, - keywords::init = inits::glorot_uniform); + inits::glorot_uniform); va_ = graph->param(prefix + "_U_att", {dimEncState, 1}, - keywords::init = inits::glorot_uniform); + inits::glorot_uniform); ba_ = graph->param( - prefix + "_b_att", {1, dimEncState}, keywords::init = inits::zeros); + prefix + "_b_att", {1, dimEncState}, inits::zeros); if(dropout_ > 0.0f) { dropMaskContext_ = graph->dropout(dropout_, {1, dimEncState}); @@ -75,17 +75,17 @@ public: // instead of gammaContext_ Wc_att_lns_ = graph->param(prefix + "_Wc_att_lns", {1, dimEncState}, - keywords::init = inits::from_value(1.f)); + inits::from_value(1.f)); Wc_att_lnb_ = graph->param(prefix + "_Wc_att_lnb", {1, dimEncState}, - keywords::init = inits::zeros); + inits::zeros); // instead of gammaState_ W_comb_att_lns_ = graph->param(prefix + "_W_comb_att_lns", {1, dimEncState}, - keywords::init = inits::from_value(1.f)); + inits::from_value(1.f)); W_comb_att_lnb_ = graph->param(prefix + "_W_comb_att_lnb", {1, dimEncState}, - keywords::init = inits::zeros); + inits::zeros); mappedContext_ = layer_norm(affine(contextDropped_, Ua_, ba_), Wc_att_lns_, @@ -94,10 +94,10 @@ public: } else { gammaContext_ = graph->param(prefix + "_att_gamma1", {1, dimEncState}, - keywords::init = inits::from_value(1.0)); + inits::from_value(1.0)); gammaState_ = graph->param(prefix + "_att_gamma2", {1, dimEncState}, - keywords::init = inits::from_value(1.0)); + inits::from_value(1.0)); mappedContext_ = layer_norm(dot(contextDropped_, Ua_), gammaContext_, ba_); @@ -144,7 +144,7 @@ public: auto alignedSource = scalar_product(encState_->getAttended(), e, axis = -3); - + contexts_.push_back(alignedSource); alignments_.push_back(e); return alignedSource; diff --git a/src/rnn/cells.cu b/src/rnn/cells.cu index 42373eab..8b38780f 100644 --- a/src/rnn/cells.cu +++ b/src/rnn/cells.cu @@ -9,9 +9,8 @@ namespace rnn { struct GRUFastNodeOp : public NaryNodeOp { bool final_; - template <typename... Args> - GRUFastNodeOp(const std::vector<Expr>& nodes, bool final, Args... args) - : NaryNodeOp(nodes, args...), final_(final) {} + GRUFastNodeOp(const std::vector<Expr>& nodes, bool final) + : NaryNodeOp(nodes), final_(final) {} NodeOps forwardOps() { std::vector<Tensor> inputs; @@ -53,9 +52,8 @@ Expr gruOps(const std::vector<Expr>& nodes, bool final) { /******************************************************************************/ struct LSTMCellNodeOp : public NaryNodeOp { - template <typename... Args> - LSTMCellNodeOp(const std::vector<Expr>& nodes, Args... args) - : NaryNodeOp(nodes, args...) 
{} + LSTMCellNodeOp(const std::vector<Expr>& nodes) + : NaryNodeOp(nodes) {} NodeOps forwardOps() { std::vector<Tensor> inputs; @@ -91,9 +89,8 @@ struct LSTMCellNodeOp : public NaryNodeOp { }; struct LSTMOutputNodeOp : public NaryNodeOp { - template <typename... Args> - LSTMOutputNodeOp(const std::vector<Expr>& nodes, Args... args) - : NaryNodeOp(nodes, args...) {} + LSTMOutputNodeOp(const std::vector<Expr>& nodes) + : NaryNodeOp(nodes) {} NodeOps forwardOps() { std::vector<Tensor> inputs; diff --git a/src/rnn/cells.h b/src/rnn/cells.h index 36fda810..2eeed6fa 100644 --- a/src/rnn/cells.h +++ b/src/rnn/cells.h @@ -36,15 +36,15 @@ public: U_ = graph->param(prefix + "_U", {dimState, dimState}, - keywords::init = inits::glorot_uniform); + inits::glorot_uniform); if(dimInput) W_ = graph->param(prefix + "_W", {dimInput, dimState}, - keywords::init = inits::glorot_uniform); + inits::glorot_uniform); b_ = graph->param( - prefix + "_b", {1, dimState}, keywords::init = inits::zeros); + prefix + "_b", {1, dimState}, inits::zeros); if(dropout_ > 0.0f) { if(dimInput) @@ -56,10 +56,10 @@ public: if(dimInput) gamma1_ = graph->param(prefix + "_gamma1", {1, 3 * dimState}, - keywords::init = inits::from_value(1.f)); + inits::from_value(1.f)); gamma2_ = graph->param(prefix + "_gamma2", {1, 3 * dimState}, - keywords::init = inits::from_value(1.f)); + inits::from_value(1.f)); } } @@ -143,35 +143,35 @@ public: auto U = graph->param(prefix + "_U", {dimState, 2 * dimState}, - keywords::init = inits::glorot_uniform); + inits::glorot_uniform); auto Ux = graph->param(prefix + "_Ux", {dimState, dimState}, - keywords::init = inits::glorot_uniform); + inits::glorot_uniform); U_ = concatenate({U, Ux}, keywords::axis = -1); if(dimInput > 0) { auto W = graph->param(prefix + "_W", {dimInput, 2 * dimState}, - keywords::init = inits::glorot_uniform); + inits::glorot_uniform); auto Wx = graph->param(prefix + "_Wx", {dimInput, dimState}, - keywords::init = inits::glorot_uniform); + inits::glorot_uniform); W_ = concatenate({W, Wx}, keywords::axis = -1); } auto b = graph->param( - prefix + "_b", {1, 2 * dimState}, keywords::init = inits::zeros); + prefix + "_b", {1, 2 * dimState}, inits::zeros); auto bx = graph->param( - prefix + "_bx", {1, dimState}, keywords::init = inits::zeros); + prefix + "_bx", {1, dimState}, inits::zeros); b_ = concatenate({b, bx}, keywords::axis = -1); // @TODO use this and adjust Amun model type saving and loading // U_ = graph->param(prefix + "_U", {dimState, 3 * dimState}, - // keywords::init=inits::glorot_uniform); + // (Expr a) : UnaryNodeOp(a)inits::glorot_uniform); // W_ = graph->param(prefix + "_W", {dimInput, 3 * dimState}, - // keywords::init=inits::glorot_uniform); + // (Expr a) : UnaryNodeOp(a)inits::glorot_uniform); // b_ = graph->param(prefix + "_b", {1, 3 * dimState}, - // keywords::init=inits::zeros); + // (Expr a) : UnaryNodeOp(a)inits::zeros); if(dropout_ > 0.0f) { if(dimInput) @@ -183,10 +183,10 @@ public: if(dimInput) gamma1_ = graph->param(prefix + "_gamma1", {1, 3 * dimState}, - keywords::init = inits::from_value(1.f)); + inits::from_value(1.f)); gamma2_ = graph->param(prefix + "_gamma2", {1, 3 * dimState}, - keywords::init = inits::from_value(1.f)); + inits::from_value(1.f)); } } @@ -231,7 +231,7 @@ public: if(xWs.empty()) { if(!fakeInput_ || fakeInput_->shape() != sU->shape()) fakeInput_ - = sU->graph()->constant(sU->shape(), keywords::init = inits::zeros); + = sU->graph()->constant(sU->shape(), inits::zeros); xW = fakeInput_; } else { xW = xWs.front(); @@ -299,10 +299,10 @@ 
public:
     auto U = graph->param(prefix + "_U",
                           {dimState, 2 * dimState},
-                          keywords::init = inits::glorot_uniform);
+                          inits::glorot_uniform);
     auto Ux = graph->param(prefix + "_Ux",
                            {dimState, dimState},
-                           keywords::init = inits::glorot_uniform);
+                           inits::glorot_uniform);
     if(layerNorm_) {
       U_ = U;
@@ -314,10 +314,10 @@ public:
     if(dimInput > 0) {
       auto W = graph->param(prefix + "_W",
                             {dimInput, 2 * dimState},
-                            keywords::init = inits::glorot_uniform);
+                            inits::glorot_uniform);
       auto Wx = graph->param(prefix + "_Wx",
                              {dimInput, dimState},
-                             keywords::init = inits::glorot_uniform);
+                             inits::glorot_uniform);
       if(layerNorm_) {
         W_ = W;
         Wx_ = Wx;
@@ -327,9 +327,9 @@ public:
     }
     auto b = graph->param(
-        prefix + "_b", {1, 2 * dimState}, keywords::init = inits::zeros);
+        prefix + "_b", {1, 2 * dimState}, inits::zeros);
     auto bx = graph->param(
-        prefix + "_bx", {1, dimState}, keywords::init = inits::zeros);
+        prefix + "_bx", {1, dimState}, inits::zeros);
     if(layerNorm_) {
       b_ = b;
@@ -338,11 +338,11 @@ public:
       // in specific cases we need to pass bx to the kernel
       if(encoder_ && transition_) {
         auto b0
-            = graph->constant({1, 2 * dimState}, keywords::init = inits::zeros);
+            = graph->constant({1, 2 * dimState}, inits::zeros);
         bbx_ = concatenate({b0, bx}, keywords::axis = -1);
       } else {
         bbx_
-            = graph->constant({1, 3 * dimState}, keywords::init = inits::zeros);
+            = graph->constant({1, 3 * dimState}, inits::zeros);
       }
     } else {
       bbx_ = concatenate({b, bx}, keywords::axis = -1);
@@ -358,27 +358,27 @@ public:
     if(dimInput) {
       W_lns_ = graph->param(prefix + "_W_lns",
                             {1, 2 * dimState},
-                            keywords::init = inits::from_value(1.f));
+                            inits::from_value(1.f));
       W_lnb_ = graph->param(prefix + "_W_lnb",
                             {1, 2 * dimState},
-                            keywords::init = inits::zeros);
+                            inits::zeros);
       Wx_lns_ = graph->param(prefix + "_Wx_lns",
                              {1, 1 * dimState},
-                             keywords::init = inits::from_value(1.f));
+                             inits::from_value(1.f));
       Wx_lnb_ = graph->param(prefix + "_Wx_lnb",
                              {1, 1 * dimState},
-                             keywords::init = inits::zeros);
+                             inits::zeros);
     }
     U_lns_ = graph->param(prefix + "_U_lns",
                           {1, 2 * dimState},
-                          keywords::init = inits::from_value(1.f));
+                          inits::from_value(1.f));
     U_lnb_ = graph->param(
-        prefix + "_U_lnb", {1, 2 * dimState}, keywords::init = inits::zeros);
+        prefix + "_U_lnb", {1, 2 * dimState}, inits::zeros);
     Ux_lns_ = graph->param(prefix + "_Ux_lns",
                            {1, 1 * dimState},
-                           keywords::init = inits::from_value(1.f));
+                           inits::from_value(1.f));
     Ux_lnb_ = graph->param(
-        prefix + "_Ux_lnb", {1, 1 * dimState}, keywords::init = inits::zeros);
+        prefix + "_Ux_lnb", {1, 1 * dimState}, inits::zeros);
     }
   }
@@ -468,7 +468,7 @@ public:
     if(transition_) {
       if(!fakeInput_ || fakeInput_->shape() != sU->shape())
         fakeInput_
-            = sU->graph()->constant(sU->shape(), keywords::init = inits::zeros);
+            = sU->graph()->constant(sU->shape(), inits::zeros);
       xW = fakeInput_;
     } else {
       xW = xWs.front();
@@ -514,14 +514,14 @@ public:
     U_ = graph->param(prefix + "_U",
                       {dimState, 4 * dimState},
-                      keywords::init = inits::glorot_uniform);
+                      inits::glorot_uniform);
     if(dimInput)
       W_ = graph->param(prefix + "_W",
                         {dimInput, 4 * dimState},
-                        keywords::init = inits::glorot_uniform);
+                        inits::glorot_uniform);
     b_ = graph->param(
-        prefix + "_b", {1, 4 * dimState}, keywords::init = inits::zeros);
+        prefix + "_b", {1, 4 * dimState}, inits::zeros);
     if(dropout_ > 0.0f) {
       if(dimInput)
@@ -533,10 +533,10 @@ public:
       if(dimInput)
         gamma1_ = graph->param(prefix + "_gamma1",
                                {1, 4 * dimState},
-                               keywords::init = inits::from_value(1.f));
+                               inits::from_value(1.f));
       gamma2_ = graph->param(prefix + "_gamma2",
                              {1, 4 * dimState},
-                             keywords::init = inits::from_value(1.f));
+                             inits::from_value(1.f));
     }
   }
@@ -586,7 +586,7 @@ public:
     if(xWs.empty()) {
       if(!fakeInput_ || fakeInput_->shape() != sU->shape())
         fakeInput_
-            = sU->graph()->constant(sU->shape(), keywords::init = inits::zeros);
+            = sU->graph()->constant(sU->shape(), inits::zeros);
       xW = fakeInput_;
     } else {
       xW = xWs.front();
@@ -623,20 +623,20 @@ public:
     Um_ = graph->param(prefix + "_Um",
                        {dimState, dimState},
-                       keywords::init = inits::glorot_uniform);
+                       inits::glorot_uniform);
     Wm_ = graph->param(prefix + "_Wm",
                        {dimInput, dimState},
-                       keywords::init = inits::glorot_uniform);
+                       inits::glorot_uniform);
     bm_ = graph->param(
-        prefix + "_bm", {1, dimState}, keywords::init = inits::zeros);
+        prefix + "_bm", {1, dimState}, inits::zeros);
     if(CellType::layerNorm_) {
       gamma1m_ = graph->param(prefix + "_gamma1m",
                               {1, dimState},
-                              keywords::init = inits::from_value(1.f));
+                              inits::from_value(1.f));
       gamma2m_ = graph->param(prefix + "_gamma2m",
                               {1, dimState},
-                              keywords::init = inits::from_value(1.f));
+                              inits::from_value(1.f));
     }
   }
@@ -697,39 +697,39 @@ public:
     Uf_ = graph->param(prefix + "_Uf",
                        {dimState, dimState},
-                       keywords::init = inits::glorot_uniform);
+                       inits::glorot_uniform);
     Wf_ = graph->param(prefix + "_Wf",
                        {dimInput, dimState},
-                       keywords::init = inits::glorot_uniform);
+                       inits::glorot_uniform);
     bf_ = graph->param(
-        prefix + "_bf", {1, dimState}, keywords::init = inits::zeros);
+        prefix + "_bf", {1, dimState}, inits::zeros);
     Ui_ = graph->param(prefix + "_Ui",
                        {dimState, dimState},
-                       keywords::init = inits::glorot_uniform);
+                       inits::glorot_uniform);
     Wi_ = graph->param(prefix + "_Wi",
                        {dimInput, dimState},
-                       keywords::init = inits::glorot_uniform);
+                       inits::glorot_uniform);
     bi_ = graph->param(
-        prefix + "_bi", {1, dimState}, keywords::init = inits::zeros);
+        prefix + "_bi", {1, dimState}, inits::zeros);
     Uc_ = graph->param(prefix + "_Uc",
                        {dimState, dimState},
-                       keywords::init = inits::glorot_uniform);
+                       inits::glorot_uniform);
     Wc_ = graph->param(prefix + "_Wc",
                        {dimInput, dimState},
-                       keywords::init = inits::glorot_uniform);
+                       inits::glorot_uniform);
     bc_ = graph->param(
-        prefix + "_bc", {1, dimState}, keywords::init = inits::zeros);
+        prefix + "_bc", {1, dimState}, inits::zeros);
     Uo_ = graph->param(prefix + "_Uo",
                        {dimState, dimState},
-                       keywords::init = inits::glorot_uniform);
+                       inits::glorot_uniform);
     Wo_ = graph->param(prefix + "_Wo",
                        {dimInput, dimState},
-                       keywords::init = inits::glorot_uniform);
+                       inits::glorot_uniform);
     bo_ = graph->param(
-        prefix + "_bo", {1, dimState}, keywords::init = inits::zeros);
+        prefix + "_bo", {1, dimState}, inits::zeros);
   }
   State apply(std::vector<Expr> inputs, State state, Expr mask = nullptr) {
@@ -791,39 +791,39 @@ public:
     auto Uf = graph->param(prefix + "_Uf",
                            {dimState, dimState},
-                           keywords::init = inits::glorot_uniform);
+                           inits::glorot_uniform);
     auto Wf = graph->param(prefix + "_Wf",
                            {dimInput, dimState},
-                           keywords::init = inits::glorot_uniform);
+                           inits::glorot_uniform);
     auto bf = graph->param(
-        prefix + "_bf", {1, dimState}, keywords::init = inits::zeros);
+        prefix + "_bf", {1, dimState}, inits::zeros);
     auto Ui = graph->param(prefix + "_Ui",
                            {dimState, dimState},
-                           keywords::init = inits::glorot_uniform);
+                           inits::glorot_uniform);
     auto Wi = graph->param(prefix + "_Wi",
                            {dimInput, dimState},
-                           keywords::init = inits::glorot_uniform);
+                           inits::glorot_uniform);
     auto bi = graph->param(
-        prefix + "_bi", {1, dimState}, keywords::init = inits::zeros);
+        prefix + "_bi", {1, dimState}, inits::zeros);
     auto Uc = graph->param(prefix + "_Uc",
                            {dimState, dimState},
-                           keywords::init = inits::glorot_uniform);
+                           inits::glorot_uniform);
     auto Wc = graph->param(prefix + "_Wc",
                            {dimInput, dimState},
-                           keywords::init = inits::glorot_uniform);
+                           inits::glorot_uniform);
     auto bc = graph->param(
-        prefix + "_bc", {1, dimState}, keywords::init = inits::zeros);
+        prefix + "_bc", {1, dimState}, inits::zeros);
     auto Uo = graph->param(prefix + "_Uo",
                            {dimState, dimState},
-                           keywords::init = inits::glorot_uniform);
+                           inits::glorot_uniform);
     auto Wo = graph->param(prefix + "_Wo",
                            {dimInput, dimState},
-                           keywords::init = inits::glorot_uniform);
+                           inits::glorot_uniform);
     auto bo = graph->param(
-        prefix + "_bo", {1, dimState}, keywords::init = inits::zeros);
+        prefix + "_bo", {1, dimState}, inits::zeros);
     U_ = concatenate({Uf, Ui, Uc, Uo}, keywords::axis = -1);
     W_ = concatenate({Wf, Wi, Wc, Wo}, keywords::axis = -1);
diff --git a/src/rnn/rnn.h b/src/rnn/rnn.h
index 7374ed2a..4f1be340 100644
--- a/src/rnn/rnn.h
+++ b/src/rnn/rnn.h
@@ -101,7 +101,7 @@ private:
     int dimBatch = input->shape()[-2];
     int dimState = cell_->getOptions()->get<int>("dimState");
-    auto output = graph->zeros(keywords::shape = {1, dimBatch, dimState});
+    auto output = graph->zeros({1, dimBatch, dimState});
     Expr cell = output;
     State startState{output, cell};
diff --git a/src/tensors/tensor.h b/src/tensors/tensor.h
index d6924402..24ca0738 100644
--- a/src/tensors/tensor.h
+++ b/src/tensors/tensor.h
@@ -74,7 +74,7 @@ public:
     else
       std::copy(data(), data() + size(), v.data());
   }
-
+
   void set(const float* begin, const float* end) {
     if(backend_->getDevice().type == DeviceType::gpu)
       gpu::copy(backend_, begin, end, data());
@@ -189,14 +189,4 @@ public:
 typedef std::shared_ptr<TensorBase> Tensor;
-static Tensor operator<<(Tensor t, const std::vector<float> &v) {
-  t->set(v);
-  return t;
-}
-
-static Tensor operator>>(Tensor t, std::vector<float> &v) {
-  t->get(v);
-  return t;
-}
-
 }
diff --git a/src/translator/beam_search.h b/src/translator/beam_search.h
index 5c7cda26..20b7b628 100644
--- a/src/translator/beam_search.h
+++ b/src/translator/beam_search.h
@@ -139,7 +139,7 @@ public:
       if(first) {
         // no cost
         prevCosts = graph->constant({1, 1, 1, 1},
-                                    keywords::init = inits::from_value(0));
+                                    inits::from_value(0));
       } else {
         std::vector<float> beamCosts;
@@ -164,7 +164,7 @@ public:
         prevCosts = graph->constant({(int)localBeamSize, 1, dimBatch, 1},
-                                    keywords::init = inits::from_vector(beamCosts));
+                                    inits::from_vector(beamCosts));
       }
       //**********************************************************************
diff --git a/src/translator/scorers.h b/src/translator/scorers.h
index 296431f2..94bda6e7 100644
--- a/src/translator/scorers.h
+++ b/src/translator/scorers.h
@@ -133,7 +133,7 @@ public:
     p[2] = 0;
     penalties_ = graph->constant({1, dimVocab_},
-                                 keywords::init = inits::from_vector(p));
+                                 inits::from_vector(p));
     return New<WordPenaltyState>(dimVocab_, penalties_);
   }
@@ -169,7 +169,7 @@ public:
     p[2] = 0;
     penalties_ = graph->constant({1, dimVocab_},
-                                 keywords::init = inits::from_vector(p));
+                                 inits::from_vector(p));
     return New<WordPenaltyState>(dimVocab_, penalties_);
   }
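Illustrative note (not part of the patch): across the hunks above, the keyword-argument forms are dropped in favor of positional arguments, and the stream operators on Tensor are removed in favor of direct set/get calls. A minimal sketch of the resulting call style, assuming a graph, prefix, dimensions, and a Tensor t like those appearing in these files are in scope:

// Hedged sketch of the updated call sites; graph, prefix, dimState, dimBatch,
// and t are taken from the hunks above and assumed to already exist.
auto U = graph->param(prefix + "_U",
                      {dimState, 2 * dimState},
                      inits::glorot_uniform);          // was: keywords::init = inits::glorot_uniform
auto b = graph->param(prefix + "_b", {1, 2 * dimState}, inits::zeros);
auto output = graph->zeros({1, dimBatch, dimState});   // was: keywords::shape = {1, dimBatch, dimState}

std::vector<float> v(t->size());  // assumes TensorBase exposes size(); get() does not resize
t->get(v);                        // was: t >> v
t->set(v);                        // was: t << v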