Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoman Grundkiewicz <rgrundki@exseed.ed.ac.uk>2018-08-06 12:57:14 +0300
committerRoman Grundkiewicz <rgrundki@exseed.ed.ac.uk>2018-08-06 12:57:14 +0300
commiteb20e9900fe544d3f38b2b15ae4240d0c4132a17 (patch)
treebde75868ca435155153863a2dec7ce4df4dab760
parent1a0c4c1d19e1a7d9aaebe82ceb4b9cc68aa5cc51 (diff)
parent9834e09a88022819262387af714764934c72caf8 (diff)
Merge with master
-rw-r--r--cmake/FindNCCL.cmake11
-rw-r--r--src/3rd_party/cnpy/cnpy.h39
-rw-r--r--src/CMakeLists.txt13
-rw-r--r--src/command/marian_conv.cpp61
-rw-r--r--src/command/marian_main.cpp (renamed from src/command/marian-main.cpp)0
-rw-r--r--src/common/binary.cpp161
-rw-r--r--src/common/binary.h26
-rw-r--r--src/common/config.cpp39
-rw-r--r--src/common/config.h14
-rw-r--r--src/common/config_parser.cpp1
-rw-r--r--src/common/definitions.h19
-rw-r--r--src/common/file_stream.h12
-rw-r--r--src/common/io.cpp165
-rw-r--r--src/common/io.h44
-rw-r--r--src/common/io_item.h36
-rw-r--r--src/common/types.h (renamed from src/tensors/types.h)0
-rw-r--r--src/data/npz_converter.cpp58
-rw-r--r--src/data/npz_converter.h177
-rw-r--r--src/graph/expression_graph.cpp38
-rw-r--r--src/graph/expression_graph.h165
-rw-r--r--src/graph/node_initializers.cpp37
-rw-r--r--src/graph/node_initializers.h5
-rw-r--r--src/graph/node_operators_unary.h2
-rw-r--r--src/graph/parameters.h83
-rw-r--r--src/layers/weight.cpp5
-rw-r--r--src/microsoft/quicksand.cpp5
-rw-r--r--src/models/amun.h66
-rw-r--r--src/models/costs.h6
-rw-r--r--src/models/encoder_decoder.cpp22
-rw-r--r--src/models/encoder_decoder.h11
-rw-r--r--src/models/nematus.h73
-rw-r--r--src/optimizers/optimizers.cpp69
-rw-r--r--src/tensors/allocator.h2
-rw-r--r--src/tensors/cpu/tensor_operators.cpp3
-rw-r--r--src/tensors/gpu/algorithm.cu1
-rw-r--r--src/tensors/gpu/tensor_operators.cu16
-rw-r--r--src/tensors/tensor.h16
-rw-r--r--src/tensors/tensor_operators.h16
-rw-r--r--src/translator/scorers.cpp58
-rw-r--r--src/translator/scorers.h27
40 files changed, 1025 insertions, 577 deletions
diff --git a/cmake/FindNCCL.cmake b/cmake/FindNCCL.cmake
index ab3c55a8..d6100e85 100644
--- a/cmake/FindNCCL.cmake
+++ b/cmake/FindNCCL.cmake
@@ -16,10 +16,19 @@ set(NCCL_LIB_PATHS
/usr/local/cuda/lib64
$ENV{NCCL_DIR}/lib64
$ENV{CUDA_TOOLKIT_ROOT_DIR}/lib64
+ /usr/local/cuda/lib
+ $ENV{NCCL_DIR}/lib
+ $ENV{CUDA_TOOLKIT_ROOT_DIR}/lib
)
find_path(NCCL_INCLUDE_DIR NAMES nccl.h PATHS ${NCCL_INC_PATHS})
-find_library(NCCL_LIBRARIES NAMES nccl PATHS ${NCCL_LIB_PATHS})
+
+if (USE_STATIC_LIBS)
+ message(STATUS "Trying to find static NCCL library")
+ find_library(NCCL_LIBRARIES NAMES libnccl_static.a PATHS ${NCCL_LIB_PATHS})
+else (USE_STATIC_LIBS)
+ find_library(NCCL_LIBRARIES NAMES nccl PATHS ${NCCL_LIB_PATHS})
+endif (USE_STATIC_LIBS)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NCCL DEFAULT_MSG NCCL_INCLUDE_DIR NCCL_LIBRARIES)
diff --git a/src/3rd_party/cnpy/cnpy.h b/src/3rd_party/cnpy/cnpy.h
index 54886397..ac427890 100644
--- a/src/3rd_party/cnpy/cnpy.h
+++ b/src/3rd_party/cnpy/cnpy.h
@@ -38,6 +38,10 @@ namespace cnpy {
const char* data() const {
return bytes.data();
}
+
+ size_t size() {
+ return bytes.size();
+ }
};
typedef std::shared_ptr<NpyArray> NpyArrayPtr;
@@ -218,7 +222,8 @@ namespace cnpy {
struct NpzItem : public NpyArray
{
std::string name; //name of item in .npz file (without .npy)
- char type; //type of item
+ char type; // type of item
+
template<typename T>
NpzItem(const std::string& name, const std::vector<T>& data, const std::vector<unsigned int>& dataShape) :
name(name), type(map_type(typeid(T)))
@@ -229,6 +234,26 @@ namespace cnpy {
auto* p = (const char*)data.data();
std::copy(p, p + bytes.size(), bytes.begin());
}
+
+ NpzItem(const std::string& name, const std::string& data, const std::vector<unsigned int>& dataShape) :
+ name(name), type(map_type(typeid(char)))
+ {
+ shape = dataShape;
+ word_size = sizeof(char);
+ bytes.assign(data.data(), data.data() + data.size() + 1);
+ }
+
+ NpzItem(const std::string& name,
+ const std::vector<char>& data,
+ const std::vector<unsigned int>& dataShape,
+ char type_, size_t word_size_) :
+ name(name), type(type_)
+ {
+ shape = dataShape;
+ word_size = word_size_;
+ bytes.resize(data.size());
+ std::copy(data.begin(), data.end(), bytes.begin());
+ }
};
//same as npz_save() except that it saves multiple items to .npz file in a single go, which is required when writing to HDFS
@@ -248,22 +273,22 @@ namespace cnpy {
auto fname = item.name;
//first, form a "file name" by appending .npy to the item's name
fname += ".npy";
-
+
const auto* data = item.bytes.data();
const auto* shape = item.shape.data();
const auto type = item.type;
const auto word_size = item.word_size;
const unsigned int ndims = item.shape.size();
std::vector<char> npy_header = create_npy_header(type,word_size,shape,ndims);
-
+
unsigned long nels = 1;
for (int m=0; m<ndims; m++ ) nels *= shape[m];
int nbytes = nels*word_size + npy_header.size();
-
+
//get the CRC of the data to be added
unsigned int crc = crc32(0L,(unsigned char*)&npy_header[0],npy_header.size());
crc = crc32(crc,(unsigned char*)data,nels*word_size);
-
+
//build the local header
local_header.clear();
local_header += "PK"; //first part of sig
@@ -279,13 +304,13 @@ namespace cnpy {
local_header += (unsigned short) fname.size(); //fname length
local_header += (unsigned short) 0; //extra field length
local_header += fname;
-
+
//write everything
unsigned int local_header_offset = ftell(fp); // this is where this local item will begin in the file. This gets stored in the corresponding global header.
fwrite(&local_header[0],sizeof(char),local_header.size(),fp);
fwrite(&npy_header[0],sizeof(char),npy_header.size(),fp);
fwrite(data,word_size,nels,fp);
-
+
// append to global header
// A concatenation of global headers for all objects gets written to the end of the file.
global_header += "PK"; //first part of sig
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 08ed5399..8708f87f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -9,6 +9,8 @@ add_library(marian STATIC
common/logging.cpp
common/config.cpp
common/config_parser.cpp
+ common/binary.cpp
+ common/io.cpp
data/vocab.cpp
data/corpus_base.cpp
@@ -102,7 +104,10 @@ set_target_properties(marian_scorer PROPERTIES OUTPUT_NAME marian-scorer)
add_executable(marian_vocab command/marian_vocab.cpp)
set_target_properties(marian_vocab PROPERTIES OUTPUT_NAME marian-vocab)
-set(EXECUTABLES ${EXECUTABLES} marian_train marian_decoder marian_scorer marian_vocab)
+add_executable(marian_conv command/marian_conv.cpp)
+set_target_properties(marian_conv PROPERTIES OUTPUT_NAME marian-conv)
+
+set(EXECUTABLES ${EXECUTABLES} marian_train marian_decoder marian_scorer marian_vocab marian_conv)
# marian.zip and marian.tgz
# This combines marian, marian_decoder in a single ZIP or TAR file for
@@ -117,7 +122,8 @@ if(USE_STATIC_LIBS)
"${CMAKE_BINARY_DIR}/marian-decoder"
"${CMAKE_BINARY_DIR}/marian-scorer"
"${CMAKE_BINARY_DIR}/marian-vocab"
- DEPENDS marian_train marian_decoder marian_scorer marian_vocab)
+ "${CMAKE_BINARY_DIR}/marian-conv"
+ DEPENDS marian_train marian_decoder marian_scorer marian_vocab marian_conv)
add_custom_target(marian_zip DEPENDS "${CMAKE_BINARY_DIR}/marian.zip")
add_custom_command(
@@ -127,7 +133,8 @@ if(USE_STATIC_LIBS)
"marian-decoder"
"marian-scorer"
"marian-vocab"
- DEPENDS marian_train marian_decoder marian_scorer marian_vocab)
+ "marian-conv"
+ DEPENDS marian_train marian_decoder marian_scorer marian_vocab marian_conv)
add_custom_target(marian_tgz DEPENDS "${CMAKE_BINARY_DIR}/marian.tgz")
add_custom_target(philly DEPENDS marian_tgz marian_zip)
diff --git a/src/command/marian_conv.cpp b/src/command/marian_conv.cpp
new file mode 100644
index 00000000..3664a7a2
--- /dev/null
+++ b/src/command/marian_conv.cpp
@@ -0,0 +1,61 @@
+#include "marian.h"
+
+#include <boost/program_options.hpp>
+#include <sstream>
+
+int main(int argc, char** argv) {
+ using namespace marian;
+
+ createLoggers();
+
+ namespace po = boost::program_options;
+ po::options_description desc("Allowed options");
+ // clang-format off
+ desc.add_options()
+ ("from,f", po::value<std::string>()->default_value("model.npz"),
+ "Input model")
+ ("to,t", po::value<std::string>()->default_value("model.bin"),
+ "Output model")
+ ("help,h", "Print this message and exit")
+ ;
+ // clang-format on
+
+ po::variables_map vm;
+ try {
+ po::store(po::parse_command_line(argc, argv, desc), vm);
+ po::notify(vm);
+ } catch(std::exception& e) {
+ std::cerr << "Error: " << e.what() << std::endl << std::endl;
+ std::cerr << "Usage: " << argv[0] << " [options]" << std::endl << std::endl;
+ std::cerr << desc << std::endl;
+ exit(1);
+ }
+
+ if(vm.count("help")) {
+ std::cerr << "Usage: " << argv[0] << " [options]" << std::endl << std::endl;
+ std::cerr << desc << std::endl;
+ exit(0);
+ }
+
+ LOG(info, "Outputting {}", vm["to"].as<std::string>());
+
+ YAML::Node config;
+ std::stringstream configStr;
+ marian::io::getYamlFromModel(config,
+ "special:model.yml",
+ vm["from"].as<std::string>());
+ configStr << config;
+
+ auto graph = New<ExpressionGraph>(true, false);
+ graph->setDevice(CPU0);
+
+ graph->load(vm["from"].as<std::string>());
+ graph->forward();
+ graph->save(vm["to"].as<std::string>(), configStr.str());
+
+ //graph->saveBinary(vm["bin"].as<std::string>());
+
+ LOG(info, "Finished");
+
+ return 0;
+}
diff --git a/src/command/marian-main.cpp b/src/command/marian_main.cpp
index 5e428570..5e428570 100644
--- a/src/command/marian-main.cpp
+++ b/src/command/marian_main.cpp
diff --git a/src/common/binary.cpp b/src/common/binary.cpp
new file mode 100644
index 00000000..7f199e15
--- /dev/null
+++ b/src/common/binary.cpp
@@ -0,0 +1,161 @@
+#include "common/binary.h"
+#include "common/definitions.h"
+#include "common/file_stream.h"
+#include "common/definitions.h"
+#include "common/types.h"
+#include "common/io_item.h"
+
+#include <string>
+
+namespace marian {
+namespace io {
+
+namespace binary {
+
+struct Header {
+ size_t nameLength;
+ size_t type;
+ size_t shapeLength;
+ size_t dataLength;
+};
+
+template <typename T>
+const T* get(const void*& current, size_t num = 1) {
+ const T* ptr = (const T*)current;
+ current = (const T*)current + num;
+ return ptr;
+}
+
+void loadItems(const void* current,
+ std::vector<io::Item>& items,
+ bool mapped) {
+
+ size_t binaryFileVersion = *get<size_t>(current);
+ ABORT_IF(binaryFileVersion != BINARY_FILE_VERSION,
+ "Binary file versions do not match: {} (file) != {} (expected)",
+ binaryFileVersion,
+ BINARY_FILE_VERSION);
+
+ size_t numHeaders = *get<size_t>(current);
+ const Header* headers = get<Header>(current, numHeaders);
+
+ items.resize(numHeaders);
+ for(int i = 0; i < numHeaders; ++i) {
+ items[i].type = (Type)headers[i].type;
+ items[i].name = get<char>(current, headers[i].nameLength);
+ items[i].mapped = mapped;
+ }
+
+ for(int i = 0; i < numHeaders; ++i) {
+ size_t len = headers[i].shapeLength;
+ items[i].shape.resize(len);
+ const int* arr = get<int>(current, len);
+ std::copy(arr, arr + len, items[i].shape.begin());
+ }
+
+ // move by offset bytes
+ size_t offset = *get<size_t>(current);
+ get<char>(current, offset);
+
+ for(int i = 0; i < numHeaders; ++i) {
+ if(items[i].mapped) {
+ items[i].ptr = get<char>(current, headers[i].dataLength);
+ } else {
+ size_t len = headers[i].dataLength;
+ items[i].bytes.resize(len);
+ const char* ptr = get<char>(current, len);
+ std::copy(ptr, ptr + len, items[i].bytes.begin());
+ }
+ }
+}
+
+void loadItems(const std::string& fileName,
+ std::vector<io::Item>& items) {
+
+ // Read file into buffer
+ size_t fileSize = boost::filesystem::file_size(fileName);
+ char* ptr = new char[fileSize];
+ InputFileStream in(fileName);
+ in.read(ptr, fileSize);
+
+ // Load items from buffer without mapping
+ loadItems(ptr, items, false);
+
+ // Delete buffer
+ delete[] ptr;
+}
+
+io::Item getItem(const void* current,
+ const std::string& varName) {
+
+ std::vector<io::Item> items;
+ loadItems(current, items);
+
+ for(auto& item : items)
+ if(item.name == varName)
+ return item;
+
+ return io::Item();
+}
+
+io::Item getItem(const std::string& fileName,
+ const std::string& varName) {
+
+ std::vector<io::Item> items;
+ loadItems(fileName, items);
+
+ for(auto& item : items)
+ if(item.name == varName)
+ return item;
+
+ return io::Item();
+}
+
+void saveItems(const std::string& fileName,
+ const std::vector<io::Item>& items) {
+ OutputFileStream out(fileName);
+ size_t pos = 0;
+
+ size_t binaryFileVersion = BINARY_FILE_VERSION;
+ pos += out.write(&binaryFileVersion);
+
+ std::vector<Header> headers;
+ for(const auto& item : items) {
+ headers.push_back(Header{item.name.size() + 1,
+ (size_t)item.type,
+ item.shape.size(),
+ item.size()});
+ }
+
+ size_t headerSize = headers.size();
+ pos += out.write(&headerSize);
+ pos += out.write(headers.data(), headers.size());
+
+ // Write out all names
+ for(const auto& item : items) {
+ pos += out.write(item.name.data(), item.name.size() + 1);
+ }
+ // Write out all shapes
+ for(const auto& item : items) {
+ pos += out.write(item.shape.data(), item.shape.size());
+ }
+
+ // align to next 256-byte boundary
+ size_t nextpos = ((pos + sizeof(size_t)) / 256 + 1) * 256;
+ size_t offset = nextpos - pos - sizeof(size_t);
+
+ pos += out.write(&offset);
+ for(size_t i = 0; i < offset; i++) {
+ char padding = 0;
+ pos += out.write(&padding);
+ }
+
+ // Write out all values
+ for(const auto& item : items) {
+ pos += out.write(item.data(), item.size());
+ }
+}
+
+}
+}
+}
diff --git a/src/common/binary.h b/src/common/binary.h
new file mode 100644
index 00000000..5616c56e
--- /dev/null
+++ b/src/common/binary.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include "common/io_item.h"
+
+#include <string>
+#include <vector>
+
+// Increase this if binary format changes
+#define BINARY_FILE_VERSION 1
+
+namespace marian {
+namespace io {
+
+namespace binary {
+
+void loadItems(const void* current, std::vector<io::Item>& items, bool mapped = false);
+void loadItems(const std::string& fileName, std::vector<io::Item>& items);
+
+io::Item getItem(const void* current, const std::string& vName);
+io::Item getItem(const std::string& fileName, const std::string& vName);
+
+void saveItems(const std::string& fileName, const std::vector<io::Item>& items);
+
+}
+}
+}
diff --git a/src/common/config.cpp b/src/common/config.cpp
index 4a6540b0..f92a01d5 100644
--- a/src/common/config.cpp
+++ b/src/common/config.cpp
@@ -1,5 +1,4 @@
#include "common/config.h"
-#include "3rd_party/cnpy/cnpy.h"
#include "common/file_stream.h"
#include "common/logging.h"
@@ -47,42 +46,14 @@ void Config::override(const YAML::Node& params) {
void Config::loadModelParameters(const std::string& name) {
YAML::Node config;
- GetYamlFromNpz(config, "special:model.yml", name);
+ io::getYamlFromModel(config, "special:model.yml", name);
override(config);
}
-void Config::GetYamlFromNpz(YAML::Node& yaml,
- const std::string& varName,
- const std::string& fName) {
- yaml = YAML::Load(cnpy::npz_load(fName, varName)->data());
-}
-
-// helper to serialize a YAML::Node to a Yaml string in a 0-terminated character
-// vector
-static std::vector<char> asYamlCharVector(const YAML::Node node) {
- YAML::Emitter out;
- OutputYaml(node, out);
- return std::vector<char>(out.c_str(), out.c_str() + strlen(out.c_str()) + 1);
-}
-
-void Config::AddYamlToNpz(const YAML::Node& yaml,
- const std::string& varName,
- const std::string& fName) {
- // YAML::Node's Yaml representation is saved as a 0-terminated char vector to
- // the NPZ file
- auto yamlCharVector = asYamlCharVector(yaml);
- unsigned int shape = yamlCharVector.size();
- cnpy::npz_save(fName, varName, yamlCharVector.data(), &shape, 1, "a");
+void Config::loadModelParameters(const void* ptr) {
+ YAML::Node config;
+ io::getYamlFromModel(config, "special:model.yml", ptr);
+ override(config);
}
-// same as AddYamlToNpz() but adds to an in-memory NpzItem vector instead
-void Config::AddYamlToNpzItems(const YAML::Node& yaml,
- const std::string& varName,
- std::vector<cnpy::NpzItem>& allItems) {
- auto yamlCharVector = asYamlCharVector(yaml);
- allItems.emplace_back(
- varName,
- yamlCharVector,
- std::vector<unsigned int>{(unsigned int)yamlCharVector.size()});
-}
} // namespace marian
diff --git a/src/common/config.h b/src/common/config.h
index c3745191..d8359b24 100644
--- a/src/common/config.h
+++ b/src/common/config.h
@@ -1,10 +1,10 @@
#pragma once
#include <boost/program_options.hpp>
-#include "3rd_party/cnpy/cnpy.h"
#include "3rd_party/yaml-cpp/yaml.h"
#include "common/config_parser.h"
#include "common/file_stream.h"
+#include "common/io.h"
#include "common/logging.h"
#include "common/utils.h"
#ifndef _WIN32 // TODO: why are these needed by a config parser? Can they be
@@ -114,6 +114,7 @@ public:
YAML::Node getModelParameters();
void loadModelParameters(const std::string& name);
+ void loadModelParameters(const void* ptr);
const std::vector<DeviceId>& getDevices() { return devices_; }
@@ -129,17 +130,6 @@ public:
return out;
}
- static void AddYamlToNpz(const YAML::Node&,
- const std::string&,
- const std::string&);
- static void AddYamlToNpzItems(const YAML::Node&,
- const std::string&,
- std::vector<cnpy::NpzItem>&);
-
- static void GetYamlFromNpz(YAML::Node&,
- const std::string&,
- const std::string&);
-
private:
YAML::Node config_;
std::vector<DeviceId> devices_;
diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp
index d40c078c..a4bb64fc 100644
--- a/src/common/config_parser.cpp
+++ b/src/common/config_parser.cpp
@@ -14,7 +14,6 @@
#endif
#endif
-#include "3rd_party/cnpy/cnpy.h"
#include "common/definitions.h"
#include "common/config.h"
diff --git a/src/common/definitions.h b/src/common/definitions.h
index 293d0492..0b71a530 100644
--- a/src/common/definitions.h
+++ b/src/common/definitions.h
@@ -60,6 +60,25 @@ struct DeviceId {
friend bool operator!=(DeviceId id1, DeviceId id2) { return !(id1 == id2); }
};
+// predefine a couple of devices for easier manual use
+const DeviceId CPU0{0, DeviceType::cpu};
+const DeviceId CPU1{1, DeviceType::cpu};
+const DeviceId CPU2{2, DeviceType::cpu};
+const DeviceId CPU3{3, DeviceType::cpu};
+const DeviceId CPU4{4, DeviceType::cpu};
+const DeviceId CPU5{5, DeviceType::cpu};
+const DeviceId CPU6{6, DeviceType::cpu};
+const DeviceId CPU7{7, DeviceType::cpu};
+
+const DeviceId GPU0{0, DeviceType::gpu};
+const DeviceId GPU1{1, DeviceType::gpu};
+const DeviceId GPU2{2, DeviceType::gpu};
+const DeviceId GPU3{3, DeviceType::gpu};
+const DeviceId GPU4{4, DeviceType::gpu};
+const DeviceId GPU5{5, DeviceType::gpu};
+const DeviceId GPU6{6, DeviceType::gpu};
+const DeviceId GPU7{7, DeviceType::gpu};
+
class TensorBase;
typedef Ptr<TensorBase> Tensor;
diff --git a/src/common/file_stream.h b/src/common/file_stream.h
index 4dcfd264..4236492b 100644
--- a/src/common/file_stream.h
+++ b/src/common/file_stream.h
@@ -116,6 +116,12 @@ public:
return stream;
}
+ template <typename T>
+ size_t read(T* ptr, size_t num = 1) {
+ istream_.read((char*)ptr, num * sizeof(T));
+ return num * sizeof(T);
+ }
+
std::string path() { return file_.string(); }
bool empty() { return ifstream_.peek() == std::ifstream::traits_type::eof(); }
@@ -156,6 +162,12 @@ public:
return stream;
}
+ template <typename T>
+ size_t write(const T* ptr, size_t num = 1) {
+ ostream_.write((char*)ptr, num * sizeof(T));
+ return num * sizeof(T);
+ }
+
std::string path() { return file_.string(); }
private:
diff --git a/src/common/io.cpp b/src/common/io.cpp
new file mode 100644
index 00000000..6ec0b3c8
--- /dev/null
+++ b/src/common/io.cpp
@@ -0,0 +1,165 @@
+#include "common/io.h"
+
+#include "3rd_party/cnpy/cnpy.h"
+#include "common/shape.h"
+#include "common/types.h"
+
+#include "common/io_item.h"
+#include "common/binary.h"
+
+
+namespace marian {
+
+namespace io {
+
+bool isNpz(const std::string& fileName) {
+ return fileName.size() >= 4 && fileName.substr(fileName.length() - 4) == ".npz";
+}
+
+bool isBin(const std::string& fileName) {
+ return fileName.size() >= 4 && fileName.substr(fileName.length() - 4) == ".bin";
+}
+
+void getYamlFromNpz(YAML::Node& yaml,
+ const std::string& varName,
+ const std::string& fileName) {
+ auto item = cnpy::npz_load(fileName, varName);
+ if(item->size() > 0)
+ yaml = YAML::Load(item->data());
+}
+
+void getYamlFromBin(YAML::Node& yaml,
+ const std::string& varName,
+ const std::string& fileName) {
+ auto item = binary::getItem(fileName, varName);
+ if(item.size() > 0)
+ yaml = YAML::Load(item.data());
+}
+
+void getYamlFromModel(YAML::Node& yaml,
+ const std::string& varName,
+ const std::string& fileName) {
+ if(io::isNpz(fileName)) {
+ io::getYamlFromNpz(yaml, varName, fileName);
+ }
+ else if(io::isBin(fileName)) {
+ io::getYamlFromBin(yaml, varName, fileName);
+ }
+ else {
+ ABORT("Unknown model file format for file {}", fileName);
+ }
+}
+
+void getYamlFromModel(YAML::Node& yaml,
+ const std::string& varName,
+ const void* ptr) {
+ auto item = binary::getItem(ptr, varName);
+ if(item.size() > 0)
+ yaml = YAML::Load(item.data());
+}
+
+void addMetaToItems(const std::string& meta,
+ const std::string& varName,
+ std::vector<io::Item>& items) {
+ Item item;
+ item.name = varName;
+
+ // increase size by 1 to add \0
+ item.shape = Shape({(int)meta.size() + 1});
+
+ item.bytes.resize(item.shape.elements());
+ std::copy(meta.begin(), meta.end(), item.bytes.begin());
+ // set string terminator
+ item.bytes.back() = '\0';
+
+ item.type = Type::int8;
+
+ items.push_back(item);
+}
+
+void loadItemsFromNpz(const std::string& fileName, std::vector<Item>& items) {
+ auto numpy = cnpy::npz_load(fileName);
+ for(auto it : numpy) {
+
+ Shape shape;
+ if(it.second->shape.size() == 1) {
+ shape.resize(2);
+ shape.set(0, 1);
+ shape.set(1, it.second->shape[0]);
+ } else {
+ shape.resize(it.second->shape.size());
+ for(size_t i = 0; i < it.second->shape.size(); ++i)
+ shape.set(i, it.second->shape[i]);
+ }
+
+ Item item;
+ item.name = it.first;
+ item.shape = shape;
+ item.bytes.swap(it.second->bytes);
+
+ items.emplace_back(std::move(item));
+ }
+}
+
+std::vector<Item> loadItems(const std::string& fileName) {
+ std::vector<Item> items;
+ if(isNpz(fileName)) {
+ loadItemsFromNpz(fileName, items);
+ }
+ else if(isBin(fileName)) {
+ binary::loadItems(fileName, items);
+ }
+ else {
+ ABORT("Unknown model file format for file {}", fileName);
+ }
+
+ return items;
+}
+
+std::vector<Item> loadItems(const void* ptr) {
+ std::vector<Item> items;
+ binary::loadItems(ptr, items, false);
+ return items;
+}
+
+std::vector<Item> mmapItems(const void* ptr) {
+ std::vector<Item> items;
+ binary::loadItems(ptr, items, true);
+ return items;
+}
+
+// @TODO: make cnpy and our wrapper talk to each other in terms of types
+// or implement our own saving routines for npz based on npy, probably better.
+void saveItemsNpz(const std::string& fileName, const std::vector<Item>& items) {
+ std::vector<cnpy::NpzItem> npzItems;
+ for(auto& item : items) {
+ std::vector<unsigned int> shape(item.shape.begin(), item.shape.end());
+ char type = 'f';
+
+ if(item.type == Type::float32)
+ type = cnpy::map_type(typeid(float));
+ else if(item.type == Type::int8)
+ type = cnpy::map_type(typeid(char));
+ else
+ ABORT("Other types not supported yet");
+
+ npzItems.emplace_back(item.name, item.bytes, shape, type, sizeOf(item.type));
+
+ }
+ cnpy::npz_save(fileName, npzItems);
+}
+
+void saveItems(const std::string& fileName, const std::vector<Item>& items) {
+ if(isNpz(fileName)) {
+ saveItemsNpz(fileName, items);
+ }
+ else if(isBin(fileName)) {
+ binary::saveItems(fileName, items);
+ }
+ else {
+ ABORT("Unknown file format for file {}", fileName);
+ }
+}
+
+}
+}
diff --git a/src/common/io.h b/src/common/io.h
new file mode 100644
index 00000000..210360d4
--- /dev/null
+++ b/src/common/io.h
@@ -0,0 +1,44 @@
+#pragma once
+
+#include "3rd_party/yaml-cpp/yaml.h"
+#include "common/io_item.h"
+
+#include <string>
+#include <vector>
+
+// interface for handling model files in marian, both *.npz files and
+// *.bin files have the same way of accessing them and are identified
+// by suffixes (*.npz or *.bin).
+
+// Files with the *.bin suffix are supposed to be memory-mappable for
+// CPU decoding.
+
+namespace marian {
+
+namespace io {
+
+bool isNpz(const std::string& fileName);
+bool isBin(const std::string& fileName);
+
+void getYamlFromModel(YAML::Node& yaml,
+ const std::string& varName,
+ const std::string& fileName);
+
+void getYamlFromModel(YAML::Node& yaml,
+ const std::string& varName,
+ const void* ptr);
+
+void addMetaToItems(const std::string& meta,
+ const std::string& varName,
+ std::vector<io::Item>& items);
+
+std::vector<Item> loadItems(const std::string& fileName);
+
+std::vector<Item> loadItems(const void* ptr);
+
+std::vector<Item> mmapItems(const void* ptr);
+
+void saveItems(const std::string& fileName, const std::vector<Item>& items);
+
+}
+}
diff --git a/src/common/io_item.h b/src/common/io_item.h
new file mode 100644
index 00000000..809ed358
--- /dev/null
+++ b/src/common/io_item.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include "common/shape.h"
+#include "common/types.h"
+
+#include <string>
+
+namespace marian {
+namespace io {
+
+struct Item {
+ std::vector<char> bytes;
+ const char* ptr{0};
+ bool mapped{false};
+
+ std::string name;
+ Shape shape;
+ Type type{Type::float32};
+
+ const char* data() const {
+ if(mapped)
+ return ptr;
+ else
+ return bytes.data();
+ }
+
+ size_t size() const {
+ if(mapped)
+ return shape.elements() * sizeOf(type);
+ else
+ return bytes.size();
+ }
+};
+
+}
+}
diff --git a/src/tensors/types.h b/src/common/types.h
index fd5f0625..fd5f0625 100644
--- a/src/tensors/types.h
+++ b/src/common/types.h
diff --git a/src/data/npz_converter.cpp b/src/data/npz_converter.cpp
deleted file mode 100644
index d1faad7c..00000000
--- a/src/data/npz_converter.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-// This file is part of the Marian toolkit.
-
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-// SOFTWARE.
-
-#include "npz_converter.h"
-
-NpzConverter::NpzConverter(const std::string& file)
- : model_(cnpy::npz_load(file)), destructed_(false) {}
-
-NpzConverter::~NpzConverter() {
- if(!destructed_)
- model_.destruct();
-}
-
-void NpzConverter::Destruct() {
- model_.destruct();
- destructed_ = true;
-}
-
-/** TODO: Marcin, what does this function do? Why isn't it a method? */
-mblas::Matrix NpzConverter::operator[](const std::string& key) const {
- typedef blaze::
- CustomMatrix<float, blaze::unaligned, blaze::unpadded, blaze::rowMajor>
- BlazeWrapper;
- mblas::Matrix matrix;
- auto it = model_.find(key);
- if(it != model_.end()) {
- NpyMatrixWrapper np(it->second);
- matrix = BlazeWrapper(np.data(), np.size1(), np.size2());
- } else {
- std::cerr << "Missing " << key << std::endl;
- }
- return std::move(matrix);
-}
-
-mblas::Matrix NpzConverter::operator()(const std::string& key,
- bool transpose) const {
- mblas::Matrix matrix = (*this)[key];
- mblas::Trans(matrix);
- return std::move(matrix);
-}
diff --git a/src/data/npz_converter.h b/src/data/npz_converter.h
deleted file mode 100644
index f133b0a4..00000000
--- a/src/data/npz_converter.h
+++ /dev/null
@@ -1,177 +0,0 @@
-#pragma once
-
-// This file is part of the Marian toolkit.
-
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-// SOFTWARE.
-
-#include "cnpy/cnpy.h"
-#include "tensor.h" //XXX Marcin, is this include actually needed? It appears to not be used.
-
-/**
- * @brief Loads model data stored in a npz file.
- *
- * Use of this class enables such data to later be stored in standard Marian
- * data structures.
- *
- * Note: this class makes use of the 3rd-party class <code>npy</code>.
- */
-class NpzConverter {
- // Private inner classes of the NpzConverter class
-private:
- /**
- * Wraps npy data such that the underlying matrix shape and
- * matrix data are made accessible.
- */
- class NpyMatrixWrapper {
- public:
- /**
- * Constructs a wrapper around an underlying npy data structure,
- * enabling the underlying data to be accessed as a matrix.
- *
- * @param npy the underlying data
- */
- NpyMatrixWrapper(const cnpy::NpyArray& npy) : npy_(npy) {}
-
- /**
- * Returns the total number of elements in the underlying matrix.
- *
- * @return the total number of elements in the underlying matrix
- */
- size_t size() const { return size1() * size2(); }
-
- /**
- * Returns a pointer to the raw data that underlies the matrix.
- *
- * @return a pointer to the raw data that underlies the matrix
- */
- float* data() const { return (float*)npy_.data; }
-
- /**
- * Given the index (i, j) of a matrix element,
- * this operator returns the float value from the underlying npz data
- * that is stored in the matrix.
- *
- * XXX: Marcin, is the following correct? Or do I have the row/column labels
- * swapped?
- *
- * @param i Index of a column in the matrix
- * @param j Index of a row in the matrix
- *
- * @return the float value stored at column i, row j of the matrix
- */
- float operator()(size_t i, size_t j) const {
- return ((float*)npy_.data)[i * size2() + j];
- }
-
- /**
- * Returns the number of columns in the matrix.
- *
- * XXX: Marcin, is this following correct? Or do I have the row/column
- * labels swapped?
- *
- * @return the number of columns in the matrix
- */
- size_t size1() const { return npy_.shape[0]; }
-
- /**
- * Returns the number of rows in the matrix.
- *
- * XXX: Marcin, is this following correct? Or do I have the row/column
- * labels swapped?
- *
- * @return the number of rows in the matrix
- */
- size_t size2() const {
- if(npy_.shape.size() == 1)
- return 1;
- else
- return npy_.shape[1];
- }
-
- private:
- /** Instance of the underlying (3rd party) data structure. */
- const cnpy::NpyArray& npy_;
-
- }; // End of NpyMatrixWrapper class
-
- // Public methods of the NpzConverter class
-public:
- /**
- * Constructs an object that reads npz data from a file.
- *
- * @param file Path to file containing npz data
- */
- NpzConverter(const std::string& file)
- : model_(cnpy::npz_load(file)), destructed_(false) {}
-
- /**
- * Destructs the model that underlies this NpzConverter object,
- * if that data has not already been destructed.
- */
- ~NpzConverter() {
- if(!destructed_)
- model_.destruct();
- }
-
- /**
- * Destructs the model that underlies this NpzConverter object,
- * and marks that data as having been destructed.
- */
- void Destruct() {
- model_.destruct();
- destructed_ = true;
- }
-
- /**
- * Loads data corresponding to a search key into the provided vector.
- *
- * @param key Search key XXX Marcin, what
- * type of thing is "key"? What are we searching for here?
- * @param data Container into which data will be loaded XXX Lane, is there
- * a way in Doxygen to mark and inout variable?
- * @param shape Shape object into which the number of rows and columns of the
- * vectors will be stored
- */
- void Load(const std::string& key,
- std::vector<float>& data,
- marian::Shape& shape) const {
- auto it = model_.find(key);
- if(it != model_.end()) {
- NpyMatrixWrapper np(it->second);
- data.clear();
- data.resize(np.size());
- std::copy(np.data(), np.data() + np.size(), data.begin());
-
- shape = {(int)np.size1(), (int)np.size2()};
-
- } else {
- std::cerr << "Missing " << key << std::endl;
- }
- }
-
- // Private member data of the NpzConverter class
-private:
- /** Underlying npz data */
- cnpy::npz_t model_;
-
- /** Indicates whether the underlying data has been destructed. */
- bool destructed_;
-
-}; // End of NpzConverter class
diff --git a/src/graph/expression_graph.cpp b/src/graph/expression_graph.cpp
index 617bc846..23b5c167 100644
--- a/src/graph/expression_graph.cpp
+++ b/src/graph/expression_graph.cpp
@@ -26,4 +26,42 @@ void ExpressionGraph::checkNan(Tensor t) {
ABORT_IF(throwNaN_, "Not implemented");
// ABORT_IF(throwNaN_ && IsNan(t), "Tensor has NaN");
}
+
+void ExpressionGraph::parametersToItems(std::vector<io::Item>& ioItems,
+ const std::map<std::string, std::string>& nameMap) {
+
+ for(auto p : params()->getMap()) {
+ std::string pName = p.first;
+
+ if(!namespace_.empty()) {
+ if(pName.substr(0, namespace_.size() + 2) == namespace_ + "::")
+ pName = pName.substr(namespace_.size() + 2);
+ }
+
+ auto it = nameMap.find(pName);
+ if(it != nameMap.end())
+ pName = it->second;
+
+ ABORT_IF(p.second->val()->type() != Type::float32,
+ "Only float32 supported at the moment");
+
+ Tensor val = p.second->val();
+
+ io::Item item;
+ item.name = pName;
+ item.shape = val->shape();
+ item.type = val->type();
+
+ // Use the actual memory as this will be aligned and padded.
+ // When memory mapping this is required. Shape keeps track of
+ // tensor size. Saving to *.npz will cut to size.
+ auto mem = val->memory();
+ item.bytes.resize(mem->size());
+ copy(backend_, mem->data<char>(), mem->data<char>() + mem->size(), item.bytes.data());
+
+ ioItems.emplace_back(std::move(item));
+ }
+
+}
+
} // namespace marian
diff --git a/src/graph/expression_graph.h b/src/graph/expression_graph.h
index 8436e6e4..578cff8e 100644
--- a/src/graph/expression_graph.h
+++ b/src/graph/expression_graph.h
@@ -1,7 +1,5 @@
#pragma once
-#include "3rd_party/cnpy/cnpy.h"
-#include "3rd_party/threadpool.h"
#include "common/config.h"
#include "common/definitions.h"
@@ -13,12 +11,10 @@
#include "graph/node_operators.h"
#include "graph/parameters.h"
-#include "3rd_party/cnpy/cnpy.h"
-
-#include <fstream>
#include <map>
#include <unordered_set>
+
namespace marian {
template <class T, typename... Args>
@@ -163,11 +159,6 @@ public:
namespace_ = newNamespace;
}
- void reserveWorkspaceMB(size_t num) {
- size_t bytes = num * 1024 * 1024 - 1;
- tensors_->reserve(bytes);
- }
-
void copyParams(Ptr<ExpressionGraph> graph) {
for(auto p : *graph->params())
param(p->name(), p->shape(), inits::dummy);
@@ -175,12 +166,18 @@ public:
params()->vals()->copyFrom(graph->params()->vals());
}
+ // @TODO: remove this
void forceInit() {
params()->allocateForward();
for(auto v : nodesForward_)
v->init();
}
+ void reserveWorkspaceMB(size_t num) {
+ size_t bytes = num * 1024 * 1024 - 1;
+ tensors_->reserve(bytes);
+ }
+
void reuseWorkspace(Ptr<ExpressionGraph> graph) {
tensors_ = graph->tensors_;
}
@@ -423,65 +420,131 @@ public:
void setThrowNaN(bool throwNaN) { throwNaN_ = throwNaN; }
- void load(const std::string& name, bool markReloaded) {
- using namespace keywords;
-
- LOG(info, "Loading model from {}", name);
+private:
+  // create graph parameters from an array of io::Item elements, for loading
+ void itemsToParameters(const std::vector<io::Item>& ioItems,
+ const std::map<std::string, std::string>& nameMap,
+ bool markReloaded = true) {
setReloaded(false);
+ for(auto& item : ioItems) {
+ std::string pName = item.name;
- auto numpy = cnpy::npz_load(name);
-
- for(auto it : numpy) {
- auto name = it.first;
- // skip over special parameters starting with _
- if(name.substr(0, 8) == "special:")
+ // skip over special parameters starting with "special:"
+ if(pName.substr(0, 8) == "special:")
continue;
- Shape shape;
- if(it.second->shape.size() == 1) {
- shape.resize(2);
- shape.set(0, 1);
- shape.set(1, it.second->shape[0]);
- } else {
- shape.resize(it.second->shape.size());
- for(size_t i = 0; i < it.second->shape.size(); ++i)
- shape.set(i, it.second->shape[i]);
- }
+ auto it = nameMap.find(pName);
+ if(it != nameMap.end())
+ pName = it->second;
- param(name, shape, inits::from_numpy(it.second));
+ param(pName, item.shape, inits::from_item(item));
}
-
if(markReloaded)
setReloaded(true);
}
- // convert all parameters into an array pf cnpy::NpzItem elements, for saving
- void save(std::vector<cnpy::NpzItem>& npzItems) {
- for(auto p : params()->getMap()) {
- std::string pName = p.first;
+public:
- if(!namespace_.empty()) {
- if(pName.substr(0, namespace_.size() + 2) == namespace_ + "::")
- pName = pName.substr(namespace_.size() + 2);
- }
+ void load(const std::string& name,
+ const std::map<std::string, std::string>& nameMap,
+ bool markReloaded = true) {
+ LOG(info, "Loading model from {}", name);
+ itemsToParameters(io::loadItems(name), nameMap, markReloaded);
+ }
- std::vector<float> v;
- p.second->val()->get(v);
+ void load(const std::string& name,
+ bool markReloaded = true) {
- auto& pShape = p.second->shape();
- std::vector<unsigned int> shape(pShape.begin(), pShape.end());
+ // code to test memory mapping
+ //if(io::isBin(name)) {
+ // loadMmap(name, markReloaded);
+ // return;
+ //}
- npzItems.emplace_back(pName, v, shape);
- }
+ std::map<std::string, std::string> emptyNameMap;
+ load(name, emptyNameMap, markReloaded);
}
- void save(const std::string& name) {
+ void load(const void* ptr,
+ const std::map<std::string, std::string>& nameMap,
+ bool markReloaded = true) {
+ LOG(info, "Loading model from buffer at {}", ptr);
+ itemsToParameters(io::loadItems(ptr), nameMap, markReloaded);
+ }
+
+ void load(const void* ptr,
+ bool markReloaded = true) {
+ std::map<std::string, std::string> emptyNameMap;
+ load(ptr, emptyNameMap, markReloaded);
+ }
+
+ void mmap(const void* ptr,
+ const std::map<std::string, std::string>& nameMap,
+ bool markReloaded = true) {
+
+ ABORT_IF(backend_->getDevice().type != DeviceType::cpu || !inferenceOnly_,
+ "Memory mapping only supported for CPU inference mode");
+
+ params_ = New<MappedParameters>();
+ params_->init(backend_);
+
+ LOG(info, "Memory mapping model at {}", ptr);
+ itemsToParameters(io::mmapItems(ptr), nameMap, markReloaded);
+ }
+
+ void mmap(const void* ptr,
+ bool markReloaded = true) {
+ std::map<std::string, std::string> emptyNameMap;
+ mmap(ptr, emptyNameMap, markReloaded);
+ }
+
+ // Code to test memory mapping
+ //char* buf_;
+ //void loadMmap(const std::string& name, bool markReloaded) {
+ // size_t fsize = boost::filesystem::file_size(name);
+ // buf_ = new char[fsize];
+ // InputFileStream in(name);
+ // in.read(buf_, fsize);
+ // mmap(buf_, markReloaded);
+ //}
+
+private:
+ // convert all parameters into an array of io::Item elements, for saving
+ void parametersToItems(std::vector<io::Item>& ioItems,
+ const std::map<std::string, std::string>& nameMap);
+
+public:
+
+ void save(const std::string& name,
+ const std::string& meta,
+ const std::map<std::string, std::string>& nameMap) {
LOG(info, "Saving model to {}", name);
- std::vector<cnpy::NpzItem> npzItems;
- save(npzItems);
- cnpy::npz_save(name, npzItems);
- LOG(info, "Saved {} items.", npzItems.size());
+
+ std::vector<io::Item> ioItems;
+ parametersToItems(ioItems, nameMap);
+ if(!meta.empty())
+ io::addMetaToItems(meta, "special:model.yml", ioItems);
+ io::saveItems(name, ioItems);
+
+ LOG(info, "Saved {} items.", ioItems.size());
+ }
+
+ void save(const std::string& name) {
+ std::map<std::string, std::string> emptyNameMap;
+ save(name, "", emptyNameMap);
+ }
+
+ void save(const std::string& name,
+ const std::string& meta) {
+ std::map<std::string, std::string> emptyNameMap;
+ save(name, meta, emptyNameMap);
}
+
+ void save(const std::string& name,
+ const std::map<std::string, std::string>& nameMap) {
+ save(name, "", nameMap);
+ }
+
};
template <class T, typename... Args>
diff --git a/src/graph/node_initializers.cpp b/src/graph/node_initializers.cpp
index 86a21ca7..99e3ff8c 100644
--- a/src/graph/node_initializers.cpp
+++ b/src/graph/node_initializers.cpp
@@ -125,14 +125,14 @@ NodeInitializer from_sparse_vector(
};
}
-NodeInitializer from_numpy(const cnpy::NpyArrayPtr& np) {
- return [np](Tensor t) {
- size_t size = 1;
- for(size_t dim : np->shape)
- size *= dim;
- t->set((float*)np->data(), (float*)np->data() + size);
- };
-}
+//NodeInitializer from_numpy(const cnpy::NpyArrayPtr& np) {
+// return [np](Tensor t) {
+// size_t size = 1;
+// for(size_t dim : np->shape)
+// size *= dim;
+// t->set((float*)np->data(), (float*)np->data() + size);
+// };
+//}
// move this somewhere else
NodeInitializer from_word2vec(const std::string& file,
@@ -154,6 +154,27 @@ NodeInitializer from_word2vec(const std::string& file,
t->set(embs);
};
}
+
+NodeInitializer from_item(const io::Item& item) {
+ if(item.mapped) {
+ return [item](Tensor t) {
+ // @TODO: implement other types, for now croak loudly.
+ ABORT_IF(t->getBackend()->getDevice().type != DeviceType::cpu, "Memory mapping only works for CPU tensors");
+ ABORT_IF(!matchType<float>(t->type()), "Tensor type and type for mapping do not match");
+ auto mp = New<MemoryPiece>((uint8_t*)item.ptr, t->size() * sizeof(float));
+ t->reset(mp);
+ };
+ }
+ else {
+ return [item](Tensor t) {
+ // @TODO: implement other types, for now croak loudly.
+ ABORT_IF(!matchType<float>(t->type()),
+ "Tensor type and type for mapping do not match");
+ t->set((const float*)item.bytes.data(), (const float*)item.bytes.data() + t->size());
+ };
+ }
+}
+
} // namespace inits
} // namespace marian
diff --git a/src/graph/node_initializers.h b/src/graph/node_initializers.h
index 1820f7af..3a961a2b 100644
--- a/src/graph/node_initializers.h
+++ b/src/graph/node_initializers.h
@@ -1,7 +1,6 @@
// TODO: move to backend, into graph/
#pragma once
-#include "cnpy/cnpy.h"
#include "common/config.h"
#include "tensors/tensor.h"
@@ -64,10 +63,12 @@ void glorot_normal(Tensor t);
NodeInitializer from_vector(const std::vector<float>& v);
NodeInitializer from_vector(const std::vector<size_t>& v);
+NodeInitializer from_item(const io::Item& item);
+
NodeInitializer from_sparse_vector(
std::pair<std::vector<size_t>, std::vector<float>>& v);
-NodeInitializer from_numpy(const cnpy::NpyArrayPtr& np);
+//NodeInitializer from_numpy(const cnpy::NpyArrayPtr& np);
NodeInitializer from_word2vec(const std::string& file,
int dimVoc,
diff --git a/src/graph/node_operators_unary.h b/src/graph/node_operators_unary.h
index cbeded24..b51c8a75 100644
--- a/src/graph/node_operators_unary.h
+++ b/src/graph/node_operators_unary.h
@@ -657,7 +657,7 @@ struct RowsNodeOp : public UnaryNodeOp {
NodeOps forwardOps() {
// @TODO: solve this with a tensor!
- return {NodeOp(CopyRows(val_, child(0)->val(), indices_))};
+ return {NodeOp(CopyRows(val_, child(0)->val(), indices_, graph()->allocator()))};
}
NodeOps backwardOps() {
diff --git a/src/graph/parameters.h b/src/graph/parameters.h
index 1b9472fd..20bf47cf 100644
--- a/src/graph/parameters.h
+++ b/src/graph/parameters.h
@@ -11,7 +11,7 @@
namespace marian {
class Parameters {
-private:
+protected:
/** @brief List of all parameter nodes of this expression graph. */
std::vector<Expr> params_;
std::map<std::string, Expr> named_;
@@ -19,12 +19,15 @@ private:
Ptr<TensorAllocator> vals_;
Ptr<TensorAllocator> grads_;
-public:
- void init(Ptr<Backend> backend) {
- vals_ = New<TensorAllocator>(backend);
- grads_ = New<TensorAllocator>(backend);
+ size_t totalCapacity(Ptr<TensorAllocator> alloc) {
+ size_t sum = 0;
+ for(auto p : params_) {
+ sum += alloc->capacity(p->shape(), Type::float32);
+ }
+ return sum;
}
+public:
auto begin() -> decltype(params_.begin()) { return params_.begin(); }
auto end() -> decltype(params_.begin()) { return params_.end(); }
@@ -42,21 +45,18 @@ public:
size_t size() { return params_.size(); }
- size_t totalCapacity(Ptr<TensorAllocator> alloc) {
- size_t sum = 0;
- for(auto p : params_) {
- sum += alloc->capacity(p->shape(), Type::float32);
- }
- return sum;
- }
-
void add(Expr p, const std::string& name) {
params_.push_back(p);
ABORT_IF(named_.count(name), "Parameter '{}' already exists", name);
named_[name] = p;
}
- void allocateForward() {
+ virtual void init(Ptr<Backend> backend) {
+ vals_ = New<TensorAllocator>(backend);
+ grads_ = New<TensorAllocator>(backend);
+ }
+
+ virtual void allocateForward() {
if(!params_.empty() && vals_->size() == 0) {
vals_->reserveExact(totalCapacity(vals_));
for(auto p : params_) {
@@ -67,7 +67,7 @@ public:
}
}
- void allocateBackward() {
+ virtual void allocateBackward() {
if(!params_.empty() && grads_->size() == 0) {
grads_->reserveExact(totalCapacity(grads_));
for(auto p : params_)
@@ -76,13 +76,13 @@ public:
}
}
- void set_zero_adjoint() { grads()->set(0.f); }
+ virtual void set_zero_adjoint() { grads()->set(0.f); }
- Tensor vals() { return vals_->asTensor(); }
+ virtual Tensor vals() { return vals_->asTensor(); }
- Tensor grads() { return grads_->asTensor(); }
+ virtual Tensor grads() { return grads_->asTensor(); }
- void clear() {
+ virtual void clear() {
params_.clear();
named_.clear();
@@ -90,4 +90,49 @@ public:
grads_->clear();
}
};
+
+class MappedParameters : public Parameters {
+private:
+ Ptr<Backend> backend_;
+
+public:
+ virtual void init(Ptr<Backend> backend) override {
+ backend_ = backend;
+ }
+
+ virtual void allocateForward() override {
+ if(!params_.empty()) {
+ for(auto p : params_) {
+ if(!p->val()) {
+ p->val() = Tensor(new TensorBase(nullptr, p->shape(), Type::float32, backend_));
+ }
+ }
+ }
+ }
+
+ virtual void allocateBackward() override {
+ ABORT("Not implemented for memory-mapped parameters");
+ }
+
+ virtual void set_zero_adjoint() override {
+ ABORT("Not implemented for memory-mapped parameters");
+ }
+
+ virtual Tensor vals() override {
+ ABORT("Not implemented for memory-mapped parameters");
+ return nullptr;
+ }
+
+ virtual Tensor grads() override {
+ ABORT("Not implemented for memory-mapped parameters");
+ return nullptr;
+ }
+
+ virtual void clear() override {
+ params_.clear();
+ named_.clear();
+ }
+};
+
+
} // namespace marian
diff --git a/src/layers/weight.cpp b/src/layers/weight.cpp
index d5cee32d..b5d8a2bc 100644
--- a/src/layers/weight.cpp
+++ b/src/layers/weight.cpp
@@ -3,9 +3,8 @@
namespace marian {
Ptr<WeightingBase> WeightingFactory(Ptr<Options> options) {
- if(options->has("data-weighting"))
- return New<DataWeighting>(options->get<std::string>("data-weighting-type"));
- return nullptr;
+ ABORT_IF(!options->has("data-weighting"), "No data-weighting specified in options");
+ return New<DataWeighting>(options->get<std::string>("data-weighting-type"));
}
Expr DataWeighting::getWeights(Ptr<ExpressionGraph> graph,
diff --git a/src/microsoft/quicksand.cpp b/src/microsoft/quicksand.cpp
index 593bf81d..14a60bf9 100644
--- a/src/microsoft/quicksand.cpp
+++ b/src/microsoft/quicksand.cpp
@@ -40,7 +40,6 @@ private:
public:
BeamSearchDecoder(Ptr<Options> options, Word eos)
: IBeamSearchDecoder(options, eos) {
- // createLoggers();
graph_ = New<ExpressionGraph>(true, true);
graph_->setDevice(DeviceId{0, DeviceType::cpu});
@@ -64,10 +63,12 @@ public:
for(auto& model : models) {
Ptr<Options> modelOpts = New<Options>();
YAML::Node config;
- Config::GetYamlFromNpz(config, "special:model.yml", model);
+ io::GetYamlFromModel(config, "special:model.yml", model);
modelOpts->merge(options_);
modelOpts->merge(config);
+
auto encdec = models::from_options(modelOpts, models::usage::translation);
+
scorers_.push_back(New<ScorerWrapper>(
encdec, "F" + std::to_string(scorers_.size()), 1, model));
}
diff --git a/src/models/amun.h b/src/models/amun.h
index cbc0c628..507ecc88 100644
--- a/src/models/amun.h
+++ b/src/models/amun.h
@@ -40,10 +40,6 @@ public:
bool markedReloaded = true) {
using namespace keywords;
- LOG(info, "Loading model from {}", name);
-
- auto numpy = cnpy::npz_load(name);
-
std::map<std::string, std::string> nameMap
= {{"decoder_U", "decoder_cell1_U"},
{"decoder_Ux", "decoder_cell1_Ux"},
@@ -95,41 +91,12 @@ public:
if(opt<bool>("tied-embeddings-src") || opt<bool>("tied-embeddings-all"))
nameMap["Wemb"] = "Wemb";
- graph->setReloaded(false);
-
- for(auto it : numpy) {
- auto name = it.first;
-
- if(name == "decoder_c_tt")
- continue;
- if(name.substr(0, 8) == "special:")
- continue;
-
- Shape shape;
- if(numpy[name]->shape.size() == 2) {
- shape.resize(2);
- shape.set(0, numpy[name]->shape[0]);
- shape.set(1, numpy[name]->shape[1]);
- } else if(numpy[name]->shape.size() == 1) {
- shape.resize(2);
- shape.set(0, 1);
- shape.set(1, numpy[name]->shape[0]);
- }
-
- std::string pName = name;
- if(nameMap.count(name))
- pName = nameMap[name];
-
- graph->param(pName, shape, inits::from_numpy(numpy[name]));
- }
-
- graph->setReloaded(true);
+ graph->load(name, nameMap);
}
void save(Ptr<ExpressionGraph> graph,
const std::string& name,
bool saveTranslatorConfig = false) {
- LOG(info, "Saving model to {}", name);
std::map<std::string, std::string> nameMap
= {{"decoder_cell1_U", "decoder_U"},
@@ -177,36 +144,7 @@ public:
{"encoder_bi_r_gamma1", "encoder_r_gamma1"},
{"encoder_bi_r_gamma2", "encoder_r_gamma2"}};
- unsigned shape[2];
- std::string mode = "w";
-
- for(auto p : graph->params()->getMap()) {
- std::vector<float> v;
- p.second->val()->get(v);
-
- unsigned dim;
- if(p.second->shape()[0] == 1) {
- shape[0] = p.second->shape()[1];
- dim = 1;
- } else {
- shape[0] = p.second->shape()[0];
- shape[1] = p.second->shape()[1];
- dim = 2;
- }
-
- std::string pName = p.first;
- if(nameMap.count(pName))
- pName = nameMap[pName];
-
- cnpy::npz_save(name, pName, v.data(), shape, dim, mode);
- mode = "a";
- }
-
- float ctt = 0;
- shape[0] = 1;
- cnpy::npz_save(name, "decoder_c_tt", &ctt, shape, 1, mode);
-
- saveModelParameters(name);
+ graph->save(name, getModelParametersAsString(), nameMap);
if(saveTranslatorConfig) {
createAmunConfig(name);
diff --git a/src/models/costs.h b/src/models/costs.h
index 777f5147..730d46ce 100644
--- a/src/models/costs.h
+++ b/src/models/costs.h
@@ -134,6 +134,12 @@ public:
encdec_->load(graph, name, markedReloaded);
}
+ virtual void mmap(Ptr<ExpressionGraph> graph,
+ const void* ptr,
+ bool markedReloaded = true) {
+ encdec_->mmap(graph, ptr, markedReloaded);
+ };
+
virtual void save(Ptr<ExpressionGraph> graph,
const std::string& name,
bool saveTranslatorConfig = false) {
diff --git a/src/models/encoder_decoder.cpp b/src/models/encoder_decoder.cpp
index 9202aa63..9620f86a 100644
--- a/src/models/encoder_decoder.cpp
+++ b/src/models/encoder_decoder.cpp
@@ -87,8 +87,11 @@ Config::YamlNode EncoderDecoder::getModelParameters() {
return modelParams;
}
-void EncoderDecoder::saveModelParameters(const std::string& name) {
- Config::AddYamlToNpz(getModelParameters(), "special:model.yml", name);
+std::string EncoderDecoder::getModelParametersAsString() {
+ auto yaml = getModelParameters();
+ YAML::Emitter out;
+ OutputYaml(yaml, out);
+ return std::string(out.c_str());
}
void EncoderDecoder::load(Ptr<ExpressionGraph> graph,
@@ -97,18 +100,19 @@ void EncoderDecoder::load(Ptr<ExpressionGraph> graph,
graph->load(name, markedReloaded && !opt<bool>("ignore-model-config", false));
}
+void EncoderDecoder::mmap(Ptr<ExpressionGraph> graph,
+ const void* ptr,
+ bool markedReloaded) {
+ graph->mmap(ptr, markedReloaded && !opt<bool>("ignore-model-config", false));
+}
+
void EncoderDecoder::save(Ptr<ExpressionGraph> graph,
const std::string& name,
bool saveTranslatorConfig) {
// ignore config for now
LOG(info, "Saving model weights and runtime parameters to {}", name);
- std::vector<cnpy::NpzItem> npzItems;
- graph->save(npzItems); // model weights
- Config::AddYamlToNpzItems(getModelParameters(), // model runtime parameters
- "special:model.yml",
- npzItems);
- cnpy::npz_save(name, npzItems); // save both jointly
- // LOG(info, "Saved {} items.", npzItems.size());
+
+ graph->save(name, getModelParametersAsString());
if(saveTranslatorConfig)
createDecoderConfig(name);
diff --git a/src/models/encoder_decoder.h b/src/models/encoder_decoder.h
index c6536603..568aec87 100644
--- a/src/models/encoder_decoder.h
+++ b/src/models/encoder_decoder.h
@@ -16,6 +16,11 @@ public:
bool markedReloaded = true)
= 0;
+ virtual void mmap(Ptr<ExpressionGraph> graph,
+ const void* ptr,
+ bool markedReloaded = true)
+ = 0;
+
virtual void save(Ptr<ExpressionGraph> graph,
const std::string& name,
bool saveTranslatorConfig = false)
@@ -71,7 +76,7 @@ protected:
std::set<std::string> modelFeatures_;
Config::YamlNode getModelParameters();
- void saveModelParameters(const std::string& name);
+ std::string getModelParametersAsString();
virtual void createDecoderConfig(const std::string& name);
@@ -94,6 +99,10 @@ public:
const std::string& name,
bool markedReloaded = true);
+ virtual void mmap(Ptr<ExpressionGraph> graph,
+ const void* ptr,
+ bool markedReloaded = true);
+
virtual void save(Ptr<ExpressionGraph> graph,
const std::string& name,
bool saveTranslatorConfig = false);
diff --git a/src/models/nematus.h b/src/models/nematus.h
index 9b877e8f..211626cc 100644
--- a/src/models/nematus.h
+++ b/src/models/nematus.h
@@ -30,82 +30,13 @@ public:
void load(Ptr<ExpressionGraph> graph,
const std::string& name,
bool markedReloaded = true) {
- using namespace keywords;
-
- LOG(info, "Loading model from {}", name);
- auto numpy = cnpy::npz_load(name);
-
- graph->setReloaded(false);
-
- for(auto it : numpy) {
- auto name = it.first;
-
- if(name == "decoder_c_tt")
- continue;
- if(name.substr(0, 8) == "special:")
- continue;
-
- Shape shape;
- if(numpy[name]->shape.size() == 2) {
- shape.resize(2);
- shape.set(0, numpy[name]->shape[0]);
- shape.set(1, numpy[name]->shape[1]);
- } else if(numpy[name]->shape.size() == 1) {
- shape.resize(2);
- shape.set(0, 1);
- shape.set(1, numpy[name]->shape[0]);
- }
-
- std::string pName = name;
- if(nameMap_.count(name))
- pName = nameMap_[name];
-
- graph->param(pName, shape, inits::from_numpy(numpy[name]));
- }
-
- graph->setReloaded(true);
+ graph->load(name, nameMap_);
}
void save(Ptr<ExpressionGraph> graph,
const std::string& name,
bool saveTranslatorConfig = false) {
- LOG(info, "Saving model to {}", name);
-
- unsigned shape[2];
- std::string mode = "w";
-
- if(nameMapRev_.empty())
- for(auto& kv : nameMap_)
- nameMapRev_.insert({kv.second, kv.first});
-
- for(auto p : graph->params()->getMap()) {
- std::vector<float> v;
- p.second->val()->get(v);
-
- unsigned dim;
- if(p.second->shape()[0] == 1) {
- shape[0] = p.second->shape()[1];
- dim = 1;
- } else {
- shape[0] = p.second->shape()[0];
- shape[1] = p.second->shape()[1];
- dim = 2;
- }
-
- std::string pName = p.first;
- if(nameMapRev_.count(pName))
- pName = nameMapRev_[pName];
-
- cnpy::npz_save(name, pName, v.data(), shape, dim, mode);
- mode = "a";
- }
-
- float ctt = 0;
- shape[0] = 1;
- cnpy::npz_save(name, "decoder_c_tt", &ctt, shape, 1, mode);
-
- saveModelParameters(name);
-
+ graph->save(name, getModelParametersAsString(), nameMap_);
if(saveTranslatorConfig) {
createAmunConfig(name);
createDecoderConfig(name);
diff --git a/src/optimizers/optimizers.cpp b/src/optimizers/optimizers.cpp
index 4381419a..fc266325 100644
--- a/src/optimizers/optimizers.cpp
+++ b/src/optimizers/optimizers.cpp
@@ -1,6 +1,7 @@
#include "optimizers.h"
#include "tensors/tensor_operators.h"
+#include "common/io.h"
namespace marian {
@@ -47,19 +48,17 @@ void Adagrad::load(const std::string& name,
std::vector<float> vGt;
size_t totalSize = 0;
- auto numpy = cnpy::npz_load(name);
- for(auto it : numpy) {
- auto name = it.first;
- auto np = it.second;
-
+ // @TODO: use new IO
+ auto items = io::loadItems(name);
+ for(auto item : items) {
// get the size of gt_
- totalSize = np->shape[1];
+ totalSize = item.shape.elements();
// extract data into vectors
- if(name == "adagrad_gt") {
+ if(item.name == "adagrad_gt") {
vGt.resize(totalSize);
std::copy(
- (float*)np->data(), (float*)np->data() + totalSize, vGt.begin());
+ (float*)item.data(), (float*)item.data() + totalSize, vGt.begin());
}
}
@@ -108,9 +107,16 @@ void Adagrad::save(const std::string& name,
vGt.insert(vGt.end(), tmp.begin(), tmp.end());
}
- unsigned int shape[2] = {1, (unsigned int)vGt.size()};
+ io::Item item;
+ item.name = "adagrad_gt";
+ item.shape = Shape({1, (int)vGt.size()});
+ item.type = Type::float32;
+ item.bytes.resize(vGt.size() * sizeOf(item.type));
+ std::copy((char*)vGt.data(),
+ (char*)vGt.data() + vGt.size(),
+ item.bytes.begin());
- cnpy::npz_save(name, "adagrad_gt", vGt.data(), shape, 2, "w");
+ io::saveItems(name, {item});
}
void Adagrad::resetStats() {
@@ -164,24 +170,22 @@ void Adam::load(const std::string& name,
std::vector<float> vVt;
size_t totalSize = 0;
- auto numpy = cnpy::npz_load(name);
- for(auto it : numpy) {
- auto name = it.first;
- auto np = it.second;
+ auto items = io::loadItems(name);
+ for(auto item : items) {
// get the size of mt_ and vt_, they are the same
- totalSize = np->shape[1];
+ totalSize = item.shape.elements();
// extract data into vectors
- if(name == "adam_mt") {
+ if(item.name == "adam_mt") {
vMt.resize(totalSize);
std::copy(
- (float*)np->data(), (float*)np->data() + totalSize, vMt.begin());
+ (float*)item.data(), (float*)item.data() + totalSize, vMt.begin());
}
- if(name == "adam_vt") {
+ if(item.name == "adam_vt") {
vVt.resize(totalSize);
std::copy(
- (float*)np->data(), (float*)np->data() + totalSize, vVt.begin());
+ (float*)item.data(), (float*)item.data() + totalSize, vVt.begin());
}
}
@@ -236,13 +240,26 @@ void Adam::save(const std::string& name,
opt->vt_->get(tmp);
vVt.insert(vVt.end(), tmp.begin(), tmp.end());
}
-
- // the shape is the same for mt_ and vt_
- std::vector<unsigned int> shape{1, (unsigned int)vMt.size()};
-
- cnpy::npz_save(name,
- {cnpy::NpzItem("adam_mt", vMt, shape),
- cnpy::NpzItem("adam_vt", vVt, shape)});
+
+ io::Item itemMt;
+ itemMt.name = "adam_mt";
+ itemMt.shape = Shape({1, (int)vMt.size()});
+ itemMt.type = Type::float32;
+ itemMt.bytes.resize(vMt.size() * sizeOf(itemMt.type));
+ std::copy((char*)vMt.data(),
+ (char*)vMt.data() + vMt.size(),
+ itemMt.bytes.begin());
+
+ io::Item itemVt;
+ itemVt.name = "adam_vt";
+ itemVt.shape = Shape({1, (int)vVt.size()});
+ itemVt.type = Type::float32;
+ itemVt.bytes.resize(vVt.size() * sizeOf(itemVt.type));
+ std::copy((char*)vVt.data(),
+ (char*)vVt.data() + vVt.size(),
+ itemVt.bytes.begin());
+
+ io::saveItems(name, {itemMt, itemVt});
}
void Adam::resetStats() {
diff --git a/src/tensors/allocator.h b/src/tensors/allocator.h
index 0695d25e..43c961ad 100644
--- a/src/tensors/allocator.h
+++ b/src/tensors/allocator.h
@@ -9,9 +9,9 @@
#include <vector>
#include "common/definitions.h"
+#include "common/types.h"
#include "tensors/device.h"
#include "tensors/memory_piece.h"
-#include "tensors/types.h"
namespace marian {
diff --git a/src/tensors/cpu/tensor_operators.cpp b/src/tensors/cpu/tensor_operators.cpp
index 4d5d40dc..b82600e1 100644
--- a/src/tensors/cpu/tensor_operators.cpp
+++ b/src/tensors/cpu/tensor_operators.cpp
@@ -381,7 +381,8 @@ void LogSoftmaxGrad(Tensor grad_, Tensor adj_, Tensor val_) {
void CopyRows(Tensor out_,
const Tensor in_,
- const std::vector<size_t>& indices) {
+ const std::vector<size_t>& indices,
+ Ptr<Allocator> allocator) {
size_t cols = in_->shape()[1];
size_t rows = indices.size();
diff --git a/src/tensors/gpu/algorithm.cu b/src/tensors/gpu/algorithm.cu
index b4a773ec..f559fb42 100644
--- a/src/tensors/gpu/algorithm.cu
+++ b/src/tensors/gpu/algorithm.cu
@@ -26,6 +26,7 @@ template void copy<uint16_t>(Ptr<Backend>, const uint16_t*, const uint16_t*, uin
template void copy<uint32_t>(Ptr<Backend>, const uint32_t*, const uint32_t*, uint32_t*);
template void copy<uint64_t>(Ptr<Backend>, const uint64_t*, const uint64_t*, uint64_t*);
+template void copy<char>(Ptr<Backend>, const char*, const char*, char*);
template void copy<float>(Ptr<Backend>, const float*, const float*, float*);
template void copy<double>(Ptr<Backend>, const double*, const double*, double*);
// clang-format on
diff --git a/src/tensors/gpu/tensor_operators.cu b/src/tensors/gpu/tensor_operators.cu
index 4f11f9f8..6eb5a8dd 100644
--- a/src/tensors/gpu/tensor_operators.cu
+++ b/src/tensors/gpu/tensor_operators.cu
@@ -735,7 +735,7 @@ __global__ void gCopyRows(float* out,
}
}
-void CopyRows(Tensor out, const Tensor in, const std::vector<size_t>& indices) {
+void CopyRows(Tensor out, const Tensor in, const std::vector<size_t>& indices, Ptr<Allocator> allocator) {
cudaSetDevice(out->getDevice().no);
size_t cols = in->shape().back();
@@ -744,17 +744,15 @@ void CopyRows(Tensor out, const Tensor in, const std::vector<size_t>& indices) {
int threads = std::min(MAX_THREADS, (int)cols);
int blocks = std::min(MAX_BLOCKS, (int)rowsToCopy);
- size_t* d_indices;
- CUDA_CHECK(cudaMalloc(&d_indices, rowsToCopy * sizeof(size_t)));
- CUDA_CHECK(cudaMemcpy(d_indices,
- indices.data(),
- rowsToCopy * sizeof(size_t),
- cudaMemcpyHostToDevice));
+ auto mp_indices = allocator->alloc<size_t>(rowsToCopy);
+ CudaCopy(indices.data(),
+ indices.data() + indices.size(),
+ mp_indices->data<size_t>());
gCopyRows<<<blocks, threads>>>(
- out->data(), in->data(), cols, d_indices, rowsToCopy);
+ out->data(), in->data(), cols, mp_indices->data<size_t>(), rowsToCopy);
- CUDA_CHECK(cudaFree(d_indices));
+ allocator->free(mp_indices);
}
__global__ void gPasteRows(float* out,
diff --git a/src/tensors/tensor.h b/src/tensors/tensor.h
index e2c6f23f..983691d4 100644
--- a/src/tensors/tensor.h
+++ b/src/tensors/tensor.h
@@ -1,22 +1,20 @@
#pragma once
-#include <iomanip>
-#include <iostream>
-#include <memory>
-#include <sstream>
-
#include "common/definitions.h"
#include "common/shape.h"
+#include "common/types.h"
#include "tensors/backend.h"
#include "tensors/memory_piece.h"
-#include "tensors/types.h"
-
-#include <algorithm>
-
#ifdef CUDA_FOUND
#include "tensors/gpu/algorithm.h"
#endif
+#include <iomanip>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <algorithm>
+
namespace marian {
class TensorBase : public std::enable_shared_from_this<TensorBase> {
diff --git a/src/tensors/tensor_operators.h b/src/tensors/tensor_operators.h
index 87f4f27d..78f61888 100644
--- a/src/tensors/tensor_operators.h
+++ b/src/tensors/tensor_operators.h
@@ -14,13 +14,27 @@
#include "tensors/gpu/add.h"
#include "tensors/gpu/element.h"
#include "tensors/gpu/prod.h"
+#include "tensors/gpu/algorithm.h"
#endif
#include "tensors/cpu/add.h"
#include "tensors/cpu/element.h"
+#include <algorithm>
+
namespace marian {
+template <typename InIt, typename OutIt>
+void copy(Ptr<Backend> backend, const InIt beg, const InIt end, OutIt it) {
+#ifdef CUDA_FOUND
+ if(backend->getDevice().type == DeviceType::gpu)
+ gpu::copy(backend, beg, end, it);
+ else
+#endif
+ std::copy(beg, end, it);
+}
+
+
template <class Functor, class... Tensors>
void Element(Functor functor, marian::Tensor out, Tensors... tensors) {
#ifdef CUDA_FOUND
@@ -119,7 +133,7 @@ static inline void Deconcatenate(std::vector<marian::Tensor>& outputs,
DISPATCH4(HighwayForward, marian::Tensor, const marian::Tensor, const marian::Tensor, const marian::Tensor)
DISPATCH7(HighwayBackward, marian::Tensor, marian::Tensor, marian::Tensor, const marian::Tensor, const marian::Tensor, const marian::Tensor, const marian::Tensor)
- DISPATCH3(CopyRows, marian::Tensor, const marian::Tensor, const std::vector<size_t>&)
+ DISPATCH4(CopyRows, marian::Tensor, const marian::Tensor, const std::vector<size_t>&, Ptr<Allocator>)
DISPATCH3(PasteRows, marian::Tensor, const marian::Tensor, const std::vector<size_t>&)
DISPATCH3(CopyCols, marian::Tensor, const marian::Tensor, const std::vector<size_t>&)
DISPATCH3(PasteCols, marian::Tensor, const marian::Tensor, const std::vector<size_t>&)
diff --git a/src/translator/scorers.cpp b/src/translator/scorers.cpp
index 7ac5e2ad..5f126ae3 100644
--- a/src/translator/scorers.cpp
+++ b/src/translator/scorers.cpp
@@ -2,9 +2,9 @@
namespace marian {
-Ptr<Scorer> scorerByType(std::string fname,
+Ptr<Scorer> scorerByType(const std::string& fname,
float weight,
- std::string model,
+ const std::string& model,
Ptr<Config> config) {
Ptr<Options> options = New<Options>();
options->merge(config);
@@ -27,6 +27,32 @@ Ptr<Scorer> scorerByType(std::string fname,
return New<ScorerWrapper>(encdec, fname, weight, model);
}
+Ptr<Scorer> scorerByType(const std::string& fname,
+ float weight,
+ const void* ptr,
+ Ptr<Config> config) {
+ Ptr<Options> options = New<Options>();
+ options->merge(config);
+ options->set("inference", true);
+
+ std::string type = options->get<std::string>("type");
+
+ // @TODO: solve this better
+ if(type == "lm" && config->has("input")) {
+ size_t index = config->get<std::vector<std::string>>("input").size();
+ options->set("index", index);
+ }
+
+ bool skipCost = config->get<bool>("skip-cost");
+ auto encdec = models::from_options(
+ options, skipCost ? models::usage::raw : models::usage::translation);
+
+ LOG(info, "Loading scorer of type {} as feature {}", type, fname);
+
+ return New<ScorerWrapper>(encdec, fname, weight, ptr);
+}
+
+
std::vector<Ptr<Scorer>> createScorers(Ptr<Config> options) {
std::vector<Ptr<Scorer>> scorers;
@@ -54,4 +80,32 @@ std::vector<Ptr<Scorer>> createScorers(Ptr<Config> options) {
return scorers;
}
+
+std::vector<Ptr<Scorer>> createScorers(Ptr<Config> options,
+ const std::vector<const void*>& ptrs) {
+ std::vector<Ptr<Scorer>> scorers;
+
+ std::vector<float> weights(ptrs.size(), 1.f);
+ if(options->has("weights"))
+ weights = options->get<std::vector<float>>("weights");
+
+ size_t i = 0;
+ for(auto ptr : ptrs) {
+ std::string fname = "F" + std::to_string(i);
+ auto modelOptions = New<Config>(*options);
+
+ try {
+ if(!options->get<bool>("ignore-model-config"))
+ modelOptions->loadModelParameters(ptr);
+ } catch(std::runtime_error& e) {
+ LOG(warn, "No model settings found in model file");
+ }
+
+ scorers.push_back(scorerByType(fname, weights[i], ptr, modelOptions));
+ i++;
+ }
+
+ return scorers;
+}
+
} // namespace marian
diff --git a/src/translator/scorers.h b/src/translator/scorers.h
index 402de8b4..8b651ef9 100644
--- a/src/translator/scorers.h
+++ b/src/translator/scorers.h
@@ -68,6 +68,7 @@ class ScorerWrapper : public Scorer {
private:
Ptr<EncoderDecoderBase> encdec_;
std::string fname_;
+ const void* ptr_;
public:
ScorerWrapper(Ptr<models::ModelBase> encdec,
@@ -76,11 +77,22 @@ public:
const std::string& fname)
: Scorer(name, weight),
encdec_(std::static_pointer_cast<EncoderDecoderBase>(encdec)),
- fname_(fname) {}
+ fname_(fname), ptr_{0} {}
+
+ ScorerWrapper(Ptr<models::ModelBase> encdec,
+ const std::string& name,
+ float weight,
+ const void* ptr)
+ : Scorer(name, weight),
+ encdec_(std::static_pointer_cast<EncoderDecoderBase>(encdec)),
+ ptr_{ptr} {}
virtual void init(Ptr<ExpressionGraph> graph) {
graph->switchParams(getName());
- encdec_->load(graph, fname_);
+ if(ptr_)
+ encdec_->mmap(graph, ptr_);
+ else
+ encdec_->load(graph, fname_);
}
virtual void clear(Ptr<ExpressionGraph> graph) {
@@ -119,10 +131,17 @@ public:
virtual std::vector<float> getAlignment() { return encdec_->getAlignment(); }
};
-Ptr<Scorer> scorerByType(std::string fname,
+Ptr<Scorer> scorerByType(const std::string& fname,
float weight,
- std::string model,
+ const std::string& model,
Ptr<Config> config);
std::vector<Ptr<Scorer>> createScorers(Ptr<Config> options);
+
+Ptr<Scorer> scorerByType(const std::string& fname,
+ float weight,
+ const void* ptr,
+ Ptr<Config> config);
+
+std::vector<Ptr<Scorer>> createScorers(Ptr<Config> options, const std::vector<const void*>& ptrs);
} // namespace marian