Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Tomasz Dwojak <t.dwojak@amu.edu.pl> 2016-10-05 13:23:59 +0300
committer Tomasz Dwojak <t.dwojak@amu.edu.pl> 2016-10-05 16:20:15 +0300
commit 9f5d666848f09ea1bd861ef7c8befa2180ca077d (patch)
tree 1cb882f73945dd37a1d1a9ee9cca0cb51a4d5620
parent f4c508c96953dfda235d5a7dddd6756e69193cf4 (diff)
Add mode (CPU|GPU) option
-rw-r--r-- CMakeLists.txt 1
-rwxr-xr-x scripts/download_models.py 4
-rw-r--r-- src/CMakeLists.txt 10
-rw-r--r-- src/common/base_matrix.h 33
-rw-r--r-- src/common/config.cpp 42
-rw-r--r-- src/common/decoder_main.cpp 35
-rw-r--r-- src/common/god.cpp 3
-rw-r--r-- src/common/loader_factory.cpp 62
-rw-r--r-- src/common/loader_factory.cu 50
-rw-r--r-- src/common/loader_factory.h 14
-rw-r--r-- src/cpu/decoder/encoder_decoder.h 2
-rw-r--r-- src/cpu/decoder/encoder_decoder_loader.h 28
12 files changed, 183 insertions, 101 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 587691d2..5201d1b7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,6 +18,7 @@ find_package(CUDA)
if(CUDA_FOUND)
LIST(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -std=c++11; -g; -O3; -arch=sm_35; -lineinfo; --use_fast_math;)
add_definitions(-DCUDA_API_PER_THREAD_DEFAULT_STREAM)
+ add_definitions(-DCUDA)
SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
else(CUDA_FOUND)
add_definitions(-DNO_CUDA)
diff --git a/scripts/download_models.py b/scripts/download_models.py
index 39b3a991..294b46bb 100755
--- a/scripts/download_models.py
+++ b/scripts/download_models.py
@@ -18,6 +18,9 @@ beam-size: 12
devices: [0]
normalize: yes
threads-per-device: 1
+threads: 8
+
+mode: CPU
# scorer configuration
scorers:
@@ -30,6 +33,7 @@ weights:
F0: 1.0
bpe: ./{}{}.bpe
+debpe: yes
# vocabularies
source-vocab: ./vocab.{}.json
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index fb192c58..ee270c9c 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -18,7 +18,6 @@ add_library(libcommon OBJECT
common/god.cpp
common/history.cpp
common/loader.cpp
- common/loader_factory.cpp
common/logging.cpp
common/printer.cpp
common/scorer.cpp
@@ -31,16 +30,19 @@ add_library(libcommon OBJECT
if(CUDA_FOUND)
+set_source_files_properties( common/loader_factory.cpp
+ PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ )
+
cuda_add_executable(
amun
common/decoder_main.cpp
- common/loader_factory.cu
gpu/decoder/ape_penalty.cu
gpu/decoder/encoder_decoder.cu
gpu/dl4mt/encoder.cu
gpu/dl4mt/gru.cu
gpu/mblas/matrix.cu
gpu/npz_converter.cu
+ common/loader_factory.cpp
$<TARGET_OBJECTS:libcommon>
$<TARGET_OBJECTS:cpumode>
$<TARGET_OBJECTS:libyaml-cpp>
@@ -49,13 +51,13 @@ cuda_add_executable(
cuda_add_library(amunmt SHARED
python/amunmt.cpp
- common/loader_factory.cu
gpu/decoder/ape_penalty.cu
gpu/decoder/encoder_decoder.cu
gpu/mblas/matrix.cu
gpu/dl4mt/encoder.cu
gpu/dl4mt/gru.cu
gpu/npz_converter.cu
+ common/loader_factory.cpp
$<TARGET_OBJECTS:libcommon>
$<TARGET_OBJECTS:libcnpy>
$<TARGET_OBJECTS:cpumode>
@@ -67,6 +69,7 @@ else(CUDA_FOUND)
add_executable(
amun
common/decoder_main.cpp
+ common/loader_factory.cpp
$<TARGET_OBJECTS:libcnpy>
$<TARGET_OBJECTS:cpumode>
$<TARGET_OBJECTS:libcommon>
@@ -74,6 +77,7 @@ add_executable(
)
add_library(amunmt SHARED
python/amunmt.cpp
+ common/loader_factory.cpp
$<TARGET_OBJECTS:libcnpy>
$<TARGET_OBJECTS:cpumode>
$<TARGET_OBJECTS:libcommon>
diff --git a/src/common/base_matrix.h b/src/common/base_matrix.h
index 3a687ba2..76f03fcb 100644
--- a/src/common/base_matrix.h
+++ b/src/common/base_matrix.h
@@ -18,22 +18,19 @@ class BaseMatrix;
typedef std::vector<BaseMatrix*> BaseMatrices;
class BaseMatrix {
-public:
- virtual ~BaseMatrix() {}
-
- virtual size_t Rows() const = 0;
- virtual size_t Cols() const = 0;
- virtual void Resize(size_t rows, size_t cols) = 0;
-
- virtual void BestHyps(Beam& bestHyps,
- const Beam& prevHyps,
- BaseMatrices& ProbsEnsemble,
- const size_t beamSize,
- History& history,
- const std::vector<ScorerPtr> &scorers,
- const Words &filterIndices) const = 0;
- virtual std::string Debug() const = 0;
-
+ public:
+ virtual ~BaseMatrix() {}
+
+ virtual size_t Rows() const = 0;
+ virtual size_t Cols() const = 0;
+ virtual void Resize(size_t rows, size_t cols) = 0;
+
+ virtual void BestHyps(Beam& bestHyps,
+ const Beam& prevHyps,
+ BaseMatrices& ProbsEnsemble,
+ const size_t beamSize,
+ History& history,
+ const std::vector<ScorerPtr> &scorers,
+ const Words &filterIndices) const = 0;
+ virtual std::string Debug() const = 0;
};
-
-
diff --git a/src/common/config.cpp b/src/common/config.cpp
index 41accd59..f21bf958 100644
--- a/src/common/config.cpp
+++ b/src/common/config.cpp
@@ -98,6 +98,19 @@ void Validate(const YAML::Node& config) {
UTIL_THROW_IF2(!(config["weights"][pair.first.as<std::string>()]), "Scorer has no weight: " << pair.first.as<std::string>());
}
+void OverwriteMode(YAML::Node& config, const std::string& mode) {
+ std::cerr << "PRE LOADING |" << mode << "|" << std::endl;
+ std::stringstream sMode;
+ for (auto& c: mode) {
+ sMode << (char)toupper(c);
+ }
+ std::cerr << "UPPER: " << sMode.str() << std::endl;
+ config["mode"] = sMode.str();
+ UTIL_THROW_IF2(config["mode"].as<std::string>() != "CPU" && config["mode"].as<std::string>() != "GPU",
+ "Unknown mode (allowed only CPU or GPU): " << config["mode"].as<std::string>());
+ std::cerr << "POST LOADING " << mode << std::endl;
+}
+
void OutputRec(const YAML::Node node, YAML::Emitter& out) {
std::set<std::string> flow = { "devices" };
@@ -157,6 +170,12 @@ void Config::AddOptions(size_t argc, char** argv) {
std::vector<std::string> sourceVocabPaths;
std::string targetVocabPath;
std::vector<std::string> bpePaths;
+ std::string mode;
+#ifdef CUDA
+ const std::string defaultMode = "GPU";
+#else
+ const std::string defaultMode = "CPU";
+#endif
bool debpe;
std::vector<size_t> devices;
@@ -177,12 +196,16 @@ void Config::AddOptions(size_t argc, char** argv) {
"Overwrite bpe section in config with bpe code file.")
("debpe", po::value(&debpe)->zero_tokens()->default_value(false),
"Overwrite bpe section in config with bpe code file.")
+ ("mode", po::value(&mode),
+ "Choose mode: CPU or GPU. If CUDA is unavailable, the CPU is the only option.")
("devices,d", po::value(&devices)->multitoken()->default_value(std::vector<size_t>(1, 0), "0"),
"CUDA device(s) to use, set to 0 by default, "
"e.g. set to 0 1 to use gpu0 and gpu1. "
"Implicitly sets minimal number of threads to number of devices.")
("threads-per-device", po::value<size_t>()->default_value(1),
"Number of threads per device, total thread count equals threads x devices")
+ ("threads", po::value<size_t>()->default_value(1),
+ "Number of threads on the CPU.")
("show-weights", po::value<bool>()->zero_tokens()->default_value(false),
"Output used weights to stdout and exit")
("load-weights", po::value<std::string>(),
@@ -265,28 +288,35 @@ void Config::AddOptions(size_t argc, char** argv) {
// @TODO: Apply complex overwrites
- if(Has("load-weights")) {
+ if (Has("load-weights")) {
LoadWeights(config_, Get<std::string>("load-weights"));
}
- if(modelPaths.size()) {
+ if (modelPaths.size()) {
OverwriteModels(config_, modelPaths);
}
- if(sourceVocabPaths.size()) {
+ if (sourceVocabPaths.size()) {
OverwriteSourceVocabs(config_, sourceVocabPaths);
}
- if(targetVocabPath.size()) {
+ if (targetVocabPath.size()) {
OverwriteTargetVocab(config_, targetVocabPath);
}
- if(bpePaths.size()) {
+ if (bpePaths.size()) {
OverwriteBPE(config_, bpePaths);
}
- if(Get<bool>("relative-paths"))
+ if (mode.size()) {
+ OverwriteMode(config_, mode);
+ } else if (!config_["mode"]) {
+ OverwriteMode(config_, defaultMode);
+ }
+
+ if (Get<bool>("relative-paths"))
ProcessPaths(config_, boost::filesystem::path{configPath}.parent_path(), false);
+
Validate(config_);
if(vm_["dump-config"].as<bool>()) {
diff --git a/src/common/decoder_main.cpp b/src/common/decoder_main.cpp
index c9ca78ec..aed340ae 100644
--- a/src/common/decoder_main.cpp
+++ b/src/common/decoder_main.cpp
@@ -3,12 +3,13 @@
#include <string>
#include <boost/timer/timer.hpp>
#include <boost/thread/tss.hpp>
-#include "god.h"
-#include "logging.h"
-#include "search.h"
-#include "threadpool.h"
-#include "printer.h"
-#include "sentence.h"
+
+#include "common/god.h"
+#include "common/logging.h"
+#include "common/search.h"
+#include "common/threadpool.h"
+#include "common/printer.h"
+#include "common/sentence.h"
History TranslationTask(const std::string& in, size_t taskCounter) {
#ifdef __APPLE__
@@ -33,26 +34,34 @@ History TranslationTask(const std::string& in, size_t taskCounter) {
int main(int argc, char* argv[]) {
God::Init(argc, argv);
+ LOG(info) << "Initialization... DONE";
std::setvbuf(stdout, NULL, _IONBF, 0);
boost::timer::cpu_timer timer;
std::string in;
std::size_t taskCounter = 0;
- size_t threadCount = God::Get<size_t>("threads-per-device")
- * God::Get<std::vector<size_t>>("devices").size();
+ size_t threadCount;
+ if (God::Get<std::string>("mode") == "GPU") {
+ threadCount= God::Get<size_t>("threads-per-device")
+ * God::Get<std::vector<size_t>>("devices").size();
+ } else {
+ threadCount = God::Get<size_t>("threads");
+ }
+
+ LOG(info) << "threadCount set to " << threadCount;
- if(God::Get<bool>("wipo")) {
+ if (God::Get<bool>("wipo")) {
LOG(info) << "Reading input";
- while(std::getline(God::GetInputStream(), in)) {
+ while (std::getline(God::GetInputStream(), in)) {
History result = TranslationTask(in, taskCounter);
Printer(result, taskCounter++, std::cout);
}
- }
- else {
+ } else {
LOG(info) << "Setting number of threads to " << threadCount;
ThreadPool pool(threadCount);
LOG(info) << "Reading input";
+
std::vector<std::future<History>> results;
while(std::getline(God::GetInputStream(), in)) {
@@ -67,7 +76,7 @@ int main(int argc, char* argv[]) {
}
size_t lineCounter = 0;
- for(auto&& result : results)
+ for (auto&& result : results)
Printer(result.get(), lineCounter++, std::cout);
}
LOG(info) << "Total time: " << timer.format();
diff --git a/src/common/god.cpp b/src/common/god.cpp
index db54fb34..c150c232 100644
--- a/src/common/god.cpp
+++ b/src/common/god.cpp
@@ -67,9 +67,10 @@ God& God::NonStaticInit(int argc, char** argv) {
exit(0);
}
+ LOG(info) << "Loading scorers...";
for(auto&& pair : config_.Get()["scorers"]) {
std::string name = pair.first.as<std::string>();
- loaders_.emplace(name, LoaderFactory::Create(name, pair.second));
+ loaders_.emplace(name, LoaderFactory::Create(name, pair.second, config_.Get()["mode"].as<std::string>()));
}
if (config_.inputPath.empty()) {
diff --git a/src/common/loader_factory.cpp b/src/common/loader_factory.cpp
index 55754d81..0eb84669 100644
--- a/src/common/loader_factory.cpp
+++ b/src/common/loader_factory.cpp
@@ -1,11 +1,34 @@
#include "loader_factory.h"
-#include "cpu/decoder/encoder_decoder.h"
-#ifdef NO_CUDA
-LoaderPtr LoaderFactory::Create(const std::string& name,
- const YAML::Node& config)
-{
+#include "scorer.h"
+#include "cpu/decoder/encoder_decoder_loader.h"
+
+#ifdef CUDA
+#include "gpu/decoder/encoder_decoder.h"
+#include "gpu/decoder/ape_penalty.h"
+
+#ifdef KENLM
+#include "gpu/decoder/language_model.h"
+#endif
+#endif
+
+
+LoaderPtr LoaderFactory::Create(
+ const std::string& name,
+ const YAML::Node& config,
+ const std::string& mode) {
Loader *loader;
+
+ if (HAS_GPU_SUPPORT && (mode == "GPU")) {
+ loader = CreateGPU(name, config);
+ if (loader) {
+ return LoaderPtr(loader);
+ } else {
+ LOG(info) << "No GPU scorer type. Loading CPU";
+ }
+ }
+
+
loader = CreateCPU(name, config);
if (loader) {
return LoaderPtr(loader);
@@ -14,6 +37,31 @@ LoaderPtr LoaderFactory::Create(const std::string& name,
std::string type = config["type"].as<std::string>();
UTIL_THROW2("Unknown scorer in config file: " << type);
}
+
+#ifdef CUDA
+Loader *LoaderFactory::CreateGPU(
+ const std::string& name,
+ const YAML::Node& config) {
+ UTIL_THROW_IF2(!config["type"],
+ "Missing scorer type in config file");
+
+ std::string type = config["type"].as<std::string>();
+ IF_MATCH_RETURN(type, "Nematus", GPU::EncoderDecoderLoader);
+ IF_MATCH_RETURN(type, "nematus", GPU::EncoderDecoderLoader);
+ IF_MATCH_RETURN(type, "NEMATUS", GPU::EncoderDecoderLoader);
+
+ IF_MATCH_RETURN(type, "Ape", GPU::ApePenaltyLoader);
+ IF_MATCH_RETURN(type, "ape", GPU::ApePenaltyLoader);
+ IF_MATCH_RETURN(type, "APE", GPU::ApePenaltyLoader);
+
+#ifdef KENLM
+ IF_MATCH_RETURN(type, "KenLM", GPU::KenLMLoader)
+ IF_MATCH_RETURN(type, "kenlm", GPU::KenLMLoader)
+ IF_MATCH_RETURN(type, "KENLM", GPU::KenLMLoader)
+#endif
+
+ return NULL;
+}
#endif
@@ -23,7 +71,9 @@ Loader *LoaderFactory::CreateCPU(const std::string& name,
"Missing scorer type in config file");
std::string type = config["type"].as<std::string>();
- IF_MATCH_RETURN(type, "Nematus.CPU", CPU::EncoderDecoderLoader);
+ IF_MATCH_RETURN(type, "Nematus", CPU::EncoderDecoderLoader);
+ IF_MATCH_RETURN(type, "nematus", CPU::EncoderDecoderLoader);
+ IF_MATCH_RETURN(type, "NEMATUS", CPU::EncoderDecoderLoader);
return NULL;
}
diff --git a/src/common/loader_factory.cu b/src/common/loader_factory.cu
deleted file mode 100644
index c7880072..00000000
--- a/src/common/loader_factory.cu
+++ /dev/null
@@ -1,50 +0,0 @@
-#include "loader_factory.h"
-#include "scorer.h"
-#include "gpu/decoder/encoder_decoder.h"
-#include "gpu/decoder/ape_penalty.h"
-
-#ifdef KENLM
-#include "gpu/decoder/language_model.h"
-#endif
-
-LoaderPtr LoaderFactory::Create(const std::string& name,
- const YAML::Node& config)
-{
- Loader *loader;
- loader = CreateGPU(name, config);
- if (loader) {
- return LoaderPtr(loader);
- }
-
-
- loader = CreateCPU(name, config);
- if (loader) {
- return LoaderPtr(loader);
- }
-
- std::string type = config["type"].as<std::string>();
- UTIL_THROW2("Unknown scorer in config file: " << type);
-}
-
-Loader *LoaderFactory::CreateGPU(const std::string& name,
- const YAML::Node& config) {
- UTIL_THROW_IF2(!config["type"],
- "Missing scorer type in config file");
-
- std::string type = config["type"].as<std::string>();
- IF_MATCH_RETURN(type, "Nematus", GPU::EncoderDecoderLoader);
- IF_MATCH_RETURN(type, "nematus", GPU::EncoderDecoderLoader);
- IF_MATCH_RETURN(type, "NEMATUS", GPU::EncoderDecoderLoader);
-
- IF_MATCH_RETURN(type, "Ape", GPU::ApePenaltyLoader);
- IF_MATCH_RETURN(type, "ape", GPU::ApePenaltyLoader);
- IF_MATCH_RETURN(type, "APE", GPU::ApePenaltyLoader);
-
-#ifdef KENLM
- IF_MATCH_RETURN(type, "KenLM", GPU::KenLMLoader)
- IF_MATCH_RETURN(type, "kenlm", GPU::KenLMLoader)
- IF_MATCH_RETURN(type, "KENLM", GPU::KenLMLoader)
-#endif
-
- return NULL;
-}
diff --git a/src/common/loader_factory.h b/src/common/loader_factory.h
index a4022c76..c00f63bc 100644
--- a/src/common/loader_factory.h
+++ b/src/common/loader_factory.h
@@ -18,14 +18,22 @@ do { \
class LoaderFactory {
public:
static LoaderPtr Create(const std::string& name,
- const YAML::Node& config);
+ const YAML::Node& config,
+ const std::string& mode);
protected:
- static Loader *CreateGPU(const std::string& name,
- const YAML::Node& config);
static Loader *CreateCPU(const std::string& name,
const YAML::Node& config);
+ static Loader *CreateGPU(const std::string& name,
+ const YAML::Node& config);
+
+#ifdef CUDA
+ static const bool HAS_GPU_SUPPORT = true;
+#else
+ static const bool HAS_GPU_SUPPORT = false;
+#endif
+
};
diff --git a/src/cpu/decoder/encoder_decoder.h b/src/cpu/decoder/encoder_decoder.h
index 19bbfd85..92d3aa60 100644
--- a/src/cpu/decoder/encoder_decoder.h
+++ b/src/cpu/decoder/encoder_decoder.h
@@ -27,7 +27,7 @@ class EncoderDecoderState : public State {
CPU::mblas::Matrix& GetStates();
- CPU::mblas::Matrix& GetEmbeddings();
+ CPU::mblas::Matrix& GetEmbeddings();
const CPU::mblas::Matrix& GetStates() const;
diff --git a/src/cpu/decoder/encoder_decoder_loader.h b/src/cpu/decoder/encoder_decoder_loader.h
new file mode 100644
index 00000000..7346b58e
--- /dev/null
+++ b/src/cpu/decoder/encoder_decoder_loader.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include <vector>
+#include <string>
+#include <yaml-cpp/yaml.h>
+
+#include "common/scorer.h"
+#include "common/loader.h"
+#include "common/logging.h"
+
+namespace CPU {
+
+class Weights;
+
+class EncoderDecoderLoader : public Loader {
+ public:
+ EncoderDecoderLoader(const std::string name,
+ const YAML::Node& config);
+
+ virtual void Load();
+
+ virtual ScorerPtr NewScorer(const size_t taskId);
+
+ private:
+ std::vector<std::unique_ptr<Weights>> weights_;
+};
+
+} // namespace CPU