diff options
author | Tomasz Dwojak <t.dwojak@amu.edu.pl> | 2016-10-05 13:23:59 +0300 |
---|---|---|
committer | Tomasz Dwojak <t.dwojak@amu.edu.pl> | 2016-10-05 16:20:15 +0300 |
commit | 9f5d666848f09ea1bd861ef7c8befa2180ca077d (patch) | |
tree | 1cb882f73945dd37a1d1a9ee9cca0cb51a4d5620 | |
parent | f4c508c96953dfda235d5a7dddd6756e69193cf4 (diff) |
Add mode (CPU|GPU) option
-rw-r--r-- | CMakeLists.txt | 1 | ||||
-rwxr-xr-x | scripts/download_models.py | 4 | ||||
-rw-r--r-- | src/CMakeLists.txt | 10 | ||||
-rw-r--r-- | src/common/base_matrix.h | 33 | ||||
-rw-r--r-- | src/common/config.cpp | 42 | ||||
-rw-r--r-- | src/common/decoder_main.cpp | 35 | ||||
-rw-r--r-- | src/common/god.cpp | 3 | ||||
-rw-r--r-- | src/common/loader_factory.cpp | 62 | ||||
-rw-r--r-- | src/common/loader_factory.cu | 50 | ||||
-rw-r--r-- | src/common/loader_factory.h | 14 | ||||
-rw-r--r-- | src/cpu/decoder/encoder_decoder.h | 2 | ||||
-rw-r--r-- | src/cpu/decoder/encoder_decoder_loader.h | 28 |
12 files changed, 183 insertions, 101 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 587691d2..5201d1b7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ find_package(CUDA) if(CUDA_FOUND) LIST(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -std=c++11; -g; -O3; -arch=sm_35; -lineinfo; --use_fast_math;) add_definitions(-DCUDA_API_PER_THREAD_DEFAULT_STREAM) + add_definitions(-DCUDA) SET(CUDA_PROPAGATE_HOST_FLAGS OFF) else(CUDA_FOUND) add_definitions(-DNO_CUDA) diff --git a/scripts/download_models.py b/scripts/download_models.py index 39b3a991..294b46bb 100755 --- a/scripts/download_models.py +++ b/scripts/download_models.py @@ -18,6 +18,9 @@ beam-size: 12 devices: [0] normalize: yes threads-per-device: 1 +threads: 8 + +mode: CPU # scorer configuration scorers: @@ -30,6 +33,7 @@ weights: F0: 1.0 bpe: ./{}{}.bpe +debpe: yes # vocabularies source-vocab: ./vocab.{}.json diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index fb192c58..ee270c9c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,7 +18,6 @@ add_library(libcommon OBJECT common/god.cpp common/history.cpp common/loader.cpp - common/loader_factory.cpp common/logging.cpp common/printer.cpp common/scorer.cpp @@ -31,16 +30,19 @@ add_library(libcommon OBJECT if(CUDA_FOUND) +set_source_files_properties( common/loader_factory.cpp + PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ ) + cuda_add_executable( amun common/decoder_main.cpp - common/loader_factory.cu gpu/decoder/ape_penalty.cu gpu/decoder/encoder_decoder.cu gpu/dl4mt/encoder.cu gpu/dl4mt/gru.cu gpu/mblas/matrix.cu gpu/npz_converter.cu + common/loader_factory.cpp $<TARGET_OBJECTS:libcommon> $<TARGET_OBJECTS:cpumode> $<TARGET_OBJECTS:libyaml-cpp> @@ -49,13 +51,13 @@ cuda_add_executable( cuda_add_library(amunmt SHARED python/amunmt.cpp - common/loader_factory.cu gpu/decoder/ape_penalty.cu gpu/decoder/encoder_decoder.cu gpu/mblas/matrix.cu gpu/dl4mt/encoder.cu gpu/dl4mt/gru.cu gpu/npz_converter.cu + common/loader_factory.cpp $<TARGET_OBJECTS:libcommon> $<TARGET_OBJECTS:libcnpy> $<TARGET_OBJECTS:cpumode> @@ -67,6 +69,7 @@ else(CUDA_FOUND) add_executable( amun common/decoder_main.cpp + common/loader_factory.cpp $<TARGET_OBJECTS:libcnpy> $<TARGET_OBJECTS:cpumode> $<TARGET_OBJECTS:libcommon> @@ -74,6 +77,7 @@ add_executable( ) add_library(amunmt SHARED python/amunmt.cpp + common/loader_factory.cpp $<TARGET_OBJECTS:libcnpy> $<TARGET_OBJECTS:cpumode> $<TARGET_OBJECTS:libcommon> diff --git a/src/common/base_matrix.h b/src/common/base_matrix.h index 3a687ba2..76f03fcb 100644 --- a/src/common/base_matrix.h +++ b/src/common/base_matrix.h @@ -18,22 +18,19 @@ class BaseMatrix; typedef std::vector<BaseMatrix*> BaseMatrices; class BaseMatrix { -public: - virtual ~BaseMatrix() {} - - virtual size_t Rows() const = 0; - virtual size_t Cols() const = 0; - virtual void Resize(size_t rows, size_t cols) = 0; - - virtual void BestHyps(Beam& bestHyps, - const Beam& prevHyps, - BaseMatrices& ProbsEnsemble, - const size_t beamSize, - History& history, - const std::vector<ScorerPtr> &scorers, - const Words &filterIndices) const = 0; - virtual std::string Debug() const = 0; - + public: + virtual ~BaseMatrix() {} + + virtual size_t Rows() const = 0; + virtual size_t Cols() const = 0; + virtual void Resize(size_t rows, size_t cols) = 0; + + virtual void BestHyps(Beam& bestHyps, + const Beam& prevHyps, + BaseMatrices& ProbsEnsemble, + const size_t beamSize, + History& history, + const std::vector<ScorerPtr> &scorers, + const Words &filterIndices) const = 0; + virtual std::string Debug() const = 0; }; - - diff --git a/src/common/config.cpp b/src/common/config.cpp index 41accd59..f21bf958 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -98,6 +98,19 @@ void Validate(const YAML::Node& config) { UTIL_THROW_IF2(!(config["weights"][pair.first.as<std::string>()]), "Scorer has no weight: " << pair.first.as<std::string>()); } +void OverwriteMode(YAML::Node& config, const std::string& mode) { + std::cerr << "PRE LOADING |" << mode << "|" << std::endl; + std::stringstream sMode; + for (auto& c: mode) { + sMode << (char)toupper(c); + } + std::cerr << "UPPER: " << sMode.str() << std::endl; + config["mode"] = sMode.str(); + UTIL_THROW_IF2(config["mode"].as<std::string>() != "CPU" && config["mode"].as<std::string>() != "GPU", + "Unknown mode (allowed only CPU or GPU): " << config["mode"].as<std::string>()); + std::cerr << "POST LOADING " << mode << std::endl; +} + void OutputRec(const YAML::Node node, YAML::Emitter& out) { std::set<std::string> flow = { "devices" }; @@ -157,6 +170,12 @@ void Config::AddOptions(size_t argc, char** argv) { std::vector<std::string> sourceVocabPaths; std::string targetVocabPath; std::vector<std::string> bpePaths; + std::string mode; +#ifdef CUDA + const std::string defaultMode = "GPU"; +#else + const std::string defaultMode = "CPU"; +#endif bool debpe; std::vector<size_t> devices; @@ -177,12 +196,16 @@ void Config::AddOptions(size_t argc, char** argv) { "Overwrite bpe section in config with bpe code file.") ("debpe", po::value(&debpe)->zero_tokens()->default_value(false), "Overwrite bpe section in config with bpe code file.") + ("mode", po::value(&mode), + "Choose mode: CPU or GPU. If CUDA is unavailable, the CPU is the only option.") ("devices,d", po::value(&devices)->multitoken()->default_value(std::vector<size_t>(1, 0), "0"), "CUDA device(s) to use, set to 0 by default, " "e.g. set to 0 1 to use gpu0 and gpu1. " "Implicitly sets minimal number of threads to number of devices.") ("threads-per-device", po::value<size_t>()->default_value(1), "Number of threads per device, total thread count equals threads x devices") + ("threads", po::value<size_t>()->default_value(1), + "Number of threads on the CPU.") ("show-weights", po::value<bool>()->zero_tokens()->default_value(false), "Output used weights to stdout and exit") ("load-weights", po::value<std::string>(), @@ -265,28 +288,35 @@ void Config::AddOptions(size_t argc, char** argv) { // @TODO: Apply complex overwrites - if(Has("load-weights")) { + if (Has("load-weights")) { LoadWeights(config_, Get<std::string>("load-weights")); } - if(modelPaths.size()) { + if (modelPaths.size()) { OverwriteModels(config_, modelPaths); } - if(sourceVocabPaths.size()) { + if (sourceVocabPaths.size()) { OverwriteSourceVocabs(config_, sourceVocabPaths); } - if(targetVocabPath.size()) { + if (targetVocabPath.size()) { OverwriteTargetVocab(config_, targetVocabPath); } - if(bpePaths.size()) { + if (bpePaths.size()) { OverwriteBPE(config_, bpePaths); } - if(Get<bool>("relative-paths")) + if (mode.size()) { + OverwriteMode(config_, mode); + } else if (!config_["mode"]) { + OverwriteMode(config_, defaultMode); + } + + if (Get<bool>("relative-paths")) ProcessPaths(config_, boost::filesystem::path{configPath}.parent_path(), false); + Validate(config_); if(vm_["dump-config"].as<bool>()) { diff --git a/src/common/decoder_main.cpp b/src/common/decoder_main.cpp index c9ca78ec..aed340ae 100644 --- a/src/common/decoder_main.cpp +++ b/src/common/decoder_main.cpp @@ -3,12 +3,13 @@ #include <string> #include <boost/timer/timer.hpp> #include <boost/thread/tss.hpp> -#include "god.h" -#include "logging.h" -#include "search.h" -#include "threadpool.h" -#include "printer.h" -#include "sentence.h" + +#include "common/god.h" +#include "common/logging.h" +#include "common/search.h" +#include "common/threadpool.h" +#include "common/printer.h" +#include "common/sentence.h" History TranslationTask(const std::string& in, size_t taskCounter) { #ifdef __APPLE__ @@ -33,26 +34,34 @@ History TranslationTask(const std::string& in, size_t taskCounter) { int main(int argc, char* argv[]) { God::Init(argc, argv); + LOG(info) << "Initialization... DONE"; std::setvbuf(stdout, NULL, _IONBF, 0); boost::timer::cpu_timer timer; std::string in; std::size_t taskCounter = 0; - size_t threadCount = God::Get<size_t>("threads-per-device") - * God::Get<std::vector<size_t>>("devices").size(); + size_t threadCount; + if (God::Get<std::string>("mode") == "GPU") { + threadCount= God::Get<size_t>("threads-per-device") + * God::Get<std::vector<size_t>>("devices").size(); + } else { + threadCount = God::Get<size_t>("threads"); + } + + LOG(info) << "threadCount set to " << threadCount; - if(God::Get<bool>("wipo")) { + if (God::Get<bool>("wipo")) { LOG(info) << "Reading input"; - while(std::getline(God::GetInputStream(), in)) { + while (std::getline(God::GetInputStream(), in)) { History result = TranslationTask(in, taskCounter); Printer(result, taskCounter++, std::cout); } - } - else { + } else { LOG(info) << "Setting number of threads to " << threadCount; ThreadPool pool(threadCount); LOG(info) << "Reading input"; + std::vector<std::future<History>> results; while(std::getline(God::GetInputStream(), in)) { @@ -67,7 +76,7 @@ int main(int argc, char* argv[]) { } size_t lineCounter = 0; - for(auto&& result : results) + for (auto&& result : results) Printer(result.get(), lineCounter++, std::cout); } LOG(info) << "Total time: " << timer.format(); diff --git a/src/common/god.cpp b/src/common/god.cpp index db54fb34..c150c232 100644 --- a/src/common/god.cpp +++ b/src/common/god.cpp @@ -67,9 +67,10 @@ God& God::NonStaticInit(int argc, char** argv) { exit(0); } + LOG(info) << "Loading scorers..."; for(auto&& pair : config_.Get()["scorers"]) { std::string name = pair.first.as<std::string>(); - loaders_.emplace(name, LoaderFactory::Create(name, pair.second)); + loaders_.emplace(name, LoaderFactory::Create(name, pair.second, config_.Get()["mode"].as<std::string>())); } if (config_.inputPath.empty()) { diff --git a/src/common/loader_factory.cpp b/src/common/loader_factory.cpp index 55754d81..0eb84669 100644 --- a/src/common/loader_factory.cpp +++ b/src/common/loader_factory.cpp @@ -1,11 +1,34 @@ #include "loader_factory.h" -#include "cpu/decoder/encoder_decoder.h" -#ifdef NO_CUDA -LoaderPtr LoaderFactory::Create(const std::string& name, - const YAML::Node& config) -{ +#include "scorer.h" +#include "cpu/decoder/encoder_decoder_loader.h" + +#ifdef CUDA +#include "gpu/decoder/encoder_decoder.h" +#include "gpu/decoder/ape_penalty.h" + +#ifdef KENLM +#include "gpu/decoder/language_model.h" +#endif +#endif + + +LoaderPtr LoaderFactory::Create( + const std::string& name, + const YAML::Node& config, + const std::string& mode) { Loader *loader; + + if (HAS_GPU_SUPPORT && (mode == "GPU")) { + loader = CreateGPU(name, config); + if (loader) { + return LoaderPtr(loader); + } else { + LOG(info) << "No GPU scorer type. Loading CPU"; + } + } + + loader = CreateCPU(name, config); if (loader) { return LoaderPtr(loader); @@ -14,6 +37,31 @@ LoaderPtr LoaderFactory::Create(const std::string& name, std::string type = config["type"].as<std::string>(); UTIL_THROW2("Unknown scorer in config file: " << type); } + +#ifdef CUDA +Loader *LoaderFactory::CreateGPU( + const std::string& name, + const YAML::Node& config) { + UTIL_THROW_IF2(!config["type"], + "Missing scorer type in config file"); + + std::string type = config["type"].as<std::string>(); + IF_MATCH_RETURN(type, "Nematus", GPU::EncoderDecoderLoader); + IF_MATCH_RETURN(type, "nematus", GPU::EncoderDecoderLoader); + IF_MATCH_RETURN(type, "NEMATUS", GPU::EncoderDecoderLoader); + + IF_MATCH_RETURN(type, "Ape", GPU::ApePenaltyLoader); + IF_MATCH_RETURN(type, "ape", GPU::ApePenaltyLoader); + IF_MATCH_RETURN(type, "APE", GPU::ApePenaltyLoader); + +#ifdef KENLM + IF_MATCH_RETURN(type, "KenLM", GPU::KenLMLoader) + IF_MATCH_RETURN(type, "kenlm", GPU::KenLMLoader) + IF_MATCH_RETURN(type, "KENLM", GPU::KenLMLoader) +#endif + + return NULL; +} #endif @@ -23,7 +71,9 @@ Loader *LoaderFactory::CreateCPU(const std::string& name, "Missing scorer type in config file"); std::string type = config["type"].as<std::string>(); - IF_MATCH_RETURN(type, "Nematus.CPU", CPU::EncoderDecoderLoader); + IF_MATCH_RETURN(type, "Nematus", CPU::EncoderDecoderLoader); + IF_MATCH_RETURN(type, "nematus", CPU::EncoderDecoderLoader); + IF_MATCH_RETURN(type, "NEMATUS", CPU::EncoderDecoderLoader); return NULL; } diff --git a/src/common/loader_factory.cu b/src/common/loader_factory.cu deleted file mode 100644 index c7880072..00000000 --- a/src/common/loader_factory.cu +++ /dev/null @@ -1,50 +0,0 @@ -#include "loader_factory.h" -#include "scorer.h" -#include "gpu/decoder/encoder_decoder.h" -#include "gpu/decoder/ape_penalty.h" - -#ifdef KENLM -#include "gpu/decoder/language_model.h" -#endif - -LoaderPtr LoaderFactory::Create(const std::string& name, - const YAML::Node& config) -{ - Loader *loader; - loader = CreateGPU(name, config); - if (loader) { - return LoaderPtr(loader); - } - - - loader = CreateCPU(name, config); - if (loader) { - return LoaderPtr(loader); - } - - std::string type = config["type"].as<std::string>(); - UTIL_THROW2("Unknown scorer in config file: " << type); -} - -Loader *LoaderFactory::CreateGPU(const std::string& name, - const YAML::Node& config) { - UTIL_THROW_IF2(!config["type"], - "Missing scorer type in config file"); - - std::string type = config["type"].as<std::string>(); - IF_MATCH_RETURN(type, "Nematus", GPU::EncoderDecoderLoader); - IF_MATCH_RETURN(type, "nematus", GPU::EncoderDecoderLoader); - IF_MATCH_RETURN(type, "NEMATUS", GPU::EncoderDecoderLoader); - - IF_MATCH_RETURN(type, "Ape", GPU::ApePenaltyLoader); - IF_MATCH_RETURN(type, "ape", GPU::ApePenaltyLoader); - IF_MATCH_RETURN(type, "APE", GPU::ApePenaltyLoader); - -#ifdef KENLM - IF_MATCH_RETURN(type, "KenLM", GPU::KenLMLoader) - IF_MATCH_RETURN(type, "kenlm", GPU::KenLMLoader) - IF_MATCH_RETURN(type, "KENLM", GPU::KenLMLoader) -#endif - - return NULL; -} diff --git a/src/common/loader_factory.h b/src/common/loader_factory.h index a4022c76..c00f63bc 100644 --- a/src/common/loader_factory.h +++ b/src/common/loader_factory.h @@ -18,14 +18,22 @@ do { \ class LoaderFactory { public: static LoaderPtr Create(const std::string& name, - const YAML::Node& config); + const YAML::Node& config, + const std::string& mode); protected: - static Loader *CreateGPU(const std::string& name, - const YAML::Node& config); static Loader *CreateCPU(const std::string& name, const YAML::Node& config); + static Loader *CreateGPU(const std::string& name, + const YAML::Node& config); + +#ifdef CUDA + static const bool HAS_GPU_SUPPORT = true; +#else + static const bool HAS_GPU_SUPPORT = false; +#endif + }; diff --git a/src/cpu/decoder/encoder_decoder.h b/src/cpu/decoder/encoder_decoder.h index 19bbfd85..92d3aa60 100644 --- a/src/cpu/decoder/encoder_decoder.h +++ b/src/cpu/decoder/encoder_decoder.h @@ -27,7 +27,7 @@ class EncoderDecoderState : public State { CPU::mblas::Matrix& GetStates(); - CPU::mblas::Matrix& GetEmbeddings(); + CPU::mblas::Matrix& GetEmbeddings(); const CPU::mblas::Matrix& GetStates() const; diff --git a/src/cpu/decoder/encoder_decoder_loader.h b/src/cpu/decoder/encoder_decoder_loader.h new file mode 100644 index 00000000..7346b58e --- /dev/null +++ b/src/cpu/decoder/encoder_decoder_loader.h @@ -0,0 +1,28 @@ +#pragma once + +#include <vector> +#include <string> +#include <yaml-cpp/yaml.h> + +#include "common/scorer.h" +#include "common/loader.h" +#include "common/logging.h" + +namespace CPU { + +class Weights; + +class EncoderDecoderLoader : public Loader { + public: + EncoderDecoderLoader(const std::string name, + const YAML::Node& config); + + virtual void Load(); + + virtual ScorerPtr NewScorer(const size_t taskId); + + private: + std::vector<std::unique_ptr<Weights>> weights_; +}; + +} // namespace CPU |