Add mode (CPU|GPU) option

author: Tomasz Dwojak <t.dwojak@amu.edu.pl> 2016-10-05 13:23:59 +0300
committer: Tomasz Dwojak <t.dwojak@amu.edu.pl> 2016-10-05 16:20:15 +0300
commit: 9f5d666848f09ea1bd861ef7c8befa2180ca077d (patch)
tree: 1cb882f73945dd37a1d1a9ee9cca0cb51a4d5620
parent: f4c508c96953dfda235d5a7dddd6756e69193cf4 (diff)
12 files changed, 183 insertions, 101 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 587691d2..5201d1b7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,6 +18,7 @@ find_package(CUDA)
 if(CUDA_FOUND)
     LIST(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -std=c++11; -g; -O3; -arch=sm_35; -lineinfo; --use_fast_math;)
     add_definitions(-DCUDA_API_PER_THREAD_DEFAULT_STREAM)
+    add_definitions(-DCUDA)
     SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
 else(CUDA_FOUND)
     add_definitions(-DNO_CUDA)
diff --git a/scripts/download_models.py b/scripts/download_models.py
index 39b3a991..294b46bb 100755
--- a/scripts/download_models.py
+++ b/scripts/download_models.py
@@ -18,6 +18,9 @@ beam-size: 12
 devices: [0]
 normalize: yes
 threads-per-device: 1
+threads: 8
+
+mode: CPU
 
 # scorer configuration
 scorers:
@@ -30,6 +33,7 @@ weights:
   F0: 1.0
 
 bpe: ./{}{}.bpe
+debpe: yes
 
 # vocabularies
 source-vocab: ./vocab.{}.json
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index fb192c58..ee270c9c 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -18,7 +18,6 @@ add_library(libcommon OBJECT
   common/god.cpp
   common/history.cpp
   common/loader.cpp
-  common/loader_factory.cpp
   common/logging.cpp
   common/printer.cpp
   common/scorer.cpp
@@ -31,16 +30,19 @@ add_library(libcommon OBJECT
 
 if(CUDA_FOUND)
 
+set_source_files_properties( common/loader_factory.cpp
+  PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ )
+
 cuda_add_executable(
   amun
   common/decoder_main.cpp
-  common/loader_factory.cu
   gpu/decoder/ape_penalty.cu
   gpu/decoder/encoder_decoder.cu
   gpu/dl4mt/encoder.cu
   gpu/dl4mt/gru.cu
   gpu/mblas/matrix.cu
   gpu/npz_converter.cu
+  common/loader_factory.cpp
   $<TARGET_OBJECTS:libcommon>
   $<TARGET_OBJECTS:cpumode>
   $<TARGET_OBJECTS:libyaml-cpp>
@@ -49,13 +51,13 @@ cuda_add_executable(
 
 cuda_add_library(amunmt SHARED
   python/amunmt.cpp
-  common/loader_factory.cu
   gpu/decoder/ape_penalty.cu
   gpu/decoder/encoder_decoder.cu
   gpu/mblas/matrix.cu
   gpu/dl4mt/encoder.cu
   gpu/dl4mt/gru.cu
   gpu/npz_converter.cu
+  common/loader_factory.cpp
   $<TARGET_OBJECTS:libcommon>
   $<TARGET_OBJECTS:libcnpy>
   $<TARGET_OBJECTS:cpumode>
@@ -67,6 +69,7 @@ else(CUDA_FOUND)
 add_executable(
   amun
   common/decoder_main.cpp
+  common/loader_factory.cpp
   $<TARGET_OBJECTS:libcnpy>
   $<TARGET_OBJECTS:cpumode>
   $<TARGET_OBJECTS:libcommon>
@@ -74,6 +77,7 @@ add_executable(
 )
 add_library(amunmt SHARED
   python/amunmt.cpp
+  common/loader_factory.cpp
   $<TARGET_OBJECTS:libcnpy>
   $<TARGET_OBJECTS:cpumode>
   $<TARGET_OBJECTS:libcommon>
diff --git a/src/common/base_matrix.h b/src/common/base_matrix.h
index 3a687ba2..76f03fcb 100644
--- a/src/common/base_matrix.h
+++ b/src/common/base_matrix.h
@@ -18,22 +18,19 @@ class BaseMatrix;
 typedef std::vector<BaseMatrix*> BaseMatrices;
 
 class BaseMatrix {
-public:
-	virtual ~BaseMatrix() {}
-
-	virtual size_t Rows() const = 0;
-	virtual size_t Cols() const = 0;
-	virtual void Resize(size_t rows, size_t cols) = 0;
-
-    virtual void BestHyps(Beam& bestHyps,
-    		const Beam& prevHyps,
-    		BaseMatrices& ProbsEnsemble,
-    		const size_t beamSize,
-    		History& history,
-    		const std::vector<ScorerPtr> &scorers,
-    		const Words &filterIndices) const = 0;
-    virtual std::string Debug() const = 0;
-
+  public:
+    virtual ~BaseMatrix() {}
+    // Everything below is pure virtual; concrete matrix classes implement it.
+    virtual size_t Rows() const = 0;
+    virtual size_t Cols() const = 0;
+    virtual void Resize(size_t rows, size_t cols) = 0;
+    // NOTE(review): presumably fills bestHyps during beam search -- confirm.
+    virtual void BestHyps(Beam& bestHyps,
+                          const Beam& prevHyps,
+                          BaseMatrices& ProbsEnsemble,
+                          const size_t beamSize,
+                          History& history,
+                          const std::vector<ScorerPtr> &scorers,
+                          const Words &filterIndices) const = 0;
+    virtual std::string Debug() const = 0;
 };
-
-
diff --git a/src/common/config.cpp b/src/common/config.cpp
index 41accd59..f21bf958 100644
--- a/src/common/config.cpp
+++ b/src/common/config.cpp
@@ -98,6 +98,19 @@ void Validate(const YAML::Node& config) {
     UTIL_THROW_IF2(!(config["weights"][pair.first.as<std::string>()]), "Scorer has no weight: " << pair.first.as<std::string>());
 }
 
+void OverwriteMode(YAML::Node& config, const std::string& mode) {
+  // Store the requested mode upper-cased in config["mode"], so "cpu"/"Gpu" are
+  // accepted; throws (UTIL_THROW_IF2) unless the result is "CPU" or "GPU".
+  std::string upperMode;
+  for (const char c : mode) {
+    // toupper() on a negative char is undefined; go through unsigned char.
+    upperMode += static_cast<char>(toupper(static_cast<unsigned char>(c)));
+  }
+  UTIL_THROW_IF2(upperMode != "CPU" && upperMode != "GPU",
+                 "Unknown mode (allowed only CPU or GPU): " << upperMode);
+  config["mode"] = upperMode;
+}
+
 
 void OutputRec(const YAML::Node node, YAML::Emitter& out) {
   std::set<std::string> flow = { "devices" };
@@ -157,6 +170,12 @@ void Config::AddOptions(size_t argc, char** argv) {
   std::vector<std::string> sourceVocabPaths;
   std::string targetVocabPath;
   std::vector<std::string> bpePaths;
+  std::string mode;
+#ifdef CUDA
+  const std::string defaultMode = "GPU";
+#else
+  const std::string defaultMode = "CPU";
+#endif
   bool debpe;
 
   std::vector<size_t> devices;
@@ -177,12 +196,16 @@ void Config::AddOptions(size_t argc, char** argv) {
      "Overwrite bpe section in config with bpe code file.")
     ("debpe", po::value(&debpe)->zero_tokens()->default_value(false),
      "Overwrite bpe section in config with bpe code file.")
+    ("mode", po::value(&mode),
+     "Choose mode: CPU or GPU. If CUDA is unavailable, the CPU is the only option.")
     ("devices,d", po::value(&devices)->multitoken()->default_value(std::vector<size_t>(1, 0), "0"),
      "CUDA device(s) to use, set to 0 by default, "
      "e.g. set to 0 1 to use gpu0 and gpu1. "
      "Implicitly sets minimal number of threads to number of devices.")
     ("threads-per-device", po::value<size_t>()->default_value(1),
      "Number of threads per device, total thread count equals threads x devices")
+    ("threads", po::value<size_t>()->default_value(1),
+     "Number of threads on the CPU.")
     ("show-weights", po::value<bool>()->zero_tokens()->default_value(false),
      "Output used weights to stdout and exit")
     ("load-weights", po::value<std::string>(),
@@ -265,28 +288,35 @@ void Config::AddOptions(size_t argc, char** argv) {
 
   // @TODO: Apply complex overwrites
 
-  if(Has("load-weights")) {
+  if (Has("load-weights")) {
     LoadWeights(config_, Get<std::string>("load-weights"));
   }
 
-  if(modelPaths.size()) {
+  if (modelPaths.size()) {
     OverwriteModels(config_, modelPaths);
   }
 
-  if(sourceVocabPaths.size()) {
+  if (sourceVocabPaths.size()) {
     OverwriteSourceVocabs(config_, sourceVocabPaths);
   }
 
-  if(targetVocabPath.size()) {
+  if (targetVocabPath.size()) {
     OverwriteTargetVocab(config_, targetVocabPath);
   }
 
-  if(bpePaths.size()) {
+  if (bpePaths.size()) {
     OverwriteBPE(config_, bpePaths);
   }
 
-  if(Get<bool>("relative-paths"))
+  if (mode.size()) {
+    OverwriteMode(config_, mode);
+  } else if (!config_["mode"]) {
+    OverwriteMode(config_, defaultMode);
+  }
+
+  if (Get<bool>("relative-paths"))
     ProcessPaths(config_, boost::filesystem::path{configPath}.parent_path(), false);
+
   Validate(config_);
 
   if(vm_["dump-config"].as<bool>()) {
diff --git a/src/common/decoder_main.cpp b/src/common/decoder_main.cpp
index c9ca78ec..aed340ae 100644
--- a/src/common/decoder_main.cpp
+++ b/src/common/decoder_main.cpp
@@ -3,12 +3,13 @@
 #include <string>
 #include <boost/timer/timer.hpp>
 #include <boost/thread/tss.hpp>
-#include "god.h"
-#include "logging.h"
-#include "search.h"
-#include "threadpool.h"
-#include "printer.h"
-#include "sentence.h"
+
+#include "common/god.h"
+#include "common/logging.h"
+#include "common/search.h"
+#include "common/threadpool.h"
+#include "common/printer.h"
+#include "common/sentence.h"
 
 History TranslationTask(const std::string& in, size_t taskCounter) {
 #ifdef __APPLE__
@@ -33,26 +34,34 @@ History TranslationTask(const std::string& in, size_t taskCounter) {
 
 int main(int argc, char* argv[]) {
   God::Init(argc, argv);
+  LOG(info) << "Initialization... DONE";
   std::setvbuf(stdout, NULL, _IONBF, 0);
   boost::timer::cpu_timer timer;
 
   std::string in;
   std::size_t taskCounter = 0;
 
-  size_t threadCount = God::Get<size_t>("threads-per-device")
-                       * God::Get<std::vector<size_t>>("devices").size();
+  size_t threadCount;
+  if (God::Get<std::string>("mode") == "GPU") {
+    threadCount= God::Get<size_t>("threads-per-device")
+                 * God::Get<std::vector<size_t>>("devices").size();
+  } else {
+    threadCount = God::Get<size_t>("threads");
+  }
+
+  LOG(info) << "threadCount set to " << threadCount;
 
-  if(God::Get<bool>("wipo")) {
+  if (God::Get<bool>("wipo")) {
     LOG(info) << "Reading input";
-    while(std::getline(God::GetInputStream(), in)) {
+    while (std::getline(God::GetInputStream(), in)) {
       History result = TranslationTask(in, taskCounter);
       Printer(result, taskCounter++, std::cout);
     }
-  }
-  else {
+  } else {
     LOG(info) << "Setting number of threads to " << threadCount;
     ThreadPool pool(threadCount);
     LOG(info) << "Reading input";
+
     std::vector<std::future<History>> results;
 
     while(std::getline(God::GetInputStream(), in)) {
@@ -67,7 +76,7 @@ int main(int argc, char* argv[]) {
     }
 
     size_t lineCounter = 0;
-    for(auto&& result : results)
+    for (auto&& result : results)
       Printer(result.get(), lineCounter++, std::cout);
   }
   LOG(info) << "Total time: " << timer.format();
diff --git a/src/common/god.cpp b/src/common/god.cpp
index db54fb34..c150c232 100644
--- a/src/common/god.cpp
+++ b/src/common/god.cpp
@@ -67,9 +67,10 @@ God& God::NonStaticInit(int argc, char** argv) {
     exit(0);
   }
 
+  LOG(info) << "Loading scorers...";
   for(auto&& pair : config_.Get()["scorers"]) {
     std::string name = pair.first.as<std::string>();
-    loaders_.emplace(name, LoaderFactory::Create(name, pair.second));
+    loaders_.emplace(name, LoaderFactory::Create(name, pair.second, config_.Get()["mode"].as<std::string>()));
   }
 
   if (config_.inputPath.empty()) {
diff --git a/src/common/loader_factory.cpp b/src/common/loader_factory.cpp
index 55754d81..0eb84669 100644
--- a/src/common/loader_factory.cpp
+++ b/src/common/loader_factory.cpp
@@ -1,11 +1,34 @@
 #include "loader_factory.h"
-#include "cpu/decoder/encoder_decoder.h"
 
-#ifdef NO_CUDA
-LoaderPtr LoaderFactory::Create(const std::string& name,
-						const YAML::Node& config)
-{
+#include "scorer.h"
+#include "cpu/decoder/encoder_decoder_loader.h"
+
+#ifdef CUDA
+#include "gpu/decoder/encoder_decoder.h"
+#include "gpu/decoder/ape_penalty.h"
+
+#ifdef KENLM
+#include "gpu/decoder/language_model.h"
+#endif
+#endif
+
+
+LoaderPtr LoaderFactory::Create(
+    const std::string& name,
+    const YAML::Node& config,
+    const std::string& mode) {
 	Loader *loader;
+
+  if (HAS_GPU_SUPPORT && (mode == "GPU")) {
+    loader = CreateGPU(name, config);
+    if (loader) {
+      return LoaderPtr(loader);
+    } else {
+      LOG(info) << "No GPU scorer type. Loading CPU";
+    }
+  }
+
+
 	loader = CreateCPU(name, config);
 	if (loader) {
 		return LoaderPtr(loader);
@@ -14,6 +37,31 @@ LoaderPtr LoaderFactory::Create(const std::string& name,
 	std::string type = config["type"].as<std::string>();
 	UTIL_THROW2("Unknown scorer in config file: " << type);
 }
+
+#ifdef CUDA
+Loader *LoaderFactory::CreateGPU(
+    const std::string& name,
+    const YAML::Node& config) {
+  // Returns the loader IF_MATCH_RETURN yields for the scorer's "type", or
+  // nullptr when no GPU loader matches (Create() then tries CreateCPU).
+  UTIL_THROW_IF2(!config["type"], "Missing scorer type in config file");
+  std::string type = config["type"].as<std::string>();
+
+  IF_MATCH_RETURN(type, "Nematus", GPU::EncoderDecoderLoader);
+  IF_MATCH_RETURN(type, "nematus", GPU::EncoderDecoderLoader);
+  IF_MATCH_RETURN(type, "NEMATUS", GPU::EncoderDecoderLoader);
+
+  IF_MATCH_RETURN(type, "Ape", GPU::ApePenaltyLoader);
+  IF_MATCH_RETURN(type, "ape", GPU::ApePenaltyLoader);
+  IF_MATCH_RETURN(type, "APE", GPU::ApePenaltyLoader);
+#ifdef KENLM
+  IF_MATCH_RETURN(type, "KenLM", GPU::KenLMLoader);
+  IF_MATCH_RETURN(type, "kenlm", GPU::KenLMLoader);
+  IF_MATCH_RETURN(type, "KENLM", GPU::KenLMLoader);
+#endif
+
+  return nullptr;
+}
 #endif
 
 
@@ -23,7 +71,9 @@ Loader *LoaderFactory::CreateCPU(const std::string& name,
 				 "Missing scorer type in config file");
   std::string type = config["type"].as<std::string>();
 
-  IF_MATCH_RETURN(type, "Nematus.CPU", CPU::EncoderDecoderLoader);
+  IF_MATCH_RETURN(type, "Nematus", CPU::EncoderDecoderLoader);
+  IF_MATCH_RETURN(type, "nematus", CPU::EncoderDecoderLoader);
+  IF_MATCH_RETURN(type, "NEMATUS", CPU::EncoderDecoderLoader);
 
   return NULL;
 }
diff --git a/src/common/loader_factory.cu b/src/common/loader_factory.cu
deleted file mode 100644
index c7880072..00000000
--- a/src/common/loader_factory.cu
+++ /dev/null
@@ -1,50 +0,0 @@
-#include "loader_factory.h"
-#include "scorer.h"
-#include "gpu/decoder/encoder_decoder.h"
-#include "gpu/decoder/ape_penalty.h"
-
-#ifdef KENLM
-#include "gpu/decoder/language_model.h"
-#endif
-
-LoaderPtr LoaderFactory::Create(const std::string& name,
-						const YAML::Node& config)
-{
-	Loader *loader;
-	loader = CreateGPU(name, config);
-	if (loader) {
-		return LoaderPtr(loader);
-	}
-
-
-	loader = CreateCPU(name, config);
-	if (loader) {
-		return LoaderPtr(loader);
-	}
-
-	std::string type = config["type"].as<std::string>();
-	UTIL_THROW2("Unknown scorer in config file: " << type);
-}
-
-Loader *LoaderFactory::CreateGPU(const std::string& name,
-						const YAML::Node& config) {
-  UTIL_THROW_IF2(!config["type"],
-				 "Missing scorer type in config file");
-
-  std::string type = config["type"].as<std::string>();
-  IF_MATCH_RETURN(type, "Nematus", GPU::EncoderDecoderLoader);
-  IF_MATCH_RETURN(type, "nematus", GPU::EncoderDecoderLoader);
-  IF_MATCH_RETURN(type, "NEMATUS", GPU::EncoderDecoderLoader);
-
-  IF_MATCH_RETURN(type, "Ape", GPU::ApePenaltyLoader);
-  IF_MATCH_RETURN(type, "ape", GPU::ApePenaltyLoader);
-  IF_MATCH_RETURN(type, "APE", GPU::ApePenaltyLoader);
-
-#ifdef KENLM
-  IF_MATCH_RETURN(type, "KenLM", GPU::KenLMLoader)
-  IF_MATCH_RETURN(type, "kenlm", GPU::KenLMLoader)
-  IF_MATCH_RETURN(type, "KENLM", GPU::KenLMLoader)
-#endif
-
-  return NULL;
-}
diff --git a/src/common/loader_factory.h b/src/common/loader_factory.h
index a4022c76..c00f63bc 100644
--- a/src/common/loader_factory.h
+++ b/src/common/loader_factory.h
@@ -18,14 +18,22 @@ do { \
 class LoaderFactory {
   public:
     static LoaderPtr Create(const std::string& name,
-                            const YAML::Node& config);
+                            const YAML::Node& config,
+                            const std::string& mode);
 
   protected:
-    static Loader *CreateGPU(const std::string& name,
-                            const YAML::Node& config);
 
     static Loader *CreateCPU(const std::string& name,
                             const YAML::Node& config);
 
+    static Loader *CreateGPU(const std::string& name,
+                            const YAML::Node& config);  // defined only under #ifdef CUDA in loader_factory.cpp
+    // Compile-time flag: true only when the build defines CUDA.
+#ifdef CUDA
+    static const bool HAS_GPU_SUPPORT = true;
+#else
+    static const bool HAS_GPU_SUPPORT = false;
+#endif
+
 };
 
diff --git a/src/cpu/decoder/encoder_decoder.h b/src/cpu/decoder/encoder_decoder.h
index 19bbfd85..92d3aa60 100644
--- a/src/cpu/decoder/encoder_decoder.h
+++ b/src/cpu/decoder/encoder_decoder.h
@@ -27,7 +27,7 @@ class EncoderDecoderState : public State {
 
     CPU::mblas::Matrix& GetStates();
 
-	CPU::mblas::Matrix& GetEmbeddings();
+  	CPU::mblas::Matrix& GetEmbeddings();
 
     const CPU::mblas::Matrix& GetStates() const;
 
diff --git a/src/cpu/decoder/encoder_decoder_loader.h b/src/cpu/decoder/encoder_decoder_loader.h
new file mode 100644
index 00000000..7346b58e
--- /dev/null
+++ b/src/cpu/decoder/encoder_decoder_loader.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+#include <yaml-cpp/yaml.h>
+
+#include "common/scorer.h"
+#include "common/loader.h"
+#include "common/logging.h"
+
+namespace CPU {
+
+class Weights;
+
+// Loader subclass in the CPU namespace; uniquely owns the Weights in weights_.
+class EncoderDecoderLoader : public Loader {
+  public:
+    EncoderDecoderLoader(const std::string name,
+                         const YAML::Node& config);
+    virtual void Load();
+    virtual ScorerPtr NewScorer(const size_t taskId);
+
+  private:
+    std::vector<std::unique_ptr<Weights>> weights_;
+};
+
+} // namespace CPU
author	Tomasz Dwojak <t.dwojak@amu.edu.pl>	2016-10-05 13:23:59 +0300
committer	Tomasz Dwojak <t.dwojak@amu.edu.pl>	2016-10-05 16:20:15 +0300
commit	9f5d666848f09ea1bd861ef7c8befa2180ca077d (patch)
tree	1cb882f73945dd37a1d1a9ee9cca0cb51a4d5620
parent	f4c508c96953dfda235d5a7dddd6756e69193cf4 (diff)