diff options
author | Hieu Hoang <hieuhoang@gmail.com> | 2018-01-18 20:38:43 +0300 |
---|---|---|
committer | Hieu Hoang <hieuhoang@gmail.com> | 2018-01-18 20:38:43 +0300 |
commit | 6cdb4b7511e15a6af16f67a1c1cd06157f79013a (patch) | |
tree | 1b739f3cadaeefe2d22ce36b1070304c9f48c4a1 | |
parent | 83b6de15c53e0d308b31f15528dd4393fbe46085 (diff) | |
parent | 2f8751c20dcba7ece6c10da87fde12361cb0c5b5 (diff) |
Merge branch 'const-batch2' of https://github.com/hieuhoang/marian into const-batch2
-rw-r--r-- | src/amun/common/config.cpp | 3 | ||||
-rw-r--r-- | src/amun/common/god.cpp | 3 | ||||
-rw-r--r-- | src/amun/common/god.h | 6 | ||||
-rw-r--r-- | src/amun/gpu/decoder/encoder_decoder.cu | 20 | ||||
-rw-r--r-- | src/amun/gpu/decoder/encoder_decoder.h | 2 |
5 files changed, 32 insertions, 2 deletions
```diff
diff --git a/src/amun/common/config.cpp b/src/amun/common/config.cpp
index 85b8cc4b..1b20dd9b 100644
--- a/src/amun/common/config.cpp
+++ b/src/amun/common/config.cpp
@@ -200,6 +200,8 @@ void Config::AddOptions(size_t argc, char** argv) {
      "Implicitly sets minimal number of threads to number of devices.")
     ("gpu-threads", po::value<size_t>()->default_value(1),
      "Number of threads on a single GPU.")
+    ("tensor-cores", po::value<bool>()->default_value(false),
+     "Use Tensor Cores, if available.")
 #endif
 
 #ifdef HAS_CPU
@@ -333,6 +335,7 @@ void Config::AddOptions(size_t argc, char** argv) {
 #ifdef CUDA
   SET_OPTION("gpu-threads", size_t);
   SET_OPTION("devices", std::vector<size_t>);
+  SET_OPTION("tensor-cores", bool);
 #endif
 #ifdef HAS_CPU
   SET_OPTION("cpu-threads", size_t);
diff --git a/src/amun/common/god.cpp b/src/amun/common/god.cpp
index 9a531698..6acd01df 100644
--- a/src/amun/common/god.cpp
+++ b/src/amun/common/god.cpp
@@ -101,6 +101,9 @@ God& God::Init(int argc, char** argv) {
   //useFusedSoftmax_ = false;
   //cerr << "useFusedSoftmax_=" << useFusedSoftmax_ << endl;
 
+  useTensorCores_ = Get<bool>("tensor-cores");
+  //cerr << "useTensorCores_=" << useTensorCores_ << endl;
+
   if (Has("input-file")) {
     LOG(info)->info("Reading from {}", Get<std::string>("input-file"));
     inputStream_.reset(new InputFileStream(Get<std::string>("input-file")));
diff --git a/src/amun/common/god.h b/src/amun/common/god.h
index cd719ad9..2eceae80 100644
--- a/src/amun/common/god.h
+++ b/src/amun/common/god.h
@@ -88,6 +88,9 @@ class God {
     bool UseFusedSoftmax() const
     { return useFusedSoftmax_; }
 
+    bool UseTensorCores() const
+    { return useTensorCores_; }
+
   private:
     void LoadScorers();
     void LoadFiltering();
@@ -120,8 +123,7 @@ class God {
 
     std::unique_ptr<ThreadPool> pool_;
 
-    bool returnNBestList_;
-    bool useFusedSoftmax_;
+    bool returnNBestList_, useFusedSoftmax_, useTensorCores_;
 };
 
 }
diff --git a/src/amun/gpu/decoder/encoder_decoder.cu b/src/amun/gpu/decoder/encoder_decoder.cu
index 1ec3f66d..792b3cd9 100644
--- a/src/amun/gpu/decoder/encoder_decoder.cu
+++ b/src/amun/gpu/decoder/encoder_decoder.cu
@@ -75,6 +75,8 @@ EncoderDecoder::~EncoderDecoder()
 void EncoderDecoder::Encode(const SentencesPtr &source)
 {
   BEGIN_TIMER("Encode");
+  SetTensorCore();
+
   EncOutPtr encOut(new EncOutGPU(source));
 
   if (source->size()) {
@@ -163,6 +165,8 @@ void EncoderDecoder::DecodeAsync()
 
 void EncoderDecoder::DecodeAsyncInternal()
 {
+  SetTensorCore();
+
   unsigned maxBeamSize = god_.Get<unsigned>("beam-size");
   unsigned miniBatch = god_.Get<unsigned>("mini-batch");
 
@@ -460,6 +464,22 @@ unsigned EncoderDecoder::SentencesToGet(const Histories& histories)
   //*/
 }
 
+void EncoderDecoder::SetTensorCore()
+{
+#if CUDA_VERSION >= 9000
+  if (god_.UseTensorCores()) {
+    //cerr << "using tensor cores" << endl;
+    cublasHandle_t handle = mblas::CublasHandler::GetHandle();
+    cublasStatus_t stat = cublasSetMathMode(handle, CUBLAS_TENSOR_OP_MATH);
+    if (stat != CUBLAS_STATUS_SUCCESS) {
+      printf ("cublasSetMathMode failed\n");
+      abort();
+    }
+  }
+#endif
+
+}
+
 }
 
 }
diff --git a/src/amun/gpu/decoder/encoder_decoder.h b/src/amun/gpu/decoder/encoder_decoder.h
index a0071621..d7e99974 100644
--- a/src/amun/gpu/decoder/encoder_decoder.h
+++ b/src/amun/gpu/decoder/encoder_decoder.h
@@ -120,6 +120,8 @@ class EncoderDecoder : public Scorer {
 
     unsigned SentencesToGet(const Histories& histories);
 
+    void SetTensorCore();
+
 };
 
 }
```