From d430350f2d6b6e7848baed265d0a1038b8a07f36 Mon Sep 17 00:00:00 2001
From: Hieu Hoang
Date: Thu, 18 Jan 2018 17:38:09 +0000
Subject: add --tensor-cores arg

---
 src/amun/common/config.cpp              |  3 +++
 src/amun/common/god.cpp                 |  3 +++
 src/amun/common/god.h                   |  6 ++++--
 src/amun/gpu/decoder/encoder_decoder.cu | 20 ++++++++++++++++++++
 src/amun/gpu/decoder/encoder_decoder.h  |  2 ++
 5 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/src/amun/common/config.cpp b/src/amun/common/config.cpp
index 85b8cc4b..1b20dd9b 100644
--- a/src/amun/common/config.cpp
+++ b/src/amun/common/config.cpp
@@ -200,6 +200,8 @@ void Config::AddOptions(size_t argc, char** argv) {
       "Implicitly sets minimal number of threads to number of devices.")
     ("gpu-threads", po::value<size_t>()->default_value(1),
       "Number of threads on a single GPU.")
+    ("tensor-cores", po::value<bool>()->default_value(false),
+      "Use Tensor Cores, if available.")
 #endif
 
 #ifdef HAS_CPU
@@ -333,6 +335,7 @@ void Config::AddOptions(size_t argc, char** argv) {
 #ifdef CUDA
   SET_OPTION("gpu-threads", size_t);
   SET_OPTION("devices", std::vector);
+  SET_OPTION("tensor-cores", bool);
 #endif
 #ifdef HAS_CPU
   SET_OPTION("cpu-threads", size_t);
diff --git a/src/amun/common/god.cpp b/src/amun/common/god.cpp
index 9a531698..6acd01df 100644
--- a/src/amun/common/god.cpp
+++ b/src/amun/common/god.cpp
@@ -101,6 +101,9 @@ God& God::Init(int argc, char** argv) {
   //useFusedSoftmax_ = false;
   //cerr << "useFusedSoftmax_=" << useFusedSoftmax_ << endl;
 
+  useTensorCores_ = Get<bool>("tensor-cores");
+  //cerr << "useTensorCores_=" << useTensorCores_ << endl;
+
   if (Has("input-file")) {
     LOG(info)->info("Reading from {}", Get("input-file"));
     inputStream_.reset(new InputFileStream(Get("input-file")));
diff --git a/src/amun/common/god.h b/src/amun/common/god.h
index cd719ad9..2eceae80 100644
--- a/src/amun/common/god.h
+++ b/src/amun/common/god.h
@@ -88,6 +88,9 @@ class God {
     bool UseFusedSoftmax() const
     { return useFusedSoftmax_; }
 
+    bool UseTensorCores() const
+    { return useTensorCores_; }
+
   private:
     void LoadScorers();
     void LoadFiltering();
@@ -120,8 +123,7 @@ class God {
 
     std::unique_ptr pool_;
 
-    bool returnNBestList_;
-    bool useFusedSoftmax_;
+    bool returnNBestList_, useFusedSoftmax_, useTensorCores_;
 };
 
 }
diff --git a/src/amun/gpu/decoder/encoder_decoder.cu b/src/amun/gpu/decoder/encoder_decoder.cu
index b4331f6d..ab0a1aa6 100644
--- a/src/amun/gpu/decoder/encoder_decoder.cu
+++ b/src/amun/gpu/decoder/encoder_decoder.cu
@@ -75,6 +75,8 @@ EncoderDecoder::~EncoderDecoder()
 void EncoderDecoder::Encode(const SentencesPtr &source) {
   BEGIN_TIMER("Encode");
 
+  SetTensorCore();
+
   EncOutPtr encOut(new EncOutGPU(source));
 
   if (source->size()) {
@@ -163,6 +165,8 @@ void EncoderDecoder::DecodeAsync()
 
 void EncoderDecoder::DecodeAsyncInternal()
 {
+  SetTensorCore();
+
   unsigned maxBeamSize = god_.Get("beam-size");
   unsigned miniBatch = god_.Get("mini-batch");
 
@@ -459,6 +463,22 @@ unsigned EncoderDecoder::SentencesToGet(const Histories& histories)
   return ret;
 }
 
+void EncoderDecoder::SetTensorCore()
+{
+#if CUDA_VERSION >= 9000
+  if (god_.UseTensorCores()) {
+    //cerr << "using tensor cores" << endl;
+    cublasHandle_t handle = mblas::CublasHandler::GetHandle();
+    cublasStatus_t stat = cublasSetMathMode(handle, CUBLAS_TENSOR_OP_MATH);
+    if (stat != CUBLAS_STATUS_SUCCESS) {
+      printf ("cublasSetMathMode failed\n");
+      abort();
+    }
+  }
+#endif
+
+}
+
 }
 }
 
diff --git a/src/amun/gpu/decoder/encoder_decoder.h b/src/amun/gpu/decoder/encoder_decoder.h
index a0071621..d7e99974 100644
--- a/src/amun/gpu/decoder/encoder_decoder.h
+++ b/src/amun/gpu/decoder/encoder_decoder.h
@@ -120,6 +120,8 @@ class EncoderDecoder : public Scorer {
 
   unsigned SentencesToGet(const Histories& histories);
 
+  void SetTensorCore();
+
 };
 
 }
-- 
cgit v1.2.3
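
For readers who want to try the cuBLAS math-mode switch outside of amun, the following is a minimal standalone sketch of the call that the new SetTensorCore() method wraps. It assumes CUDA 9 or newer and creates its own cuBLAS handle; EnableTensorCores and main are illustrative names only, whereas in the patch the handle comes from mblas::CublasHandler::GetHandle() and the flag from god_.UseTensorCores(). Build with something like "nvcc example.cu -lcublas".

#include <cstdio>
#include <cstdlib>
#include <cuda.h>        // CUDA_VERSION
#include <cublas_v2.h>   // cublasSetMathMode, CUBLAS_TENSOR_OP_MATH

// Opt a cuBLAS handle into Tensor Core kernels, mirroring SetTensorCore().
static void EnableTensorCores(cublasHandle_t handle, bool useTensorCores)
{
#if CUDA_VERSION >= 9000
  if (useTensorCores) {
    cublasStatus_t stat = cublasSetMathMode(handle, CUBLAS_TENSOR_OP_MATH);
    if (stat != CUBLAS_STATUS_SUCCESS) {
      printf("cublasSetMathMode failed\n");
      abort();
    }
  }
#else
  (void) handle;
  (void) useTensorCores;  // Tensor Cores need CUDA 9+; older toolkits just ignore the flag.
#endif
}

int main()
{
  cublasHandle_t handle;
  if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) {
    printf("cublasCreate failed\n");
    return 1;
  }

  EnableTensorCores(handle, true);  // corresponds to running amun with --tensor-cores

  // ... GEMMs issued through this handle may now run on Tensor Cores
  //     (hardware permitting, e.g. Volta or newer) ...

  cublasDestroy(handle);
  return 0;
}

Note that CUBLAS_TENSOR_OP_MATH permits cuBLAS to down-convert FP32 GEMM inputs to FP16 on Tensor Core hardware, which can change numerical results slightly; that is presumably why the patch exposes it as an opt-in flag defaulting to false rather than enabling it unconditionally.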