Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hieu Hoang <hieuhoang@gmail.com> 2018-01-18 20:38:09 +0300
committer: Hieu Hoang <hieuhoang@gmail.com> 2018-01-18 20:38:09 +0300
commit: d430350f2d6b6e7848baed265d0a1038b8a07f36 (patch)
tree: b08a24b4986d38513656b974dd82f6df5a8df30a
parent: 23b73264d15ca43ee5d9d9d2c3dac27af404341b (diff)
add --tensor-cores arg
-rw-r--r-- src/amun/common/config.cpp 3
-rw-r--r-- src/amun/common/god.cpp 3
-rw-r--r-- src/amun/common/god.h 6
-rw-r--r-- src/amun/gpu/decoder/encoder_decoder.cu 20
-rw-r--r-- src/amun/gpu/decoder/encoder_decoder.h 2
5 files changed, 32 insertions, 2 deletions
diff --git a/src/amun/common/config.cpp b/src/amun/common/config.cpp
index 85b8cc4b..1b20dd9b 100644
--- a/src/amun/common/config.cpp
+++ b/src/amun/common/config.cpp
@@ -200,6 +200,8 @@ void Config::AddOptions(size_t argc, char** argv) {
"Implicitly sets minimal number of threads to number of devices.")
("gpu-threads", po::value<size_t>()->default_value(1),
"Number of threads on a single GPU.")
+ ("tensor-cores", po::value<bool>()->default_value(false),
+ "Use Tensor Cores, if available.")
#endif
#ifdef HAS_CPU
@@ -333,6 +335,7 @@ void Config::AddOptions(size_t argc, char** argv) {
#ifdef CUDA
SET_OPTION("gpu-threads", size_t);
SET_OPTION("devices", std::vector<size_t>);
+ SET_OPTION("tensor-cores", bool);
#endif
#ifdef HAS_CPU
SET_OPTION("cpu-threads", size_t);
diff --git a/src/amun/common/god.cpp b/src/amun/common/god.cpp
index 9a531698..6acd01df 100644
--- a/src/amun/common/god.cpp
+++ b/src/amun/common/god.cpp
@@ -101,6 +101,9 @@ God& God::Init(int argc, char** argv) {
//useFusedSoftmax_ = false;
//cerr << "useFusedSoftmax_=" << useFusedSoftmax_ << endl;
+ useTensorCores_ = Get<bool>("tensor-cores");
+ //cerr << "useTensorCores_=" << useTensorCores_ << endl;
+
if (Has("input-file")) {
LOG(info)->info("Reading from {}", Get<std::string>("input-file"));
inputStream_.reset(new InputFileStream(Get<std::string>("input-file")));
diff --git a/src/amun/common/god.h b/src/amun/common/god.h
index cd719ad9..2eceae80 100644
--- a/src/amun/common/god.h
+++ b/src/amun/common/god.h
@@ -88,6 +88,9 @@ class God {
bool UseFusedSoftmax() const
{ return useFusedSoftmax_; }
+ bool UseTensorCores() const
+ { return useTensorCores_; }
+
private:
void LoadScorers();
void LoadFiltering();
@@ -120,8 +123,7 @@ class God {
std::unique_ptr<ThreadPool> pool_;
- bool returnNBestList_;
- bool useFusedSoftmax_;
+ bool returnNBestList_, useFusedSoftmax_, useTensorCores_;
};
}
diff --git a/src/amun/gpu/decoder/encoder_decoder.cu b/src/amun/gpu/decoder/encoder_decoder.cu
index b4331f6d..ab0a1aa6 100644
--- a/src/amun/gpu/decoder/encoder_decoder.cu
+++ b/src/amun/gpu/decoder/encoder_decoder.cu
@@ -75,6 +75,8 @@ EncoderDecoder::~EncoderDecoder()
void EncoderDecoder::Encode(const SentencesPtr &source) {
BEGIN_TIMER("Encode");
+ SetTensorCore();
+
EncOutPtr encOut(new EncOutGPU(source));
if (source->size()) {
@@ -163,6 +165,8 @@ void EncoderDecoder::DecodeAsync()
void EncoderDecoder::DecodeAsyncInternal()
{
+ SetTensorCore();
+
unsigned maxBeamSize = god_.Get<unsigned>("beam-size");
unsigned miniBatch = god_.Get<unsigned>("mini-batch");
@@ -459,6 +463,22 @@ unsigned EncoderDecoder::SentencesToGet(const Histories& histories)
return ret;
}
+void EncoderDecoder::SetTensorCore() // Switches the cuBLAS handle to Tensor Core math when God::UseTensorCores() is true (set from the "tensor-cores" option).
+{
+#if CUDA_VERSION >= 9000 // Body is compiled only when CUDA_VERSION is at least 9000; otherwise this function does nothing.
+ if (god_.UseTensorCores()) { // Nothing is changed unless the flag is set.
+ //cerr << "using tensor cores" << endl;
+ cublasHandle_t handle = mblas::CublasHandler::GetHandle(); // NOTE(review): presumably the handle used by this thread's GEMM calls - confirm in mblas::CublasHandler.
+ cublasStatus_t stat = cublasSetMathMode(handle, CUBLAS_TENSOR_OP_MATH);
+ if (stat != CUBLAS_STATUS_SUCCESS) {
+ printf ("cublasSetMathMode failed\n"); // NOTE(review): written to stdout via printf, not stderr or the logger.
+ abort(); // A failed math-mode switch is treated as fatal.
+ }
+ }
+#endif
+
+}
+
}
}
diff --git a/src/amun/gpu/decoder/encoder_decoder.h b/src/amun/gpu/decoder/encoder_decoder.h
index a0071621..d7e99974 100644
--- a/src/amun/gpu/decoder/encoder_decoder.h
+++ b/src/amun/gpu/decoder/encoder_decoder.h
@@ -120,6 +120,8 @@ class EncoderDecoder : public Scorer {
unsigned SentencesToGet(const Histories& histories);
+ void SetTensorCore();
+
};
}