Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/OpenNMT/CTranslate2.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Guillaume Klein <guillaumekln@users.noreply.github.com> 2022-11-07 11:56:49 +0300
committer GitHub <noreply@github.com> 2022-11-07 11:56:49 +0300
commit 453b405de1bce522b62cead9d23856d240cfdf8c (patch)
tree 02ddb6e8f0762f33c4ddcbd290143cdbf626fc18
parent 258e3bd8cf74fb174da8efc6ac6fcf798376c45e (diff)
Synchronize the stream instead of the device when the worker goes idle (#973)
-rw-r--r-- include/ctranslate2/replica_pool.h 6
1 file changed, 4 insertions, 2 deletions
diff --git a/include/ctranslate2/replica_pool.h b/include/ctranslate2/replica_pool.h
index 1cb9ed1a..b701b146 100644
--- a/include/ctranslate2/replica_pool.h
+++ b/include/ctranslate2/replica_pool.h
@@ -339,6 +339,8 @@ namespace ctranslate2 {
protected:
void initialize() override {
+ set_device_index(_device, _device_index);
+
// Set the number of OpenMP threads for the current thread.
set_num_threads(_num_threads);
@@ -347,9 +349,9 @@ namespace ctranslate2 {
}
void idle() override {
- // When no new jobs are immediately available, we synchronize the CUDA device
+ // When no new jobs are immediately available, we synchronize the CUDA stream
// so that the CudaAsyncAllocator can release some memory.
- synchronize_device(_device, _device_index);
+ synchronize_stream(_device);
}
void finalize() override {