diff options
author | Guillaume Klein <guillaumekln@users.noreply.github.com> | 2022-11-07 11:56:49 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-11-07 11:56:49 +0300 |
commit | 453b405de1bce522b62cead9d23856d240cfdf8c (patch) | |
tree | 02ddb6e8f0762f33c4ddcbd290143cdbf626fc18 | |
parent | 258e3bd8cf74fb174da8efc6ac6fcf798376c45e (diff) |
Synchronize the stream instead of the device when the worker goes idle (#973)
-rw-r--r-- | include/ctranslate2/replica_pool.h | 6 |
1 files changed, 4 insertions, 2 deletions
diff --git a/include/ctranslate2/replica_pool.h b/include/ctranslate2/replica_pool.h
index 1cb9ed1a..b701b146 100644
--- a/include/ctranslate2/replica_pool.h
+++ b/include/ctranslate2/replica_pool.h
@@ -339,6 +339,8 @@ namespace ctranslate2 {
 
   protected:
     void initialize() override {
+      set_device_index(_device, _device_index);
+
       // Set the number of OpenMP threads for the current thread.
       set_num_threads(_num_threads);
 
@@ -347,9 +349,9 @@ namespace ctranslate2 {
     }
 
     void idle() override {
-      // When no new jobs are immediately available, we synchronize the CUDA device
+      // When no new jobs are immediately available, we synchronize the CUDA stream
       // so that the CudaAsyncAllocator can release some memory.
-      synchronize_device(_device, _device_index);
+      synchronize_stream(_device);
     }
 
     void finalize() override {