Process outstanding CUDA events in recordEvent

Without this, cuda_events could grow continuously from calls to cudaMemcpyAsync, and would never be processed if there were no new pinned memory allocations. For example:

    t1 = cutorch.createCudaHostTensor(10)
    t2 = torch.CudaTensor(10)
    while true do
      t2:copyAsync(t1)
    end
author: Sam Gross <sgross@fb.com> 2016-12-02 06:09:47 +0300
committer: Sam Gross <sgross@fb.com> 2016-12-02 06:09:47 +0300
commit: a070fc9a76d8e6ddcbb6b4ba4bbef1461f1fe812 (patch)
tree: 108ac497ba560618c3fadcc94ad119690b69d128
parent: 0267dae09461f6dbb6ec1f69c0219a28ae8d68d0 (diff)
1 files changed, 6 insertions, 0 deletions
diff --git a/lib/THC/THCCachingHostAllocator.cpp b/lib/THC/THCCachingHostAllocator.cpp
index 6d1b870..7ebc710 100644
--- a/lib/THC/THCCachingHostAllocator.cpp
+++ b/lib/THC/THCCachingHostAllocator.cpp
@@ -123,6 +123,12 @@ struct HostAllocator
     Block& block = it->second;
     THAssert(block.allocated);
 
+    // process outstanding cuda events which may have occurred
+    err = processEvents();
+    if (err != cudaSuccess) {
+      return err;
+    }
+
     // create and record an event in the given stream
     cudaEvent_t event;
     err = cudaEventCreateWithFlags(&event, cudaEventDisableTiming);
author	Sam Gross <sgross@fb.com>	2016-12-02 06:09:47 +0300
committer	Sam Gross <sgross@fb.com>	2016-12-02 06:09:47 +0300
commit	a070fc9a76d8e6ddcbb6b4ba4bbef1461f1fe812 (patch)
tree	108ac497ba560618c3fadcc94ad119690b69d128
parent	0267dae09461f6dbb6ec1f69c0219a28ae8d68d0 (diff)