Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorMartijn Berger <martijn.berger@gmail.com>2014-01-28 21:40:08 +0400
committerMartijn Berger <martijn.berger@gmail.com>2014-01-28 21:40:08 +0400
commit84f95875401f0c9cd815d842d9c6f4526dc8f939 (patch)
treeb5fe4c799a83e0d93ff967b8e4c5c7def4a00677 /intern
parentabf18033f39c09f983154d88cef1a53bce8dd75a (diff)
CUDA: use streams and async launches to avoid busy-waiting
This is my first stab at this and is based on this IRC conversation: <mib2berlin> brecht: this is meaning as reminder only, I know you have other things to do > http://openvidia.sourceforge.net/index.php/Optimization_Notes#avoiding_busy_waits <brecht> mib2berlin: thanks, bookmarked. Only tested on Ubuntu 14.04 / CUDA 5.0, but I'll do some more testing tomorrow. I am also unsure about the placement and the lifetime of the stream and the event, but creating / deleting these seems to incur a non-trivial cost. Reviewers: brecht Reviewed By: brecht CC: mib2berlin, dingto Differential Revision: https://developer.blender.org/D262
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/device/device_cuda.cpp12
1 files changed, 10 insertions, 2 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 0fbb48cf431..3073f078d12 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -41,6 +41,8 @@ public:
CUdevice cuDevice;
CUcontext cuContext;
CUmodule cuModule;
+ CUstream cuStream;
+ CUevent tileDone;
map<device_ptr, bool> tex_interp_map;
int cuDevId;
int cuDevArchitecture;
@@ -207,6 +209,9 @@ public:
if(cuda_error_(result, "cuCtxCreate"))
return;
+ cuda_assert(cuStreamCreate(&cuStream, 0))
+ cuda_assert(cuEventCreate(&tileDone, 0x1))
+
int major, minor;
cuDeviceComputeCapability(&major, &minor, cuDevId);
cuDevArchitecture = major*100 + minor*10;
@@ -223,6 +228,8 @@ public:
{
task_pool.stop();
+ cuda_assert(cuEventDestroy(tileDone))
+ cuda_assert(cuStreamDestroy(cuStream))
cuda_assert(cuCtxDestroy(cuContext))
}
@@ -645,9 +652,10 @@ public:
cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1))
cuda_assert(cuFuncSetBlockShape(cuPathTrace, xthreads, ythreads, 1))
- cuda_assert(cuLaunchGrid(cuPathTrace, xblocks, yblocks))
+ cuda_assert(cuLaunchGridAsync(cuPathTrace, xblocks, yblocks, cuStream))
- cuda_assert(cuCtxSynchronize())
+ cuda_assert(cuEventRecord(tileDone, cuStream ))
+ cuda_assert(cuEventSynchronize(tileDone))
cuda_pop_context();
}