diff options
author | Martijn Berger <mberger@denc.com> | 2014-07-25 15:33:19 +0400 |
---|---|---|
committer | Martijn Berger <mberger@denc.com> | 2014-07-25 15:33:19 +0400 |
commit | bae2b3a688a2c2ee3eb8457c62af3a10bae76131 (patch) | |
tree | 2c333d1dab366bd64b8f4beba3b5ebbed755ed35 /intern | |
parent | d0f0d8220523858eddfd30f186327d716761e5b5 (diff) |
Switch to the CUDA 4.0-style API for kernel invocation. This is a small clean-up that introduces no functional changes but makes the code a bit more readable.
Differential revision: https://developer.blender.org/D659
Reviewed by: Sergey Sharybin, Thomas Dinges
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 135 | ||||
-rw-r--r-- | intern/cycles/util/util_cuda.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/util/util_cuda.h | 2 |
3 files changed, 45 insertions, 94 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index d1d227b3761..022dcd0275c 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -615,40 +615,17 @@ public: if(have_error()) return; - - /* pass in parameters */ - int offset = 0; - - cuda_assert(cuParamSetv(cuPathTrace, offset, &d_buffer, sizeof(d_buffer))); - offset += sizeof(d_buffer); - - cuda_assert(cuParamSetv(cuPathTrace, offset, &d_rng_state, sizeof(d_rng_state))); - offset += sizeof(d_rng_state); - - offset = align_up(offset, __alignof(sample)); - - cuda_assert(cuParamSeti(cuPathTrace, offset, sample)); - offset += sizeof(sample); - - cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.x)); - offset += sizeof(rtile.x); - - cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.y)); - offset += sizeof(rtile.y); - - cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.w)); - offset += sizeof(rtile.w); - cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.h)); - offset += sizeof(rtile.h); - - cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.offset)); - offset += sizeof(rtile.offset); - - cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.stride)); - offset += sizeof(rtile.stride); - - cuda_assert(cuParamSetSize(cuPathTrace, offset)); + /* pass in parameters */ + void *args[] = {&d_buffer, + &d_rng_state, + &sample, + &rtile.x, + &rtile.y, + &rtile.w, + &rtile.h, + &rtile.offset, + &rtile.stride}; /* launch kernel */ int threads_per_block; @@ -666,8 +643,11 @@ public: int yblocks = (rtile.h + ythreads - 1)/ythreads; cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1)); - cuda_assert(cuFuncSetBlockShape(cuPathTrace, xthreads, ythreads, 1)); - cuda_assert(cuLaunchGrid(cuPathTrace, xblocks, yblocks)); + + cuda_assert(cuLaunchKernel(cuPathTrace, + xblocks , yblocks, 1, /* blocks */ + xthreads, ythreads, 1, /* threads */ + 0, 0, args, 0)); cuda_assert(cuCtxSynchronize()); @@ -693,40 +673,19 @@ public: 
cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_byte")); } - /* pass in parameters */ - int offset = 0; - - cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_rgba, sizeof(d_rgba))); - offset += sizeof(d_rgba); - - cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_buffer, sizeof(d_buffer))); - offset += sizeof(d_buffer); float sample_scale = 1.0f/(task.sample + 1); - offset = align_up(offset, __alignof(sample_scale)); - - cuda_assert(cuParamSetf(cuFilmConvert, offset, sample_scale)); - offset += sizeof(sample_scale); - cuda_assert(cuParamSeti(cuFilmConvert, offset, task.x)); - offset += sizeof(task.x); - - cuda_assert(cuParamSeti(cuFilmConvert, offset, task.y)); - offset += sizeof(task.y); - - cuda_assert(cuParamSeti(cuFilmConvert, offset, task.w)); - offset += sizeof(task.w); - - cuda_assert(cuParamSeti(cuFilmConvert, offset, task.h)); - offset += sizeof(task.h); - - cuda_assert(cuParamSeti(cuFilmConvert, offset, task.offset)); - offset += sizeof(task.offset); - - cuda_assert(cuParamSeti(cuFilmConvert, offset, task.stride)); - offset += sizeof(task.stride); - - cuda_assert(cuParamSetSize(cuFilmConvert, offset)); + /* pass in parameters */ + void *args[] = {&d_rgba, + &d_buffer, + &sample_scale, + &task.x, + &task.y, + &task.w, + &task.h, + &task.offset, + &task.stride}; /* launch kernel */ int threads_per_block; @@ -738,8 +697,11 @@ public: int yblocks = (task.h + ythreads - 1)/ythreads; cuda_assert(cuFuncSetCacheConfig(cuFilmConvert, CU_FUNC_CACHE_PREFER_L1)); - cuda_assert(cuFuncSetBlockShape(cuFilmConvert, xthreads, ythreads, 1)); - cuda_assert(cuLaunchGrid(cuFilmConvert, xblocks, yblocks)); + + cuda_assert(cuLaunchKernel(cuFilmConvert, + xblocks , yblocks, 1, /* blocks */ + xthreads, ythreads, 1, /* threads */ + 0, 0, args, 0)); unmap_pixels((rgba_byte)? 
rgba_byte: rgba_half); @@ -777,31 +739,14 @@ public: int shader_w = min(shader_chunk_size, end - shader_x); for(int sample = 0; sample < task.num_samples; sample++) { - /* pass in parameters */ - int offset = 0; - cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input))); - offset += sizeof(d_input); - - cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output))); - offset += sizeof(d_output); - - int shader_eval_type = task.shader_eval_type; - offset = align_up(offset, __alignof(shader_eval_type)); - - cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type)); - offset += sizeof(task.shader_eval_type); - - cuda_assert(cuParamSeti(cuShader, offset, shader_x)); - offset += sizeof(shader_x); - - cuda_assert(cuParamSeti(cuShader, offset, shader_w)); - offset += sizeof(shader_w); - - cuda_assert(cuParamSeti(cuShader, offset, sample)); - offset += sizeof(sample); - - cuda_assert(cuParamSetSize(cuShader, offset)); + /* pass in parameters */ + void *args[] = {&d_input, + &d_output, + &task.shader_eval_type, + &shader_x, + &shader_w, + &sample}; /* launch kernel */ int threads_per_block; @@ -810,8 +755,10 @@ public: int xblocks = (shader_w + threads_per_block - 1)/threads_per_block; cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1)); - cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1)); - cuda_assert(cuLaunchGrid(cuShader, xblocks, 1)); + cuda_assert(cuLaunchKernel(cuShader, + xblocks , 1, 1, /* blocks */ + threads_per_block, 1, 1, /* threads */ + 0, 0, args, 0)); cuda_assert(cuCtxSynchronize()); } diff --git a/intern/cycles/util/util_cuda.cpp b/intern/cycles/util/util_cuda.cpp index 9404f45d9a6..50690434f03 100644 --- a/intern/cycles/util/util_cuda.cpp +++ b/intern/cycles/util/util_cuda.cpp @@ -149,6 +149,7 @@ tcuGLCtxCreate *cuGLCtxCreate; tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer; tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage; tcuCtxSetCurrent *cuCtxSetCurrent; +tcuLaunchKernel 
*cuLaunchKernel; CCL_NAMESPACE_BEGIN @@ -386,6 +387,7 @@ bool cuLibraryInit() /* cuda 4.0 */ CUDA_LIBRARY_FIND(cuCtxSetCurrent); + CUDA_LIBRARY_FIND(cuLaunchKernel); if(cuHavePrecompiledKernels()) result = true; diff --git a/intern/cycles/util/util_cuda.h b/intern/cycles/util/util_cuda.h index b5e9c7188cd..a633fb21eca 100644 --- a/intern/cycles/util/util_cuda.h +++ b/intern/cycles/util/util_cuda.h @@ -509,6 +509,7 @@ typedef CUresult CUDAAPI tcuGLCtxCreate(CUcontext *pCtx, unsigned int Flags, CUd typedef CUresult CUDAAPI tcuGraphicsGLRegisterBuffer(CUgraphicsResource *pCudaResource, GLuint buffer, unsigned int Flags); typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource *pCudaResource, GLuint image, GLenum target, unsigned int Flags); typedef CUresult CUDAAPI tcuCtxSetCurrent(CUcontext ctx); +typedef CUresult CUDAAPI tcuLaunchKernel(CUfunction f, unsigned gridDimX, unsigned gridDimY, unsigned gridDimZ, unsigned blockDimX, unsigned blockDimY, unsigned blockDimZ, unsigned sharedMemBytes, CUstream hStream, void* kernelParams, void* extra); /* function declarations */ @@ -629,6 +630,7 @@ extern tcuGLCtxCreate *cuGLCtxCreate; extern tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer; extern tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage; extern tcuCtxSetCurrent *cuCtxSetCurrent; +extern tcuLaunchKernel *cuLaunchKernel; #endif /* __UTIL_CUDA_H__ */ |