Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/intern
diff options
context:
space:
mode:
author: Martijn Berger <mberger@denc.com>, 2014-07-25 15:33:19 +0400
committer: Martijn Berger <mberger@denc.com>, 2014-07-25 15:33:19 +0400
commit: bae2b3a688a2c2ee3eb8457c62af3a10bae76131 (patch)
tree: 2c333d1dab366bd64b8f4beba3b5ebbed755ed35 /intern
parent: d0f0d8220523858eddfd30f186327d716761e5b5 (diff)
Switch to the CUDA 4.0-style API for kernel invocation. This is a small clean-up that has no functional changes but makes the code a bit more readable.
Differential revision: https://developer.blender.org/D659 Reviewed by: Sergey Sharybin, Thomas Dinges
Diffstat (limited to 'intern')
-rw-r--r-- intern/cycles/device/device_cuda.cpp | 135
-rw-r--r-- intern/cycles/util/util_cuda.cpp | 2
-rw-r--r-- intern/cycles/util/util_cuda.h | 2
3 files changed, 45 insertions, 94 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index d1d227b3761..022dcd0275c 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -615,40 +615,17 @@ public:
if(have_error())
return;
-
- /* pass in parameters */
- int offset = 0;
-
- cuda_assert(cuParamSetv(cuPathTrace, offset, &d_buffer, sizeof(d_buffer)));
- offset += sizeof(d_buffer);
-
- cuda_assert(cuParamSetv(cuPathTrace, offset, &d_rng_state, sizeof(d_rng_state)));
- offset += sizeof(d_rng_state);
-
- offset = align_up(offset, __alignof(sample));
-
- cuda_assert(cuParamSeti(cuPathTrace, offset, sample));
- offset += sizeof(sample);
-
- cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.x));
- offset += sizeof(rtile.x);
-
- cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.y));
- offset += sizeof(rtile.y);
-
- cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.w));
- offset += sizeof(rtile.w);
- cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.h));
- offset += sizeof(rtile.h);
-
- cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.offset));
- offset += sizeof(rtile.offset);
-
- cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.stride));
- offset += sizeof(rtile.stride);
-
- cuda_assert(cuParamSetSize(cuPathTrace, offset));
+ /* pass in parameters */
+ void *args[] = {&d_buffer,
+ &d_rng_state,
+ &sample,
+ &rtile.x,
+ &rtile.y,
+ &rtile.w,
+ &rtile.h,
+ &rtile.offset,
+ &rtile.stride};
/* launch kernel */
int threads_per_block;
@@ -666,8 +643,11 @@ public:
int yblocks = (rtile.h + ythreads - 1)/ythreads;
cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuFuncSetBlockShape(cuPathTrace, xthreads, ythreads, 1));
- cuda_assert(cuLaunchGrid(cuPathTrace, xblocks, yblocks));
+
+ cuda_assert(cuLaunchKernel(cuPathTrace,
+ xblocks , yblocks, 1, /* blocks */
+ xthreads, ythreads, 1, /* threads */
+ 0, 0, args, 0));
cuda_assert(cuCtxSynchronize());
@@ -693,40 +673,19 @@ public:
cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_byte"));
}
- /* pass in parameters */
- int offset = 0;
-
- cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_rgba, sizeof(d_rgba)));
- offset += sizeof(d_rgba);
-
- cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_buffer, sizeof(d_buffer)));
- offset += sizeof(d_buffer);
float sample_scale = 1.0f/(task.sample + 1);
- offset = align_up(offset, __alignof(sample_scale));
-
- cuda_assert(cuParamSetf(cuFilmConvert, offset, sample_scale));
- offset += sizeof(sample_scale);
- cuda_assert(cuParamSeti(cuFilmConvert, offset, task.x));
- offset += sizeof(task.x);
-
- cuda_assert(cuParamSeti(cuFilmConvert, offset, task.y));
- offset += sizeof(task.y);
-
- cuda_assert(cuParamSeti(cuFilmConvert, offset, task.w));
- offset += sizeof(task.w);
-
- cuda_assert(cuParamSeti(cuFilmConvert, offset, task.h));
- offset += sizeof(task.h);
-
- cuda_assert(cuParamSeti(cuFilmConvert, offset, task.offset));
- offset += sizeof(task.offset);
-
- cuda_assert(cuParamSeti(cuFilmConvert, offset, task.stride));
- offset += sizeof(task.stride);
-
- cuda_assert(cuParamSetSize(cuFilmConvert, offset));
+ /* pass in parameters */
+ void *args[] = {&d_rgba,
+ &d_buffer,
+ &sample_scale,
+ &task.x,
+ &task.y,
+ &task.w,
+ &task.h,
+ &task.offset,
+ &task.stride};
/* launch kernel */
int threads_per_block;
@@ -738,8 +697,11 @@ public:
int yblocks = (task.h + ythreads - 1)/ythreads;
cuda_assert(cuFuncSetCacheConfig(cuFilmConvert, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuFuncSetBlockShape(cuFilmConvert, xthreads, ythreads, 1));
- cuda_assert(cuLaunchGrid(cuFilmConvert, xblocks, yblocks));
+
+ cuda_assert(cuLaunchKernel(cuFilmConvert,
+ xblocks , yblocks, 1, /* blocks */
+ xthreads, ythreads, 1, /* threads */
+ 0, 0, args, 0));
unmap_pixels((rgba_byte)? rgba_byte: rgba_half);
@@ -777,31 +739,14 @@ public:
int shader_w = min(shader_chunk_size, end - shader_x);
for(int sample = 0; sample < task.num_samples; sample++) {
- /* pass in parameters */
- int offset = 0;
- cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input)));
- offset += sizeof(d_input);
-
- cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output)));
- offset += sizeof(d_output);
-
- int shader_eval_type = task.shader_eval_type;
- offset = align_up(offset, __alignof(shader_eval_type));
-
- cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type));
- offset += sizeof(task.shader_eval_type);
-
- cuda_assert(cuParamSeti(cuShader, offset, shader_x));
- offset += sizeof(shader_x);
-
- cuda_assert(cuParamSeti(cuShader, offset, shader_w));
- offset += sizeof(shader_w);
-
- cuda_assert(cuParamSeti(cuShader, offset, sample));
- offset += sizeof(sample);
-
- cuda_assert(cuParamSetSize(cuShader, offset));
+ /* pass in parameters */
+ void *args[] = {&d_input,
+ &d_output,
+ &task.shader_eval_type,
+ &shader_x,
+ &shader_w,
+ &sample};
/* launch kernel */
int threads_per_block;
@@ -810,8 +755,10 @@ public:
int xblocks = (shader_w + threads_per_block - 1)/threads_per_block;
cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
- cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1));
- cuda_assert(cuLaunchGrid(cuShader, xblocks, 1));
+ cuda_assert(cuLaunchKernel(cuShader,
+ xblocks , 1, 1, /* blocks */
+ threads_per_block, 1, 1, /* threads */
+ 0, 0, args, 0));
cuda_assert(cuCtxSynchronize());
}
diff --git a/intern/cycles/util/util_cuda.cpp b/intern/cycles/util/util_cuda.cpp
index 9404f45d9a6..50690434f03 100644
--- a/intern/cycles/util/util_cuda.cpp
+++ b/intern/cycles/util/util_cuda.cpp
@@ -149,6 +149,7 @@ tcuGLCtxCreate *cuGLCtxCreate;
tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
tcuCtxSetCurrent *cuCtxSetCurrent;
+tcuLaunchKernel *cuLaunchKernel;
CCL_NAMESPACE_BEGIN
@@ -386,6 +387,7 @@ bool cuLibraryInit()
/* cuda 4.0 */
CUDA_LIBRARY_FIND(cuCtxSetCurrent);
+ CUDA_LIBRARY_FIND(cuLaunchKernel);
if(cuHavePrecompiledKernels())
result = true;
diff --git a/intern/cycles/util/util_cuda.h b/intern/cycles/util/util_cuda.h
index b5e9c7188cd..a633fb21eca 100644
--- a/intern/cycles/util/util_cuda.h
+++ b/intern/cycles/util/util_cuda.h
@@ -509,6 +509,7 @@ typedef CUresult CUDAAPI tcuGLCtxCreate(CUcontext *pCtx, unsigned int Flags, CUd
typedef CUresult CUDAAPI tcuGraphicsGLRegisterBuffer(CUgraphicsResource *pCudaResource, GLuint buffer, unsigned int Flags);
typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource *pCudaResource, GLuint image, GLenum target, unsigned int Flags);
typedef CUresult CUDAAPI tcuCtxSetCurrent(CUcontext ctx);
+typedef CUresult CUDAAPI tcuLaunchKernel(CUfunction f, unsigned gridDimX, unsigned gridDimY, unsigned gridDimZ, unsigned blockDimX, unsigned blockDimY, unsigned blockDimZ, unsigned sharedMemBytes, CUstream hStream, void* kernelParams, void* extra);
/* function declarations */
@@ -629,6 +630,7 @@ extern tcuGLCtxCreate *cuGLCtxCreate;
extern tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
extern tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
extern tcuCtxSetCurrent *cuCtxSetCurrent;
+extern tcuLaunchKernel *cuLaunchKernel;
#endif /* __UTIL_CUDA_H__ */