diff options
author | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2014-05-27 15:20:07 +0400 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2014-05-27 17:11:32 +0400 |
commit | 69c7522b2463245ef16ebcf2806645c78e83b4df (patch) | |
tree | 96721c9d2bc755d68cf9eeded77255f4c58018f3 /intern/cycles/device/device_opencl.cpp | |
parent | bc9e66f0830627e807f720dca4b9d5d8d39e732a (diff) |
Fix T40379: world MIS causing too much CUDA memory usage.
The kernel used for baking the world texture was the same as the one used for
regular baking. The two are now separate kernels, which allows the world
texture kernel to reserve much less memory.
Diffstat (limited to 'intern/cycles/device/device_opencl.cpp')
-rw-r--r-- | intern/cycles/device/device_opencl.cpp | 29 |
1 files changed, 21 insertions, 8 deletions
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index 694ec9db036..f841daba124 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -321,6 +321,7 @@ public: cl_kernel ckFilmConvertByteKernel; cl_kernel ckFilmConvertHalfFloatKernel; cl_kernel ckShaderKernel; + cl_kernel ckBakeKernel; cl_int ciErr; typedef map<string, device_vector<uchar>*> ConstMemMap; @@ -443,6 +444,7 @@ public: ckFilmConvertByteKernel = NULL; ckFilmConvertHalfFloatKernel = NULL; ckShaderKernel = NULL; + ckBakeKernel = NULL; null_mem = 0; device_initialized = false; @@ -791,6 +793,10 @@ public: if(opencl_error(ciErr)) return false; + ckBakeKernel = clCreateKernel(cpProgram, "kernel_ocl_bake", &ciErr); + if(opencl_error(ciErr)) + return false; + return true; } @@ -1054,19 +1060,26 @@ public: /* sample arguments */ cl_uint narg = 0; - opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_data), (void*)&d_data)); - opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_input), (void*)&d_input)); - opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_output), (void*)&d_output)); + cl_kernel kernel; + + if(task.shader_eval_type >= SHADER_EVAL_BAKE) + kernel = ckBakeKernel; + else + kernel = ckShaderKernel; + + opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_data), (void*)&d_data)); + opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_input), (void*)&d_input)); + opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_output), (void*)&d_output)); #define KERNEL_TEX(type, ttype, name) \ - set_kernel_arg_mem(ckShaderKernel, &narg, #name); + set_kernel_arg_mem(kernel, &narg, #name); #include "kernel_textures.h" - opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type)); - opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x)); - opencl_assert(clSetKernelArg(ckShaderKernel, narg++, 
sizeof(d_shader_w), (void*)&d_shader_w)); + opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type)); + opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x)); + opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w)); - enqueue_kernel(ckShaderKernel, task.shader_w, 1); + enqueue_kernel(kernel, task.shader_w, 1); } void thread_run(DeviceTask *task) |