diff options
author | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2014-05-27 15:20:07 +0400 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2014-05-27 17:11:32 +0400 |
commit | 69c7522b2463245ef16ebcf2806645c78e83b4df (patch) | |
tree | 96721c9d2bc755d68cf9eeded77255f4c58018f3 /intern/cycles/device | |
parent | bc9e66f0830627e807f720dca4b9d5d8d39e732a (diff) |
Fix T40379: world MIS causing too much CUDA memory usage.
The kernel used for baking the world texture was the same one that is used
for regular baking. The two are now separate kernels, which allows the world
texture kernel to reserve much less memory.
Diffstat (limited to 'intern/cycles/device')
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 7 | ||||
-rw-r--r-- | intern/cycles/device/device_opencl.cpp | 29 |
2 files changed, 27 insertions, 9 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index b19f5e22769..48d1c18555a 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -746,7 +746,12 @@ public: CUdeviceptr d_output = cuda_device_ptr(task.shader_output); /* get kernel function */ - cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_shader")); + if(task.shader_eval_type >= SHADER_EVAL_BAKE) { + cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_bake")); + } + else { + cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_shader")); + } /* do tasks in smaller chunks, so we can cancel it */ const int shader_chunk_size = 65536; diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index 694ec9db036..f841daba124 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -321,6 +321,7 @@ public: cl_kernel ckFilmConvertByteKernel; cl_kernel ckFilmConvertHalfFloatKernel; cl_kernel ckShaderKernel; + cl_kernel ckBakeKernel; cl_int ciErr; typedef map<string, device_vector<uchar>*> ConstMemMap; @@ -443,6 +444,7 @@ public: ckFilmConvertByteKernel = NULL; ckFilmConvertHalfFloatKernel = NULL; ckShaderKernel = NULL; + ckBakeKernel = NULL; null_mem = 0; device_initialized = false; @@ -791,6 +793,10 @@ public: if(opencl_error(ciErr)) return false; + ckBakeKernel = clCreateKernel(cpProgram, "kernel_ocl_bake", &ciErr); + if(opencl_error(ciErr)) + return false; + return true; } @@ -1054,19 +1060,26 @@ public: /* sample arguments */ cl_uint narg = 0; - opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_data), (void*)&d_data)); - opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_input), (void*)&d_input)); - opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_output), (void*)&d_output)); + cl_kernel kernel; + + if(task.shader_eval_type >= SHADER_EVAL_BAKE) + kernel = ckBakeKernel; + else + 
kernel = ckShaderKernel; + + opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_data), (void*)&d_data)); + opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_input), (void*)&d_input)); + opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_output), (void*)&d_output)); #define KERNEL_TEX(type, ttype, name) \ - set_kernel_arg_mem(ckShaderKernel, &narg, #name); + set_kernel_arg_mem(kernel, &narg, #name); #include "kernel_textures.h" - opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type)); - opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x)); - opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w)); + opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type)); + opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x)); + opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w)); - enqueue_kernel(ckShaderKernel, task.shader_w, 1); + enqueue_kernel(kernel, task.shader_w, 1); } void thread_run(DeviceTask *task) |