From 865dfa8a7e8c29b2a7f0aa3e6bcdfb3353e39052 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Thu, 5 Jun 2014 18:10:06 +0200 Subject: Fix T40228: cycles CUDA multi GPU + world MIS giving error. --- intern/cycles/device/device_cuda.cpp | 6 +++++- intern/cycles/kernel/kernel.cu | 10 ++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 48d1c18555a..0429bfc6e97 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -762,6 +762,8 @@ public: if(task.get_cancel()) break; + int shader_w = min(shader_chunk_size, end - shader_x); + /* pass in parameters */ int offset = 0; @@ -780,13 +782,15 @@ public: cuda_assert(cuParamSeti(cuShader, offset, shader_x)); offset += sizeof(shader_x); + cuda_assert(cuParamSeti(cuShader, offset, shader_w)); + offset += sizeof(shader_w); + cuda_assert(cuParamSetSize(cuShader, offset)); /* launch kernel */ int threads_per_block; cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader)); - int shader_w = min(shader_chunk_size, end - shader_x); int xblocks = (shader_w + threads_per_block - 1)/threads_per_block; cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1)); diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu index 69c2600ea98..12273cc2853 100644 --- a/intern/cycles/kernel/kernel.cu +++ b/intern/cycles/kernel/kernel.cu @@ -146,20 +146,22 @@ kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scal extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx) +kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; - kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x); + if(x < sx + sw) + kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x); } extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx) +kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; - kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x); + if(x < sx + sw) + kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x); } #endif -- cgit v1.2.3