Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/intern
diff options
context:
space:
mode:
author: Brecht Van Lommel <brechtvanlommel@gmail.com> 2014-06-05 20:10:06 +0400
committer: Brecht Van Lommel <brechtvanlommel@gmail.com> 2014-06-05 20:10:32 +0400
commit: 865dfa8a7e8c29b2a7f0aa3e6bcdfb3353e39052 (patch)
tree: 63d3f2a4ab983be993ec06a2fd1907f4fcbb791d /intern
parent: 2305e3289bcfced0e56d4d0fac8d525cb847d46e (diff)
Fix T40228: cycles CUDA multi GPU + world MIS giving error.
Diffstat (limited to 'intern')
-rw-r--r-- intern/cycles/device/device_cuda.cpp | 6
-rw-r--r-- intern/cycles/kernel/kernel.cu | 10
2 files changed, 11 insertions, 5 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 48d1c18555a..0429bfc6e97 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -762,6 +762,8 @@ public:
if(task.get_cancel())
break;
+ int shader_w = min(shader_chunk_size, end - shader_x);
+
/* pass in parameters */
int offset = 0;
@@ -780,13 +782,15 @@ public:
cuda_assert(cuParamSeti(cuShader, offset, shader_x));
offset += sizeof(shader_x);
+ cuda_assert(cuParamSeti(cuShader, offset, shader_w));
+ offset += sizeof(shader_w);
+
cuda_assert(cuParamSetSize(cuShader, offset));
/* launch kernel */
int threads_per_block;
cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader));
- int shader_w = min(shader_chunk_size, end - shader_x);
int xblocks = (shader_w + threads_per_block - 1)/threads_per_block;
cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu
index 69c2600ea98..12273cc2853 100644
--- a/intern/cycles/kernel/kernel.cu
+++ b/intern/cycles/kernel/kernel.cu
@@ -146,20 +146,22 @@ kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scal
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx)
+kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
- kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x);
+ if(x < sx + sw)
+ kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x);
}
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx)
+kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
- kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x);
+ if(x < sx + sw)
+ kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x);
}
#endif