From 8d3cc431d7fdcc9f3243cc24dfdcb94124be0993 Mon Sep 17 00:00:00 2001 From: Dalai Felinto Date: Tue, 19 Aug 2014 11:39:40 +0200 Subject: Fix T41471 Cycles Bake: Setting small tile size results in wrong bake with stripes rather than the expected noise pattern This problem was introduced in 983cbafd1877f8dbaae60b064a14e27b5b640f18 Basically the issue is that we were not getting a unique index in the baking routine for the RNG (random number generator). Reviewers: sergey Differential Revision: https://developer.blender.org/D749 --- intern/cycles/device/device_cpu.cpp | 18 ++++++++++++------ intern/cycles/device/device_cuda.cpp | 2 ++ intern/cycles/device/device_opencl.cpp | 2 ++ intern/cycles/kernel/kernel.cl | 6 +++--- intern/cycles/kernel/kernel.cpp | 4 ++-- intern/cycles/kernel/kernel.cu | 4 ++-- intern/cycles/kernel/kernel.h | 12 ++++++------ intern/cycles/kernel/kernel_avx.cpp | 4 ++-- intern/cycles/kernel/kernel_avx2.cpp | 4 ++-- intern/cycles/kernel/kernel_bake.h | 5 +++-- intern/cycles/kernel/kernel_sse2.cpp | 4 ++-- intern/cycles/kernel/kernel_sse3.cpp | 4 ++-- intern/cycles/kernel/kernel_sse41.cpp | 4 ++-- intern/cycles/render/bake.cpp | 1 + 14 files changed, 43 insertions(+), 31 deletions(-) (limited to 'intern') diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 4fdeef6bdcb..fd5ae1d7828 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -435,7 +435,8 @@ public: if(system_cpu_support_avx2()) { for(int sample = 0; sample < task.num_samples; sample++) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_avx2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); + kernel_cpu_avx2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, + task.shader_eval_type, x, task.offset, sample); if(task.get_cancel() || task_pool.canceled()) break; @@ -449,7 +450,8 @@ public: if(system_cpu_support_avx()) { for(int sample = 0; sample < task.num_samples; sample++) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); + kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, + task.shader_eval_type, x, task.offset, sample); if(task.get_cancel() || task_pool.canceled()) break; @@ -463,7 +465,8 @@ public: if(system_cpu_support_sse41()) { for(int sample = 0; sample < task.num_samples; sample++) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); + kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, + task.shader_eval_type, x, task.offset, sample); if(task.get_cancel() || task_pool.canceled()) break; @@ -477,7 +480,8 @@ public: if(system_cpu_support_sse3()) { for(int sample = 0; sample < task.num_samples; sample++) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); + kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, + task.shader_eval_type, x, task.offset, sample); if(task.get_cancel() || task_pool.canceled()) break; @@ -491,7 +495,8 @@ public: if(system_cpu_support_sse2()) { for(int sample = 0; sample < task.num_samples; sample++) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); + kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, + task.shader_eval_type, x, task.offset, sample); if(task.get_cancel() || task_pool.canceled()) break; @@ -504,7 +509,8 @@ public: { for(int sample = 0; sample < task.num_samples; sample++) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); + kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, + task.shader_eval_type, x, task.offset, sample); if(task.get_cancel() || task_pool.canceled()) break; diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 6629069c6c6..9e3d703f5d9 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -676,6 +676,7 @@ public: const int shader_chunk_size = 65536; const int start = task.shader_x; const int end = task.shader_x + task.shader_w; + int offset = task.offset; bool canceled = false; for(int sample = 0; sample < task.num_samples && !canceled; sample++) { @@ -688,6 +689,7 @@ public: &task.shader_eval_type, &shader_x, &shader_w, + &offset, &sample}; /* launch kernel */ diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index 077ff9df51e..82419cd62b1 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -1004,6 +1004,7 @@ public: cl_int d_shader_eval_type = task.shader_eval_type; cl_int d_shader_x = task.shader_x; cl_int d_shader_w = task.shader_w; + cl_int d_offset = task.offset; /* sample arguments */ cl_uint narg = 0; @@ -1033,6 +1034,7 @@ public: opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type)); opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x)); opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w)); + opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_offset), (void*)&d_offset)); opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_sample), (void*)&d_sample)); enqueue_kernel(kernel, task.shader_w, 1); diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernel.cl index 2e0a49435a8..4f20ef9ca15 100644 --- a/intern/cycles/kernel/kernel.cl +++ b/intern/cycles/kernel/kernel.cl @@ -115,7 +115,7 @@ __kernel void kernel_ocl_shader( ccl_global type *name, #include "kernel_textures.h" - int type, int sx, int sw, int sample) + int type, int sx, int sw, int offset, int sample) { KernelGlobals kglobals, *kg = &kglobals; @@ -140,7 +140,7 @@ __kernel void kernel_ocl_bake( ccl_global type *name, #include "kernel_textures.h" - int type, int sx, int sw, int sample) + int type, int sx, int sw, int offset, int sample) { KernelGlobals kglobals, *kg = &kglobals; @@ -153,6 +153,6 @@ __kernel void kernel_ocl_bake( int x = sx + get_global_id(0); if(x < sx + sw) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, offset, sample); } diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp index 42eb9a62518..fa2113fbb46 100644 --- a/intern/cycles/kernel/kernel.cpp +++ b/intern/cycles/kernel/kernel.cpp @@ -120,10 +120,10 @@ void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *bu /* Shader Evaluation */ -void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu index 9208acc232e..23f56ee3c86 100644 --- a/intern/cycles/kernel/kernel.cu +++ b/intern/cycles/kernel/kernel.cu @@ -156,12 +156,12 @@ kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw, int s extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw, int sample) +kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw, int offset, int sample) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; if(x < sx + sw) - kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x, sample); + kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x, offset, sample); } #endif diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h index 264e5e3e4d0..19e06b88797 100644 --- a/intern/cycles/kernel/kernel.h +++ b/intern/cycles/kernel/kernel.h @@ -41,7 +41,7 @@ void kernel_cpu_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, @@ -51,7 +51,7 @@ void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 @@ -62,7 +62,7 @@ void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 @@ -73,7 +73,7 @@ void kernel_cpu_sse41_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *bu void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX @@ -84,7 +84,7 @@ void kernel_cpu_avx_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buff void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 @@ -95,7 +95,7 @@ void kernel_cpu_avx2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf void kernel_cpu_avx2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #endif CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_avx.cpp b/intern/cycles/kernel/kernel_avx.cpp index d612a82b785..e7ff21a6f09 100644 --- a/intern/cycles/kernel/kernel_avx.cpp +++ b/intern/cycles/kernel/kernel_avx.cpp @@ -68,10 +68,10 @@ void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float /* Shader Evaluate */ -void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/kernel/kernel_avx2.cpp b/intern/cycles/kernel/kernel_avx2.cpp index 339421a002b..cb1662bbfbe 100644 --- a/intern/cycles/kernel/kernel_avx2.cpp +++ b/intern/cycles/kernel/kernel_avx2.cpp @@ -69,10 +69,10 @@ void kernel_cpu_avx2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa /* Shader Evaluate */ -void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index 0df1725edbe..dfbb49db7e6 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -172,7 +172,8 @@ ccl_device_inline float bake_clamp_mirror_repeat(float u) } #endif -ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i, int sample) +ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, + ShaderEvalType type, int i, int offset, int sample) { ShaderData sd; uint4 in = input[i * 2]; @@ -197,7 +198,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, int num_samples = kernel_data.integrator.aa_samples; /* random number generator */ - RNG rng = cmj_hash(i, 0); + RNG rng = cmj_hash(offset + i, 0); #if 0 uint rng_state = cmj_hash(i, 0); diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp index 67bd0943b1b..740998e8c92 100644 --- a/intern/cycles/kernel/kernel_sse2.cpp +++ b/intern/cycles/kernel/kernel_sse2.cpp @@ -64,10 +64,10 @@ void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa /* Shader Evaluate */ -void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp index 40d621b66f6..da73a3a1c97 100644 --- a/intern/cycles/kernel/kernel_sse3.cpp +++ b/intern/cycles/kernel/kernel_sse3.cpp @@ -66,10 +66,10 @@ void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa /* Shader Evaluate */ -void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/kernel/kernel_sse41.cpp b/intern/cycles/kernel/kernel_sse41.cpp index 4b48d10b020..5704f60e138 100644 --- a/intern/cycles/kernel/kernel_sse41.cpp +++ b/intern/cycles/kernel/kernel_sse41.cpp @@ -67,10 +67,10 @@ void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, flo /* Shader Evaluate */ -void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp index ac0fb817ba5..5723a22dd84 100644 --- a/intern/cycles/render/bake.cpp +++ b/intern/cycles/render/bake.cpp @@ -179,6 +179,7 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre task.shader_output = d_output.device_pointer; task.shader_eval_type = shader_type; task.shader_x = 0; + task.offset = shader_offset; task.shader_w = d_output.size(); task.num_samples = this->num_samples; task.get_cancel = function_bind(&Progress::get_cancel, &progress); -- cgit v1.2.3