diff options
Diffstat (limited to 'intern/cycles')
-rw-r--r-- | intern/cycles/device/device_cpu.cpp | 18 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/device/device_opencl.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel.cl | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel.cu | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel.h | 12 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_avx.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_avx2.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_bake.h | 5 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_sse2.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_sse3.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_sse41.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/render/bake.cpp | 1 |
14 files changed, 43 insertions, 31 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 4fdeef6bdcb..fd5ae1d7828 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -435,7 +435,8 @@ public: if(system_cpu_support_avx2()) { for(int sample = 0; sample < task.num_samples; sample++) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_avx2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); + kernel_cpu_avx2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, + task.shader_eval_type, x, task.offset, sample); if(task.get_cancel() || task_pool.canceled()) break; @@ -449,7 +450,8 @@ public: if(system_cpu_support_avx()) { for(int sample = 0; sample < task.num_samples; sample++) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); + kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, + task.shader_eval_type, x, task.offset, sample); if(task.get_cancel() || task_pool.canceled()) break; @@ -463,7 +465,8 @@ public: if(system_cpu_support_sse41()) { for(int sample = 0; sample < task.num_samples; sample++) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); + kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, + task.shader_eval_type, x, task.offset, sample); if(task.get_cancel() || task_pool.canceled()) break; @@ -477,7 +480,8 @@ public: if(system_cpu_support_sse3()) { for(int sample = 0; sample < task.num_samples; sample++) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); + kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, + task.shader_eval_type, x, task.offset, sample); if(task.get_cancel() || task_pool.canceled()) break; @@ -491,7 +495,8 @@ public: if(system_cpu_support_sse2()) { for(int sample = 0; sample < task.num_samples; sample++) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); + kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, + task.shader_eval_type, x, task.offset, sample); if(task.get_cancel() || task_pool.canceled()) break; @@ -504,7 +509,8 @@ public: { for(int sample = 0; sample < task.num_samples; sample++) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); + kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, + task.shader_eval_type, x, task.offset, sample); if(task.get_cancel() || task_pool.canceled()) break; diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 6629069c6c6..9e3d703f5d9 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -676,6 +676,7 @@ public: const int shader_chunk_size = 65536; const int start = task.shader_x; const int end = task.shader_x + task.shader_w; + int offset = task.offset; bool canceled = false; for(int sample = 0; sample < task.num_samples && !canceled; sample++) { @@ -688,6 +689,7 @@ public: &task.shader_eval_type, &shader_x, &shader_w, + &offset, &sample}; /* launch kernel */ diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index 077ff9df51e..82419cd62b1 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -1004,6 +1004,7 @@ public: cl_int d_shader_eval_type = task.shader_eval_type; cl_int d_shader_x = task.shader_x; cl_int d_shader_w = task.shader_w; + cl_int d_offset = task.offset; /* sample arguments */ cl_uint narg = 0; @@ -1033,6 +1034,7 @@ public: opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type)); opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x)); opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w)); + opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_offset), (void*)&d_offset)); opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_sample), (void*)&d_sample)); enqueue_kernel(kernel, task.shader_w, 1); diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernel.cl index 2e0a49435a8..4f20ef9ca15 100644 --- a/intern/cycles/kernel/kernel.cl +++ b/intern/cycles/kernel/kernel.cl @@ -115,7 +115,7 @@ __kernel void kernel_ocl_shader( ccl_global type *name, #include "kernel_textures.h" - int type, int sx, int sw, int sample) + int type, int sx, int sw, int offset, int sample) { KernelGlobals kglobals, *kg = &kglobals; @@ -140,7 +140,7 @@ __kernel void kernel_ocl_bake( ccl_global type *name, #include "kernel_textures.h" - int type, int sx, int sw, int sample) + int type, int sx, int sw, int offset, int sample) { KernelGlobals kglobals, *kg = &kglobals; @@ -153,6 +153,6 @@ __kernel void kernel_ocl_bake( int x = sx + get_global_id(0); if(x < sx + sw) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, offset, sample); } diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp index 42eb9a62518..fa2113fbb46 100644 --- a/intern/cycles/kernel/kernel.cpp +++ b/intern/cycles/kernel/kernel.cpp @@ -120,10 +120,10 @@ void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *bu /* Shader Evaluation */ -void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu index 9208acc232e..23f56ee3c86 100644 --- a/intern/cycles/kernel/kernel.cu +++ b/intern/cycles/kernel/kernel.cu @@ -156,12 +156,12 @@ kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw, int s extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw, int sample) +kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw, int offset, int sample) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; if(x < sx + sw) - kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x, sample); + kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x, offset, sample); } #endif diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h index 264e5e3e4d0..19e06b88797 100644 --- a/intern/cycles/kernel/kernel.h +++ b/intern/cycles/kernel/kernel.h @@ -41,7 +41,7 @@ void kernel_cpu_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, @@ -51,7 +51,7 @@ void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 @@ -62,7 +62,7 @@ void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 @@ -73,7 +73,7 @@ void kernel_cpu_sse41_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *bu void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX @@ -84,7 +84,7 @@ void kernel_cpu_avx_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buff void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 @@ -95,7 +95,7 @@ void kernel_cpu_avx2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf void kernel_cpu_avx2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #endif CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_avx.cpp b/intern/cycles/kernel/kernel_avx.cpp index d612a82b785..e7ff21a6f09 100644 --- a/intern/cycles/kernel/kernel_avx.cpp +++ b/intern/cycles/kernel/kernel_avx.cpp @@ -68,10 +68,10 @@ void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float /* Shader Evaluate */ -void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/kernel/kernel_avx2.cpp b/intern/cycles/kernel/kernel_avx2.cpp index 339421a002b..cb1662bbfbe 100644 --- a/intern/cycles/kernel/kernel_avx2.cpp +++ b/intern/cycles/kernel/kernel_avx2.cpp @@ -69,10 +69,10 @@ void kernel_cpu_avx2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa /* Shader Evaluate */ -void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index 0df1725edbe..dfbb49db7e6 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -172,7 +172,8 @@ ccl_device_inline float bake_clamp_mirror_repeat(float u) } #endif -ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i, int sample) +ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, + ShaderEvalType type, int i, int offset, int sample) { ShaderData sd; uint4 in = input[i * 2]; @@ -197,7 +198,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, int num_samples = kernel_data.integrator.aa_samples; /* random number generator */ - RNG rng = cmj_hash(i, 0); + RNG rng = cmj_hash(offset + i, 0); #if 0 uint rng_state = cmj_hash(i, 0); diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp index 67bd0943b1b..740998e8c92 100644 --- a/intern/cycles/kernel/kernel_sse2.cpp +++ b/intern/cycles/kernel/kernel_sse2.cpp @@ -64,10 +64,10 @@ void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa /* Shader Evaluate */ -void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp index 40d621b66f6..da73a3a1c97 100644 --- a/intern/cycles/kernel/kernel_sse3.cpp +++ b/intern/cycles/kernel/kernel_sse3.cpp @@ -66,10 +66,10 @@ void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa /* Shader Evaluate */ -void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/kernel/kernel_sse41.cpp b/intern/cycles/kernel/kernel_sse41.cpp index 4b48d10b020..5704f60e138 100644 --- a/intern/cycles/kernel/kernel_sse41.cpp +++ b/intern/cycles/kernel/kernel_sse41.cpp @@ -67,10 +67,10 @@ void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, flo /* Shader Evaluate */ -void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp index ac0fb817ba5..5723a22dd84 100644 --- a/intern/cycles/render/bake.cpp +++ b/intern/cycles/render/bake.cpp @@ -179,6 +179,7 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre task.shader_output = d_output.device_pointer; task.shader_eval_type = shader_type; task.shader_x = 0; + task.offset = shader_offset; task.shader_w = d_output.size(); task.num_samples = this->num_samples; task.get_cancel = function_bind(&Progress::get_cancel, &progress); |