Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--intern/cycles/device/device_cpu.cpp12
-rw-r--r--intern/cycles/device/device_cuda.cpp20
-rw-r--r--intern/cycles/device/device_multi.cpp1
-rw-r--r--intern/cycles/device/device_network.cpp3
-rw-r--r--intern/cycles/device/device_network.h4
-rw-r--r--intern/cycles/device/device_opencl.cpp7
-rw-r--r--intern/cycles/device/device_task.cpp2
-rw-r--r--intern/cycles/device/device_task.h2
-rw-r--r--intern/cycles/kernel/kernel_bake.h28
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu.h1
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h3
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel.cu20
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel.cl12
13 files changed, 91 insertions, 24 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index f06963c146e..832f4d1c1fd 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -343,7 +343,7 @@ public:
#ifdef WITH_OSL
OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif
- void(*shader_kernel)(KernelGlobals*, uint4*, float4*, int, int, int, int);
+ void(*shader_kernel)(KernelGlobals*, uint4*, float4*, float*, int, int, int, int);
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
if(system_cpu_support_avx2())
@@ -374,8 +374,14 @@ public:
for(int sample = 0; sample < task.num_samples; sample++) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++)
- shader_kernel(&kg, (uint4*)task.shader_input, (float4*)task.shader_output,
- task.shader_eval_type, x, task.offset, sample);
+ shader_kernel(&kg,
+ (uint4*)task.shader_input,
+ (float4*)task.shader_output,
+ (float*)task.shader_output_luma,
+ task.shader_eval_type,
+ x,
+ task.offset,
+ sample);
if(task.get_cancel() || task_pool.canceled())
break;
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index d9d6fd77ecb..5c9ca3454c6 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -726,6 +726,7 @@ public:
CUfunction cuShader;
CUdeviceptr d_input = cuda_device_ptr(task.shader_input);
CUdeviceptr d_output = cuda_device_ptr(task.shader_output);
+ CUdeviceptr d_output_luma = cuda_device_ptr(task.shader_output_luma);
/* get kernel function */
if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
@@ -747,13 +748,18 @@ public:
int shader_w = min(shader_chunk_size, end - shader_x);
/* pass in parameters */
- void *args[] = {&d_input,
- &d_output,
- &task.shader_eval_type,
- &shader_x,
- &shader_w,
- &offset,
- &sample};
+ void *args[8];
+ int arg = 0;
+ args[arg++] = &d_input;
+ args[arg++] = &d_output;
+ if(task.shader_eval_type < SHADER_EVAL_BAKE) {
+ args[arg++] = &d_output_luma;
+ }
+ args[arg++] = &task.shader_eval_type;
+ args[arg++] = &shader_x;
+ args[arg++] = &shader_w;
+ args[arg++] = &offset;
+ args[arg++] = &sample;
/* launch kernel */
int threads_per_block;
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index 8fb841b2b0d..069305e8a29 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -316,6 +316,7 @@ public:
if(task.rgba_half) subtask.rgba_half = sub.ptr_map[task.rgba_half];
if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input];
if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output];
+ if(task.shader_output_luma) subtask.shader_output_luma = sub.ptr_map[task.shader_output_luma];
sub.device->task_add(subtask);
}
diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp
index afa35224aba..23faa61e8e5 100644
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -648,6 +648,9 @@ protected:
if(task.shader_output)
task.shader_output = device_ptr_from_client_pointer(task.shader_output);
+ if(task.shader_output)luma)
+ task.shader_output_luma = device_ptr_from_client_pointer(task.shader_output_luma);
+
task.acquire_tile = function_bind(&DeviceServer::task_acquire_tile, this, _1, _2);
task.release_tile = function_bind(&DeviceServer::task_release_tile, this, _1);
diff --git a/intern/cycles/device/device_network.h b/intern/cycles/device/device_network.h
index 2e751f6697f..60ecc1d0a86 100644
--- a/intern/cycles/device/device_network.h
+++ b/intern/cycles/device/device_network.h
@@ -132,7 +132,7 @@ public:
archive & type & task.x & task.y & task.w & task.h;
archive & task.rgba_byte & task.rgba_half & task.buffer & task.sample & task.num_samples;
archive & task.offset & task.stride;
- archive & task.shader_input & task.shader_output & task.shader_eval_type;
+ archive & task.shader_input & task.shader_output & task.shader_output_luma & task.shader_eval_type;
archive & task.shader_x & task.shader_w;
archive & task.need_finish_queue;
}
@@ -291,7 +291,7 @@ public:
*archive & type & task.x & task.y & task.w & task.h;
*archive & task.rgba_byte & task.rgba_half & task.buffer & task.sample & task.num_samples;
*archive & task.offset & task.stride;
- *archive & task.shader_input & task.shader_output & task.shader_eval_type;
+ *archive & task.shader_input & task.shader_output & task.shader_output_luma & task.shader_eval_type;
*archive & task.shader_x & task.shader_w;
*archive & task.need_finish_queue;
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index e0c602461ed..a1743f53831 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -1304,6 +1304,7 @@ public:
cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
cl_mem d_input = CL_MEM_PTR(task.shader_input);
cl_mem d_output = CL_MEM_PTR(task.shader_output);
+ cl_mem d_output_luma = CL_MEM_PTR(task.shader_output_luma);
cl_int d_shader_eval_type = task.shader_eval_type;
cl_int d_shader_x = task.shader_x;
cl_int d_shader_w = task.shader_w;
@@ -1330,6 +1331,12 @@ public:
d_input,
d_output);
+ if(task.shader_eval_type < SHADER_EVAL_BAKE) {
+ start_arg_index += kernel_set_args(kernel,
+ start_arg_index,
+ d_output_luma);
+ }
+
#define KERNEL_TEX(type, ttype, name) \
set_kernel_arg_mem(kernel, &start_arg_index, #name);
#include "kernel_textures.h"
diff --git a/intern/cycles/device/device_task.cpp b/intern/cycles/device/device_task.cpp
index d527540f300..0cae118a692 100644
--- a/intern/cycles/device/device_task.cpp
+++ b/intern/cycles/device/device_task.cpp
@@ -29,7 +29,7 @@ CCL_NAMESPACE_BEGIN
DeviceTask::DeviceTask(Type type_)
: type(type_), x(0), y(0), w(0), h(0), rgba_byte(0), rgba_half(0), buffer(0),
sample(0), num_samples(1),
- shader_input(0), shader_output(0),
+ shader_input(0), shader_output(0), shader_output_luma(0),
shader_eval_type(0), shader_x(0), shader_w(0)
{
last_update_time = time_dt();
diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h
index 834ea60988a..7654508d4a5 100644
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@@ -46,7 +46,7 @@ public:
int offset, stride;
device_ptr shader_input;
- device_ptr shader_output;
+ device_ptr shader_output, shader_output_luma;
int shader_eval_type;
int shader_x, shader_w;
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 715c11c7ea0..b54afbd21b8 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -453,7 +453,13 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
output[i] += make_float4(out.x, out.y, out.z, 1.0f) * output_fac;
}
-ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i, int sample)
+ccl_device void kernel_shader_evaluate(KernelGlobals *kg,
+ ccl_global uint4 *input,
+ ccl_global float4 *output,
+ ccl_global float *output_luma,
+ ShaderEvalType type,
+ int i,
+ int sample)
{
ShaderData sd;
uint4 in = input[i];
@@ -500,10 +506,22 @@ ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *inpu
}
/* write output */
- if(sample == 0)
- output[i] = make_float4(out.x, out.y, out.z, 0.0f);
- else
- output[i] += make_float4(out.x, out.y, out.z, 0.0f);
+ if(sample == 0) {
+ if(output != NULL) {
+ output[i] = make_float4(out.x, out.y, out.z, 0.0f);
+ }
+ if(output_luma != NULL) {
+ output_luma[i] = average(out);
+ }
+ }
+ else {
+ if(output != NULL) {
+ output[i] += make_float4(out.x, out.y, out.z, 0.0f);
+ }
+ if(output_luma != NULL) {
+ output_luma[i] += average(out);
+ }
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index 2560c6d8dee..1ce1e41272b 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -42,6 +42,7 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
uint4 *input,
float4 *output,
+ float *output_luma,
int type,
int i,
int offset,
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 693285ec3a8..0249610b381 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -99,12 +99,14 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
uint4 *input,
float4 *output,
+ float *output_luma,
int type,
int i,
int offset,
int sample)
{
if(type >= SHADER_EVAL_BAKE) {
+ kernel_assert(output_luma == NULL);
kernel_bake_evaluate(kg,
input,
output,
@@ -117,6 +119,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
kernel_shader_evaluate(kg,
input,
output,
+ output_luma,
(ShaderEvalType)type,
i,
sample);
diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu
index 3929b676f07..e094612de01 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel.cu
@@ -159,12 +159,26 @@ kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scal
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw, int offset, int sample)
+kernel_cuda_shader(uint4 *input,
+ float4 *output,
+ float *output_luma,
+ int type,
+ int sx,
+ int sw,
+ int offset,
+ int sample)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
- if(x < sx + sw)
- kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x, sample);
+ if(x < sx + sw) {
+ kernel_shader_evaluate(NULL,
+ input,
+ output,
+ output_luma,
+ (ShaderEvalType)type,
+ x,
+ sample);
+ }
}
extern "C" __global__ void
diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl
index 57db6fd9098..4c9f7ba1d7c 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel.cl
@@ -61,6 +61,7 @@ __kernel void kernel_ocl_shader(
ccl_constant KernelData *data,
ccl_global uint4 *input,
ccl_global float4 *output,
+ ccl_global float *output_luma,
#define KERNEL_TEX(type, ttype, name) \
ccl_global type *name,
@@ -78,8 +79,15 @@ __kernel void kernel_ocl_shader(
int x = sx + get_global_id(0);
- if(x < sx + sw)
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, x, sample);
+ if(x < sx + sw) {
+ kernel_shader_evaluate(kg,
+ input,
+ output,
+ output_luma,
+ (ShaderEvalType)type,
+ x,
+ sample);
+ }
}
__kernel void kernel_ocl_bake(