diff options
author | Brecht Van Lommel <brechtvanlommel@pandora.be> | 2013-08-31 03:49:38 +0400 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@pandora.be> | 2013-08-31 03:49:38 +0400 |
commit | 29f6616d609fbd92cf313b0fdec555c2fcb4ede0 (patch) | |
tree | e0c9500368c5210071cb841ea86f5674b0cf6f25 /intern/cycles/kernel | |
parent | 60ff60dcdc9f43891fb8a19e10f9bb7964a539bf (diff) |
Cycles: viewport render now takes scene color management settings into account,
except for curves, that's still missing from the OpenColorIO GLSL shader.
The pixels are stored in a half float texture, converterd from full float with
native GPU instructions and SIMD on the CPU, so it should be pretty quick.
Using a GLSL shader is useful for GPU render because it avoids a copy through
CPU memory.
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r-- | intern/cycles/kernel/kernel.cl | 33 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel.cpp | 11 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel.cu | 13 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel.h | 18 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_compat_cuda.h | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_film.h | 23 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_sse2.cpp | 11 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_sse3.cpp | 11 |
8 files changed, 99 insertions, 27 deletions
diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernel.cl index dd8ffdd2b33..28e72d78731 100644 --- a/intern/cycles/kernel/kernel.cl +++ b/intern/cycles/kernel/kernel.cl @@ -52,7 +52,7 @@ __kernel void kernel_ocl_path_trace( kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); } -__kernel void kernel_ocl_tonemap( +__kernel void kernel_ocl_convert_to_byte( __constant KernelData *data, __global uchar4 *rgba, __global float *buffer, @@ -61,7 +61,34 @@ __kernel void kernel_ocl_tonemap( __global type *name, #include "kernel_textures.h" - int sample, + float sample_scale, + int sx, int sy, int sw, int sh, int offset, int stride) +{ + KernelGlobals kglobals, *kg = &kglobals; + + kg->data = data; + +#define KERNEL_TEX(type, ttype, name) \ + kg->name = name; +#include "kernel_textures.h" + + int x = sx + get_global_id(0); + int y = sy + get_global_id(1); + + if(x < sx + sw && y < sy + sh) + kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); +} + +__kernel void kernel_ocl_convert_to_half_float( + __constant KernelData *data, + __global uchar4 *rgba, + __global float *buffer, + +#define KERNEL_TEX(type, ttype, name) \ + __global type *name, +#include "kernel_textures.h" + + float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) { KernelGlobals kglobals, *kg = &kglobals; @@ -76,7 +103,7 @@ __kernel void kernel_ocl_tonemap( int y = sy + get_global_id(1); if(x < sx + sw && y < sy + sh) - kernel_film_tonemap(kg, rgba, buffer, sample, x, y, offset, stride); + kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); } __kernel void kernel_ocl_shader( diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp index 3f357763a8f..3e2727fde9a 100644 --- a/intern/cycles/kernel/kernel.cpp +++ b/intern/cycles/kernel/kernel.cpp @@ -96,11 +96,16 @@ void kernel_cpu_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_s kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); } -/* Tonemapping */ +/* Film */ -void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float *buffer, int sample, int x, int y, int offset, int stride) +void kernel_cpu_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) { - kernel_film_tonemap(kg, rgba, buffer, sample, x, y, offset, stride); + kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); +} + +void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) +{ + kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); } /* Shader Evaluation */ diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu index c4da1d440b7..5e6748c66fc 100644 --- a/intern/cycles/kernel/kernel.cu +++ b/intern/cycles/kernel/kernel.cu @@ -44,13 +44,22 @@ extern "C" __global__ void kernel_cuda_branched_path_trace(float *buffer, uint * } #endif -extern "C" __global__ void kernel_cuda_tonemap(uchar4 *rgba, float *buffer, int sample, int sx, int sy, int sw, int sh, int offset, int stride) +extern "C" __global__ void kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; int y = sy + blockDim.y*blockIdx.y + threadIdx.y; if(x < sx + sw && y < sy + sh) - kernel_film_tonemap(NULL, rgba, buffer, sample, x, y, offset, stride); + kernel_film_convert_to_byte(NULL, rgba, buffer, sample_scale, x, y, offset, stride); +} + +extern "C" __global__ void kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) +{ + int x = sx + blockDim.x*blockIdx.x + threadIdx.x; + int y = sy + blockDim.y*blockIdx.y + threadIdx.y; + + if(x < sx + sw && y < sy + sh) + kernel_film_convert_to_half_float(NULL, rgba, buffer, sample_scale, x, y, offset, stride); } extern "C" __global__ void kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx) diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h index 6efc28ed2af..361f5b0856d 100644 --- a/intern/cycles/kernel/kernel.h +++ b/intern/cycles/kernel/kernel.h @@ -36,23 +36,29 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t void kernel_cpu_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride); -void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float *buffer, - int sample, int x, int y, int offset, int stride); +void kernel_cpu_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, + float sample_scale, int x, int y, int offset, int stride); +void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, + float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i); #ifdef WITH_OPTIMIZED_KERNEL void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride); -void kernel_cpu_sse2_tonemap(KernelGlobals *kg, uchar4 *rgba, float *buffer, - int sample, int x, int y, int offset, int stride); +void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, + float sample_scale, int x, int y, int offset, int stride); +void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, + float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i); void kernel_cpu_sse3_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride); -void kernel_cpu_sse3_tonemap(KernelGlobals *kg, uchar4 *rgba, float *buffer, - int sample, int x, int y, int offset, int stride); +void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, + float sample_scale, int x, int y, int offset, int stride); +void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, + float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i); #endif diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index a6bf4a96975..cb86ce8c4ae 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -25,8 +25,6 @@ #include <cuda.h> #include <float.h> -#include "util_types.h" - /* Qualifier wrappers for different names on different devices */ #define __device __device__ __inline__ @@ -41,6 +39,10 @@ #define kernel_assert(cond) +/* Types */ + +#include "util_types.h" + /* Textures */ typedef texture<float4, 1> texture_float4; diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h index ba2149cc709..3ef33a2703b 100644 --- a/intern/cycles/kernel/kernel_film.h +++ b/intern/cycles/kernel/kernel_film.h @@ -16,9 +16,8 @@ CCL_NAMESPACE_BEGIN -__device float4 film_map(KernelGlobals *kg, float4 irradiance, int sample) +__device float4 film_map(KernelGlobals *kg, float4 irradiance, float scale) { - float scale = 1.0f/(float)(sample+1); float exposure = kernel_data.film.exposure; float4 result = irradiance*scale; @@ -46,9 +45,9 @@ __device uchar4 film_float_to_byte(float4 color) return result; } -__device void kernel_film_tonemap(KernelGlobals *kg, +__device void kernel_film_convert_to_byte(KernelGlobals *kg, __global uchar4 *rgba, __global float *buffer, - int sample, int x, int y, int offset, int stride) + float sample_scale, int x, int y, int offset, int stride) { /* buffer offset */ int index = offset + x + y*stride; @@ -58,11 +57,25 @@ __device void kernel_film_tonemap(KernelGlobals *kg, /* map colors */ float4 irradiance = *((__global float4*)buffer); - float4 float_result = film_map(kg, irradiance, sample); + float4 float_result = film_map(kg, irradiance, sample_scale); uchar4 byte_result = film_float_to_byte(float_result); *rgba = byte_result; } +__device void kernel_film_convert_to_half_float(KernelGlobals *kg, + __global uchar4 *rgba, __global float *buffer, + float sample_scale, int x, int y, int offset, int stride) +{ + /* buffer offset */ + int index = offset + x + y*stride; + + float4 *in = (__global float4*)(buffer + index*kernel_data.film.pass_stride); + half *out = (half*)rgba + index*4; + float scale = kernel_data.film.exposure*sample_scale; + + float4_store_half(out, in, scale); +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp index e0413ddf445..862626d6899 100644 --- a/intern/cycles/kernel/kernel_sse2.cpp +++ b/intern/cycles/kernel/kernel_sse2.cpp @@ -45,11 +45,16 @@ void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int * kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); } -/* Tonemapping */ +/* Film */ -void kernel_cpu_sse2_tonemap(KernelGlobals *kg, uchar4 *rgba, float *buffer, int sample, int x, int y, int offset, int stride) +void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) { - kernel_film_tonemap(kg, rgba, buffer, sample, x, y, offset, stride); + kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); +} + +void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) +{ + kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); } /* Shader Evaluate */ diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp index 86f4705ca18..c44098606a5 100644 --- a/intern/cycles/kernel/kernel_sse3.cpp +++ b/intern/cycles/kernel/kernel_sse3.cpp @@ -47,11 +47,16 @@ void kernel_cpu_sse3_path_trace(KernelGlobals *kg, float *buffer, unsigned int * kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); } -/* Tonemapping */ +/* Film */ -void kernel_cpu_sse3_tonemap(KernelGlobals *kg, uchar4 *rgba, float *buffer, int sample, int x, int y, int offset, int stride) +void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) { - kernel_film_tonemap(kg, rgba, buffer, sample, x, y, offset, stride); + kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); +} + +void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) +{ + kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); } /* Shader Evaluate */ |