diff options
Diffstat (limited to 'intern/cycles/kernel/kernels')
19 files changed, 1669 insertions, 0 deletions
diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp new file mode 100644 index 00000000000..a7eaa758f5d --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp @@ -0,0 +1,132 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CPU kernel entry points */ + +#include "kernel_compat_cpu.h" +#include "kernel.h" +#include "kernel_math.h" +#include "kernel_types.h" +#include "kernel_globals.h" +#include "kernel_film.h" +#include "kernel_path.h" +#include "kernel_bake.h" + +CCL_NAMESPACE_BEGIN + +/* Memory Copy */ + +void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size) +{ + if(strcmp(name, "__data") == 0) + memcpy(&kg->__data, host, size); + else + assert(0); +} + +void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t width, size_t height, size_t depth, InterpolationType interpolation) +{ + if(0) { + } + +#define KERNEL_TEX(type, ttype, tname) \ + else if(strcmp(name, #tname) == 0) { \ + kg->tname.data = (type*)mem; \ + kg->tname.width = width; \ + } +#define KERNEL_IMAGE_TEX(type, ttype, tname) +#include "kernel_textures.h" + + else if(strstr(name, "__tex_image_float")) { + texture_image_float4 *tex = NULL; + int id = atoi(name + strlen("__tex_image_float_")); + int array_index = id; + + if(array_index >= 0 && array_index < MAX_FLOAT_IMAGES) { + tex = &kg->texture_float_images[array_index]; + } + + if(tex) { + tex->data = (float4*)mem; + tex->dimensions_set(width, height, depth); + tex->interpolation = interpolation; + } + } + else if(strstr(name, "__tex_image")) { + texture_image_uchar4 *tex = NULL; + int id = atoi(name + strlen("__tex_image_")); + int array_index = id - MAX_FLOAT_IMAGES; + + if(array_index >= 0 && array_index < MAX_BYTE_IMAGES) { + tex = &kg->texture_byte_images[array_index]; + } + + if(tex) { + tex->data = (uchar4*)mem; + tex->dimensions_set(width, height, depth); + tex->interpolation = interpolation; + } + } + else + assert(0); +} + +/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this one with SSE2 intrinsics */ +#if defined(__x86_64__) || defined(_M_X64) +#define __KERNEL_SSE2__ +#endif + +/* quiet unused define warnings */ +#if defined(__KERNEL_SSE2__) + /* do nothing */ +#endif + +/* Path Tracing */ + +void kernel_cpu_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride) +{ +#ifdef __BRANCHED_PATH__ + if(kernel_data.integrator.branched) + kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); + else +#endif + kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); +} + +/* Film */ + +void kernel_cpu_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) +{ + kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); +} + +void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) +{ + kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); +} + +/* Shader Evaluation */ + +void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) +{ + if(type >= SHADER_EVAL_BAKE) + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); + else + kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp new file mode 100644 index 00000000000..f1027ad413d --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp @@ -0,0 +1,86 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with AVX + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. */ + +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +#define __KERNEL_SSE2__ +#define __KERNEL_SSE3__ +#define __KERNEL_SSSE3__ +#define __KERNEL_SSE41__ +#define __KERNEL_AVX__ +#endif + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX + +#include "kernel_compat_cpu.h" +#include "kernel.h" +#include "kernel_math.h" +#include "kernel_types.h" +#include "kernel_globals.h" +#include "kernel_film.h" +#include "kernel_path.h" +#include "kernel_bake.h" + +CCL_NAMESPACE_BEGIN + +/* Path Tracing */ + +void kernel_cpu_avx_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride) +{ +#ifdef __BRANCHED_PATH__ + if(kernel_data.integrator.branched) + kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); + else +#endif + kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); +} + +/* Film */ + +void kernel_cpu_avx_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) +{ + kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); +} + +void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) +{ + kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); +} + +/* Shader Evaluate */ + +void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) +{ + if(type >= SHADER_EVAL_BAKE) + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); + else + kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); +} + +CCL_NAMESPACE_END +#else + +/* needed for some linkers in combination with scons making empty compilation unit in a library */ +void __dummy_function_cycles_avx(void); +void __dummy_function_cycles_avx(void) {} + +#endif diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp new file mode 100644 index 00000000000..b2f16ff54d8 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp @@ -0,0 +1,87 @@ +/* + * Copyright 2011-2014 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with AVX2 + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. */ + +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +#define __KERNEL_SSE2__ +#define __KERNEL_SSE3__ +#define __KERNEL_SSSE3__ +#define __KERNEL_SSE41__ +#define __KERNEL_AVX__ +#define __KERNEL_AVX2__ +#endif + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 + +#include "kernel_compat_cpu.h" +#include "kernel.h" +#include "kernel_math.h" +#include "kernel_types.h" +#include "kernel_globals.h" +#include "kernel_film.h" +#include "kernel_path.h" +#include "kernel_bake.h" + +CCL_NAMESPACE_BEGIN + +/* Path Tracing */ + +void kernel_cpu_avx2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride) +{ +#ifdef __BRANCHED_PATH__ + if(kernel_data.integrator.branched) + kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); + else +#endif + kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); +} + +/* Film */ + +void kernel_cpu_avx2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) +{ + kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); +} + +void kernel_cpu_avx2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) +{ + kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); +} + +/* Shader Evaluate */ + +void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) +{ + if(type >= SHADER_EVAL_BAKE) + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); + else + kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); +} + +CCL_NAMESPACE_END +#else + +/* needed for some linkers in combination with scons making empty compilation unit in a library */ +void __dummy_function_cycles_avx2(void); +void __dummy_function_cycles_avx2(void) {} + +#endif diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp new file mode 100644 index 00000000000..cc8c603e8f8 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp @@ -0,0 +1,83 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with SSE2 + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. */ + +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +#define __KERNEL_SSE2__ +#endif + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 + +#include "kernel_compat_cpu.h" +#include "kernel.h" +#include "kernel_math.h" +#include "kernel_types.h" +#include "kernel_globals.h" +#include "kernel_film.h" +#include "kernel_path.h" +#include "kernel_bake.h" + +CCL_NAMESPACE_BEGIN + +/* Path Tracing */ + +void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride) +{ +#ifdef __BRANCHED_PATH__ + if(kernel_data.integrator.branched) + kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); + else +#endif + kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); +} + +/* Film */ + +void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) +{ + kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); +} + +void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) +{ + kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); +} + +/* Shader Evaluate */ + +void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) +{ + if(type >= SHADER_EVAL_BAKE) + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); + else + kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); +} + +CCL_NAMESPACE_END + +#else + +/* needed for some linkers in combination with scons making empty compilation unit in a library */ +void __dummy_function_cycles_sse2(void); +void __dummy_function_cycles_sse2(void) {} + +#endif diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp new file mode 100644 index 00000000000..20919a4f26e --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp @@ -0,0 +1,84 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3 + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. */ + +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +#define __KERNEL_SSE2__ +#define __KERNEL_SSE3__ +#define __KERNEL_SSSE3__ +#endif + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 + +#include "kernel_compat_cpu.h" +#include "kernel.h" +#include "kernel_math.h" +#include "kernel_types.h" +#include "kernel_globals.h" +#include "kernel_film.h" +#include "kernel_path.h" +#include "kernel_bake.h" + +CCL_NAMESPACE_BEGIN + +/* Path Tracing */ + +void kernel_cpu_sse3_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride) +{ +#ifdef __BRANCHED_PATH__ + if(kernel_data.integrator.branched) + kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); + else +#endif + kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); +} + +/* Film */ + +void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) +{ + kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); +} + +void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) +{ + kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); +} + +/* Shader Evaluate */ + +void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) +{ + if(type >= SHADER_EVAL_BAKE) + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); + else + kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); +} + +CCL_NAMESPACE_END +#else + +/* needed for some linkers in combination with scons making empty compilation unit in a library */ +void __dummy_function_cycles_sse3(void); +void __dummy_function_cycles_sse3(void) {} + +#endif diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp new file mode 100644 index 00000000000..48579d3b7e5 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp @@ -0,0 +1,85 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3 + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. */ + +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +#define __KERNEL_SSE2__ +#define __KERNEL_SSE3__ +#define __KERNEL_SSSE3__ +#define __KERNEL_SSE41__ +#endif + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 + +#include "kernel_compat_cpu.h" +#include "kernel.h" +#include "kernel_math.h" +#include "kernel_types.h" +#include "kernel_globals.h" +#include "kernel_film.h" +#include "kernel_path.h" +#include "kernel_bake.h" + +CCL_NAMESPACE_BEGIN + +/* Path Tracing */ + +void kernel_cpu_sse41_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride) +{ +#ifdef __BRANCHED_PATH__ + if(kernel_data.integrator.branched) + kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); + else +#endif + kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); +} + +/* Film */ + +void kernel_cpu_sse41_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) +{ + kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); +} + +void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) +{ + kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); +} + +/* Shader Evaluate */ + +void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) +{ + if(type >= SHADER_EVAL_BAKE) + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); + else + kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); +} + +CCL_NAMESPACE_END +#else + +/* needed for some linkers in combination with scons making empty compilation unit in a library */ +void __dummy_function_cycles_sse41(void); +void __dummy_function_cycles_sse41(void) {} + +#endif diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu new file mode 100644 index 00000000000..29bf67d9750 --- /dev/null +++ b/intern/cycles/kernel/kernels/cuda/kernel.cu @@ -0,0 +1,180 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CUDA kernel entry points */ + +#include "../../kernel_compat_cuda.h" +#include "../../kernel_math.h" +#include "../../kernel_types.h" +#include "../../kernel_globals.h" +#include "../../kernel_film.h" +#include "../../kernel_path.h" +#include "../../kernel_bake.h" + +/* device data taken from CUDA occupancy calculator */ + +#ifdef __CUDA_ARCH__ + +/* 2.0 and 2.1 */ +#if __CUDA_ARCH__ == 200 || __CUDA_ARCH__ == 210 +#define CUDA_MULTIPRESSOR_MAX_REGISTERS 32768 +#define CUDA_MULTIPROCESSOR_MAX_BLOCKS 8 +#define CUDA_BLOCK_MAX_THREADS 1024 +#define CUDA_THREAD_MAX_REGISTERS 63 + +/* tunable parameters */ +#define CUDA_THREADS_BLOCK_WIDTH 16 +#define CUDA_KERNEL_MAX_REGISTERS 32 +#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 40 + +/* 3.0 and 3.5 */ +#elif __CUDA_ARCH__ == 300 || __CUDA_ARCH__ == 350 +#define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536 +#define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16 +#define CUDA_BLOCK_MAX_THREADS 1024 +#define CUDA_THREAD_MAX_REGISTERS 63 + +/* tunable parameters */ +#define CUDA_THREADS_BLOCK_WIDTH 16 +#define CUDA_KERNEL_MAX_REGISTERS 63 +#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 + +/* 3.2 */ +#elif __CUDA_ARCH__ == 320 +#define CUDA_MULTIPRESSOR_MAX_REGISTERS 32768 +#define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16 +#define CUDA_BLOCK_MAX_THREADS 1024 +#define CUDA_THREAD_MAX_REGISTERS 63 + +/* tunable parameters */ +#define CUDA_THREADS_BLOCK_WIDTH 16 +#define CUDA_KERNEL_MAX_REGISTERS 63 +#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 + +/* 5.0 and 5.2 */ +#elif __CUDA_ARCH__ == 500 || __CUDA_ARCH__ == 520 +#define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536 +#define CUDA_MULTIPROCESSOR_MAX_BLOCKS 32 +#define CUDA_BLOCK_MAX_THREADS 1024 +#define CUDA_THREAD_MAX_REGISTERS 255 + +/* tunable parameters */ +#define CUDA_THREADS_BLOCK_WIDTH 16 +#define CUDA_KERNEL_MAX_REGISTERS 40 +#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 + +/* unknown architecture */ +#else +#error "Unknown or unsupported CUDA architecture, can't determine launch bounds" +#endif + +/* compute number of threads per block and minimum blocks per multiprocessor + * given the maximum number of registers per thread */ + +#define CUDA_LAUNCH_BOUNDS(threads_block_width, thread_num_registers) \ + __launch_bounds__( \ + threads_block_width*threads_block_width, \ + CUDA_MULTIPRESSOR_MAX_REGISTERS/(threads_block_width*threads_block_width*thread_num_registers) \ + ) + +/* sanity checks */ + +#if CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH > CUDA_BLOCK_MAX_THREADS +#error "Maximum number of threads per block exceeded" +#endif + +#if CUDA_MULTIPRESSOR_MAX_REGISTERS/(CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH*CUDA_KERNEL_MAX_REGISTERS) > CUDA_MULTIPROCESSOR_MAX_BLOCKS +#error "Maximum number of blocks per multiprocessor exceeded" +#endif + +#if CUDA_KERNEL_MAX_REGISTERS > CUDA_THREAD_MAX_REGISTERS +#error "Maximum number of registers per thread exceeded" +#endif + +#if CUDA_KERNEL_BRANCHED_MAX_REGISTERS > CUDA_THREAD_MAX_REGISTERS +#error "Maximum number of registers per thread exceeded" +#endif + +/* kernels */ + +extern "C" __global__ void +CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) +kernel_cuda_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride) +{ + int x = sx + blockDim.x*blockIdx.x + threadIdx.x; + int y = sy + blockDim.y*blockIdx.y + threadIdx.y; + + if(x < sx + sw && y < sy + sh) + kernel_path_trace(NULL, buffer, rng_state, sample, x, y, offset, stride); +} + +#ifdef __BRANCHED_PATH__ +extern "C" __global__ void +CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_BRANCHED_MAX_REGISTERS) +kernel_cuda_branched_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride) +{ + int x = sx + blockDim.x*blockIdx.x + threadIdx.x; + int y = sy + blockDim.y*blockIdx.y + threadIdx.y; + + if(x < sx + sw && y < sy + sh) + kernel_branched_path_trace(NULL, buffer, rng_state, sample, x, y, offset, stride); +} +#endif + +extern "C" __global__ void +CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) +kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) +{ + int x = sx + blockDim.x*blockIdx.x + threadIdx.x; + int y = sy + blockDim.y*blockIdx.y + threadIdx.y; + + if(x < sx + sw && y < sy + sh) + kernel_film_convert_to_byte(NULL, rgba, buffer, sample_scale, x, y, offset, stride); +} + +extern "C" __global__ void +CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) +kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) +{ + int x = sx + blockDim.x*blockIdx.x + threadIdx.x; + int y = sy + blockDim.y*blockIdx.y + threadIdx.y; + + if(x < sx + sw && y < sy + sh) + kernel_film_convert_to_half_float(NULL, rgba, buffer, sample_scale, x, y, offset, stride); +} + +extern "C" __global__ void +CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) +kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw, int offset, int sample) +{ + int x = sx + blockDim.x*blockIdx.x + threadIdx.x; + + if(x < sx + sw) + kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x, sample); +} + +extern "C" __global__ void +CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) +kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw, int offset, int sample) +{ + int x = sx + blockDim.x*blockIdx.x + threadIdx.x; + + if(x < sx + sw) + kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x, offset, sample); +} + +#endif + diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl new file mode 100644 index 00000000000..bffcd53bab3 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel.cl @@ -0,0 +1,174 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* OpenCL kernel entry points - unfinished */ + +#include "../../kernel_compat_opencl.h" +#include "../../kernel_math.h" +#include "../../kernel_types.h" +#include "../../kernel_globals.h" + +#include "../../kernel_film.h" +#include "../../kernel_path.h" +#include "../../kernel_bake.h" + +#ifdef __COMPILE_ONLY_MEGAKERNEL__ + +__kernel void kernel_ocl_path_trace( + ccl_constant KernelData *data, + ccl_global float *buffer, + ccl_global uint *rng_state, + +#define KERNEL_TEX(type, ttype, name) \ + ccl_global type *name, +#include "../../kernel_textures.h" + + int sample, + int sx, int sy, int sw, int sh, int offset, int stride) +{ + KernelGlobals kglobals, *kg = &kglobals; + + kg->data = data; + +#define KERNEL_TEX(type, ttype, name) \ + kg->name = name; +#include "../../kernel_textures.h" + + int x = sx + get_global_id(0); + int y = sy + get_global_id(1); + + if(x < sx + sw && y < sy + sh) + kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); +} + +#else // __COMPILE_ONLY_MEGAKERNEL__ + +__kernel void kernel_ocl_shader( + ccl_constant KernelData *data, + ccl_global uint4 *input, + ccl_global float4 *output, + +#define KERNEL_TEX(type, ttype, name) \ + ccl_global type *name, +#include "../../kernel_textures.h" + + int type, int sx, int sw, int offset, int sample) +{ + KernelGlobals kglobals, *kg = &kglobals; + + kg->data = data; + +#define KERNEL_TEX(type, ttype, name) \ + kg->name = name; +#include "../../kernel_textures.h" + + int x = sx + get_global_id(0); + + if(x < sx + sw) + kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, x, sample); +} + +__kernel void kernel_ocl_bake( + ccl_constant KernelData *data, + ccl_global uint4 *input, + ccl_global float4 *output, + +#define KERNEL_TEX(type, ttype, name) \ + ccl_global type *name, +#include "../../kernel_textures.h" + + int type, int sx, int sw, int offset, int sample) +{ + KernelGlobals kglobals, *kg = &kglobals; + + kg->data = data; + +#define KERNEL_TEX(type, ttype, name) \ + kg->name = name; +#include "../../kernel_textures.h" + + int x = sx + get_global_id(0); + + if(x < sx + sw) { +#if defined(__KERNEL_OPENCL_NVIDIA__) && __COMPUTE_CAPABILITY__ < 300 + /* NVidia compiler is spending infinite amount of time trying + * to deal with kernel_bake_evaluate() on architectures prior + * to sm_30. + * For now we disable baking kernel for those devices, so at + * least rendering with split kernel could be compiled. + */ + output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); +#else + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, offset, sample); +#endif + } +} + +__kernel void kernel_ocl_convert_to_byte( + ccl_constant KernelData *data, + ccl_global uchar4 *rgba, + ccl_global float *buffer, + +#define KERNEL_TEX(type, ttype, name) \ + ccl_global type *name, +#include "../../kernel_textures.h" + + float sample_scale, + int sx, int sy, int sw, int sh, int offset, int stride) +{ + KernelGlobals kglobals, *kg = &kglobals; + + kg->data = data; + +#define KERNEL_TEX(type, ttype, name) \ + kg->name = name; +#include "../../kernel_textures.h" + + int x = sx + get_global_id(0); + int y = sy + get_global_id(1); + + if(x < sx + sw && y < sy + sh) + kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); +} + +__kernel void kernel_ocl_convert_to_half_float( + ccl_constant KernelData *data, + ccl_global uchar4 *rgba, + ccl_global float *buffer, + +#define KERNEL_TEX(type, ttype, name) \ + ccl_global type *name, +#include "../../kernel_textures.h" + + float sample_scale, + int sx, int sy, int sw, int sh, int offset, int stride) +{ + KernelGlobals kglobals, *kg = &kglobals; + + kg->data = data; + +#define KERNEL_TEX(type, ttype, name) \ + kg->name = name; +#include "../../kernel_textures.h" + + int x = sx + get_global_id(0); + int y = sy + get_global_id(1); + + if(x < sx + sw && y < sy + sh) + kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); +} + +#endif // __COMPILE_ONLY_MEGAKERNEL__
\ No newline at end of file diff --git a/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl new file mode 100644 index 00000000000..2d1944d01e6 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl @@ -0,0 +1,81 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_background_buffer_update.h" + +__kernel void kernel_ocl_path_trace_background_buffer_update( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, + ccl_global float *per_sample_output_buffers, + ccl_global uint *rng_state, + ccl_global uint *rng_coop, /* Required for buffer Update */ + ccl_global float3 *throughput_coop, /* Required for background hit processing */ + PathRadiance *PathRadiance_coop, /* Required for background hit processing and buffer Update */ + ccl_global Ray *Ray_coop, /* Required for background hit processing */ + ccl_global PathState *PathState_coop, /* Required for background hit processing */ + ccl_global float *L_transparent_coop, /* Required for background hit processing and buffer Update */ + ccl_global char *ray_state, /* Stores information on the current state of a ray */ + int sw, int sh, int sx, int sy, int stride, + int rng_state_offset_x, + int rng_state_offset_y, + int rng_state_stride, + ccl_global unsigned int *work_array, /* Denotes work of each ray */ + ccl_global int *Queue_data, /* Queues memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize, /* Size (capacity) of each queue */ + int end_sample, + int start_sample, +#ifdef __WORK_STEALING__ + ccl_global unsigned int *work_pool_wgs, + unsigned int num_samples, +#endif +#ifdef __KERNEL_DEBUG__ + DebugData *debugdata_coop, +#endif + int parallel_samples) /* Number of samples to be processed in parallel */ +{ + kernel_background_buffer_update(globals, + data, + shader_data, + per_sample_output_buffers, + rng_state, + rng_coop, + throughput_coop, + PathRadiance_coop, + Ray_coop, + PathState_coop, + L_transparent_coop, + ray_state, + sw, sh, sx, sy, stride, + rng_state_offset_x, + rng_state_offset_y, + rng_state_stride, + work_array, + Queue_data, + Queue_index, + queuesize, + end_sample, + start_sample, +#ifdef __WORK_STEALING__ + work_pool_wgs, + num_samples, +#endif +#ifdef __KERNEL_DEBUG__ + debugdata_coop, +#endif + parallel_samples); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl new file mode 100644 index 00000000000..015f0872413 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl @@ -0,0 +1,242 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_data_init.h" + +__kernel void kernel_ocl_path_trace_data_init( + ccl_global char *globals, + ccl_global char *shader_data_sd, /* Arguments related to ShaderData */ + ccl_global char *shader_data_sd_DL_shadow, /* Arguments related to ShaderData */ + + ccl_global float3 *P_sd, + ccl_global float3 *P_sd_DL_shadow, + + ccl_global float3 *N_sd, + ccl_global float3 *N_sd_DL_shadow, + + ccl_global float3 *Ng_sd, + ccl_global float3 *Ng_sd_DL_shadow, + + ccl_global float3 *I_sd, + ccl_global float3 *I_sd_DL_shadow, + + ccl_global int *shader_sd, + ccl_global int *shader_sd_DL_shadow, + + ccl_global int *flag_sd, + ccl_global int *flag_sd_DL_shadow, + + ccl_global int *prim_sd, + ccl_global int *prim_sd_DL_shadow, + + ccl_global int *type_sd, + ccl_global int *type_sd_DL_shadow, + + ccl_global float *u_sd, + ccl_global float *u_sd_DL_shadow, + + ccl_global float *v_sd, + ccl_global float *v_sd_DL_shadow, + + ccl_global int *object_sd, + ccl_global int *object_sd_DL_shadow, + + ccl_global float *time_sd, + ccl_global float *time_sd_DL_shadow, + + ccl_global float *ray_length_sd, + ccl_global float *ray_length_sd_DL_shadow, + + ccl_global int *ray_depth_sd, + ccl_global int *ray_depth_sd_DL_shadow, + + ccl_global int *transparent_depth_sd, + ccl_global int *transparent_depth_sd_DL_shadow, + + /* Ray differentials. */ + ccl_global differential3 *dP_sd, + ccl_global differential3 *dP_sd_DL_shadow, + + ccl_global differential3 *dI_sd, + ccl_global differential3 *dI_sd_DL_shadow, + + ccl_global differential *du_sd, + ccl_global differential *du_sd_DL_shadow, + + ccl_global differential *dv_sd, + ccl_global differential *dv_sd_DL_shadow, + + /* Dp/Du */ + ccl_global float3 *dPdu_sd, + ccl_global float3 *dPdu_sd_DL_shadow, + + ccl_global float3 *dPdv_sd, + ccl_global float3 *dPdv_sd_DL_shadow, + + /* Object motion. */ + ccl_global Transform *ob_tfm_sd, + ccl_global Transform *ob_tfm_sd_DL_shadow, + + ccl_global Transform *ob_itfm_sd, + ccl_global Transform *ob_itfm_sd_DL_shadow, + + ShaderClosure *closure_sd, + ShaderClosure *closure_sd_DL_shadow, + + ccl_global int *num_closure_sd, + ccl_global int *num_closure_sd_DL_shadow, + + ccl_global float *randb_closure_sd, + ccl_global float *randb_closure_sd_DL_shadow, + + ccl_global float3 *ray_P_sd, + ccl_global float3 *ray_P_sd_DL_shadow, + + ccl_global differential3 *ray_dP_sd, + ccl_global differential3 *ray_dP_sd_DL_shadow, + + ccl_constant KernelData *data, + ccl_global float *per_sample_output_buffers, + ccl_global uint *rng_state, + ccl_global uint *rng_coop, /* rng array to store rng values for all rays */ + ccl_global float3 *throughput_coop, /* throughput array to store throughput values for all rays */ + ccl_global float *L_transparent_coop, /* L_transparent array to store L_transparent values for all rays */ + PathRadiance *PathRadiance_coop, /* PathRadiance array to store PathRadiance values for all rays */ + ccl_global Ray *Ray_coop, /* Ray array to store Ray information for all rays */ + ccl_global PathState *PathState_coop, /* PathState array to store PathState information for all rays */ + ccl_global char *ray_state, /* Stores information on current state of a ray */ + +#define KERNEL_TEX(type, ttype, name) \ + ccl_global type *name, +#include "../../kernel_textures.h" + + int start_sample, int sx, int sy, int sw, int sh, int offset, int stride, + int rng_state_offset_x, + int rng_state_offset_y, + int rng_state_stride, + ccl_global int *Queue_data, /* Memory for queues */ + ccl_global int *Queue_index, /* Tracks the number of elements in queues */ + int queuesize, /* size (capacity) of the queue */ + ccl_global char *use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */ + ccl_global unsigned int *work_array, /* work array to store which work each ray belongs to */ +#ifdef __WORK_STEALING__ + ccl_global unsigned int *work_pool_wgs, /* Work pool for each work group */ + unsigned int num_samples, /* Total number of samples per pixel */ +#endif +#ifdef __KERNEL_DEBUG__ + DebugData *debugdata_coop, +#endif + int parallel_samples /* Number of samples to be processed in parallel */ + ) +{ + kernel_data_init(globals, + shader_data_sd, + shader_data_sd_DL_shadow, + P_sd, + P_sd_DL_shadow, + N_sd, + N_sd_DL_shadow, + Ng_sd, + Ng_sd_DL_shadow, + I_sd, + I_sd_DL_shadow, + shader_sd, + shader_sd_DL_shadow, + flag_sd, + flag_sd_DL_shadow, + prim_sd, + prim_sd_DL_shadow, + type_sd, + type_sd_DL_shadow, + u_sd, + u_sd_DL_shadow, + v_sd, + v_sd_DL_shadow, + object_sd, + object_sd_DL_shadow, + time_sd, + time_sd_DL_shadow, + ray_length_sd, + ray_length_sd_DL_shadow, + ray_depth_sd, + ray_depth_sd_DL_shadow, + transparent_depth_sd, + transparent_depth_sd_DL_shadow, + + /* Ray differentials. */ + dP_sd, + dP_sd_DL_shadow, + dI_sd, + dI_sd_DL_shadow, + du_sd, + du_sd_DL_shadow, + dv_sd, + dv_sd_DL_shadow, + + /* Dp/Du */ + dPdu_sd, + dPdu_sd_DL_shadow, + dPdv_sd, + dPdv_sd_DL_shadow, + + /* Object motion. */ + ob_tfm_sd, + ob_tfm_sd_DL_shadow, + ob_itfm_sd, + ob_itfm_sd_DL_shadow, + + closure_sd, + closure_sd_DL_shadow, + num_closure_sd, + num_closure_sd_DL_shadow, + randb_closure_sd, + randb_closure_sd_DL_shadow, + ray_P_sd, + ray_P_sd_DL_shadow, + ray_dP_sd, + ray_dP_sd_DL_shadow, + data, + per_sample_output_buffers, + rng_state, + rng_coop, + throughput_coop, + L_transparent_coop, + PathRadiance_coop, + Ray_coop, + PathState_coop, + ray_state, + +#define KERNEL_TEX(type, ttype, name) name, +#include "../../kernel_textures.h" + + start_sample, sx, sy, sw, sh, offset, stride, + rng_state_offset_x, + rng_state_offset_y, + rng_state_stride, + Queue_data, + Queue_index, + queuesize, + use_queues_flag, + work_array, +#ifdef __WORK_STEALING__ + work_pool_wgs, + num_samples, +#endif +#ifdef __KERNEL_DEBUG__ + debugdata_coop, +#endif + parallel_samples); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl new file mode 100644 index 00000000000..0b22c6d0864 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl @@ -0,0 +1,47 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_direct_lighting.h" + +__kernel void kernel_ocl_path_trace_direct_lighting( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Required for direct lighting */ + ccl_global char *shader_DL, /* Required for direct lighting */ + ccl_global uint *rng_coop, /* Required for direct lighting */ + ccl_global PathState *PathState_coop, /* Required for direct lighting */ + ccl_global int *ISLamp_coop, /* Required for direct lighting */ + ccl_global Ray *LightRay_coop, /* Required for direct lighting */ + ccl_global BsdfEval *BSDFEval_coop, /* Required for direct lighting */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + ccl_global int *Queue_data, /* Queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize) /* Size (capacity) of each queue */ +{ + kernel_direct_lighting(globals, + data, + shader_data, + shader_DL, + rng_coop, + PathState_coop, + ISLamp_coop, + LightRay_coop, + BSDFEval_coop, + ray_state, + Queue_data, + Queue_index, + queuesize); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl new file mode 100644 index 00000000000..502f10a7a59 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl @@ -0,0 +1,67 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_holdout_emission_blurring_pathtermination_ao.h" + +__kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Required throughout the kernel except probabilistic path termination and AO */ + ccl_global float *per_sample_output_buffers, + ccl_global uint *rng_coop, /* Required for "kernel_write_data_passes" and AO */ + ccl_global float3 *throughput_coop, /* Required for handling holdout material and AO */ + ccl_global float *L_transparent_coop, /* Required for handling holdout material */ + PathRadiance *PathRadiance_coop, /* Required for "kernel_write_data_passes" and indirect primitive emission */ + ccl_global PathState *PathState_coop, /* Required throughout the kernel and AO */ + Intersection *Intersection_coop, /* Required for indirect primitive emission */ + ccl_global float3 *AOAlpha_coop, /* Required for AO */ + ccl_global float3 *AOBSDF_coop, /* Required for AO */ + ccl_global Ray *AOLightRay_coop, /* Required for AO */ + int sw, int sh, int sx, int sy, int stride, + ccl_global char *ray_state, /* Denotes the state of each ray */ + ccl_global unsigned int *work_array, /* Denotes the work that each ray belongs to */ + ccl_global int *Queue_data, /* Queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize, /* Size (capacity) of each queue */ +#ifdef __WORK_STEALING__ + unsigned int start_sample, +#endif + int parallel_samples) /* Number of samples to be processed in parallel */ +{ + kernel_holdout_emission_blurring_pathtermination_ao(globals, + data, + shader_data, + per_sample_output_buffers, + rng_coop, + throughput_coop, + L_transparent_coop, + PathRadiance_coop, + PathState_coop, + Intersection_coop, + AOAlpha_coop, + AOBSDF_coop, + AOLightRay_coop, + sw, sh, sx, sy, stride, + ray_state, + work_array, + Queue_data, + Queue_index, + queuesize, +#ifdef __WORK_STEALING__ + start_sample, +#endif + parallel_samples); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl new file mode 100644 index 00000000000..af83e68b53e --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl @@ -0,0 +1,52 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_lamp_emission.h" + +__kernel void kernel_ocl_path_trace_lamp_emission( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Required for lamp emission */ + ccl_global float3 *throughput_coop, /* Required for lamp emission */ + PathRadiance *PathRadiance_coop, /* Required for lamp emission */ + ccl_global Ray *Ray_coop, /* Required for lamp emission */ + ccl_global PathState *PathState_coop, /* Required for lamp emission */ + Intersection *Intersection_coop, /* Required for lamp emission */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + int sw, int sh, + ccl_global int *Queue_data, /* Memory for queues */ + ccl_global int *Queue_index, /* Tracks the number of elements in queues */ + int queuesize, /* Size (capacity) of queues */ + ccl_global char *use_queues_flag, /* used to decide if this kernel should use queues to fetch ray index */ + int parallel_samples /* Number of samples to be processed in parallel */ + ) +{ + kernel_lamp_emission(globals, + data, + shader_data, + throughput_coop, + PathRadiance_coop, + Ray_coop, + PathState_coop, + Intersection_coop, + ray_state, + sw, sh, + Queue_data, + Queue_index, + queuesize, + use_queues_flag, + parallel_samples); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl new file mode 100644 index 00000000000..4acd991f0b4 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl @@ -0,0 +1,59 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_next_iteration_setup.h" + +__kernel void kernel_ocl_path_trace_next_iteration_setup( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Required for setting up ray for next iteration */ + ccl_global uint *rng_coop, /* Required for setting up ray for next iteration */ + ccl_global float3 *throughput_coop, /* Required for setting up ray for next iteration */ + PathRadiance *PathRadiance_coop, /* Required for setting up ray for next iteration */ + ccl_global Ray *Ray_coop, /* Required for setting up ray for next iteration */ + ccl_global PathState *PathState_coop, /* Required for setting up ray for next iteration */ + ccl_global Ray *LightRay_dl_coop, /* Required for radiance update - direct lighting */ + ccl_global int *ISLamp_coop, /* Required for radiance update - direct lighting */ + ccl_global BsdfEval *BSDFEval_coop, /* Required for radiance update - direct lighting */ + ccl_global Ray *LightRay_ao_coop, /* Required for radiance update - AO */ + ccl_global float3 *AOBSDF_coop, /* Required for radiance update - AO */ + ccl_global float3 *AOAlpha_coop, /* Required for radiance update - AO */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + ccl_global int *Queue_data, /* Queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize, /* Size (capacity) of each queue */ + ccl_global char *use_queues_flag) /* flag to decide if scene_intersect kernel should use queues to fetch ray index */ +{ + kernel_next_iteration_setup(globals, + data, + shader_data, + rng_coop, + throughput_coop, + PathRadiance_coop, + Ray_coop, + PathState_coop, + LightRay_dl_coop, + ISLamp_coop, + BSDFEval_coop, + LightRay_ao_coop, + AOBSDF_coop, + AOAlpha_coop, + ray_state, + Queue_data, + Queue_index, + queuesize, + use_queues_flag); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl new file mode 100644 index 00000000000..62cf08c387d --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl @@ -0,0 +1,29 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_queue_enqueue.h" + +__kernel void kernel_ocl_path_trace_queue_enqueue( + ccl_global int *Queue_data, /* Queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + int queuesize) /* Size (capacity) of each queue */ +{ + kernel_queue_enqueue(Queue_data, + Queue_index, + ray_state, + queuesize); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl new file mode 100644 index 00000000000..d219874d391 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl @@ -0,0 +1,53 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_scene_intersect.h" + +__kernel void kernel_ocl_path_trace_scene_intersect( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global uint *rng_coop, + ccl_global Ray *Ray_coop, /* Required for scene_intersect */ + ccl_global PathState *PathState_coop, /* Required for scene_intersect */ + Intersection *Intersection_coop, /* Required for scene_intersect */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + int sw, int sh, + ccl_global int *Queue_data, /* Memory for queues */ + ccl_global int *Queue_index, /* Tracks the number of elements in queues */ + int queuesize, /* Size (capacity) of queues */ + ccl_global char *use_queues_flag, /* used to decide if this kernel should use queues to fetch ray index */ +#ifdef __KERNEL_DEBUG__ + DebugData *debugdata_coop, +#endif + int parallel_samples) /* Number of samples to be processed in parallel */ +{ + kernel_scene_intersect(globals, + data, + rng_coop, + Ray_coop, + PathState_coop, + Intersection_coop, + ray_state, + sw, sh, + Queue_data, + Queue_index, + queuesize, + use_queues_flag, +#ifdef __KERNEL_DEBUG__ + debugdata_coop, +#endif + parallel_samples); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl new file mode 100644 index 00000000000..04769d7d792 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl @@ -0,0 +1,43 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_shader_eval.h" + +__kernel void kernel_ocl_path_trace_shader_eval( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Output ShaderData structure to be filled */ + ccl_global uint *rng_coop, /* Required for rbsdf calculation */ + ccl_global Ray *Ray_coop, /* Required for setting up shader from ray */ + ccl_global PathState *PathState_coop, /* Required for all functions in this kernel */ + Intersection *Intersection_coop, /* Required for setting up shader from ray */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + ccl_global int *Queue_data, /* queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize) /* Size (capacity) of each queue */ +{ + kernel_shader_eval(globals, + data, + shader_data, + rng_coop, + Ray_coop, + PathState_coop, + Intersection_coop, + ray_state, + Queue_data, + Queue_index, + queuesize); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl new file mode 100644 index 00000000000..9d57364c8d6 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl @@ -0,0 +1,47 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_shadow_blocked.h" + +__kernel void kernel_ocl_path_trace_shadow_blocked( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_shadow, /* Required for shadow blocked */ + ccl_global PathState *PathState_coop, /* Required for shadow blocked */ + ccl_global Ray *LightRay_dl_coop, /* Required for direct lighting's shadow blocked */ + ccl_global Ray *LightRay_ao_coop, /* Required for AO's shadow blocked */ + Intersection *Intersection_coop_AO, + Intersection *Intersection_coop_DL, + ccl_global char *ray_state, + ccl_global int *Queue_data, /* Queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize, /* Size (capacity) of each queue */ + int total_num_rays) +{ + kernel_shadow_blocked(globals, + data, + shader_shadow, + PathState_coop, + LightRay_dl_coop, + LightRay_ao_coop, + Intersection_coop_AO, + Intersection_coop_DL, + ray_state, + Queue_data, + Queue_index, + queuesize, + total_num_rays); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl b/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl new file mode 100644 index 00000000000..88a1ed830af --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl @@ -0,0 +1,38 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_sum_all_radiance.h" + +__kernel void kernel_ocl_path_trace_sum_all_radiance( + ccl_constant KernelData *data, /* To get pass_stride to offet into buffer */ + ccl_global float *buffer, /* Output buffer of RenderTile */ + ccl_global float *per_sample_output_buffer, /* Radiance contributed by all samples */ + int parallel_samples, int sw, int sh, int stride, + int buffer_offset_x, + int buffer_offset_y, + int buffer_stride, + int start_sample) +{ + kernel_sum_all_radiance(data, + buffer, + per_sample_output_buffer, + parallel_samples, + sw, sh, stride, + buffer_offset_x, + buffer_offset_y, + buffer_stride, + start_sample); +} |