Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r--intern/cycles/kernel/CMakeLists.txt84
-rw-r--r--intern/cycles/kernel/SConscript9
-rw-r--r--intern/cycles/kernel/closure/bsdf.h100
-rw-r--r--intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h14
-rw-r--r--intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h6
-rw-r--r--intern/cycles/kernel/closure/bsdf_diffuse_ramp.h2
-rw-r--r--intern/cycles/kernel/closure/bsdf_microfacet.h193
-rw-r--r--intern/cycles/kernel/closure/bsdf_oren_nayar.h2
-rw-r--r--intern/cycles/kernel/closure/bsdf_phong_ramp.h4
-rw-r--r--intern/cycles/kernel/closure/bsdf_toon.h8
-rw-r--r--intern/cycles/kernel/closure/bssrdf.h8
-rw-r--r--intern/cycles/kernel/closure/volume.h13
-rw-r--r--intern/cycles/kernel/geom/geom.h2
-rw-r--r--intern/cycles/kernel/geom/geom_attribute.h8
-rw-r--r--intern/cycles/kernel/geom/geom_bvh.h113
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_shadow.h6
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_subsurface.h4
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_traversal.h9
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_volume.h4
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_volume_all.h454
-rw-r--r--intern/cycles/kernel/geom/geom_curve.h130
-rw-r--r--intern/cycles/kernel/geom/geom_motion_triangle.h44
-rw-r--r--intern/cycles/kernel/geom/geom_object.h75
-rw-r--r--intern/cycles/kernel/geom/geom_primitive.h38
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_shadow.h10
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_subsurface.h4
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_traversal.h11
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_volume.h8
-rw-r--r--intern/cycles/kernel/geom/geom_qbvh_volume_all.h446
-rw-r--r--intern/cycles/kernel/geom/geom_triangle.h42
-rw-r--r--intern/cycles/kernel/geom/geom_triangle_intersect.h114
-rw-r--r--intern/cycles/kernel/geom/geom_volume.h2
-rw-r--r--intern/cycles/kernel/kernel_accumulate.h10
-rw-r--r--intern/cycles/kernel/kernel_bake.h6
-rw-r--r--intern/cycles/kernel/kernel_camera.h57
-rw-r--r--intern/cycles/kernel/kernel_compat_cpu.h11
-rw-r--r--intern/cycles/kernel/kernel_compat_cuda.h9
-rw-r--r--intern/cycles/kernel/kernel_compat_opencl.h16
-rw-r--r--intern/cycles/kernel/kernel_debug.h14
-rw-r--r--intern/cycles/kernel/kernel_differential.h6
-rw-r--r--intern/cycles/kernel/kernel_emission.h91
-rw-r--r--intern/cycles/kernel/kernel_film.h10
-rw-r--r--intern/cycles/kernel/kernel_globals.h6
-rw-r--r--intern/cycles/kernel/kernel_jitter.h2
-rw-r--r--intern/cycles/kernel/kernel_light.h411
-rw-r--r--intern/cycles/kernel/kernel_passes.h40
-rw-r--r--intern/cycles/kernel/kernel_path.h581
-rw-r--r--intern/cycles/kernel/kernel_path_branched.h534
-rw-r--r--intern/cycles/kernel/kernel_path_common.h50
-rw-r--r--intern/cycles/kernel/kernel_path_state.h8
-rw-r--r--intern/cycles/kernel/kernel_path_surface.h48
-rw-r--r--intern/cycles/kernel/kernel_projection.h8
-rw-r--r--intern/cycles/kernel/kernel_queues.h132
-rw-r--r--intern/cycles/kernel/kernel_random.h18
-rw-r--r--intern/cycles/kernel/kernel_shader.h310
-rw-r--r--intern/cycles/kernel/kernel_shaderdata_vars.h99
-rw-r--r--intern/cycles/kernel/kernel_shadow.h68
-rw-r--r--intern/cycles/kernel/kernel_textures.h1
-rw-r--r--intern/cycles/kernel/kernel_types.h259
-rw-r--r--intern/cycles/kernel/kernel_volume.h90
-rw-r--r--intern/cycles/kernel/kernel_work_stealing.h193
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel.cpp (renamed from intern/cycles/kernel/kernel.cpp)5
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_avx.cpp (renamed from intern/cycles/kernel/kernel_avx.cpp)1
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp (renamed from intern/cycles/kernel/kernel_avx2.cpp)1
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp (renamed from intern/cycles/kernel/kernel_sse2.cpp)1
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp (renamed from intern/cycles/kernel/kernel_sse3.cpp)1
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp (renamed from intern/cycles/kernel/kernel_sse41.cpp)1
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel.cu (renamed from intern/cycles/kernel/kernel.cu)15
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel.cl (renamed from intern/cycles/kernel/kernel.cl)107
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl128
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_data_init.cl241
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl90
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl124
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl84
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl115
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl106
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl82
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl69
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl83
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl38
-rw-r--r--intern/cycles/kernel/osl/SConscript12
-rw-r--r--intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp1
-rw-r--r--intern/cycles/kernel/osl/bsdf_phong_ramp.cpp1
-rw-r--r--intern/cycles/kernel/osl/osl_bssrdf.cpp1
-rw-r--r--intern/cycles/kernel/osl/osl_closures.h8
-rw-r--r--intern/cycles/kernel/osl/osl_services.cpp183
-rw-r--r--intern/cycles/kernel/osl/osl_shader.cpp45
-rw-r--r--intern/cycles/kernel/shaders/node_image_texture.osl6
-rw-r--r--intern/cycles/kernel/shaders/node_math.osl4
-rw-r--r--intern/cycles/kernel/shaders/node_musgrave_texture.osl25
-rw-r--r--intern/cycles/kernel/shaders/node_noise_texture.osl17
-rw-r--r--intern/cycles/kernel/shaders/node_texture.h106
-rw-r--r--intern/cycles/kernel/shaders/node_voronoi_texture.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_wave_texture.osl2
-rw-r--r--intern/cycles/kernel/shaders/stdosl.h14
-rw-r--r--intern/cycles/kernel/split/kernel_background_buffer_update.h255
-rw-r--r--intern/cycles/kernel/split/kernel_data_init.h418
-rw-r--r--intern/cycles/kernel/split/kernel_direct_lighting.h116
-rw-r--r--intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h264
-rw-r--r--intern/cycles/kernel/split/kernel_lamp_emission.h179
-rw-r--r--intern/cycles/kernel/split/kernel_next_iteration_setup.h145
-rw-r--r--intern/cycles/kernel/split/kernel_scene_intersect.h137
-rw-r--r--intern/cycles/kernel/split/kernel_shader_eval.h75
-rw-r--r--intern/cycles/kernel/split/kernel_shadow_blocked.h99
-rw-r--r--intern/cycles/kernel/split/kernel_split_common.h62
-rw-r--r--intern/cycles/kernel/split/kernel_sum_all_radiance.h59
-rw-r--r--intern/cycles/kernel/svm/svm.h296
-rw-r--r--intern/cycles/kernel/svm/svm_attribute.h6
-rw-r--r--intern/cycles/kernel/svm/svm_blackbody.h40
-rw-r--r--intern/cycles/kernel/svm/svm_brick.h2
-rw-r--r--intern/cycles/kernel/svm/svm_brightness.h2
-rw-r--r--intern/cycles/kernel/svm/svm_camera.h8
-rw-r--r--intern/cycles/kernel/svm/svm_closure.h172
-rw-r--r--intern/cycles/kernel/svm/svm_displace.h10
-rw-r--r--intern/cycles/kernel/svm/svm_fresnel.h14
-rw-r--r--intern/cycles/kernel/svm/svm_gamma.h8
-rw-r--r--intern/cycles/kernel/svm/svm_geometry.h42
-rw-r--r--intern/cycles/kernel/svm/svm_gradient.h2
-rw-r--r--intern/cycles/kernel/svm/svm_hsv.h4
-rw-r--r--intern/cycles/kernel/svm/svm_image.h16
-rw-r--r--intern/cycles/kernel/svm/svm_invert.h2
-rw-r--r--intern/cycles/kernel/svm/svm_light_path.h14
-rw-r--r--intern/cycles/kernel/svm/svm_math_util.h64
-rw-r--r--intern/cycles/kernel/svm/svm_mix.h8
-rw-r--r--intern/cycles/kernel/svm/svm_musgrave.h24
-rw-r--r--intern/cycles/kernel/svm/svm_noisetex.h13
-rw-r--r--intern/cycles/kernel/svm/svm_normal.h4
-rw-r--r--intern/cycles/kernel/svm/svm_ramp.h2
-rw-r--r--intern/cycles/kernel/svm/svm_sepcomb_hsv.h8
-rw-r--r--intern/cycles/kernel/svm/svm_sepcomb_vector.h8
-rw-r--r--intern/cycles/kernel/svm/svm_tex_coord.h114
-rw-r--r--intern/cycles/kernel/svm/svm_texture.h258
-rw-r--r--intern/cycles/kernel/svm/svm_types.h45
-rw-r--r--intern/cycles/kernel/svm/svm_vector_transform.h6
-rw-r--r--intern/cycles/kernel/svm/svm_voronoi.h86
-rw-r--r--intern/cycles/kernel/svm/svm_wave.h2
-rw-r--r--intern/cycles/kernel/svm/svm_wavelength.h2
-rw-r--r--intern/cycles/kernel/svm/svm_wireframe.h45
138 files changed, 7356 insertions, 2554 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index a25eb3f5b50..18d1360542c 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -1,3 +1,4 @@
+remove_extra_strict_flags()
set(INC
.
@@ -11,9 +12,20 @@ set(INC_SYS
)
set(SRC
- kernel.cpp
- kernel.cl
- kernel.cu
+ kernels/cpu/kernel.cpp
+ kernels/opencl/kernel.cl
+ kernels/opencl/kernel_data_init.cl
+ kernels/opencl/kernel_queue_enqueue.cl
+ kernels/opencl/kernel_scene_intersect.cl
+ kernels/opencl/kernel_lamp_emission.cl
+ kernels/opencl/kernel_background_buffer_update.cl
+ kernels/opencl/kernel_shader_eval.cl
+ kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
+ kernels/opencl/kernel_direct_lighting.cl
+ kernels/opencl/kernel_shadow_blocked.cl
+ kernels/opencl/kernel_next_iteration_setup.cl
+ kernels/opencl/kernel_sum_all_radiance.cl
+ kernels/cuda/kernel.cu
)
set(SRC_HEADERS
@@ -35,17 +47,22 @@ set(SRC_HEADERS
kernel_montecarlo.h
kernel_passes.h
kernel_path.h
+ kernel_path_branched.h
+ kernel_path_common.h
kernel_path_state.h
kernel_path_surface.h
kernel_path_volume.h
kernel_projection.h
+ kernel_queues.h
kernel_random.h
kernel_shader.h
+ kernel_shaderdata_vars.h
kernel_shadow.h
kernel_subsurface.h
kernel_textures.h
kernel_types.h
kernel_volume.h
+ kernel_work_stealing.h
)
set(SRC_CLOSURE_HEADERS
@@ -67,6 +84,7 @@ set(SRC_CLOSURE_HEADERS
closure/emissive.h
closure/volume.h
)
+
set(SRC_SVM_HEADERS
svm/svm.h
svm/svm_attribute.h
@@ -118,6 +136,7 @@ set(SRC_GEOM_HEADERS
geom/geom_bvh_subsurface.h
geom/geom_bvh_traversal.h
geom/geom_bvh_volume.h
+ geom/geom_bvh_volume_all.h
geom/geom_curve.h
geom/geom_motion_curve.h
geom/geom_motion_triangle.h
@@ -128,12 +147,14 @@ set(SRC_GEOM_HEADERS
geom/geom_qbvh_subsurface.h
geom/geom_qbvh_traversal.h
geom/geom_qbvh_volume.h
+ geom/geom_qbvh_volume_all.h
geom/geom_triangle.h
geom/geom_triangle_intersect.h
geom/geom_volume.h
)
set(SRC_UTIL_HEADERS
+ ../util/util_atomic.h
../util/util_color.h
../util/util_half.h
../util/util_math.h
@@ -141,6 +162,21 @@ set(SRC_UTIL_HEADERS
../util/util_transform.h
../util/util_types.h
)
+
+set(SRC_SPLIT_HEADERS
+ split/kernel_background_buffer_update.h
+ split/kernel_data_init.h
+ split/kernel_direct_lighting.h
+ split/kernel_holdout_emission_blurring_pathtermination_ao.h
+ split/kernel_lamp_emission.h
+ split/kernel_next_iteration_setup.h
+ split/kernel_scene_intersect.h
+ split/kernel_shader_eval.h
+ split/kernel_shadow_blocked.h
+ split/kernel_split_common.h
+ split/kernel_sum_all_radiance.h
+)
+
# CUDA module
if(WITH_CYCLES_CUDA_BINARIES)
@@ -166,12 +202,12 @@ if(WITH_CYCLES_CUDA_BINARIES)
endif()
# build for each arch
- set(cuda_sources kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS})
+ set(cuda_sources kernels/cuda/kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS})
set(cuda_cubins)
macro(CYCLES_CUDA_KERNEL_ADD arch experimental)
if(${experimental})
- set(cuda_extra_flags "-D__KERNEL_CUDA_EXPERIMENTAL__")
+ set(cuda_extra_flags "-D__KERNEL_EXPERIMENTAL__")
set(cuda_cubin kernel_experimental_${arch}.cubin)
else()
set(cuda_extra_flags "")
@@ -192,7 +228,7 @@ if(WITH_CYCLES_CUDA_BINARIES)
COMMAND ${CUDA_NVCC_EXECUTABLE}
-arch=${arch}
-m${CUDA_BITS}
- --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu
+ --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda/kernel.cu
-o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}
--ptxas-options="-v"
${cuda_arch_flags}
@@ -240,28 +276,28 @@ include_directories(SYSTEM ${INC_SYS})
if(CXX_HAS_SSE)
list(APPEND SRC
- kernel_sse2.cpp
- kernel_sse3.cpp
- kernel_sse41.cpp
+ kernels/cpu/kernel_sse2.cpp
+ kernels/cpu/kernel_sse3.cpp
+ kernels/cpu/kernel_sse41.cpp
)
- set_source_files_properties(kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
- set_source_files_properties(kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
- set_source_files_properties(kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
endif()
if(CXX_HAS_AVX)
list(APPEND SRC
- kernel_avx.cpp
+ kernels/cpu/kernel_avx.cpp
)
- set_source_files_properties(kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
endif()
if(CXX_HAS_AVX2)
list(APPEND SRC
- kernel_avx2.cpp
+ kernels/cpu/kernel_avx2.cpp
)
- set_source_files_properties(kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
endif()
add_library(cycles_kernel ${SRC} ${SRC_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS})
@@ -280,11 +316,23 @@ endif()
#add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED})
#delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel.cl" ${CYCLES_INSTALL_PATH}/kernel)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel.cu" ${CYCLES_INSTALL_PATH}/kernel)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_data_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_intersect.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_background_buffer_update.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_eval.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_sum_all_radiance.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/closure)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/svm)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/geom)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_UTIL_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SPLIT_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/split)
diff --git a/intern/cycles/kernel/SConscript b/intern/cycles/kernel/SConscript
index c0d969e24ae..e8d51013924 100644
--- a/intern/cycles/kernel/SConscript
+++ b/intern/cycles/kernel/SConscript
@@ -57,8 +57,9 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']:
build_dir = os.path.join(root_build_dir, 'intern/cycles/kernel')
# source directories and files
+ kernel_file_rel = os.path.join("kernels", "cuda", "kernel.cu")
source_dir = Dir('.').srcnode().path
- kernel_file = os.path.join(source_dir, "kernel.cu")
+ kernel_file = os.path.join(source_dir, kernel_file_rel)
util_dir = os.path.join(source_dir, "../util")
svm_dir = os.path.join(source_dir, "../svm")
geom_dir = os.path.join(source_dir, "../geom")
@@ -83,11 +84,11 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']:
nvcc_flags += " -D__KERNEL_DEBUG__"
# dependencies
- dependencies = ['kernel.cu'] + kernel.Glob('*.h') + kernel.Glob('../util/*.h') + kernel.Glob('svm/*.h') + kernel.Glob('geom/*.h') + kernel.Glob('closure/*.h')
+ dependencies = [kernel_file_rel] + kernel.Glob('*.h') + kernel.Glob('../util/*.h') + kernel.Glob('svm/*.h') + kernel.Glob('geom/*.h') + kernel.Glob('closure/*.h')
last_cubin_file = None
configs = (("kernel_%s.cubin", ''),
- ("kernel_experimental_%s.cubin", ' -D__KERNEL_CUDA_EXPERIMENTAL__'))
+ ("kernel_experimental_%s.cubin", ' -D__KERNEL_EXPERIMENTAL__'))
# add command for each cuda architecture
for arch in cuda_archs:
@@ -105,7 +106,7 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']:
else:
command = "\"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (nvcc, arch, current_flags, kernel_file, cubin_file)
- kernel.Command(cubin_file, 'kernel.cu', command)
+ kernel.Command(cubin_file, kernel_file_rel, command)
kernel.Depends(cubin_file, dependencies)
kernel_binaries.append(cubin_file)
diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index 2b9e2a4e44d..558aa0dc6a9 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -47,79 +47,79 @@ ccl_device int bsdf_sample(KernelGlobals *kg, const ShaderData *sd, const Shader
switch(sc->type) {
case CLOSURE_BSDF_DIFFUSE_ID:
case CLOSURE_BSDF_BSSRDF_ID:
- label = bsdf_diffuse_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+ label = bsdf_diffuse_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
#ifdef __SVM__
case CLOSURE_BSDF_OREN_NAYAR_ID:
- label = bsdf_oren_nayar_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+ label = bsdf_oren_nayar_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
/*case CLOSURE_BSDF_PHONG_RAMP_ID:
- label = bsdf_phong_ramp_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+ label = bsdf_phong_ramp_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
- label = bsdf_diffuse_ramp_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+ label = bsdf_diffuse_ramp_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;*/
case CLOSURE_BSDF_TRANSLUCENT_ID:
- label = bsdf_translucent_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+ label = bsdf_translucent_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_REFLECTION_ID:
- label = bsdf_reflection_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+ label = bsdf_reflection_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_REFRACTION_ID:
- label = bsdf_refraction_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+ label = bsdf_refraction_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_TRANSPARENT_ID:
- label = bsdf_transparent_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+ label = bsdf_transparent_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_MICROFACET_GGX_ID:
case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
- label = bsdf_microfacet_ggx_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+ label = bsdf_microfacet_ggx_sample(kg, sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
- label = bsdf_microfacet_beckmann_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+ label = bsdf_microfacet_beckmann_sample(kg, sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
- label = bsdf_ashikhmin_shirley_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+ label = bsdf_ashikhmin_shirley_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
- label = bsdf_ashikhmin_velvet_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+ label = bsdf_ashikhmin_velvet_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_DIFFUSE_TOON_ID:
- label = bsdf_diffuse_toon_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+ label = bsdf_diffuse_toon_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_GLOSSY_TOON_ID:
- label = bsdf_glossy_toon_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+ label = bsdf_glossy_toon_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
- label = bsdf_hair_reflection_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+ label = bsdf_hair_reflection_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
- label = bsdf_hair_transmission_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+ label = bsdf_hair_transmission_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
#endif
#ifdef __VOLUME__
case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
- label = volume_henyey_greenstein_sample(sc, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
+ label = volume_henyey_greenstein_sample(sc, ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
#endif
default:
@@ -139,67 +139,67 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const Shade
return OSLShader::bsdf_eval(sd, sc, omega_in, *pdf);
#endif
- if(dot(sd->Ng, omega_in) >= 0.0f) {
+ if(dot(ccl_fetch(sd, Ng), omega_in) >= 0.0f) {
switch(sc->type) {
case CLOSURE_BSDF_DIFFUSE_ID:
case CLOSURE_BSDF_BSSRDF_ID:
- eval = bsdf_diffuse_eval_reflect(sc, sd->I, omega_in, pdf);
+ eval = bsdf_diffuse_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
#ifdef __SVM__
case CLOSURE_BSDF_OREN_NAYAR_ID:
- eval = bsdf_oren_nayar_eval_reflect(sc, sd->I, omega_in, pdf);
+ eval = bsdf_oren_nayar_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
/*case CLOSURE_BSDF_PHONG_RAMP_ID:
- eval = bsdf_phong_ramp_eval_reflect(sc, sd->I, omega_in, pdf);
+ eval = bsdf_phong_ramp_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
- eval = bsdf_diffuse_ramp_eval_reflect(sc, sd->I, omega_in, pdf);
+ eval = bsdf_diffuse_ramp_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;*/
case CLOSURE_BSDF_TRANSLUCENT_ID:
- eval = bsdf_translucent_eval_reflect(sc, sd->I, omega_in, pdf);
+ eval = bsdf_translucent_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_REFLECTION_ID:
- eval = bsdf_reflection_eval_reflect(sc, sd->I, omega_in, pdf);
+ eval = bsdf_reflection_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_REFRACTION_ID:
- eval = bsdf_refraction_eval_reflect(sc, sd->I, omega_in, pdf);
+ eval = bsdf_refraction_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_TRANSPARENT_ID:
- eval = bsdf_transparent_eval_reflect(sc, sd->I, omega_in, pdf);
+ eval = bsdf_transparent_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_MICROFACET_GGX_ID:
case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
- eval = bsdf_microfacet_ggx_eval_reflect(sc, sd->I, omega_in, pdf);
+ eval = bsdf_microfacet_ggx_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
- eval = bsdf_microfacet_beckmann_eval_reflect(sc, sd->I, omega_in, pdf);
+ eval = bsdf_microfacet_beckmann_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
- eval = bsdf_ashikhmin_shirley_eval_reflect(sc, sd->I, omega_in, pdf);
+ eval = bsdf_ashikhmin_shirley_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
- eval = bsdf_ashikhmin_velvet_eval_reflect(sc, sd->I, omega_in, pdf);
+ eval = bsdf_ashikhmin_velvet_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_DIFFUSE_TOON_ID:
- eval = bsdf_diffuse_toon_eval_reflect(sc, sd->I, omega_in, pdf);
+ eval = bsdf_diffuse_toon_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_GLOSSY_TOON_ID:
- eval = bsdf_glossy_toon_eval_reflect(sc, sd->I, omega_in, pdf);
+ eval = bsdf_glossy_toon_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
- eval = bsdf_hair_reflection_eval_reflect(sc, sd->I, omega_in, pdf);
+ eval = bsdf_hair_reflection_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
- eval = bsdf_hair_transmission_eval_reflect(sc, sd->I, omega_in, pdf);
+ eval = bsdf_hair_transmission_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
#endif
#ifdef __VOLUME__
case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
- eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf);
+ eval = volume_henyey_greenstein_eval_phase(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
#endif
default:
@@ -211,57 +211,57 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const Shade
switch(sc->type) {
case CLOSURE_BSDF_DIFFUSE_ID:
case CLOSURE_BSDF_BSSRDF_ID:
- eval = bsdf_diffuse_eval_transmit(sc, sd->I, omega_in, pdf);
+ eval = bsdf_diffuse_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
#ifdef __SVM__
case CLOSURE_BSDF_OREN_NAYAR_ID:
- eval = bsdf_oren_nayar_eval_transmit(sc, sd->I, omega_in, pdf);
+ eval = bsdf_oren_nayar_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_TRANSLUCENT_ID:
- eval = bsdf_translucent_eval_transmit(sc, sd->I, omega_in, pdf);
+ eval = bsdf_translucent_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_REFLECTION_ID:
- eval = bsdf_reflection_eval_transmit(sc, sd->I, omega_in, pdf);
+ eval = bsdf_reflection_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_REFRACTION_ID:
- eval = bsdf_refraction_eval_transmit(sc, sd->I, omega_in, pdf);
+ eval = bsdf_refraction_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_TRANSPARENT_ID:
- eval = bsdf_transparent_eval_transmit(sc, sd->I, omega_in, pdf);
+ eval = bsdf_transparent_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_MICROFACET_GGX_ID:
case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
- eval = bsdf_microfacet_ggx_eval_transmit(sc, sd->I, omega_in, pdf);
+ eval = bsdf_microfacet_ggx_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
- eval = bsdf_microfacet_beckmann_eval_transmit(sc, sd->I, omega_in, pdf);
+ eval = bsdf_microfacet_beckmann_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
- eval = bsdf_ashikhmin_shirley_eval_transmit(sc, sd->I, omega_in, pdf);
+ eval = bsdf_ashikhmin_shirley_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
- eval = bsdf_ashikhmin_velvet_eval_transmit(sc, sd->I, omega_in, pdf);
+ eval = bsdf_ashikhmin_velvet_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_DIFFUSE_TOON_ID:
- eval = bsdf_diffuse_toon_eval_transmit(sc, sd->I, omega_in, pdf);
+ eval = bsdf_diffuse_toon_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_GLOSSY_TOON_ID:
- eval = bsdf_glossy_toon_eval_transmit(sc, sd->I, omega_in, pdf);
+ eval = bsdf_glossy_toon_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
- eval = bsdf_hair_reflection_eval_transmit(sc, sd->I, omega_in, pdf);
+ eval = bsdf_hair_reflection_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
- eval = bsdf_hair_transmission_eval_transmit(sc, sd->I, omega_in, pdf);
+ eval = bsdf_hair_transmission_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
#endif
#ifdef __VOLUME__
case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
- eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf);
+ eval = volume_henyey_greenstein_eval_phase(sc, ccl_fetch(sd, I), omega_in, pdf);
break;
#endif
default:
diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
index acc477246d2..8d7d533d6f8 100644
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
@@ -69,6 +69,9 @@ ccl_device float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc, c
float out = 0.0f;
+ if(fmaxf(sc->data0, sc->data1) <= 1e-4f)
+ return make_float3(0.0f, 0.0f, 0.0f);
+
if(NdotI > 0.0f && NdotO > 0.0f) {
NdotI = fmaxf(NdotI, 1e-6f);
NdotO = fmaxf(NdotO, 1e-6f);
@@ -190,8 +193,15 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng,
/* reflect I on H to get omega_in */
*omega_in = -I + (2.0f * HdotI) * H;
- /* leave the rest to eval_reflect */
- *eval = bsdf_ashikhmin_shirley_eval_reflect(sc, I, *omega_in, pdf);
+ if(fmaxf(sc->data0, sc->data1) <= 1e-4f) {
+ /* Some high number for MIS. */
+ *pdf = 1e6f;
+ *eval = make_float3(1e6f, 1e6f, 1e6f);
+ }
+ else {
+ /* leave the rest to eval_reflect */
+ *eval = bsdf_ashikhmin_shirley_eval_reflect(sc, I, *omega_in, pdf);
+ }
#ifdef __RAY_DIFFERENTIALS__
/* just do the reflection thing for now */
diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
index 580f50d5dd6..f1a26650078 100644
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
@@ -59,7 +59,7 @@ ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(const ShaderClosure *sc, co
float cosHO = fabsf(dot(I, H));
if(!(fabsf(cosNH) < 1.0f-1e-5f && cosHO > 1e-5f))
- return make_float3(0, 0, 0);
+ return make_float3(0.0f, 0.0f, 0.0f);
float cosNHdivHO = cosNH / cosHO;
cosNHdivHO = fmaxf(cosNHdivHO, 1e-5f);
@@ -80,7 +80,7 @@ ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(const ShaderClosure *sc, co
return make_float3(out, out, out);
}
- return make_float3(0, 0, 0);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
ccl_device float3 bsdf_ashikhmin_velvet_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
@@ -114,7 +114,7 @@ ccl_device int bsdf_ashikhmin_velvet_sample(const ShaderClosure *sc, float3 Ng,
float sinNH2 = 1 - cosNH * cosNH;
float sinNH4 = sinNH2 * sinNH2;
- float cotangent2 = (cosNH * cosNH) / sinNH2;
+ float cotangent2 = (cosNH * cosNH) / sinNH2;
float D = expf(-cotangent2 * m_invsigma2) * m_invsigma2 * M_1_PI_F / sinNH4;
float G = min(1.0f, min(fac1, fac2)); // TODO: derive G from D analytically
diff --git a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
index cdaf84f1750..e0287e7655a 100644
--- a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
+++ b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
@@ -52,6 +52,8 @@ ccl_device float3 bsdf_diffuse_ramp_get_color(const ShaderClosure *sc, const flo
ccl_device int bsdf_diffuse_ramp_setup(ShaderClosure *sc)
{
sc->type = CLOSURE_BSDF_DIFFUSE_RAMP_ID;
+ sc->data0 = 0.0f;
+ sc->data1 = 0.0f;
return SD_BSDF|SD_BSDF_HAS_EVAL;
}
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index 71086f2e764..6a50bbed3b3 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -35,79 +35,7 @@
CCL_NAMESPACE_BEGIN
-/* Approximate erf and erfinv implementations.
- * Implementation comes straight from Wikipedia:
- *
- * http://en.wikipedia.org/wiki/Error_function
- *
- * Some constants are baked into the code.
- */
-
-ccl_device_inline float approx_erff_do(float x)
-{
- /* Such a clamp doesn't give much distortion to the output value
- * and gives quite a few of the speedup.
- */
- if(x > 3.0f) {
- return 1.0f;
- }
- float t = 1.0f / (1.0f + 0.47047f*x);
- return (1.0f -
- t*(0.3480242f + t*(-0.0958798f + t*0.7478556f)) * expf(-x*x));
-}
-
-ccl_device_inline float approx_erff(float x)
-{
- if(x >= 0.0f) {
- return approx_erff_do(x);
- }
- else {
- return -approx_erff_do(-x);
- }
-}
-
-ccl_device_inline float approx_erfinvf_do(float x)
-{
- if(x <= 0.7f) {
- const float x2 = x * x;
- const float a1 = 0.886226899f;
- const float a2 = -1.645349621f;
- const float a3 = 0.914624893f;
- const float a4 = -0.140543331f;
- const float b1 = -2.118377725f;
- const float b2 = 1.442710462f;
- const float b3 = -0.329097515f;
- const float b4 = 0.012229801f;
- return x * (((a4 * x2 + a3) * x2 + a2) * x2 + a1) /
- ((((b4 * x2 + b3) * x2 + b2) * x2 + b1) * x2 + 1.0f);
- }
- else {
- const float c1 = -1.970840454f;
- const float c2 = -1.624906493f;
- const float c3 = 3.429567803f;
- const float c4 = 1.641345311f;
- const float d1 = 3.543889200f;
- const float d2 = 1.637067800f;
- const float z = sqrtf(-logf((1.0f - x) * 0.5f));
- return (((c4 * z + c3) * z + c2) * z + c1) /
- ((d2 * z + d1) * z + 1.0f);
- }
-}
-
-ccl_device_inline float approx_erfinvf(float x)
-{
- if(x >= 0.0f) {
- return approx_erfinvf_do(x);
- }
- else {
- return -approx_erfinvf_do(-x);
- }
-}
-
-/* Beckmann and GGX microfacet importance sampling from:
- *
- * Importance Sampling Microfacet-Based BSDFs using the Distribution of Visible Normals.
- * E. Heitz and E. d'Eon, EGSR 2014 */
+/* Beckmann and GGX microfacet importance sampling. */
ccl_device_inline void microfacet_beckmann_sample_slopes(
KernelGlobals *kg,
@@ -128,64 +56,71 @@ ccl_device_inline void microfacet_beckmann_sample_slopes(
/* precomputations */
const float tan_theta_i = sin_theta_i/cos_theta_i;
const float inv_a = tan_theta_i;
- const float a = 1.0f/inv_a;
- const float erf_a = approx_erff(a);
- const float exp_a2 = expf(-a*a);
+ const float cot_theta_i = 1.0f/tan_theta_i;
+ const float erf_a = fast_erff(cot_theta_i);
+ const float exp_a2 = expf(-cot_theta_i*cot_theta_i);
const float SQRT_PI_INV = 0.56418958354f;
const float Lambda = 0.5f*(erf_a - 1.0f) + (0.5f*SQRT_PI_INV)*(exp_a2*inv_a);
const float G1 = 1.0f/(1.0f + Lambda); /* masking */
*G1i = G1;
-#if 0
- const float C = 1.0f - G1 * erf_a;
-
- /* sample slope X */
- if(randu < C) {
- /* rescale randu */
- randu = randu / C;
- const float w_1 = 0.5f * SQRT_PI_INV * sin_theta_i * exp_a2;
- const float w_2 = cos_theta_i * (0.5f - 0.5f*erf_a);
- const float p = w_1 / (w_1 + w_2);
+#if defined(__KERNEL_GPU__)
+ /* Based on paper from Wenzel Jakob
+ * An Improved Visible Normal Sampling Routine for the Beckmann Distribution
+ *
+ * http://www.mitsuba-renderer.org/~wenzel/files/visnormal.pdf
+ *
+ * Reformulation from OpenShadingLanguage which avoids using inverse
+ * trigonometric functions.
+ */
- if(randu < p) {
- randu = randu / p;
- *slope_x = -sqrtf(-logf(randu*exp_a2));
- }
- else {
- randu = (randu - p) / (1.0f - p);
- *slope_x = approx_erfinvf(randu - 1.0f - randu*erf_a);
- }
+ /* Sample slope X.
+ *
+ * Compute a coarse approximation using the approximation:
+ * exp(-ierf(x)^2) ~= 1 - x * x
+ * solve y = 1 + b + K * (1 - b * b)
+ */
+ float K = tan_theta_i * SQRT_PI_INV;
+ float y_approx = randu * (1.0f + erf_a + K * (1 - erf_a * erf_a));
+ float y_exact = randu * (1.0f + erf_a + K * exp_a2);
+ float b = K > 0 ? (0.5f - sqrtf(K * (K - y_approx + 1.0f) + 0.25f)) / K : y_approx - 1.0f;
+
+ /* Perform newton step to refine toward the true root. */
+ float inv_erf = fast_ierff(b);
+ float value = 1.0f + b + K * expf(-inv_erf * inv_erf) - y_exact;
+ /* Check if we are close enough already,
+ * this also avoids NaNs as we get close to the root.
+ */
+ if(fabsf(value) > 1e-6f) {
+ b -= value / (1.0f - inv_erf * tan_theta_i); /* newton step 1. */
+ inv_erf = fast_ierff(b);
+ value = 1.0f + b + K * expf(-inv_erf * inv_erf) - y_exact;
+ b -= value / (1.0f - inv_erf * tan_theta_i); /* newton step 2. */
+ /* Compute the slope from the refined value. */
+ *slope_x = fast_ierff(b);
}
else {
- /* rescale randu */
- randu = (randu - C) / (1.0f - C);
- *slope_x = approx_erfinvf((-1.0f + 2.0f*randu)*erf_a);
-
- const float p = (-(*slope_x)*sin_theta_i + cos_theta_i) / (2.0f*cos_theta_i);
-
- if(randv > p) {
- *slope_x = -(*slope_x);
- randv = (randv - p) / (1.0f - p);
- }
- else
- randv = randv / p;
+ /* We are close enough already. */
+ *slope_x = inv_erf;
}
-
- /* sample slope Y */
- *slope_y = approx_erfinvf(2.0f*randv - 1.0f);
+ *slope_y = fast_ierff(2.0f*randv - 1.0f);
#else
- /* use precomputed table, because it better preserves stratification
- * of the random number pattern */
+ /* Use precomputed table on CPU, it gives better perfomance. */
int beckmann_table_offset = kernel_data.tables.beckmann_offset;
*slope_x = lookup_table_read_2D(kg, randu, cos_theta_i,
beckmann_table_offset, BECKMANN_TABLE_SIZE, BECKMANN_TABLE_SIZE);
- *slope_y = approx_erfinvf(2.0f*randv - 1.0f);
+ *slope_y = fast_ierff(2.0f*randv - 1.0f);
#endif
-
}
+/* GGX microfacet importance sampling from:
+ *
+ * Importance Sampling Microfacet-Based BSDFs using the Distribution of Visible Normals.
+ * E. Heitz and E. d'Eon, EGSR 2014
+ */
+
ccl_device_inline void microfacet_ggx_sample_slopes(
const float cos_theta_i, const float sin_theta_i,
float randu, float randv, float *slope_x, float *slope_y,
@@ -300,7 +235,7 @@ ccl_device_inline float3 microfacet_sample_stretched(
ccl_device int bsdf_microfacet_ggx_setup(ShaderClosure *sc)
{
- sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* alpha_x */
+ sc->data0 = saturate(sc->data0); /* alpha_x */
sc->data1 = sc->data0; /* alpha_y */
sc->type = CLOSURE_BSDF_MICROFACET_GGX_ID;
@@ -310,8 +245,8 @@ ccl_device int bsdf_microfacet_ggx_setup(ShaderClosure *sc)
ccl_device int bsdf_microfacet_ggx_aniso_setup(ShaderClosure *sc)
{
- sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* alpha_x */
- sc->data1 = clamp(sc->data1, 0.0f, 1.0f); /* alpha_y */
+ sc->data0 = saturate(sc->data0); /* alpha_x */
+ sc->data1 = saturate(sc->data1); /* alpha_y */
sc->type = CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID;
@@ -320,7 +255,7 @@ ccl_device int bsdf_microfacet_ggx_aniso_setup(ShaderClosure *sc)
ccl_device int bsdf_microfacet_ggx_refraction_setup(ShaderClosure *sc)
{
- sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* alpha_x */
+ sc->data0 = saturate(sc->data0); /* alpha_x */
sc->data1 = sc->data0; /* alpha_y */
sc->type = CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
@@ -342,7 +277,7 @@ ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc, cons
float3 N = sc->N;
if(m_refractive || fmaxf(alpha_x, alpha_y) <= 1e-4f)
- return make_float3(0, 0, 0);
+ return make_float3(0.0f, 0.0f, 0.0f);
float cosNO = dot(N, I);
float cosNI = dot(N, omega_in);
@@ -421,7 +356,7 @@ ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc, cons
return make_float3(out, out, out);
}
- return make_float3(0, 0, 0);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
@@ -433,13 +368,13 @@ ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc, con
float3 N = sc->N;
if(!m_refractive || fmaxf(alpha_x, alpha_y) <= 1e-4f)
- return make_float3(0, 0, 0);
+ return make_float3(0.0f, 0.0f, 0.0f);
float cosNO = dot(N, I);
float cosNI = dot(N, omega_in);
if(cosNO <= 0 || cosNI >= 0)
- return make_float3(0, 0, 0); /* vectors on same side -- not possible */
+ return make_float3(0.0f, 0.0f, 0.0f); /* vectors on same side -- not possible */
/* compute half-vector of the refraction (eq. 16) */
float3 ht = -(m_eta * omega_in + I);
@@ -653,7 +588,7 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
ccl_device int bsdf_microfacet_beckmann_setup(ShaderClosure *sc)
{
- sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* alpha_x */
+ sc->data0 = saturate(sc->data0); /* alpha_x */
sc->data1 = sc->data0; /* alpha_y */
sc->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ID;
@@ -662,8 +597,8 @@ ccl_device int bsdf_microfacet_beckmann_setup(ShaderClosure *sc)
ccl_device int bsdf_microfacet_beckmann_aniso_setup(ShaderClosure *sc)
{
- sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* alpha_x */
- sc->data1 = clamp(sc->data1, 0.0f, 1.0f); /* alpha_y */
+ sc->data0 = saturate(sc->data0); /* alpha_x */
+ sc->data1 = saturate(sc->data1); /* alpha_y */
sc->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID;
return SD_BSDF|SD_BSDF_HAS_EVAL;
@@ -671,7 +606,7 @@ ccl_device int bsdf_microfacet_beckmann_aniso_setup(ShaderClosure *sc)
ccl_device int bsdf_microfacet_beckmann_refraction_setup(ShaderClosure *sc)
{
- sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* alpha_x */
+ sc->data0 = saturate(sc->data0); /* alpha_x */
sc->data1 = sc->data0; /* alpha_y */
sc->type = CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
@@ -692,7 +627,7 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(const ShaderClosure *sc,
float3 N = sc->N;
if(m_refractive || fmaxf(alpha_x, alpha_y) <= 1e-4f)
- return make_float3(0, 0, 0);
+ return make_float3(0.0f, 0.0f, 0.0f);
float cosNO = dot(N, I);
float cosNI = dot(N, omega_in);
@@ -774,7 +709,7 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(const ShaderClosure *sc,
return make_float3(out, out, out);
}
- return make_float3(0, 0, 0);
+ return make_float3(0.0f, 0.0f, 0.0f);
}
ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
@@ -786,13 +721,13 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc
float3 N = sc->N;
if(!m_refractive || fmaxf(alpha_x, alpha_y) <= 1e-4f)
- return make_float3(0, 0, 0);
+ return make_float3(0.0f, 0.0f, 0.0f);
float cosNO = dot(N, I);
float cosNI = dot(N, omega_in);
if(cosNO <= 0 || cosNI >= 0)
- return make_float3(0, 0, 0);
+ return make_float3(0.0f, 0.0f, 0.0f);
/* compute half-vector of the refraction (eq. 16) */
float3 ht = -(m_eta * omega_in + I);
diff --git a/intern/cycles/kernel/closure/bsdf_oren_nayar.h b/intern/cycles/kernel/closure/bsdf_oren_nayar.h
index c476d4ca4e2..61b7cb11b02 100644
--- a/intern/cycles/kernel/closure/bsdf_oren_nayar.h
+++ b/intern/cycles/kernel/closure/bsdf_oren_nayar.h
@@ -37,7 +37,7 @@ ccl_device int bsdf_oren_nayar_setup(ShaderClosure *sc)
sc->type = CLOSURE_BSDF_OREN_NAYAR_ID;
- sigma = clamp(sigma, 0.0f, 1.0f);
+ sigma = saturate(sigma);
float div = 1.0f / (M_PI_F + ((3.0f * M_PI_F - 4.0f) / 6.0f) * sigma);
diff --git a/intern/cycles/kernel/closure/bsdf_phong_ramp.h b/intern/cycles/kernel/closure/bsdf_phong_ramp.h
index f9f263719e9..1ab15eee954 100644
--- a/intern/cycles/kernel/closure/bsdf_phong_ramp.h
+++ b/intern/cycles/kernel/closure/bsdf_phong_ramp.h
@@ -51,9 +51,9 @@ ccl_device float3 bsdf_phong_ramp_get_color(const ShaderClosure *sc, const float
ccl_device int bsdf_phong_ramp_setup(ShaderClosure *sc)
{
- sc->data0 = max(sc->data0, 0.0f);
-
sc->type = CLOSURE_BSDF_PHONG_RAMP_ID;
+ sc->data0 = max(sc->data0, 0.0f);
+ sc->data1 = 0.0f;
return SD_BSDF|SD_BSDF_HAS_EVAL;
}
diff --git a/intern/cycles/kernel/closure/bsdf_toon.h b/intern/cycles/kernel/closure/bsdf_toon.h
index df03942638f..e5b6ab93a64 100644
--- a/intern/cycles/kernel/closure/bsdf_toon.h
+++ b/intern/cycles/kernel/closure/bsdf_toon.h
@@ -40,8 +40,8 @@ CCL_NAMESPACE_BEGIN
ccl_device int bsdf_diffuse_toon_setup(ShaderClosure *sc)
{
sc->type = CLOSURE_BSDF_DIFFUSE_TOON_ID;
- sc->data0 = clamp(sc->data0, 0.0f, 1.0f);
- sc->data1 = clamp(sc->data1, 0.0f, 1.0f);
+ sc->data0 = saturate(sc->data0);
+ sc->data1 = saturate(sc->data1);
return SD_BSDF|SD_BSDF_HAS_EVAL;
}
@@ -120,8 +120,8 @@ ccl_device int bsdf_diffuse_toon_sample(const ShaderClosure *sc, float3 Ng, floa
ccl_device int bsdf_glossy_toon_setup(ShaderClosure *sc)
{
sc->type = CLOSURE_BSDF_GLOSSY_TOON_ID;
- sc->data0 = clamp(sc->data0, 0.0f, 1.0f);
- sc->data1 = clamp(sc->data1, 0.0f, 1.0f);
+ sc->data0 = saturate(sc->data0);
+ sc->data1 = saturate(sc->data1);
return SD_BSDF|SD_BSDF_HAS_EVAL;
}
diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h
index b6de2da8c71..f817dcd5f2d 100644
--- a/intern/cycles/kernel/closure/bssrdf.h
+++ b/intern/cycles/kernel/closure/bssrdf.h
@@ -30,8 +30,8 @@ ccl_device int bssrdf_setup(ShaderClosure *sc, ClosureType type)
return flag;
}
else {
- sc->data1 = clamp(sc->data1, 0.0f, 1.0f); /* texture blur */
- sc->T.x = clamp(sc->T.x, 0.0f, 1.0f); /* sharpness */
+ sc->data1 = saturate(sc->data1); /* texture blur */
+ sc->T.x = saturate(sc->T.x); /* sharpness */
sc->type = type;
return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSSRDF;
@@ -157,7 +157,7 @@ ccl_device float bssrdf_cubic_quintic_root_find(float xi)
float x = 0.25f;
int i;
- for (i = 0; i < max_iteration_count; i++) {
+ for(i = 0; i < max_iteration_count; i++) {
float x2 = x*x;
float x3 = x2*x;
float nx = (1.0f - x);
@@ -168,7 +168,7 @@ ccl_device float bssrdf_cubic_quintic_root_find(float xi)
if(fabsf(f) < tolerance || f_ == 0.0f)
break;
- x = clamp(x - f/f_, 0.0f, 1.0f);
+ x = saturate(x - f/f_);
}
return x;
diff --git a/intern/cycles/kernel/closure/volume.h b/intern/cycles/kernel/closure/volume.h
index 439610546e5..4d71ba50ec3 100644
--- a/intern/cycles/kernel/closure/volume.h
+++ b/intern/cycles/kernel/closure/volume.h
@@ -107,18 +107,9 @@ ccl_device int volume_absorption_setup(ShaderClosure *sc)
ccl_device float3 volume_phase_eval(const ShaderData *sd, const ShaderClosure *sc, float3 omega_in, float *pdf)
{
- float3 eval;
+ kernel_assert(sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID);
- switch(sc->type) {
- case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
- eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf);
- break;
- default:
- eval = make_float3(0.0f, 0.0f, 0.0f);
- break;
- }
-
- return eval;
+ return volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf);
}
ccl_device int volume_phase_sample(const ShaderData *sd, const ShaderClosure *sc, float randu,
diff --git a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h
index bf0d86e6206..5ab900d47aa 100644
--- a/intern/cycles/kernel/geom/geom.h
+++ b/intern/cycles/kernel/geom/geom.h
@@ -22,7 +22,9 @@
#define BVH_STACK_SIZE 192
#define BVH_QSTACK_SIZE 384
#define BVH_NODE_SIZE 4
+#define BVH_NODE_LEAF_SIZE 1
#define BVH_QNODE_SIZE 7
+#define BVH_QNODE_LEAF_SIZE 1
#define TRI_NODE_SIZE 3
/* silly workaround for float extended precision that happens when compiling
diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h
index 9ac16e86085..c7364e9edac 100644
--- a/intern/cycles/kernel/geom/geom_attribute.h
+++ b/intern/cycles/kernel/geom/geom_attribute.h
@@ -29,13 +29,13 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeElement *elem)
{
- if(sd->object == PRIM_NONE)
+ if(ccl_fetch(sd, object) == PRIM_NONE)
return (int)ATTR_STD_NOT_FOUND;
/* for SVM, find attribute by unique id */
- uint attr_offset = sd->object*kernel_data.bvh.attributes_map_stride;
+ uint attr_offset = ccl_fetch(sd, object)*kernel_data.bvh.attributes_map_stride;
#ifdef __HAIR__
- attr_offset = (sd->type & PRIMITIVE_ALL_CURVE)? attr_offset + ATTR_PRIM_CURVE: attr_offset;
+ attr_offset = (ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE)? attr_offset + ATTR_PRIM_CURVE: attr_offset;
#endif
uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
@@ -49,7 +49,7 @@ ccl_device_inline int find_attribute(KernelGlobals *kg, const ShaderData *sd, ui
*elem = (AttributeElement)attr_map.y;
- if(sd->prim == PRIM_NONE && (AttributeElement)attr_map.y != ATTR_ELEMENT_MESH)
+ if(ccl_fetch(sd, prim) == PRIM_NONE && (AttributeElement)attr_map.y != ATTR_ELEMENT_MESH)
return ATTR_STD_NOT_FOUND;
/* return result */
diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h
index c0eefcd9c7f..3d0d406dd0b 100644
--- a/intern/cycles/kernel/geom/geom_bvh.h
+++ b/intern/cycles/kernel/geom/geom_bvh.h
@@ -115,7 +115,39 @@ CCL_NAMESPACE_BEGIN
#include "geom_bvh_subsurface.h"
#endif
-/* Record all BVH intersection for shadows */
+/* Volume BVH traversal */
+
+#if defined(__VOLUME__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume
+#define BVH_FUNCTION_FEATURES 0
+#include "geom_bvh_volume.h"
+#endif
+
+#if defined(__VOLUME__) && defined(__INSTANCING__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING
+#include "geom_bvh_volume.h"
+#endif
+
+#if defined(__VOLUME__) && defined(__HAIR__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume_hair
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
+#include "geom_bvh_volume.h"
+#endif
+
+#if defined(__VOLUME__) && defined(__OBJECT_MOTION__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume_motion
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
+#include "geom_bvh_volume.h"
+#endif
+
+#if defined(__VOLUME__) && defined(__HAIR__) && defined(__OBJECT_MOTION__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume_hair_motion
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
+#include "geom_bvh_volume.h"
+#endif
+
+/* Record all intersections - Shadow BVH traversal */
#if defined(__SHADOW_RECORD_ALL__)
#define BVH_FUNCTION_NAME bvh_intersect_shadow_all
@@ -147,36 +179,36 @@ CCL_NAMESPACE_BEGIN
#include "geom_bvh_shadow.h"
#endif
-/* Camera inside Volume BVH intersection */
+/* Record all intersections - Volume BVH traversal */
-#if defined(__VOLUME__)
-#define BVH_FUNCTION_NAME bvh_intersect_volume
+#if defined(__VOLUME_RECORD_ALL__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume_all
#define BVH_FUNCTION_FEATURES 0
-#include "geom_bvh_volume.h"
+#include "geom_bvh_volume_all.h"
#endif
-#if defined(__VOLUME__) && defined(__INSTANCING__)
-#define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
+#if defined(__VOLUME_RECORD_ALL__) && defined(__INSTANCING__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing
#define BVH_FUNCTION_FEATURES BVH_INSTANCING
-#include "geom_bvh_volume.h"
+#include "geom_bvh_volume_all.h"
#endif
-#if defined(__VOLUME__) && defined(__HAIR__)
-#define BVH_FUNCTION_NAME bvh_intersect_volume_hair
+#if defined(__VOLUME_RECORD_ALL__) && defined(__HAIR__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume_all_hair
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
-#include "geom_bvh_volume.h"
+#include "geom_bvh_volume_all.h"
#endif
-#if defined(__VOLUME__) && defined(__OBJECT_MOTION__)
-#define BVH_FUNCTION_NAME bvh_intersect_volume_motion
+#if defined(__VOLUME_RECORD_ALL__) && defined(__OBJECT_MOTION__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
-#include "geom_bvh_volume.h"
+#include "geom_bvh_volume_all.h"
#endif
-#if defined(__VOLUME__) && defined(__HAIR__) && defined(__OBJECT_MOTION__)
-#define BVH_FUNCTION_NAME bvh_intersect_volume_hair_motion
+#if defined(__VOLUME_RECORD_ALL__) && defined(__HAIR__) && defined(__OBJECT_MOTION__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume_all_hair_motion
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
-#include "geom_bvh_volume.h"
+#include "geom_bvh_volume_all.h"
#endif
#undef BVH_FEATURE
@@ -330,6 +362,37 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
}
#endif
+#ifdef __VOLUME_RECORD_ALL__
+ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
+ const Ray *ray,
+ Intersection *isect,
+ const uint max_hits)
+{
+#ifdef __OBJECT_MOTION__
+ if(kernel_data.bvh.have_motion) {
+#ifdef __HAIR__
+ if(kernel_data.bvh.have_curves)
+ return bvh_intersect_volume_all_hair_motion(kg, ray, isect, max_hits);
+#endif /* __HAIR__ */
+
+ return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits);
+ }
+#endif /* __OBJECT_MOTION__ */
+
+#ifdef __HAIR__
+ if(kernel_data.bvh.have_curves)
+ return bvh_intersect_volume_all_hair(kg, ray, isect, max_hits);
+#endif /* __HAIR__ */
+
+#ifdef __INSTANCING__
+ if(kernel_data.bvh.have_instancing)
+ return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits);
+#endif /* __INSTANCING__ */
+
+ return bvh_intersect_volume_all(kg, ray, isect, max_hits);
+}
+#endif
+
/* Ray offset to avoid self intersection.
*
@@ -384,5 +447,21 @@ ccl_device_inline float3 ray_offset(float3 P, float3 Ng)
#endif
}
+#if defined(__SHADOW_RECORD_ALL__) || defined (__VOLUME_RECORD_ALL__)
+/* ToDo: Move to another file? */
+ccl_device int intersections_compare(const void *a, const void *b)
+{
+ const Intersection *isect_a = (const Intersection*)a;
+ const Intersection *isect_b = (const Intersection*)b;
+
+ if(isect_a->t < isect_b->t)
+ return -1;
+ else if(isect_a->t > isect_b->t)
+ return 1;
+ else
+ return 0;
+}
+#endif
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_bvh_shadow.h b/intern/cycles/kernel/geom/geom_bvh_shadow.h
index 193f49074a3..e4cba99dc96 100644
--- a/intern/cycles/kernel/geom/geom_bvh_shadow.h
+++ b/intern/cycles/kernel/geom/geom_bvh_shadow.h
@@ -200,7 +200,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
/* if node is leaf, fetch triangle list */
if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+3);
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE);
int primAddr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
@@ -226,7 +226,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
switch(p_type) {
case PRIMITIVE_TRIANGLE: {
- hit = triangle_intersect(kg, &isect_precalc, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr);
+ hit = triangle_intersect(kg, &isect_precalc, isect_array, P, PATH_RAY_SHADOW, object, primAddr);
break;
}
#if BVH_FEATURE(BVH_MOTION)
@@ -264,7 +264,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
#endif
{
- shader = kernel_tex_fetch(__tri_shader, prim);
+ shader = kernel_tex_fetch(__tri_shader, prim);
}
#ifdef __HAIR__
else {
diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
index 290297ef5c5..a73139f9c88 100644
--- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h
+++ b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
@@ -187,7 +187,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
/* if node is leaf, fetch triangle list */
if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+3);
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE);
int primAddr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
@@ -210,7 +210,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
if(tri_object != subsurface_object)
continue;
- triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, dir, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
+ triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
}
break;
}
diff --git a/intern/cycles/kernel/geom/geom_bvh_traversal.h b/intern/cycles/kernel/geom/geom_bvh_traversal.h
index 0298e687de2..73d79fd78ee 100644
--- a/intern/cycles/kernel/geom/geom_bvh_traversal.h
+++ b/intern/cycles/kernel/geom/geom_bvh_traversal.h
@@ -76,6 +76,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if defined(__KERNEL_DEBUG__)
isect->num_traversal_steps = 0;
+ isect->num_traversed_instances = 0;
#endif
#if defined(__KERNEL_SSE2__)
@@ -248,7 +249,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
/* if node is leaf, fetch triangle list */
if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+3);
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE);
int primAddr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
@@ -269,7 +270,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
isect->num_traversal_steps++;
#endif
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- if(triangle_intersect(kg, &isect_precalc, isect, P, dir, visibility, object, primAddr)) {
+ if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) {
/* shadow ray early termination */
#if defined(__KERNEL_SSE2__)
if(visibility == PATH_RAY_SHADOW_OPAQUE)
@@ -362,6 +363,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
nodeAddr = kernel_tex_fetch(__object_node, object);
+
+#if defined(__KERNEL_DEBUG__)
+ isect->num_traversed_instances++;
+#endif
}
}
#endif /* FEATURE(BVH_INSTANCING) */
diff --git a/intern/cycles/kernel/geom/geom_bvh_volume.h b/intern/cycles/kernel/geom/geom_bvh_volume.h
index 0862812a170..41c784869f2 100644
--- a/intern/cycles/kernel/geom/geom_bvh_volume.h
+++ b/intern/cycles/kernel/geom/geom_bvh_volume.h
@@ -188,7 +188,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
/* if node is leaf, fetch triangle list */
if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+3);
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE);
int primAddr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
@@ -213,7 +213,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
- triangle_intersect(kg, &isect_precalc, isect, P, dir, visibility, object, primAddr);
+ triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr);
}
break;
}
diff --git a/intern/cycles/kernel/geom/geom_bvh_volume_all.h b/intern/cycles/kernel/geom/geom_bvh_volume_all.h
new file mode 100644
index 00000000000..b6db36f4b17
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom_bvh_volume_all.h
@@ -0,0 +1,454 @@
+/*
+ * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
+ * and code copyright 2009-2012 Intel Corporation
+ *
+ * Modifications Copyright 2011-2014, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef __QBVH__
+#include "geom_qbvh_volume_all.h"
+#endif
+
+/* This is a template BVH traversal function for volumes, where
+ * various features can be enabled/disabled. This way we can compile optimized
+ * versions for each case without new features slowing things down.
+ *
+ * BVH_INSTANCING: object instancing
+ * BVH_HAIR: hair curve rendering
+ * BVH_MOTION: motion blur rendering
+ *
+ */
+
+ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
+ const Ray *ray,
+ Intersection *isect_array,
+ const uint max_hits)
+{
+ /* todo:
+ * - test if pushing distance on the stack helps (for non shadow rays)
+ * - separate version for shadow rays
+ * - likely and unlikely for if() statements
+ * - test restrict attribute for pointers
+ */
+
+ /* traversal stack in CUDA thread-local memory */
+ int traversalStack[BVH_STACK_SIZE];
+ traversalStack[0] = ENTRYPOINT_SENTINEL;
+
+ /* traversal variables in registers */
+ int stackPtr = 0;
+ int nodeAddr = kernel_data.bvh.root;
+
+ /* ray parameters in registers */
+ const float tmax = ray->t;
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
+ float isect_t = tmax;
+
+ const uint visibility = PATH_RAY_ALL_VISIBILITY;
+
+#if BVH_FEATURE(BVH_MOTION)
+ Transform ob_tfm;
+#endif
+
+#if BVH_FEATURE(BVH_INSTANCING)
+ int num_hits_in_instance = 0;
+#endif
+
+ uint num_hits = 0;
+ isect_array->t = tmax;
+
+#if defined(__KERNEL_SSE2__)
+ const shuffle_swap_t shuf_identity = shuffle_swap_identity();
+ const shuffle_swap_t shuf_swap = shuffle_swap_swap();
+
+ const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+ ssef Psplat[3], idirsplat[3];
+ shuffle_swap_t shufflexyz[3];
+
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
+
+ ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
+
+ gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+#endif
+
+ IsectPrecalc isect_precalc;
+ triangle_intersect_precalc(dir, &isect_precalc);
+
+ /* traversal loop */
+ do {
+ do {
+ /* traverse internal nodes */
+ while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
+ bool traverseChild0, traverseChild1;
+ int nodeAddrChild1;
+
+#if !defined(__KERNEL_SSE2__)
+ /* Intersect two child bounding boxes, non-SSE version */
+ float t = isect_array->t;
+
+ /* fetch node data */
+ float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0);
+ float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1);
+ float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2);
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
+
+ /* intersect ray against child nodes */
+ NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
+ NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
+ NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
+ NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
+ NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
+ NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
+ NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
+ NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
+
+ NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
+ NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
+ NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
+ NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
+ NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
+ NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
+ NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
+ NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
+
+ /* decide which nodes to traverse next */
+ traverseChild0 = (c0max >= c0min);
+ traverseChild1 = (c1max >= c1min);
+
+#else // __KERNEL_SSE2__
+ /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
+
+ /* fetch node data */
+ const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE;
+ const float4 cnodes = ((float4*)bvh_nodes)[3];
+
+ /* intersect ray against child nodes */
+ const ssef tminmaxx = (shuffle_swap(bvh_nodes[0], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
+ const ssef tminmaxy = (shuffle_swap(bvh_nodes[1], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
+ const ssef tminmaxz = (shuffle_swap(bvh_nodes[2], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
+
+ /* calculate { c0min, c1min, -c0max, -c1max} */
+ ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
+ const ssef tminmax = minmax ^ pn;
+
+ const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
+
+ /* decide which nodes to traverse next */
+ traverseChild0 = (movemask(lrhit) & 1);
+ traverseChild1 = (movemask(lrhit) & 2);
+#endif // __KERNEL_SSE2__
+
+ nodeAddr = __float_as_int(cnodes.x);
+ nodeAddrChild1 = __float_as_int(cnodes.y);
+
+ if(traverseChild0 && traverseChild1) {
+ /* both children were intersected, push the farther one */
+#if !defined(__KERNEL_SSE2__)
+ bool closestChild1 = (c1min < c0min);
+#else
+ bool closestChild1 = tminmax[1] < tminmax[0];
+#endif
+
+ if(closestChild1) {
+ int tmp = nodeAddr;
+ nodeAddr = nodeAddrChild1;
+ nodeAddrChild1 = tmp;
+ }
+
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_STACK_SIZE);
+ traversalStack[stackPtr] = nodeAddrChild1;
+ }
+ else {
+ /* one child was intersected */
+ if(traverseChild1) {
+ nodeAddr = nodeAddrChild1;
+ }
+ else if(!traverseChild0) {
+ /* neither child was intersected */
+ nodeAddr = traversalStack[stackPtr];
+ --stackPtr;
+ }
+ }
+ }
+
+ /* if node is leaf, fetch triangle list */
+ if(nodeAddr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE);
+ int primAddr = __float_as_int(leaf.x);
+
+#if BVH_FEATURE(BVH_INSTANCING)
+ if(primAddr >= 0) {
+#endif
+ const int primAddr2 = __float_as_int(leaf.y);
+ const uint type = __float_as_int(leaf.w);
+ bool hit;
+
+ /* pop */
+ nodeAddr = traversalStack[stackPtr];
+ --stackPtr;
+
+ /* primitive intersection */
+ switch(type & PRIMITIVE_ALL) {
+ case PRIMITIVE_TRIANGLE: {
+ /* intersect ray against primitive */
+ for(; primAddr < primAddr2; primAddr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ /* only primitives from volume object */
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, primAddr);
+ if(hit) {
+ /* Move on to next entry in intersections array. */
+ isect_array++;
+ num_hits++;
+#if BVH_FEATURE(BVH_INSTANCING)
+ num_hits_in_instance++;
+#endif
+ isect_array->t = isect_t;
+ if(num_hits == max_hits) {
+#if BVH_FEATURE(BVH_INSTANCING)
+#if BVH_FEATURE(BVH_MOTION)
+ float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir));
+#else
+ Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+ float t_fac = len(transform_direction(&tfm, 1.0f/idir));
+#endif
+ for(int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array-i-1)->t *= t_fac;
+ }
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+ return num_hits;
+ }
+ }
+ }
+ break;
+ }
+#if BVH_FEATURE(BVH_MOTION)
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ /* intersect ray against primitive */
+ for(; primAddr < primAddr2; primAddr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ /* only primitives from volume object */
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, primAddr);
+ if(hit) {
+ /* Move on to next entry in intersections array. */
+ isect_array++;
+ num_hits++;
+#if BVH_FEATURE(BVH_INSTANCING)
+ num_hits_in_instance++;
+#endif
+ isect_array->t = isect_t;
+ if(num_hits == max_hits) {
+#if BVH_FEATURE(BVH_INSTANCING)
+# if BVH_FEATURE(BVH_MOTION)
+ float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir));
+# else
+ Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+ float t_fac = len(transform_direction(&tfm, 1.0f/idir));
+#endif
+ for(int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array-i-1)->t *= t_fac;
+ }
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+ return num_hits;
+ }
+ }
+ }
+ break;
+ }
+#endif
+#if BVH_FEATURE(BVH_HAIR)
+ case PRIMITIVE_CURVE:
+ case PRIMITIVE_MOTION_CURVE: {
+ /* intersect ray against primitive */
+ for(; primAddr < primAddr2; primAddr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ /* only primitives from volume object */
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
+ hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0);
+ else
+ hit = bvh_curve_intersect(kg, isect_array, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0);
+ if(hit) {
+ /* Move on to next entry in intersections array. */
+ isect_array++;
+ num_hits++;
+#if BVH_FEATURE(BVH_INSTANCING)
+ num_hits_in_instance++;
+#endif
+ isect_array->t = isect_t;
+ if(num_hits == max_hits) {
+#if BVH_FEATURE(BVH_INSTANCING)
+# if BVH_FEATURE(BVH_MOTION)
+ float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir));
+# else
+ Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+ float t_fac = len(transform_direction(&tfm, 1.0f/idir));
+#endif
+ for(int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array-i-1)->t *= t_fac;
+ }
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+ return num_hits;
+ }
+ }
+ }
+ break;
+ }
+#endif
+ default: {
+ break;
+ }
+ }
+ }
+#if BVH_FEATURE(BVH_INSTANCING)
+ else {
+ /* instance push */
+ object = kernel_tex_fetch(__prim_object, -primAddr-1);
+ int object_flag = kernel_tex_fetch(__object_flag, object);
+
+ if(object_flag & SD_OBJECT_HAS_VOLUME) {
+
+#if BVH_FEATURE(BVH_MOTION)
+ bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm);
+#else
+ bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
+#endif
+
+ triangle_intersect_precalc(dir, &isect_precalc);
+ num_hits_in_instance = 0;
+ isect_array->t = isect_t;
+
+#if defined(__KERNEL_SSE2__)
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
+
+ tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
+
+ gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+#endif
+
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_STACK_SIZE);
+ traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
+
+ nodeAddr = kernel_tex_fetch(__object_node, object);
+ }
+ else {
+ /* pop */
+ object = OBJECT_NONE;
+ nodeAddr = traversalStack[stackPtr];
+ --stackPtr;
+ }
+ }
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+#if BVH_FEATURE(BVH_INSTANCING)
+ if(stackPtr >= 0) {
+ kernel_assert(object != OBJECT_NONE);
+
+ if(num_hits_in_instance) {
+ float t_fac;
+#if BVH_FEATURE(BVH_MOTION)
+ bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_tfm);
+#else
+ bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
+#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
+ /* Scale isect->t to adjust for instancing. */
+ for(int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array-i-1)->t *= t_fac;
+ }
+ }
+ else {
+ float ignore_t = FLT_MAX;
+#if BVH_FEATURE(BVH_MOTION)
+ bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_tfm);
+#else
+ bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
+#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
+ }
+
+ isect_t = tmax;
+ isect_array->t = isect_t;
+
+#if defined(__KERNEL_SSE2__)
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
+
+ tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
+
+ gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+#endif
+
+ object = OBJECT_NONE;
+ nodeAddr = traversalStack[stackPtr];
+ --stackPtr;
+ }
+#endif /* FEATURE(BVH_MOTION) */
+ } while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+ return num_hits;
+}
+
+ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
+ const Ray *ray,
+ Intersection *isect_array,
+ const uint max_hits)
+{
+#ifdef __QBVH__
+ if(kernel_data.bvh.use_qbvh) {
+ return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
+ ray,
+ isect_array,
+ max_hits);
+ }
+ else
+#endif
+ {
+ kernel_assert(kernel_data.bvh.use_qbvh == false);
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg,
+ ray,
+ isect_array,
+ max_hits);
+ }
+}
+
+#undef BVH_FUNCTION_NAME
+#undef BVH_FUNCTION_FEATURES
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h
index ac6c6ec4929..9653ad8f1bb 100644
--- a/intern/cycles/kernel/geom/geom_curve.h
+++ b/intern/cycles/kernel/geom/geom_curve.h
@@ -32,22 +32,22 @@ ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd,
if(dy) *dy = 0.0f;
#endif
- return kernel_tex_fetch(__attributes_float, offset + sd->prim);
+ return kernel_tex_fetch(__attributes_float, offset + ccl_fetch(sd, prim));
}
else if(elem == ATTR_ELEMENT_CURVE_KEY || elem == ATTR_ELEMENT_CURVE_KEY_MOTION) {
- float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
- int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+ float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
+ int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type));
int k1 = k0 + 1;
float f0 = kernel_tex_fetch(__attributes_float, offset + k0);
float f1 = kernel_tex_fetch(__attributes_float, offset + k1);
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*(f1 - f0);
+ if(dx) *dx = ccl_fetch(sd, du).dx*(f1 - f0);
if(dy) *dy = 0.0f;
#endif
- return (1.0f - sd->u)*f0 + sd->u*f1;
+ return (1.0f - ccl_fetch(sd, u))*f0 + ccl_fetch(sd, u)*f1;
}
else {
#ifdef __RAY_DIFFERENTIALS__
@@ -71,22 +71,22 @@ ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
#endif
- return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim));
+ return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + ccl_fetch(sd, prim)));
}
else if(elem == ATTR_ELEMENT_CURVE_KEY || elem == ATTR_ELEMENT_CURVE_KEY_MOTION) {
- float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
- int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+ float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
+ int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type));
int k1 = k0 + 1;
float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + k0));
float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + k1));
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*(f1 - f0);
+ if(dx) *dx = ccl_fetch(sd, du).dx*(f1 - f0);
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
#endif
- return (1.0f - sd->u)*f0 + sd->u*f1;
+ return (1.0f - ccl_fetch(sd, u))*f0 + ccl_fetch(sd, u)*f1;
}
else {
#ifdef __RAY_DIFFERENTIALS__
@@ -104,22 +104,22 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd)
{
float r = 0.0f;
- if(sd->type & PRIMITIVE_ALL_CURVE) {
- float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
- int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+ if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
+ float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
+ int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type));
int k1 = k0 + 1;
float4 P_curve[2];
- if(sd->type & PRIMITIVE_CURVE) {
+ if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) {
P_curve[0]= kernel_tex_fetch(__curve_keys, k0);
P_curve[1]= kernel_tex_fetch(__curve_keys, k1);
}
else {
- motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
+ motion_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), k0, k1, P_curve);
}
- r = (P_curve[1].w - P_curve[0].w) * sd->u + P_curve[0].w;
+ r = (P_curve[1].w - P_curve[0].w) * ccl_fetch(sd, u) + P_curve[0].w;
}
return r*2.0f;
@@ -130,8 +130,8 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd)
ccl_device float3 curve_motion_center_location(KernelGlobals *kg, ShaderData *sd)
{
- float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
- int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+ float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
+ int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type));
int k1 = k0 + 1;
float4 P_curve[2];
@@ -139,7 +139,7 @@ ccl_device float3 curve_motion_center_location(KernelGlobals *kg, ShaderData *sd
P_curve[0]= kernel_tex_fetch(__curve_keys, k0);
P_curve[1]= kernel_tex_fetch(__curve_keys, k1);
- return float4_to_float3(P_curve[1]) * sd->u + float4_to_float3(P_curve[0]) * (1.0f - sd->u);
+ return float4_to_float3(P_curve[1]) * ccl_fetch(sd, u) + float4_to_float3(P_curve[0]) * (1.0f - ccl_fetch(sd, u));
}
/* Curve tangent normal */
@@ -148,14 +148,14 @@ ccl_device float3 curve_tangent_normal(KernelGlobals *kg, ShaderData *sd)
{
float3 tgN = make_float3(0.0f,0.0f,0.0f);
- if(sd->type & PRIMITIVE_ALL_CURVE) {
+ if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
- tgN = -(-sd->I - sd->dPdu * (dot(sd->dPdu,-sd->I) / len_squared(sd->dPdu)));
+ tgN = -(-ccl_fetch(sd, I) - ccl_fetch(sd, dPdu) * (dot(ccl_fetch(sd, dPdu),-ccl_fetch(sd, I)) / len_squared(ccl_fetch(sd, dPdu))));
tgN = normalize(tgN);
/* need to find suitable scaled gd for corrected normal */
#if 0
- tgN = normalize(tgN - gd * sd->dPdu);
+ tgN = normalize(tgN - gd * ccl_fetch(sd, dPdu));
#endif
}
@@ -442,12 +442,12 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
float r_ext = mw_extension + r_curr;
float coverage = 1.0f;
- if (bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext|| bmaxx < -r_ext|| bminy > r_ext|| bmaxy < -r_ext) {
+ if(bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext|| bmaxx < -r_ext|| bminy > r_ext|| bmaxy < -r_ext) {
/* the bounding box does not overlap the square centered at O */
tree += level;
level = tree & -tree;
}
- else if (level == 1) {
+ else if(level == 1) {
/* the maximum recursion depth is reached.
* check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0.
@@ -459,13 +459,13 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
if(flags & CURVE_KN_RIBBONS) {
float3 tg = (p_en - p_st);
float w = tg.x * tg.x + tg.y * tg.y;
- if (w == 0) {
+ if(w == 0) {
tree++;
level = tree & -tree;
continue;
}
w = -(p_st.x * tg.x + p_st.y * tg.y) / w;
- w = clamp((float)w, 0.0f, 1.0f);
+ w = saturate(w);
/* compute u on the curve segment */
u = i_st * (1 - w) + i_en * w;
@@ -474,17 +474,17 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
float3 p_curr = ((curve_coef[3] * u + curve_coef[2]) * u + curve_coef[1]) * u + curve_coef[0];
float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
- if (dot(tg, dp_st)< 0)
+ if(dot(tg, dp_st)< 0)
dp_st *= -1;
- if (dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) {
+ if(dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) {
tree++;
level = tree & -tree;
continue;
}
float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1];
- if (dot(tg, dp_en) < 0)
+ if(dot(tg, dp_en) < 0)
dp_en *= -1;
- if (dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) {
+ if(dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) {
tree++;
level = tree & -tree;
continue;
@@ -500,13 +500,13 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
float d0 = d - r_curr;
float d1 = d + r_curr;
float inv_mw_extension = 1.0f/mw_extension;
- if (d0 >= 0)
+ if(d0 >= 0)
coverage = (min(d1 * inv_mw_extension, 1.0f) - min(d0 * inv_mw_extension, 1.0f)) * 0.5f;
else // inside
coverage = (min(d1 * inv_mw_extension, 1.0f) + min(-d0 * inv_mw_extension, 1.0f)) * 0.5f;
}
- if (p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) {
+ if(p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) {
tree++;
level = tree & -tree;
continue;
@@ -548,7 +548,7 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
float tb = 2*(tdif.z - tg.z*(tdifz + gd*(tdifz*gd + or1)));
float tc = dot(tdif,tdif) - tdifz * tdifz * (1 + gd*gd) - or1*or1 - 2*or1*tdifz*gd;
float td = tb*tb - 4*cyla*tc;
- if (td < 0.0f) {
+ if(td < 0.0f) {
tree++;
level = tree & -tree;
continue;
@@ -559,10 +559,10 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
t = tcentre + correction;
float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
- if (dot(tg, dp_st)< 0)
+ if(dot(tg, dp_st)< 0)
dp_st *= -1;
float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1];
- if (dot(tg, dp_en) < 0)
+ if(dot(tg, dp_en) < 0)
dp_en *= -1;
if(flags & CURVE_KN_BACKFACING && (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f)) {
@@ -570,14 +570,14 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
t = tcentre + correction;
}
- if (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f) {
+ if(dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f) {
tree++;
level = tree & -tree;
continue;
}
float w = (zcentre + (tg.z * correction)) * invl;
- w = clamp((float)w, 0.0f, 1.0f);
+ w = saturate(w);
/* compute u on the curve segment */
u = i_st * (1 - w) + i_en * w;
@@ -777,7 +777,7 @@ ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isec
float tc = dot3(tdif, tdif) - tdifz*tdifz - tdifma*tdifma;
float td = tb*tb - 4*a*tc;
- if (td < 0.0f)
+ if(td < 0.0f)
return false;
float rootd = 0.0f;
@@ -818,7 +818,7 @@ ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isec
if(t > 0.0f && t < isect->t && z >= 0 && z <= l) {
- if (flags & CURVE_KN_ENCLOSEFILTER) {
+ if(flags & CURVE_KN_ENCLOSEFILTER) {
float enc_ratio = 1.01f;
if((difz > -r1 * enc_ratio) && (dot3(dif_second, tg) < r2 * enc_ratio)) {
float a2 = 1.0f - (dirz*dirz*(1 + gd*gd*enc_ratio*enc_ratio));
@@ -890,7 +890,7 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_itfm;
+ Transform tfm = ccl_fetch(sd, ob_itfm);
#else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
#endif
@@ -903,7 +903,7 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
int prim = kernel_tex_fetch(__prim_index, isect->prim);
float4 v00 = kernel_tex_fetch(__curves, prim);
- int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+ int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type));
int k1 = k0 + 1;
float3 tg;
@@ -914,14 +914,14 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
float4 P_curve[4];
- if(sd->type & PRIMITIVE_CURVE) {
+ if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) {
P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
P_curve[3] = kernel_tex_fetch(__curve_keys, kb);
}
else {
- motion_cardinal_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve);
+ motion_cardinal_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), ka, k0, k1, kb, P_curve);
}
float3 p[4];
@@ -933,43 +933,43 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
P = P + D*t;
#ifdef __UV__
- sd->u = isect->u;
- sd->v = 0.0f;
+ ccl_fetch(sd, u) = isect->u;
+ ccl_fetch(sd, v) = 0.0f;
#endif
tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3]));
if(kernel_data.curve.curveflags & CURVE_KN_RIBBONS) {
- sd->Ng = normalize(-(D - tg * (dot(tg, D))));
+ ccl_fetch(sd, Ng) = normalize(-(D - tg * (dot(tg, D))));
}
else {
/* direction from inside to surface of curve */
float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]);
- sd->Ng = normalize(P - p_curr);
+ ccl_fetch(sd, Ng) = normalize(P - p_curr);
/* adjustment for changing radius */
float gd = isect->v;
if(gd != 0.0f) {
- sd->Ng = sd->Ng - gd * tg;
- sd->Ng = normalize(sd->Ng);
+ ccl_fetch(sd, Ng) = ccl_fetch(sd, Ng) - gd * tg;
+ ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng));
}
}
/* todo: sometimes the normal is still so that this is detected as
* backfacing even if cull backfaces is enabled */
- sd->N = sd->Ng;
+ ccl_fetch(sd, N) = ccl_fetch(sd, Ng);
}
else {
float4 P_curve[2];
- if(sd->type & PRIMITIVE_CURVE) {
+ if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) {
P_curve[0]= kernel_tex_fetch(__curve_keys, k0);
P_curve[1]= kernel_tex_fetch(__curve_keys, k1);
}
else {
- motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
+ motion_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), k0, k1, P_curve);
}
float l = 1.0f;
@@ -980,39 +980,39 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
float3 dif = P - float4_to_float3(P_curve[0]);
#ifdef __UV__
- sd->u = dot(dif,tg)/l;
- sd->v = 0.0f;
+ ccl_fetch(sd, u) = dot(dif,tg)/l;
+ ccl_fetch(sd, v) = 0.0f;
#endif
- if (flag & CURVE_KN_TRUETANGENTGNORMAL) {
- sd->Ng = -(D - tg * dot(tg, D));
- sd->Ng = normalize(sd->Ng);
+ if(flag & CURVE_KN_TRUETANGENTGNORMAL) {
+ ccl_fetch(sd, Ng) = -(D - tg * dot(tg, D));
+ ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng));
}
else {
float gd = isect->v;
/* direction from inside to surface of curve */
- sd->Ng = (dif - tg * sd->u * l) / (P_curve[0].w + sd->u * l * gd);
+ ccl_fetch(sd, Ng) = (dif - tg * ccl_fetch(sd, u) * l) / (P_curve[0].w + ccl_fetch(sd, u) * l * gd);
/* adjustment for changing radius */
- if (gd != 0.0f) {
- sd->Ng = sd->Ng - gd * tg;
- sd->Ng = normalize(sd->Ng);
+ if(gd != 0.0f) {
+ ccl_fetch(sd, Ng) = ccl_fetch(sd, Ng) - gd * tg;
+ ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng));
}
}
- sd->N = sd->Ng;
+ ccl_fetch(sd, N) = ccl_fetch(sd, Ng);
}
#ifdef __DPDU__
/* dPdu/dPdv */
- sd->dPdu = tg;
- sd->dPdv = cross(tg, sd->Ng);
+ ccl_fetch(sd, dPdu) = tg;
+ ccl_fetch(sd, dPdv) = cross(tg, ccl_fetch(sd, Ng));
#endif
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_tfm;
+ Transform tfm = ccl_fetch(sd, ob_tfm);
#else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
#endif
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h
index d3297e05c67..86f93f242a1 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle.h
@@ -134,7 +134,7 @@ ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg, ShaderData *s
return P;
}
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_itfm;
+ Transform tfm = ccl_fetch(sd, ob_itfm);
#else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
#endif
@@ -161,7 +161,7 @@ ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg, ShaderData *s
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_tfm;
+ Transform tfm = ccl_fetch(sd, ob_tfm);
#else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
#endif
@@ -187,7 +187,7 @@ ccl_device_inline float3 motion_triangle_refine_subsurface(KernelGlobals *kg, Sh
#ifdef __INTERSECTION_REFINE__
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_itfm;
+ Transform tfm = ccl_fetch(sd, ob_itfm);
#else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
#endif
@@ -213,7 +213,7 @@ ccl_device_inline float3 motion_triangle_refine_subsurface(KernelGlobals *kg, Sh
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_tfm;
+ Transform tfm = ccl_fetch(sd, ob_tfm);
#else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
#endif
@@ -236,25 +236,25 @@ ccl_device_inline float3 motion_triangle_refine_subsurface(KernelGlobals *kg, Sh
ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, bool subsurface)
{
/* get shader */
- sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
+ ccl_fetch(sd, shader) = kernel_tex_fetch(__tri_shader, ccl_fetch(sd, prim));
/* get motion info */
int numsteps, numverts;
- object_motion_info(kg, sd->object, &numsteps, &numverts, NULL);
+ object_motion_info(kg, ccl_fetch(sd, object), &numsteps, &numverts, NULL);
/* figure out which steps we need to fetch and their interpolation factor */
int maxstep = numsteps*2;
- int step = min((int)(sd->time*maxstep), maxstep-1);
- float t = sd->time*maxstep - step;
+ int step = min((int)(ccl_fetch(sd, time)*maxstep), maxstep-1);
+ float t = ccl_fetch(sd, time)*maxstep - step;
/* find attribute */
AttributeElement elem;
- int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
+ int offset = find_attribute_motion(kg, ccl_fetch(sd, object), ATTR_STD_MOTION_VERTEX_POSITION, &elem);
kernel_assert(offset != ATTR_STD_NOT_FOUND);
/* fetch vertex coordinates */
float3 verts[3], next_verts[3];
- float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim));
+ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)));
motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts);
@@ -268,33 +268,33 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderD
#ifdef __SUBSURFACE__
if(!subsurface)
#endif
- sd->P = motion_triangle_refine(kg, sd, isect, ray, verts);
+ ccl_fetch(sd, P) = motion_triangle_refine(kg, sd, isect, ray, verts);
#ifdef __SUBSURFACE__
else
- sd->P = motion_triangle_refine_subsurface(kg, sd, isect, ray, verts);
+ ccl_fetch(sd, P) = motion_triangle_refine_subsurface(kg, sd, isect, ray, verts);
#endif
/* compute face normal */
float3 Ng;
- if(sd->flag & SD_NEGATIVE_SCALE_APPLIED)
+ if(ccl_fetch(sd, flag) & SD_NEGATIVE_SCALE_APPLIED)
Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0]));
else
Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0]));
- sd->Ng = Ng;
- sd->N = Ng;
+ ccl_fetch(sd, Ng) = Ng;
+ ccl_fetch(sd, N) = Ng;
/* compute derivatives of P w.r.t. uv */
#ifdef __DPDU__
- sd->dPdu = (verts[0] - verts[2]);
- sd->dPdv = (verts[1] - verts[2]);
+ ccl_fetch(sd, dPdu) = (verts[0] - verts[2]);
+ ccl_fetch(sd, dPdv) = (verts[1] - verts[2]);
#endif
/* compute smooth normal */
- if(sd->shader & SHADER_SMOOTH_NORMAL) {
+ if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) {
/* find attribute */
AttributeElement elem;
- int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem);
+ int offset = find_attribute_motion(kg, ccl_fetch(sd, object), ATTR_STD_MOTION_VERTEX_NORMAL, &elem);
kernel_assert(offset != ATTR_STD_NOT_FOUND);
/* fetch vertex coordinates */
@@ -308,10 +308,10 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderD
normals[2] = (1.0f - t)*normals[2] + t*next_normals[2];
/* interpolate between vertices */
- float u = sd->u;
- float v = sd->v;
+ float u = ccl_fetch(sd, u);
+ float v = ccl_fetch(sd, v);
float w = 1.0f - u - v;
- sd->N = (u*normals[0] + v*normals[1] + w*normals[2]);
+ ccl_fetch(sd, N) = (u*normals[0] + v*normals[1] + w*normals[2]);
}
}
diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h
index 79a56683454..9d0a008fff1 100644
--- a/intern/cycles/kernel/geom/geom_object.h
+++ b/intern/cycles/kernel/geom/geom_object.h
@@ -123,9 +123,9 @@ ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals *kg
ccl_device_inline void object_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P)
{
#ifdef __OBJECT_MOTION__
- *P = transform_point(&sd->ob_tfm, *P);
+ *P = transform_point_auto(&ccl_fetch(sd, ob_tfm), *P);
#else
- Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM);
*P = transform_point(&tfm, *P);
#endif
}
@@ -135,9 +135,9 @@ ccl_device_inline void object_position_transform(KernelGlobals *kg, const Shader
ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P)
{
#ifdef __OBJECT_MOTION__
- *P = transform_point(&sd->ob_itfm, *P);
+ *P = transform_point_auto(&ccl_fetch(sd, ob_itfm), *P);
#else
- Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM);
*P = transform_point(&tfm, *P);
#endif
}
@@ -147,9 +147,9 @@ ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, cons
ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N)
{
#ifdef __OBJECT_MOTION__
- *N = normalize(transform_direction_transposed(&sd->ob_tfm, *N));
+ *N = normalize(transform_direction_transposed_auto(&ccl_fetch(sd, ob_tfm), *N));
#else
- Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM);
*N = normalize(transform_direction_transposed(&tfm, *N));
#endif
}
@@ -159,9 +159,9 @@ ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, const
ccl_device_inline void object_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N)
{
#ifdef __OBJECT_MOTION__
- *N = normalize(transform_direction_transposed(&sd->ob_itfm, *N));
+ *N = normalize(transform_direction_transposed_auto(&ccl_fetch(sd, ob_itfm), *N));
#else
- Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM);
*N = normalize(transform_direction_transposed(&tfm, *N));
#endif
}
@@ -171,9 +171,9 @@ ccl_device_inline void object_normal_transform(KernelGlobals *kg, const ShaderDa
ccl_device_inline void object_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D)
{
#ifdef __OBJECT_MOTION__
- *D = transform_direction(&sd->ob_tfm, *D);
+ *D = transform_direction_auto(&ccl_fetch(sd, ob_tfm), *D);
#else
- Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM);
*D = transform_direction(&tfm, *D);
#endif
}
@@ -183,9 +183,9 @@ ccl_device_inline void object_dir_transform(KernelGlobals *kg, const ShaderData
ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D)
{
#ifdef __OBJECT_MOTION__
- *D = transform_direction(&sd->ob_itfm, *D);
+ *D = transform_direction_auto(&ccl_fetch(sd, ob_itfm), *D);
#else
- Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM);
*D = transform_direction(&tfm, *D);
#endif
}
@@ -194,13 +194,13 @@ ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, const Sha
ccl_device_inline float3 object_location(KernelGlobals *kg, const ShaderData *sd)
{
- if(sd->object == OBJECT_NONE)
+ if(ccl_fetch(sd, object) == OBJECT_NONE)
return make_float3(0.0f, 0.0f, 0.0f);
#ifdef __OBJECT_MOTION__
- return make_float3(sd->ob_tfm.x.w, sd->ob_tfm.y.w, sd->ob_tfm.z.w);
+ return make_float3(ccl_fetch(sd, ob_tfm).x.w, ccl_fetch(sd, ob_tfm).y.w, ccl_fetch(sd, ob_tfm).z.w);
#else
- Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM);
return make_float3(tfm.x.w, tfm.y.w, tfm.z.w);
#endif
}
@@ -243,7 +243,7 @@ ccl_device_inline float object_random_number(KernelGlobals *kg, int object)
ccl_device_inline int object_particle_id(KernelGlobals *kg, int object)
{
if(object == OBJECT_NONE)
- return 0.0f;
+ return 0;
int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES;
float4 f = kernel_tex_fetch(__objects, offset);
@@ -296,7 +296,7 @@ ccl_device_inline void object_motion_info(KernelGlobals *kg, int object, int *nu
ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd)
{
- return kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2 + 1);
+ return kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2 + 1);
}
/* Particle data from which object was instanced */
@@ -377,7 +377,7 @@ ccl_device_inline float3 bvh_inverse_direction(float3 dir)
/* Transform ray into object space to enter static object in BVH */
-ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t)
+ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t)
{
Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
@@ -425,7 +425,7 @@ ccl_device_inline void qbvh_instance_push(KernelGlobals *kg,
/* Transorm ray to exit static object in BVH */
-ccl_device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t)
+ccl_device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t)
{
if(*t != FLT_MAX) {
Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
@@ -453,7 +453,7 @@ ccl_device_inline void bvh_instance_pop_factor(KernelGlobals *kg, int object, co
#ifdef __OBJECT_MOTION__
/* Transform ray into object space to enter motion blurred object in BVH */
-ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t, Transform *tfm)
+ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t, Transform *tfm)
{
Transform itfm;
*tfm = object_fetch_transform_motion_test(kg, object, ray->time, &itfm);
@@ -497,7 +497,7 @@ ccl_device_inline void qbvh_instance_motion_push(KernelGlobals *kg, int object,
/* Transorm ray to exit motion blurred object in BVH */
-ccl_device_inline void bvh_instance_motion_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t, Transform *tfm)
+ccl_device_inline void bvh_instance_motion_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t, Transform *tfm)
{
if(*t != FLT_MAX)
*t *= len(transform_direction(tfm, 1.0f/(*idir)));
@@ -520,5 +520,38 @@ ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals *kg, int obj
#endif
+/* TODO(sergey): This is only for until we've got OpenCL 2.0
+ * on all devices we consider supported. It'll be replaced with
+ * generic address space.
+ */
+
+#ifdef __KERNEL_OPENCL__
+ccl_device_inline void object_dir_transform_addrspace(KernelGlobals *kg,
+ const ShaderData *sd,
+ ccl_addr_space float3 *D)
+{
+ float3 private_D = *D;
+ object_dir_transform(kg, sd, &private_D);
+ *D = private_D;
+}
+
+ccl_device_inline void object_normal_transform_addrspace(KernelGlobals *kg,
+ const ShaderData *sd,
+ ccl_addr_space float3 *N)
+{
+ float3 private_N = *N;
+ object_normal_transform(kg, sd, &private_N);
+ *N = private_N;
+}
+#endif
+
+#ifndef __KERNEL_OPENCL__
+# define object_dir_transform_auto object_dir_transform
+# define object_normal_transform_auto object_normal_transform
+#else
+# define object_dir_transform_auto object_dir_transform_addrspace
+# define object_normal_transform_auto object_normal_transform_addrspace
+#endif
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h
index b52ec7ef1b2..30f12d32355 100644
--- a/intern/cycles/kernel/geom/geom_primitive.h
+++ b/intern/cycles/kernel/geom/geom_primitive.h
@@ -25,16 +25,16 @@ CCL_NAMESPACE_BEGIN
ccl_device float primitive_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy)
{
- if(sd->type & PRIMITIVE_ALL_TRIANGLE) {
+ if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) {
return triangle_attribute_float(kg, sd, elem, offset, dx, dy);
}
#ifdef __HAIR__
- else if(sd->type & PRIMITIVE_ALL_CURVE) {
+ else if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
return curve_attribute_float(kg, sd, elem, offset, dx, dy);
}
#endif
#ifdef __VOLUME__
- else if(sd->object != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) {
+ else if(ccl_fetch(sd, object) != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) {
return volume_attribute_float(kg, sd, elem, offset, dx, dy);
}
#endif
@@ -47,16 +47,16 @@ ccl_device float primitive_attribute_float(KernelGlobals *kg, const ShaderData *
ccl_device float3 primitive_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy)
{
- if(sd->type & PRIMITIVE_ALL_TRIANGLE) {
+ if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) {
return triangle_attribute_float3(kg, sd, elem, offset, dx, dy);
}
#ifdef __HAIR__
- else if(sd->type & PRIMITIVE_ALL_CURVE) {
+ else if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
return curve_attribute_float3(kg, sd, elem, offset, dx, dy);
}
#endif
#ifdef __VOLUME__
- else if(sd->object != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) {
+ else if(ccl_fetch(sd, object) != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) {
return volume_attribute_float3(kg, sd, elem, offset, dx, dy);
}
#endif
@@ -108,9 +108,9 @@ ccl_device bool primitive_ptex(KernelGlobals *kg, ShaderData *sd, float2 *uv, in
ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd)
{
#ifdef __HAIR__
- if(sd->type & PRIMITIVE_ALL_CURVE)
+ if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE)
#ifdef __DPDU__
- return normalize(sd->dPdu);
+ return normalize(ccl_fetch(sd, dPdu));
#else
return make_float3(0.0f, 0.0f, 0.0f);
#endif
@@ -124,12 +124,12 @@ ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd)
float3 data = primitive_attribute_float3(kg, sd, attr_elem, attr_offset, NULL, NULL);
data = make_float3(-(data.y - 0.5f), (data.x - 0.5f), 0.0f);
object_normal_transform(kg, sd, &data);
- return cross(sd->N, normalize(cross(data, sd->N)));
+ return cross(ccl_fetch(sd, N), normalize(cross(data, ccl_fetch(sd, N))));
}
else {
/* otherwise use surface derivatives */
#ifdef __DPDU__
- return normalize(sd->dPdu);
+ return normalize(ccl_fetch(sd, dPdu));
#else
return make_float3(0.0f, 0.0f, 0.0f);
#endif
@@ -144,16 +144,16 @@ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd)
float3 center;
#ifdef __HAIR__
- bool is_curve_primitive = sd->type & PRIMITIVE_ALL_CURVE;
+ bool is_curve_primitive = ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE;
if(is_curve_primitive) {
center = curve_motion_center_location(kg, sd);
- if(!(sd->flag & SD_TRANSFORM_APPLIED))
+ if(!(ccl_fetch(sd, flag) & SD_TRANSFORM_APPLIED))
object_position_transform(kg, sd, &center);
}
else
#endif
- center = sd->P;
+ center = ccl_fetch(sd, P);
float3 motion_pre = center, motion_post = center;
@@ -164,16 +164,16 @@ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd)
if(offset != ATTR_STD_NOT_FOUND) {
/* get motion info */
int numverts, numkeys;
- object_motion_info(kg, sd->object, NULL, &numverts, &numkeys);
+ object_motion_info(kg, ccl_fetch(sd, object), NULL, &numverts, &numkeys);
/* lookup attributes */
- int offset_next = (sd->type & PRIMITIVE_ALL_TRIANGLE)? offset + numverts: offset + numkeys;
+ int offset_next = (ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)? offset + numverts: offset + numkeys;
motion_pre = primitive_attribute_float3(kg, sd, elem, offset, NULL, NULL);
motion_post = primitive_attribute_float3(kg, sd, elem, offset_next, NULL, NULL);
#ifdef __HAIR__
- if(is_curve_primitive && (sd->flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) {
+ if(is_curve_primitive && (ccl_fetch(sd, flag) & SD_OBJECT_HAS_VERTEX_MOTION) == 0) {
object_position_transform(kg, sd, &motion_pre);
object_position_transform(kg, sd, &motion_post);
}
@@ -184,17 +184,17 @@ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd)
* transformation was set match the world/object space of motion_pre/post */
Transform tfm;
- tfm = object_fetch_vector_transform(kg, sd->object, OBJECT_VECTOR_MOTION_PRE);
+ tfm = object_fetch_vector_transform(kg, ccl_fetch(sd, object), OBJECT_VECTOR_MOTION_PRE);
motion_pre = transform_point(&tfm, motion_pre);
- tfm = object_fetch_vector_transform(kg, sd->object, OBJECT_VECTOR_MOTION_POST);
+ tfm = object_fetch_vector_transform(kg, ccl_fetch(sd, object), OBJECT_VECTOR_MOTION_POST);
motion_post = transform_point(&tfm, motion_post);
float3 motion_center;
/* camera motion, for perspective/orthographic motion.pre/post will be a
* world-to-raster matrix, for panorama it's world-to-camera */
- if (kernel_data.cam.type != CAMERA_PANORAMA) {
+ if(kernel_data.cam.type != CAMERA_PANORAMA) {
tfm = kernel_data.cam.worldtoraster;
motion_center = transform_perspective(&tfm, center);
diff --git a/intern/cycles/kernel/geom/geom_qbvh_shadow.h b/intern/cycles/kernel/geom/geom_qbvh_shadow.h
index 4233ff15c86..f79b2ed9f34 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_shadow.h
+++ b/intern/cycles/kernel/geom/geom_qbvh_shadow.h
@@ -155,11 +155,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c1;
- traversalStack[stackPtr].dist = c1;
+ traversalStack[stackPtr].dist = d1;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c0;
- traversalStack[stackPtr].dist = c0;
+ traversalStack[stackPtr].dist = d0;
/* Three children are hit, push all onto stack and sort 3
* stack items, continue with closest child.
@@ -206,7 +206,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* If node is leaf, fetch triangle list. */
if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_QNODE_SIZE+6);
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE);
#ifdef __VISIBILITY_FLAG__
if((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) {
/* Pop. */
@@ -241,7 +241,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
switch(p_type) {
case PRIMITIVE_TRIANGLE: {
- hit = triangle_intersect(kg, &isect_precalc, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr);
+ hit = triangle_intersect(kg, &isect_precalc, isect_array, P, PATH_RAY_SHADOW, object, primAddr);
break;
}
#if BVH_FEATURE(BVH_MOTION)
@@ -279,7 +279,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
#endif
{
- shader = kernel_tex_fetch(__tri_shader, prim);
+ shader = kernel_tex_fetch(__tri_shader, prim);
}
#ifdef __HAIR__
else {
diff --git a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
index 62598115fa3..d85e1a4691e 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
+++ b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h
@@ -202,7 +202,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* If node is leaf, fetch triangle list. */
if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_QNODE_SIZE+6);
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE);
int primAddr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
@@ -226,7 +226,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(tri_object != subsurface_object) {
continue;
}
- triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, dir, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
+ triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
}
break;
}
diff --git a/intern/cycles/kernel/geom/geom_qbvh_traversal.h b/intern/cycles/kernel/geom/geom_qbvh_traversal.h
index 99d2fb20837..7e356ea062b 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_traversal.h
+++ b/intern/cycles/kernel/geom/geom_qbvh_traversal.h
@@ -80,6 +80,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#if defined(__KERNEL_DEBUG__)
isect->num_traversal_steps = 0;
+ isect->num_traversed_instances = 0;
#endif
ssef tnear(0.0f), tfar(ray->t);
@@ -185,6 +186,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(traverseChild == 0) {
if(d1 < d0) {
nodeAddr = c1;
+ nodeDist = d1;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c0;
@@ -193,6 +195,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
}
else {
nodeAddr = c0;
+ nodeDist = d0;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c1;
@@ -260,7 +263,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* If node is leaf, fetch triangle list. */
if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_QNODE_SIZE+6);
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE);
#ifdef __VISIBILITY_FLAG__
if(UNLIKELY((nodeDist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0)))
@@ -296,7 +299,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
isect->num_traversal_steps++;
#endif
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
- if(triangle_intersect(kg, &isect_precalc, isect, P, dir, visibility, object, primAddr)) {
+ if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) {
tfar = ssef(isect->t);
/* Shadow ray early termination. */
if(visibility == PATH_RAY_SHADOW_OPAQUE)
@@ -377,6 +380,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
traversalStack[stackPtr].dist = -FLT_MAX;
nodeAddr = kernel_tex_fetch(__object_node, object);
+
+#if defined(__KERNEL_DEBUG__)
+ isect->num_traversed_instances++;
+#endif
}
}
#endif /* FEATURE(BVH_INSTANCING) */
diff --git a/intern/cycles/kernel/geom/geom_qbvh_volume.h b/intern/cycles/kernel/geom/geom_qbvh_volume.h
index 2c396e99fc4..d8cfa3a4061 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_volume.h
+++ b/intern/cycles/kernel/geom/geom_qbvh_volume.h
@@ -95,10 +95,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
do {
/* Traverse internal nodes. */
while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
-#if defined(__KERNEL_DEBUG__)
- isect->num_traversal_steps++;
-#endif
-
ssef dist;
int traverseChild = qbvh_node_intersect(kg,
tnear,
@@ -208,7 +204,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* If node is leaf, fetch triangle list. */
if(nodeAddr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_QNODE_SIZE+6);
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE);
int primAddr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
@@ -234,7 +230,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
continue;
}
/* Intersect ray against primitive. */
- triangle_intersect(kg, &isect_precalc, isect, P, dir, visibility, object, primAddr);
+ triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr);
}
break;
}
diff --git a/intern/cycles/kernel/geom/geom_qbvh_volume_all.h b/intern/cycles/kernel/geom/geom_qbvh_volume_all.h
new file mode 100644
index 00000000000..d5131919944
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom_qbvh_volume_all.h
@@ -0,0 +1,446 @@
+/*
+ * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
+ * and code copyright 2009-2012 Intel Corporation
+ *
+ * Modifications Copyright 2011-2014, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This is a template BVH traversal function for volumes, where
+ * various features can be enabled/disabled. This way we can compile optimized
+ * versions for each case without new features slowing things down.
+ *
+ * BVH_INSTANCING: object instancing
+ * BVH_HAIR: hair curve rendering
+ * BVH_MOTION: motion blur rendering
+ *
+ */
+
+ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
+ const Ray *ray,
+ Intersection *isect_array,
+ const uint max_hits)
+{
+ /* TODO(sergey):
+ * - Test if pushing distance on the stack helps.
+ * - Likely and unlikely for if() statements.
+ * - Test restrict attribute for pointers.
+ */
+
+ /* Traversal stack in CUDA thread-local memory. */
+ QBVHStackItem traversalStack[BVH_QSTACK_SIZE];
+ traversalStack[0].addr = ENTRYPOINT_SENTINEL;
+
+ /* Traversal variables in registers. */
+ int stackPtr = 0;
+ int nodeAddr = kernel_data.bvh.root;
+
+ /* Ray parameters in registers. */
+ const float tmax = ray->t;
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
+ float isect_t = tmax;
+
+ const uint visibility = PATH_RAY_ALL_VISIBILITY;
+
+#if BVH_FEATURE(BVH_MOTION)
+ Transform ob_tfm;
+#endif
+
+#ifndef __KERNEL_SSE41__
+ if(!isfinite(P.x)) {
+ return false;
+ }
+#endif
+
+#if BVH_FEATURE(BVH_INSTANCING)
+ int num_hits_in_instance = 0;
+#endif
+
+ uint num_hits = 0;
+ isect_array->t = tmax;
+
+ ssef tnear(0.0f), tfar(isect_t);
+ sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+
+#ifdef __KERNEL_AVX2__
+ float3 P_idir = P*idir;
+ sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#else
+ sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
+
+ /* Offsets to select the side that becomes the lower or upper bound. */
+ int near_x, near_y, near_z;
+ int far_x, far_y, far_z;
+
+ if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+ if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+ if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+
+ IsectPrecalc isect_precalc;
+ triangle_intersect_precalc(dir, &isect_precalc);
+
+ /* Traversal loop. */
+ do {
+ do {
+ /* Traverse internal nodes. */
+ while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
+ ssef dist;
+ int traverseChild = qbvh_node_intersect(kg,
+ tnear,
+ tfar,
+#ifdef __KERNEL_AVX2__
+ P_idir4,
+#else
+ org,
+#endif
+ idir4,
+ near_x, near_y, near_z,
+ far_x, far_y, far_z,
+ nodeAddr,
+ &dist);
+
+ if(traverseChild != 0) {
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6);
+
+ /* One child is hit, continue with that child. */
+ int r = __bscf(traverseChild);
+ if(traverseChild == 0) {
+ nodeAddr = __float_as_int(cnodes[r]);
+ continue;
+ }
+
+ /* Two children are hit, push far child, and continue with
+ * closer child.
+ */
+ int c0 = __float_as_int(cnodes[r]);
+ float d0 = ((float*)&dist)[r];
+ r = __bscf(traverseChild);
+ int c1 = __float_as_int(cnodes[r]);
+ float d1 = ((float*)&dist)[r];
+ if(traverseChild == 0) {
+ if(d1 < d0) {
+ nodeAddr = c1;
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = c0;
+ traversalStack[stackPtr].dist = d0;
+ continue;
+ }
+ else {
+ nodeAddr = c0;
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = c1;
+ traversalStack[stackPtr].dist = d1;
+ continue;
+ }
+ }
+
+ /* Here starts the slow path for 3 or 4 hit children. We push
+ * all nodes onto the stack to sort them there.
+ */
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = c1;
+ traversalStack[stackPtr].dist = d1;
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = c0;
+ traversalStack[stackPtr].dist = d0;
+
+ /* Three children are hit, push all onto stack and sort 3
+ * stack items, continue with closest child.
+ */
+ r = __bscf(traverseChild);
+ int c2 = __float_as_int(cnodes[r]);
+ float d2 = ((float*)&dist)[r];
+ if(traverseChild == 0) {
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = c2;
+ traversalStack[stackPtr].dist = d2;
+ qbvh_stack_sort(&traversalStack[stackPtr],
+ &traversalStack[stackPtr - 1],
+ &traversalStack[stackPtr - 2]);
+ nodeAddr = traversalStack[stackPtr].addr;
+ --stackPtr;
+ continue;
+ }
+
+ /* Four children are hit, push all onto stack and sort 4
+ * stack items, continue with closest child.
+ */
+ r = __bscf(traverseChild);
+ int c3 = __float_as_int(cnodes[r]);
+ float d3 = ((float*)&dist)[r];
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = c3;
+ traversalStack[stackPtr].dist = d3;
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = c2;
+ traversalStack[stackPtr].dist = d2;
+ qbvh_stack_sort(&traversalStack[stackPtr],
+ &traversalStack[stackPtr - 1],
+ &traversalStack[stackPtr - 2],
+ &traversalStack[stackPtr - 3]);
+ }
+
+ nodeAddr = traversalStack[stackPtr].addr;
+ --stackPtr;
+ }
+
+ /* If node is leaf, fetch triangle list. */
+ if(nodeAddr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE);
+ int primAddr = __float_as_int(leaf.x);
+
+#if BVH_FEATURE(BVH_INSTANCING)
+ if(primAddr >= 0) {
+#endif
+ int primAddr2 = __float_as_int(leaf.y);
+ const uint type = __float_as_int(leaf.w);
+ const uint p_type = type & PRIMITIVE_ALL;
+ bool hit;
+
+ /* Pop. */
+ nodeAddr = traversalStack[stackPtr].addr;
+ --stackPtr;
+
+ /* Primitive intersection. */
+ switch(p_type) {
+ case PRIMITIVE_TRIANGLE: {
+ for(; primAddr < primAddr2; primAddr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ /* Only primitives from volume object. */
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ /* Intersect ray against primitive. */
+ hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, primAddr);
+ if(hit) {
+ /* Move on to next entry in intersections array. */
+ isect_array++;
+ num_hits++;
+#if BVH_FEATURE(BVH_INSTANCING)
+ num_hits_in_instance++;
+#endif
+ isect_array->t = isect_t;
+ if(num_hits == max_hits) {
+#if BVH_FEATURE(BVH_INSTANCING)
+#if BVH_FEATURE(BVH_MOTION)
+ float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir));
+#else
+ Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+ float t_fac = len(transform_direction(&tfm, 1.0f/idir));
+#endif
+ for(int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array-i-1)->t *= t_fac;
+ }
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+ return num_hits;
+ }
+ }
+ }
+ break;
+ }
+#if BVH_FEATURE(BVH_MOTION)
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ for(; primAddr < primAddr2; primAddr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ /* Only primitives from volume object. */
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ /* Intersect ray against primitive. */
+ hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, primAddr);
+ if(hit) {
+ /* Move on to next entry in intersections array. */
+ isect_array++;
+ num_hits++;
+#if BVH_FEATURE(BVH_INSTANCING)
+ num_hits_in_instance++;
+#endif
+ isect_array->t = isect_t;
+ if(num_hits == max_hits) {
+#if BVH_FEATURE(BVH_INSTANCING)
+# if BVH_FEATURE(BVH_MOTION)
+ float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir));
+# else
+ Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+ float t_fac = len(transform_direction(&tfm, 1.0f/idir));
+#endif
+ for(int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array-i-1)->t *= t_fac;
+ }
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+ return num_hits;
+ }
+ }
+ }
+ break;
+ }
+#endif
+#if BVH_FEATURE(BVH_HAIR)
+ case PRIMITIVE_CURVE:
+ case PRIMITIVE_MOTION_CURVE: {
+ for(; primAddr < primAddr2; primAddr++) {
+ kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
+ /* Only primitives from volume object. */
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+ if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+ /* Intersect ray against primitive. */
+ if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
+ hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0);
+ else
+ hit = bvh_curve_intersect(kg, isect_array, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0);
+ if(hit) {
+ /* Move on to next entry in intersections array. */
+ isect_array++;
+ num_hits++;
+#if BVH_FEATURE(BVH_INSTANCING)
+ num_hits_in_instance++;
+#endif
+ isect_array->t = isect_t;
+ if(num_hits == max_hits) {
+#if BVH_FEATURE(BVH_INSTANCING)
+# if BVH_FEATURE(BVH_MOTION)
+ float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir));
+# else
+ Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+ float t_fac = len(transform_direction(&tfm, 1.0f/idir));
+#endif
+ for(int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array-i-1)->t *= t_fac;
+ }
+#endif /* BVH_FEATURE(BVH_INSTANCING) */
+ return num_hits;
+ }
+ }
+ }
+ break;
+ }
+#endif
+ }
+ }
+#if BVH_FEATURE(BVH_INSTANCING)
+ else {
+ /* Instance push. */
+ object = kernel_tex_fetch(__prim_object, -primAddr-1);
+ int object_flag = kernel_tex_fetch(__object_flag, object);
+
+ if(object_flag & SD_OBJECT_HAS_VOLUME) {
+
+#if BVH_FEATURE(BVH_MOTION)
+ bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm);
+#else
+ bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
+#endif
+
+ if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+ if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+ if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ tfar = ssef(isect_t);
+ idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+#ifdef __KERNEL_AVX2__
+ P_idir = P*idir;
+ P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#else
+ org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
+ num_hits_in_instance = 0;
+ isect_array->t = isect_t;
+
+ ++stackPtr;
+ kernel_assert(stackPtr < BVH_QSTACK_SIZE);
+ traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
+
+ nodeAddr = kernel_tex_fetch(__object_node, object);
+ }
+ else {
+ /* Pop. */
+ object = OBJECT_NONE;
+ nodeAddr = traversalStack[stackPtr].addr;
+ --stackPtr;
+ }
+ }
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+#if BVH_FEATURE(BVH_INSTANCING)
+ if(stackPtr >= 0) {
+ kernel_assert(object != OBJECT_NONE);
+
+ /* Instance pop. */
+ if(num_hits_in_instance) {
+ float t_fac;
+#if BVH_FEATURE(BVH_MOTION)
+ bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_tfm);
+#else
+ bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
+#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
+ /* Scale isect->t to adjust for instancing. */
+ for(int i = 0; i < num_hits_in_instance; i++) {
+ (isect_array-i-1)->t *= t_fac;
+ }
+ }
+ else {
+ float ignore_t = FLT_MAX;
+#if BVH_FEATURE(BVH_MOTION)
+ bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_tfm);
+#else
+ bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
+#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
+ }
+
+ if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
+ if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
+ if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+ tfar = ssef(isect_t);
+ idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+#ifdef __KERNEL_AVX2__
+ P_idir = P*idir;
+ P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
+#else
+ org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
+ isect_t = tmax;
+ isect_array->t = isect_t;
+
+ object = OBJECT_NONE;
+ nodeAddr = traversalStack[stackPtr].addr;
+ --stackPtr;
+ }
+#endif /* FEATURE(BVH_INSTANCING) */
+ } while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+ return num_hits;
+}
diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h
index dd3928682e3..995dfac5b09 100644
--- a/intern/cycles/kernel/geom/geom_triangle.h
+++ b/intern/cycles/kernel/geom/geom_triangle.h
@@ -27,14 +27,14 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline float3 triangle_normal(KernelGlobals *kg, ShaderData *sd)
{
/* load triangle vertices */
- float4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
+ float4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim));
float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
/* return normal */
- if(sd->flag & SD_NEGATIVE_SCALE_APPLIED)
+ if(ccl_fetch(sd, flag) & SD_NEGATIVE_SCALE_APPLIED)
return normalize(cross(v2 - v0, v1 - v0));
else
return normalize(cross(v1 - v0, v2 - v0));
@@ -94,7 +94,7 @@ ccl_device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int prim, flo
/* Ray differentials on triangle */
-ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, int prim, float3 *dPdu, float3 *dPdv)
+ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, int prim, ccl_addr_space float3 *dPdu, ccl_addr_space float3 *dPdv)
{
/* fetch triangle vertex coordinates */
float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
@@ -116,34 +116,34 @@ ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *s
if(dx) *dx = 0.0f;
if(dy) *dy = 0.0f;
- return kernel_tex_fetch(__attributes_float, offset + sd->prim);
+ return kernel_tex_fetch(__attributes_float, offset + ccl_fetch(sd, prim));
}
else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) {
- float4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
+ float4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim));
float f0 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.x));
float f1 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.y));
float f2 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.z));
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
- if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
+ if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2;
+ if(dy) *dy = ccl_fetch(sd, du).dy*f0 + ccl_fetch(sd, dv).dy*f1 - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*f2;
#endif
- return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
+ return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2;
}
else if(elem == ATTR_ELEMENT_CORNER) {
- int tri = offset + sd->prim*3;
+ int tri = offset + ccl_fetch(sd, prim)*3;
float f0 = kernel_tex_fetch(__attributes_float, tri + 0);
float f1 = kernel_tex_fetch(__attributes_float, tri + 1);
float f2 = kernel_tex_fetch(__attributes_float, tri + 2);
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
- if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
+ if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2;
+ if(dy) *dy = ccl_fetch(sd, du).dy*f0 + ccl_fetch(sd, dv).dy*f1 - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*f2;
#endif
- return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
+ return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2;
}
else {
if(dx) *dx = 0.0f;
@@ -159,24 +159,24 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData
if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
- return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim));
+ return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + ccl_fetch(sd, prim)));
}
else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) {
- float4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
+ float4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim));
float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x)));
float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y)));
float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z)));
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
- if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
+ if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2;
+ if(dy) *dy = ccl_fetch(sd, du).dy*f0 + ccl_fetch(sd, dv).dy*f1 - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*f2;
#endif
- return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
+ return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2;
}
else if(elem == ATTR_ELEMENT_CORNER || elem == ATTR_ELEMENT_CORNER_BYTE) {
- int tri = offset + sd->prim*3;
+ int tri = offset + ccl_fetch(sd, prim)*3;
float3 f0, f1, f2;
if(elem == ATTR_ELEMENT_CORNER) {
@@ -191,11 +191,11 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData
}
#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
- if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
+ if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2;
+ if(dy) *dy = ccl_fetch(sd, du).dy*f0 + ccl_fetch(sd, dv).dy*f1 - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*f2;
#endif
- return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
+ return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2;
}
else {
if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index c9e30a451da..3ef918dc842 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-/* Triangle/Ray intersections .
+/* Triangle/Ray intersections.
*
* For BVH ray intersection we use a precomputed triangle storage to accelerate
* intersection at the cost of more memory usage.
@@ -49,18 +49,27 @@ typedef struct IsectPrecalc {
float Sx, Sy, Sz;
} IsectPrecalc;
-/* Workaround for CUDA toolkit 6.5.16. */
-#if defined(__KERNEL_CPU__) || !defined(__KERNEL_CUDA_EXPERIMENTAL__) || __CUDA_ARCH__ < 500
+#if defined(__KERNEL_CUDA__)
# if (defined(i386) || defined(_M_IX86))
+# if __CUDA_ARCH__ > 500
ccl_device_noinline
-# else
+# else /* __CUDA_ARCH__ > 500 */
ccl_device_inline
-# endif
-#else
+# endif /* __CUDA_ARCH__ > 500 */
+# else /* (defined(i386) || defined(_M_IX86)) */
+# if defined(__KERNEL_EXPERIMENTAL__) && (__CUDA_ARCH__ >= 500)
ccl_device_noinline
-#endif
+# else
+ccl_device_inline
+# endif
+# endif /* (defined(i386) || defined(_M_IX86)) */
+#elif defined(__KERNEL_OPENCL_APPLE__)
+ccl_device_noinline
+#else /* defined(__KERNEL_OPENCL_APPLE__) */
+ccl_device_inline
+#endif /* defined(__KERNEL_OPENCL_APPLE__) */
void triangle_intersect_precalc(float3 dir,
- IsectPrecalc *isect_precalc)
+ IsectPrecalc *isect_precalc)
{
/* Calculate dimension where the ray direction is maximal. */
int kz = util_max_axis(make_float3(fabsf(dir.x),
@@ -77,10 +86,10 @@ void triangle_intersect_precalc(float3 dir,
}
/* Calculate the shear constants. */
- float inf_dir_z = 1.0f / IDX(dir, kz);
- isect_precalc->Sx = IDX(dir, kx) * inf_dir_z;
- isect_precalc->Sy = IDX(dir, ky) * inf_dir_z;
- isect_precalc->Sz = inf_dir_z;
+ float inv_dir_z = 1.0f / IDX(dir, kz);
+ isect_precalc->Sx = IDX(dir, kx) * inv_dir_z;
+ isect_precalc->Sy = IDX(dir, ky) * inv_dir_z;
+ isect_precalc->Sz = inv_dir_z;
/* Store the dimensions. */
isect_precalc->kx = kx;
@@ -98,7 +107,6 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
const IsectPrecalc *isect_precalc,
Intersection *isect,
float3 P,
- float3 dir,
uint visibility,
int object,
int triAddr)
@@ -111,14 +119,12 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
const float Sz = isect_precalc->Sz;
/* Calculate vertices relative to ray origin. */
- float3 tri[3];
- tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0));
- tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1));
- tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2));
-
- const float3 A = tri[0] - P;
- const float3 B = tri[1] - P;
- const float3 C = tri[2] - P;
+ const float4 tri_a = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0),
+ tri_b = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1),
+ tri_c = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
+ const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z);
+ const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z);
+ const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z);
const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz);
const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz);
@@ -155,8 +161,8 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
*/
const float T = (U * A_kz + V * B_kz + W * C_kz) * Sz;
const float sign_T = xor_signmast(T, sign_mask);
- if ((sign_T < 0.0f) ||
- (sign_T > isect->t * xor_signmast(det, sign_mask)))
+ if((sign_T < 0.0f) ||
+ (sign_T > isect->t * xor_signmast(det, sign_mask)))
{
return false;
}
@@ -191,7 +197,6 @@ ccl_device_inline void triangle_intersect_subsurface(
const IsectPrecalc *isect_precalc,
Intersection *isect_array,
float3 P,
- float3 dir,
int object,
int triAddr,
float tmax,
@@ -207,14 +212,12 @@ ccl_device_inline void triangle_intersect_subsurface(
const float Sz = isect_precalc->Sz;
/* Calculate vertices relative to ray origin. */
- float3 tri[3];
- tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0));
- tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1));
- tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2));
-
- const float3 A = tri[0] - P;
- const float3 B = tri[1] - P;
- const float3 C = tri[2] - P;
+ const float4 tri_a = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0),
+ tri_b = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1),
+ tri_c = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
+ const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z);
+ const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z);
+ const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z);
const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz);
const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz);
@@ -249,13 +252,10 @@ ccl_device_inline void triangle_intersect_subsurface(
/* Calculate scaled z−coordinates of vertices and use them to calculate
* the hit distance.
*/
- const float Az = Sz * A_kz;
- const float Bz = Sz * B_kz;
- const float Cz = Sz * C_kz;
- const float T = U * Az + V * Bz + W * Cz;
-
- if ((xor_signmast(T, sign_mask) < 0.0f) ||
- (xor_signmast(T, sign_mask) > tmax * xor_signmast(det, sign_mask)))
+ const float T = (U * A_kz + V * B_kz + W * C_kz) * Sz;
+ const float sign_T = xor_signmast(T, sign_mask);
+ if((sign_T < 0.0f) ||
+ (sign_T > tmax * xor_signmast(det, sign_mask)))
{
return;
}
@@ -315,7 +315,7 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg,
return P;
}
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_itfm;
+ Transform tfm = ccl_fetch(sd, ob_itfm);
#else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
#endif
@@ -327,14 +327,12 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg,
P = P + D*t;
- float3 tri[3];
- tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0));
- tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1));
- tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2));
-
- float3 edge1 = tri[0] - tri[2];
- float3 edge2 = tri[1] - tri[2];
- float3 tvec = P - tri[2];
+ const float4 tri_a = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0),
+ tri_b = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1),
+ tri_c = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2);
+ float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
+ float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
+ float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
float3 qvec = cross(tvec, edge1);
float3 pvec = cross(D, edge2);
float rt = dot(edge2, qvec) / dot(edge1, pvec);
@@ -343,7 +341,7 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg,
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_tfm;
+ Transform tfm = ccl_fetch(sd, ob_tfm);
#else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
#endif
@@ -372,7 +370,7 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg,
#ifdef __INTERSECTION_REFINE__
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_itfm;
+ Transform tfm = ccl_fetch(sd, ob_itfm);
#else
Transform tfm = object_fetch_transform(kg,
isect->object,
@@ -386,14 +384,12 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg,
P = P + D*t;
- float3 tri[3];
- tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0));
- tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1));
- tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2));
-
- float3 edge1 = tri[0] - tri[2];
- float3 edge2 = tri[1] - tri[2];
- float3 tvec = P - tri[2];
+ const float4 tri_a = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0),
+ tri_b = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1),
+ tri_c = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2);
+ float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
+ float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
+ float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
float3 qvec = cross(tvec, edge1);
float3 pvec = cross(D, edge2);
float rt = dot(edge2, qvec) / dot(edge1, pvec);
@@ -402,7 +398,7 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg,
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_tfm;
+ Transform tfm = ccl_fetch(sd, ob_tfm);
#else
Transform tfm = object_fetch_transform(kg,
isect->object,
diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h
index c33509fbf4f..c72afa2a3a4 100644
--- a/intern/cycles/kernel/geom/geom_volume.h
+++ b/intern/cycles/kernel/geom/geom_volume.h
@@ -60,7 +60,7 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd,
#endif
if(dx) *dx = 0.0f;
- if(dx) *dy = 0.0f;
+ if(dy) *dy = 0.0f;
/* todo: support float textures to lower memory usage for single floats */
return average(float4_to_float3(r));
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index 369c615eade..2dc87fffcbc 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -176,7 +176,7 @@ ccl_device_inline void path_radiance_init(PathRadiance *L, int use_light_pass)
#endif
}
-ccl_device_inline void path_radiance_bsdf_bounce(PathRadiance *L, float3 *throughput,
+ccl_device_inline void path_radiance_bsdf_bounce(PathRadiance *L, ccl_addr_space float3 *throughput,
BsdfEval *bsdf_eval, float bsdf_pdf, int bounce, int bsdf_label)
{
float inverse_pdf = 1.0f/bsdf_pdf;
@@ -341,12 +341,12 @@ ccl_device_inline void path_radiance_reset_indirect(PathRadiance *L)
ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadiance *L)
{
- float3 L_sum, L_direct, L_indirect;
- float clamp_direct = kernel_data.integrator.sample_clamp_direct;
- float clamp_indirect = kernel_data.integrator.sample_clamp_indirect;
-
+ float3 L_sum;
/* Light Passes are used */
#ifdef __PASSES__
+ float3 L_direct, L_indirect;
+ float clamp_direct = kernel_data.integrator.sample_clamp_direct;
+ float clamp_indirect = kernel_data.integrator.sample_clamp_indirect;
if(L->use_light_pass) {
path_radiance_sum_indirect(L);
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 20d7a143c67..2fca83c615f 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -57,7 +57,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
/* sample subsurface scattering */
if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) {
/* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
- if (kernel_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, &throughput))
+ if(kernel_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, &throughput))
is_sss_sample = true;
}
#endif
@@ -208,7 +208,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
filter_x = filter_y = 0.5f;
}
else {
- path_rng_2D(kg, &rng, sample, num_samples, PRNG_FILTER_U, &filter_x, &filter_x);
+ path_rng_2D(kg, &rng, sample, num_samples, PRNG_FILTER_U, &filter_x, &filter_y);
}
/* subpixel u/v offset */
@@ -259,7 +259,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
/* data passes */
case SHADER_EVAL_NORMAL:
{
- if ((sd.flag & SD_HAS_BUMP)) {
+ if((sd.flag & SD_HAS_BUMP)) {
shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN);
}
diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h
index ded222e20ff..3ce5134181a 100644
--- a/intern/cycles/kernel/kernel_camera.h
+++ b/intern/cycles/kernel/kernel_camera.h
@@ -16,17 +16,6 @@
CCL_NAMESPACE_BEGIN
-/* Workaround for explicit conversion from constant to private memory
- * pointer when using OpenCL.
- *
- * TODO(sergey): Find a real solution for this.
- */
-#ifdef __KERNEL_OPENCL__
-# define __motion_as_decoupled_const_ptr(motion) ((motion))
-#else
-# define __motion_as_decoupled_const_ptr(motion) ((const DecompMotionTransform*)(motion))
-#endif
-
/* Perspective Camera */
ccl_device float2 camera_sample_aperture(KernelGlobals *kg, float u, float v)
@@ -50,7 +39,7 @@ ccl_device float2 camera_sample_aperture(KernelGlobals *kg, float u, float v)
return bokeh;
}
-ccl_device void camera_sample_perspective(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, Ray *ray)
+ccl_device void camera_sample_perspective(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, ccl_addr_space Ray *ray)
{
/* create ray form raster position */
Transform rastertocamera = kernel_data.cam.rastertocamera;
@@ -80,9 +69,16 @@ ccl_device void camera_sample_perspective(KernelGlobals *kg, float raster_x, flo
#ifdef __CAMERA_MOTION__
if(kernel_data.cam.have_motion) {
+#ifdef __KERNEL_OPENCL__
+ const MotionTransform tfm = kernel_data.cam.motion;
transform_motion_interpolate(&cameratoworld,
- __motion_as_decoupled_const_ptr(&kernel_data.cam.motion),
+ ((const DecompMotionTransform*)&tfm),
ray->time);
+#else
+ transform_motion_interpolate(&cameratoworld,
+ ((const DecompMotionTransform*)&kernel_data.cam.motion),
+ ray->time);
+#endif
}
#endif
@@ -112,8 +108,7 @@ ccl_device void camera_sample_perspective(KernelGlobals *kg, float raster_x, flo
}
/* Orthographic Camera */
-
-ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, Ray *ray)
+ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, ccl_addr_space Ray *ray)
{
/* create ray form raster position */
Transform rastertocamera = kernel_data.cam.rastertocamera;
@@ -144,9 +139,16 @@ ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, fl
#ifdef __CAMERA_MOTION__
if(kernel_data.cam.have_motion) {
+#ifdef __KERNEL_OPENCL__
+ const MotionTransform tfm = kernel_data.cam.motion;
transform_motion_interpolate(&cameratoworld,
- __motion_as_decoupled_const_ptr(&kernel_data.cam.motion),
+ (const DecompMotionTransform*)&tfm,
ray->time);
+#else
+ transform_motion_interpolate(&cameratoworld,
+ (const DecompMotionTransform*)&kernel_data.cam.motion,
+ ray->time);
+#endif
}
#endif
@@ -172,7 +174,7 @@ ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, fl
/* Panorama Camera */
-ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, Ray *ray)
+ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, ccl_addr_space Ray *ray)
{
Transform rastertocamera = kernel_data.cam.rastertocamera;
float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f));
@@ -220,10 +222,18 @@ ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float
Transform cameratoworld = kernel_data.cam.cameratoworld;
#ifdef __CAMERA_MOTION__
- if(kernel_data.cam.have_motion)
+ if(kernel_data.cam.have_motion) {
+#ifdef __KERNEL_OPENCL__
+ const MotionTransform tfm = kernel_data.cam.motion;
transform_motion_interpolate(&cameratoworld,
- __motion_as_decoupled_const_ptr(&kernel_data.cam.motion),
+ (const DecompMotionTransform*)&tfm,
ray->time);
+#else
+ transform_motion_interpolate(&cameratoworld,
+ (const DecompMotionTransform*)&kernel_data.cam.motion,
+ ray->time);
+#endif
+ }
#endif
ray->P = transform_point(&cameratoworld, ray->P);
@@ -245,7 +255,7 @@ ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float
/* Common */
ccl_device void camera_sample(KernelGlobals *kg, int x, int y, float filter_u, float filter_v,
- float lens_u, float lens_v, float time, Ray *ray)
+ float lens_u, float lens_v, float time, ccl_addr_space Ray *ray)
{
/* pixel filter */
int filter_table_offset = kernel_data.film.filter_table_offset;
@@ -308,7 +318,7 @@ ccl_device_inline float3 camera_world_to_ndc(KernelGlobals *kg, ShaderData *sd,
{
if(kernel_data.cam.type != CAMERA_PANORAMA) {
/* perspective / ortho */
- if(sd->object == PRIM_NONE && kernel_data.cam.type == CAMERA_PERSPECTIVE)
+ if(ccl_fetch(sd, object) == PRIM_NONE && kernel_data.cam.type == CAMERA_PERSPECTIVE)
P += camera_position(kg);
Transform tfm = kernel_data.cam.worldtondc;
@@ -318,7 +328,7 @@ ccl_device_inline float3 camera_world_to_ndc(KernelGlobals *kg, ShaderData *sd,
/* panorama */
Transform tfm = kernel_data.cam.worldtocamera;
- if(sd->object != OBJECT_NONE)
+ if(ccl_fetch(sd, object) != OBJECT_NONE)
P = normalize(transform_point(&tfm, P));
else
P = normalize(transform_direction(&tfm, P));
@@ -329,7 +339,4 @@ ccl_device_inline float3 camera_world_to_ndc(KernelGlobals *kg, ShaderData *sd,
}
}
-#undef __motion_as_decoupled_const_ptr
-
CCL_NAMESPACE_END
-
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index 200667a0911..0bf1ed36d1e 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -24,6 +24,15 @@
*/
#if defined(__GNUC__) && defined(NDEBUG)
# pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+# pragma GCC diagnostic ignored "-Wuninitialized"
+#endif
+
+/* Selective nodes compilation. */
+#ifndef __NODES_MAX_GROUP__
+# define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX
+#endif
+#ifndef __NODES_FEATURES__
+# define __NODES_FEATURES__ NODE_FEATURE_ALL
#endif
#include "util_debug.h"
@@ -32,6 +41,8 @@
#include "util_half.h"
#include "util_types.h"
+#define ccl_addr_space
+
/* On x86_64, versions of glibc < 2.16 have an issue where expf is
* much slower than the double version. This was fixed in glibc 2.16.
*/
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 904736c190c..9fdd3abfec3 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -22,6 +22,14 @@
#define CCL_NAMESPACE_BEGIN
#define CCL_NAMESPACE_END
+/* Selective nodes compilation. */
+#ifndef __NODES_MAX_GROUP__
+# define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX
+#endif
+#ifndef __NODES_FEATURES__
+# define __NODES_FEATURES__ NODE_FEATURE_ALL
+#endif
+
#include <cuda.h>
#include <float.h>
@@ -33,6 +41,7 @@
#define ccl_global
#define ccl_constant
#define ccl_may_alias
+#define ccl_addr_space
/* No assert supported for CUDA */
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index d480ec0f270..e8b36d2605d 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -37,6 +37,22 @@
#define ccl_may_alias
#define ccl_constant __constant
#define ccl_global __global
+#define ccl_local __local
+#define ccl_private __private
+
+#ifdef __SPLIT_KERNEL__
+#define ccl_addr_space __global
+#else
+#define ccl_addr_space
+#endif
+
+/* Selective nodes compilation. */
+#ifndef __NODES_MAX_GROUP__
+# define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX
+#endif
+#ifndef __NODES_FEATURES__
+# define __NODES_FEATURES__ NODE_FEATURE_ALL
+#endif
/* no assert in opencl */
#define kernel_assert(cond)
diff --git a/intern/cycles/kernel/kernel_debug.h b/intern/cycles/kernel/kernel_debug.h
index f532442ba41..24d6458567e 100644
--- a/intern/cycles/kernel/kernel_debug.h
+++ b/intern/cycles/kernel/kernel_debug.h
@@ -19,11 +19,13 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline void debug_data_init(DebugData *debug_data)
{
debug_data->num_bvh_traversal_steps = 0;
+ debug_data->num_bvh_traversed_instances = 0;
+ debug_data->num_ray_bounces = 0;
}
ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg,
ccl_global float *buffer,
- PathState *state,
+ ccl_addr_space PathState *state,
DebugData *debug_data,
int sample)
{
@@ -33,6 +35,16 @@ ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg,
sample,
debug_data->num_bvh_traversal_steps);
}
+ if(flag & PASS_BVH_TRAVERSED_INSTANCES) {
+ kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_instances,
+ sample,
+ debug_data->num_bvh_traversed_instances);
+ }
+ if(flag & PASS_RAY_BOUNCES) {
+ kernel_write_pass_float(buffer + kernel_data.film.pass_ray_bounces,
+ sample,
+ debug_data->num_ray_bounces);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_differential.h b/intern/cycles/kernel/kernel_differential.h
index e5fbd5b450e..ae1e70f0167 100644
--- a/intern/cycles/kernel/kernel_differential.h
+++ b/intern/cycles/kernel/kernel_differential.h
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
/* See "Tracing Ray Differentials", Homan Igehy, 1999. */
-ccl_device void differential_transfer(differential3 *dP_, const differential3 dP, float3 D, const differential3 dD, float3 Ng, float t)
+ccl_device void differential_transfer(ccl_addr_space differential3 *dP_, const differential3 dP, float3 D, const differential3 dD, float3 Ng, float t)
{
/* ray differential transfer through homogeneous medium, to
* compute dPdx/dy at a shading point from the incoming ray */
@@ -31,7 +31,7 @@ ccl_device void differential_transfer(differential3 *dP_, const differential3 dP
dP_->dy = tmpy - dot(tmpy, Ng)*tmp;
}
-ccl_device void differential_incoming(differential3 *dI, const differential3 dD)
+ccl_device void differential_incoming(ccl_addr_space differential3 *dI, const differential3 dD)
{
/* compute dIdx/dy at a shading point, we just need to negate the
* differential of the ray direction */
@@ -40,7 +40,7 @@ ccl_device void differential_incoming(differential3 *dI, const differential3 dD)
dI->dy = -dD.dy;
}
-ccl_device void differential_dudv(differential *du, differential *dv, float3 dPdu, float3 dPdv, differential3 dP, float3 Ng)
+ccl_device void differential_dudv(ccl_addr_space differential *du, ccl_addr_space differential *dv, float3 dPdu, float3 dPdv, differential3 dP, float3 Ng)
{
/* now we have dPdx/dy from the ray differential transfer, and dPdu/dv
* from the primitive, we can compute dudx/dy and dvdx/dy. these are
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index 7523105607f..de9e8d77ec8 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -17,12 +17,20 @@
CCL_NAMESPACE_BEGIN
/* Direction Emission */
-
ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
- LightSample *ls, float3 I, differential3 dI, float t, float time, int bounce, int transparent_bounce)
+ LightSample *ls, float3 I, differential3 dI, float t, float time, int bounce, int transparent_bounce
+#ifdef __SPLIT_KERNEL__
+ ,ShaderData *sd_input
+#endif
+)
{
/* setup shading at emitter */
- ShaderData sd;
+#ifdef __SPLIT_KERNEL__
+ ShaderData *sd = sd_input;
+#else
+ ShaderData sd_object;
+ ShaderData *sd = &sd_object;
+#endif
float3 eval;
#ifdef __BACKGROUND_MIS__
@@ -37,23 +45,23 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
ray.dP = differential3_zero();
ray.dD = dI;
- shader_setup_from_background(kg, &sd, &ray, bounce+1, transparent_bounce);
- eval = shader_eval_background(kg, &sd, 0, SHADER_CONTEXT_EMISSION);
+ shader_setup_from_background(kg, sd, &ray, bounce+1, transparent_bounce);
+ eval = shader_eval_background(kg, sd, 0, SHADER_CONTEXT_EMISSION);
}
else
#endif
{
- shader_setup_from_sample(kg, &sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time, bounce+1, transparent_bounce);
+ shader_setup_from_sample(kg, sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time, bounce+1, transparent_bounce);
- ls->Ng = sd.Ng;
+ ls->Ng = ccl_fetch(sd, Ng);
/* no path flag, we're evaluating this for all closures. that's weak but
* we'd have to do multiple evaluations otherwise */
- shader_eval_surface(kg, &sd, 0.0f, 0, SHADER_CONTEXT_EMISSION);
+ shader_eval_surface(kg, sd, 0.0f, 0, SHADER_CONTEXT_EMISSION);
/* evaluate emissive closure */
- if(sd.flag & SD_EMISSION)
- eval = shader_emissive_eval(kg, &sd);
+ if(ccl_fetch(sd, flag) & SD_EMISSION)
+ eval = shader_emissive_eval(kg, sd);
else
eval = make_float3(0.0f, 0.0f, 0.0f);
}
@@ -65,7 +73,11 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd,
LightSample *ls, Ray *ray, BsdfEval *eval, bool *is_lamp,
- int bounce, int transparent_bounce)
+ int bounce, int transparent_bounce
+#ifdef __SPLIT_KERNEL__
+ , ShaderData *sd_DL
+#endif
+ )
{
if(ls->pdf == 0.0f)
return false;
@@ -74,7 +86,14 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd,
differential3 dD = differential3_zero();
/* evaluate closure */
- float3 light_eval = direct_emissive_eval(kg, ls, -ls->D, dD, ls->t, sd->time, bounce, transparent_bounce);
+
+ float3 light_eval = direct_emissive_eval(kg, ls, -ls->D, dD, ls->t, ccl_fetch(sd, time),
+ bounce,
+ transparent_bounce
+#ifdef __SPLIT_KERNEL__
+ ,sd_DL
+#endif
+ );
if(is_zero(light_eval))
return false;
@@ -83,7 +102,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd,
float bsdf_pdf;
#ifdef __VOLUME__
- if(sd->prim != PRIM_NONE)
+ if(ccl_fetch(sd, prim) != PRIM_NONE)
shader_bsdf_eval(kg, sd, ls->D, eval, &bsdf_pdf);
else
shader_volume_phase_eval(kg, sd, ls->D, eval, &bsdf_pdf);
@@ -118,8 +137,8 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd,
if(ls->shader & SHADER_CAST_SHADOW) {
/* setup ray */
- bool transmit = (dot(sd->Ng, ls->D) < 0.0f);
- ray->P = ray_offset(sd->P, (transmit)? -sd->Ng: sd->Ng);
+ bool transmit = (dot(ccl_fetch(sd, Ng), ls->D) < 0.0f);
+ ray->P = ray_offset(ccl_fetch(sd, P), (transmit)? -ccl_fetch(sd, Ng): ccl_fetch(sd, Ng));
if(ls->t == FLT_MAX) {
/* distant light */
@@ -132,7 +151,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd,
ray->D = normalize_len(ray->D, &ray->t);
}
- ray->dP = sd->dP;
+ ray->dP = ccl_fetch(sd, dP);
ray->dD = differential3_zero();
}
else {
@@ -154,14 +173,14 @@ ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, Shader
float3 L = shader_emissive_eval(kg, sd);
#ifdef __HAIR__
- if(!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) && (sd->type & PRIMITIVE_ALL_TRIANGLE))
+ if(!(path_flag & PATH_RAY_MIS_SKIP) && (ccl_fetch(sd, flag) & SD_USE_MIS) && (ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE))
#else
- if(!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS))
+ if(!(path_flag & PATH_RAY_MIS_SKIP) && (ccl_fetch(sd, flag) & SD_USE_MIS))
#endif
{
/* multiple importance sampling, get triangle light pdf,
* and compute weight with respect to BSDF pdf */
- float pdf = triangle_light_pdf(kg, sd->Ng, sd->I, t);
+ float pdf = triangle_light_pdf(kg, ccl_fetch(sd, Ng), ccl_fetch(sd, I), t);
float mis_weight = power_heuristic(bsdf_pdf, pdf);
return L*mis_weight;
@@ -172,7 +191,11 @@ ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, Shader
/* Indirect Lamp Emission */
-ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, PathState *state, Ray *ray, float3 *emission)
+ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, PathState *state, Ray *ray, float3 *emission
+#ifdef __SPLIT_KERNEL__
+ ,ShaderData *sd
+#endif
+ )
{
bool hit_lamp = false;
@@ -188,14 +211,21 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, PathState *st
/* use visibility flag to skip lights */
if(ls.shader & SHADER_EXCLUDE_ANY) {
if(((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) ||
- ((ls.shader & SHADER_EXCLUDE_GLOSSY) && (state->flag & PATH_RAY_REFLECT)) ||
+ ((ls.shader & SHADER_EXCLUDE_GLOSSY) &&
+ ((state->flag & (PATH_RAY_GLOSSY|PATH_RAY_REFLECT)) == (PATH_RAY_GLOSSY|PATH_RAY_REFLECT))) ||
((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) ||
((ls.shader & SHADER_EXCLUDE_SCATTER) && (state->flag & PATH_RAY_VOLUME_SCATTER)))
continue;
}
#endif
- float3 L = direct_emissive_eval(kg, &ls, -ray->D, ray->dD, ls.t, ray->time, state->bounce, state->transparent_bounce);
+ float3 L = direct_emissive_eval(kg, &ls, -ray->D, ray->dD, ls.t, ray->time,
+ state->bounce,
+ state->transparent_bounce
+#ifdef __SPLIT_KERNEL__
+ ,sd
+#endif
+ );
#ifdef __VOLUME__
if(state->volume_stack[0].shader != SHADER_NONE) {
@@ -224,7 +254,11 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, PathState *st
/* Indirect Background */
-ccl_device_noinline float3 indirect_background(KernelGlobals *kg, PathState *state, Ray *ray)
+ccl_device_noinline float3 indirect_background(KernelGlobals *kg, ccl_addr_space PathState *state, ccl_addr_space Ray *ray
+#ifdef __SPLIT_KERNEL__
+ ,ShaderData *sd_global
+#endif
+ )
{
#ifdef __BACKGROUND__
int shader = kernel_data.background.surface_shader;
@@ -232,18 +266,25 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg, PathState *sta
/* use visibility flag to skip lights */
if(shader & SHADER_EXCLUDE_ANY) {
if(((shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) ||
- ((shader & SHADER_EXCLUDE_GLOSSY) && (state->flag & PATH_RAY_REFLECT)) ||
+ ((shader & SHADER_EXCLUDE_GLOSSY) &&
+ ((state->flag & (PATH_RAY_GLOSSY|PATH_RAY_REFLECT)) == (PATH_RAY_GLOSSY|PATH_RAY_REFLECT))) ||
((shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) ||
((shader & SHADER_EXCLUDE_CAMERA) && (state->flag & PATH_RAY_CAMERA)) ||
((shader & SHADER_EXCLUDE_SCATTER) && (state->flag & PATH_RAY_VOLUME_SCATTER)))
return make_float3(0.0f, 0.0f, 0.0f);
}
+#ifdef __SPLIT_KERNEL__
/* evaluate background closure */
+ Ray priv_ray = *ray;
+ shader_setup_from_background(kg, sd_global, &priv_ray, state->bounce+1, state->transparent_bounce);
+ float3 L = shader_eval_background(kg, sd_global, state->flag, SHADER_CONTEXT_EMISSION);
+#else
ShaderData sd;
shader_setup_from_background(kg, &sd, ray, state->bounce+1, state->transparent_bounce);
float3 L = shader_eval_background(kg, &sd, state->flag, SHADER_CONTEXT_EMISSION);
+#endif
#ifdef __BACKGROUND_MIS__
/* check if background light exists or if we should skip pdf */
@@ -252,7 +293,7 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg, PathState *sta
if(!(state->flag & PATH_RAY_MIS_SKIP) && res) {
/* multiple importance sampling, get background light pdf for ray
* direction, and compute weight with respect to BSDF pdf */
- float pdf = background_light_pdf(kg, ray->D);
+ float pdf = background_light_pdf(kg, ray->P, ray->D);
float mis_weight = power_heuristic(state->ray_pdf, pdf);
return L*mis_weight;
diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h
index 4668b40b86d..f9e9b413898 100644
--- a/intern/cycles/kernel/kernel_film.h
+++ b/intern/cycles/kernel/kernel_film.h
@@ -27,7 +27,7 @@ ccl_device float4 film_map(KernelGlobals *kg, float4 irradiance, float scale)
result.z = color_scene_linear_to_srgb(result.z*exposure);
/* clamp since alpha might be > 1.0 due to russian roulette */
- result.w = clamp(result.w, 0.0f, 1.0f);
+ result.w = saturate(result.w);
return result;
}
@@ -37,10 +37,10 @@ ccl_device uchar4 film_float_to_byte(float4 color)
uchar4 result;
/* simple float to byte conversion */
- result.x = (uchar)clamp(color.x*255.0f, 0.0f, 255.0f);
- result.y = (uchar)clamp(color.y*255.0f, 0.0f, 255.0f);
- result.z = (uchar)clamp(color.z*255.0f, 0.0f, 255.0f);
- result.w = (uchar)clamp(color.w*255.0f, 0.0f, 255.0f);
+ result.x = (uchar)(saturate(color.x)*255.0f);
+ result.y = (uchar)(saturate(color.y)*255.0f);
+ result.z = (uchar)(saturate(color.z)*255.0f);
+ result.w = (uchar)(saturate(color.w)*255.0f);
return result;
}
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index 0a9753baca2..17fa18909c4 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -80,7 +80,7 @@ typedef struct KernelGlobals {} KernelGlobals;
#ifdef __KERNEL_OPENCL__
-typedef struct KernelGlobals {
+typedef ccl_addr_space struct KernelGlobals {
ccl_constant KernelData *data;
#define KERNEL_TEX(type, ttype, name) \
@@ -94,7 +94,7 @@ typedef struct KernelGlobals {
ccl_device float lookup_table_read(KernelGlobals *kg, float x, int offset, int size)
{
- x = clamp(x, 0.0f, 1.0f)*(size-1);
+ x = saturate(x)*(size-1);
int index = min(float_to_int(x), size-1);
int nindex = min(index+1, size-1);
@@ -110,7 +110,7 @@ ccl_device float lookup_table_read(KernelGlobals *kg, float x, int offset, int s
ccl_device float lookup_table_read_2D(KernelGlobals *kg, float x, float y, int offset, int xsize, int ysize)
{
- y = clamp(y, 0.0f, 1.0f)*(ysize-1);
+ y = saturate(y)*(ysize-1);
int index = min(float_to_int(y), ysize-1);
int nindex = min(index+1, ysize-1);
diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h
index 6953f005ea9..9ba41635b9e 100644
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -128,7 +128,7 @@ ccl_device_inline uint cmj_permute(uint i, uint l, uint p)
i *= 0xc860a3df;
i &= w;
i ^= i >> 5;
- } while (i >= l);
+ } while(i >= l);
return (i + p) % l;
}
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index 76fa754b5fa..1badbc3b9f7 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -33,6 +33,98 @@ typedef struct LightSample {
LightType type; /* type of light */
} LightSample;
+/* Area light sampling */
+
+/* Uses the following paper:
+ *
+ * Carlos Urena et al.
+ * An Area-Preserving Parametrization for Spherical Rectangles.
+ *
+ * https://www.solidangle.com/research/egsr2013_spherical_rectangle.pdf
+ *
+ * Note: light_p is modified when sample_coord is true.
+ */
+ccl_device float area_light_sample(float3 P,
+ float3 *light_p,
+ float3 axisu, float3 axisv,
+ float randu, float randv,
+ bool sample_coord)
+{
+ /* In our name system we're using P for the center,
+ * which is o in the paper.
+ */
+
+ float3 corner = *light_p - axisu * 0.5f - axisv * 0.5f;
+ float axisu_len, axisv_len;
+ /* Compute local reference system R. */
+ float3 x = normalize_len(axisu, &axisu_len);
+ float3 y = normalize_len(axisv, &axisv_len);
+ float3 z = cross(x, y);
+ /* Compute rectangle coords in local reference system. */
+ float3 dir = corner - P;
+ float z0 = dot(dir, z);
+ /* Flip 'z' to make it point against Q. */
+ if(z0 > 0.0f) {
+ z *= -1.0f;
+ z0 *= -1.0f;
+ }
+ float x0 = dot(dir, x);
+ float y0 = dot(dir, y);
+ float x1 = x0 + axisu_len;
+ float y1 = y0 + axisv_len;
+ /* Create vectors to four vertices. */
+ float3 v00 = make_float3(x0, y0, z0);
+ float3 v01 = make_float3(x0, y1, z0);
+ float3 v10 = make_float3(x1, y0, z0);
+ float3 v11 = make_float3(x1, y1, z0);
+ /* Compute normals to edges. */
+ float3 n0 = normalize(cross(v00, v10));
+ float3 n1 = normalize(cross(v10, v11));
+ float3 n2 = normalize(cross(v11, v01));
+ float3 n3 = normalize(cross(v01, v00));
+ /* Compute internal angles (gamma_i). */
+ float g0 = safe_acosf(-dot(n0, n1));
+ float g1 = safe_acosf(-dot(n1, n2));
+ float g2 = safe_acosf(-dot(n2, n3));
+ float g3 = safe_acosf(-dot(n3, n0));
+ /* Compute predefined constants. */
+ float b0 = n0.z;
+ float b1 = n2.z;
+ float b0sq = b0 * b0;
+ float k = M_2PI_F - g2 - g3;
+ /* Compute solid angle from internal angles. */
+ float S = g0 + g1 - k;
+
+ if(sample_coord) {
+ /* Compute cu. */
+ float au = randu * S + k;
+ float fu = (cosf(au) * b0 - b1) / sinf(au);
+ float cu = 1.0f / sqrtf(fu * fu + b0sq) * (fu > 0.0f ? 1.0f : -1.0f);
+ cu = clamp(cu, -1.0f, 1.0f);
+ /* Compute xu. */
+ float xu = -(cu * z0) / sqrtf(1.0f - cu * cu);
+ xu = clamp(xu, x0, x1);
+ /* Compute yv. */
+ float z0sq = z0 * z0;
+ float y0sq = y0 * y0;
+ float y1sq = y1 * y1;
+ float d = sqrtf(xu * xu + z0sq);
+ float h0 = y0 / sqrtf(d * d + y0sq);
+ float h1 = y1 / sqrtf(d * d + y1sq);
+ float hv = h0 + randv * (h1 - h0), hv2 = hv * hv;
+ float yv = (hv2 < 1.0f - 1e-6f) ? (hv * d) / sqrtf(1.0f - hv2) : y1;
+
+ /* Transform (xu, yv, z0) to world coords. */
+ *light_p = P + xu * x + yv * y + z0 * z;
+ }
+
+ /* return pdf */
+ if(S != 0.0f)
+ return 1.0f / S;
+ else
+ return 0.0f;
+}
+
/* Background Light */
#ifdef __BACKGROUND_MIS__
@@ -46,7 +138,7 @@ ccl_device_noinline
#else
ccl_device
#endif
-float3 background_light_sample(KernelGlobals *kg, float randu, float randv, float *pdf)
+float3 background_map_sample(KernelGlobals *kg, float randu, float randv, float *pdf)
{
/* for the following, the CDF values are actually a pair of floats, with the
* function value as X and the actual CDF as Y. The last entry's function
@@ -116,10 +208,8 @@ float3 background_light_sample(KernelGlobals *kg, float randu, float randv, floa
else
*pdf = (cdf_u.x * cdf_v.x)/(M_2PI_F * M_PI_F * sin_theta * denom);
- *pdf *= kernel_data.integrator.pdf_lights;
-
/* compute direction */
- return -equirectangular_to_direction(u, v);
+ return equirectangular_to_direction(u, v);
}
/* TODO(sergey): Same as above, after the release we should consider using
@@ -130,7 +220,7 @@ ccl_device_noinline
#else
ccl_device
#endif
-float background_light_pdf(KernelGlobals *kg, float3 direction)
+float background_map_pdf(KernelGlobals *kg, float3 direction)
{
float2 uv = direction_to_equirectangular(direction);
int res = kernel_data.integrator.pdf_background_res;
@@ -156,9 +246,223 @@ float background_light_pdf(KernelGlobals *kg, float3 direction)
float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * (res + 1) + index_u);
float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v);
- float pdf = (cdf_u.x * cdf_v.x)/(M_2PI_F * M_PI_F * sin_theta * denom);
+ return (cdf_u.x * cdf_v.x)/(M_2PI_F * M_PI_F * sin_theta * denom);
+}
+
+ccl_device_inline bool background_portal_data_fetch_and_check_side(KernelGlobals *kg,
+ float3 P,
+ int index,
+ float3 *lightpos,
+ float3 *dir)
+{
+ float4 data0 = kernel_tex_fetch(__light_data, (index + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 0);
+ float4 data3 = kernel_tex_fetch(__light_data, (index + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 3);
+
+ *lightpos = make_float3(data0.y, data0.z, data0.w);
+ *dir = make_float3(data3.y, data3.z, data3.w);
+
+ /* Check whether portal is on the right side. */
+ if(dot(*dir, P - *lightpos) > 1e-5f)
+ return true;
+
+ return false;
+}
+
+ccl_device float background_portal_pdf(KernelGlobals *kg,
+ float3 P,
+ float3 direction,
+ int ignore_portal,
+ bool *is_possible)
+{
+ float portal_pdf = 0.0f;
+
+ for(int p = 0; p < kernel_data.integrator.num_portals; p++) {
+ if(p == ignore_portal)
+ continue;
+
+ float3 lightpos, dir;
+ if(!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
+ continue;
+
+ if(is_possible) {
+ /* There's a portal that could be sampled from this position. */
+ *is_possible = true;
+ }
+
+ float t = -(dot(P, dir) - dot(lightpos, dir)) / dot(direction, dir);
+ if(t <= 1e-5f) {
+ /* Either behind the portal or too close. */
+ continue;
+ }
- return pdf * kernel_data.integrator.pdf_lights;
+ float4 data1 = kernel_tex_fetch(__light_data, (p + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 1);
+ float4 data2 = kernel_tex_fetch(__light_data, (p + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 2);
+
+ float3 axisu = make_float3(data1.y, data1.z, data1.w);
+ float3 axisv = make_float3(data2.y, data2.z, data2.w);
+
+ float3 hit = P + t*direction;
+ float3 inplane = hit - lightpos;
+ /* Skip if the the ray doesn't pass through portal. */
+ if(fabsf(dot(inplane, axisu) / dot(axisu, axisu)) > 0.5f)
+ continue;
+ if(fabsf(dot(inplane, axisv) / dot(axisv, axisv)) > 0.5f)
+ continue;
+
+ portal_pdf += area_light_sample(P, &lightpos, axisu, axisv, 0.0f, 0.0f, false);
+ }
+
+ return kernel_data.integrator.num_portals? portal_pdf / kernel_data.integrator.num_portals: 0.0f;
+}
+
+ccl_device int background_num_possible_portals(KernelGlobals *kg, float3 P)
+{
+ int num_possible_portals = 0;
+ for(int p = 0; p < kernel_data.integrator.num_portals; p++) {
+ float3 lightpos, dir;
+ if(background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
+ num_possible_portals++;
+ }
+ return num_possible_portals;
+}
+
+ccl_device float3 background_portal_sample(KernelGlobals *kg,
+ float3 P,
+ float randu,
+ float randv,
+ int num_possible,
+ int *sampled_portal,
+ float *pdf)
+{
+ /* Pick a portal, then re-normalize randv. */
+ randv *= num_possible;
+ int portal = (int)randv;
+ randv -= portal;
+
+ /* TODO(sergey): Some smarter way of finding portal to sample
+ * is welcome.
+ */
+ for(int p = 0; p < kernel_data.integrator.num_portals; p++) {
+ /* Search for the sampled portal. */
+ float3 lightpos, dir;
+ if(!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
+ continue;
+
+ if(portal == 0) {
+ /* p is the portal to be sampled. */
+ float4 data1 = kernel_tex_fetch(__light_data, (p + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 1);
+ float4 data2 = kernel_tex_fetch(__light_data, (p + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 2);
+ float3 axisu = make_float3(data1.y, data1.z, data1.w);
+ float3 axisv = make_float3(data2.y, data2.z, data2.w);
+
+ *pdf = area_light_sample(P, &lightpos,
+ axisu, axisv,
+ randu, randv,
+ true);
+
+ *pdf /= num_possible;
+ *sampled_portal = p;
+ return normalize(lightpos - P);
+ }
+
+ portal--;
+ }
+
+ return make_float3(0.0f, 0.0f, 0.0f);
+}
+
+ccl_device float3 background_light_sample(KernelGlobals *kg, float3 P, float randu, float randv, float *pdf)
+{
+ /* Probability of sampling portals instead of the map. */
+ float portal_sampling_pdf = kernel_data.integrator.portal_pdf;
+
+ /* Check if there are portals in the scene which we can sample. */
+ if(portal_sampling_pdf > 0.0f) {
+ int num_portals = background_num_possible_portals(kg, P);
+ if(num_portals > 0) {
+ if(portal_sampling_pdf == 1.0f || randu < portal_sampling_pdf) {
+ if(portal_sampling_pdf < 1.0f) {
+ randu /= portal_sampling_pdf;
+ }
+ int portal;
+ float3 D = background_portal_sample(kg, P, randu, randv, num_portals, &portal, pdf);
+ if(num_portals > 1) {
+ /* Ignore the chosen portal, its pdf is already included. */
+ *pdf += background_portal_pdf(kg, P, D, portal, NULL);
+ }
+ /* We could also have sampled the map, so combine with MIS. */
+ if(portal_sampling_pdf < 1.0f) {
+ float cdf_pdf = background_map_pdf(kg, D);
+ *pdf = (portal_sampling_pdf * (*pdf)
+ + (1.0f - portal_sampling_pdf) * cdf_pdf);
+ }
+ return D;
+ } else {
+ /* Sample map, but with nonzero portal_sampling_pdf for MIS. */
+ randu = (randu - portal_sampling_pdf) / (1.0f - portal_sampling_pdf);
+ }
+ } else {
+ /* We can't sample a portal.
+ * Check if we can sample the map instead.
+ */
+ if(portal_sampling_pdf == 1.0f) {
+ /* Use uniform as a fallback if we can't sample the map. */
+ *pdf = 1.0f / M_4PI_F;
+ return sample_uniform_sphere(randu, randv);
+ }
+ else {
+ portal_sampling_pdf = 0.0f;
+ }
+ }
+ }
+
+ float3 D = background_map_sample(kg, randu, randv, pdf);
+ /* Use MIS if portals could be sampled as well. */
+ if(portal_sampling_pdf > 0.0f) {
+ float portal_pdf = background_portal_pdf(kg, P, D, -1, NULL);
+ *pdf = (portal_sampling_pdf * portal_pdf
+ + (1.0f - portal_sampling_pdf) * (*pdf));
+ }
+ return D;
+}
+
+ccl_device float background_light_pdf(KernelGlobals *kg, float3 P, float3 direction)
+{
+ /* Probability of sampling portals instead of the map. */
+ float portal_sampling_pdf = kernel_data.integrator.portal_pdf;
+
+ if(portal_sampling_pdf > 0.0f) {
+ bool is_possible = false;
+ float portal_pdf = background_portal_pdf(kg, P, direction, -1, &is_possible);
+ if(portal_pdf == 0.0f) {
+ if(portal_sampling_pdf == 1.0f) {
+ /* If there are no possible portals at this point,
+ * the fallback sampling would have been used.
+ * Otherwise, the direction would not be sampled at all => pdf = 0
+ */
+ return is_possible? 0.0f: kernel_data.integrator.pdf_lights / M_4PI_F;
+ }
+ else {
+ /* We can only sample the map. */
+ return background_map_pdf(kg, direction) * kernel_data.integrator.pdf_lights;
+ }
+ } else {
+ if(portal_sampling_pdf == 1.0f) {
+ /* We can only sample portals. */
+ return portal_pdf * kernel_data.integrator.pdf_lights;
+ }
+ else {
+ /* We can sample both, so combine with MIS. */
+ return (background_map_pdf(kg, direction) * (1.0f - portal_sampling_pdf)
+ + portal_pdf * portal_sampling_pdf) * kernel_data.integrator.pdf_lights;
+ }
+ }
+ }
+
+ /* No portals in the scene, so must sample the map.
+ * At least one of them is always possible if we have a LIGHT_BACKGROUND.
+ */
+ return background_map_pdf(kg, direction) * kernel_data.integrator.pdf_lights;
}
#endif
@@ -184,96 +488,6 @@ ccl_device float3 sphere_light_sample(float3 P, float3 center, float radius, flo
return disk_light_sample(normalize(P - center), randu, randv)*radius;
}
-/* Uses the following paper:
- *
- * Carlos Urena et al.
- * An Area-Preserving Parametrization for Spherical Rectangles.
- *
- * https://www.solidangle.com/research/egsr2013_spherical_rectangle.pdf
- *
- * Note: light_p is modified when sample_coord is true.
- */
-ccl_device float area_light_sample(float3 P,
- float3 *light_p,
- float3 axisu, float3 axisv,
- float randu, float randv,
- bool sample_coord)
-{
- /* In our name system we're using P for the center,
- * which is o in the paper.
- */
-
- float3 corner = *light_p - axisu * 0.5f - axisv * 0.5f;
- float axisu_len, axisv_len;
- /* Compute local reference system R. */
- float3 x = normalize_len(axisu, &axisu_len);
- float3 y = normalize_len(axisv, &axisv_len);
- float3 z = cross(x, y);
- /* Compute rectangle coords in local reference system. */
- float3 dir = corner - P;
- float z0 = dot(dir, z);
- /* Flip 'z' to make it point against Q. */
- if(z0 > 0.0f) {
- z *= -1.0f;
- z0 *= -1.0f;
- }
- float x0 = dot(dir, x);
- float y0 = dot(dir, y);
- float x1 = x0 + axisu_len;
- float y1 = y0 + axisv_len;
- /* Create vectors to four vertices. */
- float3 v00 = make_float3(x0, y0, z0);
- float3 v01 = make_float3(x0, y1, z0);
- float3 v10 = make_float3(x1, y0, z0);
- float3 v11 = make_float3(x1, y1, z0);
- /* Compute normals to edges. */
- float3 n0 = normalize(cross(v00, v10));
- float3 n1 = normalize(cross(v10, v11));
- float3 n2 = normalize(cross(v11, v01));
- float3 n3 = normalize(cross(v01, v00));
- /* Compute internal angles (gamma_i). */
- float g0 = safe_acosf(-dot(n0, n1));
- float g1 = safe_acosf(-dot(n1, n2));
- float g2 = safe_acosf(-dot(n2, n3));
- float g3 = safe_acosf(-dot(n3, n0));
- /* Compute predefined constants. */
- float b0 = n0.z;
- float b1 = n2.z;
- float b0sq = b0 * b0;
- float k = M_2PI_F - g2 - g3;
- /* Compute solid angle from internal angles. */
- float S = g0 + g1 - k;
-
- if(sample_coord) {
- /* Compute cu. */
- float au = randu * S + k;
- float fu = (cosf(au) * b0 - b1) / sinf(au);
- float cu = 1.0f / sqrtf(fu * fu + b0sq) * (fu > 0.0f ? 1.0f : -1.0f);
- cu = clamp(cu, -1.0f, 1.0f);
- /* Compute xu. */
- float xu = -(cu * z0) / sqrtf(1.0f - cu * cu);
- xu = clamp(xu, x0, x1);
- /* Compute yv. */
- float z0sq = z0 * z0;
- float y0sq = y0 * y0;
- float y1sq = y1 * y1;
- float d = sqrtf(xu * xu + z0sq);
- float h0 = y0 / sqrtf(d * d + y0sq);
- float h1 = y1 / sqrtf(d * d + y1sq);
- float hv = h0 + randv * (h1 - h0), hv2 = hv * hv;
- float yv = (hv2 < 1.0f - 1e-6f) ? (hv * d) / sqrtf(1.0f - hv2) : y1;
-
- /* Transform (xu, yv, z0) to world coords. */
- *light_p = P + xu * x + yv * y + z0 * z;
- }
-
- /* return pdf */
- if(S != 0.0f)
- return 1.0f / S;
- else
- return 0.0f;
-}
-
ccl_device float spot_light_attenuation(float4 data1, float4 data2, LightSample *ls)
{
float3 dir = make_float3(data2.y, data2.z, data2.w);
@@ -344,13 +558,14 @@ ccl_device void lamp_light_sample(KernelGlobals *kg, int lamp,
#ifdef __BACKGROUND_MIS__
else if(type == LIGHT_BACKGROUND) {
/* infinite area light (e.g. light dome or env light) */
- float3 D = background_light_sample(kg, randu, randv, &ls->pdf);
+ float3 D = -background_light_sample(kg, P, randu, randv, &ls->pdf);
ls->P = D;
ls->Ng = D;
ls->D = -D;
ls->t = FLT_MAX;
ls->eval_fac = 1.0f;
+ ls->pdf *= kernel_data.integrator.pdf_lights;
}
#endif
else {
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index 6bb39ee485d..20cf3fa931b 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -19,23 +19,49 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, int sample, float value)
{
ccl_global float *buf = buffer;
+#if defined(__SPLIT_KERNEL__) && defined(__WORK_STEALING__)
+ atomic_add_float(buf, value);
+#else
*buf = (sample == 0)? value: *buf + value;
+#endif // __SPLIT_KERNEL__ && __WORK_STEALING__
}
ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, int sample, float3 value)
{
+#if defined(__SPLIT_KERNEL__) && defined(__WORK_STEALING__)
+ ccl_global float *buf_x = buffer + 0;
+ ccl_global float *buf_y = buffer + 1;
+ ccl_global float *buf_z = buffer + 2;
+
+ atomic_add_float(buf_x, value.x);
+ atomic_add_float(buf_y, value.y);
+ atomic_add_float(buf_z, value.z);
+#else
ccl_global float3 *buf = (ccl_global float3*)buffer;
*buf = (sample == 0)? value: *buf + value;
+#endif // __SPLIT_KERNEL__ && __WORK_STEALING__
}
ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, int sample, float4 value)
{
+#if defined(__SPLIT_KERNEL__) && defined(__WORK_STEALING__)
+ ccl_global float *buf_x = buffer + 0;
+ ccl_global float *buf_y = buffer + 1;
+ ccl_global float *buf_z = buffer + 2;
+ ccl_global float *buf_w = buffer + 3;
+
+ atomic_add_float(buf_x, value.x);
+ atomic_add_float(buf_y, value.y);
+ atomic_add_float(buf_z, value.z);
+ atomic_add_float(buf_w, value.w);
+#else
ccl_global float4 *buf = (ccl_global float4*)buffer;
*buf = (sample == 0)? value: *buf + value;
+#endif // __SPLIT_KERNEL__ && __WORK_STEALING__
}
ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L,
- ShaderData *sd, int sample, PathState *state, float3 throughput)
+ ShaderData *sd, int sample, ccl_addr_space PathState *state, float3 throughput)
{
#ifdef __PASSES__
int path_flag = state->flag;
@@ -49,18 +75,18 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl
return;
if(!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) {
- if(!(sd->flag & SD_TRANSPARENT) ||
+ if(!(ccl_fetch(sd, flag) & SD_TRANSPARENT) ||
kernel_data.film.pass_alpha_threshold == 0.0f ||
average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold)
{
if(sample == 0) {
if(flag & PASS_DEPTH) {
- float depth = camera_distance(kg, sd->P);
+ float depth = camera_distance(kg, ccl_fetch(sd, P));
kernel_write_pass_float(buffer + kernel_data.film.pass_depth, sample, depth);
}
if(flag & PASS_OBJECT_ID) {
- float id = object_pass_id(kg, sd->object);
+ float id = object_pass_id(kg, ccl_fetch(sd, object));
kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, sample, id);
}
if(flag & PASS_MATERIAL_ID) {
@@ -70,7 +96,7 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl
}
if(flag & PASS_NORMAL) {
- float3 normal = sd->N;
+ float3 normal = ccl_fetch(sd, N);
kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, sample, normal);
}
if(flag & PASS_UV) {
@@ -101,8 +127,8 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl
float mist_start = kernel_data.film.mist_start;
float mist_inv_depth = kernel_data.film.mist_inv_depth;
- float depth = camera_distance(kg, sd->P);
- float mist = clamp((depth - mist_start)*mist_inv_depth, 0.0f, 1.0f);
+ float depth = camera_distance(kg, ccl_fetch(sd, P));
+ float mist = saturate((depth - mist_start)*mist_inv_depth);
/* falloff */
float mist_falloff = kernel_data.film.mist_falloff;
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 9b9495644dd..9794ad1d180 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -42,6 +42,7 @@
#include "kernel_path_state.h"
#include "kernel_shadow.h"
#include "kernel_emission.h"
+#include "kernel_path_common.h"
#include "kernel_path_surface.h"
#include "kernel_path_volume.h"
@@ -273,8 +274,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
float bssrdf_u, bssrdf_v;
path_state_rng_2D(kg, rng, &state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false);
-
- state.flag |= PATH_RAY_BSSRDF_ANCESTOR;
}
}
#endif
@@ -307,17 +306,17 @@ ccl_device void kernel_path_ao(KernelGlobals *kg, ShaderData *sd, PathRadiance *
sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
- if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
+ if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) {
Ray light_ray;
float3 ao_shadow;
- light_ray.P = ray_offset(sd->P, sd->Ng);
+ light_ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng));
light_ray.D = ao_D;
light_ray.t = kernel_data.background.ao_distance;
#ifdef __OBJECT_MOTION__
- light_ray.time = sd->time;
+ light_ray.time = ccl_fetch(sd, time);
#endif
- light_ray.dP = sd->dP;
+ light_ray.dP = ccl_fetch(sd, dP);
light_ray.dD = differential3_zero();
if(!shadow_blocked(kg, state, &light_ray, &ao_shadow))
@@ -325,70 +324,8 @@ ccl_device void kernel_path_ao(KernelGlobals *kg, ShaderData *sd, PathRadiance *
}
}
-ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, PathState *state, RNG *rng, float3 throughput)
-{
- int num_samples = kernel_data.integrator.ao_samples;
- float num_samples_inv = 1.0f/num_samples;
- float ao_factor = kernel_data.background.ao_factor;
- float3 ao_N;
- float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
- float3 ao_alpha = shader_bsdf_alpha(kg, sd);
-
- for(int j = 0; j < num_samples; j++) {
- float bsdf_u, bsdf_v;
- path_branched_rng_2D(kg, rng, state, j, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
-
- float3 ao_D;
- float ao_pdf;
-
- sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
-
- if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
- Ray light_ray;
- float3 ao_shadow;
-
- light_ray.P = ray_offset(sd->P, sd->Ng);
- light_ray.D = ao_D;
- light_ray.t = kernel_data.background.ao_distance;
-#ifdef __OBJECT_MOTION__
- light_ray.time = sd->time;
-#endif
- light_ray.dP = sd->dP;
- light_ray.dD = differential3_zero();
-
- if(!shadow_blocked(kg, state, &light_ray, &ao_shadow))
- path_radiance_accum_ao(L, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
- }
- }
-}
-
#ifdef __SUBSURFACE__
-#ifdef __VOLUME__
-ccl_device void kernel_path_subsurface_update_volume_stack(KernelGlobals *kg,
- Ray *ray,
- VolumeStack *stack)
-{
- kernel_assert(kernel_data.integrator.use_volumes);
-
- Ray volume_ray = *ray;
- Intersection isect;
- int step = 0;
- while(step < VOLUME_STACK_SIZE &&
- scene_intersect_volume(kg, &volume_ray, &isect))
- {
- ShaderData sd;
- shader_setup_from_ray(kg, &sd, &isect, &volume_ray, 0, 0);
- kernel_volume_stack_enter_exit(kg, &sd, stack);
-
- /* Move ray forward. */
- volume_ray.P = ray_offset(sd.P, -sd.Ng);
- volume_ray.t -= sd.ray_length;
- ++step;
- }
-}
-#endif
-
ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, PathState *state, RNG *rng, Ray *ray, float3 *throughput)
{
float bssrdf_probability;
@@ -408,7 +345,7 @@ ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd
#ifdef __VOLUME__
Ray volume_ray = *ray;
bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
- sd->flag & SD_OBJECT_INTERSECTS_VOLUME;
+ ccl_fetch(sd, flag) & SD_OBJECT_INTERSECTS_VOLUME;
#endif
/* compute lighting with the BSDF closure */
@@ -417,7 +354,6 @@ ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd
PathState hit_state = *state;
Ray hit_ray = *ray;
- hit_state.flag |= PATH_RAY_BSSRDF_ANCESTOR;
hit_state.rng_offset += PRNG_BOUNCE_NUM;
kernel_path_surface_connect_light(kg, rng, &bssrdf_sd[hit], tp, state, L);
@@ -433,7 +369,7 @@ ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd
volume_ray.D = normalize_len(hit_ray.P - volume_ray.P,
&volume_ray.t);
- kernel_path_subsurface_update_volume_stack(
+ kernel_volume_stack_update_for_subsurface(
kg,
&volume_ray,
hit_state.volume_stack);
@@ -503,7 +439,9 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
#ifdef __KERNEL_DEBUG__
if(state.flag & PATH_RAY_CAMERA) {
debug_data.num_bvh_traversal_steps += isect.num_traversal_steps;
+ debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
}
+ debug_data.num_ray_bounces++;
#endif
#ifdef __LAMP_MIS__
@@ -733,474 +671,6 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent);
}
-#ifdef __BRANCHED_PATH__
-
-/* branched path tracing: bounce off surface and integrate indirect light */
-ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
- RNG *rng, ShaderData *sd, float3 throughput, float num_samples_adjust,
- PathState *state, PathRadiance *L)
-{
- for(int i = 0; i< sd->num_closure; i++) {
- const ShaderClosure *sc = &sd->closure[i];
-
- if(!CLOSURE_IS_BSDF(sc->type))
- continue;
- /* transparency is not handled here, but in outer loop */
- if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID)
- continue;
-
- int num_samples;
-
- if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
- num_samples = kernel_data.integrator.diffuse_samples;
- else if(CLOSURE_IS_BSDF_BSSRDF(sc->type))
- num_samples = 1;
- else if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
- num_samples = kernel_data.integrator.glossy_samples;
- else
- num_samples = kernel_data.integrator.transmission_samples;
-
- num_samples = ceil_to_int(num_samples_adjust*num_samples);
-
- float num_samples_inv = num_samples_adjust/num_samples;
- RNG bsdf_rng = cmj_hash(*rng, i);
-
- for(int j = 0; j < num_samples; j++) {
- PathState ps = *state;
- float3 tp = throughput;
- Ray bsdf_ray;
-
- if(!kernel_branched_path_surface_bounce(kg, &bsdf_rng, sd, sc, j, num_samples, &tp, &ps, L, &bsdf_ray))
- continue;
-
- kernel_path_indirect(kg, rng, bsdf_ray, tp*num_samples_inv, num_samples, ps, L);
-
- /* for render passes, sum and reset indirect light pass variables
- * for the next samples */
- path_radiance_sum_indirect(L);
- path_radiance_reset_indirect(L);
- }
- }
-}
-
-#ifdef __SUBSURFACE__
-ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
- ShaderData *sd,
- PathRadiance *L,
- PathState *state,
- RNG *rng,
- Ray *ray,
- float3 throughput)
-{
- for(int i = 0; i< sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
-
- if(!CLOSURE_IS_BSSRDF(sc->type))
- continue;
-
- /* set up random number generator */
- uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
- int num_samples = kernel_data.integrator.subsurface_samples;
- float num_samples_inv = 1.0f/num_samples;
- RNG bssrdf_rng = cmj_hash(*rng, i);
-
- state->flag |= PATH_RAY_BSSRDF_ANCESTOR;
-
- /* do subsurface scatter step with copy of shader data, this will
- * replace the BSSRDF with a diffuse BSDF closure */
- for(int j = 0; j < num_samples; j++) {
- ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
- float bssrdf_u, bssrdf_v;
- path_branched_rng_2D(kg, &bssrdf_rng, state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
- int num_hits = subsurface_scatter_multi_step(kg, sd, bssrdf_sd, state->flag, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
-#ifdef __VOLUME__
- Ray volume_ray = *ray;
- bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
- sd->flag & SD_OBJECT_INTERSECTS_VOLUME;
-#endif
-
- /* compute lighting with the BSDF closure */
- for(int hit = 0; hit < num_hits; hit++) {
- PathState hit_state = *state;
-
- path_state_branch(&hit_state, j, num_samples);
-
-#ifdef __VOLUME__
- if(need_update_volume_stack) {
- /* Setup ray from previous surface point to the new one. */
- float3 P = ray_offset(bssrdf_sd[hit].P, -bssrdf_sd[hit].Ng);
- volume_ray.D = normalize_len(P - volume_ray.P,
- &volume_ray.t);
-
- kernel_path_subsurface_update_volume_stack(
- kg,
- &volume_ray,
- hit_state.volume_stack);
-
- /* Move volume ray forward. */
- volume_ray.P = P;
- }
-#endif
-
-#if defined(__EMISSION__) && defined(__BRANCHED_PATH__)
- /* direct light */
- if(kernel_data.integrator.use_direct_light) {
- bool all = kernel_data.integrator.sample_all_lights_direct;
- kernel_branched_path_surface_connect_light(kg, rng,
- &bssrdf_sd[hit], &hit_state, throughput, num_samples_inv, L, all);
- }
-#endif
-
- /* indirect light */
- kernel_branched_path_surface_indirect_light(kg, rng,
- &bssrdf_sd[hit], throughput, num_samples_inv,
- &hit_state, L);
- }
- }
-
- state->flag &= ~PATH_RAY_BSSRDF_ANCESTOR;
- }
-}
-#endif
-
-ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer)
-{
- /* initialize */
- PathRadiance L;
- float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
- float L_transparent = 0.0f;
-
- path_radiance_init(&L, kernel_data.film.use_light_pass);
-
- PathState state;
- path_state_init(kg, &state, rng, sample, &ray);
-
-#ifdef __KERNEL_DEBUG__
- DebugData debug_data;
- debug_data_init(&debug_data);
-#endif
-
- for(;;) {
- /* intersect scene */
- Intersection isect;
- uint visibility = path_state_ray_visibility(kg, &state);
-
-#ifdef __HAIR__
- float difl = 0.0f, extmax = 0.0f;
- uint lcg_state = 0;
-
- if(kernel_data.bvh.have_curves) {
- if((kernel_data.cam.resolution == 1) && (state.flag & PATH_RAY_CAMERA)) {
- float3 pixdiff = ray.dD.dx + ray.dD.dy;
- /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
- difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
- }
-
- extmax = kernel_data.curve.maximum_width;
- lcg_state = lcg_state_init(rng, &state, 0x51633e2d);
- }
-
- bool hit = scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax);
-#else
- bool hit = scene_intersect(kg, &ray, visibility, &isect, NULL, 0.0f, 0.0f);
-#endif
-
-#ifdef __KERNEL_DEBUG__
- if(state.flag & PATH_RAY_CAMERA) {
- debug_data.num_bvh_traversal_steps += isect.num_traversal_steps;
- }
-#endif
-
-#ifdef __VOLUME__
- /* volume attenuation, emission, scatter */
- if(state.volume_stack[0].shader != SHADER_NONE) {
- Ray volume_ray = ray;
- volume_ray.t = (hit)? isect.t: FLT_MAX;
-
- bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
-
-#ifdef __VOLUME_DECOUPLED__
- /* decoupled ray marching only supported on CPU */
-
- /* cache steps along volume for repeated sampling */
- VolumeSegment volume_segment;
- ShaderData volume_sd;
-
- shader_setup_from_volume(kg, &volume_sd, &volume_ray, state.bounce, state.transparent_bounce);
- kernel_volume_decoupled_record(kg, &state,
- &volume_ray, &volume_sd, &volume_segment, heterogeneous);
-
- /* direct light sampling */
- if(volume_segment.closure_flag & SD_SCATTER) {
- volume_segment.sampling_method = volume_stack_sampling_method(kg, state.volume_stack);
-
- bool all = kernel_data.integrator.sample_all_lights_direct;
-
- kernel_branched_path_volume_connect_light(kg, rng, &volume_sd,
- throughput, &state, &L, all, &volume_ray, &volume_segment);
-
- /* indirect light sampling */
- int num_samples = kernel_data.integrator.volume_samples;
- float num_samples_inv = 1.0f/num_samples;
-
- for(int j = 0; j < num_samples; j++) {
- /* workaround to fix correlation bug in T38710, can find better solution
- * in random number generator later, for now this is done here to not impact
- * performance of rendering without volumes */
- RNG tmp_rng = cmj_hash(*rng, state.rng_offset);
-
- PathState ps = state;
- Ray pray = ray;
- float3 tp = throughput;
-
- /* branch RNG state */
- path_state_branch(&ps, j, num_samples);
-
- /* scatter sample. if we use distance sampling and take just one
- * sample for direct and indirect light, we could share this
- * computation, but makes code a bit complex */
- float rphase = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_PHASE);
- float rscatter = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_SCATTER_DISTANCE);
-
- VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
- &ps, &pray, &volume_sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
-
- (void)result;
- kernel_assert(result == VOLUME_PATH_SCATTERED);
-
- if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray)) {
- kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L);
-
- /* for render passes, sum and reset indirect light pass variables
- * for the next samples */
- path_radiance_sum_indirect(&L);
- path_radiance_reset_indirect(&L);
- }
- }
- }
-
- /* emission and transmittance */
- if(volume_segment.closure_flag & SD_EMISSION)
- path_radiance_accum_emission(&L, throughput, volume_segment.accum_emission, state.bounce);
- throughput *= volume_segment.accum_transmittance;
-
- /* free cached steps */
- kernel_volume_decoupled_free(kg, &volume_segment);
-#else
- /* GPU: no decoupled ray marching, scatter probalistically */
- int num_samples = kernel_data.integrator.volume_samples;
- float num_samples_inv = 1.0f/num_samples;
-
- /* todo: we should cache the shader evaluations from stepping
- * through the volume, for now we redo them multiple times */
-
- for(int j = 0; j < num_samples; j++) {
- PathState ps = state;
- Ray pray = ray;
- ShaderData volume_sd;
- float3 tp = throughput * num_samples_inv;
-
- /* branch RNG state */
- path_state_branch(&ps, j, num_samples);
-
- VolumeIntegrateResult result = kernel_volume_integrate(
- kg, &ps, &volume_sd, &volume_ray, &L, &tp, rng, heterogeneous);
-
-#ifdef __VOLUME_SCATTER__
- if(result == VOLUME_PATH_SCATTERED) {
- /* todo: support equiangular, MIS and all light sampling.
- * alternatively get decoupled ray marching working on the GPU */
- kernel_path_volume_connect_light(kg, rng, &volume_sd, tp, &state, &L);
-
- if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray)) {
- kernel_path_indirect(kg, rng, pray, tp, num_samples, ps, &L);
-
- /* for render passes, sum and reset indirect light pass variables
- * for the next samples */
- path_radiance_sum_indirect(&L);
- path_radiance_reset_indirect(&L);
- }
- }
-#endif
- }
-
- /* todo: avoid this calculation using decoupled ray marching */
- kernel_volume_shadow(kg, &state, &volume_ray, &throughput);
-#endif
- }
-#endif
-
- if(!hit) {
- /* eval background shader if nothing hit */
- if(kernel_data.background.transparent) {
- L_transparent += average(throughput);
-
-#ifdef __PASSES__
- if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
-#endif
- break;
- }
-
-#ifdef __BACKGROUND__
- /* sample background shader */
- float3 L_background = indirect_background(kg, &state, &ray);
- path_radiance_accum_background(&L, throughput, L_background, state.bounce);
-#endif
-
- break;
- }
-
- /* setup shading */
- ShaderData sd;
- shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce, state.transparent_bounce);
- shader_eval_surface(kg, &sd, 0.0f, state.flag, SHADER_CONTEXT_MAIN);
- shader_merge_closures(&sd);
-
- /* holdout */
-#ifdef __HOLDOUT__
- if(sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK)) {
- if(kernel_data.background.transparent) {
- float3 holdout_weight;
-
- if(sd.flag & SD_HOLDOUT_MASK)
- holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
- else
- holdout_weight = shader_holdout_eval(kg, &sd);
-
- /* any throughput is ok, should all be identical here */
- L_transparent += average(holdout_weight*throughput);
- }
-
- if(sd.flag & SD_HOLDOUT_MASK)
- break;
- }
-#endif
-
- /* holdout mask objects do not write data passes */
- kernel_write_data_passes(kg, buffer, &L, &sd, sample, &state, throughput);
-
-#ifdef __EMISSION__
- /* emission */
- if(sd.flag & SD_EMISSION) {
- float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, state.ray_pdf);
- path_radiance_accum_emission(&L, throughput, emission, state.bounce);
- }
-#endif
-
- /* transparency termination */
- if(state.flag & PATH_RAY_TRANSPARENT) {
- /* path termination. this is a strange place to put the termination, it's
- * mainly due to the mixed in MIS that we use. gives too many unneeded
- * shader evaluations, only need emission if we are going to terminate */
- float probability = path_state_terminate_probability(kg, &state, throughput);
-
- if(probability == 0.0f) {
- break;
- }
- else if(probability != 1.0f) {
- float terminate = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_TERMINATE);
-
- if(terminate >= probability)
- break;
-
- throughput /= probability;
- }
- }
-
-#ifdef __AO__
- /* ambient occlusion */
- if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
- kernel_branched_path_ao(kg, &sd, &L, &state, rng, throughput);
- }
-#endif
-
-#ifdef __SUBSURFACE__
- /* bssrdf scatter to a different location on the same object */
- if(sd.flag & SD_BSSRDF) {
- kernel_branched_path_subsurface_scatter(kg, &sd, &L, &state,
- rng, &ray, throughput);
- }
-#endif
-
- if(!(sd.flag & SD_HAS_ONLY_VOLUME)) {
- PathState hit_state = state;
-
-#ifdef __EMISSION__
- /* direct light */
- if(kernel_data.integrator.use_direct_light) {
- bool all = kernel_data.integrator.sample_all_lights_direct;
- kernel_branched_path_surface_connect_light(kg, rng,
- &sd, &hit_state, throughput, 1.0f, &L, all);
- }
-#endif
-
- /* indirect light */
- kernel_branched_path_surface_indirect_light(kg, rng,
- &sd, throughput, 1.0f, &hit_state, &L);
-
- /* continue in case of transparency */
- throughput *= shader_bsdf_transparency(kg, &sd);
-
- if(is_zero(throughput))
- break;
- }
-
- path_state_next(kg, &state, LABEL_TRANSPARENT);
- ray.P = ray_offset(sd.P, -sd.Ng);
- ray.t -= sd.ray_length; /* clipping works through transparent */
-
-
-#ifdef __RAY_DIFFERENTIALS__
- ray.dP = sd.dP;
- ray.dD.dx = -sd.dI.dx;
- ray.dD.dy = -sd.dI.dy;
-#endif
-
-#ifdef __VOLUME__
- /* enter/exit volume */
- kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack);
-#endif
- }
-
- float3 L_sum = path_radiance_clamp_and_sum(kg, &L);
-
- kernel_write_light_passes(kg, buffer, &L, sample);
-
-#ifdef __KERNEL_DEBUG__
- kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample);
-#endif
-
- return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent);
-}
-
-#endif
-
-ccl_device_inline void kernel_path_trace_setup(KernelGlobals *kg, ccl_global uint *rng_state, int sample, int x, int y, RNG *rng, Ray *ray)
-{
- float filter_u;
- float filter_v;
-
- int num_samples = kernel_data.integrator.aa_samples;
-
- path_rng_init(kg, rng_state, sample, num_samples, rng, x, y, &filter_u, &filter_v);
-
- /* sample camera ray */
-
- float lens_u = 0.0f, lens_v = 0.0f;
-
- if(kernel_data.cam.aperturesize > 0.0f)
- path_rng_2D(kg, rng, sample, num_samples, PRNG_LENS_U, &lens_u, &lens_v);
-
- float time = 0.0f;
-
-#ifdef __CAMERA_MOTION__
- if(kernel_data.cam.shuttertime != -1.0f)
- time = path_rng_1D(kg, rng, sample, num_samples, PRNG_TIME);
-#endif
-
- camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray);
-}
-
ccl_device void kernel_path_trace(KernelGlobals *kg,
ccl_global float *buffer, ccl_global uint *rng_state,
int sample, int x, int y, int offset, int stride)
@@ -1232,38 +702,5 @@ ccl_device void kernel_path_trace(KernelGlobals *kg,
path_rng_end(kg, rng_state, rng);
}
-#ifdef __BRANCHED_PATH__
-ccl_device void kernel_branched_path_trace(KernelGlobals *kg,
- ccl_global float *buffer, ccl_global uint *rng_state,
- int sample, int x, int y, int offset, int stride)
-{
- /* buffer offset */
- int index = offset + x + y*stride;
- int pass_stride = kernel_data.film.pass_stride;
-
- rng_state += index;
- buffer += index*pass_stride;
-
- /* initialize random numbers and ray */
- RNG rng;
- Ray ray;
-
- kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray);
-
- /* integrate */
- float4 L;
-
- if(ray.t != 0.0f)
- L = kernel_branched_path_integrate(kg, &rng, sample, ray, buffer);
- else
- L = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
- /* accumulate result in output buffer */
- kernel_write_pass_float4(buffer, sample, L);
-
- path_rng_end(kg, rng_state, rng);
-}
-#endif
-
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
new file mode 100644
index 00000000000..b6d64985f6a
--- /dev/null
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -0,0 +1,534 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+#ifdef __BRANCHED_PATH__
+
+ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, PathState *state, RNG *rng, float3 throughput)
+{
+ int num_samples = kernel_data.integrator.ao_samples;
+ float num_samples_inv = 1.0f/num_samples;
+ float ao_factor = kernel_data.background.ao_factor;
+ float3 ao_N;
+ float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
+ float3 ao_alpha = shader_bsdf_alpha(kg, sd);
+
+ for(int j = 0; j < num_samples; j++) {
+ float bsdf_u, bsdf_v;
+ path_branched_rng_2D(kg, rng, state, j, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+
+ float3 ao_D;
+ float ao_pdf;
+
+ sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
+
+ if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) {
+ Ray light_ray;
+ float3 ao_shadow;
+
+ light_ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng));
+ light_ray.D = ao_D;
+ light_ray.t = kernel_data.background.ao_distance;
+#ifdef __OBJECT_MOTION__
+ light_ray.time = ccl_fetch(sd, time);
+#endif
+ light_ray.dP = ccl_fetch(sd, dP);
+ light_ray.dD = differential3_zero();
+
+ if(!shadow_blocked(kg, state, &light_ray, &ao_shadow))
+ path_radiance_accum_ao(L, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
+ }
+ }
+}
+
+
+/* bounce off surface and integrate indirect light */
+ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
+ RNG *rng, ShaderData *sd, float3 throughput, float num_samples_adjust,
+ PathState *state, PathRadiance *L)
+{
+ for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
+ const ShaderClosure *sc = &ccl_fetch(sd, closure)[i];
+
+ if(!CLOSURE_IS_BSDF(sc->type))
+ continue;
+ /* transparency is not handled here, but in outer loop */
+ if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID)
+ continue;
+
+ int num_samples;
+
+ if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
+ num_samples = kernel_data.integrator.diffuse_samples;
+ else if(CLOSURE_IS_BSDF_BSSRDF(sc->type))
+ num_samples = 1;
+ else if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
+ num_samples = kernel_data.integrator.glossy_samples;
+ else
+ num_samples = kernel_data.integrator.transmission_samples;
+
+ num_samples = ceil_to_int(num_samples_adjust*num_samples);
+
+ float num_samples_inv = num_samples_adjust/num_samples;
+ RNG bsdf_rng = cmj_hash(*rng, i);
+
+ for(int j = 0; j < num_samples; j++) {
+ PathState ps = *state;
+ float3 tp = throughput;
+ Ray bsdf_ray;
+
+ if(!kernel_branched_path_surface_bounce(kg, &bsdf_rng, sd, sc, j, num_samples, &tp, &ps, L, &bsdf_ray))
+ continue;
+
+ kernel_path_indirect(kg, rng, bsdf_ray, tp*num_samples_inv, num_samples, ps, L);
+
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(L);
+ path_radiance_reset_indirect(L);
+ }
+ }
+}
+
+#ifdef __SUBSURFACE__
+ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
+ ShaderData *sd,
+ PathRadiance *L,
+ PathState *state,
+ RNG *rng,
+ Ray *ray,
+ float3 throughput)
+{
+ for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
+ ShaderClosure *sc = &ccl_fetch(sd, closure)[i];
+
+ if(!CLOSURE_IS_BSSRDF(sc->type))
+ continue;
+
+ /* set up random number generator */
+ uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
+ int num_samples = kernel_data.integrator.subsurface_samples;
+ float num_samples_inv = 1.0f/num_samples;
+ RNG bssrdf_rng = cmj_hash(*rng, i);
+
+ /* do subsurface scatter step with copy of shader data, this will
+ * replace the BSSRDF with a diffuse BSDF closure */
+ for(int j = 0; j < num_samples; j++) {
+ ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
+ float bssrdf_u, bssrdf_v;
+ path_branched_rng_2D(kg, &bssrdf_rng, state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+ int num_hits = subsurface_scatter_multi_step(kg, sd, bssrdf_sd, state->flag, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
+#ifdef __VOLUME__
+ Ray volume_ray = *ray;
+ bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
+ ccl_fetch(sd, flag) & SD_OBJECT_INTERSECTS_VOLUME;
+#endif
+
+ /* compute lighting with the BSDF closure */
+ for(int hit = 0; hit < num_hits; hit++) {
+ PathState hit_state = *state;
+
+ path_state_branch(&hit_state, j, num_samples);
+
+#ifdef __VOLUME__
+ if(need_update_volume_stack) {
+ /* Setup ray from previous surface point to the new one. */
+ float3 P = ray_offset(bssrdf_sd[hit].P, -bssrdf_sd[hit].Ng);
+ volume_ray.D = normalize_len(P - volume_ray.P,
+ &volume_ray.t);
+
+ kernel_volume_stack_update_for_subsurface(
+ kg,
+ &volume_ray,
+ hit_state.volume_stack);
+
+ /* Move volume ray forward. */
+ volume_ray.P = P;
+ }
+#endif
+
+#ifdef __EMISSION__
+ /* direct light */
+ if(kernel_data.integrator.use_direct_light) {
+ bool all = kernel_data.integrator.sample_all_lights_direct;
+ kernel_branched_path_surface_connect_light(kg, rng,
+ &bssrdf_sd[hit], &hit_state, throughput, num_samples_inv, L, all);
+ }
+#endif
+
+ /* indirect light */
+ kernel_branched_path_surface_indirect_light(kg, rng,
+ &bssrdf_sd[hit], throughput, num_samples_inv,
+ &hit_state, L);
+ }
+ }
+ }
+}
+#endif
+
+ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer)
+{
+ /* initialize */
+ PathRadiance L;
+ float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+ float L_transparent = 0.0f;
+
+ path_radiance_init(&L, kernel_data.film.use_light_pass);
+
+ PathState state;
+ path_state_init(kg, &state, rng, sample, &ray);
+
+#ifdef __KERNEL_DEBUG__
+ DebugData debug_data;
+ debug_data_init(&debug_data);
+#endif
+
+ /* Main Loop
+ * Here we only handle transparency intersections from the camera ray.
+ * Indirect bounces are handled in kernel_branched_path_surface_indirect_light().
+ */
+ for(;;) {
+ /* intersect scene */
+ Intersection isect;
+ uint visibility = path_state_ray_visibility(kg, &state);
+
+#ifdef __HAIR__
+ float difl = 0.0f, extmax = 0.0f;
+ uint lcg_state = 0;
+
+ if(kernel_data.bvh.have_curves) {
+ if(kernel_data.cam.resolution == 1) {
+ float3 pixdiff = ray.dD.dx + ray.dD.dy;
+ /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
+ difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
+ }
+
+ extmax = kernel_data.curve.maximum_width;
+ lcg_state = lcg_state_init(rng, &state, 0x51633e2d);
+ }
+
+ bool hit = scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax);
+#else
+ bool hit = scene_intersect(kg, &ray, visibility, &isect, NULL, 0.0f, 0.0f);
+#endif
+
+#ifdef __KERNEL_DEBUG__
+ debug_data.num_bvh_traversal_steps += isect.num_traversal_steps;
+ debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
+ debug_data.num_ray_bounces++;
+#endif
+
+#ifdef __VOLUME__
+ /* volume attenuation, emission, scatter */
+ if(state.volume_stack[0].shader != SHADER_NONE) {
+ Ray volume_ray = ray;
+ volume_ray.t = (hit)? isect.t: FLT_MAX;
+
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
+
+#ifdef __VOLUME_DECOUPLED__
+ /* decoupled ray marching only supported on CPU */
+
+ /* cache steps along volume for repeated sampling */
+ VolumeSegment volume_segment;
+ ShaderData volume_sd;
+
+ shader_setup_from_volume(kg, &volume_sd, &volume_ray, state.bounce, state.transparent_bounce);
+ kernel_volume_decoupled_record(kg, &state,
+ &volume_ray, &volume_sd, &volume_segment, heterogeneous);
+
+ /* direct light sampling */
+ if(volume_segment.closure_flag & SD_SCATTER) {
+ volume_segment.sampling_method = volume_stack_sampling_method(kg, state.volume_stack);
+
+ bool all = kernel_data.integrator.sample_all_lights_direct;
+
+ kernel_branched_path_volume_connect_light(kg, rng, &volume_sd,
+ throughput, &state, &L, all, &volume_ray, &volume_segment);
+
+ /* indirect light sampling */
+ int num_samples = kernel_data.integrator.volume_samples;
+ float num_samples_inv = 1.0f/num_samples;
+
+ for(int j = 0; j < num_samples; j++) {
+ /* workaround to fix correlation bug in T38710, can find better solution
+ * in random number generator later, for now this is done here to not impact
+ * performance of rendering without volumes */
+ RNG tmp_rng = cmj_hash(*rng, state.rng_offset);
+
+ PathState ps = state;
+ Ray pray = ray;
+ float3 tp = throughput;
+
+ /* branch RNG state */
+ path_state_branch(&ps, j, num_samples);
+
+ /* scatter sample. if we use distance sampling and take just one
+ * sample for direct and indirect light, we could share this
+ * computation, but makes code a bit complex */
+ float rphase = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_PHASE);
+ float rscatter = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_SCATTER_DISTANCE);
+
+ VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
+ &ps, &pray, &volume_sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
+
+ (void)result;
+ kernel_assert(result == VOLUME_PATH_SCATTERED);
+
+ if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray)) {
+ kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L);
+
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(&L);
+ path_radiance_reset_indirect(&L);
+ }
+ }
+ }
+
+ /* emission and transmittance */
+ if(volume_segment.closure_flag & SD_EMISSION)
+ path_radiance_accum_emission(&L, throughput, volume_segment.accum_emission, state.bounce);
+ throughput *= volume_segment.accum_transmittance;
+
+ /* free cached steps */
+ kernel_volume_decoupled_free(kg, &volume_segment);
+#else
+ /* GPU: no decoupled ray marching, scatter probalistically */
+ int num_samples = kernel_data.integrator.volume_samples;
+ float num_samples_inv = 1.0f/num_samples;
+
+ /* todo: we should cache the shader evaluations from stepping
+ * through the volume, for now we redo them multiple times */
+
+ for(int j = 0; j < num_samples; j++) {
+ PathState ps = state;
+ Ray pray = ray;
+ ShaderData volume_sd;
+ float3 tp = throughput * num_samples_inv;
+
+ /* branch RNG state */
+ path_state_branch(&ps, j, num_samples);
+
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, &ps, &volume_sd, &volume_ray, &L, &tp, rng, heterogeneous);
+
+#ifdef __VOLUME_SCATTER__
+ if(result == VOLUME_PATH_SCATTERED) {
+ /* todo: support equiangular, MIS and all light sampling.
+ * alternatively get decoupled ray marching working on the GPU */
+ kernel_path_volume_connect_light(kg, rng, &volume_sd, tp, &state, &L);
+
+ if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray)) {
+ kernel_path_indirect(kg, rng, pray, tp, num_samples, ps, &L);
+
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(&L);
+ path_radiance_reset_indirect(&L);
+ }
+ }
+#endif
+ }
+
+ /* todo: avoid this calculation using decoupled ray marching */
+ kernel_volume_shadow(kg, &state, &volume_ray, &throughput);
+#endif
+ }
+#endif
+
+ if(!hit) {
+ /* eval background shader if nothing hit */
+ if(kernel_data.background.transparent) {
+ L_transparent += average(throughput);
+
+#ifdef __PASSES__
+ if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
+#endif
+ break;
+ }
+
+#ifdef __BACKGROUND__
+ /* sample background shader */
+ float3 L_background = indirect_background(kg, &state, &ray);
+ path_radiance_accum_background(&L, throughput, L_background, state.bounce);
+#endif
+
+ break;
+ }
+
+ /* setup shading */
+ ShaderData sd;
+ shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce, state.transparent_bounce);
+ shader_eval_surface(kg, &sd, 0.0f, state.flag, SHADER_CONTEXT_MAIN);
+ shader_merge_closures(&sd);
+
+ /* holdout */
+#ifdef __HOLDOUT__
+ if(sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK)) {
+ if(kernel_data.background.transparent) {
+ float3 holdout_weight;
+
+ if(sd.flag & SD_HOLDOUT_MASK)
+ holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
+ else
+ holdout_weight = shader_holdout_eval(kg, &sd);
+
+ /* any throughput is ok, should all be identical here */
+ L_transparent += average(holdout_weight*throughput);
+ }
+
+ if(sd.flag & SD_HOLDOUT_MASK)
+ break;
+ }
+#endif
+
+ /* holdout mask objects do not write data passes */
+ kernel_write_data_passes(kg, buffer, &L, &sd, sample, &state, throughput);
+
+#ifdef __EMISSION__
+ /* emission */
+ if(sd.flag & SD_EMISSION) {
+ float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, state.ray_pdf);
+ path_radiance_accum_emission(&L, throughput, emission, state.bounce);
+ }
+#endif
+
+ /* transparency termination */
+ if(state.flag & PATH_RAY_TRANSPARENT) {
+ /* path termination. this is a strange place to put the termination, it's
+ * mainly due to the mixed in MIS that we use. gives too many unneeded
+ * shader evaluations, only need emission if we are going to terminate */
+ float probability = path_state_terminate_probability(kg, &state, throughput);
+
+ if(probability == 0.0f) {
+ break;
+ }
+ else if(probability != 1.0f) {
+ float terminate = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_TERMINATE);
+
+ if(terminate >= probability)
+ break;
+
+ throughput /= probability;
+ }
+ }
+
+#ifdef __AO__
+ /* ambient occlusion */
+ if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
+ kernel_branched_path_ao(kg, &sd, &L, &state, rng, throughput);
+ }
+#endif
+
+#ifdef __SUBSURFACE__
+ /* bssrdf scatter to a different location on the same object */
+ if(sd.flag & SD_BSSRDF) {
+ kernel_branched_path_subsurface_scatter(kg, &sd, &L, &state,
+ rng, &ray, throughput);
+ }
+#endif
+
+ if(!(sd.flag & SD_HAS_ONLY_VOLUME)) {
+ PathState hit_state = state;
+
+#ifdef __EMISSION__
+ /* direct light */
+ if(kernel_data.integrator.use_direct_light) {
+ bool all = kernel_data.integrator.sample_all_lights_direct;
+ kernel_branched_path_surface_connect_light(kg, rng,
+ &sd, &hit_state, throughput, 1.0f, &L, all);
+ }
+#endif
+
+ /* indirect light */
+ kernel_branched_path_surface_indirect_light(kg, rng,
+ &sd, throughput, 1.0f, &hit_state, &L);
+
+ /* continue in case of transparency */
+ throughput *= shader_bsdf_transparency(kg, &sd);
+
+ if(is_zero(throughput))
+ break;
+ }
+
+ /* Update Path State */
+ state.flag |= PATH_RAY_TRANSPARENT;
+ state.transparent_bounce++;
+
+ ray.P = ray_offset(sd.P, -sd.Ng);
+ ray.t -= sd.ray_length; /* clipping works through transparent */
+
+
+#ifdef __RAY_DIFFERENTIALS__
+ ray.dP = sd.dP;
+ ray.dD.dx = -sd.dI.dx;
+ ray.dD.dy = -sd.dI.dy;
+#endif
+
+#ifdef __VOLUME__
+ /* enter/exit volume */
+ kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack);
+#endif
+ }
+
+ float3 L_sum = path_radiance_clamp_and_sum(kg, &L);
+
+ kernel_write_light_passes(kg, buffer, &L, sample);
+
+#ifdef __KERNEL_DEBUG__
+ kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample);
+#endif
+
+ return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent);
+}
+
+ccl_device void kernel_branched_path_trace(KernelGlobals *kg,
+ ccl_global float *buffer, ccl_global uint *rng_state,
+ int sample, int x, int y, int offset, int stride)
+{
+ /* buffer offset */
+ int index = offset + x + y*stride;
+ int pass_stride = kernel_data.film.pass_stride;
+
+ rng_state += index;
+ buffer += index*pass_stride;
+
+ /* initialize random numbers and ray */
+ RNG rng;
+ Ray ray;
+
+ kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray);
+
+ /* integrate */
+ float4 L;
+
+ if(ray.t != 0.0f)
+ L = kernel_branched_path_integrate(kg, &rng, sample, ray, buffer);
+ else
+ L = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+ /* accumulate result in output buffer */
+ kernel_write_pass_float4(buffer, sample, L);
+
+ path_rng_end(kg, rng_state, rng);
+}
+
+#endif /* __BRANCHED_PATH__ */
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/kernel/kernel_path_common.h b/intern/cycles/kernel/kernel_path_common.h
new file mode 100644
index 00000000000..1912dfa16ed
--- /dev/null
+++ b/intern/cycles/kernel/kernel_path_common.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device_inline void kernel_path_trace_setup(KernelGlobals *kg,
+ ccl_global uint *rng_state,
+ int sample,
+ int x, int y,
+ ccl_addr_space RNG *rng,
+ ccl_addr_space Ray *ray)
+{
+ float filter_u;
+ float filter_v;
+
+ int num_samples = kernel_data.integrator.aa_samples;
+
+ path_rng_init(kg, rng_state, sample, num_samples, rng, x, y, &filter_u, &filter_v);
+
+ /* sample camera ray */
+
+ float lens_u = 0.0f, lens_v = 0.0f;
+
+ if(kernel_data.cam.aperturesize > 0.0f)
+ path_rng_2D(kg, rng, sample, num_samples, PRNG_LENS_U, &lens_u, &lens_v);
+
+ float time = 0.0f;
+
+#ifdef __CAMERA_MOTION__
+ if(kernel_data.cam.shuttertime != -1.0f)
+ time = path_rng_1D(kg, rng, sample, num_samples, PRNG_TIME);
+#endif
+
+ camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray);
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h
index ab146c72cd0..15efb2371de 100644
--- a/intern/cycles/kernel/kernel_path_state.h
+++ b/intern/cycles/kernel/kernel_path_state.h
@@ -16,7 +16,7 @@
CCL_NAMESPACE_BEGIN
-ccl_device_inline void path_state_init(KernelGlobals *kg, PathState *state, RNG *rng, int sample, Ray *ray)
+ccl_device_inline void path_state_init(KernelGlobals *kg, ccl_addr_space PathState *state, ccl_addr_space RNG *rng, int sample, ccl_addr_space Ray *ray)
{
state->flag = PATH_RAY_CAMERA|PATH_RAY_MIS_SKIP;
@@ -51,7 +51,7 @@ ccl_device_inline void path_state_init(KernelGlobals *kg, PathState *state, RNG
#endif
}
-ccl_device_inline void path_state_next(KernelGlobals *kg, PathState *state, int label)
+ccl_device_inline void path_state_next(KernelGlobals *kg, ccl_addr_space PathState *state, int label)
{
/* ray through transparent keeps same flags from previous ray and is
* not counted as a regular bounce, transparent has separate max */
@@ -106,7 +106,7 @@ ccl_device_inline void path_state_next(KernelGlobals *kg, PathState *state, int
state->flag &= ~(PATH_RAY_GLOSSY|PATH_RAY_SINGULAR|PATH_RAY_MIS_SKIP);
}
else if(label & LABEL_GLOSSY) {
- state->flag |= PATH_RAY_GLOSSY|PATH_RAY_GLOSSY_ANCESTOR;
+ state->flag |= PATH_RAY_GLOSSY;
state->flag &= ~(PATH_RAY_DIFFUSE|PATH_RAY_SINGULAR|PATH_RAY_MIS_SKIP);
}
else {
@@ -138,7 +138,7 @@ ccl_device_inline uint path_state_ray_visibility(KernelGlobals *kg, PathState *s
return flag;
}
-ccl_device_inline float path_state_terminate_probability(KernelGlobals *kg, PathState *state, const float3 throughput)
+ccl_device_inline float path_state_terminate_probability(KernelGlobals *kg, ccl_addr_space PathState *state, const float3 throughput)
{
if(state->flag & PATH_RAY_TRANSPARENT) {
/* transparent rays treated separately */
diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h
index f0d4e98c5e0..fe85a6b6e4b 100644
--- a/intern/cycles/kernel/kernel_path_surface.h
+++ b/intern/cycles/kernel/kernel_path_surface.h
@@ -24,7 +24,7 @@ ccl_device void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RN
{
#ifdef __EMISSION__
/* sample illumination from lights to find path contribution */
- if(!(sd->flag & SD_BSDF_HAS_EVAL))
+ if(!(ccl_fetch(sd, flag) & SD_BSDF_HAS_EVAL))
return;
Ray light_ray;
@@ -32,7 +32,7 @@ ccl_device void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RN
bool is_lamp;
#ifdef __OBJECT_MOTION__
- light_ray.time = sd->time;
+ light_ray.time = ccl_fetch(sd, time);
#endif
if(sample_all_lights) {
@@ -53,7 +53,7 @@ ccl_device void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RN
path_branched_rng_2D(kg, &lamp_rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
LightSample ls;
- lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls);
+ lamp_light_sample(kg, i, light_u, light_v, ccl_fetch(sd, P), &ls);
if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
/* trace shadow ray */
@@ -85,7 +85,7 @@ ccl_device void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RN
light_t = 0.5f*light_t;
LightSample ls;
- light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, state->bounce, &ls);
+ light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);
if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
/* trace shadow ray */
@@ -106,7 +106,7 @@ ccl_device void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RN
path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
LightSample ls;
- light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, state->bounce, &ls);
+ light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);
/* sample random light */
if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
@@ -149,15 +149,15 @@ ccl_device bool kernel_branched_path_surface_bounce(KernelGlobals *kg, RNG *rng,
path_state_next(kg, state, label);
/* setup ray */
- ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT)? -sd->Ng: sd->Ng);
+ ray->P = ray_offset(ccl_fetch(sd, P), (label & LABEL_TRANSMIT)? -ccl_fetch(sd, Ng): ccl_fetch(sd, Ng));
ray->D = bsdf_omega_in;
ray->t = FLT_MAX;
#ifdef __RAY_DIFFERENTIALS__
- ray->dP = sd->dP;
+ ray->dP = ccl_fetch(sd, dP);
ray->dD = bsdf_domega_in;
#endif
#ifdef __OBJECT_MOTION__
- ray->time = sd->time;
+ ray->time = ccl_fetch(sd, time);
#endif
#ifdef __VOLUME__
@@ -181,12 +181,13 @@ ccl_device bool kernel_branched_path_surface_bounce(KernelGlobals *kg, RNG *rng,
#endif
+#ifndef __SPLIT_KERNEL__
/* path tracing: connect path directly to position on a light and add it to L */
-ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, RNG *rng,
- ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L)
+ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_addr_space RNG *rng,
+ ShaderData *sd, float3 throughput, ccl_addr_space PathState *state, PathRadiance *L)
{
#ifdef __EMISSION__
- if(!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL)))
+ if(!(kernel_data.integrator.use_direct_light && (ccl_fetch(sd, flag) & SD_BSDF_HAS_EVAL)))
return;
/* sample illumination from lights to find path contribution */
@@ -199,11 +200,11 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, RNG
bool is_lamp;
#ifdef __OBJECT_MOTION__
- light_ray.time = sd->time;
+ light_ray.time = ccl_fetch(sd, time);
#endif
LightSample ls;
- light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, state->bounce, &ls);
+ light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);
if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
/* trace shadow ray */
@@ -216,13 +217,14 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, RNG
}
#endif
}
+#endif
/* path tracing: bounce off or through surface to with new direction stored in ray */
-ccl_device_inline bool kernel_path_surface_bounce(KernelGlobals *kg, RNG *rng,
- ShaderData *sd, float3 *throughput, PathState *state, PathRadiance *L, Ray *ray)
+ccl_device_inline bool kernel_path_surface_bounce(KernelGlobals *kg, ccl_addr_space RNG *rng,
+ ShaderData *sd, ccl_addr_space float3 *throughput, ccl_addr_space PathState *state, PathRadiance *L, ccl_addr_space Ray *ray)
{
/* no BSDF? we can stop here */
- if(sd->flag & SD_BSDF) {
+ if(ccl_fetch(sd, flag) & SD_BSDF) {
/* sample BSDF */
float bsdf_pdf;
BsdfEval bsdf_eval;
@@ -254,16 +256,16 @@ ccl_device_inline bool kernel_path_surface_bounce(KernelGlobals *kg, RNG *rng,
path_state_next(kg, state, label);
/* setup ray */
- ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT)? -sd->Ng: sd->Ng);
+ ray->P = ray_offset(ccl_fetch(sd, P), (label & LABEL_TRANSMIT)? -ccl_fetch(sd, Ng): ccl_fetch(sd, Ng));
ray->D = bsdf_omega_in;
if(state->bounce == 0)
- ray->t -= sd->ray_length; /* clipping works through transparent */
+ ray->t -= ccl_fetch(sd, ray_length); /* clipping works through transparent */
else
ray->t = FLT_MAX;
#ifdef __RAY_DIFFERENTIALS__
- ray->dP = sd->dP;
+ ray->dP = ccl_fetch(sd, dP);
ray->dD = bsdf_domega_in;
#endif
@@ -275,21 +277,21 @@ ccl_device_inline bool kernel_path_surface_bounce(KernelGlobals *kg, RNG *rng,
return true;
}
#ifdef __VOLUME__
- else if(sd->flag & SD_HAS_ONLY_VOLUME) {
+ else if(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME) {
/* no surface shader but have a volume shader? act transparent */
/* update path state, count as transparent */
path_state_next(kg, state, LABEL_TRANSPARENT);
if(state->bounce == 0)
- ray->t -= sd->ray_length; /* clipping works through transparent */
+ ray->t -= ccl_fetch(sd, ray_length); /* clipping works through transparent */
else
ray->t = FLT_MAX;
/* setup ray position, direction stays unchanged */
- ray->P = ray_offset(sd->P, -sd->Ng);
+ ray->P = ray_offset(ccl_fetch(sd, P), -ccl_fetch(sd, Ng));
#ifdef __RAY_DIFFERENTIALS__
- ray->dP = sd->dP;
+ ray->dP = ccl_fetch(sd, dP);
#endif
/* enter/exit volume */
diff --git a/intern/cycles/kernel/kernel_projection.h b/intern/cycles/kernel/kernel_projection.h
index bd18fd21354..62922df3286 100644
--- a/intern/cycles/kernel/kernel_projection.h
+++ b/intern/cycles/kernel/kernel_projection.h
@@ -163,6 +163,10 @@ ccl_device float3 mirrorball_to_direction(float u, float v)
dir.x = 2.0f*u - 1.0f;
dir.z = 2.0f*v - 1.0f;
+
+ if(dir.x*dir.x + dir.z*dir.z > 1.0f)
+ return make_float3(0.0f, 0.0f, 0.0f);
+
dir.y = -sqrtf(max(1.0f - dir.x*dir.x - dir.z*dir.z, 0.0f));
/* reflection */
@@ -191,6 +195,8 @@ ccl_device float3 panorama_to_direction(KernelGlobals *kg, float u, float v)
switch(kernel_data.cam.panorama_type) {
case PANORAMA_EQUIRECTANGULAR:
return equirectangular_range_to_direction(u, v, kernel_data.cam.equirectangular_range);
+ case PANORAMA_MIRRORBALL:
+ return mirrorball_to_direction(u, v);
case PANORAMA_FISHEYE_EQUIDISTANT:
return fisheye_to_direction(u, v, kernel_data.cam.fisheye_fov);
case PANORAMA_FISHEYE_EQUISOLID:
@@ -205,6 +211,8 @@ ccl_device float2 direction_to_panorama(KernelGlobals *kg, float3 dir)
switch(kernel_data.cam.panorama_type) {
case PANORAMA_EQUIRECTANGULAR:
return direction_to_equirectangular_range(dir, kernel_data.cam.equirectangular_range);
+ case PANORAMA_MIRRORBALL:
+ return direction_to_mirrorball(dir);
case PANORAMA_FISHEYE_EQUIDISTANT:
return direction_to_fisheye(dir, kernel_data.cam.fisheye_fov);
case PANORAMA_FISHEYE_EQUISOLID:
diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h
new file mode 100644
index 00000000000..9e65e2b0768
--- /dev/null
+++ b/intern/cycles/kernel/kernel_queues.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __KERNEL_QUEUE_H__
+#define __KERNEL_QUEUE_H__
+
+/*
+ * Queue utility functions for split kernel
+ */
+
+#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
+#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
+
+/*
+ * Enqueue ray index into the queue
+ */
+ccl_device void enqueue_ray_index (
+ int ray_index, /* Ray index to be enqueued */
+ int queue_number, /* Queue in which the ray index should be enqueued*/
+ ccl_global int *queues, /* Buffer of all queues */
+ int queue_size, /* Size of each queue */
+ ccl_global int *queue_index /* Array of size num_queues; Used for atomic increment */
+ )
+{
+ /* This thread's queue index */
+ int my_queue_index = atomic_inc(&queue_index[queue_number]) + (queue_number * queue_size);
+ queues[my_queue_index] = ray_index;
+}
+
+/*
+ * Get the ray index for this thread
+ * Returns a positive ray_index for threads that have to do some work;
+ * Returns 'QUEUE_EMPTY_SLOT' for threads that don't have any work
+ * i.e All ray's in the queue has been successfully allocated and there
+ * is no more ray to allocate to other threads.
+ */
+ccl_device int get_ray_index (
+ int thread_index, /* Global thread index */
+ int queue_number, /* Queue to operate on */
+ ccl_global int *queues, /* Buffer of all queues */
+ int queuesize, /* Size of a queue */
+ int empty_queue /* Empty the queue slot as soon as we fetch the ray index */
+ )
+{
+ int ray_index = queues[queue_number * queuesize + thread_index];
+
+ if(empty_queue && ray_index != QUEUE_EMPTY_SLOT) {
+ queues[queue_number * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
+ }
+
+ return ray_index;
+}
+
+/* The following functions are to realize Local memory variant of enqueue ray index function */
+
+/* All threads should call this function */
+ccl_device void enqueue_ray_index_local(
+ int ray_index, /* Ray index to enqueue*/
+ int queue_number, /* Queue in which to enqueue ray index */
+ char enqueue_flag, /* True for threads whose ray index has to be enqueued */
+ int queuesize, /* queue size */
+ ccl_local unsigned int *local_queue_atomics, /* To to local queue atomics */
+ ccl_global int *Queue_data, /* Queues */
+ ccl_global int *Queue_index /* To do global queue atomics */
+ )
+{
+ int lidx = get_local_id(1) * get_local_size(0) + get_local_id(0);
+
+ /* Get local queue id */
+ unsigned int lqidx;
+ if(enqueue_flag) {
+ lqidx = atomic_inc(local_queue_atomics);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ /* Get global queue offset */
+ if(lidx == 0) {
+ *local_queue_atomics = atomic_add(&Queue_index[queue_number], *local_queue_atomics);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ /* Get global queue index and enqueue ray */
+ if(enqueue_flag) {
+ unsigned int my_gqidx = queue_number * queuesize + (*local_queue_atomics) + lqidx;
+ Queue_data[my_gqidx] = ray_index;
+ }
+}
+
+ccl_device unsigned int get_local_queue_index(
+ int queue_number, /* Queue in which to enqueue the ray; -1 if no queue */
+ ccl_local unsigned int *local_queue_atomics
+ )
+{
+ int my_lqidx = atomic_inc(&local_queue_atomics[queue_number]);
+ return my_lqidx;
+}
+
+ccl_device unsigned int get_global_per_queue_offset(
+ int queue_number,
+ ccl_local unsigned int *local_queue_atomics,
+ ccl_global int* global_queue_atomics
+ )
+{
+ unsigned int queue_offset = atomic_add((&global_queue_atomics[queue_number]), local_queue_atomics[queue_number]);
+ return queue_offset;
+}
+
+ccl_device unsigned int get_global_queue_index(
+ int queue_number,
+ int queuesize,
+ unsigned int lqidx,
+ ccl_local unsigned int * global_per_queue_offset
+ )
+{
+ int my_gqidx = queuesize * queue_number + lqidx + global_per_queue_offset[queue_number];
+ return my_gqidx;
+}
+
+#endif // __KERNEL_QUEUE_H__
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index 40767bac013..631a2cb75de 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -98,7 +98,7 @@ ccl_device uint sobol_lookup(const uint m, const uint frame, const uint ex, cons
return index;
}
-ccl_device_inline float path_rng_1D(KernelGlobals *kg, RNG *rng, int sample, int num_samples, int dimension)
+ccl_device_inline float path_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *rng, int sample, int num_samples, int dimension)
{
#ifdef __CMJ__
if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) {
@@ -132,7 +132,7 @@ ccl_device_inline float path_rng_1D(KernelGlobals *kg, RNG *rng, int sample, int
#endif
}
-ccl_device_inline void path_rng_2D(KernelGlobals *kg, RNG *rng, int sample, int num_samples, int dimension, float *fx, float *fy)
+ccl_device_inline void path_rng_2D(KernelGlobals *kg, ccl_addr_space RNG *rng, int sample, int num_samples, int dimension, float *fx, float *fy)
{
#ifdef __CMJ__
if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) {
@@ -149,7 +149,7 @@ ccl_device_inline void path_rng_2D(KernelGlobals *kg, RNG *rng, int sample, int
}
}
-ccl_device_inline void path_rng_init(KernelGlobals *kg, ccl_global uint *rng_state, int sample, int num_samples, RNG *rng, int x, int y, float *fx, float *fy)
+ccl_device_inline void path_rng_init(KernelGlobals *kg, ccl_global uint *rng_state, int sample, int num_samples, ccl_addr_space RNG *rng, int x, int y, float *fx, float *fy)
{
#ifdef __SOBOL_FULL_SCREEN__
uint px, py;
@@ -261,12 +261,12 @@ ccl_device uint lcg_init(uint seed)
* For branches in the path we must be careful not to reuse the same number
* in a sequence and offset accordingly. */
-ccl_device_inline float path_state_rng_1D(KernelGlobals *kg, RNG *rng, const PathState *state, int dimension)
+ccl_device_inline float path_state_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *rng, const ccl_addr_space PathState *state, int dimension)
{
return path_rng_1D(kg, rng, state->sample, state->num_samples, state->rng_offset + dimension);
}
-ccl_device_inline float path_state_rng_1D_for_decision(KernelGlobals *kg, RNG *rng, const PathState *state, int dimension)
+ccl_device_inline float path_state_rng_1D_for_decision(KernelGlobals *kg, ccl_addr_space RNG *rng, const ccl_addr_space PathState *state, int dimension)
{
/* the rng_offset is not increased for transparent bounces. if we do then
* fully transparent objects can become subtly visible by the different
@@ -279,23 +279,23 @@ ccl_device_inline float path_state_rng_1D_for_decision(KernelGlobals *kg, RNG *r
return path_rng_1D(kg, rng, state->sample, state->num_samples, rng_offset + dimension);
}
-ccl_device_inline void path_state_rng_2D(KernelGlobals *kg, RNG *rng, const PathState *state, int dimension, float *fx, float *fy)
+ccl_device_inline void path_state_rng_2D(KernelGlobals *kg, ccl_addr_space RNG *rng, const ccl_addr_space PathState *state, int dimension, float *fx, float *fy)
{
path_rng_2D(kg, rng, state->sample, state->num_samples, state->rng_offset + dimension, fx, fy);
}
-ccl_device_inline float path_branched_rng_1D(KernelGlobals *kg, RNG *rng, const PathState *state, int branch, int num_branches, int dimension)
+ccl_device_inline float path_branched_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *rng, const PathState *state, int branch, int num_branches, int dimension)
{
return path_rng_1D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, state->rng_offset + dimension);
}
-ccl_device_inline float path_branched_rng_1D_for_decision(KernelGlobals *kg, RNG *rng, const PathState *state, int branch, int num_branches, int dimension)
+ccl_device_inline float path_branched_rng_1D_for_decision(KernelGlobals *kg, ccl_addr_space RNG *rng, const PathState *state, int branch, int num_branches, int dimension)
{
int rng_offset = state->rng_offset + state->transparent_bounce*PRNG_BOUNCE_NUM;
return path_rng_1D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, rng_offset + dimension);
}
-ccl_device_inline void path_branched_rng_2D(KernelGlobals *kg, RNG *rng, const PathState *state, int branch, int num_branches, int dimension, float *fx, float *fy)
+ccl_device_inline void path_branched_rng_2D(KernelGlobals *kg, ccl_addr_space RNG *rng, const PathState *state, int branch, int num_branches, int dimension, float *fx, float *fy)
{
path_rng_2D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, state->rng_offset + dimension, fx, fy);
}
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index 0f3b09a9555..94e13028599 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -37,13 +37,13 @@ CCL_NAMESPACE_BEGIN
#ifdef __OBJECT_MOTION__
ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
{
- if(sd->flag & SD_OBJECT_MOTION) {
- sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time);
- sd->ob_itfm = transform_quick_inverse(sd->ob_tfm);
+ if(ccl_fetch(sd, flag) & SD_OBJECT_MOTION) {
+ ccl_fetch(sd, ob_tfm) = object_fetch_transform_motion(kg, ccl_fetch(sd, object), time);
+ ccl_fetch(sd, ob_itfm) = transform_quick_inverse(ccl_fetch(sd, ob_tfm));
}
else {
- sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
- sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
+ ccl_fetch(sd, ob_tfm) = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM);
+ ccl_fetch(sd, ob_itfm) = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM);
}
}
#endif
@@ -52,55 +52,55 @@ ccl_device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
const Intersection *isect, const Ray *ray, int bounce, int transparent_bounce)
{
#ifdef __INSTANCING__
- sd->object = (isect->object == PRIM_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object;
+ ccl_fetch(sd, object) = (isect->object == PRIM_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object;
#endif
- sd->type = isect->type;
- sd->flag = kernel_tex_fetch(__object_flag, sd->object);
+ ccl_fetch(sd, type) = isect->type;
+ ccl_fetch(sd, flag) = kernel_tex_fetch(__object_flag, ccl_fetch(sd, object));
/* matrices and time */
#ifdef __OBJECT_MOTION__
shader_setup_object_transforms(kg, sd, ray->time);
- sd->time = ray->time;
+ ccl_fetch(sd, time) = ray->time;
#endif
- sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
- sd->ray_length = isect->t;
- sd->ray_depth = bounce;
- sd->transparent_depth = transparent_bounce;
+ ccl_fetch(sd, prim) = kernel_tex_fetch(__prim_index, isect->prim);
+ ccl_fetch(sd, ray_length) = isect->t;
+ ccl_fetch(sd, ray_depth) = bounce;
+ ccl_fetch(sd, transparent_depth) = transparent_bounce;
#ifdef __UV__
- sd->u = isect->u;
- sd->v = isect->v;
+ ccl_fetch(sd, u) = isect->u;
+ ccl_fetch(sd, v) = isect->v;
#endif
#ifdef __HAIR__
- if(sd->type & PRIMITIVE_ALL_CURVE) {
+ if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
/* curve */
- float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
+ float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
- sd->shader = __float_as_int(curvedata.z);
- sd->P = bvh_curve_refine(kg, sd, isect, ray);
+ ccl_fetch(sd, shader) = __float_as_int(curvedata.z);
+ ccl_fetch(sd, P) = bvh_curve_refine(kg, sd, isect, ray);
}
else
#endif
- if(sd->type & PRIMITIVE_TRIANGLE) {
+ if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) {
/* static triangle */
float3 Ng = triangle_normal(kg, sd);
- sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
+ ccl_fetch(sd, shader) = kernel_tex_fetch(__tri_shader, ccl_fetch(sd, prim));
/* vectors */
- sd->P = triangle_refine(kg, sd, isect, ray);
- sd->Ng = Ng;
- sd->N = Ng;
+ ccl_fetch(sd, P) = triangle_refine(kg, sd, isect, ray);
+ ccl_fetch(sd, Ng) = Ng;
+ ccl_fetch(sd, N) = Ng;
/* smooth normal */
- if(sd->shader & SHADER_SMOOTH_NORMAL)
- sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v);
+ if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL)
+ ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v));
#ifdef __DPDU__
/* dPdu/dPdv */
- triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
+ triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv));
#endif
}
else {
@@ -108,40 +108,40 @@ ccl_device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
motion_triangle_shader_setup(kg, sd, isect, ray, false);
}
- sd->I = -ray->D;
+ ccl_fetch(sd, I) = -ray->D;
- sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2);
+ ccl_fetch(sd, flag) |= kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2);
#ifdef __INSTANCING__
if(isect->object != OBJECT_NONE) {
/* instance transform */
- object_normal_transform(kg, sd, &sd->N);
- object_normal_transform(kg, sd, &sd->Ng);
+ object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N));
+ object_normal_transform_auto(kg, sd, &ccl_fetch(sd, Ng));
#ifdef __DPDU__
- object_dir_transform(kg, sd, &sd->dPdu);
- object_dir_transform(kg, sd, &sd->dPdv);
+ object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu));
+ object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv));
#endif
}
#endif
/* backfacing test */
- bool backfacing = (dot(sd->Ng, sd->I) < 0.0f);
+ bool backfacing = (dot(ccl_fetch(sd, Ng), ccl_fetch(sd, I)) < 0.0f);
if(backfacing) {
- sd->flag |= SD_BACKFACING;
- sd->Ng = -sd->Ng;
- sd->N = -sd->N;
+ ccl_fetch(sd, flag) |= SD_BACKFACING;
+ ccl_fetch(sd, Ng) = -ccl_fetch(sd, Ng);
+ ccl_fetch(sd, N) = -ccl_fetch(sd, N);
#ifdef __DPDU__
- sd->dPdu = -sd->dPdu;
- sd->dPdv = -sd->dPdv;
+ ccl_fetch(sd, dPdu) = -ccl_fetch(sd, dPdu);
+ ccl_fetch(sd, dPdv) = -ccl_fetch(sd, dPdv);
#endif
}
#ifdef __RAY_DIFFERENTIALS__
/* differentials */
- differential_transfer(&sd->dP, ray->dP, ray->D, ray->dD, sd->Ng, isect->t);
- differential_incoming(&sd->dI, ray->dD);
- differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
+ differential_transfer(&ccl_fetch(sd, dP), ray->dP, ray->D, ray->dD, ccl_fetch(sd, Ng), isect->t);
+ differential_incoming(&ccl_fetch(sd, dI), ray->dD);
+ differential_dudv(&ccl_fetch(sd, du), &ccl_fetch(sd, dv), ccl_fetch(sd, dPdu), ccl_fetch(sd, dPdv), ccl_fetch(sd, dP), ccl_fetch(sd, Ng));
#endif
}
@@ -166,7 +166,7 @@ ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderDat
/* fetch triangle data */
if(sd->type == PRIMITIVE_TRIANGLE) {
float3 Ng = triangle_normal(kg, sd);
- sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
+ sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
/* static triangle */
sd->P = triangle_refine_subsurface(kg, sd, isect, ray);
@@ -230,105 +230,105 @@ ccl_device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
int shader, int object, int prim, float u, float v, float t, float time, int bounce, int transparent_bounce)
{
/* vectors */
- sd->P = P;
- sd->N = Ng;
- sd->Ng = Ng;
- sd->I = I;
- sd->shader = shader;
- sd->type = (prim == PRIM_NONE)? PRIMITIVE_NONE: PRIMITIVE_TRIANGLE;
+ ccl_fetch(sd, P) = P;
+ ccl_fetch(sd, N) = Ng;
+ ccl_fetch(sd, Ng) = Ng;
+ ccl_fetch(sd, I) = I;
+ ccl_fetch(sd, shader) = shader;
+ ccl_fetch(sd, type) = (prim == PRIM_NONE)? PRIMITIVE_NONE: PRIMITIVE_TRIANGLE;
/* primitive */
#ifdef __INSTANCING__
- sd->object = object;
+ ccl_fetch(sd, object) = object;
#endif
/* currently no access to bvh prim index for strand sd->prim*/
- sd->prim = prim;
+ ccl_fetch(sd, prim) = prim;
#ifdef __UV__
- sd->u = u;
- sd->v = v;
+ ccl_fetch(sd, u) = u;
+ ccl_fetch(sd, v) = v;
#endif
- sd->ray_length = t;
- sd->ray_depth = bounce;
- sd->transparent_depth = transparent_bounce;
+ ccl_fetch(sd, ray_length) = t;
+ ccl_fetch(sd, ray_depth) = bounce;
+ ccl_fetch(sd, transparent_depth) = transparent_bounce;
/* detect instancing, for non-instanced the object index is -object-1 */
#ifdef __INSTANCING__
bool instanced = false;
- if(sd->prim != PRIM_NONE) {
- if(sd->object >= 0)
+ if(ccl_fetch(sd, prim) != PRIM_NONE) {
+ if(ccl_fetch(sd, object) >= 0)
instanced = true;
else
#endif
- sd->object = ~sd->object;
+ ccl_fetch(sd, object) = ~ccl_fetch(sd, object);
#ifdef __INSTANCING__
}
#endif
- sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2);
- if(sd->object != OBJECT_NONE) {
- sd->flag |= kernel_tex_fetch(__object_flag, sd->object);
+ ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2);
+ if(ccl_fetch(sd, object) != OBJECT_NONE) {
+ ccl_fetch(sd, flag) |= kernel_tex_fetch(__object_flag, ccl_fetch(sd, object));
#ifdef __OBJECT_MOTION__
shader_setup_object_transforms(kg, sd, time);
}
- sd->time = time;
+ ccl_fetch(sd, time) = time;
#else
}
#endif
- if(sd->type & PRIMITIVE_TRIANGLE) {
+ if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) {
/* smooth normal */
- if(sd->shader & SHADER_SMOOTH_NORMAL) {
- sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v);
+ if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) {
+ ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v));
#ifdef __INSTANCING__
if(instanced)
- object_normal_transform(kg, sd, &sd->N);
+ object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N));
#endif
}
/* dPdu/dPdv */
#ifdef __DPDU__
- triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
+ triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv));
#ifdef __INSTANCING__
if(instanced) {
- object_dir_transform(kg, sd, &sd->dPdu);
- object_dir_transform(kg, sd, &sd->dPdv);
+ object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu));
+ object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv));
}
#endif
#endif
}
else {
#ifdef __DPDU__
- sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
- sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
+ ccl_fetch(sd, dPdu) = make_float3(0.0f, 0.0f, 0.0f);
+ ccl_fetch(sd, dPdv) = make_float3(0.0f, 0.0f, 0.0f);
#endif
}
/* backfacing test */
- if(sd->prim != PRIM_NONE) {
- bool backfacing = (dot(sd->Ng, sd->I) < 0.0f);
+ if(ccl_fetch(sd, prim) != PRIM_NONE) {
+ bool backfacing = (dot(ccl_fetch(sd, Ng), ccl_fetch(sd, I)) < 0.0f);
if(backfacing) {
- sd->flag |= SD_BACKFACING;
- sd->Ng = -sd->Ng;
- sd->N = -sd->N;
+ ccl_fetch(sd, flag) |= SD_BACKFACING;
+ ccl_fetch(sd, Ng) = -ccl_fetch(sd, Ng);
+ ccl_fetch(sd, N) = -ccl_fetch(sd, N);
#ifdef __DPDU__
- sd->dPdu = -sd->dPdu;
- sd->dPdv = -sd->dPdv;
+ ccl_fetch(sd, dPdu) = -ccl_fetch(sd, dPdu);
+ ccl_fetch(sd, dPdv) = -ccl_fetch(sd, dPdv);
#endif
}
}
#ifdef __RAY_DIFFERENTIALS__
/* no ray differentials here yet */
- sd->dP = differential3_zero();
- sd->dI = differential3_zero();
- sd->du = differential_zero();
- sd->dv = differential_zero();
+ ccl_fetch(sd, dP) = differential3_zero();
+ ccl_fetch(sd, dI) = differential3_zero();
+ ccl_fetch(sd, du) = differential_zero();
+ ccl_fetch(sd, dv) = differential_zero();
#endif
}
@@ -355,45 +355,46 @@ ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray, int bounce, int transparent_bounce)
{
/* vectors */
- sd->P = ray->D;
- sd->N = -ray->D;
- sd->Ng = -ray->D;
- sd->I = -ray->D;
- sd->shader = kernel_data.background.surface_shader;
- sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2);
+ ccl_fetch(sd, P) = ray->D;
+ ccl_fetch(sd, N) = -ray->D;
+ ccl_fetch(sd, Ng) = -ray->D;
+ ccl_fetch(sd, I) = -ray->D;
+ ccl_fetch(sd, shader) = kernel_data.background.surface_shader;
+ ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2);
#ifdef __OBJECT_MOTION__
- sd->time = ray->time;
+ ccl_fetch(sd, time) = ray->time;
#endif
- sd->ray_length = 0.0f;
- sd->ray_depth = bounce;
- sd->transparent_depth = transparent_bounce;
+ ccl_fetch(sd, ray_length) = 0.0f;
+ ccl_fetch(sd, ray_depth) = bounce;
+ ccl_fetch(sd, transparent_depth) = transparent_bounce;
#ifdef __INSTANCING__
- sd->object = PRIM_NONE;
+ ccl_fetch(sd, object) = PRIM_NONE;
#endif
- sd->prim = PRIM_NONE;
+ ccl_fetch(sd, prim) = PRIM_NONE;
#ifdef __UV__
- sd->u = 0.0f;
- sd->v = 0.0f;
+ ccl_fetch(sd, u) = 0.0f;
+ ccl_fetch(sd, v) = 0.0f;
#endif
#ifdef __DPDU__
/* dPdu/dPdv */
- sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
- sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
+ ccl_fetch(sd, dPdu) = make_float3(0.0f, 0.0f, 0.0f);
+ ccl_fetch(sd, dPdv) = make_float3(0.0f, 0.0f, 0.0f);
#endif
#ifdef __RAY_DIFFERENTIALS__
/* differentials */
- sd->dP = ray->dD;
- differential_incoming(&sd->dI, sd->dP);
- sd->du = differential_zero();
- sd->dv = differential_zero();
+ ccl_fetch(sd, dP) = ray->dD;
+ differential_incoming(&ccl_fetch(sd, dI), ccl_fetch(sd, dP));
+ ccl_fetch(sd, du) = differential_zero();
+ ccl_fetch(sd, dv) = differential_zero();
#endif
}
/* ShaderData setup from point inside volume */
+#ifdef __VOLUME__
ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray, int bounce, int transparent_bounce)
{
/* vectors */
@@ -439,6 +440,7 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *s
sd->ray_P = ray->P;
sd->ray_dP = ray->dP;
}
+#endif
/* Merging */
@@ -478,6 +480,7 @@ ccl_device void shader_merge_closures(ShaderData *sd)
}
sd->num_closure--;
+ kernel_assert(sd->num_closure >= 0);
j--;
}
}
@@ -491,11 +494,11 @@ ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, const ShaderDa
{
/* this is the veach one-sample model with balance heuristic, some pdf
* factors drop out when using balance heuristic weighting */
- for(int i = 0; i< sd->num_closure; i++) {
+ for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
if(i == skip_bsdf)
continue;
- const ShaderClosure *sc = &sd->closure[i];
+ const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
if(CLOSURE_IS_BSDF(sc->type)) {
float bsdf_pdf = 0.0f;
@@ -513,7 +516,7 @@ ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, const ShaderDa
*pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
}
-ccl_device void shader_bsdf_eval(KernelGlobals *kg, const ShaderData *sd,
+ccl_device void shader_bsdf_eval(KernelGlobals *kg, ShaderData *sd,
const float3 omega_in, BsdfEval *eval, float *pdf)
{
bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
@@ -527,22 +530,22 @@ ccl_device int shader_bsdf_sample(KernelGlobals *kg, const ShaderData *sd,
{
int sampled = 0;
- if(sd->num_closure > 1) {
+ if(ccl_fetch(sd, num_closure) > 1) {
/* pick a BSDF closure based on sample weights */
float sum = 0.0f;
- for(sampled = 0; sampled < sd->num_closure; sampled++) {
- const ShaderClosure *sc = &sd->closure[sampled];
+ for(sampled = 0; sampled < ccl_fetch(sd, num_closure); sampled++) {
+ const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
if(CLOSURE_IS_BSDF(sc->type))
sum += sc->sample_weight;
}
- float r = sd->randb_closure*sum;
+ float r = ccl_fetch(sd, randb_closure)*sum;
sum = 0.0f;
- for(sampled = 0; sampled < sd->num_closure; sampled++) {
- const ShaderClosure *sc = &sd->closure[sampled];
+ for(sampled = 0; sampled < ccl_fetch(sd, num_closure); sampled++) {
+ const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
if(CLOSURE_IS_BSDF(sc->type)) {
sum += sc->sample_weight;
@@ -552,13 +555,14 @@ ccl_device int shader_bsdf_sample(KernelGlobals *kg, const ShaderData *sd,
}
}
- if(sampled == sd->num_closure) {
+ if(sampled == ccl_fetch(sd, num_closure)) {
*pdf = 0.0f;
return LABEL_NONE;
}
}
- const ShaderClosure *sc = &sd->closure[sampled];
+ const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
+
int label;
float3 eval;
@@ -568,7 +572,7 @@ ccl_device int shader_bsdf_sample(KernelGlobals *kg, const ShaderData *sd,
if(*pdf != 0.0f) {
bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
- if(sd->num_closure > 1) {
+ if(ccl_fetch(sd, num_closure) > 1) {
float sweight = sc->sample_weight;
_shader_bsdf_multi_eval(kg, sd, *omega_in, pdf, sampled, bsdf_eval, *pdf*sweight, sweight);
}
@@ -595,8 +599,8 @@ ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, const ShaderData *s
ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughness)
{
- for(int i = 0; i< sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
+ for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
+ ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
if(CLOSURE_IS_BSDF(sc->type))
bsdf_blur(kg, sc, roughness);
@@ -605,13 +609,13 @@ ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughn
ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, ShaderData *sd)
{
- if(sd->flag & SD_HAS_ONLY_VOLUME)
+ if(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME)
return make_float3(1.0f, 1.0f, 1.0f);
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
- for(int i = 0; i< sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
+ for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
+ ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) // todo: make this work for osl
eval += sc->weight;
@@ -634,8 +638,8 @@ ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd)
{
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
- for(int i = 0; i< sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
+ for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
+ ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
eval += sc->weight;
@@ -648,8 +652,8 @@ ccl_device float3 shader_bsdf_glossy(KernelGlobals *kg, ShaderData *sd)
{
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
- for(int i = 0; i< sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
+ for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
+ ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
eval += sc->weight;
@@ -662,8 +666,8 @@ ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd)
{
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
- for(int i = 0; i< sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
+ for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
+ ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
if(CLOSURE_IS_BSDF_TRANSMISSION(sc->type))
eval += sc->weight;
@@ -676,8 +680,8 @@ ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd)
{
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
- for(int i = 0; i< sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
+ for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
+ ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
if(CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type))
eval += sc->weight;
@@ -691,8 +695,8 @@ ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_fac
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
float3 N = make_float3(0.0f, 0.0f, 0.0f);
- for(int i = 0; i< sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
+ for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
+ ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
eval += sc->weight*ao_factor;
@@ -700,12 +704,12 @@ ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_fac
}
else if(CLOSURE_IS_AMBIENT_OCCLUSION(sc->type)) {
eval += sc->weight;
- N += sd->N*average(sc->weight);
+ N += ccl_fetch(sd, N)*average(sc->weight);
}
}
if(is_zero(N))
- N = sd->N;
+ N = ccl_fetch(sd, N);
else
N = normalize(N);
@@ -719,8 +723,8 @@ ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_b
float3 N = make_float3(0.0f, 0.0f, 0.0f);
float texture_blur = 0.0f, weight_sum = 0.0f;
- for(int i = 0; i< sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
+ for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
+ ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
if(CLOSURE_IS_BSSRDF(sc->type)) {
float avg_weight = fabsf(average(sc->weight));
@@ -733,7 +737,7 @@ ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_b
}
if(N_)
- *N_ = (is_zero(N))? sd->N: normalize(N);
+ *N_ = (is_zero(N))? ccl_fetch(sd, N): normalize(N);
if(texture_blur_)
*texture_blur_ = texture_blur/weight_sum;
@@ -745,7 +749,7 @@ ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_b
ccl_device float3 emissive_eval(KernelGlobals *kg, ShaderData *sd, ShaderClosure *sc)
{
- return emissive_simple_eval(sd->Ng, sd->I);
+ return emissive_simple_eval(ccl_fetch(sd, Ng), ccl_fetch(sd, I));
}
ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd)
@@ -753,8 +757,8 @@ ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd)
float3 eval;
eval = make_float3(0.0f, 0.0f, 0.0f);
- for(int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
+ for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
+ ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
if(CLOSURE_IS_EMISSION(sc->type))
eval += emissive_eval(kg, sd, sc)*sc->weight;
@@ -769,8 +773,8 @@ ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
{
float3 weight = make_float3(0.0f, 0.0f, 0.0f);
- for(int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
+ for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
+ ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
if(CLOSURE_IS_HOLDOUT(sc->type))
weight += sc->weight;
@@ -784,8 +788,8 @@ ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd,
float randb, int path_flag, ShaderContext ctx)
{
- sd->num_closure = 0;
- sd->randb_closure = randb;
+ ccl_fetch(sd, num_closure) = 0;
+ ccl_fetch(sd, randb_closure) = randb;
#ifdef __OSL__
if(kg->osl)
@@ -796,9 +800,11 @@ ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd,
#ifdef __SVM__
svm_eval_nodes(kg, sd, SHADER_TYPE_SURFACE, path_flag);
#else
- sd->closure->weight = make_float3(0.8f, 0.8f, 0.8f);
- sd->closure->N = sd->N;
- sd->flag |= bsdf_diffuse_setup(&sd->closure);
+ ccl_fetch_array(sd, closure, 0)->weight = make_float3(0.8f, 0.8f, 0.8f);
+ ccl_fetch_array(sd, closure, 0)->N = ccl_fetch(sd, N);
+ ccl_fetch_array(sd, closure, 0)->data0 = 0.0f;
+ ccl_fetch_array(sd, closure, 0)->data1 = 0.0f;
+ ccl_fetch(sd, flag) |= bsdf_diffuse_setup(ccl_fetch_array(sd, closure, 0));
#endif
}
}
@@ -807,8 +813,8 @@ ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd,
ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd, int path_flag, ShaderContext ctx)
{
- sd->num_closure = 0;
- sd->randb_closure = 0.0f;
+ ccl_fetch(sd, num_closure) = 0;
+ ccl_fetch(sd, randb_closure) = 0.0f;
#ifdef __OSL__
if(kg->osl) {
@@ -823,8 +829,8 @@ ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd, int
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
- for(int i = 0; i< sd->num_closure; i++) {
- const ShaderClosure *sc = &sd->closure[i];
+ for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
+ const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
if(CLOSURE_IS_BACKGROUND(sc->type))
eval += sc->weight;
@@ -844,7 +850,7 @@ ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd, int
ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd, const float3 omega_in, float *pdf,
int skip_phase, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
{
- for(int i = 0; i< sd->num_closure; i++) {
+ for(int i = 0; i < sd->num_closure; i++) {
if(i == skip_phase)
continue;
@@ -997,8 +1003,8 @@ ccl_device void shader_eval_volume(KernelGlobals *kg, ShaderData *sd,
ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ShaderContext ctx)
{
- sd->num_closure = 0;
- sd->randb_closure = 0.0f;
+ ccl_fetch(sd, num_closure) = 0;
+ ccl_fetch(sd, randb_closure) = 0.0f;
/* this will modify sd->P */
#ifdef __SVM__
diff --git a/intern/cycles/kernel/kernel_shaderdata_vars.h b/intern/cycles/kernel/kernel_shaderdata_vars.h
new file mode 100644
index 00000000000..b157b82e023
--- /dev/null
+++ b/intern/cycles/kernel/kernel_shaderdata_vars.h
@@ -0,0 +1,99 @@
+/*
+* Copyright 2011-2015 Blender Foundation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#ifndef SD_VAR
+#define SD_VAR(type, what)
+#endif
+#ifndef SD_CLOSURE_VAR
+#define SD_CLOSURE_VAR(type, what, max_closure)
+#endif
+
+/* position */
+SD_VAR(float3, P)
+/* smooth normal for shading */
+SD_VAR(float3, N)
+/* true geometric normal */
+SD_VAR(float3, Ng)
+/* view/incoming direction */
+SD_VAR(float3, I)
+/* shader id */
+SD_VAR(int, shader)
+/* booleans describing shader, see ShaderDataFlag */
+SD_VAR(int, flag)
+
+/* primitive id if there is one, ~0 otherwise */
+SD_VAR(int, prim)
+
+/* combined type and curve segment for hair */
+SD_VAR(int, type)
+
+/* parametric coordinates
+* - barycentric weights for triangles */
+SD_VAR(float, u)
+SD_VAR(float, v)
+/* object id if there is one, ~0 otherwise */
+SD_VAR(int, object)
+
+/* motion blur sample time */
+SD_VAR(float, time)
+
+/* length of the ray being shaded */
+SD_VAR(float, ray_length)
+
+/* ray bounce depth */
+SD_VAR(int, ray_depth)
+
+/* ray transparent depth */
+SD_VAR(int, transparent_depth)
+
+#ifdef __RAY_DIFFERENTIALS__
+/* differential of P. these are orthogonal to Ng, not N */
+SD_VAR(differential3, dP)
+/* differential of I */
+SD_VAR(differential3, dI)
+/* differential of u, v */
+SD_VAR(differential, du)
+SD_VAR(differential, dv)
+#endif
+#ifdef __DPDU__
+/* differential of P w.r.t. parametric coordinates. note that dPdu is
+* not readily suitable as a tangent for shading on triangles. */
+SD_VAR(float3, dPdu)
+SD_VAR(float3, dPdv)
+#endif
+
+#ifdef __OBJECT_MOTION__
+/* object <-> world space transformations, cached to avoid
+* re-interpolating them constantly for shading */
+SD_VAR(Transform, ob_tfm)
+SD_VAR(Transform, ob_itfm)
+#endif
+
+/* Closure data, we store a fixed array of closures */
+SD_CLOSURE_VAR(ShaderClosure, closure, MAX_CLOSURE)
+SD_VAR(int, num_closure)
+SD_VAR(float, randb_closure)
+
+/* ray start position, only set for backgrounds */
+SD_VAR(float3, ray_P)
+SD_VAR(differential3, ray_dP)
+
+#ifdef __OSL__
+SD_VAR(struct KernelGlobals *, osl_globals)
+#endif
+
+#undef SD_VAR
+#undef SD_CLOSURE_VAR
diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h
index 8923fcebee5..2811a8348ca 100644
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@ -39,19 +39,6 @@ CCL_NAMESPACE_BEGIN
* This is CPU only because of qsort, and malloc or high stack space usage to
* record all these intersections. */
-ccl_device_noinline int shadow_intersections_compare(const void *a, const void *b)
-{
- const Intersection *isect_a = (const Intersection*)a;
- const Intersection *isect_b = (const Intersection*)b;
-
- if(isect_a->t < isect_b->t)
- return -1;
- else if(isect_a->t > isect_b->t)
- return 1;
- else
- return 0;
-}
-
#define STACK_MAX_HITS 64
ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ray, float3 *shadow)
@@ -95,7 +82,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
PathState ps = *state;
#endif
- qsort(hits, num_hits, sizeof(Intersection), shadow_intersections_compare);
+ qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
for(int hit = 0; hit < num_hits; hit++, isect++) {
/* adjust intersection distance for moving ray forward */
@@ -193,19 +180,36 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
* potentially transparent, and only in that case start marching. this gives
* one extra ray cast for the cases were we do want transparency. */
-ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ray, float3 *shadow)
+ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ccl_addr_space PathState *state, ccl_addr_space Ray *ray_input, float3 *shadow
+#ifdef __SPLIT_KERNEL__
+ , ShaderData *sd_mem, Intersection *isect_mem
+#endif
+ )
{
*shadow = make_float3(1.0f, 1.0f, 1.0f);
- if(ray->t == 0.0f)
+ if(ray_input->t == 0.0f)
return false;
- Intersection isect;
- bool blocked = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f);
+#ifdef __SPLIT_KERNEL__
+ Ray private_ray = *ray_input;
+ Ray *ray = &private_ray;
+#else
+ Ray *ray = ray_input;
+#endif
+
+#ifdef __SPLIT_KERNEL__
+ Intersection *isect = isect_mem;
+#else
+ Intersection isect_object;
+ Intersection *isect = &isect_object;
+#endif
+
+ bool blocked = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, isect, NULL, 0.0f, 0.0f);
#ifdef __TRANSPARENT_SHADOWS__
if(blocked && kernel_data.integrator.transparent_shadows) {
- if(shader_transparent_shadow(kg, &isect)) {
+ if(shader_transparent_shadow(kg, isect)) {
float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
float3 Pend = ray->P + ray->D*ray->t;
int bounce = state->transparent_bounce;
@@ -217,9 +221,8 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
if(bounce >= kernel_data.integrator.transparent_max_bounce)
return true;
- if(!scene_intersect(kg, ray, PATH_RAY_SHADOW_TRANSPARENT, &isect, NULL, 0.0f, 0.0f))
+ if(!scene_intersect(kg, ray, PATH_RAY_SHADOW_TRANSPARENT, isect, NULL, 0.0f, 0.0f))
{
-
#ifdef __VOLUME__
/* attenuation for last line segment towards light */
if(ps.volume_stack[0].shader != SHADER_NONE)
@@ -231,39 +234,44 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
return false;
}
- if(!shader_transparent_shadow(kg, &isect))
+ if(!shader_transparent_shadow(kg, isect))
return true;
#ifdef __VOLUME__
/* attenuation between last surface and next surface */
if(ps.volume_stack[0].shader != SHADER_NONE) {
Ray segment_ray = *ray;
- segment_ray.t = isect.t;
+ segment_ray.t = isect->t;
kernel_volume_shadow(kg, &ps, &segment_ray, &throughput);
}
#endif
/* setup shader data at surface */
- ShaderData sd;
- shader_setup_from_ray(kg, &sd, &isect, ray, state->bounce+1, bounce);
+#ifdef __SPLIT_KERNEL__
+ ShaderData *sd = sd_mem;
+#else
+ ShaderData sd_object;
+ ShaderData *sd = &sd_object;
+#endif
+ shader_setup_from_ray(kg, sd, isect, ray, state->bounce+1, bounce);
/* attenuation from transparent surface */
- if(!(sd.flag & SD_HAS_ONLY_VOLUME)) {
- shader_eval_surface(kg, &sd, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
- throughput *= shader_bsdf_transparency(kg, &sd);
+ if(!(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME)) {
+ shader_eval_surface(kg, sd, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
+ throughput *= shader_bsdf_transparency(kg, sd);
}
if(is_zero(throughput))
return true;
/* move ray forward */
- ray->P = ray_offset(sd.P, -sd.Ng);
+ ray->P = ray_offset(ccl_fetch(sd, P), -ccl_fetch(sd, Ng));
if(ray->t != FLT_MAX)
ray->D = normalize_len(Pend - ray->P, &ray->t);
#ifdef __VOLUME__
/* exit/enter volume */
- kernel_volume_stack_enter_exit(kg, &sd, ps.volume_stack);
+ kernel_volume_stack_enter_exit(kg, sd, ps.volume_stack);
#endif
bounce++;
diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h
index 374dc6d1dd9..f545a056cc8 100644
--- a/intern/cycles/kernel/kernel_textures.h
+++ b/intern/cycles/kernel/kernel_textures.h
@@ -24,6 +24,7 @@
/* bvh */
KERNEL_TEX(float4, texture_float4, __bvh_nodes)
+KERNEL_TEX(float4, texture_float4, __bvh_leaf_nodes)
KERNEL_TEX(float4, texture_float4, __tri_woop)
KERNEL_TEX(uint, texture_uint, __prim_type)
KERNEL_TEX(uint, texture_uint, __prim_visibility)
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 238b4b0bfdc..2a70bfcb8f0 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -24,6 +24,13 @@
#define __KERNEL_CPU__
#endif
+/* TODO(sergey): This is only to make it possible to include this header
+ * from outside of the kernel. but this could be done somewhat cleaner?
+ */
+#ifndef ccl_addr_space
+#define ccl_addr_space
+#endif
+
CCL_NAMESPACE_BEGIN
/* constants */
@@ -38,12 +45,6 @@ CCL_NAMESPACE_BEGIN
#define BSSRDF_MIN_RADIUS 1e-8f
#define BSSRDF_MAX_HITS 4
-#define BB_DRAPER 800.0f
-#define BB_MAX_TABLE_RANGE 12000.0f
-#define BB_TABLE_XPOWER 1.5f
-#define BB_TABLE_YPOWER 5.0f
-#define BB_TABLE_SPACING 2.0f
-
#define BECKMANN_TABLE_SIZE 256
#define TEX_NUM_FLOAT_IMAGES 5
@@ -72,6 +73,7 @@ CCL_NAMESPACE_BEGIN
#define __VOLUME_DECOUPLED__
#define __VOLUME_SCATTER__
#define __SHADOW_RECORD_ALL__
+#define __VOLUME_RECORD_ALL__
#endif
#ifdef __KERNEL_CUDA__
@@ -82,7 +84,7 @@ CCL_NAMESPACE_BEGIN
#define __VOLUME_SCATTER__
/* Experimental on GPU */
-#ifdef __KERNEL_CUDA_EXPERIMENTAL__
+#ifdef __KERNEL_EXPERIMENTAL__
#define __SUBSURFACE__
#define __CMJ__
#endif
@@ -94,38 +96,44 @@ CCL_NAMESPACE_BEGIN
/* keep __KERNEL_ADV_SHADING__ in sync with opencl_kernel_use_advanced_shading! */
#ifdef __KERNEL_OPENCL_NVIDIA__
-#define __KERNEL_SHADING__
-#define __KERNEL_ADV_SHADING__
+# define __KERNEL_SHADING__
+# define __KERNEL_ADV_SHADING__
+# ifdef __KERNEL_EXPERIMENTAL__
+# define __CMJ__
+# endif
#endif
#ifdef __KERNEL_OPENCL_APPLE__
-#define __KERNEL_SHADING__
+# define __KERNEL_SHADING__
//#define __KERNEL_ADV_SHADING__
#endif
#ifdef __KERNEL_OPENCL_AMD__
-#define __CL_USE_NATIVE__
-#define __KERNEL_SHADING__
-//__KERNEL_ADV_SHADING__
-#define __MULTI_CLOSURE__
-#define __TRANSPARENT_SHADOWS__
-#define __PASSES__
-#define __BACKGROUND_MIS__
-#define __LAMP_MIS__
-#define __AO__
-//#define __CAMERA_MOTION__
-//#define __OBJECT_MOTION__
-//#define __HAIR__
-//end __KERNEL_ADV_SHADING__
+# define __CL_USE_NATIVE__
+# define __KERNEL_SHADING__
+# define __MULTI_CLOSURE__
+# define __PASSES__
+# define __BACKGROUND_MIS__
+# define __LAMP_MIS__
+# define __AO__
+# define __CAMERA_MOTION__
+# define __OBJECT_MOTION__
+# define __HAIR__
+# ifdef __KERNEL_EXPERIMENTAL__
+# define __TRANSPARENT_SHADOWS__
+# endif
#endif
#ifdef __KERNEL_OPENCL_INTEL_CPU__
-#define __CL_USE_NATIVE__
-#define __KERNEL_SHADING__
-#define __KERNEL_ADV_SHADING__
+# define __CL_USE_NATIVE__
+# define __KERNEL_SHADING__
+# define __KERNEL_ADV_SHADING__
+# ifdef __KERNEL_EXPERIMENTAL__
+# define __CMJ__
+# endif
#endif
-#endif
+#endif // __KERNEL_OPENCL__
/* kernel features */
#define __SOBOL__
@@ -164,6 +172,17 @@ CCL_NAMESPACE_BEGIN
# define __KERNEL_DEBUG__
#endif
+/* Scene-based selective featrues compilation/ */
+#ifdef __NO_CAMERA_MOTION__
+# undef __CAMERA_MOTION__
+#endif
+#ifdef __NO_OBJECT_MOTION__
+# undef __OBJECT_MOTION__
+#endif
+#ifdef __NO_HAIR__
+# undef __HAIR__
+#endif
+
/* Random Numbers */
typedef uint RNG;
@@ -269,9 +288,7 @@ enum PathRayFlag {
PATH_RAY_MIS_SKIP = 2048,
PATH_RAY_DIFFUSE_ANCESTOR = 4096,
- PATH_RAY_GLOSSY_ANCESTOR = 8192,
- PATH_RAY_BSSRDF_ANCESTOR = 16384,
- PATH_RAY_SINGLE_PASS_DONE = 32768,
+ PATH_RAY_SINGLE_PASS_DONE = 8192,
/* we need layer member flags to be the 20 upper bits */
PATH_RAY_LAYER_SHIFT = (32-20)
@@ -322,6 +339,8 @@ typedef enum PassType {
PASS_LIGHT = (1 << 25), /* no real pass, used to force use_light_pass */
#ifdef __KERNEL_DEBUG__
PASS_BVH_TRAVERSAL_STEPS = (1 << 26),
+ PASS_BVH_TRAVERSED_INSTANCES = (1 << 27),
+ PASS_RAY_BOUNCES = (1 << 28),
#endif
} PassType;
@@ -329,7 +348,7 @@ typedef enum PassType {
#ifdef __PASSES__
-typedef struct PathRadiance {
+typedef ccl_addr_space struct PathRadiance {
int use_light_pass;
float3 emission;
@@ -381,7 +400,7 @@ typedef struct BsdfEval {
#else
-typedef float3 PathRadiance;
+typedef ccl_addr_space float3 PathRadiance;
typedef float3 BsdfEval;
#endif
@@ -426,6 +445,7 @@ enum CameraType {
enum PanoramaType {
PANORAMA_EQUIRECTANGULAR,
+ PANORAMA_MIRRORBALL,
PANORAMA_FISHEYE_EQUIDISTANT,
PANORAMA_FISHEYE_EQUISOLID
};
@@ -445,10 +465,26 @@ typedef struct differential {
/* Ray */
typedef struct Ray {
+/* TODO(sergey): This is only needed because current AMD
+ * compiler has hard time building the kernel with this
+ * reshuffle. And at the same time reshuffle will cause
+ * less optimal CPU code in certain places.
+ *
+ * We'll get rid of this nasty exception once AMD compiler
+ * is fixed.
+ */
+#ifndef __KERNEL_OPENCL_AMD__
float3 P; /* origin */
float3 D; /* direction */
+
float t; /* length of the ray */
float time; /* time (for motion blur) */
+#else
+ float t; /* length of the ray */
+ float time; /* time (for motion blur) */
+ float3 P; /* origin */
+ float3 D; /* direction */
+#endif
#ifdef __RAY_DIFFERENTIALS__
differential3 dP;
@@ -458,7 +494,7 @@ typedef struct Ray {
/* Intersection */
-typedef struct Intersection {
+typedef ccl_addr_space struct Intersection {
float t, u, v;
int prim;
int object;
@@ -466,6 +502,7 @@ typedef struct Intersection {
#ifdef __KERNEL_DEBUG__
int num_traversal_steps;
+ int num_traversed_instances;
#endif
} Intersection;
@@ -543,7 +580,11 @@ typedef enum AttributeStandard {
/* Closure data */
#ifdef __MULTI_CLOSURE__
-#define MAX_CLOSURE 64
+# ifndef __MAX_CLOSURE__
+# define MAX_CLOSURE 64
+# else
+# define MAX_CLOSURE __MAX_CLOSURE__
+# endif
#else
#define MAX_CLOSURE 1
#endif
@@ -553,7 +594,7 @@ typedef enum AttributeStandard {
* does not put own padding trying to align this members.
* - We make sure OSL pointer is also 16 bytes aligned.
*/
-typedef struct ShaderClosure {
+typedef ccl_addr_space struct ShaderClosure {
float3 weight;
float3 N;
float3 T;
@@ -638,78 +679,23 @@ enum ShaderDataFlag {
struct KernelGlobals;
-typedef struct ShaderData {
- /* position */
- float3 P;
- /* smooth normal for shading */
- float3 N;
- /* true geometric normal */
- float3 Ng;
- /* view/incoming direction */
- float3 I;
- /* shader id */
- int shader;
- /* booleans describing shader, see ShaderDataFlag */
- int flag;
-
- /* primitive id if there is one, ~0 otherwise */
- int prim;
-
- /* combined type and curve segment for hair */
- int type;
-
- /* parametric coordinates
- * - barycentric weights for triangles */
- float u, v;
- /* object id if there is one, ~0 otherwise */
- int object;
-
- /* motion blur sample time */
- float time;
-
- /* length of the ray being shaded */
- float ray_length;
-
- /* ray bounce depth */
- int ray_depth;
-
- /* ray transparent depth */
- int transparent_depth;
-
-#ifdef __RAY_DIFFERENTIALS__
- /* differential of P. these are orthogonal to Ng, not N */
- differential3 dP;
- /* differential of I */
- differential3 dI;
- /* differential of u, v */
- differential du;
- differential dv;
-#endif
-#ifdef __DPDU__
- /* differential of P w.r.t. parametric coordinates. note that dPdu is
- * not readily suitable as a tangent for shading on triangles. */
- float3 dPdu, dPdv;
-#endif
-
-#ifdef __OBJECT_MOTION__
- /* object <-> world space transformations, cached to avoid
- * re-interpolating them constantly for shading */
- Transform ob_tfm;
- Transform ob_itfm;
+#ifdef __SPLIT_KERNEL__
+#define SD_VAR(type, what) ccl_global type *what;
+#define SD_CLOSURE_VAR(type, what, max_closure) type *what;
+#define TIDX (get_global_id(1) * get_global_size(0) + get_global_id(0))
+#define ccl_fetch(s, t) (s->t[TIDX])
+#define ccl_fetch_array(s, t, index) (&s->t[TIDX * MAX_CLOSURE + index])
+#else
+#define SD_VAR(type, what) type what;
+#define SD_CLOSURE_VAR(type, what, max_closure) type what[max_closure];
+#define ccl_fetch(s, t) (s->t)
+#define ccl_fetch_array(s, t, index) (&s->t[index])
#endif
- /* Closure data, we store a fixed array of closures */
- ShaderClosure closure[MAX_CLOSURE];
- int num_closure;
- float randb_closure;
+typedef ccl_addr_space struct ShaderData {
- /* ray start position, only set for backgrounds */
- float3 ray_P;
- differential3 ray_dP;
+#include "kernel_shaderdata_vars.h"
-#ifdef __OSL__
- struct KernelGlobals *osl_globals;
-#endif
} ShaderData;
/* Path State */
@@ -867,7 +853,9 @@ typedef struct KernelFilm {
#ifdef __KERNEL_DEBUG__
int pass_bvh_traversal_steps;
- int pass_pad3, pass_pad4, pass_pad5;
+ int pass_bvh_traversed_instances;
+ int pass_ray_bounces;
+ int pass_pad3;
#endif
} KernelFilm;
@@ -895,6 +883,11 @@ typedef struct KernelIntegrator {
float inv_pdf_lights;
int pdf_background_res;
+ /* light portals */
+ float portal_pdf;
+ int num_portals;
+ int portal_offset;
+
/* bounces */
int min_bounce;
int max_bounce;
@@ -947,6 +940,8 @@ typedef struct KernelIntegrator {
int volume_max_steps;
float volume_step_size;
int volume_samples;
+
+ int pad;
} KernelIntegrator;
typedef struct KernelBVH {
@@ -980,9 +975,8 @@ typedef struct KernelCurves {
} KernelCurves;
typedef struct KernelTables {
- int blackbody_offset;
int beckmann_offset;
- int pad1, pad2;
+ int pad1, pad2, pad3;
} KernelTables;
typedef struct KernelData {
@@ -996,13 +990,64 @@ typedef struct KernelData {
} KernelData;
#ifdef __KERNEL_DEBUG__
-typedef struct DebugData {
+typedef ccl_addr_space struct DebugData {
// Total number of BVH node traversal steps and primitives intersections
// for the camera rays.
int num_bvh_traversal_steps;
+ int num_bvh_traversed_instances;
+ int num_ray_bounces;
} DebugData;
#endif
+/* Declarations required for split kernel */
+
+/* Macro for queues */
+/* Value marking queue's empty slot */
+#define QUEUE_EMPTY_SLOT -1
+
+/*
+* Queue 1 - Active rays
+* Queue 2 - Background queue
+* Queue 3 - Shadow ray cast kernel - AO
+* Queeu 4 - Shadow ray cast kernel - direct lighting
+*/
+#define NUM_QUEUES 4
+
+/* Queue names */
+enum QueueNumber {
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS, /* All active rays and regenerated rays are enqueued here */
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, /* All
+ * 1.Background-hit rays,
+ * 2.Rays that has exited path-iteration but needs to update output buffer
+ * 3.Rays to be regenerated
+ * are enqueued here */
+ QUEUE_SHADOW_RAY_CAST_AO_RAYS, /* All rays for which a shadow ray should be cast to determine radiance
+ contribution for AO are enqueued here */
+ QUEUE_SHADOW_RAY_CAST_DL_RAYS, /* All rays for which a shadow ray should be cast to determine radiance
+ contributuin for direct lighting are enqueued here */
+};
+
+/* We use RAY_STATE_MASK to get ray_state (enums 0 to 5) */
+#define RAY_STATE_MASK 0x007
+#define RAY_FLAG_MASK 0x0F8
+enum RayState {
+ RAY_ACTIVE = 0, // Denotes ray is actively involved in path-iteration
+ RAY_INACTIVE = 1, // Denotes ray has completed processing all samples and is inactive
+ RAY_UPDATE_BUFFER = 2, // Denoted ray has exited path-iteration and needs to update output buffer
+ RAY_HIT_BACKGROUND = 3, // Donotes ray has hit background
+ RAY_TO_REGENERATE = 4, // Denotes ray has to be regenerated
+ RAY_REGENERATED = 5, // Denotes ray has been regenerated
+ RAY_SKIP_DL = 6, // Denotes ray should skip direct lighting
+ RAY_SHADOW_RAY_CAST_AO = 16, // Flag's ray has to execute shadow blocked function in AO part
+ RAY_SHADOW_RAY_CAST_DL = 32 // Flag's ray has to execute shadow blocked function in direct lighting part
+};
+
+#define ASSIGN_RAY_STATE(ray_state, ray_index, state) (ray_state[ray_index] = ((ray_state[ray_index] & RAY_FLAG_MASK) | state))
+#define IS_STATE(ray_state, ray_index, state) ((ray_state[ray_index] & RAY_STATE_MASK) == state)
+#define ADD_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] | flag))
+#define REMOVE_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] & (~flag)))
+#define IS_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] & flag)
+
CCL_NAMESPACE_END
#endif /* __KERNEL_TYPES_H__ */
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index 0300e1d4c7f..e06568457c6 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -627,7 +627,7 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta
step_size = kernel_data.integrator.volume_step_size;
/* compute exact steps in advance for malloc */
max_steps = max((int)ceilf(ray->t/step_size), 1);
- if (max_steps > global_max_steps) {
+ if(max_steps > global_max_steps) {
max_steps = global_max_steps;
step_size = ray->t / (float)max_steps;
}
@@ -993,6 +993,48 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
volume_ray.t = FLT_MAX;
int stack_index = 0, enclosed_index = 0;
+
+#ifdef __VOLUME_RECORD_ALL__
+ Intersection hits[2*VOLUME_STACK_SIZE];
+ uint num_hits = scene_intersect_volume_all(kg,
+ &volume_ray,
+ hits,
+ 2*VOLUME_STACK_SIZE);
+ if(num_hits > 0) {
+ int enclosed_volumes[VOLUME_STACK_SIZE];
+ Intersection *isect = hits;
+
+ qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
+
+ for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
+ ShaderData sd;
+ shader_setup_from_ray(kg, &sd, isect, &volume_ray, 0, 0);
+ if(sd.flag & SD_BACKFACING) {
+ /* If ray exited the volume and never entered to that volume
+ * it means that camera is inside such a volume.
+ */
+ bool is_enclosed = false;
+ for(int i = 0; i < enclosed_index; ++i) {
+ if(enclosed_volumes[i] == sd.object) {
+ is_enclosed = true;
+ break;
+ }
+ }
+ if(is_enclosed == false) {
+ stack[stack_index].object = sd.object;
+ stack[stack_index].shader = sd.shader;
+ ++stack_index;
+ }
+ }
+ else {
+ /* If ray from camera enters the volume, this volume shouldn't
+ * be added to the stack on exit.
+ */
+ enclosed_volumes[enclosed_index++] = sd.object;
+ }
+ }
+ }
+#else
int enclosed_volumes[VOLUME_STACK_SIZE];
int step = 0;
@@ -1035,6 +1077,7 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
volume_ray.P = ray_offset(sd.P, -sd.Ng);
++step;
}
+#endif
/* stack_index of 0 means quick checks outside of the kernel gave false
* positive, nothing to worry about, just we've wasted quite a few of
* ticks just to come into conclusion that camera is in the air.
@@ -1097,4 +1140,49 @@ ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd
}
}
+#ifdef __SUBSURFACE__
+ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
+ Ray *ray,
+ VolumeStack *stack)
+{
+ kernel_assert(kernel_data.integrator.use_volumes);
+
+ Ray volume_ray = *ray;
+
+#ifdef __VOLUME_RECORD_ALL__
+ Intersection hits[2*VOLUME_STACK_SIZE];
+ uint num_hits = scene_intersect_volume_all(kg,
+ &volume_ray,
+ hits,
+ 2*VOLUME_STACK_SIZE);
+ if(num_hits > 0) {
+ Intersection *isect = hits;
+
+ qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
+
+ for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
+ ShaderData sd;
+ shader_setup_from_ray(kg, &sd, isect, &volume_ray, 0, 0);
+ kernel_volume_stack_enter_exit(kg, &sd, stack);
+ }
+ }
+#else
+ Intersection isect;
+ int step = 0;
+ while(step < 2 * VOLUME_STACK_SIZE &&
+ scene_intersect_volume(kg, &volume_ray, &isect))
+ {
+ ShaderData sd;
+ shader_setup_from_ray(kg, &sd, &isect, &volume_ray, 0, 0);
+ kernel_volume_stack_enter_exit(kg, &sd, stack);
+
+ /* Move ray forward. */
+ volume_ray.P = ray_offset(sd.P, -sd.Ng);
+ volume_ray.t -= sd.ray_length;
+ ++step;
+ }
+#endif
+}
+#endif
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
new file mode 100644
index 00000000000..9b83d972e97
--- /dev/null
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __KERNEL_WORK_STEALING_H__
+#define __KERNEL_WORK_STEALING_H__
+
+/*
+ * Utility functions for work stealing
+ */
+
+#ifdef __WORK_STEALING__
+
+#ifdef __KERNEL_OPENCL__
+#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
+#endif
+
+uint get_group_id_with_ray_index(uint ray_index,
+ uint tile_dim_x,
+ uint tile_dim_y,
+ uint parallel_samples,
+ int dim)
+{
+ if(dim == 0) {
+ uint x_span = ray_index % (tile_dim_x * parallel_samples);
+ return x_span / get_local_size(0);
+ }
+ else /*if(dim == 1)*/ {
+ kernel_assert(dim == 1);
+ uint y_span = ray_index / (tile_dim_x * parallel_samples);
+ return y_span / get_local_size(1);
+ }
+}
+
+uint get_total_work(uint tile_dim_x,
+ uint tile_dim_y,
+ uint grp_idx,
+ uint grp_idy,
+ uint num_samples)
+{
+ uint threads_within_tile_border_x =
+ (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
+ : get_local_size(0);
+ uint threads_within_tile_border_y =
+ (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
+ : get_local_size(1);
+
+ threads_within_tile_border_x =
+ (threads_within_tile_border_x == 0) ? get_local_size(0)
+ : threads_within_tile_border_x;
+ threads_within_tile_border_y =
+ (threads_within_tile_border_y == 0) ? get_local_size(1)
+ : threads_within_tile_border_y;
+
+ return threads_within_tile_border_x *
+ threads_within_tile_border_y *
+ num_samples;
+}
+
+/* Returns 0 in case there is no next work available */
+/* Returns 1 in case work assigned is valid */
+int get_next_work(ccl_global uint *work_pool,
+ ccl_private uint *my_work,
+ uint tile_dim_x,
+ uint tile_dim_y,
+ uint num_samples,
+ uint parallel_samples,
+ uint ray_index)
+{
+ uint grp_idx = get_group_id_with_ray_index(ray_index,
+ tile_dim_x,
+ tile_dim_y,
+ parallel_samples,
+ 0);
+ uint grp_idy = get_group_id_with_ray_index(ray_index,
+ tile_dim_x,
+ tile_dim_y,
+ parallel_samples,
+ 1);
+ uint total_work = get_total_work(tile_dim_x,
+ tile_dim_y,
+ grp_idx,
+ grp_idy,
+ num_samples);
+ uint group_index = grp_idy * get_num_groups(0) + grp_idx;
+ *my_work = atomic_inc(&work_pool[group_index]);
+ return (*my_work < total_work) ? 1 : 0;
+}
+
+/* This function assumes that the passed my_work is valid. */
+/* Decode sample number w.r.t. assigned my_work. */
+uint get_my_sample(uint my_work,
+ uint tile_dim_x,
+ uint tile_dim_y,
+ uint parallel_samples,
+ uint ray_index)
+{
+ uint grp_idx = get_group_id_with_ray_index(ray_index,
+ tile_dim_x,
+ tile_dim_y,
+ parallel_samples,
+ 0);
+ uint grp_idy = get_group_id_with_ray_index(ray_index,
+ tile_dim_x,
+ tile_dim_y,
+ parallel_samples,
+ 1);
+ uint threads_within_tile_border_x =
+ (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
+ : get_local_size(0);
+ uint threads_within_tile_border_y =
+ (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
+ : get_local_size(1);
+
+ threads_within_tile_border_x =
+ (threads_within_tile_border_x == 0) ? get_local_size(0)
+ : threads_within_tile_border_x;
+ threads_within_tile_border_y =
+ (threads_within_tile_border_y == 0) ? get_local_size(1)
+ : threads_within_tile_border_y;
+
+ return my_work /
+ (threads_within_tile_border_x * threads_within_tile_border_y);
+}
+
+/* Decode pixel and tile position w.r.t. assigned my_work. */
+void get_pixel_tile_position(ccl_private uint *pixel_x,
+ ccl_private uint *pixel_y,
+ ccl_private uint *tile_x,
+ ccl_private uint *tile_y,
+ uint my_work,
+ uint tile_dim_x,
+ uint tile_dim_y,
+ uint tile_offset_x,
+ uint tile_offset_y,
+ uint parallel_samples,
+ uint ray_index)
+{
+ uint grp_idx = get_group_id_with_ray_index(ray_index,
+ tile_dim_x,
+ tile_dim_y,
+ parallel_samples,
+ 0);
+ uint grp_idy = get_group_id_with_ray_index(ray_index,
+ tile_dim_x,
+ tile_dim_y,
+ parallel_samples,
+ 1);
+ uint threads_within_tile_border_x =
+ (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
+ : get_local_size(0);
+ uint threads_within_tile_border_y =
+ (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
+ : get_local_size(1);
+
+ threads_within_tile_border_x =
+ (threads_within_tile_border_x == 0) ? get_local_size(0)
+ : threads_within_tile_border_x;
+ threads_within_tile_border_y =
+ (threads_within_tile_border_y == 0) ? get_local_size(1)
+ : threads_within_tile_border_y;
+
+ uint total_associated_pixels =
+ threads_within_tile_border_x * threads_within_tile_border_y;
+ uint work_group_pixel_index = my_work % total_associated_pixels;
+ uint work_group_pixel_x =
+ work_group_pixel_index % threads_within_tile_border_x;
+ uint work_group_pixel_y =
+ work_group_pixel_index / threads_within_tile_border_x;
+
+ *pixel_x =
+ tile_offset_x + (grp_idx * get_local_size(0)) + work_group_pixel_x;
+ *pixel_y =
+ tile_offset_y + (grp_idy * get_local_size(1)) + work_group_pixel_y;
+ *tile_x = *pixel_x - tile_offset_x;
+ *tile_y = *pixel_y - tile_offset_y;
+}
+
+#endif /* __WORK_STEALING__ */
+
+#endif /* __KERNEL_WORK_STEALING_H__ */
diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp
index 013eeff57fa..37a73ab2f04 100644
--- a/intern/cycles/kernel/kernel.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp
@@ -23,6 +23,7 @@
#include "kernel_globals.h"
#include "kernel_film.h"
#include "kernel_path.h"
+#include "kernel_path_branched.h"
#include "kernel_bake.h"
CCL_NAMESPACE_BEGIN
@@ -55,7 +56,7 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t
int id = atoi(name + strlen("__tex_image_float_"));
int array_index = id;
- if (array_index >= 0 && array_index < MAX_FLOAT_IMAGES) {
+ if(array_index >= 0 && array_index < MAX_FLOAT_IMAGES) {
tex = &kg->texture_float_images[array_index];
}
@@ -70,7 +71,7 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t
int id = atoi(name + strlen("__tex_image_"));
int array_index = id - MAX_FLOAT_IMAGES;
- if (array_index >= 0 && array_index < MAX_BYTE_IMAGES) {
+ if(array_index >= 0 && array_index < MAX_BYTE_IMAGES) {
tex = &kg->texture_byte_images[array_index];
}
diff --git a/intern/cycles/kernel/kernel_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp
index f1027ad413d..df77bedc729 100644
--- a/intern/cycles/kernel/kernel_avx.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp
@@ -38,6 +38,7 @@
#include "kernel_globals.h"
#include "kernel_film.h"
#include "kernel_path.h"
+#include "kernel_path_branched.h"
#include "kernel_bake.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/kernel_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp
index b2f16ff54d8..b3192369794 100644
--- a/intern/cycles/kernel/kernel_avx2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp
@@ -39,6 +39,7 @@
#include "kernel_globals.h"
#include "kernel_film.h"
#include "kernel_path.h"
+#include "kernel_path_branched.h"
#include "kernel_bake.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp
index cc8c603e8f8..f9c5134e442 100644
--- a/intern/cycles/kernel/kernel_sse2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp
@@ -34,6 +34,7 @@
#include "kernel_globals.h"
#include "kernel_film.h"
#include "kernel_path.h"
+#include "kernel_path_branched.h"
#include "kernel_bake.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp
index 20919a4f26e..2dbe4b81821 100644
--- a/intern/cycles/kernel/kernel_sse3.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp
@@ -36,6 +36,7 @@
#include "kernel_globals.h"
#include "kernel_film.h"
#include "kernel_path.h"
+#include "kernel_path_branched.h"
#include "kernel_bake.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/kernel_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp
index 48579d3b7e5..5c57ad01181 100644
--- a/intern/cycles/kernel/kernel_sse41.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp
@@ -37,6 +37,7 @@
#include "kernel_globals.h"
#include "kernel_film.h"
#include "kernel_path.h"
+#include "kernel_path_branched.h"
#include "kernel_bake.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu
index 64069fc049f..bcd55b8c676 100644
--- a/intern/cycles/kernel/kernel.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel.cu
@@ -16,13 +16,14 @@
/* CUDA kernel entry points */
-#include "kernel_compat_cuda.h"
-#include "kernel_math.h"
-#include "kernel_types.h"
-#include "kernel_globals.h"
-#include "kernel_film.h"
-#include "kernel_path.h"
-#include "kernel_bake.h"
+#include "../../kernel_compat_cuda.h"
+#include "../../kernel_math.h"
+#include "../../kernel_types.h"
+#include "../../kernel_globals.h"
+#include "../../kernel_film.h"
+#include "../../kernel_path.h"
+#include "../../kernel_path_branched.h"
+#include "../../kernel_bake.h"
/* device data taken from CUDA occupancy calculator */
diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl
index 5a47260a4ee..15fb34cfe3b 100644
--- a/intern/cycles/kernel/kernel.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel.cl
@@ -16,14 +16,17 @@
/* OpenCL kernel entry points - unfinished */
-#include "kernel_compat_opencl.h"
-#include "kernel_math.h"
-#include "kernel_types.h"
-#include "kernel_globals.h"
+#include "../../kernel_compat_opencl.h"
+#include "../../kernel_math.h"
+#include "../../kernel_types.h"
+#include "../../kernel_globals.h"
-#include "kernel_film.h"
-#include "kernel_path.h"
-#include "kernel_bake.h"
+#include "../../kernel_film.h"
+#include "../../kernel_path.h"
+#include "../../kernel_path_branched.h"
+#include "../../kernel_bake.h"
+
+#ifdef __COMPILE_ONLY_MEGAKERNEL__
__kernel void kernel_ocl_path_trace(
ccl_constant KernelData *data,
@@ -32,7 +35,7 @@ __kernel void kernel_ocl_path_trace(
#define KERNEL_TEX(type, ttype, name) \
ccl_global type *name,
-#include "kernel_textures.h"
+#include "../../kernel_textures.h"
int sample,
int sx, int sy, int sw, int sh, int offset, int stride)
@@ -43,7 +46,7 @@ __kernel void kernel_ocl_path_trace(
#define KERNEL_TEX(type, ttype, name) \
kg->name = name;
-#include "kernel_textures.h"
+#include "../../kernel_textures.h"
int x = sx + get_global_id(0);
int y = sy + get_global_id(1);
@@ -52,17 +55,18 @@ __kernel void kernel_ocl_path_trace(
kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
}
-__kernel void kernel_ocl_convert_to_byte(
+#else // __COMPILE_ONLY_MEGAKERNEL__
+
+__kernel void kernel_ocl_shader(
ccl_constant KernelData *data,
- ccl_global uchar4 *rgba,
- ccl_global float *buffer,
+ ccl_global uint4 *input,
+ ccl_global float4 *output,
#define KERNEL_TEX(type, ttype, name) \
ccl_global type *name,
-#include "kernel_textures.h"
+#include "../../kernel_textures.h"
- float sample_scale,
- int sx, int sy, int sw, int sh, int offset, int stride)
+ int type, int sx, int sw, int offset, int sample)
{
KernelGlobals kglobals, *kg = &kglobals;
@@ -70,26 +74,24 @@ __kernel void kernel_ocl_convert_to_byte(
#define KERNEL_TEX(type, ttype, name) \
kg->name = name;
-#include "kernel_textures.h"
+#include "../../kernel_textures.h"
int x = sx + get_global_id(0);
- int y = sy + get_global_id(1);
- if(x < sx + sw && y < sy + sh)
- kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
+ if(x < sx + sw)
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, x, sample);
}
-__kernel void kernel_ocl_convert_to_half_float(
+__kernel void kernel_ocl_bake(
ccl_constant KernelData *data,
- ccl_global uchar4 *rgba,
- ccl_global float *buffer,
+ ccl_global uint4 *input,
+ ccl_global float4 *output,
#define KERNEL_TEX(type, ttype, name) \
ccl_global type *name,
-#include "kernel_textures.h"
+#include "../../kernel_textures.h"
- float sample_scale,
- int sx, int sy, int sw, int sh, int offset, int stride)
+ int type, int sx, int sw, int offset, int sample)
{
KernelGlobals kglobals, *kg = &kglobals;
@@ -97,25 +99,36 @@ __kernel void kernel_ocl_convert_to_half_float(
#define KERNEL_TEX(type, ttype, name) \
kg->name = name;
-#include "kernel_textures.h"
+#include "../../kernel_textures.h"
int x = sx + get_global_id(0);
- int y = sy + get_global_id(1);
- if(x < sx + sw && y < sy + sh)
- kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
+ if(x < sx + sw) {
+#if defined(__KERNEL_OPENCL_NVIDIA__) && __COMPUTE_CAPABILITY__ < 300
+ /* NVidia compiler is spending infinite amount of time trying
+ * to deal with kernel_bake_evaluate() on architectures prior
+ * to sm_30.
+ * For now we disable baking kernel for those devices, so at
+ * least rendering with split kernel could be compiled.
+ */
+ output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+#else
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, offset, sample);
+#endif
+ }
}
-__kernel void kernel_ocl_shader(
+__kernel void kernel_ocl_convert_to_byte(
ccl_constant KernelData *data,
- ccl_global uint4 *input,
- ccl_global float4 *output,
+ ccl_global uchar4 *rgba,
+ ccl_global float *buffer,
#define KERNEL_TEX(type, ttype, name) \
ccl_global type *name,
-#include "kernel_textures.h"
+#include "../../kernel_textures.h"
- int type, int sx, int sw, int offset, int sample)
+ float sample_scale,
+ int sx, int sy, int sw, int sh, int offset, int stride)
{
KernelGlobals kglobals, *kg = &kglobals;
@@ -123,24 +136,26 @@ __kernel void kernel_ocl_shader(
#define KERNEL_TEX(type, ttype, name) \
kg->name = name;
-#include "kernel_textures.h"
+#include "../../kernel_textures.h"
int x = sx + get_global_id(0);
+ int y = sy + get_global_id(1);
- if(x < sx + sw)
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, x, sample);
+ if(x < sx + sw && y < sy + sh)
+ kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
}
-__kernel void kernel_ocl_bake(
+__kernel void kernel_ocl_convert_to_half_float(
ccl_constant KernelData *data,
- ccl_global uint4 *input,
- ccl_global float4 *output,
+ ccl_global uchar4 *rgba,
+ ccl_global float *buffer,
#define KERNEL_TEX(type, ttype, name) \
ccl_global type *name,
-#include "kernel_textures.h"
+#include "../../kernel_textures.h"
- int type, int sx, int sw, int offset, int sample)
+ float sample_scale,
+ int sx, int sy, int sw, int sh, int offset, int stride)
{
KernelGlobals kglobals, *kg = &kglobals;
@@ -148,11 +163,13 @@ __kernel void kernel_ocl_bake(
#define KERNEL_TEX(type, ttype, name) \
kg->name = name;
-#include "kernel_textures.h"
+#include "../../kernel_textures.h"
int x = sx + get_global_id(0);
+ int y = sy + get_global_id(1);
- if(x < sx + sw)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, offset, sample);
+ if(x < sx + sw && y < sy + sh)
+ kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
}
+#endif // __COMPILE_ONLY_MEGAKERNEL__ \ No newline at end of file
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl
new file mode 100644
index 00000000000..eff77b89a0a
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl
@@ -0,0 +1,128 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "split/kernel_background_buffer_update.h"
+
+__kernel void kernel_ocl_path_trace_background_buffer_update(
+ ccl_global char *globals,
+ ccl_constant KernelData *data,
+ ccl_global char *shader_data,
+ ccl_global float *per_sample_output_buffers,
+ ccl_global uint *rng_state,
+ ccl_global uint *rng_coop, /* Required for buffer Update */
+ ccl_global float3 *throughput_coop, /* Required for background hit processing */
+ PathRadiance *PathRadiance_coop, /* Required for background hit processing and buffer Update */
+ ccl_global Ray *Ray_coop, /* Required for background hit processing */
+ ccl_global PathState *PathState_coop, /* Required for background hit processing */
+ ccl_global float *L_transparent_coop, /* Required for background hit processing and buffer Update */
+ ccl_global char *ray_state, /* Stores information on the current state of a ray */
+ int sw, int sh, int sx, int sy, int stride,
+ int rng_state_offset_x,
+ int rng_state_offset_y,
+ int rng_state_stride,
+ ccl_global unsigned int *work_array, /* Denotes work of each ray */
+ ccl_global int *Queue_data, /* Queues memory */
+ ccl_global int *Queue_index, /* Tracks the number of elements in each queue */
+ int queuesize, /* Size (capacity) of each queue */
+ int end_sample,
+ int start_sample,
+#ifdef __WORK_STEALING__
+ ccl_global unsigned int *work_pool_wgs,
+ unsigned int num_samples,
+#endif
+#ifdef __KERNEL_DEBUG__
+ DebugData *debugdata_coop,
+#endif
+ int parallel_samples) /* Number of samples to be processed in parallel */
+{
+ ccl_local unsigned int local_queue_atomics;
+ if(get_local_id(0) == 0 && get_local_id(1) == 0) {
+ local_queue_atomics = 0;
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0);
+ if(ray_index == 0) {
+ /* We will empty this queue in this kernel. */
+ Queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
+ }
+ char enqueue_flag = 0;
+ ray_index = get_ray_index(ray_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ Queue_data,
+ queuesize,
+ 1);
+
+#ifdef __COMPUTE_DEVICE_GPU__
+ /* If we are executing on a GPU device, we exit all threads that are not
+ * required.
+ *
+ * If we are executing on a CPU device, then we need to keep all threads
+ * active since we have barrier() calls later in the kernel. CPU devices,
+ * expect all threads to execute barrier statement.
+ */
+ if(ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
+#endif
+
+#ifndef __COMPUTE_DEVICE_GPU__
+ if(ray_index != QUEUE_EMPTY_SLOT) {
+#endif
+ enqueue_flag =
+ kernel_background_buffer_update(globals,
+ data,
+ shader_data,
+ per_sample_output_buffers,
+ rng_state,
+ rng_coop,
+ throughput_coop,
+ PathRadiance_coop,
+ Ray_coop,
+ PathState_coop,
+ L_transparent_coop,
+ ray_state,
+ sw, sh, sx, sy, stride,
+ rng_state_offset_x,
+ rng_state_offset_y,
+ rng_state_stride,
+ work_array,
+ end_sample,
+ start_sample,
+#ifdef __WORK_STEALING__
+ work_pool_wgs,
+ num_samples,
+#endif
+#ifdef __KERNEL_DEBUG__
+ debugdata_coop,
+#endif
+ parallel_samples,
+ ray_index);
+#ifndef __COMPUTE_DEVICE_GPU__
+ }
+#endif
+
+ /* Enqueue RAY_REGENERATED rays into QUEUE_ACTIVE_AND_REGENERATED_RAYS;
+ * These rays will be made active during next SceneIntersectkernel.
+ */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ enqueue_flag,
+ queuesize,
+ &local_queue_atomics,
+ Queue_data,
+ Queue_index);
+}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
new file mode 100644
index 00000000000..c3277676029
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
@@ -0,0 +1,241 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "split/kernel_data_init.h"
+
+__kernel void kernel_ocl_path_trace_data_init(
+ ccl_global char *globals,
+ ccl_global char *shader_data_sd, /* Arguments related to ShaderData */
+ ccl_global char *shader_data_sd_DL_shadow, /* Arguments related to ShaderData */
+
+ ccl_global float3 *P_sd,
+ ccl_global float3 *P_sd_DL_shadow,
+
+ ccl_global float3 *N_sd,
+ ccl_global float3 *N_sd_DL_shadow,
+
+ ccl_global float3 *Ng_sd,
+ ccl_global float3 *Ng_sd_DL_shadow,
+
+ ccl_global float3 *I_sd,
+ ccl_global float3 *I_sd_DL_shadow,
+
+ ccl_global int *shader_sd,
+ ccl_global int *shader_sd_DL_shadow,
+
+ ccl_global int *flag_sd,
+ ccl_global int *flag_sd_DL_shadow,
+
+ ccl_global int *prim_sd,
+ ccl_global int *prim_sd_DL_shadow,
+
+ ccl_global int *type_sd,
+ ccl_global int *type_sd_DL_shadow,
+
+ ccl_global float *u_sd,
+ ccl_global float *u_sd_DL_shadow,
+
+ ccl_global float *v_sd,
+ ccl_global float *v_sd_DL_shadow,
+
+ ccl_global int *object_sd,
+ ccl_global int *object_sd_DL_shadow,
+
+ ccl_global float *time_sd,
+ ccl_global float *time_sd_DL_shadow,
+
+ ccl_global float *ray_length_sd,
+ ccl_global float *ray_length_sd_DL_shadow,
+
+ ccl_global int *ray_depth_sd,
+ ccl_global int *ray_depth_sd_DL_shadow,
+
+ ccl_global int *transparent_depth_sd,
+ ccl_global int *transparent_depth_sd_DL_shadow,
+
+ /* Ray differentials. */
+ ccl_global differential3 *dP_sd,
+ ccl_global differential3 *dP_sd_DL_shadow,
+
+ ccl_global differential3 *dI_sd,
+ ccl_global differential3 *dI_sd_DL_shadow,
+
+ ccl_global differential *du_sd,
+ ccl_global differential *du_sd_DL_shadow,
+
+ ccl_global differential *dv_sd,
+ ccl_global differential *dv_sd_DL_shadow,
+
+ /* Dp/Du */
+ ccl_global float3 *dPdu_sd,
+ ccl_global float3 *dPdu_sd_DL_shadow,
+
+ ccl_global float3 *dPdv_sd,
+ ccl_global float3 *dPdv_sd_DL_shadow,
+
+ /* Object motion. */
+ ccl_global Transform *ob_tfm_sd,
+ ccl_global Transform *ob_tfm_sd_DL_shadow,
+
+ ccl_global Transform *ob_itfm_sd,
+ ccl_global Transform *ob_itfm_sd_DL_shadow,
+
+ ShaderClosure *closure_sd,
+ ShaderClosure *closure_sd_DL_shadow,
+
+ ccl_global int *num_closure_sd,
+ ccl_global int *num_closure_sd_DL_shadow,
+
+ ccl_global float *randb_closure_sd,
+ ccl_global float *randb_closure_sd_DL_shadow,
+
+ ccl_global float3 *ray_P_sd,
+ ccl_global float3 *ray_P_sd_DL_shadow,
+
+ ccl_global differential3 *ray_dP_sd,
+ ccl_global differential3 *ray_dP_sd_DL_shadow,
+
+ ccl_constant KernelData *data,
+ ccl_global float *per_sample_output_buffers,
+ ccl_global uint *rng_state,
+ ccl_global uint *rng_coop, /* rng array to store rng values for all rays */
+ ccl_global float3 *throughput_coop, /* throughput array to store throughput values for all rays */
+ ccl_global float *L_transparent_coop, /* L_transparent array to store L_transparent values for all rays */
+ PathRadiance *PathRadiance_coop, /* PathRadiance array to store PathRadiance values for all rays */
+ ccl_global Ray *Ray_coop, /* Ray array to store Ray information for all rays */
+ ccl_global PathState *PathState_coop, /* PathState array to store PathState information for all rays */
+ ccl_global char *ray_state, /* Stores information on current state of a ray */
+
+#define KERNEL_TEX(type, ttype, name) \
+ ccl_global type *name,
+#include "../../kernel_textures.h"
+
+ int start_sample, int sx, int sy, int sw, int sh, int offset, int stride,
+ int rng_state_offset_x,
+ int rng_state_offset_y,
+ int rng_state_stride,
+ ccl_global int *Queue_data, /* Memory for queues */
+ ccl_global int *Queue_index, /* Tracks the number of elements in queues */
+ int queuesize, /* size (capacity) of the queue */
+ ccl_global char *use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */
+ ccl_global unsigned int *work_array, /* work array to store which work each ray belongs to */
+#ifdef __WORK_STEALING__
+ ccl_global unsigned int *work_pool_wgs, /* Work pool for each work group */
+ unsigned int num_samples, /* Total number of samples per pixel */
+#endif
+#ifdef __KERNEL_DEBUG__
+ DebugData *debugdata_coop,
+#endif
+ int parallel_samples) /* Number of samples to be processed in parallel */
+{
+ kernel_data_init(globals,
+ shader_data_sd,
+ shader_data_sd_DL_shadow,
+ P_sd,
+ P_sd_DL_shadow,
+ N_sd,
+ N_sd_DL_shadow,
+ Ng_sd,
+ Ng_sd_DL_shadow,
+ I_sd,
+ I_sd_DL_shadow,
+ shader_sd,
+ shader_sd_DL_shadow,
+ flag_sd,
+ flag_sd_DL_shadow,
+ prim_sd,
+ prim_sd_DL_shadow,
+ type_sd,
+ type_sd_DL_shadow,
+ u_sd,
+ u_sd_DL_shadow,
+ v_sd,
+ v_sd_DL_shadow,
+ object_sd,
+ object_sd_DL_shadow,
+ time_sd,
+ time_sd_DL_shadow,
+ ray_length_sd,
+ ray_length_sd_DL_shadow,
+ ray_depth_sd,
+ ray_depth_sd_DL_shadow,
+ transparent_depth_sd,
+ transparent_depth_sd_DL_shadow,
+
+ /* Ray differentials. */
+ dP_sd,
+ dP_sd_DL_shadow,
+ dI_sd,
+ dI_sd_DL_shadow,
+ du_sd,
+ du_sd_DL_shadow,
+ dv_sd,
+ dv_sd_DL_shadow,
+
+ /* Dp/Du */
+ dPdu_sd,
+ dPdu_sd_DL_shadow,
+ dPdv_sd,
+ dPdv_sd_DL_shadow,
+
+ /* Object motion. */
+ ob_tfm_sd,
+ ob_tfm_sd_DL_shadow,
+ ob_itfm_sd,
+ ob_itfm_sd_DL_shadow,
+
+ closure_sd,
+ closure_sd_DL_shadow,
+ num_closure_sd,
+ num_closure_sd_DL_shadow,
+ randb_closure_sd,
+ randb_closure_sd_DL_shadow,
+ ray_P_sd,
+ ray_P_sd_DL_shadow,
+ ray_dP_sd,
+ ray_dP_sd_DL_shadow,
+ data,
+ per_sample_output_buffers,
+ rng_state,
+ rng_coop,
+ throughput_coop,
+ L_transparent_coop,
+ PathRadiance_coop,
+ Ray_coop,
+ PathState_coop,
+ ray_state,
+
+#define KERNEL_TEX(type, ttype, name) name,
+#include "../../kernel_textures.h"
+
+ start_sample, sx, sy, sw, sh, offset, stride,
+ rng_state_offset_x,
+ rng_state_offset_y,
+ rng_state_stride,
+ Queue_data,
+ Queue_index,
+ queuesize,
+ use_queues_flag,
+ work_array,
+#ifdef __WORK_STEALING__
+ work_pool_wgs,
+ num_samples,
+#endif
+#ifdef __KERNEL_DEBUG__
+ debugdata_coop,
+#endif
+ parallel_samples);
+}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
new file mode 100644
index 00000000000..6ec75013b3a
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "split/kernel_direct_lighting.h"
+
+__kernel void kernel_ocl_path_trace_direct_lighting(
+ ccl_global char *globals,
+ ccl_constant KernelData *data,
+ ccl_global char *shader_data, /* Required for direct lighting */
+ ccl_global char *shader_DL, /* Required for direct lighting */
+ ccl_global uint *rng_coop, /* Required for direct lighting */
+ ccl_global PathState *PathState_coop, /* Required for direct lighting */
+ ccl_global int *ISLamp_coop, /* Required for direct lighting */
+ ccl_global Ray *LightRay_coop, /* Required for direct lighting */
+ ccl_global BsdfEval *BSDFEval_coop, /* Required for direct lighting */
+ ccl_global char *ray_state, /* Denotes the state of each ray */
+ ccl_global int *Queue_data, /* Queue memory */
+ ccl_global int *Queue_index, /* Tracks the number of elements in each queue */
+ int queuesize) /* Size (capacity) of each queue */
+{
+ ccl_local unsigned int local_queue_atomics;
+ if(get_local_id(0) == 0 && get_local_id(1) == 0) {
+ local_queue_atomics = 0;
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ char enqueue_flag = 0;
+ int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0);
+ ray_index = get_ray_index(ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ Queue_data,
+ queuesize,
+ 0);
+
+#ifdef __COMPUTE_DEVICE_GPU__
+ /* If we are executing on a GPU device, we exit all threads that are not
+ * required.
+ *
+ * If we are executing on a CPU device, then we need to keep all threads
+ * active since we have barrier() calls later in the kernel. CPU devices,
+ * expect all threads to execute barrier statement.
+ */
+ if(ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
+#endif
+
+#ifndef __COMPUTE_DEVICE_GPU__
+ if(ray_index != QUEUE_EMPTY_SLOT) {
+#endif
+ enqueue_flag = kernel_direct_lighting(globals,
+ data,
+ shader_data,
+ shader_DL,
+ rng_coop,
+ PathState_coop,
+ ISLamp_coop,
+ LightRay_coop,
+ BSDFEval_coop,
+ ray_state,
+ ray_index);
+
+#ifndef __COMPUTE_DEVICE_GPU__
+ }
+#endif
+
+#ifdef __EMISSION__
+ /* Enqueue RAY_SHADOW_RAY_CAST_DL rays. */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_SHADOW_RAY_CAST_DL_RAYS,
+ enqueue_flag,
+ queuesize,
+ &local_queue_atomics,
+ Queue_data,
+ Queue_index);
+#endif
+}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
new file mode 100644
index 00000000000..ae5f5cd1b3b
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "split/kernel_holdout_emission_blurring_pathtermination_ao.h"
+
+__kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao(
+ ccl_global char *globals,
+ ccl_constant KernelData *data,
+ ccl_global char *shader_data, /* Required throughout the kernel except probabilistic path termination and AO */
+ ccl_global float *per_sample_output_buffers,
+ ccl_global uint *rng_coop, /* Required for "kernel_write_data_passes" and AO */
+ ccl_global float3 *throughput_coop, /* Required for handling holdout material and AO */
+ ccl_global float *L_transparent_coop, /* Required for handling holdout material */
+ PathRadiance *PathRadiance_coop, /* Required for "kernel_write_data_passes" and indirect primitive emission */
+ ccl_global PathState *PathState_coop, /* Required throughout the kernel and AO */
+ Intersection *Intersection_coop, /* Required for indirect primitive emission */
+ ccl_global float3 *AOAlpha_coop, /* Required for AO */
+ ccl_global float3 *AOBSDF_coop, /* Required for AO */
+ ccl_global Ray *AOLightRay_coop, /* Required for AO */
+ int sw, int sh, int sx, int sy, int stride,
+ ccl_global char *ray_state, /* Denotes the state of each ray */
+ ccl_global unsigned int *work_array, /* Denotes the work that each ray belongs to */
+ ccl_global int *Queue_data, /* Queue memory */
+ ccl_global int *Queue_index, /* Tracks the number of elements in each queue */
+ int queuesize, /* Size (capacity) of each queue */
+#ifdef __WORK_STEALING__
+ unsigned int start_sample,
+#endif
+ int parallel_samples) /* Number of samples to be processed in parallel */
+{
+ ccl_local unsigned int local_queue_atomics_bg;
+ ccl_local unsigned int local_queue_atomics_ao;
+ if(get_local_id(0) == 0 && get_local_id(1) == 0) {
+ local_queue_atomics_bg = 0;
+ local_queue_atomics_ao = 0;
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ char enqueue_flag = 0;
+ char enqueue_flag_AO_SHADOW_RAY_CAST = 0;
+ int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0);
+ ray_index = get_ray_index(ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ Queue_data,
+ queuesize,
+ 0);
+
+#ifdef __COMPUTE_DEVICE_GPU__
+ /* If we are executing on a GPU device, we exit all threads that are not
+ * required.
+ *
+ * If we are executing on a CPU device, then we need to keep all threads
+ * active since we have barrier() calls later in the kernel. CPU devices,
+ * expect all threads to execute barrier statement.
+ */
+ if(ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
+#endif /* __COMPUTE_DEVICE_GPU__ */
+
+#ifndef __COMPUTE_DEVICE_GPU__
+ if(ray_index != QUEUE_EMPTY_SLOT) {
+#endif
+ kernel_holdout_emission_blurring_pathtermination_ao(
+ globals,
+ data,
+ shader_data,
+ per_sample_output_buffers,
+ rng_coop,
+ throughput_coop,
+ L_transparent_coop,
+ PathRadiance_coop,
+ PathState_coop,
+ Intersection_coop,
+ AOAlpha_coop,
+ AOBSDF_coop,
+ AOLightRay_coop,
+ sw, sh, sx, sy, stride,
+ ray_state,
+ work_array,
+#ifdef __WORK_STEALING__
+ start_sample,
+#endif
+ parallel_samples,
+ ray_index,
+ &enqueue_flag,
+ &enqueue_flag_AO_SHADOW_RAY_CAST);
+#ifndef __COMPUTE_DEVICE_GPU__
+ }
+#endif
+
+ /* Enqueue RAY_UPDATE_BUFFER rays. */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ enqueue_flag,
+ queuesize,
+ &local_queue_atomics_bg,
+ Queue_data,
+ Queue_index);
+
+#ifdef __AO__
+ /* Enqueue to-shadow-ray-cast rays. */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_SHADOW_RAY_CAST_AO_RAYS,
+ enqueue_flag_AO_SHADOW_RAY_CAST,
+ queuesize,
+ &local_queue_atomics_ao,
+ Queue_data,
+ Queue_index);
+#endif
+}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
new file mode 100644
index 00000000000..1bc7808d834
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "split/kernel_lamp_emission.h"
+
+__kernel void kernel_ocl_path_trace_lamp_emission(
+ ccl_global char *globals,
+ ccl_constant KernelData *data,
+ ccl_global char *shader_data, /* Required for lamp emission */
+ ccl_global float3 *throughput_coop, /* Required for lamp emission */
+ PathRadiance *PathRadiance_coop, /* Required for lamp emission */
+ ccl_global Ray *Ray_coop, /* Required for lamp emission */
+ ccl_global PathState *PathState_coop, /* Required for lamp emission */
+ Intersection *Intersection_coop, /* Required for lamp emission */
+ ccl_global char *ray_state, /* Denotes the state of each ray */
+ int sw, int sh,
+ ccl_global int *Queue_data, /* Memory for queues */
+ ccl_global int *Queue_index, /* Tracks the number of elements in queues */
+ int queuesize, /* Size (capacity) of queues */
+ ccl_global char *use_queues_flag, /* Used to decide if this kernel should use
+ * queues to fetch ray index
+ */
+ int parallel_samples) /* Number of samples to be processed in parallel */
+{
+ int x = get_global_id(0);
+ int y = get_global_id(1);
+
+ /* We will empty this queue in this kernel. */
+ if(get_global_id(0) == 0 && get_global_id(1) == 0) {
+ Queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+ }
+ /* Fetch use_queues_flag. */
+ ccl_local char local_use_queues_flag;
+ if(get_local_id(0) == 0 && get_local_id(1) == 0) {
+ local_use_queues_flag = use_queues_flag[0];
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ int ray_index;
+ if(local_use_queues_flag) {
+ int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0);
+ ray_index = get_ray_index(thread_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ Queue_data,
+ queuesize,
+ 1);
+ if(ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
+ } else {
+ if(x < (sw * parallel_samples) && y < sh){
+ ray_index = x + y * (sw * parallel_samples);
+ } else {
+ return;
+ }
+ }
+
+ kernel_lamp_emission(globals,
+ data,
+ shader_data,
+ throughput_coop,
+ PathRadiance_coop,
+ Ray_coop,
+ PathState_coop,
+ Intersection_coop,
+ ray_state,
+ sw, sh,
+ use_queues_flag,
+ parallel_samples,
+ ray_index);
+}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
new file mode 100644
index 00000000000..dcf4db40411
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "split/kernel_next_iteration_setup.h"
+
+__kernel void kernel_ocl_path_trace_next_iteration_setup(
+ ccl_global char *globals,
+ ccl_constant KernelData *data,
+ ccl_global char *shader_data, /* Required for setting up ray for next iteration */
+ ccl_global uint *rng_coop, /* Required for setting up ray for next iteration */
+ ccl_global float3 *throughput_coop, /* Required for setting up ray for next iteration */
+ PathRadiance *PathRadiance_coop, /* Required for setting up ray for next iteration */
+ ccl_global Ray *Ray_coop, /* Required for setting up ray for next iteration */
+ ccl_global PathState *PathState_coop, /* Required for setting up ray for next iteration */
+ ccl_global Ray *LightRay_dl_coop, /* Required for radiance update - direct lighting */
+ ccl_global int *ISLamp_coop, /* Required for radiance update - direct lighting */
+ ccl_global BsdfEval *BSDFEval_coop, /* Required for radiance update - direct lighting */
+ ccl_global Ray *LightRay_ao_coop, /* Required for radiance update - AO */
+ ccl_global float3 *AOBSDF_coop, /* Required for radiance update - AO */
+ ccl_global float3 *AOAlpha_coop, /* Required for radiance update - AO */
+ ccl_global char *ray_state, /* Denotes the state of each ray */
+ ccl_global int *Queue_data, /* Queue memory */
+ ccl_global int *Queue_index, /* Tracks the number of elements in each queue */
+ int queuesize, /* Size (capacity) of each queue */
+ ccl_global char *use_queues_flag) /* flag to decide if scene_intersect kernel should
+ * use queues to fetch ray index */
+{
+ ccl_local unsigned int local_queue_atomics;
+ if(get_local_id(0) == 0 && get_local_id(1) == 0) {
+ local_queue_atomics = 0;
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if(get_global_id(0) == 0 && get_global_id(1) == 0) {
+ /* If we are here, then it means that scene-intersect kernel
+ * has already been executed atleast once. From the next time,
+ * scene-intersect kernel may operate on queues to fetch ray index
+ */
+ use_queues_flag[0] = 1;
+
+ /* Mark queue indices of QUEUE_SHADOW_RAY_CAST_AO_RAYS and
+ * QUEUE_SHADOW_RAY_CAST_DL_RAYS queues that were made empty during the
+ * previous kernel.
+ */
+ Queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0;
+ Queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0;
+ }
+
+ char enqueue_flag = 0;
+ int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0);
+ ray_index = get_ray_index(ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ Queue_data,
+ queuesize,
+ 0);
+
+#ifdef __COMPUTE_DEVICE_GPU__
+ /* If we are executing on a GPU device, we exit all threads that are not
+ * required.
+ *
+ * If we are executing on a CPU device, then we need to keep all threads
+ * active since we have barrier() calls later in the kernel. CPU devices,
+ * expect all threads to execute barrier statement.
+ */
+ if(ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
+#endif
+
+#ifndef __COMPUTE_DEVICE_GPU__
+ if(ray_index != QUEUE_EMPTY_SLOT) {
+#endif
+ enqueue_flag = kernel_next_iteration_setup(globals,
+ data,
+ shader_data,
+ rng_coop,
+ throughput_coop,
+ PathRadiance_coop,
+ Ray_coop,
+ PathState_coop,
+ LightRay_dl_coop,
+ ISLamp_coop,
+ BSDFEval_coop,
+ LightRay_ao_coop,
+ AOBSDF_coop,
+ AOAlpha_coop,
+ ray_state,
+ use_queues_flag,
+ ray_index);
+#ifndef __COMPUTE_DEVICE_GPU__
+ }
+#endif
+
+ /* Enqueue RAY_UPDATE_BUFFER rays. */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ enqueue_flag,
+ queuesize,
+ &local_queue_atomics,
+ Queue_data,
+ Queue_index);
+}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl
new file mode 100644
index 00000000000..3156dc255fb
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../../kernel_compat_opencl.h"
+#include "../../kernel_math.h"
+#include "../../kernel_types.h"
+#include "../../kernel_globals.h"
+#include "../../kernel_queues.h"
+
+/*
+ * The kernel "kernel_queue_enqueue" enqueues rays of
+ * different ray state into their appropriate Queues;
+ * 1. Rays that have been determined to hit the background from the
+ * "kernel_scene_intersect" kernel
+ * are enqueued in QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS;
+ * 2. Rays that have been determined to be actively participating in path-iteration will be enqueued into QUEUE_ACTIVE_AND_REGENERATED_RAYS.
+ *
+ * The input and output of the kernel is as follows,
+ *
+ * ray_state -------------------------------------------|--- kernel_queue_enqueue --|--- Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS & QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS)
+ * Queue_index(QUEUE_ACTIVE_AND_REGENERATED_RAYS) ------| |--- Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS & QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS)
+ * Queue_index(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---| |
+ * queuesize -------------------------------------------| |
+ *
+ * Note on Queues :
+ * State of queues during the first time this kernel is called :
+ * At entry,
+ * Both QUEUE_ACTIVE_AND_REGENERATED_RAYS and QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty.
+ * At exit,
+ * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE rays
+ * QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_HIT_BACKGROUND rays.
+ *
+ * State of queue during other times this kernel is called :
+ * At entry,
+ * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be empty.
+ * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will contain RAY_TO_REGENERATE and RAY_UPDATE_BUFFER rays.
+ * At exit,
+ * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE rays.
+ * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE, RAY_UPDATE_BUFFER, RAY_HIT_BACKGROUND rays.
+ */
+__kernel void kernel_ocl_path_trace_queue_enqueue(
+ ccl_global int *Queue_data, /* Queue memory */
+ ccl_global int *Queue_index, /* Tracks the number of elements in each queue */
+ ccl_global char *ray_state, /* Denotes the state of each ray */
+ int queuesize) /* Size (capacity) of each queue */
+{
+ /* We have only 2 cases (Hit/Not-Hit) */
+ ccl_local unsigned int local_queue_atomics[2];
+
+ int lidx = get_local_id(1) * get_local_size(0) + get_local_id(0);
+ int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0);
+
+ if(lidx < 2 ) {
+ local_queue_atomics[lidx] = 0;
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ int queue_number = -1;
+
+ if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+ queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS;
+ }
+ else if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS;
+ }
+
+ unsigned int my_lqidx;
+ if(queue_number != -1) {
+ my_lqidx = get_local_queue_index(queue_number, local_queue_atomics);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if(lidx == 0) {
+ local_queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] =
+ get_global_per_queue_offset(QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ local_queue_atomics,
+ Queue_index);
+ local_queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] =
+ get_global_per_queue_offset(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ local_queue_atomics,
+ Queue_index);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ unsigned int my_gqidx;
+ if(queue_number != -1) {
+ my_gqidx = get_global_queue_index(queue_number,
+ queuesize,
+ my_lqidx,
+ local_queue_atomics);
+ Queue_data[my_gqidx] = ray_index;
+ }
+}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl
new file mode 100644
index 00000000000..e5fad7bce50
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "split/kernel_scene_intersect.h"
+
+__kernel void kernel_ocl_path_trace_scene_intersect(
+ ccl_global char *globals,
+ ccl_constant KernelData *data,
+ ccl_global uint *rng_coop,
+ ccl_global Ray *Ray_coop, /* Required for scene_intersect */
+ ccl_global PathState *PathState_coop, /* Required for scene_intersect */
+ Intersection *Intersection_coop, /* Required for scene_intersect */
+ ccl_global char *ray_state, /* Denotes the state of each ray */
+ int sw, int sh,
+ ccl_global int *Queue_data, /* Memory for queues */
+ ccl_global int *Queue_index, /* Tracks the number of elements in queues */
+ int queuesize, /* Size (capacity) of queues */
+ ccl_global char *use_queues_flag, /* used to decide if this kernel should use
+ * queues to fetch ray index */
+#ifdef __KERNEL_DEBUG__
+ DebugData *debugdata_coop,
+#endif
+ int parallel_samples) /* Number of samples to be processed in parallel */
+{
+ int x = get_global_id(0);
+ int y = get_global_id(1);
+
+ /* Fetch use_queues_flag */
+ ccl_local char local_use_queues_flag;
+ if(get_local_id(0) == 0 && get_local_id(1) == 0) {
+ local_use_queues_flag = use_queues_flag[0];
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ int ray_index;
+ if(local_use_queues_flag) {
+ int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0);
+ ray_index = get_ray_index(thread_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ Queue_data,
+ queuesize,
+ 0);
+
+ if(ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
+ } else {
+ if(x < (sw * parallel_samples) && y < sh){
+ ray_index = x + y * (sw * parallel_samples);
+ } else {
+ return;
+ }
+ }
+
+ kernel_scene_intersect(globals,
+ data,
+ rng_coop,
+ Ray_coop,
+ PathState_coop,
+ Intersection_coop,
+ ray_state,
+ sw, sh,
+ use_queues_flag,
+#ifdef __KERNEL_DEBUG__
+ debugdata_coop,
+#endif
+ parallel_samples,
+ ray_index);
+}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
new file mode 100644
index 00000000000..b9f616e6bdf
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "split/kernel_shader_eval.h"
+
+__kernel void kernel_ocl_path_trace_shader_eval(
+ ccl_global char *globals,
+ ccl_constant KernelData *data,
+ ccl_global char *shader_data, /* Output ShaderData structure to be filled */
+ ccl_global uint *rng_coop, /* Required for rbsdf calculation */
+ ccl_global Ray *Ray_coop, /* Required for setting up shader from ray */
+ ccl_global PathState *PathState_coop, /* Required for all functions in this kernel */
+ Intersection *Intersection_coop, /* Required for setting up shader from ray */
+ ccl_global char *ray_state, /* Denotes the state of each ray */
+ ccl_global int *Queue_data, /* queue memory */
+ ccl_global int *Queue_index, /* Tracks the number of elements in each queue */
+ int queuesize) /* Size (capacity) of each queue */
+{
+ /* Enqeueue RAY_TO_REGENERATE rays into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */
+ ccl_local unsigned int local_queue_atomics;
+ if(get_local_id(0) == 0 && get_local_id(1) == 0) {
+ local_queue_atomics = 0;
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0);
+ ray_index = get_ray_index(ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ Queue_data,
+ queuesize,
+ 0);
+
+ if(ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
+
+ char enqueue_flag = (IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) ? 1 : 0;
+ enqueue_ray_index_local(ray_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ enqueue_flag,
+ queuesize,
+ &local_queue_atomics,
+ Queue_data,
+ Queue_index);
+
+ /* Continue on with shader evaluation. */
+ kernel_shader_eval(globals,
+ data,
+ shader_data,
+ rng_coop,
+ Ray_coop,
+ PathState_coop,
+ Intersection_coop,
+ ray_state,
+ ray_index);
+}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl
new file mode 100644
index 00000000000..03886c0a030
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "split/kernel_shadow_blocked.h"
+
+__kernel void kernel_ocl_path_trace_shadow_blocked(
+ ccl_global char *globals,
+ ccl_constant KernelData *data,
+ ccl_global char *shader_shadow, /* Required for shadow blocked */
+ ccl_global PathState *PathState_coop, /* Required for shadow blocked */
+ ccl_global Ray *LightRay_dl_coop, /* Required for direct lighting's shadow blocked */
+ ccl_global Ray *LightRay_ao_coop, /* Required for AO's shadow blocked */
+ Intersection *Intersection_coop_AO,
+ Intersection *Intersection_coop_DL,
+ ccl_global char *ray_state,
+ ccl_global int *Queue_data, /* Queue memory */
+ ccl_global int *Queue_index, /* Tracks the number of elements in each queue */
+ int queuesize, /* Size (capacity) of each queue */
+ int total_num_rays)
+{
+#if 0
+ /* We will make the Queue_index entries '0' in the next kernel. */
+ if(get_global_id(0) == 0 && get_global_id(1) == 0) {
+ /* We empty this queue here */
+ Queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0;
+ Queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0;
+ }
+#endif
+
+ int lidx = get_local_id(1) * get_local_id(0) + get_local_id(0);
+
+ ccl_local unsigned int ao_queue_length;
+ ccl_local unsigned int dl_queue_length;
+ if(lidx == 0) {
+ ao_queue_length = Queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS];
+ dl_queue_length = Queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS];
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ /* flag determining if the current ray is to process shadow ray for AO or DL */
+ char shadow_blocked_type = -1;
+
+ int ray_index = QUEUE_EMPTY_SLOT;
+ int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0);
+ if(thread_index < ao_queue_length + dl_queue_length) {
+ if(thread_index < ao_queue_length) {
+ ray_index = get_ray_index(thread_index, QUEUE_SHADOW_RAY_CAST_AO_RAYS, Queue_data, queuesize, 1);
+ shadow_blocked_type = RAY_SHADOW_RAY_CAST_AO;
+ } else {
+ ray_index = get_ray_index(thread_index - ao_queue_length, QUEUE_SHADOW_RAY_CAST_DL_RAYS, Queue_data, queuesize, 1);
+ shadow_blocked_type = RAY_SHADOW_RAY_CAST_DL;
+ }
+ }
+
+ if(ray_index == QUEUE_EMPTY_SLOT)
+ return;
+
+ kernel_shadow_blocked(globals,
+ data,
+ shader_shadow,
+ PathState_coop,
+ LightRay_dl_coop,
+ LightRay_ao_coop,
+ Intersection_coop_AO,
+ Intersection_coop_DL,
+ ray_state,
+ total_num_rays,
+ shadow_blocked_type,
+ ray_index);
+}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl b/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl
new file mode 100644
index 00000000000..88a1ed830af
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "split/kernel_sum_all_radiance.h"
+
+__kernel void kernel_ocl_path_trace_sum_all_radiance(
+ ccl_constant KernelData *data, /* To get pass_stride to offet into buffer */
+ ccl_global float *buffer, /* Output buffer of RenderTile */
+ ccl_global float *per_sample_output_buffer, /* Radiance contributed by all samples */
+ int parallel_samples, int sw, int sh, int stride,
+ int buffer_offset_x,
+ int buffer_offset_y,
+ int buffer_stride,
+ int start_sample)
+{
+ kernel_sum_all_radiance(data,
+ buffer,
+ per_sample_output_buffer,
+ parallel_samples,
+ sw, sh, stride,
+ buffer_offset_x,
+ buffer_offset_y,
+ buffer_stride,
+ start_sample);
+}
diff --git a/intern/cycles/kernel/osl/SConscript b/intern/cycles/kernel/osl/SConscript
index 58b0204a1b9..74ba5e1020c 100644
--- a/intern/cycles/kernel/osl/SConscript
+++ b/intern/cycles/kernel/osl/SConscript
@@ -44,6 +44,18 @@ defs.append('CCL_NAMESPACE_BEGIN=namespace ccl {')
defs.append('CCL_NAMESPACE_END=}')
defs.append('WITH_OSL')
+if env['WITH_UNORDERED_MAP_SUPPORT']:
+ if env['UNORDERED_MAP_HEADER'] == 'unordered_map':
+ if env['UNORDERED_MAP_NAMESPACE'] == 'std':
+ defs.append('CYCLES_STD_UNORDERED_MAP')
+ elif env['UNORDERED_MAP_NAMESPACE'] == 'std::tr1':
+ defs.append('CYCLES_STD_UNORDERED_MAP_IN_TR1_NAMESPACE')
+ elif env['UNORDERED_MAP_NAMESPACE'] == 'std::tr1':
+ defs.append('CYCLES_TR1_UNORDERED_MAP')
+else:
+ print("-- Replacing unordered_map/set with map/set (warning: slower!)")
+ defs.append('CYCLES_NO_UNORDERED_MAP')
+
if env['WITH_BF_CYCLES_DEBUG']:
defs.append('WITH_CYCLES_DEBUG')
diff --git a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
index 8f9c2efd470..43929fbe928 100644
--- a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
+++ b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
@@ -34,6 +34,7 @@
#include <OSL/genclosure.h>
+#include "kernel_compat_cpu.h"
#include "osl_closures.h"
#include "kernel_types.h"
diff --git a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
index c5851747b54..497c4f0dc5c 100644
--- a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
+++ b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
@@ -34,6 +34,7 @@
#include <OSL/genclosure.h>
+#include "kernel_compat_cpu.h"
#include "osl_closures.h"
#include "kernel_types.h"
diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp b/intern/cycles/kernel/osl/osl_bssrdf.cpp
index 84ef85e089d..88998037751 100644
--- a/intern/cycles/kernel/osl/osl_bssrdf.cpp
+++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp
@@ -34,6 +34,7 @@
#include <OSL/genclosure.h>
+#include "kernel_compat_cpu.h"
#include "osl_bssrdf.h"
#include "osl_closures.h"
diff --git a/intern/cycles/kernel/osl/osl_closures.h b/intern/cycles/kernel/osl/osl_closures.h
index 5e833d738d8..ef67ef52fc0 100644
--- a/intern/cycles/kernel/osl/osl_closures.h
+++ b/intern/cycles/kernel/osl/osl_closures.h
@@ -147,14 +147,14 @@ public: \
\
float3 eval_reflect(const float3 &omega_out, const float3 &omega_in, float& pdf) const \
{ \
- pdf = 0; \
- return make_float3(0, 0, 0); \
+ pdf = 0.0f; \
+ return make_float3(0.0f, 0.0f, 0.0f); \
} \
\
float3 eval_transmit(const float3 &omega_out, const float3 &omega_in, float& pdf) const \
{ \
- pdf = 0; \
- return make_float3(0, 0, 0); \
+ pdf = 0.0f; \
+ return make_float3(0.0f, 0.0f, 0.0f); \
} \
\
int sample(const float3 &Ng, \
diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp
index 1f6015d0d6b..3c1955a1e1e 100644
--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@@ -20,6 +20,7 @@
*/
#if defined(__GNUC__) && defined(NDEBUG)
# pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+# pragma GCC diagnostic ignored "-Wuninitialized"
#endif
#include <string.h>
@@ -138,12 +139,12 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result
{
/* this is only used for shader and object space, we don't really have
* a concept of shader space, so we just use object space for both. */
- if (xform) {
+ if(xform) {
const ShaderData *sd = (const ShaderData *)xform;
KernelGlobals *kg = sd->osl_globals;
int object = sd->object;
- if (object != OBJECT_NONE) {
+ if(object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
Transform tfm;
@@ -168,12 +169,12 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44
{
/* this is only used for shader and object space, we don't really have
* a concept of shader space, so we just use object space for both. */
- if (xform) {
+ if(xform) {
const ShaderData *sd = (const ShaderData *)xform;
KernelGlobals *kg = sd->osl_globals;
int object = sd->object;
- if (object != OBJECT_NONE) {
+ if(object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
Transform itfm;
@@ -198,27 +199,27 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result
{
KernelGlobals *kg = kernel_globals;
- if (from == u_ndc) {
+ if(from == u_ndc) {
Transform tfm = transform_transpose(transform_quick_inverse(kernel_data.cam.worldtondc));
COPY_MATRIX44(&result, &tfm);
return true;
}
- else if (from == u_raster) {
+ else if(from == u_raster) {
Transform tfm = transform_transpose(kernel_data.cam.rastertoworld);
COPY_MATRIX44(&result, &tfm);
return true;
}
- else if (from == u_screen) {
+ else if(from == u_screen) {
Transform tfm = transform_transpose(kernel_data.cam.screentoworld);
COPY_MATRIX44(&result, &tfm);
return true;
}
- else if (from == u_camera) {
+ else if(from == u_camera) {
Transform tfm = transform_transpose(kernel_data.cam.cameratoworld);
COPY_MATRIX44(&result, &tfm);
return true;
}
- else if (from == u_world) {
+ else if(from == u_world) {
result.makeIdentity();
return true;
}
@@ -230,27 +231,27 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44
{
KernelGlobals *kg = kernel_globals;
- if (to == u_ndc) {
+ if(to == u_ndc) {
Transform tfm = transform_transpose(kernel_data.cam.worldtondc);
COPY_MATRIX44(&result, &tfm);
return true;
}
- else if (to == u_raster) {
+ else if(to == u_raster) {
Transform tfm = transform_transpose(kernel_data.cam.worldtoraster);
COPY_MATRIX44(&result, &tfm);
return true;
}
- else if (to == u_screen) {
+ else if(to == u_screen) {
Transform tfm = transform_transpose(kernel_data.cam.worldtoscreen);
COPY_MATRIX44(&result, &tfm);
return true;
}
- else if (to == u_camera) {
+ else if(to == u_camera) {
Transform tfm = transform_transpose(kernel_data.cam.worldtocamera);
COPY_MATRIX44(&result, &tfm);
return true;
}
- else if (to == u_world) {
+ else if(to == u_world) {
result.makeIdentity();
return true;
}
@@ -262,11 +263,11 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result
{
/* this is only used for shader and object space, we don't really have
* a concept of shader space, so we just use object space for both. */
- if (xform) {
+ if(xform) {
const ShaderData *sd = (const ShaderData *)xform;
int object = sd->object;
- if (object != OBJECT_NONE) {
+ if(object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
Transform tfm = sd->ob_tfm;
#else
@@ -287,11 +288,11 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44
{
/* this is only used for shader and object space, we don't really have
* a concept of shader space, so we just use object space for both. */
- if (xform) {
+ if(xform) {
const ShaderData *sd = (const ShaderData *)xform;
int object = sd->object;
- if (object != OBJECT_NONE) {
+ if(object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
Transform tfm = sd->ob_itfm;
#else
@@ -312,22 +313,22 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result
{
KernelGlobals *kg = kernel_globals;
- if (from == u_ndc) {
+ if(from == u_ndc) {
Transform tfm = transform_transpose(transform_quick_inverse(kernel_data.cam.worldtondc));
COPY_MATRIX44(&result, &tfm);
return true;
}
- else if (from == u_raster) {
+ else if(from == u_raster) {
Transform tfm = transform_transpose(kernel_data.cam.rastertoworld);
COPY_MATRIX44(&result, &tfm);
return true;
}
- else if (from == u_screen) {
+ else if(from == u_screen) {
Transform tfm = transform_transpose(kernel_data.cam.screentoworld);
COPY_MATRIX44(&result, &tfm);
return true;
}
- else if (from == u_camera) {
+ else if(from == u_camera) {
Transform tfm = transform_transpose(kernel_data.cam.cameratoworld);
COPY_MATRIX44(&result, &tfm);
return true;
@@ -340,22 +341,22 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44
{
KernelGlobals *kg = kernel_globals;
- if (to == u_ndc) {
+ if(to == u_ndc) {
Transform tfm = transform_transpose(kernel_data.cam.worldtondc);
COPY_MATRIX44(&result, &tfm);
return true;
}
- else if (to == u_raster) {
+ else if(to == u_raster) {
Transform tfm = transform_transpose(kernel_data.cam.worldtoraster);
COPY_MATRIX44(&result, &tfm);
return true;
}
- else if (to == u_screen) {
+ else if(to == u_screen) {
Transform tfm = transform_transpose(kernel_data.cam.worldtoscreen);
COPY_MATRIX44(&result, &tfm);
return true;
}
- else if (to == u_camera) {
+ else if(to == u_camera) {
Transform tfm = transform_transpose(kernel_data.cam.worldtocamera);
COPY_MATRIX44(&result, &tfm);
return true;
@@ -373,8 +374,8 @@ bool OSLRenderServices::get_array_attribute(OSL::ShaderGlobals *sg, bool derivat
static bool set_attribute_float3(float3 f[3], TypeDesc type, bool derivatives, void *val)
{
- if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
- type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor)
+ if(type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
+ type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor)
{
float *fval = (float *)val;
@@ -382,7 +383,7 @@ static bool set_attribute_float3(float3 f[3], TypeDesc type, bool derivatives, v
fval[1] = f[0].y;
fval[2] = f[0].z;
- if (derivatives) {
+ if(derivatives) {
fval[3] = f[1].x;
fval[4] = f[1].y;
fval[5] = f[1].z;
@@ -398,7 +399,7 @@ static bool set_attribute_float3(float3 f[3], TypeDesc type, bool derivatives, v
float *fval = (float *)val;
fval[0] = average(f[0]);
- if (derivatives) {
+ if(derivatives) {
fval[1] = average(f[1]);
fval[2] = average(f[2]);
}
@@ -422,15 +423,15 @@ static bool set_attribute_float3(float3 f, TypeDesc type, bool derivatives, void
static bool set_attribute_float(float f[3], TypeDesc type, bool derivatives, void *val)
{
- if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
- type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor)
+ if(type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
+ type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor)
{
float *fval = (float *)val;
fval[0] = f[0];
fval[1] = f[1];
fval[2] = f[2];
- if (derivatives) {
+ if(derivatives) {
fval[3] = f[1];
fval[4] = f[1];
fval[5] = f[1];
@@ -446,7 +447,7 @@ static bool set_attribute_float(float f[3], TypeDesc type, bool derivatives, voi
float *fval = (float *)val;
fval[0] = f[0];
- if (derivatives) {
+ if(derivatives) {
fval[1] = f[1];
fval[2] = f[2];
}
@@ -474,7 +475,7 @@ static bool set_attribute_int(int i, TypeDesc type, bool derivatives, void *val)
int *ival = (int *)val;
ival[0] = i;
- if (derivatives) {
+ if(derivatives) {
ival[1] = 0;
ival[2] = 0;
}
@@ -491,7 +492,7 @@ static bool set_attribute_string(ustring str, TypeDesc type, bool derivatives, v
ustring *sval = (ustring *)val;
sval[0] = str;
- if (derivatives) {
+ if(derivatives) {
sval[1] = OSLRenderServices::u_empty;
sval[2] = OSLRenderServices::u_empty;
}
@@ -521,7 +522,7 @@ static bool set_attribute_float3_3(float3 P[3], TypeDesc type, bool derivatives,
if(type.arraylen > 3)
memset(fval + 3*3, 0, sizeof(float)*3*(type.arraylen - 3));
- if (derivatives)
+ if(derivatives)
memset(fval + type.arraylen*3, 0, sizeof(float)*2*3*type.arraylen);
return true;
@@ -544,15 +545,15 @@ static bool set_attribute_matrix(const Transform& tfm, TypeDesc type, void *val)
static bool get_mesh_element_attribute(KernelGlobals *kg, const ShaderData *sd, const OSLGlobals::Attribute& attr,
const TypeDesc& type, bool derivatives, void *val)
{
- if (attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector ||
- attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor)
+ if(attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector ||
+ attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor)
{
float3 fval[3];
fval[0] = primitive_attribute_float3(kg, sd, attr.elem, attr.offset,
(derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
return set_attribute_float3(fval, type, derivatives, val);
}
- else if (attr.type == TypeDesc::TypeFloat) {
+ else if(attr.type == TypeDesc::TypeFloat) {
float fval[3];
fval[0] = primitive_attribute_float(kg, sd, attr.elem, attr.offset,
(derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
@@ -566,7 +567,7 @@ static bool get_mesh_element_attribute(KernelGlobals *kg, const ShaderData *sd,
static bool get_mesh_attribute(KernelGlobals *kg, const ShaderData *sd, const OSLGlobals::Attribute& attr,
const TypeDesc& type, bool derivatives, void *val)
{
- if (attr.type == TypeDesc::TypeMatrix) {
+ if(attr.type == TypeDesc::TypeMatrix) {
Transform tfm = primitive_attribute_matrix(kg, sd, attr.offset);
return set_attribute_matrix(tfm, type, val);
}
@@ -580,7 +581,7 @@ static void get_object_attribute(const OSLGlobals::Attribute& attr, bool derivat
size_t datasize = attr.value.datasize();
memcpy(val, attr.value.data(), datasize);
- if (derivatives)
+ if(derivatives)
memset((char *)val + datasize, 0, datasize * 2);
}
@@ -590,80 +591,80 @@ bool OSLRenderServices::get_object_standard_attribute(KernelGlobals *kg, ShaderD
/* todo: turn this into hash table? */
/* Object Attributes */
- if (name == u_object_location) {
+ if(name == u_object_location) {
float3 f = object_location(kg, sd);
return set_attribute_float3(f, type, derivatives, val);
}
- else if (name == u_object_index) {
+ else if(name == u_object_index) {
float f = object_pass_id(kg, sd->object);
return set_attribute_float(f, type, derivatives, val);
}
- else if (name == u_geom_dupli_generated) {
+ else if(name == u_geom_dupli_generated) {
float3 f = object_dupli_generated(kg, sd->object);
return set_attribute_float3(f, type, derivatives, val);
}
- else if (name == u_geom_dupli_uv) {
+ else if(name == u_geom_dupli_uv) {
float3 f = object_dupli_uv(kg, sd->object);
return set_attribute_float3(f, type, derivatives, val);
}
- else if (name == u_material_index) {
+ else if(name == u_material_index) {
float f = shader_pass_id(kg, sd);
return set_attribute_float(f, type, derivatives, val);
}
- else if (name == u_object_random) {
+ else if(name == u_object_random) {
float f = object_random_number(kg, sd->object);
return set_attribute_float(f, type, derivatives, val);
}
/* Particle Attributes */
- else if (name == u_particle_index) {
+ else if(name == u_particle_index) {
int particle_id = object_particle_id(kg, sd->object);
float f = particle_index(kg, particle_id);
return set_attribute_float(f, type, derivatives, val);
}
- else if (name == u_particle_age) {
+ else if(name == u_particle_age) {
int particle_id = object_particle_id(kg, sd->object);
float f = particle_age(kg, particle_id);
return set_attribute_float(f, type, derivatives, val);
}
- else if (name == u_particle_lifetime) {
+ else if(name == u_particle_lifetime) {
int particle_id = object_particle_id(kg, sd->object);
float f = particle_lifetime(kg, particle_id);
return set_attribute_float(f, type, derivatives, val);
}
- else if (name == u_particle_location) {
+ else if(name == u_particle_location) {
int particle_id = object_particle_id(kg, sd->object);
float3 f = particle_location(kg, particle_id);
return set_attribute_float3(f, type, derivatives, val);
}
#if 0 /* unsupported */
- else if (name == u_particle_rotation) {
+ else if(name == u_particle_rotation) {
int particle_id = object_particle_id(kg, sd->object);
float4 f = particle_rotation(kg, particle_id);
return set_attribute_float4(f, type, derivatives, val);
}
#endif
- else if (name == u_particle_size) {
+ else if(name == u_particle_size) {
int particle_id = object_particle_id(kg, sd->object);
float f = particle_size(kg, particle_id);
return set_attribute_float(f, type, derivatives, val);
}
- else if (name == u_particle_velocity) {
+ else if(name == u_particle_velocity) {
int particle_id = object_particle_id(kg, sd->object);
float3 f = particle_velocity(kg, particle_id);
return set_attribute_float3(f, type, derivatives, val);
}
- else if (name == u_particle_angular_velocity) {
+ else if(name == u_particle_angular_velocity) {
int particle_id = object_particle_id(kg, sd->object);
float3 f = particle_angular_velocity(kg, particle_id);
return set_attribute_float3(f, type, derivatives, val);
}
/* Geometry Attributes */
- else if (name == u_geom_numpolyvertices) {
+ else if(name == u_geom_numpolyvertices) {
return set_attribute_int(3, type, derivatives, val);
}
- else if ((name == u_geom_trianglevertices || name == u_geom_polyvertices)
+ else if((name == u_geom_trianglevertices || name == u_geom_polyvertices)
#ifdef __HAIR__
&& sd->type & PRIMITIVE_ALL_TRIANGLE)
#else
@@ -689,21 +690,21 @@ bool OSLRenderServices::get_object_standard_attribute(KernelGlobals *kg, ShaderD
ustring object_name = kg->osl->object_names[sd->object];
return set_attribute_string(object_name, type, derivatives, val);
}
- else if (name == u_is_smooth) {
+ else if(name == u_is_smooth) {
float f = ((sd->shader & SHADER_SMOOTH_NORMAL) != 0);
return set_attribute_float(f, type, derivatives, val);
}
#ifdef __HAIR__
/* Hair Attributes */
- else if (name == u_is_curve) {
+ else if(name == u_is_curve) {
float f = (sd->type & PRIMITIVE_ALL_CURVE) != 0;
return set_attribute_float(f, type, derivatives, val);
}
- else if (name == u_curve_thickness) {
+ else if(name == u_curve_thickness) {
float f = curve_thickness(kg, sd);
return set_attribute_float(f, type, derivatives, val);
}
- else if (name == u_curve_tangent_normal) {
+ else if(name == u_curve_tangent_normal) {
float3 f = curve_tangent_normal(kg, sd);
return set_attribute_float3(f, type, derivatives, val);
}
@@ -715,22 +716,22 @@ bool OSLRenderServices::get_object_standard_attribute(KernelGlobals *kg, ShaderD
bool OSLRenderServices::get_background_attribute(KernelGlobals *kg, ShaderData *sd, ustring name,
TypeDesc type, bool derivatives, void *val)
{
- if (name == u_path_ray_length) {
+ if(name == u_path_ray_length) {
/* Ray Length */
float f = sd->ray_length;
return set_attribute_float(f, type, derivatives, val);
}
- else if (name == u_path_ray_depth) {
+ else if(name == u_path_ray_depth) {
/* Ray Depth */
int f = sd->ray_depth;
return set_attribute_int(f, type, derivatives, val);
}
- else if (name == u_path_transparent_depth) {
+ else if(name == u_path_transparent_depth) {
/* Transparent Ray Depth */
int f = sd->transparent_depth;
return set_attribute_int(f, type, derivatives, val);
}
- else if (name == u_ndc) {
+ else if(name == u_ndc) {
/* NDC coordinates with special exception for otho */
OSLThreadData *tdata = kg->osl_tdata;
OSL::ShaderGlobals *globals = &tdata->globals;
@@ -762,7 +763,7 @@ bool OSLRenderServices::get_background_attribute(KernelGlobals *kg, ShaderData *
bool OSLRenderServices::get_attribute(OSL::ShaderGlobals *sg, bool derivatives, ustring object_name,
TypeDesc type, ustring name, void *val)
{
- if (sg->renderstate == NULL)
+ if(sg->renderstate == NULL)
return false;
ShaderData *sd = (ShaderData *)(sg->renderstate);
@@ -777,10 +778,10 @@ bool OSLRenderServices::get_attribute(ShaderData *sd, bool derivatives, ustring
int object;
/* lookup of attribute on another object */
- if (object_name != u_empty) {
+ if(object_name != u_empty) {
OSLGlobals::ObjectNameMap::iterator it = kg->osl->object_name_map.find(object_name);
- if (it == kg->osl->object_name_map.end())
+ if(it == kg->osl->object_name_map.end())
return false;
object = it->second;
@@ -790,7 +791,7 @@ bool OSLRenderServices::get_attribute(ShaderData *sd, bool derivatives, ustring
object = sd->object;
is_curve = (sd->type & PRIMITIVE_ALL_CURVE) != 0;
- if (object == OBJECT_NONE)
+ if(object == OBJECT_NONE)
return get_background_attribute(kg, sd, name, type, derivatives, val);
}
@@ -799,10 +800,10 @@ bool OSLRenderServices::get_attribute(ShaderData *sd, bool derivatives, ustring
OSLGlobals::AttributeMap& attribute_map = kg->osl->attribute_map[object];
OSLGlobals::AttributeMap::iterator it = attribute_map.find(name);
- if (it != attribute_map.end()) {
+ if(it != attribute_map.end()) {
const OSLGlobals::Attribute& attr = it->second;
- if (attr.elem != ATTR_ELEMENT_OBJECT) {
+ if(attr.elem != ATTR_ELEMENT_OBJECT) {
/* triangle and vertex attributes */
if(get_mesh_element_attribute(kg, sd, attr, type, derivatives, val))
return true;
@@ -819,7 +820,7 @@ bool OSLRenderServices::get_attribute(ShaderData *sd, bool derivatives, ustring
/* not found in attribute, check standard object info */
bool is_std_object_attribute = get_object_standard_attribute(kg, sd, name, type, derivatives, val);
- if (is_std_object_attribute)
+ if(is_std_object_attribute)
return true;
return get_background_attribute(kg, sd, name, type, derivatives, val);
@@ -887,7 +888,7 @@ bool OSLRenderServices::texture(ustring filename, TextureOpt &options,
#endif
bool status;
- if(filename[0] == '@' && filename.find('.') == -1) {
+ if(filename[0] == '@') {
int slot = atoi(filename.c_str() + 1);
float4 rgba = kernel_tex_image_interp(slot, s, 1.0f - t);
@@ -939,19 +940,33 @@ bool OSLRenderServices::texture3d(ustring filename, TextureOpt &options,
OSL::TextureSystem *ts = osl_ts;
ShaderData *sd = (ShaderData *)(sg->renderstate);
KernelGlobals *kg = sd->osl_globals;
- OSLThreadData *tdata = kg->osl_tdata;
- OIIO::TextureSystem::Perthread *thread_info = tdata->oiio_thread_info;
-
- OIIO::TextureSystem::TextureHandle *th = ts->get_texture_handle(filename, thread_info);
+ bool status;
+ if(filename[0] == '@') {
+ int slot = atoi(filename.c_str() + 1);
+ float4 rgba = kernel_tex_image_interp_3d(slot, P.x, P.y, P.z);
+ result[0] = rgba[0];
+ if(nchannels > 1)
+ result[1] = rgba[1];
+ if(nchannels > 2)
+ result[2] = rgba[2];
+ if(nchannels > 3)
+ result[3] = rgba[3];
+ status = true;
+ }
+ else {
+ OSLThreadData *tdata = kg->osl_tdata;
+ OIIO::TextureSystem::Perthread *thread_info = tdata->oiio_thread_info;
+ OIIO::TextureSystem::TextureHandle *th = ts->get_texture_handle(filename, thread_info);
#if OIIO_VERSION < 10500
- bool status = ts->texture3d(th, thread_info,
- options, P, dPdx, dPdy, dPdz, result);
+ status = ts->texture3d(th, thread_info,
+ options, P, dPdx, dPdy, dPdz, result);
#else
- bool status = ts->texture3d(th, thread_info,
- options, P, dPdx, dPdy, dPdz,
- nchannels, result);
+ status = ts->texture3d(th, thread_info,
+ options, P, dPdx, dPdy, dPdz,
+ nchannels, result);
#endif
+ }
if(!status) {
if(nchannels == 3 || nchannels == 4) {
@@ -979,7 +994,7 @@ bool OSLRenderServices::environment(ustring filename, TextureOpt &options,
OSLThreadData *tdata = kg->osl_tdata;
OIIO::TextureSystem::Perthread *thread_info = tdata->oiio_thread_info;
- OIIO::TextureSystem::TextureHandle *th = ts->get_texture_handle(filename, thread_info);
+ OIIO::TextureSystem::TextureHandle *th = ts->get_texture_handle(filename, thread_info);
#if OIIO_VERSION < 10500
bool status = ts->environment(th, thread_info,
diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp
index ebf72ae11f4..8cfe0cbcbd4 100644
--- a/intern/cycles/kernel/osl/osl_shader.cpp
+++ b/intern/cycles/kernel/osl/osl_shader.cpp
@@ -146,11 +146,11 @@ static void flatten_surface_closure_tree(ShaderData *sd, int path_flag,
/* OSL gives us a closure tree, we flatten it into arrays per
* closure type, for evaluation, sampling, etc later on. */
- if (closure->type == OSL::ClosureColor::COMPONENT) {
+ if(closure->type == OSL::ClosureColor::COMPONENT) {
OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure;
CClosurePrimitive *prim = (CClosurePrimitive *)comp->data();
- if (prim) {
+ if(prim) {
ShaderClosure sc;
#ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS
@@ -267,6 +267,7 @@ static void flatten_surface_closure_tree(ShaderData *sd, int path_flag,
if(fabsf(weight.x) > 0.0f) {
sc.weight = make_float3(weight.x, 0.0f, 0.0f);
sc.data0 = bssrdf->radius.x;
+ sc.data1 = 0.0f;
sd->flag |= bssrdf_setup(&sc, sc.type);
sd->closure[sd->num_closure++] = sc;
}
@@ -274,6 +275,7 @@ static void flatten_surface_closure_tree(ShaderData *sd, int path_flag,
if(fabsf(weight.y) > 0.0f) {
sc.weight = make_float3(0.0f, weight.y, 0.0f);
sc.data0 = bssrdf->radius.y;
+ sc.data1 = 0.0f;
sd->flag |= bssrdf_setup(&sc, sc.type);
sd->closure[sd->num_closure++] = sc;
}
@@ -281,6 +283,7 @@ static void flatten_surface_closure_tree(ShaderData *sd, int path_flag,
if(fabsf(weight.z) > 0.0f) {
sc.weight = make_float3(0.0f, 0.0f, weight.z);
sc.data0 = bssrdf->radius.z;
+ sc.data1 = 0.0f;
sd->flag |= bssrdf_setup(&sc, sc.type);
sd->closure[sd->num_closure++] = sc;
}
@@ -293,11 +296,11 @@ static void flatten_surface_closure_tree(ShaderData *sd, int path_flag,
}
}
}
- else if (closure->type == OSL::ClosureColor::MUL) {
+ else if(closure->type == OSL::ClosureColor::MUL) {
OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
flatten_surface_closure_tree(sd, path_flag, mul->closure, TO_FLOAT3(mul->weight) * weight);
}
- else if (closure->type == OSL::ClosureColor::ADD) {
+ else if(closure->type == OSL::ClosureColor::ADD) {
OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
flatten_surface_closure_tree(sd, path_flag, add->closureA, weight);
flatten_surface_closure_tree(sd, path_flag, add->closureB, weight);
@@ -316,11 +319,11 @@ void OSLShader::eval_surface(KernelGlobals *kg, ShaderData *sd, int path_flag, S
OSL::ShadingContext *octx = tdata->context[(int)ctx];
int shader = sd->shader & SHADER_MASK;
- if (kg->osl->surface_state[shader])
+ if(kg->osl->surface_state[shader])
ss->execute(*octx, *(kg->osl->surface_state[shader]), *globals);
/* flatten closure tree */
- if (globals->Ci)
+ if(globals->Ci)
flatten_surface_closure_tree(sd, path_flag, globals->Ci);
}
@@ -332,23 +335,23 @@ static float3 flatten_background_closure_tree(const OSL::ClosureColor *closure)
* is only one supported closure type at the moment, which has no evaluation
* functions, so we just sum the weights */
- if (closure->type == OSL::ClosureColor::COMPONENT) {
+ if(closure->type == OSL::ClosureColor::COMPONENT) {
OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure;
CClosurePrimitive *prim = (CClosurePrimitive *)comp->data();
- if (prim && prim->category == CClosurePrimitive::Background)
+ if(prim && prim->category == CClosurePrimitive::Background)
#ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS
return TO_FLOAT3(comp->w);
#else
return make_float3(1.0f, 1.0f, 1.0f);
#endif
}
- else if (closure->type == OSL::ClosureColor::MUL) {
+ else if(closure->type == OSL::ClosureColor::MUL) {
OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
return TO_FLOAT3(mul->weight) * flatten_background_closure_tree(mul->closure);
}
- else if (closure->type == OSL::ClosureColor::ADD) {
+ else if(closure->type == OSL::ClosureColor::ADD) {
OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
return flatten_background_closure_tree(add->closureA) +
@@ -369,11 +372,11 @@ float3 OSLShader::eval_background(KernelGlobals *kg, ShaderData *sd, int path_fl
OSL::ShaderGlobals *globals = &tdata->globals;
OSL::ShadingContext *octx = tdata->context[(int)ctx];
- if (kg->osl->background_state)
+ if(kg->osl->background_state)
ss->execute(*octx, *(kg->osl->background_state), *globals);
/* return background color immediately */
- if (globals->Ci)
+ if(globals->Ci)
return flatten_background_closure_tree(globals->Ci);
return make_float3(0.0f, 0.0f, 0.0f);
@@ -387,11 +390,11 @@ static void flatten_volume_closure_tree(ShaderData *sd,
/* OSL gives us a closure tree, we flatten it into arrays per
* closure type, for evaluation, sampling, etc later on. */
- if (closure->type == OSL::ClosureColor::COMPONENT) {
+ if(closure->type == OSL::ClosureColor::COMPONENT) {
OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure;
CClosurePrimitive *prim = (CClosurePrimitive *)comp->data();
- if (prim) {
+ if(prim) {
ShaderClosure sc;
#ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS
@@ -448,11 +451,11 @@ static void flatten_volume_closure_tree(ShaderData *sd,
}
}
}
- else if (closure->type == OSL::ClosureColor::MUL) {
+ else if(closure->type == OSL::ClosureColor::MUL) {
OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
flatten_volume_closure_tree(sd, mul->closure, TO_FLOAT3(mul->weight) * weight);
}
- else if (closure->type == OSL::ClosureColor::ADD) {
+ else if(closure->type == OSL::ClosureColor::ADD) {
OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
flatten_volume_closure_tree(sd, add->closureA, weight);
flatten_volume_closure_tree(sd, add->closureB, weight);
@@ -471,11 +474,11 @@ void OSLShader::eval_volume(KernelGlobals *kg, ShaderData *sd, int path_flag, Sh
OSL::ShadingContext *octx = tdata->context[(int)ctx];
int shader = sd->shader & SHADER_MASK;
- if (kg->osl->volume_state[shader])
+ if(kg->osl->volume_state[shader])
ss->execute(*octx, *(kg->osl->volume_state[shader]), *globals);
/* flatten closure tree */
- if (globals->Ci)
+ if(globals->Ci)
flatten_volume_closure_tree(sd, globals->Ci);
}
@@ -493,7 +496,7 @@ void OSLShader::eval_displacement(KernelGlobals *kg, ShaderData *sd, ShaderConte
OSL::ShadingContext *octx = tdata->context[(int)ctx];
int shader = sd->shader & SHADER_MASK;
- if (kg->osl->displacement_state[shader])
+ if(kg->osl->displacement_state[shader])
ss->execute(*octx, *(kg->osl->displacement_state[shader]), *globals);
/* get back position */
@@ -520,7 +523,7 @@ float3 OSLShader::bsdf_eval(const ShaderData *sd, const ShaderClosure *sc, const
CBSDFClosure *bsdf = (CBSDFClosure *)sc->prim;
float3 bsdf_eval;
- if (dot(sd->Ng, omega_in) >= 0.0f)
+ if(dot(sd->Ng, omega_in) >= 0.0f)
bsdf_eval = bsdf->eval_reflect(sd->I, omega_in, pdf);
else
bsdf_eval = bsdf->eval_transmit(sd->I, omega_in, pdf);
@@ -548,7 +551,7 @@ int OSLShader::find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id,
ustring stdname(std::string("geom:") + std::string(Attribute::standard_name((AttributeStandard)id)));
OSLGlobals::AttributeMap::const_iterator it = attr_map.find(stdname);
- if (it != attr_map.end()) {
+ if(it != attr_map.end()) {
const OSLGlobals::Attribute &osl_attr = it->second;
*elem = osl_attr.elem;
diff --git a/intern/cycles/kernel/shaders/node_image_texture.osl b/intern/cycles/kernel/shaders/node_image_texture.osl
index 526a87525cd..46a02cab32e 100644
--- a/intern/cycles/kernel/shaders/node_image_texture.osl
+++ b/intern/cycles/kernel/shaders/node_image_texture.osl
@@ -26,7 +26,7 @@ point map_to_tube(vector dir)
{
float u, v;
v = (dir[2] + 1.0) * 0.5;
- float len = sqrt(dir[0]*dir[0] + dir[1]*dir[1]);
+ float len = sqrt(dir[0] * dir[0] + dir[1] * dir[1]);
if (len > 0.0) {
u = (1.0 - (atan2(dir[0] / len, dir[1] / len) / M_PI)) * 0.5;
}
@@ -40,8 +40,8 @@ point map_to_sphere(vector dir)
{
float len = length(dir);
float v, u;
- if(len > 0.0) {
- if(dir[0] == 0.0 && dir[1] == 0.0) {
+ if (len > 0.0) {
+ if (dir[0] == 0.0 && dir[1] == 0.0) {
u = 0.0; /* Othwise domain error. */
}
else {
diff --git a/intern/cycles/kernel/shaders/node_math.osl b/intern/cycles/kernel/shaders/node_math.osl
index bbc008b4299..7eef97fd7e8 100644
--- a/intern/cycles/kernel/shaders/node_math.osl
+++ b/intern/cycles/kernel/shaders/node_math.osl
@@ -93,8 +93,8 @@ shader node_math(
Value = Value1 > Value2;
else if (type == "Modulo")
Value = safe_modulo(Value1, Value2);
- else if (type == "Absolute")
- Value = fabs(Value1);
+ else if (type == "Absolute")
+ Value = fabs(Value1);
if (Clamp)
Value = clamp(Value, 0.0, 1.0);
diff --git a/intern/cycles/kernel/shaders/node_musgrave_texture.osl b/intern/cycles/kernel/shaders/node_musgrave_texture.osl
index a349dc8cb9a..4f95dec910a 100644
--- a/intern/cycles/kernel/shaders/node_musgrave_texture.osl
+++ b/intern/cycles/kernel/shaders/node_musgrave_texture.osl
@@ -26,7 +26,7 @@
* from "Texturing and Modelling: A procedural approach"
*/
-float noise_musgrave_fBm(point p, string basis, float H, float lacunarity, float octaves)
+float noise_musgrave_fBm(point p, float H, float lacunarity, float octaves)
{
float rmd;
float value = 0.0;
@@ -54,7 +54,7 @@ float noise_musgrave_fBm(point p, string basis, float H, float lacunarity, float
* octaves: number of frequencies in the fBm
*/
-float noise_musgrave_multi_fractal(point p, string basis, float H, float lacunarity, float octaves)
+float noise_musgrave_multi_fractal(point p, float H, float lacunarity, float octaves)
{
float rmd;
float value = 1.0;
@@ -83,7 +83,7 @@ float noise_musgrave_multi_fractal(point p, string basis, float H, float lacunar
* offset: raises the terrain from `sea level'
*/
-float noise_musgrave_hetero_terrain(point p, string basis, float H, float lacunarity, float octaves, float offset)
+float noise_musgrave_hetero_terrain(point p, float H, float lacunarity, float octaves, float offset)
{
float value, increment, rmd;
float pwHL = pow(lacunarity, -H);
@@ -118,8 +118,8 @@ float noise_musgrave_hetero_terrain(point p, string basis, float H, float lacuna
* offset: raises the terrain from `sea level'
*/
-float noise_musgrave_hybrid_multi_fractal(point p, string basis, float H,
- float lacunarity, float octaves, float offset, float gain)
+float noise_musgrave_hybrid_multi_fractal(point p, float H, float lacunarity,
+ float octaves, float offset, float gain)
{
float result, signal, weight, rmd;
float pwHL = pow(lacunarity, -H);
@@ -156,8 +156,8 @@ float noise_musgrave_hybrid_multi_fractal(point p, string basis, float H,
* offset: raises the terrain from `sea level'
*/
-float noise_musgrave_ridged_multi_fractal(point p, string basis, float H,
- float lacunarity, float octaves, float offset, float gain)
+float noise_musgrave_ridged_multi_fractal(point p, float H, float lacunarity,
+ float octaves, float offset, float gain)
{
float result, signal, weight;
float pwHL = pow(lacunarity, -H);
@@ -201,7 +201,6 @@ shader node_musgrave_texture(
float dimension = max(Dimension, 1e-5);
float octaves = clamp(Detail, 0.0, 16.0);
float lacunarity = max(Lacunarity, 1e-5);
- string Basis = "Perlin";
float intensity = 1.0;
point p = Vector;
@@ -212,15 +211,15 @@ shader node_musgrave_texture(
p = p * Scale;
if (Type == "Multifractal")
- Fac = intensity * noise_musgrave_multi_fractal(p, Basis, dimension, lacunarity, octaves);
+ Fac = intensity * noise_musgrave_multi_fractal(p, dimension, lacunarity, octaves);
else if (Type == "fBM")
- Fac = intensity * noise_musgrave_fBm(p, Basis, dimension, lacunarity, octaves);
+ Fac = intensity * noise_musgrave_fBm(p, dimension, lacunarity, octaves);
else if (Type == "Hybrid Multifractal")
- Fac = intensity * noise_musgrave_hybrid_multi_fractal(p, Basis, dimension, lacunarity, octaves, Offset, Gain);
+ Fac = intensity * noise_musgrave_hybrid_multi_fractal(p, dimension, lacunarity, octaves, Offset, Gain);
else if (Type == "Ridged Multifractal")
- Fac = intensity * noise_musgrave_ridged_multi_fractal(p, Basis, dimension, lacunarity, octaves, Offset, Gain);
+ Fac = intensity * noise_musgrave_ridged_multi_fractal(p, dimension, lacunarity, octaves, Offset, Gain);
else if (Type == "Hetero Terrain")
- Fac = intensity * noise_musgrave_hetero_terrain(p, Basis, dimension, lacunarity, octaves, Offset);
+ Fac = intensity * noise_musgrave_hetero_terrain(p, dimension, lacunarity, octaves, Offset);
Color = color(Fac, Fac, Fac);
}
diff --git a/intern/cycles/kernel/shaders/node_noise_texture.osl b/intern/cycles/kernel/shaders/node_noise_texture.osl
index dabc0b6843f..e83e5b5b211 100644
--- a/intern/cycles/kernel/shaders/node_noise_texture.osl
+++ b/intern/cycles/kernel/shaders/node_noise_texture.osl
@@ -19,23 +19,23 @@
/* Noise */
-float noise(point p, string basis, float distortion, float detail, float fac, color Color)
+float noise(point p, float distortion, float detail, float fac, color Color)
{
point r;
int hard = 0;
if (distortion != 0.0) {
- r[0] = noise_basis(p + point(13.5), basis) * distortion;
- r[1] = noise_basis(p, basis) * distortion;
- r[2] = noise_basis(p - point(13.5), basis) * distortion;
+ r[0] = safe_noise(p + point(13.5), "unsigned") * distortion;
+ r[1] = safe_noise(p, "unsigned") * distortion;
+ r[2] = safe_noise(p - point(13.5), "unsigned") * distortion;
p += r;
}
- fac = noise_turbulence(p, basis, detail, hard);
+ fac = noise_turbulence(p, detail, hard);
- Color = color(fac, noise_turbulence(point(p[1], p[0], p[2]), basis, detail, hard),
- noise_turbulence(point(p[1], p[2], p[0]), basis, detail, hard));
+ Color = color(fac, noise_turbulence(point(p[1], p[0], p[2]), detail, hard),
+ noise_turbulence(point(p[1], p[2], p[0]), detail, hard));
return fac;
}
@@ -55,7 +55,6 @@ shader node_noise_texture(
if (use_mapping)
p = transform(mapping, p);
- string Basis = "Perlin";
- Fac = noise(p * Scale, Basis, Distortion, Detail, Fac, Color);
+ Fac = noise(p * Scale, Distortion, Detail, Fac, Color);
}
diff --git a/intern/cycles/kernel/shaders/node_texture.h b/intern/cycles/kernel/shaders/node_texture.h
index 5f9cd5afa47..fc2cfdcd55c 100644
--- a/intern/cycles/kernel/shaders/node_texture.h
+++ b/intern/cycles/kernel/shaders/node_texture.h
@@ -14,32 +14,6 @@
* limitations under the License.
*/
-/* Voronoi Distances */
-
-float voronoi_distance(string distance_metric, vector d, float e)
-{
-#if 0
- if (distance_metric == "Distance Squared")
-#endif
- return dot(d, d);
-#if 0
- if (distance_metric == "Actual Distance")
- return length(d);
- if (distance_metric == "Manhattan")
- return fabs(d[0]) + fabs(d[1]) + fabs(d[2]);
- if (distance_metric == "Chebychev")
- return max(fabs(d[0]), max(fabs(d[1]), fabs(d[2])));
- if (distance_metric == "Minkovsky 1/2")
- return sqrt(fabs(d[0])) + sqrt(fabs(d[1])) + sqrt(fabs(d[1]));
- if (distance_metric == "Minkovsky 4")
- return sqrt(sqrt(dot(d * d, d * d)));
- if (distance_metric == "Minkovsky")
- return pow(pow(fabs(d[0]), e) + pow(fabs(d[1]), e) + pow(fabs(d[2]), e), 1.0 / e);
-
- return 0.0;
-#endif
-}
-
/* Voronoi / Worley like */
color cellnoise_color(point p)
@@ -51,7 +25,7 @@ color cellnoise_color(point p)
return color(r, g, b);
}
-void voronoi(point p, string distance_metric, float e, float da[4], point pa[4])
+void voronoi(point p, float e, float da[4], point pa[4])
{
/* returns distances in da and point coords in pa */
int xx, yy, zz, xi, yi, zi;
@@ -71,7 +45,7 @@ void voronoi(point p, string distance_metric, float e, float da[4], point pa[4])
point ip = point(xx, yy, zz);
point vp = (point)cellnoise_color(ip);
point pd = p - (vp + ip);
- float d = voronoi_distance(distance_metric, pd, e);
+ float d = dot(pd, pd);
vp += point(xx, yy, zz);
@@ -111,46 +85,6 @@ void voronoi(point p, string distance_metric, float e, float da[4], point pa[4])
}
}
-float voronoi_Fn(point p, int n)
-{
- float da[4];
- point pa[4];
-
- voronoi(p, "Distance Squared", 0, da, pa);
-
- return da[n];
-}
-
-float voronoi_FnFn(point p, int n1, int n2)
-{
- float da[4];
- point pa[4];
-
- voronoi(p, "Distance Squared", 0, da, pa);
-
- return da[n2] - da[n1];
-}
-
-float voronoi_F1(point p) { return voronoi_Fn(p, 0); }
-float voronoi_F2(point p) { return voronoi_Fn(p, 1); }
-float voronoi_F3(point p) { return voronoi_Fn(p, 2); }
-float voronoi_F4(point p) { return voronoi_Fn(p, 3); }
-float voronoi_F1F2(point p) { return voronoi_FnFn(p, 0, 1); }
-
-float voronoi_Cr(point p)
-{
- /* crackle type pattern, just a scale/clamp of F2-F1 */
- float t = 10.0 * voronoi_F1F2(p);
- return (t > 1.0) ? 1.0 : t;
-}
-
-float voronoi_F1S(point p) { return 2.0 * voronoi_F1(p) - 1.0; }
-float voronoi_F2S(point p) { return 2.0 * voronoi_F2(p) - 1.0; }
-float voronoi_F3S(point p) { return 2.0 * voronoi_F3(p) - 1.0; }
-float voronoi_F4S(point p) { return 2.0 * voronoi_F4(p) - 1.0; }
-float voronoi_F1F2S(point p) { return 2.0 * voronoi_F1F2(p) - 1.0; }
-float voronoi_CrS(point p) { return 2.0 * voronoi_Cr(p) - 1.0; }
-
/* Noise Bases */
float safe_noise(point p, string type)
@@ -172,39 +106,9 @@ float safe_noise(point p, string type)
return f;
}
-float noise_basis(point p, string basis)
-{
- if (basis == "Perlin")
- return safe_noise(p, "unsigned");
- if (basis == "Voronoi F1")
- return voronoi_F1S(p);
- if (basis == "Voronoi F2")
- return voronoi_F2S(p);
- if (basis == "Voronoi F3")
- return voronoi_F3S(p);
- if (basis == "Voronoi F4")
- return voronoi_F4S(p);
- if (basis == "Voronoi F2-F1")
- return voronoi_F1F2S(p);
- if (basis == "Voronoi Crackle")
- return voronoi_CrS(p);
- if (basis == "Cell Noise")
- return cellnoise(p);
-
- return 0.0;
-}
-
-/* Soft/Hard Noise */
-
-float noise_basis_hard(point p, string basis, int hard)
-{
- float t = noise_basis(p, basis);
- return (hard) ? fabs(2.0 * t - 1.0) : t;
-}
-
/* Turbulence */
-float noise_turbulence(point p, string basis, float details, int hard)
+float noise_turbulence(point p, float details, int hard)
{
float fscale = 1.0;
float amp = 1.0;
@@ -215,7 +119,7 @@ float noise_turbulence(point p, string basis, float details, int hard)
n = (int)octaves;
for (i = 0; i <= n; i++) {
- float t = noise_basis(fscale * p, basis);
+ float t = safe_noise(fscale * p, "unsigned");
if (hard)
t = fabs(2.0 * t - 1.0);
@@ -228,7 +132,7 @@ float noise_turbulence(point p, string basis, float details, int hard)
float rmd = octaves - floor(octaves);
if (rmd != 0.0) {
- float t = noise_basis(fscale * p, basis);
+ float t = safe_noise(fscale * p, "unsigned");
if (hard)
t = fabs(2.0 * t - 1.0);
diff --git a/intern/cycles/kernel/shaders/node_voronoi_texture.osl b/intern/cycles/kernel/shaders/node_voronoi_texture.osl
index df169599d08..29e143ae207 100644
--- a/intern/cycles/kernel/shaders/node_voronoi_texture.osl
+++ b/intern/cycles/kernel/shaders/node_voronoi_texture.osl
@@ -37,7 +37,7 @@ shader node_voronoi_texture(
float da[4];
point pa[4];
- voronoi(p * Scale, "Distance Squared", 1.0, da, pa);
+ voronoi(p * Scale, 1.0, da, pa);
/* Colored output */
if (Coloring == "Intensity") {
diff --git a/intern/cycles/kernel/shaders/node_wave_texture.osl b/intern/cycles/kernel/shaders/node_wave_texture.osl
index a95752fc592..569f284cbac 100644
--- a/intern/cycles/kernel/shaders/node_wave_texture.osl
+++ b/intern/cycles/kernel/shaders/node_wave_texture.osl
@@ -31,7 +31,7 @@ float wave(point p, string type, float detail, float distortion, float dscale)
}
if (distortion != 0.0) {
- n = n + (distortion * noise_turbulence(p * dscale, "Perlin", detail, 0));
+ n = n + (distortion * noise_turbulence(p * dscale, detail, 0));
}
return 0.5 + 0.5 * sin(n);
}
diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h
index 6babe98717c..697a1756119 100644
--- a/intern/cycles/kernel/shaders/stdosl.h
+++ b/intern/cycles/kernel/shaders/stdosl.h
@@ -249,7 +249,21 @@ point rotate (point p, float angle, point a, point b)
{
vector axis = normalize (b - a);
float cosang, sinang;
+ /* Older OSX has major issues with sincos() function,
+ * it's likely a big in OSL or LLVM. For until we've
+ * updated to new versions of this libraries we'll
+ * use a workaround to prevent possible crashes on all
+ * the platforms.
+ *
+ * Shouldn't be that bad because it's mainly used for
+ * anisotropic shader where angle is usually constant.
+ */
+#if 0
sincos (angle, sinang, cosang);
+#else
+ sinang = sin (angle);
+ cosang = cos (angle);
+#endif
float cosang1 = 1.0 - cosang;
float x = axis[0], y = axis[1], z = axis[2];
matrix M = matrix (x * x + (1.0 - x * x) * cosang,
diff --git a/intern/cycles/kernel/split/kernel_background_buffer_update.h b/intern/cycles/kernel/split/kernel_background_buffer_update.h
new file mode 100644
index 00000000000..181a1054a0d
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_background_buffer_update.h
@@ -0,0 +1,255 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel_split_common.h"
+
+/* Note on kernel_background_buffer_update kernel.
+ * This is the fourth kernel in the ray tracing logic, and the third
+ * of the path iteration kernels. This kernel takes care of rays that hit
+ * the background (sceneintersect kernel), and for the rays of
+ * state RAY_UPDATE_BUFFER it updates the ray's accumulated radiance in
+ * the output buffer. This kernel also takes care of rays that have been determined
+ * to-be-regenerated.
+ *
+ * We will empty QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue in this kernel
+ *
+ * Typically all rays that are in state RAY_HIT_BACKGROUND, RAY_UPDATE_BUFFER
+ * will be eventually set to RAY_TO_REGENERATE state in this kernel. Finally all rays of ray_state
+ * RAY_TO_REGENERATE will be regenerated and put in queue QUEUE_ACTIVE_AND_REGENERATED_RAYS.
+ *
+ * The input and output are as follows,
+ *
+ * rng_coop ---------------------------------------------|--- kernel_background_buffer_update --|--- PathRadiance_coop
+ * throughput_coop --------------------------------------| |--- L_transparent_coop
+ * per_sample_output_buffers ----------------------------| |--- per_sample_output_buffers
+ * Ray_coop ---------------------------------------------| |--- ray_state
+ * PathState_coop ---------------------------------------| |--- Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
+ * L_transparent_coop -----------------------------------| |--- Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS)
+ * ray_state --------------------------------------------| |--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
+ * Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ----| |--- Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS)
+ * Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS) ------| |--- work_array
+ * parallel_samples -------------------------------------| |--- PathState_coop
+ * end_sample -------------------------------------------| |--- throughput_coop
+ * kg (globals + data) ----------------------------------| |--- rng_coop
+ * rng_state --------------------------------------------| |--- Ray
+ * PathRadiance_coop ------------------------------------| |
+ * sw ---------------------------------------------------| |
+ * sh ---------------------------------------------------| |
+ * sx ---------------------------------------------------| |
+ * sy ---------------------------------------------------| |
+ * stride -----------------------------------------------| |
+ * work_array -------------------------------------------| |--- work_array
+ * queuesize --------------------------------------------| |
+ * start_sample -----------------------------------------| |--- work_pool_wgs
+ * work_pool_wgs ----------------------------------------| |
+ * num_samples ------------------------------------------| |
+ *
+ * note on shader_data : shader_data argument is neither an input nor an output for this kernel. It is just filled and consumed here itself.
+ * Note on Queues :
+ * This kernel fetches rays from QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue.
+ *
+ * State of queues when this kernel is called :
+ * At entry,
+ * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE rays
+ * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_UPDATE_BUFFER, RAY_HIT_BACKGROUND, RAY_TO_REGENERATE rays
+ * At exit,
+ * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays
+ * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty
+ */
+ccl_device char kernel_background_buffer_update(
+ ccl_global char *globals,
+ ccl_constant KernelData *data,
+ ccl_global char *shader_data,
+ ccl_global float *per_sample_output_buffers,
+ ccl_global uint *rng_state,
+ ccl_global uint *rng_coop, /* Required for buffer Update */
+ ccl_global float3 *throughput_coop, /* Required for background hit processing */
+ PathRadiance *PathRadiance_coop, /* Required for background hit processing and buffer Update */
+ ccl_global Ray *Ray_coop, /* Required for background hit processing */
+ ccl_global PathState *PathState_coop, /* Required for background hit processing */
+ ccl_global float *L_transparent_coop, /* Required for background hit processing and buffer Update */
+ ccl_global char *ray_state, /* Stores information on the current state of a ray */
+ int sw, int sh, int sx, int sy, int stride,
+ int rng_state_offset_x,
+ int rng_state_offset_y,
+ int rng_state_stride,
+ ccl_global unsigned int *work_array, /* Denotes work of each ray */
+ int end_sample,
+ int start_sample,
+#ifdef __WORK_STEALING__
+ ccl_global unsigned int *work_pool_wgs,
+ unsigned int num_samples,
+#endif
+#ifdef __KERNEL_DEBUG__
+ DebugData *debugdata_coop,
+#endif
+ int parallel_samples, /* Number of samples to be processed in parallel */
+ int ray_index)
+{
+ char enqueue_flag = 0;
+
+ /* Load kernel globals structure and ShaderData strucuture */
+ KernelGlobals *kg = (KernelGlobals *)globals;
+ ShaderData *sd = (ShaderData *)shader_data;
+
+#ifdef __KERNEL_DEBUG__
+ DebugData *debug_data = &debugdata_coop[ray_index];
+#endif
+ ccl_global PathState *state = &PathState_coop[ray_index];
+ PathRadiance *L = L = &PathRadiance_coop[ray_index];
+ ccl_global Ray *ray = &Ray_coop[ray_index];
+ ccl_global float3 *throughput = &throughput_coop[ray_index];
+ ccl_global float *L_transparent = &L_transparent_coop[ray_index];
+ ccl_global uint *rng = &rng_coop[ray_index];
+
+#ifdef __WORK_STEALING__
+ unsigned int my_work;
+ ccl_global float *initial_per_sample_output_buffers;
+ ccl_global uint *initial_rng;
+#endif
+ unsigned int sample;
+ unsigned int tile_x;
+ unsigned int tile_y;
+ unsigned int pixel_x;
+ unsigned int pixel_y;
+ unsigned int my_sample_tile;
+
+#ifdef __WORK_STEALING__
+ my_work = work_array[ray_index];
+ sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
+ get_pixel_tile_position(&pixel_x, &pixel_y,
+ &tile_x, &tile_y,
+ my_work,
+ sw, sh, sx, sy,
+ parallel_samples,
+ ray_index);
+ my_sample_tile = 0;
+ initial_per_sample_output_buffers = per_sample_output_buffers;
+ initial_rng = rng_state;
+#else /* __WORK_STEALING__ */
+ sample = work_array[ray_index];
+ int tile_index = ray_index / parallel_samples;
+ /* buffer and rng_state's stride is "stride". Find x and y using ray_index */
+ tile_x = tile_index % sw;
+ tile_y = tile_index / sw;
+ my_sample_tile = ray_index - (tile_index * parallel_samples);
+#endif /* __WORK_STEALING__ */
+
+ rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride;
+ per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride;
+
+ if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+ /* eval background shader if nothing hit */
+ if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) {
+ *L_transparent = (*L_transparent) + average((*throughput));
+#ifdef __PASSES__
+ if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
+#endif
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ }
+
+ if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND))
+ {
+#ifdef __BACKGROUND__
+ /* sample background shader */
+ float3 L_background = indirect_background(kg, state, ray, sd);
+ path_radiance_accum_background(L, (*throughput), L_background, state->bounce);
+#endif
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ }
+ }
+
+ if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
+ float3 L_sum = path_radiance_clamp_and_sum(kg, L);
+ kernel_write_light_passes(kg, per_sample_output_buffers, L, sample);
+#ifdef __KERNEL_DEBUG__
+ kernel_write_debug_passes(kg, per_sample_output_buffers, state, debug_data, sample);
+#endif
+ float4 L_rad = make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - (*L_transparent));
+
+ /* accumulate result in output buffer */
+ kernel_write_pass_float4(per_sample_output_buffers, sample, L_rad);
+ path_rng_end(kg, rng_state, *rng);
+
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
+ }
+
+ if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
+#ifdef __WORK_STEALING__
+ /* We have completed current work; So get next work */
+ int valid_work = get_next_work(work_pool_wgs, &my_work, sw, sh, num_samples, parallel_samples, ray_index);
+ if(!valid_work) {
+ /* If work is invalid, this means no more work is available and the thread may exit */
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
+ }
+#else /* __WORK_STEALING__ */
+ if((sample + parallel_samples) >= end_sample) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
+ }
+#endif /* __WORK_STEALING__ */
+
+ if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
+#ifdef __WORK_STEALING__
+ work_array[ray_index] = my_work;
+ /* Get the sample associated with the current work */
+ sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
+ /* Get pixel and tile position associated with current work */
+ get_pixel_tile_position(&pixel_x, &pixel_y, &tile_x, &tile_y, my_work, sw, sh, sx, sy, parallel_samples, ray_index);
+ my_sample_tile = 0;
+
+ /* Remap rng_state according to the current work */
+ rng_state = initial_rng + ((rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride);
+ /* Remap per_sample_output_buffers according to the current work */
+ per_sample_output_buffers = initial_per_sample_output_buffers
+ + (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride;
+#else /* __WORK_STEALING__ */
+ work_array[ray_index] = sample + parallel_samples;
+ sample = work_array[ray_index];
+
+ /* Get ray position from ray index */
+ pixel_x = sx + ((ray_index / parallel_samples) % sw);
+ pixel_y = sy + ((ray_index / parallel_samples) / sw);
+#endif /* __WORK_STEALING__ */
+
+ /* Initialize random numbers and ray. */
+ kernel_path_trace_setup(kg, rng_state, sample, pixel_x, pixel_y, rng, ray);
+
+ if(ray->t != 0.0f) {
+ /* Initialize throughput, L_transparent, Ray, PathState;
+ * These rays proceed with path-iteration.
+ */
+ *throughput = make_float3(1.0f, 1.0f, 1.0f);
+ *L_transparent = 0.0f;
+ path_radiance_init(L, kernel_data.film.use_light_pass);
+ path_state_init(kg, state, rng, sample, ray);
+#ifdef __KERNEL_DEBUG__
+ debug_data_init(debug_data);
+#endif
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ enqueue_flag = 1;
+ } else {
+ /* These rays do not participate in path-iteration. */
+ float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ /* Accumulate result in output buffer. */
+ kernel_write_pass_float4(per_sample_output_buffers, sample, L_rad);
+ path_rng_end(kg, rng_state, *rng);
+
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
+ }
+ }
+ }
+ return enqueue_flag;
+}
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
new file mode 100644
index 00000000000..2cd98e466c1
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -0,0 +1,418 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel_split_common.h"
+
+/* Note on kernel_data_initialization kernel
+ * This kernel Initializes structures needed in path-iteration kernels.
+ * This is the first kernel in ray-tracing logic.
+ *
+ * Ray state of rays outside the tile-boundary will be marked RAY_INACTIVE
+ *
+ * Its input and output are as follows,
+ *
+ * Un-initialized rng---------------|--- kernel_data_initialization ---|--- Initialized rng
+ * Un-initialized throughput -------| |--- Initialized throughput
+ * Un-initialized L_transparent ----| |--- Initialized L_transparent
+ * Un-initialized PathRadiance -----| |--- Initialized PathRadiance
+ * Un-initialized Ray --------------| |--- Initialized Ray
+ * Un-initialized PathState --------| |--- Initialized PathState
+ * Un-initialized QueueData --------| |--- Initialized QueueData (to QUEUE_EMPTY_SLOT)
+ * Un-initilaized QueueIndex -------| |--- Initialized QueueIndex (to 0)
+ * Un-initialized use_queues_flag---| |--- Initialized use_queues_flag (to false)
+ * Un-initialized ray_state --------| |--- Initialized ray_state
+ * parallel_samples --------------- | |--- Initialized per_sample_output_buffers
+ * rng_state -----------------------| |--- Initialized work_array
+ * data ----------------------------| |--- Initialized work_pool_wgs
+ * start_sample --------------------| |
+ * sx ------------------------------| |
+ * sy ------------------------------| |
+ * sw ------------------------------| |
+ * sh ------------------------------| |
+ * stride --------------------------| |
+ * queuesize -----------------------| |
+ * num_samples ---------------------| |
+ *
+ * Note on Queues :
+ * All slots in queues are initialized to queue empty slot;
+ * The number of elements in the queues is initialized to 0;
+ */
+ccl_device void kernel_data_init(
+ ccl_global char *globals,
+ ccl_global char *shader_data_sd, /* Arguments related to ShaderData */
+ ccl_global char *shader_data_sd_DL_shadow, /* Arguments related to ShaderData */
+
+ ccl_global float3 *P_sd,
+ ccl_global float3 *P_sd_DL_shadow,
+
+ ccl_global float3 *N_sd,
+ ccl_global float3 *N_sd_DL_shadow,
+
+ ccl_global float3 *Ng_sd,
+ ccl_global float3 *Ng_sd_DL_shadow,
+
+ ccl_global float3 *I_sd,
+ ccl_global float3 *I_sd_DL_shadow,
+
+ ccl_global int *shader_sd,
+ ccl_global int *shader_sd_DL_shadow,
+
+ ccl_global int *flag_sd,
+ ccl_global int *flag_sd_DL_shadow,
+
+ ccl_global int *prim_sd,
+ ccl_global int *prim_sd_DL_shadow,
+
+ ccl_global int *type_sd,
+ ccl_global int *type_sd_DL_shadow,
+
+ ccl_global float *u_sd,
+ ccl_global float *u_sd_DL_shadow,
+
+ ccl_global float *v_sd,
+ ccl_global float *v_sd_DL_shadow,
+
+ ccl_global int *object_sd,
+ ccl_global int *object_sd_DL_shadow,
+
+ ccl_global float *time_sd,
+ ccl_global float *time_sd_DL_shadow,
+
+ ccl_global float *ray_length_sd,
+ ccl_global float *ray_length_sd_DL_shadow,
+
+ ccl_global int *ray_depth_sd,
+ ccl_global int *ray_depth_sd_DL_shadow,
+
+ ccl_global int *transparent_depth_sd,
+ ccl_global int *transparent_depth_sd_DL_shadow,
+
+ /* Ray differentials. */
+ ccl_global differential3 *dP_sd,
+ ccl_global differential3 *dP_sd_DL_shadow,
+
+ ccl_global differential3 *dI_sd,
+ ccl_global differential3 *dI_sd_DL_shadow,
+
+ ccl_global differential *du_sd,
+ ccl_global differential *du_sd_DL_shadow,
+
+ ccl_global differential *dv_sd,
+ ccl_global differential *dv_sd_DL_shadow,
+
+ /* Dp/Du */
+ ccl_global float3 *dPdu_sd,
+ ccl_global float3 *dPdu_sd_DL_shadow,
+
+ ccl_global float3 *dPdv_sd,
+ ccl_global float3 *dPdv_sd_DL_shadow,
+
+ /* Object motion. */
+ ccl_global Transform *ob_tfm_sd,
+ ccl_global Transform *ob_tfm_sd_DL_shadow,
+
+ ccl_global Transform *ob_itfm_sd,
+ ccl_global Transform *ob_itfm_sd_DL_shadow,
+
+ ShaderClosure *closure_sd,
+ ShaderClosure *closure_sd_DL_shadow,
+
+ ccl_global int *num_closure_sd,
+ ccl_global int *num_closure_sd_DL_shadow,
+
+ ccl_global float *randb_closure_sd,
+ ccl_global float *randb_closure_sd_DL_shadow,
+
+ ccl_global float3 *ray_P_sd,
+ ccl_global float3 *ray_P_sd_DL_shadow,
+
+ ccl_global differential3 *ray_dP_sd,
+ ccl_global differential3 *ray_dP_sd_DL_shadow,
+
+ ccl_constant KernelData *data,
+ ccl_global float *per_sample_output_buffers,
+ ccl_global uint *rng_state,
+ ccl_global uint *rng_coop, /* rng array to store rng values for all rays */
+ ccl_global float3 *throughput_coop, /* throughput array to store throughput values for all rays */
+ ccl_global float *L_transparent_coop, /* L_transparent array to store L_transparent values for all rays */
+ PathRadiance *PathRadiance_coop, /* PathRadiance array to store PathRadiance values for all rays */
+ ccl_global Ray *Ray_coop, /* Ray array to store Ray information for all rays */
+ ccl_global PathState *PathState_coop, /* PathState array to store PathState information for all rays */
+ ccl_global char *ray_state, /* Stores information on current state of a ray */
+
+#define KERNEL_TEX(type, ttype, name) \
+ ccl_global type *name,
+#include "../kernel_textures.h"
+
+ int start_sample, int sx, int sy, int sw, int sh, int offset, int stride,
+ int rng_state_offset_x,
+ int rng_state_offset_y,
+ int rng_state_stride,
+ ccl_global int *Queue_data, /* Memory for queues */
+ ccl_global int *Queue_index, /* Tracks the number of elements in queues */
+ int queuesize, /* size (capacity) of the queue */
+ ccl_global char *use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */
+ ccl_global unsigned int *work_array, /* work array to store which work each ray belongs to */
+#ifdef __WORK_STEALING__
+ ccl_global unsigned int *work_pool_wgs, /* Work pool for each work group */
+ unsigned int num_samples, /* Total number of samples per pixel */
+#endif
+#ifdef __KERNEL_DEBUG__
+ DebugData *debugdata_coop,
+#endif
+ int parallel_samples) /* Number of samples to be processed in parallel */
+{
+
+ /* Load kernel globals structure */
+ KernelGlobals *kg = (KernelGlobals *)globals;
+
+ kg->data = data;
+#define KERNEL_TEX(type, ttype, name) \
+ kg->name = name;
+#include "../kernel_textures.h"
+
+ /* Load ShaderData structure */
+ ShaderData *sd = (ShaderData *)shader_data_sd;
+ ShaderData *sd_DL_shadow = (ShaderData *)shader_data_sd_DL_shadow;
+
+ sd->P = P_sd;
+ sd_DL_shadow->P = P_sd_DL_shadow;
+
+ sd->N = N_sd;
+ sd_DL_shadow->N = N_sd_DL_shadow;
+
+ sd->Ng = Ng_sd;
+ sd_DL_shadow->Ng = Ng_sd_DL_shadow;
+
+ sd->I = I_sd;
+ sd_DL_shadow->I = I_sd_DL_shadow;
+
+ sd->shader = shader_sd;
+ sd_DL_shadow->shader = shader_sd_DL_shadow;
+
+ sd->flag = flag_sd;
+ sd_DL_shadow->flag = flag_sd_DL_shadow;
+
+ sd->prim = prim_sd;
+ sd_DL_shadow->prim = prim_sd_DL_shadow;
+
+ sd->type = type_sd;
+ sd_DL_shadow->type = type_sd_DL_shadow;
+
+ sd->u = u_sd;
+ sd_DL_shadow->u = u_sd_DL_shadow;
+
+ sd->v = v_sd;
+ sd_DL_shadow->v = v_sd_DL_shadow;
+
+ sd->object = object_sd;
+ sd_DL_shadow->object = object_sd_DL_shadow;
+
+ sd->time = time_sd;
+ sd_DL_shadow->time = time_sd_DL_shadow;
+
+ sd->ray_length = ray_length_sd;
+ sd_DL_shadow->ray_length = ray_length_sd_DL_shadow;
+
+ sd->ray_depth = ray_depth_sd;
+ sd_DL_shadow->ray_depth = ray_depth_sd_DL_shadow;
+
+ sd->transparent_depth = transparent_depth_sd;
+ sd_DL_shadow->transparent_depth = transparent_depth_sd_DL_shadow;
+
+#ifdef __RAY_DIFFERENTIALS__
+ sd->dP = dP_sd;
+ sd_DL_shadow->dP = dP_sd_DL_shadow;
+
+ sd->dI = dI_sd;
+ sd_DL_shadow->dI = dI_sd_DL_shadow;
+
+ sd->du = du_sd;
+ sd_DL_shadow->du = du_sd_DL_shadow;
+
+ sd->dv = dv_sd;
+ sd_DL_shadow->dv = dv_sd_DL_shadow;
+#ifdef __DPDU__
+ sd->dPdu = dPdu_sd;
+ sd_DL_shadow->dPdu = dPdu_sd_DL_shadow;
+
+ sd->dPdv = dPdv_sd;
+ sd_DL_shadow->dPdv = dPdv_sd_DL_shadow;
+#endif
+#endif
+
+#ifdef __OBJECT_MOTION__
+ sd->ob_tfm = ob_tfm_sd;
+ sd_DL_shadow->ob_tfm = ob_tfm_sd_DL_shadow;
+
+ sd->ob_itfm = ob_itfm_sd;
+ sd_DL_shadow->ob_itfm = ob_itfm_sd_DL_shadow;
+#endif
+
+ sd->closure = closure_sd;
+ sd_DL_shadow->closure = closure_sd_DL_shadow;
+
+ sd->num_closure = num_closure_sd;
+ sd_DL_shadow->num_closure = num_closure_sd_DL_shadow;
+
+ sd->randb_closure = randb_closure_sd;
+ sd_DL_shadow->randb_closure = randb_closure_sd_DL_shadow;
+
+ sd->ray_P = ray_P_sd;
+ sd_DL_shadow->ray_P = ray_P_sd_DL_shadow;
+
+ sd->ray_dP = ray_dP_sd;
+ sd_DL_shadow->ray_dP = ray_dP_sd_DL_shadow;
+
+ int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0);
+
+#ifdef __WORK_STEALING__
+ int lid = get_local_id(1) * get_local_size(0) + get_local_id(0);
+ /* Initialize work_pool_wgs */
+ if(lid == 0) {
+ int group_index = get_group_id(1) * get_num_groups(0) + get_group_id(0);
+ work_pool_wgs[group_index] = 0;
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+#endif /* __WORK_STEALING__ */
+
+ /* Initialize queue data and queue index. */
+ if(thread_index < queuesize) {
+ /* Initialize active ray queue */
+ Queue_data[QUEUE_ACTIVE_AND_REGENERATED_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
+ /* Initialize background and buffer update queue */
+ Queue_data[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
+ /* Initialize shadow ray cast of AO queue */
+ Queue_data[QUEUE_SHADOW_RAY_CAST_AO_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
+ /* Initialize shadow ray cast of direct lighting queue */
+ Queue_data[QUEUE_SHADOW_RAY_CAST_DL_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
+ }
+
+ if(thread_index == 0) {
+ Queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+ Queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
+ Queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0;
+ Queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0;
+ /* The scene-intersect kernel should not use the queues very first time.
+ * since the queue would be empty.
+ */
+ use_queues_flag[0] = 0;
+ }
+
+ int x = get_global_id(0);
+ int y = get_global_id(1);
+
+ if(x < (sw * parallel_samples) && y < sh) {
+
+ int ray_index = x + y * (sw * parallel_samples);
+
+ /* This is the first assignment to ray_state;
+ * So we dont use ASSIGN_RAY_STATE macro.
+ */
+ ray_state[ray_index] = RAY_ACTIVE;
+
+ unsigned int my_sample;
+ unsigned int pixel_x;
+ unsigned int pixel_y;
+ unsigned int tile_x;
+ unsigned int tile_y;
+ unsigned int my_sample_tile;
+
+#ifdef __WORK_STEALING__
+ unsigned int my_work = 0;
+ /* Get work. */
+ get_next_work(work_pool_wgs, &my_work, sw, sh, num_samples, parallel_samples, ray_index);
+ /* Get the sample associated with the work. */
+ my_sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
+
+ my_sample_tile = 0;
+
+ /* Get pixel and tile position associated with the work. */
+ get_pixel_tile_position(&pixel_x, &pixel_y,
+ &tile_x, &tile_y,
+ my_work,
+ sw, sh, sx, sy,
+ parallel_samples,
+ ray_index);
+ work_array[ray_index] = my_work;
+#else /* __WORK_STEALING__ */
+ unsigned int tile_index = ray_index / parallel_samples;
+ tile_x = tile_index % sw;
+ tile_y = tile_index / sw;
+ my_sample_tile = ray_index - (tile_index * parallel_samples);
+ my_sample = my_sample_tile + start_sample;
+
+ /* Initialize work array. */
+ work_array[ray_index] = my_sample ;
+
+ /* Calculate pixel position of this ray. */
+ pixel_x = sx + tile_x;
+ pixel_y = sy + tile_y;
+#endif /* __WORK_STEALING__ */
+
+ rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride;
+
+ /* Initialise per_sample_output_buffers to all zeros. */
+ per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + (my_sample_tile)) * kernel_data.film.pass_stride;
+ int per_sample_output_buffers_iterator = 0;
+ for(per_sample_output_buffers_iterator = 0;
+ per_sample_output_buffers_iterator < kernel_data.film.pass_stride;
+ per_sample_output_buffers_iterator++)
+ {
+ per_sample_output_buffers[per_sample_output_buffers_iterator] = 0.0f;
+ }
+
+ /* Initialize random numbers and ray. */
+ kernel_path_trace_setup(kg,
+ rng_state,
+ my_sample,
+ pixel_x, pixel_y,
+ &rng_coop[ray_index],
+ &Ray_coop[ray_index]);
+
+ if(Ray_coop[ray_index].t != 0.0f) {
+ /* Initialize throuput, L_transparent, Ray, PathState;
+ * These rays proceed with path-iteration.
+ */
+ throughput_coop[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
+ L_transparent_coop[ray_index] = 0.0f;
+ path_radiance_init(&PathRadiance_coop[ray_index], kernel_data.film.use_light_pass);
+ path_state_init(kg,
+ &PathState_coop[ray_index],
+ &rng_coop[ray_index],
+ my_sample,
+ &Ray_coop[ray_index]);
+#ifdef __KERNEL_DEBUG__
+ debug_data_init(&debugdata_coop[ray_index]);
+#endif
+ } else {
+ /* These rays do not participate in path-iteration. */
+
+ float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ /* Accumulate result in output buffer. */
+ kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad);
+ path_rng_end(kg, rng_state, rng_coop[ray_index]);
+
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
+ }
+ }
+
+ /* Mark rest of the ray-state indices as RAY_INACTIVE. */
+ if(thread_index < (get_global_size(0) * get_global_size(1)) - (sh * (sw * parallel_samples))) {
+ /* First assignment, hence we dont use ASSIGN_RAY_STATE macro */
+ ray_state[((sw * parallel_samples) * sh) + thread_index] = RAY_INACTIVE;
+ }
+}
diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h
new file mode 100644
index 00000000000..50c83d06140
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel_split_common.h"
+
+/* Note on kernel_direct_lighting kernel.
+ * This is the eighth kernel in the ray tracing logic. This is the seventh
+ * of the path iteration kernels. This kernel takes care of direct lighting
+ * logic. However, the "shadow ray cast" part of direct lighting is handled
+ * in the next kernel.
+ *
+ * This kernels determines the rays for which a shadow_blocked() function associated with direct lighting should be executed.
+ * Those rays for which a shadow_blocked() function for direct-lighting must be executed, are marked with flag RAY_SHADOW_RAY_CAST_DL and
+ * enqueued into the queue QUEUE_SHADOW_RAY_CAST_DL_RAYS
+ *
+ * The input and output are as follows,
+ *
+ * rng_coop -----------------------------------------|--- kernel_direct_lighting --|--- BSDFEval_coop
+ * PathState_coop -----------------------------------| |--- ISLamp_coop
+ * shader_data --------------------------------------| |--- LightRay_coop
+ * ray_state ----------------------------------------| |--- ray_state
+ * Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS) ---| |
+ * kg (globals + data) ------------------------------| |
+ * queuesize ----------------------------------------| |
+ *
+ * note on shader_DL : shader_DL is neither input nor output to this kernel; shader_DL is filled and consumed in this kernel itself.
+ * Note on Queues :
+ * This kernel only reads from the QUEUE_ACTIVE_AND_REGENERATED_RAYS queue and processes
+ * only the rays of state RAY_ACTIVE; If a ray needs to execute the corresponding shadow_blocked
+ * part, after direct lighting, the ray is marked with RAY_SHADOW_RAY_CAST_DL flag.
+ *
+ * State of queues when this kernel is called :
+ * state of queues QUEUE_ACTIVE_AND_REGENERATED_RAYS and QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be same
+ * before and after this kernel call.
+ * QUEUE_SHADOW_RAY_CAST_DL_RAYS queue will be filled with rays for which a shadow_blocked function must be executed, after this
+ * kernel call. Before this kernel call the QUEUE_SHADOW_RAY_CAST_DL_RAYS will be empty.
+ */
+ccl_device char kernel_direct_lighting(
+ ccl_global char *globals,
+ ccl_constant KernelData *data,
+ ccl_global char *shader_data, /* Required for direct lighting */
+ ccl_global char *shader_DL, /* Required for direct lighting */
+ ccl_global uint *rng_coop, /* Required for direct lighting */
+ ccl_global PathState *PathState_coop, /* Required for direct lighting */
+ ccl_global int *ISLamp_coop, /* Required for direct lighting */
+ ccl_global Ray *LightRay_coop, /* Required for direct lighting */
+ ccl_global BsdfEval *BSDFEval_coop, /* Required for direct lighting */
+ ccl_global char *ray_state, /* Denotes the state of each ray */
+ int ray_index)
+{
+ char enqueue_flag = 0;
+ if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ /* Load kernel globals structure and ShaderData structure. */
+ KernelGlobals *kg = (KernelGlobals *)globals;
+ ShaderData *sd = (ShaderData *)shader_data;
+ ShaderData *sd_DL = (ShaderData *)shader_DL;
+
+ ccl_global PathState *state = &PathState_coop[ray_index];
+
+ /* direct lighting */
+#ifdef __EMISSION__
+ if((kernel_data.integrator.use_direct_light &&
+ (ccl_fetch(sd, flag) & SD_BSDF_HAS_EVAL)))
+ {
+ /* Sample illumination from lights to find path contribution. */
+ ccl_global RNG* rng = &rng_coop[ray_index];
+ float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
+ float light_u, light_v;
+ path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
+
+ LightSample ls;
+ light_sample(kg,
+ light_t, light_u, light_v,
+ ccl_fetch(sd, time),
+ ccl_fetch(sd, P),
+ state->bounce,
+ &ls);
+
+ Ray light_ray;
+#ifdef __OBJECT_MOTION__
+ light_ray.time = ccl_fetch(sd, time);
+#endif
+
+ BsdfEval L_light;
+ bool is_lamp;
+ if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp,
+ state->bounce, state->transparent_bounce, sd_DL))
+ {
+ /* Write intermediate data to global memory to access from
+ * the next kernel.
+ */
+ LightRay_coop[ray_index] = light_ray;
+ BSDFEval_coop[ray_index] = L_light;
+ ISLamp_coop[ray_index] = is_lamp;
+ /* Mark ray state for next shadow kernel. */
+ ADD_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL);
+ enqueue_flag = 1;
+ }
+ }
+#endif /* __EMISSION__ */
+ }
+ return enqueue_flag;
+}
diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
new file mode 100644
index 00000000000..a75523a3e53
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -0,0 +1,264 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel_split_common.h"
+
+/* Note on kernel_holdout_emission_blurring_pathtermination_ao kernel.
+ * This is the sixth kernel in the ray tracing logic. This is the fifth
+ * of the path iteration kernels. This kernel takes care of the logic to process
+ * "material of type holdout", indirect primitive emission, bsdf blurring,
+ * probabilistic path termination and AO.
+ *
+ * This kernels determines the rays for which a shadow_blocked() function associated with AO should be executed.
+ * Those rays for which a shadow_blocked() function for AO must be executed are marked with flag RAY_SHADOW_RAY_CAST_ao and
+ * enqueued into the queue QUEUE_SHADOW_RAY_CAST_AO_RAYS
+ *
+ * Ray state of rays that are terminated in this kernel are changed to RAY_UPDATE_BUFFER
+ *
+ * The input and output are as follows,
+ *
+ * rng_coop ---------------------------------------------|--- kernel_holdout_emission_blurring_pathtermination_ao ---|--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
+ * throughput_coop --------------------------------------| |--- PathState_coop
+ * PathRadiance_coop ------------------------------------| |--- throughput_coop
+ * Intersection_coop ------------------------------------| |--- L_transparent_coop
+ * PathState_coop ---------------------------------------| |--- per_sample_output_buffers
+ * L_transparent_coop -----------------------------------| |--- PathRadiance_coop
+ * shader_data ------------------------------------------| |--- ShaderData
+ * ray_state --------------------------------------------| |--- ray_state
+ * Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS) -------| |--- Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
+ * Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---| |--- AOAlpha_coop
+ * kg (globals + data) ----------------------------------| |--- AOBSDF_coop
+ * parallel_samples -------------------------------------| |--- AOLightRay_coop
+ * per_sample_output_buffers ----------------------------| |
+ * sw ---------------------------------------------------| |
+ * sh ---------------------------------------------------| |
+ * sx ---------------------------------------------------| |
+ * sy ---------------------------------------------------| |
+ * stride -----------------------------------------------| |
+ * work_array -------------------------------------------| |
+ * queuesize --------------------------------------------| |
+ * start_sample -----------------------------------------| |
+ *
+ * Note on Queues :
+ * This kernel fetches rays from the queue QUEUE_ACTIVE_AND_REGENERATED_RAYS and processes only
+ * the rays of state RAY_ACTIVE.
+ * There are different points in this kernel where a ray may terminate and reach RAY_UPDATE_BUFFER
+ * state. These rays are enqueued into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. These rays will
+ * still be present in QUEUE_ACTIVE_AND_REGENERATED_RAYS queue, but since their ray-state has been
+ * changed to RAY_UPDATE_BUFFER, there is no problem.
+ *
+ * State of queues when this kernel is called :
+ * At entry,
+ * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays
+ * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE rays.
+ * QUEUE_SHADOW_RAY_CAST_AO_RAYS will be empty.
+ * At exit,
+ * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE, RAY_REGENERATED and RAY_UPDATE_BUFFER rays
+ * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE and RAY_UPDATE_BUFFER rays
+ * QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with flag RAY_SHADOW_RAY_CAST_AO
+ */
+ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
+ ccl_global char *globals,
+ ccl_constant KernelData *data,
+ ccl_global char *shader_data, /* Required throughout the kernel except probabilistic path termination and AO */
+ ccl_global float *per_sample_output_buffers,
+ ccl_global uint *rng_coop, /* Required for "kernel_write_data_passes" and AO */
+ ccl_global float3 *throughput_coop, /* Required for handling holdout material and AO */
+ ccl_global float *L_transparent_coop, /* Required for handling holdout material */
+ PathRadiance *PathRadiance_coop, /* Required for "kernel_write_data_passes" and indirect primitive emission */
+ ccl_global PathState *PathState_coop, /* Required throughout the kernel and AO */
+ Intersection *Intersection_coop, /* Required for indirect primitive emission */
+ ccl_global float3 *AOAlpha_coop, /* Required for AO */
+ ccl_global float3 *AOBSDF_coop, /* Required for AO */
+ ccl_global Ray *AOLightRay_coop, /* Required for AO */
+ int sw, int sh, int sx, int sy, int stride,
+ ccl_global char *ray_state, /* Denotes the state of each ray */
+ ccl_global unsigned int *work_array, /* Denotes the work that each ray belongs to */
+#ifdef __WORK_STEALING__
+ unsigned int start_sample,
+#endif
+ int parallel_samples, /* Number of samples to be processed in parallel */
+ int ray_index,
+ char *enqueue_flag,
+ char *enqueue_flag_AO_SHADOW_RAY_CAST)
+{
+ /* Load kernel globals structure and ShaderData structure */
+ KernelGlobals *kg = (KernelGlobals *)globals;
+ ShaderData *sd = (ShaderData *)shader_data;
+
+#ifdef __WORK_STEALING__
+ unsigned int my_work;
+ unsigned int pixel_x;
+ unsigned int pixel_y;
+#endif
+ unsigned int tile_x;
+ unsigned int tile_y;
+ int my_sample_tile;
+ unsigned int sample;
+
+ ccl_global RNG *rng = 0x0;
+ ccl_global PathState *state = 0x0;
+ float3 throughput;
+
+ if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+
+ throughput = throughput_coop[ray_index];
+ state = &PathState_coop[ray_index];
+ rng = &rng_coop[ray_index];
+#ifdef __WORK_STEALING__
+ my_work = work_array[ray_index];
+ sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
+ get_pixel_tile_position(&pixel_x, &pixel_y,
+ &tile_x, &tile_y,
+ my_work,
+ sw, sh, sx, sy,
+ parallel_samples,
+ ray_index);
+ my_sample_tile = 0;
+#else /* __WORK_STEALING__ */
+ sample = work_array[ray_index];
+ /* Buffer's stride is "stride"; Find x and y using ray_index. */
+ int tile_index = ray_index / parallel_samples;
+ tile_x = tile_index % sw;
+ tile_y = tile_index / sw;
+ my_sample_tile = ray_index - (tile_index * parallel_samples);
+#endif /* __WORK_STEALING__ */
+ per_sample_output_buffers +=
+ (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) *
+ kernel_data.film.pass_stride;
+
+ /* holdout */
+#ifdef __HOLDOUT__
+ if((ccl_fetch(sd, flag) & (SD_HOLDOUT|SD_HOLDOUT_MASK)) &&
+ (state->flag & PATH_RAY_CAMERA))
+ {
+ if(kernel_data.background.transparent) {
+ float3 holdout_weight;
+
+ if(ccl_fetch(sd, flag) & SD_HOLDOUT_MASK)
+ holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
+ else
+ holdout_weight = shader_holdout_eval(kg, sd);
+
+ /* any throughput is ok, should all be identical here */
+ L_transparent_coop[ray_index] += average(holdout_weight*throughput);
+ }
+
+ if(ccl_fetch(sd, flag) & SD_HOLDOUT_MASK) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ *enqueue_flag = 1;
+ }
+ }
+#endif /* __HOLDOUT__ */
+ }
+
+ if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ PathRadiance *L = &PathRadiance_coop[ray_index];
+ /* Holdout mask objects do not write data passes. */
+ kernel_write_data_passes(kg,
+ per_sample_output_buffers,
+ L,
+ sd,
+ sample,
+ state,
+ throughput);
+ /* Blurring of bsdf after bounces, for rays that have a small likelihood
+ * of following this particular path (diffuse, rough glossy.
+ */
+ if(kernel_data.integrator.filter_glossy != FLT_MAX) {
+ float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
+ if(blur_pdf < 1.0f) {
+ float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
+ shader_bsdf_blur(kg, sd, blur_roughness);
+ }
+ }
+
+#ifdef __EMISSION__
+ /* emission */
+ if(ccl_fetch(sd, flag) & SD_EMISSION) {
+ /* TODO(sergey): is isect.t wrong here for transparent surfaces? */
+ float3 emission = indirect_primitive_emission(
+ kg,
+ sd,
+ Intersection_coop[ray_index].t,
+ state->flag,
+ state->ray_pdf);
+ path_radiance_accum_emission(L, throughput, emission, state->bounce);
+ }
+#endif /* __EMISSION__ */
+
+ /* Path termination. this is a strange place to put the termination, it's
+ * mainly due to the mixed in MIS that we use. gives too many unneeded
+ * shader evaluations, only need emission if we are going to terminate.
+ */
+ float probability = path_state_terminate_probability(kg, state, throughput);
+
+ if(probability == 0.0f) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ *enqueue_flag = 1;
+ }
+
+ if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ if(probability != 1.0f) {
+ float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE);
+ if(terminate >= probability) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ *enqueue_flag = 1;
+ } else {
+ throughput_coop[ray_index] = throughput/probability;
+ }
+ }
+ }
+ }
+
+#ifdef __AO__
+ if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ /* ambient occlusion */
+ if(kernel_data.integrator.use_ambient_occlusion ||
+ (ccl_fetch(sd, flag) & SD_AO))
+ {
+ /* todo: solve correlation */
+ float bsdf_u, bsdf_v;
+ path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+
+ float ao_factor = kernel_data.background.ao_factor;
+ float3 ao_N;
+ AOBSDF_coop[ray_index] = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
+ AOAlpha_coop[ray_index] = shader_bsdf_alpha(kg, sd);
+
+ float3 ao_D;
+ float ao_pdf;
+ sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
+
+ if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) {
+ Ray _ray;
+ _ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng));
+ _ray.D = ao_D;
+ _ray.t = kernel_data.background.ao_distance;
+#ifdef __OBJECT_MOTION__
+ _ray.time = ccl_fetch(sd, time);
+#endif
+ _ray.dP = ccl_fetch(sd, dP);
+ _ray.dD = differential3_zero();
+ AOLightRay_coop[ray_index] = _ray;
+
+ ADD_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO);
+ *enqueue_flag_AO_SHADOW_RAY_CAST = 1;
+ }
+ }
+ }
+#endif /* __AO__ */
+}
diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h
new file mode 100644
index 00000000000..a8e4b0a06c8
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_lamp_emission.h
@@ -0,0 +1,179 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel_split_common.h"
+
+/* Note on kernel_lamp_emission
+ * This is the 3rd kernel in the ray-tracing logic. This is the second of the
+ * path-iteration kernels. This kernel takes care of the indirect lamp emission logic.
+ * This kernel operates on QUEUE_ACTIVE_AND_REGENERATED_RAYS. It processes rays of state RAY_ACTIVE
+ * and RAY_HIT_BACKGROUND.
+ * We will empty QUEUE_ACTIVE_AND_REGENERATED_RAYS queue in this kernel.
+ * The input/output of the kernel is as follows,
+ * Throughput_coop ------------------------------------|--- kernel_lamp_emission --|--- PathRadiance_coop
+ * Ray_coop -------------------------------------------| |--- Queue_data(QUEUE_ACTIVE_AND_REGENERATED_RAYS)
+ * PathState_coop -------------------------------------| |--- Queue_index(QUEUE_ACTIVE_AND_REGENERATED_RAYS)
+ * kg (globals + data) --------------------------------| |
+ * Intersection_coop ----------------------------------| |
+ * ray_state ------------------------------------------| |
+ * Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS) -----| |
+ * Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS) ----| |
+ * queuesize ------------------------------------------| |
+ * use_queues_flag ------------------------------------| |
+ * sw -------------------------------------------------| |
+ * sh -------------------------------------------------| |
+ * parallel_samples -----------------------------------| |
+ *
+ * note : shader_data is neither input nor output. Its just filled and consumed in the same, kernel_lamp_emission, kernel.
+ */
+ccl_device void kernel_lamp_emission(
+ ccl_global char *globals,
+ ccl_constant KernelData *data,
+ ccl_global char *shader_data, /* Required for lamp emission */
+ ccl_global float3 *throughput_coop, /* Required for lamp emission */
+ PathRadiance *PathRadiance_coop, /* Required for lamp emission */
+ ccl_global Ray *Ray_coop, /* Required for lamp emission */
+ ccl_global PathState *PathState_coop, /* Required for lamp emission */
+ Intersection *Intersection_coop, /* Required for lamp emission */
+ ccl_global char *ray_state, /* Denotes the state of each ray */
+ int sw, int sh,
+ ccl_global char *use_queues_flag, /* Used to decide if this kernel should use
+ * queues to fetch ray index
+ */
+ int parallel_samples, /* Number of samples to be processed in parallel */
+ int ray_index)
+{
+ if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) ||
+ IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND))
+ {
+ KernelGlobals *kg = (KernelGlobals *)globals;
+ ShaderData *sd = (ShaderData *)shader_data;
+ PathRadiance *L = &PathRadiance_coop[ray_index];
+
+ float3 throughput = throughput_coop[ray_index];
+ Ray ray = Ray_coop[ray_index];
+ PathState state = PathState_coop[ray_index];
+
+#ifdef __LAMP_MIS__
+ if(kernel_data.integrator.use_lamp_mis && !(state.flag & PATH_RAY_CAMERA)) {
+ /* ray starting from previous non-transparent bounce */
+ Ray light_ray;
+
+ light_ray.P = ray.P - state.ray_t*ray.D;
+ state.ray_t += Intersection_coop[ray_index].t;
+ light_ray.D = ray.D;
+ light_ray.t = state.ray_t;
+ light_ray.time = ray.time;
+ light_ray.dD = ray.dD;
+ light_ray.dP = ray.dP;
+ /* intersect with lamp */
+ float3 emission;
+
+ if(indirect_lamp_emission(kg, &state, &light_ray, &emission, sd)) {
+ path_radiance_accum_emission(L, throughput, emission, state.bounce);
+ }
+ }
+#endif /* __LAMP_MIS__ */
+
+ /* __VOLUME__ feature is disabled */
+#if 0
+#ifdef __VOLUME__
+ /* volume attenuation, emission, scatter */
+ if(state.volume_stack[0].shader != SHADER_NONE) {
+ Ray volume_ray = ray;
+ volume_ray.t = (hit)? isect.t: FLT_MAX;
+
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
+
+#ifdef __VOLUME_DECOUPLED__
+ int sampling_method = volume_stack_sampling_method(kg, state.volume_stack);
+ bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, true, sampling_method);
+
+ if(decoupled) {
+ /* cache steps along volume for repeated sampling */
+ VolumeSegment volume_segment;
+ ShaderData volume_sd;
+
+ shader_setup_from_volume(kg, &volume_sd, &volume_ray, state.bounce, state.transparent_bounce);
+ kernel_volume_decoupled_record(kg, &state,
+ &volume_ray, &volume_sd, &volume_segment, heterogeneous);
+
+ volume_segment.sampling_method = sampling_method;
+
+ /* emission */
+ if(volume_segment.closure_flag & SD_EMISSION)
+ path_radiance_accum_emission(&L, throughput, volume_segment.accum_emission, state.bounce);
+
+ /* scattering */
+ VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
+
+ if(volume_segment.closure_flag & SD_SCATTER) {
+ bool all = false;
+
+ /* direct light sampling */
+ kernel_branched_path_volume_connect_light(kg, rng, &volume_sd,
+ throughput, &state, &L, 1.0f, all, &volume_ray, &volume_segment);
+
+ /* indirect sample. if we use distance sampling and take just
+ * one sample for direct and indirect light, we could share
+ * this computation, but makes code a bit complex */
+ float rphase = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_PHASE);
+ float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE);
+
+ result = kernel_volume_decoupled_scatter(kg,
+ &state, &volume_ray, &volume_sd, &throughput,
+ rphase, rscatter, &volume_segment, NULL, true);
+ }
+
+ if(result != VOLUME_PATH_SCATTERED)
+ throughput *= volume_segment.accum_transmittance;
+
+ /* free cached steps */
+ kernel_volume_decoupled_free(kg, &volume_segment);
+
+ if(result == VOLUME_PATH_SCATTERED) {
+ if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray))
+ continue;
+ else
+ break;
+ }
+ }
+ else
+#endif /* __VOLUME_DECOUPLED__ */
+ {
+ /* integrate along volume segment with distance sampling */
+ ShaderData volume_sd;
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, &state, &volume_sd, &volume_ray, &L, &throughput, rng, heterogeneous);
+
+#ifdef __VOLUME_SCATTER__
+ if(result == VOLUME_PATH_SCATTERED) {
+ /* direct lighting */
+ kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, &L);
+
+ /* indirect light bounce */
+ if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray))
+ continue;
+ else
+ break;
+ }
+#endif /* __VOLUME_SCATTER__ */
+ }
+ }
+#endif /* __VOLUME__ */
+#endif
+ }
+}
diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
new file mode 100644
index 00000000000..2dbdabc5fd3
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel_split_common.h"
+
+/* Note on kernel_setup_next_iteration kernel.
+ * This is the tenth kernel in the ray tracing logic. This is the ninth
+ * of the path iteration kernels. This kernel takes care of setting up
+ * Ray for the next iteration of path-iteration and accumulating radiance
+ * corresponding to AO and direct-lighting
+ *
+ * Ray state of rays that are terminated in this kernel are changed to RAY_UPDATE_BUFFER
+ *
+ * The input and output are as follows,
+ *
+ * rng_coop ---------------------------------------------|--- kernel_next_iteration_setup -|--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
+ * throughput_coop --------------------------------------| |--- Queue_data (QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS)
+ * PathRadiance_coop ------------------------------------| |--- throughput_coop
+ * PathState_coop ---------------------------------------| |--- PathRadiance_coop
+ * shader_data ------------------------------------------| |--- PathState_coop
+ * ray_state --------------------------------------------| |--- ray_state
+ * Queue_data (QUEUE_ACTIVE_AND_REGENERATD_RAYS) --------| |--- Ray_coop
+ * Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---| |--- use_queues_flag
+ * Ray_coop ---------------------------------------------| |
+ * kg (globals + data) ----------------------------------| |
+ * LightRay_dl_coop -------------------------------------|
+ * ISLamp_coop ------------------------------------------|
+ * BSDFEval_coop ----------------------------------------|
+ * LightRay_ao_coop -------------------------------------|
+ * AOBSDF_coop ------------------------------------------|
+ * AOAlpha_coop -----------------------------------------|
+ *
+ * Note on queues,
+ * This kernel fetches rays from the queue QUEUE_ACTIVE_AND_REGENERATED_RAYS and processes only
+ * the rays of state RAY_ACTIVE.
+ * There are different points in this kernel where a ray may terminate and reach RAY_UPDATE_BUFF
+ * state. These rays are enqueued into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. These rays will
+ * still be present in QUEUE_ACTIVE_AND_REGENERATED_RAYS queue, but since their ray-state has been
+ * changed to RAY_UPDATE_BUFF, there is no problem.
+ *
+ * State of queues when this kernel is called :
+ * At entry,
+ * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE, RAY_REGENERATED, RAY_UPDATE_BUFFER rays.
+ * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE and RAY_UPDATE_BUFFER rays
+ * At exit,
+ * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE, RAY_REGENERATED and more RAY_UPDATE_BUFFER rays.
+ * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE and more RAY_UPDATE_BUFFER rays
+ */
+ccl_device char kernel_next_iteration_setup(
+ ccl_global char *globals,
+ ccl_constant KernelData *data,
+ ccl_global char *shader_data, /* Required for setting up ray for next iteration */
+ ccl_global uint *rng_coop, /* Required for setting up ray for next iteration */
+ ccl_global float3 *throughput_coop, /* Required for setting up ray for next iteration */
+ PathRadiance *PathRadiance_coop, /* Required for setting up ray for next iteration */
+ ccl_global Ray *Ray_coop, /* Required for setting up ray for next iteration */
+ ccl_global PathState *PathState_coop, /* Required for setting up ray for next iteration */
+ ccl_global Ray *LightRay_dl_coop, /* Required for radiance update - direct lighting */
+ ccl_global int *ISLamp_coop, /* Required for radiance update - direct lighting */
+ ccl_global BsdfEval *BSDFEval_coop, /* Required for radiance update - direct lighting */
+ ccl_global Ray *LightRay_ao_coop, /* Required for radiance update - AO */
+ ccl_global float3 *AOBSDF_coop, /* Required for radiance update - AO */
+ ccl_global float3 *AOAlpha_coop, /* Required for radiance update - AO */
+ ccl_global char *ray_state, /* Denotes the state of each ray */
+ ccl_global char *use_queues_flag, /* flag to decide if scene_intersect kernel should
+ * use queues to fetch ray index */
+ int ray_index)
+{
+ char enqueue_flag = 0;
+
+ /* Load kernel globals structure and ShaderData structure. */
+ KernelGlobals *kg = (KernelGlobals *)globals;
+ ShaderData *sd = (ShaderData *)shader_data;
+ PathRadiance *L = 0x0;
+ ccl_global PathState *state = 0x0;
+
+ /* Path radiance update for AO/Direct_lighting's shadow blocked. */
+ if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) ||
+ IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO))
+ {
+ state = &PathState_coop[ray_index];
+ L = &PathRadiance_coop[ray_index];
+ float3 _throughput = throughput_coop[ray_index];
+
+ if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO)) {
+ float3 shadow = LightRay_ao_coop[ray_index].P;
+ char update_path_radiance = LightRay_ao_coop[ray_index].t;
+ if(update_path_radiance) {
+ path_radiance_accum_ao(L,
+ _throughput,
+ AOAlpha_coop[ray_index],
+ AOBSDF_coop[ray_index],
+ shadow,
+ state->bounce);
+ }
+ REMOVE_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO);
+ }
+
+ if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL)) {
+ float3 shadow = LightRay_dl_coop[ray_index].P;
+ char update_path_radiance = LightRay_dl_coop[ray_index].t;
+ if(update_path_radiance) {
+ BsdfEval L_light = BSDFEval_coop[ray_index];
+ path_radiance_accum_light(L,
+ _throughput,
+ &L_light,
+ shadow,
+ 1.0f,
+ state->bounce,
+ ISLamp_coop[ray_index]);
+ }
+ REMOVE_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL);
+ }
+ }
+
+ if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+
+ ccl_global float3 *throughput = &throughput_coop[ray_index];
+ ccl_global Ray *ray = &Ray_coop[ray_index];
+ ccl_global RNG* rng = &rng_coop[ray_index];
+ state = &PathState_coop[ray_index];
+ L = &PathRadiance_coop[ray_index];
+
+ /* compute direct lighting and next bounce */
+ if(!kernel_path_surface_bounce(kg, rng, sd, throughput, state, L, ray)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ enqueue_flag = 1;
+ }
+ }
+
+ return enqueue_flag;
+}
diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h
new file mode 100644
index 00000000000..73aa005a496
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_scene_intersect.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel_split_common.h"
+
+/* Note on kernel_scene_intersect kernel.
+ * This is the second kernel in the ray tracing logic. This is the first
+ * of the path iteration kernels. This kernel takes care of scene_intersect function.
+ *
+ * This kernel changes the ray_state of RAY_REGENERATED rays to RAY_ACTIVE.
+ * This kernel processes rays of ray state RAY_ACTIVE
+ * This kernel determines the rays that have hit the background and changes their ray state to RAY_HIT_BACKGROUND.
+ *
+ * The input and output are as follows,
+ *
+ * Ray_coop ---------------------------------------|--------- kernel_scene_intersect----------|--- PathState
+ * PathState_coop ---------------------------------| |--- Intersection
+ * ray_state --------------------------------------| |--- ray_state
+ * use_queues_flag --------------------------------| |
+ * parallel_samples -------------------------------| |
+ * QueueData(QUEUE_ACTIVE_AND_REGENERATED_RAYS) ---| |
+ * kg (data + globals) ----------------------------| |
+ * rng_coop ---------------------------------------| |
+ * sw ---------------------------------------------| |
+ * sh ---------------------------------------------| |
+ * queuesize --------------------------------------| |
+ *
+ * Note on Queues :
+ * Ideally we would want kernel_scene_intersect to work on queues.
+ * But during the very first time, the queues will be empty and hence we perform a direct mapping
+ * between ray-index and thread-index; From the next time onward, the queue will be filled and
+ * we may start operating on queues.
+ *
+ * State of queue during the first time this kernel is called :
+ * QUEUE_ACTIVE_AND_REGENERATED_RAYS and QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty.before and after this kernel
+ *
+ * State of queues during other times this kernel is called :
+ * At entry,
+ * QUEUE_ACTIVE_AND_REGENERATED_RAYS will have a mix of RAY_ACTIVE, RAY_UPDATE_BUFFER and RAY_REGENERATED rays;
+ * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE and RAY_UPDATE_BUFFER rays ;
+ * (The rays that are in the state RAY_UPDATE_BUFFER in both the queues are actually the same rays; These
+ * are the rays that were in RAY_ACTIVE state during the initial enqueue but on further processing
+ * , by different kernels, have turned into RAY_UPDATE_BUFFER rays. Since all kernel, even after fetching from
+ * QUEUE_ACTIVE_AND_REGENERATED_RAYS, proceed further based on ray state information, RAY_UPDATE_BUFFER rays
+ * being present in QUEUE_ACTIVE_AND_REGENERATED_RAYS does not cause any logical issues)
+ * At exit,
+ * QUEUE_ACTIVE_AND_REGENERATED_RAYS - All RAY_REGENERATED rays will have been converted to RAY_ACTIVE and
+ * Some rays in QUEUE_ACTIVE_AND_REGENERATED_RAYS queue will move to state RAY_HIT_BACKGROUND
+ * QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS - no change
+ */
+
+ccl_device void kernel_scene_intersect(
+ ccl_global char *globals,
+ ccl_constant KernelData *data,
+ ccl_global uint *rng_coop,
+ ccl_global Ray *Ray_coop, /* Required for scene_intersect */
+ ccl_global PathState *PathState_coop, /* Required for scene_intersect */
+ Intersection *Intersection_coop, /* Required for scene_intersect */
+ ccl_global char *ray_state, /* Denotes the state of each ray */
+ int sw, int sh,
+ ccl_global char *use_queues_flag, /* used to decide if this kernel should use
+ * queues to fetch ray index */
+#ifdef __KERNEL_DEBUG__
+ DebugData *debugdata_coop,
+#endif
+ int parallel_samples, /* Number of samples to be processed in parallel */
+ int ray_index)
+{
+ /* All regenerated rays become active here */
+ if(IS_STATE(ray_state, ray_index, RAY_REGENERATED))
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_ACTIVE);
+
+ if(!IS_STATE(ray_state, ray_index, RAY_ACTIVE))
+ return;
+
+ /* Load kernel globals structure */
+ KernelGlobals *kg = (KernelGlobals *)globals;
+
+#ifdef __KERNEL_DEBUG__
+ DebugData *debug_data = &debugdata_coop[ray_index];
+#endif
+ Intersection *isect = &Intersection_coop[ray_index];
+ PathState state = PathState_coop[ray_index];
+ Ray ray = Ray_coop[ray_index];
+
+ /* intersect scene */
+ uint visibility = path_state_ray_visibility(kg, &state);
+
+#ifdef __HAIR__
+ float difl = 0.0f, extmax = 0.0f;
+ uint lcg_state = 0;
+ RNG rng = rng_coop[ray_index];
+
+ if(kernel_data.bvh.have_curves) {
+ if((kernel_data.cam.resolution == 1) && (state.flag & PATH_RAY_CAMERA)) {
+ float3 pixdiff = ray.dD.dx + ray.dD.dy;
+ /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
+ difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
+ }
+
+ extmax = kernel_data.curve.maximum_width;
+ lcg_state = lcg_state_init(&rng, &state, 0x51633e2d);
+ }
+
+ bool hit = scene_intersect(kg, &ray, visibility, isect, &lcg_state, difl, extmax);
+#else
+ bool hit = scene_intersect(kg, &ray, visibility, isect, NULL, 0.0f, 0.0f);
+#endif
+
+#ifdef __KERNEL_DEBUG__
+ if(state.flag & PATH_RAY_CAMERA) {
+ debug_data->num_bvh_traversal_steps += isect->num_traversal_steps;
+ debug_data->num_bvh_traversed_instances += isect->num_traversed_instances;
+ }
+ debug_data->num_ray_bounces++;
+#endif
+
+ if(!hit) {
+ /* Change the state of rays that hit the background;
+ * These rays undergo special processing in the
+ * background_bufferUpdate kernel*/
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
+ }
+}
diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h
new file mode 100644
index 00000000000..e6fdc592586
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel_split_common.h"
+
+/* Note on kernel_shader_eval kernel
+ * This kernel is the 5th kernel in the ray tracing logic. This is
+ * the 4rd kernel in path iteration. This kernel sets up the ShaderData
+ * structure from the values computed by the previous kernels. It also identifies
+ * the rays of state RAY_TO_REGENERATE and enqueues them in QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue.
+ *
+ * The input and output of the kernel is as follows,
+ * rng_coop -------------------------------------------|--- kernel_shader_eval --|--- shader_data
+ * Ray_coop -------------------------------------------| |--- Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
+ * PathState_coop -------------------------------------| |--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
+ * Intersection_coop ----------------------------------| |
+ * Queue_data (QUEUE_ACTIVE_AND_REGENERATD_RAYS)-------| |
+ * Queue_index(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)---| |
+ * ray_state ------------------------------------------| |
+ * kg (globals + data) --------------------------------| |
+ * queuesize ------------------------------------------| |
+ *
+ * Note on Queues :
+ * This kernel reads from the QUEUE_ACTIVE_AND_REGENERATED_RAYS queue and processes
+ * only the rays of state RAY_ACTIVE;
+ * State of queues when this kernel is called,
+ * at entry,
+ * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays
+ * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty.
+ * at exit,
+ * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays
+ * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE rays
+ */
+ccl_device void kernel_shader_eval(
+ ccl_global char *globals,
+ ccl_constant KernelData *data,
+ ccl_global char *shader_data, /* Output ShaderData structure to be filled */
+ ccl_global uint *rng_coop, /* Required for rbsdf calculation */
+ ccl_global Ray *Ray_coop, /* Required for setting up shader from ray */
+ ccl_global PathState *PathState_coop, /* Required for all functions in this kernel */
+ Intersection *Intersection_coop, /* Required for setting up shader from ray */
+ ccl_global char *ray_state, /* Denotes the state of each ray */
+ int ray_index)
+{
+ if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ KernelGlobals *kg = (KernelGlobals *)globals;
+ ShaderData *sd = (ShaderData *)shader_data;
+ Intersection *isect = &Intersection_coop[ray_index];
+ ccl_global uint *rng = &rng_coop[ray_index];
+ ccl_global PathState *state = &PathState_coop[ray_index];
+ Ray ray = Ray_coop[ray_index];
+
+ shader_setup_from_ray(kg,
+ sd,
+ isect,
+ &ray,
+ state->bounce,
+ state->transparent_bounce);
+ float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF);
+ shader_eval_surface(kg, sd, rbsdf, state->flag, SHADER_CONTEXT_MAIN);
+ }
+}
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked.h b/intern/cycles/kernel/split/kernel_shadow_blocked.h
new file mode 100644
index 00000000000..154ec53ffbb
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel_split_common.h"
+
+/* Note on kernel_shadow_blocked kernel.
+ * This is the ninth kernel in the ray tracing logic. This is the eighth
+ * of the path iteration kernels. This kernel takes care of "shadow ray cast"
+ * logic of the direct lighting and AO part of ray tracing.
+ *
+ * The input and output are as follows,
+ *
+ * PathState_coop ----------------------------------|--- kernel_shadow_blocked --|
+ * LightRay_dl_coop --------------------------------| |--- LightRay_dl_coop
+ * LightRay_ao_coop --------------------------------| |--- LightRay_ao_coop
+ * ray_state ---------------------------------------| |--- ray_state
+ * Queue_data(QUEUE_SHADOW_RAY_CAST_AO_RAYS & | |--- Queue_data (QUEUE_SHADOW_RAY_CAST_AO_RAYS & QUEUE_SHADOW_RAY_CAST_AO_RAYS)
+ QUEUE_SHADOW_RAY_CAST_DL_RAYS) -------| |
+ * Queue_index(QUEUE_SHADOW_RAY_CAST_AO_RAYS&
+ QUEUE_SHADOW_RAY_CAST_DL_RAYS) -------| |
+ * kg (globals + data) -----------------------------| |
+ * queuesize ---------------------------------------| |
+ *
+ * Note on shader_shadow : shader_shadow is neither input nor output to this kernel. shader_shadow is filled and consumed in this kernel itself.
+ * Note on queues :
+ * The kernel fetches from QUEUE_SHADOW_RAY_CAST_AO_RAYS and QUEUE_SHADOW_RAY_CAST_DL_RAYS queues. We will empty
+ * these queues this kernel.
+ * State of queues when this kernel is called :
+ * state of queues QUEUE_ACTIVE_AND_REGENERATED_RAYS and QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be same
+ * before and after this kernel call.
+ * QUEUE_SHADOW_RAY_CAST_AO_RAYS & QUEUE_SHADOW_RAY_CAST_DL_RAYS will be filled with rays marked with flags RAY_SHADOW_RAY_CAST_AO
+ * and RAY_SHADOW_RAY_CAST_DL respectively, during kernel entry.
+ * QUEUE_SHADOW_RAY_CAST_AO_RAYS and QUEUE_SHADOW_RAY_CAST_DL_RAYS will be empty at kernel exit.
+ */
+ccl_device void kernel_shadow_blocked(
+ ccl_global char *globals,
+ ccl_constant KernelData *data,
+ ccl_global char *shader_shadow, /* Required for shadow blocked */
+ ccl_global PathState *PathState_coop, /* Required for shadow blocked */
+ ccl_global Ray *LightRay_dl_coop, /* Required for direct lighting's shadow blocked */
+ ccl_global Ray *LightRay_ao_coop, /* Required for AO's shadow blocked */
+ Intersection *Intersection_coop_AO,
+ Intersection *Intersection_coop_DL,
+ ccl_global char *ray_state,
+ int total_num_rays,
+ char shadow_blocked_type,
+ int ray_index)
+{
+ /* Flag determining if we need to update L. */
+ char update_path_radiance = 0;
+
+ if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) ||
+ IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO))
+ {
+ /* Load kernel global structure */
+ KernelGlobals *kg = (KernelGlobals *)globals;
+ ShaderData *sd_shadow = (ShaderData *)shader_shadow;
+
+ ccl_global PathState *state = &PathState_coop[ray_index];
+ ccl_global Ray *light_ray_dl_global = &LightRay_dl_coop[ray_index];
+ ccl_global Ray *light_ray_ao_global = &LightRay_ao_coop[ray_index];
+ Intersection *isect_ao_global = &Intersection_coop_AO[ray_index];
+ Intersection *isect_dl_global = &Intersection_coop_DL[ray_index];
+
+ ccl_global Ray *light_ray_global =
+ shadow_blocked_type == RAY_SHADOW_RAY_CAST_AO
+ ? light_ray_ao_global
+ : light_ray_dl_global;
+ Intersection *isect_global =
+ RAY_SHADOW_RAY_CAST_AO ? isect_ao_global : isect_dl_global;
+
+ float3 shadow;
+ update_path_radiance = !(shadow_blocked(kg,
+ state,
+ light_ray_global,
+ &shadow,
+ sd_shadow,
+ isect_global));
+
+ /* We use light_ray_global's P and t to store shadow and
+ * update_path_radiance.
+ */
+ light_ray_global->P = shadow;
+ light_ray_global->t = update_path_radiance;
+ }
+}
diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h
new file mode 100644
index 00000000000..e1c7e2cea99
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_split_common.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __KERNEL_SPLIT_H__
+#define __KERNEL_SPLIT_H__
+
+#include "kernel_compat_opencl.h"
+#include "kernel_math.h"
+#include "kernel_types.h"
+#include "kernel_globals.h"
+
+#include "util_atomic.h"
+
+#include "kernel_random.h"
+#include "kernel_projection.h"
+#include "kernel_montecarlo.h"
+#include "kernel_differential.h"
+#include "kernel_camera.h"
+
+#include "geom/geom.h"
+
+#include "kernel_accumulate.h"
+#include "kernel_shader.h"
+#include "kernel_light.h"
+#include "kernel_passes.h"
+
+#ifdef __SUBSURFACE__
+#include "kernel_subsurface.h"
+#endif
+
+#ifdef __VOLUME__
+#include "kernel_volume.h"
+#endif
+
+#include "kernel_path_state.h"
+#include "kernel_shadow.h"
+#include "kernel_emission.h"
+#include "kernel_path_common.h"
+#include "kernel_path_surface.h"
+#include "kernel_path_volume.h"
+
+#ifdef __KERNEL_DEBUG__
+#include "kernel_debug.h"
+#endif
+
+#include "kernel_queues.h"
+#include "kernel_work_stealing.h"
+
+#endif /* __KERNEL_SPLIT_H__ */
diff --git a/intern/cycles/kernel/split/kernel_sum_all_radiance.h b/intern/cycles/kernel/split/kernel_sum_all_radiance.h
new file mode 100644
index 00000000000..a21e9b6a0b1
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_sum_all_radiance.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../kernel_compat_opencl.h"
+#include "../kernel_math.h"
+#include "../kernel_types.h"
+#include "../kernel_globals.h"
+
+/* Since we process various samples in parallel; The output radiance of different samples
+ * are stored in different locations; This kernel combines the output radiance contributed
+ * by all different samples and stores them in the RenderTile's output buffer.
+ */
+ccl_device void kernel_sum_all_radiance(
+ ccl_constant KernelData *data, /* To get pass_stride to offet into buffer */
+ ccl_global float *buffer, /* Output buffer of RenderTile */
+ ccl_global float *per_sample_output_buffer, /* Radiance contributed by all samples */
+ int parallel_samples, int sw, int sh, int stride,
+ int buffer_offset_x,
+ int buffer_offset_y,
+ int buffer_stride,
+ int start_sample)
+{
+ int x = get_global_id(0);
+ int y = get_global_id(1);
+
+ if(x < sw && y < sh) {
+ buffer += ((buffer_offset_x + x) + (buffer_offset_y + y) * buffer_stride) * (data->film.pass_stride);
+ per_sample_output_buffer += ((x + y * stride) * parallel_samples) * (data->film.pass_stride);
+
+ int sample_stride = (data->film.pass_stride);
+
+ int sample_iterator = 0;
+ int pass_stride_iterator = 0;
+ int num_floats = data->film.pass_stride;
+
+ for(sample_iterator = 0; sample_iterator < parallel_samples; sample_iterator++) {
+ for(pass_stride_iterator = 0; pass_stride_iterator < num_floats; pass_stride_iterator++) {
+ *(buffer + pass_stride_iterator) =
+ (start_sample == 0 && sample_iterator == 0)
+ ? *(per_sample_output_buffer + pass_stride_iterator)
+ : *(buffer + pass_stride_iterator) + *(per_sample_output_buffer + pass_stride_iterator);
+ }
+ per_sample_output_buffer += sample_stride;
+ }
+ }
+}
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index d59c9b9e61c..15ac6519780 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -87,7 +87,7 @@ ccl_device_inline int stack_load_int(float *stack, uint a)
return __float_as_int(stack[a]);
}
-ccl_device_inline float stack_load_int_default(float *stack, uint a, uint value)
+ccl_device_inline int stack_load_int_default(float *stack, uint a, uint value)
{
return (a == (uint)SVM_STACK_INVALID)? (int)value: stack_load_int(stack, a);
}
@@ -142,6 +142,8 @@ CCL_NAMESPACE_END
#include "svm_noise.h"
#include "svm_texture.h"
+#include "svm_math_util.h"
+
#include "svm_attribute.h"
#include "svm_gradient.h"
#include "svm_blackbody.h"
@@ -164,7 +166,6 @@ CCL_NAMESPACE_END
#include "svm_mapping.h"
#include "svm_normal.h"
#include "svm_wave.h"
-#include "svm_math_util.h"
#include "svm_math.h"
#include "svm_mix.h"
#include "svm_ramp.h"
@@ -181,17 +182,20 @@ CCL_NAMESPACE_END
CCL_NAMESPACE_BEGIN
-/* Main Interpreter Loop */
+#define NODES_GROUP(group) ((group) <= __NODES_MAX_GROUP__)
+#define NODES_FEATURE(feature) ((__NODES_FEATURES__ & (feature)) != 0)
+/* Main Interpreter Loop */
ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderType type, int path_flag)
{
float stack[SVM_STACK_SIZE];
- int offset = sd->shader & SHADER_MASK;
+ int offset = ccl_fetch(sd, shader) & SHADER_MASK;
while(1) {
uint4 node = read_node(kg, &offset);
switch(node.x) {
+#if NODES_GROUP(NODE_GROUP_LEVEL_0)
case NODE_SHADER_JUMP: {
if(type == SHADER_TYPE_SURFACE) offset = node.y;
else if(type == SHADER_TYPE_VOLUME) offset = node.z;
@@ -208,15 +212,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade
case NODE_CLOSURE_BACKGROUND:
svm_node_closure_background(sd, stack, node);
break;
- case NODE_CLOSURE_HOLDOUT:
- svm_node_closure_holdout(sd, stack, node);
- break;
- case NODE_CLOSURE_AMBIENT_OCCLUSION:
- svm_node_closure_ambient_occlusion(sd, stack, node);
- break;
- case NODE_CLOSURE_VOLUME:
- svm_node_closure_volume(kg, sd, stack, node, path_flag);
- break;
case NODE_CLOSURE_SET_WEIGHT:
svm_node_closure_set_weight(sd, node.y, node.z, node.w);
break;
@@ -237,13 +232,137 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade
if(stack_load_float(stack, node.z) == 1.0f)
offset += node.y;
break;
-#ifdef __TEXTURES__
+ case NODE_GEOMETRY:
+ svm_node_geometry(kg, sd, stack, node.y, node.z);
+ break;
+ case NODE_CONVERT:
+ svm_node_convert(sd, stack, node.y, node.z, node.w);
+ break;
+ case NODE_TEX_COORD:
+ svm_node_tex_coord(kg, sd, path_flag, stack, node, &offset);
+ break;
+ case NODE_VALUE_F:
+ svm_node_value_f(kg, sd, stack, node.y, node.z);
+ break;
+ case NODE_VALUE_V:
+ svm_node_value_v(kg, sd, stack, node.y, &offset);
+ break;
+ case NODE_ATTR:
+ svm_node_attr(kg, sd, stack, node);
+ break;
+# if NODES_FEATURE(NODE_FEATURE_BUMP)
+ case NODE_GEOMETRY_BUMP_DX:
+ svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z);
+ break;
+ case NODE_GEOMETRY_BUMP_DY:
+ svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z);
+ break;
+ case NODE_SET_DISPLACEMENT:
+ svm_node_set_displacement(sd, stack, node.y);
+ break;
+# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */
+# ifdef __TEXTURES__
case NODE_TEX_IMAGE:
svm_node_tex_image(kg, sd, stack, node);
break;
case NODE_TEX_IMAGE_BOX:
svm_node_tex_image_box(kg, sd, stack, node);
break;
+ case NODE_TEX_NOISE:
+ svm_node_tex_noise(kg, sd, stack, node, &offset);
+ break;
+# endif /* __TEXTURES__ */
+# ifdef __EXTRA_NODES__
+# if NODES_FEATURE(NODE_FEATURE_BUMP)
+ case NODE_SET_BUMP:
+ svm_node_set_bump(kg, sd, stack, node);
+ break;
+ case NODE_ATTR_BUMP_DX:
+ svm_node_attr_bump_dx(kg, sd, stack, node);
+ break;
+ case NODE_ATTR_BUMP_DY:
+ svm_node_attr_bump_dy(kg, sd, stack, node);
+ break;
+ case NODE_TEX_COORD_BUMP_DX:
+ svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, &offset);
+ break;
+ case NODE_TEX_COORD_BUMP_DY:
+ svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, &offset);
+ break;
+ case NODE_CLOSURE_SET_NORMAL:
+ svm_node_set_normal(kg, sd, stack, node.y, node.z);
+ break;
+# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */
+ case NODE_HSV:
+ svm_node_hsv(kg, sd, stack, node.y, node.z, node.w, &offset);
+ break;
+# endif /* __EXTRA_NODES__ */
+#endif /* NODES_GROUP(NODE_GROUP_LEVEL_0) */
+
+#if NODES_GROUP(NODE_GROUP_LEVEL_1)
+ case NODE_CLOSURE_HOLDOUT:
+ svm_node_closure_holdout(sd, stack, node);
+ break;
+ case NODE_CLOSURE_AMBIENT_OCCLUSION:
+ svm_node_closure_ambient_occlusion(sd, stack, node);
+ break;
+ case NODE_FRESNEL:
+ svm_node_fresnel(sd, stack, node.y, node.z, node.w);
+ break;
+ case NODE_LAYER_WEIGHT:
+ svm_node_layer_weight(sd, stack, node);
+ break;
+# if NODES_FEATURE(NODE_FEATURE_VOLUME)
+ case NODE_CLOSURE_VOLUME:
+ svm_node_closure_volume(kg, sd, stack, node, path_flag);
+ break;
+# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */
+# ifdef __EXTRA_NODES__
+ case NODE_MATH:
+ svm_node_math(kg, sd, stack, node.y, node.z, node.w, &offset);
+ break;
+ case NODE_VECTOR_MATH:
+ svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, &offset);
+ break;
+ case NODE_RGB_RAMP:
+ svm_node_rgb_ramp(kg, sd, stack, node, &offset);
+ break;
+ case NODE_GAMMA:
+ svm_node_gamma(sd, stack, node.y, node.z, node.w);
+ break;
+ case NODE_BRIGHTCONTRAST:
+ svm_node_brightness(sd, stack, node.y, node.z, node.w);
+ break;
+ case NODE_LIGHT_PATH:
+ svm_node_light_path(sd, stack, node.y, node.z, path_flag);
+ break;
+ case NODE_OBJECT_INFO:
+ svm_node_object_info(kg, sd, stack, node.y, node.z);
+ break;
+ case NODE_PARTICLE_INFO:
+ svm_node_particle_info(kg, sd, stack, node.y, node.z);
+ break;
+# ifdef __HAIR__
+# if NODES_FEATURE(NODE_FEATURE_HAIR)
+ case NODE_HAIR_INFO:
+ svm_node_hair_info(kg, sd, stack, node.y, node.z);
+ break;
+# endif /* NODES_FEATURE(NODE_FEATURE_HAIR) */
+# endif /* __HAIR__ */
+# endif /* __EXTRA_NODES__ */
+#endif /* NODES_GROUP(NODE_GROUP_LEVEL_1) */
+
+#if NODES_GROUP(NODE_GROUP_LEVEL_2)
+ case NODE_MAPPING:
+ svm_node_mapping(kg, sd, stack, node.y, node.z, &offset);
+ break;
+ case NODE_MIN_MAX:
+ svm_node_min_max(kg, sd, stack, node.y, node.z, &offset);
+ break;
+ case NODE_CAMERA:
+ svm_node_camera(kg, sd, stack, node.y, node.z, node.w);
+ break;
+# ifdef __TEXTURES__
case NODE_TEX_ENVIRONMENT:
svm_node_tex_environment(kg, sd, stack, node);
break;
@@ -253,9 +372,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade
case NODE_TEX_GRADIENT:
svm_node_tex_gradient(sd, stack, node);
break;
- case NODE_TEX_NOISE:
- svm_node_tex_noise(kg, sd, stack, node, &offset);
- break;
case NODE_TEX_VORONOI:
svm_node_tex_voronoi(kg, sd, stack, node, &offset);
break;
@@ -274,55 +390,34 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade
case NODE_TEX_BRICK:
svm_node_tex_brick(kg, sd, stack, node, &offset);
break;
-#endif
- case NODE_CAMERA:
- svm_node_camera(kg, sd, stack, node.y, node.z, node.w);
- break;
- case NODE_GEOMETRY:
- svm_node_geometry(kg, sd, stack, node.y, node.z);
- break;
-#ifdef __EXTRA_NODES__
- case NODE_GEOMETRY_BUMP_DX:
- svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z);
- break;
- case NODE_GEOMETRY_BUMP_DY:
- svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z);
- break;
- case NODE_LIGHT_PATH:
- svm_node_light_path(sd, stack, node.y, node.z, path_flag);
- break;
- case NODE_OBJECT_INFO:
- svm_node_object_info(kg, sd, stack, node.y, node.z);
- break;
- case NODE_PARTICLE_INFO:
- svm_node_particle_info(kg, sd, stack, node.y, node.z);
+# endif /* __TEXTURES__ */
+# ifdef __EXTRA_NODES__
+ case NODE_NORMAL:
+ svm_node_normal(kg, sd, stack, node.y, node.z, node.w, &offset);
break;
-#ifdef __HAIR__
- case NODE_HAIR_INFO:
- svm_node_hair_info(kg, sd, stack, node.y, node.z);
+ case NODE_LIGHT_FALLOFF:
+ svm_node_light_falloff(sd, stack, node);
break;
-#endif
+# endif /* __EXTRA_NODES__ */
+#endif /* NODES_GROUP(NODE_GROUP_LEVEL_2) */
-#endif
- case NODE_CONVERT:
- svm_node_convert(sd, stack, node.y, node.z, node.w);
+#if NODES_GROUP(NODE_GROUP_LEVEL_3)
+ case NODE_RGB_CURVES:
+ svm_node_rgb_curves(kg, sd, stack, node, &offset);
break;
- case NODE_VALUE_F:
- svm_node_value_f(kg, sd, stack, node.y, node.z);
+ case NODE_VECTOR_CURVES:
+ svm_node_vector_curves(kg, sd, stack, node, &offset);
break;
- case NODE_VALUE_V:
- svm_node_value_v(kg, sd, stack, node.y, &offset);
+ case NODE_TANGENT:
+ svm_node_tangent(kg, sd, stack, node);
break;
-#ifdef __EXTRA_NODES__
+ case NODE_NORMAL_MAP:
+ svm_node_normal_map(kg, sd, stack, node);
+ break;
+# ifdef __EXTRA_NODES__
case NODE_INVERT:
svm_node_invert(sd, stack, node.y, node.z, node.w);
break;
- case NODE_GAMMA:
- svm_node_gamma(sd, stack, node.y, node.z, node.w);
- break;
- case NODE_BRIGHTCONTRAST:
- svm_node_brightness(sd, stack, node.y, node.z, node.w);
- break;
case NODE_MIX:
svm_node_mix(kg, sd, stack, node.y, node.z, node.w, &offset);
break;
@@ -338,28 +433,9 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade
case NODE_COMBINE_HSV:
svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, &offset);
break;
- case NODE_HSV:
- svm_node_hsv(kg, sd, stack, node.y, node.z, node.w, &offset);
- break;
-#endif
- case NODE_ATTR:
- svm_node_attr(kg, sd, stack, node);
- break;
-#ifdef __EXTRA_NODES__
- case NODE_ATTR_BUMP_DX:
- svm_node_attr_bump_dx(kg, sd, stack, node);
- break;
- case NODE_ATTR_BUMP_DY:
- svm_node_attr_bump_dy(kg, sd, stack, node);
- break;
-#endif
- case NODE_FRESNEL:
- svm_node_fresnel(sd, stack, node.y, node.z, node.w);
- break;
- case NODE_LAYER_WEIGHT:
- svm_node_layer_weight(sd, stack, node);
+ case NODE_VECTOR_TRANSFORM:
+ svm_node_vector_transform(kg, sd, stack, node);
break;
-#ifdef __EXTRA_NODES__
case NODE_WIREFRAME:
svm_node_wireframe(kg, sd, stack, node);
break;
@@ -369,70 +445,20 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade
case NODE_BLACKBODY:
svm_node_blackbody(kg, sd, stack, node.y, node.z);
break;
- case NODE_SET_DISPLACEMENT:
- svm_node_set_displacement(sd, stack, node.y);
- break;
- case NODE_SET_BUMP:
- svm_node_set_bump(kg, sd, stack, node);
- break;
- case NODE_MATH:
- svm_node_math(kg, sd, stack, node.y, node.z, node.w, &offset);
- break;
- case NODE_VECTOR_MATH:
- svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, &offset);
- break;
- case NODE_VECTOR_TRANSFORM:
- svm_node_vector_transform(kg, sd, stack, node);
- break;
- case NODE_NORMAL:
- svm_node_normal(kg, sd, stack, node.y, node.z, node.w, &offset);
- break;
-#endif
- case NODE_MAPPING:
- svm_node_mapping(kg, sd, stack, node.y, node.z, &offset);
- break;
- case NODE_MIN_MAX:
- svm_node_min_max(kg, sd, stack, node.y, node.z, &offset);
- break;
- case NODE_TEX_COORD:
- svm_node_tex_coord(kg, sd, path_flag, stack, node, &offset);
- break;
-#ifdef __EXTRA_NODES__
- case NODE_TEX_COORD_BUMP_DX:
- svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, &offset);
- break;
- case NODE_TEX_COORD_BUMP_DY:
- svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, &offset);
- break;
- case NODE_CLOSURE_SET_NORMAL:
- svm_node_set_normal(kg, sd, stack, node.y, node.z );
- break;
- case NODE_RGB_RAMP:
- svm_node_rgb_ramp(kg, sd, stack, node, &offset);
- break;
- case NODE_RGB_CURVES:
- svm_node_rgb_curves(kg, sd, stack, node, &offset);
- break;
- case NODE_VECTOR_CURVES:
- svm_node_vector_curves(kg, sd, stack, node, &offset);
- break;
- case NODE_LIGHT_FALLOFF:
- svm_node_light_falloff(sd, stack, node);
- break;
-#endif
- case NODE_TANGENT:
- svm_node_tangent(kg, sd, stack, node);
- break;
- case NODE_NORMAL_MAP:
- svm_node_normal_map(kg, sd, stack, node);
- break;
+# endif /* __EXTRA_NODES__ */
+#endif /* NODES_GROUP(NODE_GROUP_LEVEL_3) */
case NODE_END:
+ return;
default:
+ kernel_assert(!"Unknown node type was passed to the SVM machine");
return;
}
}
}
+#undef NODES_GROUP
+#undef NODES_FEATURE
+
CCL_NAMESPACE_END
#endif /* __SVM_H__ */
diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h
index b63978b6e1f..025ae96f59d 100644
--- a/intern/cycles/kernel/svm/svm_attribute.h
+++ b/intern/cycles/kernel/svm/svm_attribute.h
@@ -22,12 +22,12 @@ ccl_device void svm_node_attr_init(KernelGlobals *kg, ShaderData *sd,
uint4 node, NodeAttributeType *type,
NodeAttributeType *mesh_type, AttributeElement *elem, int *offset, uint *out_offset)
{
- if(sd->object != OBJECT_NONE) {
+ if(ccl_fetch(sd, object) != OBJECT_NONE) {
/* find attribute by unique id */
uint id = node.y;
- uint attr_offset = sd->object*kernel_data.bvh.attributes_map_stride;
+ uint attr_offset = ccl_fetch(sd, object)*kernel_data.bvh.attributes_map_stride;
#ifdef __HAIR__
- attr_offset = (sd->type & PRIMITIVE_ALL_CURVE)? attr_offset + ATTR_PRIM_CURVE: attr_offset;
+ attr_offset = (ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE)? attr_offset + ATTR_PRIM_CURVE: attr_offset;
#endif
uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
diff --git a/intern/cycles/kernel/svm/svm_blackbody.h b/intern/cycles/kernel/svm/svm_blackbody.h
index 1e40e868e14..b750ad87b7f 100644
--- a/intern/cycles/kernel/svm/svm_blackbody.h
+++ b/intern/cycles/kernel/svm/svm_blackbody.h
@@ -36,48 +36,12 @@ CCL_NAMESPACE_BEGIN
ccl_device void svm_node_blackbody(KernelGlobals *kg, ShaderData *sd, float *stack, uint temperature_offset, uint col_offset)
{
- /* Output */
- float3 color_rgb = make_float3(0.0f, 0.0f, 0.0f);
-
/* Input */
float temperature = stack_load_float(stack, temperature_offset);
- if (temperature < BB_DRAPER) {
- /* just return very very dim red */
- color_rgb = make_float3(1.0e-6f,0.0f,0.0f);
- }
- else if (temperature <= BB_MAX_TABLE_RANGE) {
- /* This is the overall size of the table */
- const int lookuptablesize = 956;
- const float lookuptablenormalize = 1.0f/956.0f;
-
- /* reconstruct a proper index for the table lookup, compared to OSL we don't look up two colors
- just one (the OSL-lerp is also automatically done for us by "lookup_table_read") */
- float t = powf((temperature - BB_DRAPER) * (1.0f / BB_TABLE_SPACING), (1.0f / BB_TABLE_XPOWER));
-
- int blackbody_table_offset = kernel_data.tables.blackbody_offset;
-
- /* Retrieve colors from the lookup table */
- float lutval = t*lookuptablenormalize;
- float R = lookup_table_read(kg, lutval, blackbody_table_offset, lookuptablesize);
- lutval = (t + 319.0f*1.0f)*lookuptablenormalize;
- float G = lookup_table_read(kg, lutval, blackbody_table_offset, lookuptablesize);
- lutval = (t + 319.0f*2.0f)*lookuptablenormalize;
- float B = lookup_table_read(kg, lutval, blackbody_table_offset, lookuptablesize);
-
- R = powf(R, BB_TABLE_YPOWER);
- G = powf(G, BB_TABLE_YPOWER);
- B = powf(B, BB_TABLE_YPOWER);
-
- color_rgb = make_float3(R, G, B);
- }
-
- /* Luminance */
- float l = linear_rgb_to_gray(color_rgb);
- if (l != 0.0f)
- color_rgb /= l;
+ float3 color_rgb = svm_math_blackbody_color(temperature);
- if (stack_valid(col_offset))
+ if(stack_valid(col_offset))
stack_store_float3(stack, col_offset, color_rgb);
}
diff --git a/intern/cycles/kernel/svm/svm_brick.h b/intern/cycles/kernel/svm/svm_brick.h
index 33a2a5c7598..fcf8f47b77e 100644
--- a/intern/cycles/kernel/svm/svm_brick.h
+++ b/intern/cycles/kernel/svm/svm_brick.h
@@ -47,7 +47,7 @@ ccl_device_noinline float2 svm_brick(float3 p, float mortar_size, float bias,
y = p.y - row_height*rownum;
return make_float2(
- clamp((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias), 0.0f, 1.0f),
+ saturate((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias)),
(x < mortar_size || y < mortar_size ||
x > (brick_width - mortar_size) ||
diff --git a/intern/cycles/kernel/svm/svm_brightness.h b/intern/cycles/kernel/svm/svm_brightness.h
index 631bd1825ee..e4d545a00ae 100644
--- a/intern/cycles/kernel/svm/svm_brightness.h
+++ b/intern/cycles/kernel/svm/svm_brightness.h
@@ -32,7 +32,7 @@ ccl_device void svm_node_brightness(ShaderData *sd, float *stack, uint in_color,
color.y = max(a*color.y + b, 0.0f);
color.z = max(a*color.z + b, 0.0f);
- if (stack_valid(out_color))
+ if(stack_valid(out_color))
stack_store_float3(stack, out_color, color);
}
diff --git a/intern/cycles/kernel/svm/svm_camera.h b/intern/cycles/kernel/svm/svm_camera.h
index e03745cb331..00678a49d70 100644
--- a/intern/cycles/kernel/svm/svm_camera.h
+++ b/intern/cycles/kernel/svm/svm_camera.h
@@ -23,17 +23,17 @@ ccl_device void svm_node_camera(KernelGlobals *kg, ShaderData *sd, float *stack,
float3 vector;
Transform tfm = kernel_data.cam.worldtocamera;
- vector = transform_point(&tfm, sd->P);
+ vector = transform_point(&tfm, ccl_fetch(sd, P));
zdepth = vector.z;
distance = len(vector);
- if (stack_valid(out_vector))
+ if(stack_valid(out_vector))
stack_store_float3(stack, out_vector, normalize(vector));
- if (stack_valid(out_zdepth))
+ if(stack_valid(out_zdepth))
stack_store_float(stack, out_zdepth, zdepth);
- if (stack_valid(out_distance))
+ if(stack_valid(out_distance))
stack_store_float(stack, out_distance, distance);
}
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index 07ac7104e68..20a6cb8cd45 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -25,10 +25,13 @@ ccl_device void svm_node_glass_setup(ShaderData *sd, ShaderClosure *sc, int type
sc->data0 = eta;
sc->data1 = 0.0f;
sc->data2 = 0.0f;
- sd->flag |= bsdf_refraction_setup(sc);
+ ccl_fetch(sd, flag) |= bsdf_refraction_setup(sc);
+ }
+ else {
+ sc->data0 = 0.0f;
+ sc->data1 = 0.0f;
+ ccl_fetch(sd, flag) |= bsdf_reflection_setup(sc);
}
- else
- sd->flag |= bsdf_reflection_setup(sc);
}
else if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID) {
sc->data0 = roughness;
@@ -36,9 +39,9 @@ ccl_device void svm_node_glass_setup(ShaderData *sd, ShaderClosure *sc, int type
sc->data2 = eta;
if(refract)
- sd->flag |= bsdf_microfacet_beckmann_refraction_setup(sc);
+ ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_refraction_setup(sc);
else
- sd->flag |= bsdf_microfacet_beckmann_setup(sc);
+ ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_setup(sc);
}
else {
sc->data0 = roughness;
@@ -46,23 +49,23 @@ ccl_device void svm_node_glass_setup(ShaderData *sd, ShaderClosure *sc, int type
sc->data2 = eta;
if(refract)
- sd->flag |= bsdf_microfacet_ggx_refraction_setup(sc);
+ ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_refraction_setup(sc);
else
- sd->flag |= bsdf_microfacet_ggx_setup(sc);
+ ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_setup(sc);
}
}
ccl_device_inline ShaderClosure *svm_node_closure_get_non_bsdf(ShaderData *sd, ClosureType type, float mix_weight)
{
- ShaderClosure *sc = &sd->closure[sd->num_closure];
+ ShaderClosure *sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure));
- if(sd->num_closure < MAX_CLOSURE) {
+ if(ccl_fetch(sd, num_closure) < MAX_CLOSURE) {
sc->weight *= mix_weight;
sc->type = type;
#ifdef __OSL__
sc->prim = NULL;
#endif
- sd->num_closure++;
+ ccl_fetch(sd, num_closure)++;
return sc;
}
@@ -71,14 +74,15 @@ ccl_device_inline ShaderClosure *svm_node_closure_get_non_bsdf(ShaderData *sd, C
ccl_device_inline ShaderClosure *svm_node_closure_get_bsdf(ShaderData *sd, float mix_weight)
{
- ShaderClosure *sc = &sd->closure[sd->num_closure];
+ ShaderClosure *sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure));
+
float3 weight = sc->weight * mix_weight;
float sample_weight = fabsf(average(weight));
- if(sample_weight > CLOSURE_WEIGHT_CUTOFF && sd->num_closure < MAX_CLOSURE) {
+ if(sample_weight > CLOSURE_WEIGHT_CUTOFF && ccl_fetch(sd, num_closure) < MAX_CLOSURE) {
sc->weight = weight;
sc->sample_weight = sample_weight;
- sd->num_closure++;
+ ccl_fetch(sd, num_closure)++;
#ifdef __OSL__
sc->prim = NULL;
#endif
@@ -90,14 +94,15 @@ ccl_device_inline ShaderClosure *svm_node_closure_get_bsdf(ShaderData *sd, float
ccl_device_inline ShaderClosure *svm_node_closure_get_absorption(ShaderData *sd, float mix_weight)
{
- ShaderClosure *sc = &sd->closure[sd->num_closure];
+ ShaderClosure *sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure));
+
float3 weight = (make_float3(1.0f, 1.0f, 1.0f) - sc->weight) * mix_weight;
float sample_weight = fabsf(average(weight));
- if(sample_weight > CLOSURE_WEIGHT_CUTOFF && sd->num_closure < MAX_CLOSURE) {
+ if(sample_weight > CLOSURE_WEIGHT_CUTOFF && ccl_fetch(sd, num_closure) < MAX_CLOSURE) {
sc->weight = weight;
sc->sample_weight = sample_weight;
- sd->num_closure++;
+ ccl_fetch(sd, num_closure)++;
#ifdef __OSL__
sc->prim = NULL;
#endif
@@ -121,7 +126,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
if(mix_weight == 0.0f)
return;
- float3 N = stack_valid(data_node.x)? stack_load_float3(stack, data_node.x): sd->N;
+ float3 N = stack_valid(data_node.x)? stack_load_float3(stack, data_node.x): ccl_fetch(sd, N);
float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __uint_as_float(node.z);
float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __uint_as_float(node.w);
@@ -139,13 +144,13 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->data0 = 0.0f;
sc->data1 = 0.0f;
sc->data2 = 0.0f;
- sd->flag |= bsdf_diffuse_setup(sc);
+ ccl_fetch(sd, flag) |= bsdf_diffuse_setup(sc);
}
else {
sc->data0 = roughness;
sc->data1 = 0.0f;
sc->data2 = 0.0f;
- sd->flag |= bsdf_oren_nayar_setup(sc);
+ ccl_fetch(sd, flag) |= bsdf_oren_nayar_setup(sc);
}
}
break;
@@ -158,7 +163,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->data1 = 0.0f;
sc->data2 = 0.0f;
sc->N = N;
- sd->flag |= bsdf_translucent_setup(sc);
+ ccl_fetch(sd, flag) |= bsdf_translucent_setup(sc);
}
break;
}
@@ -170,7 +175,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->data1 = 0.0f;
sc->data2 = 0.0f;
sc->N = N;
- sd->flag |= bsdf_transparent_setup(sc);
+ ccl_fetch(sd, flag) |= bsdf_transparent_setup(sc);
}
break;
}
@@ -192,13 +197,13 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
/* setup bsdf */
if(type == CLOSURE_BSDF_REFLECTION_ID)
- sd->flag |= bsdf_reflection_setup(sc);
+ ccl_fetch(sd, flag) |= bsdf_reflection_setup(sc);
else if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_ID)
- sd->flag |= bsdf_microfacet_beckmann_setup(sc);
+ ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_setup(sc);
else if(type == CLOSURE_BSDF_MICROFACET_GGX_ID)
- sd->flag |= bsdf_microfacet_ggx_setup(sc);
+ ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_setup(sc);
else
- sd->flag |= bsdf_ashikhmin_shirley_setup(sc);
+ ccl_fetch(sd, flag) |= bsdf_ashikhmin_shirley_setup(sc);
}
break;
@@ -216,7 +221,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->N = N;
float eta = fmaxf(param2, 1e-5f);
- eta = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta;
+ eta = (ccl_fetch(sd, flag) & SD_BACKFACING)? 1.0f/eta: eta;
/* setup bsdf */
if(type == CLOSURE_BSDF_REFRACTION_ID) {
@@ -224,7 +229,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->data1 = 0.0f;
sc->data2 = 0.0f;
- sd->flag |= bsdf_refraction_setup(sc);
+ ccl_fetch(sd, flag) |= bsdf_refraction_setup(sc);
}
else {
sc->data0 = param1;
@@ -232,9 +237,9 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->data2 = eta;
if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID)
- sd->flag |= bsdf_microfacet_beckmann_refraction_setup(sc);
+ ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_refraction_setup(sc);
else
- sd->flag |= bsdf_microfacet_ggx_refraction_setup(sc);
+ ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_refraction_setup(sc);
}
}
@@ -251,15 +256,15 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
#endif
/* index of refraction */
float eta = fmaxf(param2, 1e-5f);
- eta = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta;
+ eta = (ccl_fetch(sd, flag) & SD_BACKFACING)? 1.0f/eta: eta;
/* fresnel */
- float cosNO = dot(N, sd->I);
+ float cosNO = dot(N, ccl_fetch(sd, I));
float fresnel = fresnel_dielectric_cos(cosNO, eta);
float roughness = param1;
/* reflection */
- ShaderClosure *sc = &sd->closure[sd->num_closure];
+ ShaderClosure *sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure));
float3 weight = sc->weight;
float sample_weight = sc->sample_weight;
@@ -280,15 +285,17 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
#endif
/* refraction */
- sc = &sd->closure[sd->num_closure];
- sc->weight = weight;
- sc->sample_weight = sample_weight;
+ if(ccl_fetch(sd, num_closure) < MAX_CLOSURE) {
+ sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure));
+ sc->weight = weight;
+ sc->sample_weight = sample_weight;
- sc = svm_node_closure_get_bsdf(sd, mix_weight*(1.0f - fresnel));
+ sc = svm_node_closure_get_bsdf(sd, mix_weight*(1.0f - fresnel));
- if(sc) {
- sc->N = N;
- svm_node_glass_setup(sd, sc, type, eta, roughness, true);
+ if(sc) {
+ sc->N = N;
+ svm_node_glass_setup(sd, sc, type, eta, roughness, true);
+ }
}
break;
@@ -328,12 +335,12 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->data2 = 0.0f;
- if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID)
- sd->flag |= bsdf_microfacet_beckmann_aniso_setup(sc);
- else if (type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID)
- sd->flag |= bsdf_microfacet_ggx_aniso_setup(sc);
+ if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID)
+ ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_aniso_setup(sc);
+ else if(type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID)
+ ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_aniso_setup(sc);
else
- sd->flag |= bsdf_ashikhmin_shirley_aniso_setup(sc);
+ ccl_fetch(sd, flag) |= bsdf_ashikhmin_shirley_aniso_setup(sc);
}
break;
}
@@ -344,10 +351,10 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->N = N;
/* sigma */
- sc->data0 = clamp(param1, 0.0f, 1.0f);
+ sc->data0 = saturate(param1);
sc->data1 = 0.0f;
sc->data2 = 0.0f;
- sd->flag |= bsdf_ashikhmin_velvet_setup(sc);
+ ccl_fetch(sd, flag) |= bsdf_ashikhmin_velvet_setup(sc);
}
break;
}
@@ -362,10 +369,10 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->data1 = param2;
sc->data2 = 0.0f;
- if (type == CLOSURE_BSDF_DIFFUSE_TOON_ID)
- sd->flag |= bsdf_diffuse_toon_setup(sc);
+ if(type == CLOSURE_BSDF_DIFFUSE_TOON_ID)
+ ccl_fetch(sd, flag) |= bsdf_diffuse_toon_setup(sc);
else
- sd->flag |= bsdf_glossy_toon_setup(sc);
+ ccl_fetch(sd, flag) |= bsdf_glossy_toon_setup(sc);
}
break;
}
@@ -373,7 +380,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: {
- if(sd->flag & SD_BACKFACING && sd->type & PRIMITIVE_ALL_CURVE) {
+ if(ccl_fetch(sd, flag) & SD_BACKFACING && ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight);
if(sc) {
@@ -384,11 +391,13 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
* spawned by transmission from the front */
sc->weight = make_float3(1.0f, 1.0f, 1.0f);
sc->N = N;
- sd->flag |= bsdf_transparent_setup(sc);
+ sc->data0 = 0.0f;
+ sc->data1 = 0.0f;
+ ccl_fetch(sd, flag) |= bsdf_transparent_setup(sc);
}
}
else {
- ShaderClosure *sc = &sd->closure[sd->num_closure];
+ ShaderClosure *sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure));
sc = svm_node_closure_get_bsdf(sd, mix_weight);
if(sc) {
@@ -397,18 +406,18 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->data1 = param2;
sc->data2 = -stack_load_float(stack, data_node.z);
- if(!(sd->type & PRIMITIVE_ALL_CURVE)) {
- sc->T = normalize(sd->dPdv);
+ if(!(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE)) {
+ sc->T = normalize(ccl_fetch(sd, dPdv));
sc->data2 = 0.0f;
}
else
- sc->T = normalize(sd->dPdu);
+ sc->T = normalize(ccl_fetch(sd, dPdu));
if(type == CLOSURE_BSDF_HAIR_REFLECTION_ID) {
- sd->flag |= bsdf_hair_reflection_setup(sc);
+ ccl_fetch(sd, flag) |= bsdf_hair_reflection_setup(sc);
}
else {
- sd->flag |= bsdf_hair_transmission_setup(sc);
+ ccl_fetch(sd, flag) |= bsdf_hair_transmission_setup(sc);
}
}
}
@@ -418,9 +427,14 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
#endif
#ifdef __SUBSURFACE__
+#ifndef __SPLIT_KERNEL__
+# define sc_next(sc) sc++
+# else
+# define sc_next(sc) sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure))
+# endif
case CLOSURE_BSSRDF_CUBIC_ID:
case CLOSURE_BSSRDF_GAUSSIAN_ID: {
- ShaderClosure *sc = &sd->closure[sd->num_closure];
+ ShaderClosure *sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure));
float3 weight = sc->weight * mix_weight;
float sample_weight = fabsf(average(weight));
@@ -430,7 +444,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
if(path_flag & PATH_RAY_DIFFUSE_ANCESTOR)
param1 = 0.0f;
- if(sample_weight > CLOSURE_WEIGHT_CUTOFF && sd->num_closure+2 < MAX_CLOSURE) {
+ if(sample_weight > CLOSURE_WEIGHT_CUTOFF && ccl_fetch(sd, num_closure)+2 < MAX_CLOSURE) {
/* radius * scale */
float3 radius = stack_load_float3(stack, data_node.z)*param1;
/* sharpness */
@@ -450,10 +464,10 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->prim = NULL;
#endif
sc->N = N;
- sd->flag |= bssrdf_setup(sc, (ClosureType)type);
+ ccl_fetch(sd, flag) |= bssrdf_setup(sc, (ClosureType)type);
- sd->num_closure++;
- sc++;
+ ccl_fetch(sd, num_closure)++;
+ sc_next(sc);
}
if(fabsf(weight.y) > 0.0f) {
@@ -467,10 +481,10 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->prim = NULL;
#endif
sc->N = N;
- sd->flag |= bssrdf_setup(sc, (ClosureType)type);
+ ccl_fetch(sd, flag) |= bssrdf_setup(sc, (ClosureType)type);
- sd->num_closure++;
- sc++;
+ ccl_fetch(sd, num_closure)++;
+ sc_next(sc);
}
if(fabsf(weight.z) > 0.0f) {
@@ -484,15 +498,16 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->prim = NULL;
#endif
sc->N = N;
- sd->flag |= bssrdf_setup(sc, (ClosureType)type);
+ ccl_fetch(sd, flag) |= bssrdf_setup(sc, (ClosureType)type);
- sd->num_closure++;
- sc++;
+ ccl_fetch(sd, num_closure)++;
+ sc_next(sc);
}
}
break;
}
+# undef sc_next
#endif
default:
break;
@@ -520,7 +535,7 @@ ccl_device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float
ShaderClosure *sc = svm_node_closure_get_absorption(sd, mix_weight * density);
if(sc) {
- sd->flag |= volume_absorption_setup(sc);
+ ccl_fetch(sd, flag) |= volume_absorption_setup(sc);
}
break;
}
@@ -529,7 +544,8 @@ ccl_device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float
if(sc) {
sc->data0 = param2; /* g */
- sd->flag |= volume_henyey_greenstein_setup(sc);
+ sc->data1 = 0.0f;
+ ccl_fetch(sd, flag) |= volume_henyey_greenstein_setup(sc);
}
break;
}
@@ -554,7 +570,7 @@ ccl_device void svm_node_closure_emission(ShaderData *sd, float *stack, uint4 no
else
svm_node_closure_get_non_bsdf(sd, CLOSURE_EMISSION_ID, 1.0f);
- sd->flag |= SD_EMISSION;
+ ccl_fetch(sd, flag) |= SD_EMISSION;
}
ccl_device void svm_node_closure_background(ShaderData *sd, float *stack, uint4 node)
@@ -588,7 +604,7 @@ ccl_device void svm_node_closure_holdout(ShaderData *sd, float *stack, uint4 nod
else
svm_node_closure_get_non_bsdf(sd, CLOSURE_HOLDOUT_ID, 1.0f);
- sd->flag |= SD_HOLDOUT;
+ ccl_fetch(sd, flag) |= SD_HOLDOUT;
}
ccl_device void svm_node_closure_ambient_occlusion(ShaderData *sd, float *stack, uint4 node)
@@ -606,15 +622,17 @@ ccl_device void svm_node_closure_ambient_occlusion(ShaderData *sd, float *stack,
else
svm_node_closure_get_non_bsdf(sd, CLOSURE_AMBIENT_OCCLUSION_ID, 1.0f);
- sd->flag |= SD_AO;
+ ccl_fetch(sd, flag) |= SD_AO;
}
/* Closure Nodes */
ccl_device_inline void svm_node_closure_store_weight(ShaderData *sd, float3 weight)
{
- if(sd->num_closure < MAX_CLOSURE)
- sd->closure[sd->num_closure].weight = weight;
+ if(ccl_fetch(sd, num_closure) < MAX_CLOSURE) {
+ ShaderClosure *sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure));
+ sc->weight = weight;
+ }
}
ccl_device void svm_node_closure_set_weight(ShaderData *sd, uint r, uint g, uint b)
@@ -649,7 +667,7 @@ ccl_device void svm_node_mix_closure(ShaderData *sd, float *stack, uint4 node)
decode_node_uchar4(node.y, &weight_offset, &in_weight_offset, &weight1_offset, &weight2_offset);
float weight = stack_load_float(stack, weight_offset);
- weight = clamp(weight, 0.0f, 1.0f);
+ weight = saturate(weight);
float in_weight = (stack_valid(in_weight_offset))? stack_load_float(stack, in_weight_offset): 1.0f;
@@ -664,7 +682,7 @@ ccl_device void svm_node_mix_closure(ShaderData *sd, float *stack, uint4 node)
ccl_device void svm_node_set_normal(KernelGlobals *kg, ShaderData *sd, float *stack, uint in_direction, uint out_normal)
{
float3 normal = stack_load_float3(stack, in_direction);
- sd->N = normal;
+ ccl_fetch(sd, N) = normal;
stack_store_float3(stack, out_normal, normal);
}
diff --git a/intern/cycles/kernel/svm/svm_displace.h b/intern/cycles/kernel/svm/svm_displace.h
index 4a058905a93..8d4b07c9973 100644
--- a/intern/cycles/kernel/svm/svm_displace.h
+++ b/intern/cycles/kernel/svm/svm_displace.h
@@ -25,11 +25,11 @@ ccl_device void svm_node_set_bump(KernelGlobals *kg, ShaderData *sd, float *stac
uint normal_offset, distance_offset, invert;
decode_node_uchar4(node.y, &normal_offset, &distance_offset, &invert, NULL);
- float3 normal_in = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): sd->N;
+ float3 normal_in = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): ccl_fetch(sd, N);
/* get surface tangents from normal */
- float3 Rx = cross(sd->dP.dy, normal_in);
- float3 Ry = cross(normal_in, sd->dP.dx);
+ float3 Rx = cross(ccl_fetch(sd, dP).dy, normal_in);
+ float3 Ry = cross(normal_in, ccl_fetch(sd, dP).dx);
/* get bump values */
uint c_offset, x_offset, y_offset, strength_offset;
@@ -40,7 +40,7 @@ ccl_device void svm_node_set_bump(KernelGlobals *kg, ShaderData *sd, float *stac
float h_y = stack_load_float(stack, y_offset);
/* compute surface gradient and determinant */
- float det = dot(sd->dP.dx, Rx);
+ float det = dot(ccl_fetch(sd, dP).dx, Rx);
float3 surfgrad = (h_x - h_c)*Rx + (h_y - h_c)*Ry;
float absdet = fabsf(det);
@@ -65,7 +65,7 @@ ccl_device void svm_node_set_bump(KernelGlobals *kg, ShaderData *sd, float *stac
ccl_device void svm_node_set_displacement(ShaderData *sd, float *stack, uint fac_offset)
{
float d = stack_load_float(stack, fac_offset);
- sd->P += sd->N*d*0.1f; /* todo: get rid of this factor */
+ ccl_fetch(sd, P) += ccl_fetch(sd, N)*d*0.1f; /* todo: get rid of this factor */
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_fresnel.h b/intern/cycles/kernel/svm/svm_fresnel.h
index 3703ec55015..23c97d80cb0 100644
--- a/intern/cycles/kernel/svm/svm_fresnel.h
+++ b/intern/cycles/kernel/svm/svm_fresnel.h
@@ -23,12 +23,12 @@ ccl_device void svm_node_fresnel(ShaderData *sd, float *stack, uint ior_offset,
uint normal_offset, out_offset;
decode_node_uchar4(node, &normal_offset, &out_offset, NULL, NULL);
float eta = (stack_valid(ior_offset))? stack_load_float(stack, ior_offset): __uint_as_float(ior_value);
- float3 normal_in = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): sd->N;
+ float3 normal_in = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): ccl_fetch(sd, N);
eta = fmaxf(eta, 1e-5f);
- eta = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta;
+ eta = (ccl_fetch(sd, flag) & SD_BACKFACING)? 1.0f/eta: eta;
- float f = fresnel_dielectric_cos(dot(sd->I, normal_in), eta);
+ float f = fresnel_dielectric_cos(dot(ccl_fetch(sd, I), normal_in), eta);
stack_store_float(stack, out_offset, f);
}
@@ -44,18 +44,18 @@ ccl_device void svm_node_layer_weight(ShaderData *sd, float *stack, uint4 node)
decode_node_uchar4(node.w, &type, &normal_offset, &out_offset, NULL);
float blend = (stack_valid(blend_offset))? stack_load_float(stack, blend_offset): __uint_as_float(blend_value);
- float3 normal_in = (stack_valid(normal_offset))? stack_load_float3(stack, normal_offset): sd->N;
+ float3 normal_in = (stack_valid(normal_offset))? stack_load_float3(stack, normal_offset): ccl_fetch(sd, N);
float f;
if(type == NODE_LAYER_WEIGHT_FRESNEL) {
float eta = fmaxf(1.0f - blend, 1e-5f);
- eta = (sd->flag & SD_BACKFACING)? eta: 1.0f/eta;
+ eta = (ccl_fetch(sd, flag) & SD_BACKFACING)? eta: 1.0f/eta;
- f = fresnel_dielectric_cos(dot(sd->I, normal_in), eta);
+ f = fresnel_dielectric_cos(dot(ccl_fetch(sd, I), normal_in), eta);
}
else {
- f = fabsf(dot(sd->I, normal_in));
+ f = fabsf(dot(ccl_fetch(sd, I), normal_in));
if(blend != 0.5f) {
blend = clamp(blend, 0.0f, 1.0f-1e-5f);
diff --git a/intern/cycles/kernel/svm/svm_gamma.h b/intern/cycles/kernel/svm/svm_gamma.h
index 8bc59b9c673..b645ff3f0f9 100644
--- a/intern/cycles/kernel/svm/svm_gamma.h
+++ b/intern/cycles/kernel/svm/svm_gamma.h
@@ -21,14 +21,14 @@ ccl_device void svm_node_gamma(ShaderData *sd, float *stack, uint in_gamma, uint
float3 color = stack_load_float3(stack, in_color);
float gamma = stack_load_float(stack, in_gamma);
- if (color.x > 0.0f)
+ if(color.x > 0.0f)
color.x = powf(color.x, gamma);
- if (color.y > 0.0f)
+ if(color.y > 0.0f)
color.y = powf(color.y, gamma);
- if (color.z > 0.0f)
+ if(color.z > 0.0f)
color.z = powf(color.z, gamma);
- if (stack_valid(out_color))
+ if(stack_valid(out_color))
stack_store_float3(stack, out_color, color);
}
diff --git a/intern/cycles/kernel/svm/svm_geometry.h b/intern/cycles/kernel/svm/svm_geometry.h
index efbefa77d28..bb06254c3a9 100644
--- a/intern/cycles/kernel/svm/svm_geometry.h
+++ b/intern/cycles/kernel/svm/svm_geometry.h
@@ -23,15 +23,15 @@ ccl_device void svm_node_geometry(KernelGlobals *kg, ShaderData *sd, float *stac
float3 data;
switch(type) {
- case NODE_GEOM_P: data = sd->P; break;
- case NODE_GEOM_N: data = sd->N; break;
+ case NODE_GEOM_P: data = ccl_fetch(sd, P); break;
+ case NODE_GEOM_N: data = ccl_fetch(sd, N); break;
#ifdef __DPDU__
case NODE_GEOM_T: data = primitive_tangent(kg, sd); break;
#endif
- case NODE_GEOM_I: data = sd->I; break;
- case NODE_GEOM_Ng: data = sd->Ng; break;
+ case NODE_GEOM_I: data = ccl_fetch(sd, I); break;
+ case NODE_GEOM_Ng: data = ccl_fetch(sd, Ng); break;
#ifdef __UV__
- case NODE_GEOM_uv: data = make_float3(sd->u, sd->v, 0.0f); break;
+ case NODE_GEOM_uv: data = make_float3(ccl_fetch(sd, u), ccl_fetch(sd, v), 0.0f); break;
#endif
}
@@ -44,8 +44,8 @@ ccl_device void svm_node_geometry_bump_dx(KernelGlobals *kg, ShaderData *sd, flo
float3 data;
switch(type) {
- case NODE_GEOM_P: data = sd->P + sd->dP.dx; break;
- case NODE_GEOM_uv: data = make_float3(sd->u + sd->du.dx, sd->v + sd->dv.dx, 0.0f); break;
+ case NODE_GEOM_P: data = ccl_fetch(sd, P) + ccl_fetch(sd, dP).dx; break;
+ case NODE_GEOM_uv: data = make_float3(ccl_fetch(sd, u) + ccl_fetch(sd, du).dx, ccl_fetch(sd, v) + ccl_fetch(sd, dv).dx, 0.0f); break;
default: svm_node_geometry(kg, sd, stack, type, out_offset); return;
}
@@ -61,8 +61,8 @@ ccl_device void svm_node_geometry_bump_dy(KernelGlobals *kg, ShaderData *sd, flo
float3 data;
switch(type) {
- case NODE_GEOM_P: data = sd->P + sd->dP.dy; break;
- case NODE_GEOM_uv: data = make_float3(sd->u + sd->du.dy, sd->v + sd->dv.dy, 0.0f); break;
+ case NODE_GEOM_P: data = ccl_fetch(sd, P) + ccl_fetch(sd, dP).dy; break;
+ case NODE_GEOM_uv: data = make_float3(ccl_fetch(sd, u) + ccl_fetch(sd, du).dy, ccl_fetch(sd, v) + ccl_fetch(sd, dv).dy, 0.0f); break;
default: svm_node_geometry(kg, sd, stack, type, out_offset); return;
}
@@ -83,9 +83,9 @@ ccl_device void svm_node_object_info(KernelGlobals *kg, ShaderData *sd, float *s
stack_store_float3(stack, out_offset, object_location(kg, sd));
return;
}
- case NODE_INFO_OB_INDEX: data = object_pass_id(kg, sd->object); break;
+ case NODE_INFO_OB_INDEX: data = object_pass_id(kg, ccl_fetch(sd, object)); break;
case NODE_INFO_MAT_INDEX: data = shader_pass_id(kg, sd); break;
- case NODE_INFO_OB_RANDOM: data = object_random_number(kg, sd->object); break;
+ case NODE_INFO_OB_RANDOM: data = object_random_number(kg, ccl_fetch(sd, object)); break;
default: data = 0.0f; break;
}
@@ -98,44 +98,44 @@ ccl_device void svm_node_particle_info(KernelGlobals *kg, ShaderData *sd, float
{
switch(type) {
case NODE_INFO_PAR_INDEX: {
- int particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, ccl_fetch(sd, object));
stack_store_float(stack, out_offset, particle_index(kg, particle_id));
break;
}
case NODE_INFO_PAR_AGE: {
- int particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, ccl_fetch(sd, object));
stack_store_float(stack, out_offset, particle_age(kg, particle_id));
break;
}
case NODE_INFO_PAR_LIFETIME: {
- int particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, ccl_fetch(sd, object));
stack_store_float(stack, out_offset, particle_lifetime(kg, particle_id));
break;
}
case NODE_INFO_PAR_LOCATION: {
- int particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, ccl_fetch(sd, object));
stack_store_float3(stack, out_offset, particle_location(kg, particle_id));
break;
}
#if 0 /* XXX float4 currently not supported in SVM stack */
case NODE_INFO_PAR_ROTATION: {
- int particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, ccl_fetch(sd, object));
stack_store_float4(stack, out_offset, particle_rotation(kg, particle_id));
break;
}
#endif
case NODE_INFO_PAR_SIZE: {
- int particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, ccl_fetch(sd, object));
stack_store_float(stack, out_offset, particle_size(kg, particle_id));
break;
}
case NODE_INFO_PAR_VELOCITY: {
- int particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, ccl_fetch(sd, object));
stack_store_float3(stack, out_offset, particle_velocity(kg, particle_id));
break;
}
case NODE_INFO_PAR_ANGULAR_VELOCITY: {
- int particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, ccl_fetch(sd, object));
stack_store_float3(stack, out_offset, particle_angular_velocity(kg, particle_id));
break;
}
@@ -153,7 +153,7 @@ ccl_device void svm_node_hair_info(KernelGlobals *kg, ShaderData *sd, float *sta
switch(type) {
case NODE_INFO_CURVE_IS_STRAND: {
- data = (sd->type & PRIMITIVE_ALL_CURVE) != 0;
+ data = (ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) != 0;
stack_store_float(stack, out_offset, data);
break;
}
@@ -165,7 +165,7 @@ ccl_device void svm_node_hair_info(KernelGlobals *kg, ShaderData *sd, float *sta
break;
}
/*case NODE_INFO_CURVE_FADE: {
- data = sd->curve_transparency;
+ data = ccl_fetch(sd, curve_transparency);
stack_store_float(stack, out_offset, data);
break;
}*/
diff --git a/intern/cycles/kernel/svm/svm_gradient.h b/intern/cycles/kernel/svm/svm_gradient.h
index a5e385faddc..53d7b4f812c 100644
--- a/intern/cycles/kernel/svm/svm_gradient.h
+++ b/intern/cycles/kernel/svm/svm_gradient.h
@@ -66,7 +66,7 @@ ccl_device void svm_node_tex_gradient(ShaderData *sd, float *stack, uint4 node)
float3 co = stack_load_float3(stack, co_offset);
float f = svm_gradient(co, (NodeGradientType)type);
- f = clamp(f, 0.0f, 1.0f);
+ f = saturate(f);
if(stack_valid(fac_offset))
stack_store_float(stack, fac_offset, f);
diff --git a/intern/cycles/kernel/svm/svm_hsv.h b/intern/cycles/kernel/svm/svm_hsv.h
index eeb4ba25e91..1f2cad60df7 100644
--- a/intern/cycles/kernel/svm/svm_hsv.h
+++ b/intern/cycles/kernel/svm/svm_hsv.h
@@ -46,12 +46,12 @@ ccl_device void svm_node_hsv(KernelGlobals *kg, ShaderData *sd, float *stack, ui
color.y = fac*color.y + (1.0f - fac)*in_color.y;
color.z = fac*color.z + (1.0f - fac)*in_color.z;
- /* Clamp color to prevent negative values cauzed by oversaturation. */
+ /* Clamp color to prevent negative values caused by oversaturation. */
color.x = max(color.x, 0.0f);
color.y = max(color.y, 0.0f);
color.z = max(color.z, 0.0f);
- if (stack_valid(out_color_offset))
+ if(stack_valid(out_color_offset))
stack_store_float3(stack, out_color_offset, color);
}
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index 4de69479bd9..caf0b37ba35 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -65,7 +65,7 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
float4 r;
int ix, iy, nix, niy;
- if (interpolation == INTERPOLATION_CLOSEST) {
+ if(interpolation == INTERPOLATION_CLOSEST) {
svm_image_texture_frac(x*width, &ix);
svm_image_texture_frac(y*height, &iy);
@@ -251,9 +251,9 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
case 95: r = kernel_tex_image_interp(__tex_image_095, x, y); break;
case 96: r = kernel_tex_image_interp(__tex_image_096, x, y); break;
case 97: r = kernel_tex_image_interp(__tex_image_097, x, y); break;
- case 98: r = kernel_tex_image_interp(__tex_image_098, x, y); break;
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
+ case 98: r = kernel_tex_image_interp(__tex_image_098, x, y); break;
case 99: r = kernel_tex_image_interp(__tex_image_099, x, y); break;
case 100: r = kernel_tex_image_interp(__tex_image_100, x, y); break;
case 101: r = kernel_tex_image_interp(__tex_image_101, x, y); break;
@@ -392,10 +392,10 @@ ccl_device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *sta
ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
/* get object space normal */
- float3 N = sd->N;
+ float3 N = ccl_fetch(sd, N);
- N = sd->N;
- if(sd->object != OBJECT_NONE)
+ N = ccl_fetch(sd, N);
+ if(ccl_fetch(sd, object) != OBJECT_NONE)
object_inverse_normal_transform(kg, sd, &N);
/* project from direction vector to barycentric coordinates in triangles */
@@ -433,17 +433,17 @@ ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float
/* in case of blending, test for mixes between two textures */
if(N.z < (1.0f - limit)*(N.y + N.x)) {
weight.x = N.x/(N.x + N.y);
- weight.x = clamp((weight.x - 0.5f*(1.0f - blend))/blend, 0.0f, 1.0f);
+ weight.x = saturate((weight.x - 0.5f*(1.0f - blend))/blend);
weight.y = 1.0f - weight.x;
}
else if(N.x < (1.0f - limit)*(N.y + N.z)) {
weight.y = N.y/(N.y + N.z);
- weight.y = clamp((weight.y - 0.5f*(1.0f - blend))/blend, 0.0f, 1.0f);
+ weight.y = saturate((weight.y - 0.5f*(1.0f - blend))/blend);
weight.z = 1.0f - weight.y;
}
else if(N.y < (1.0f - limit)*(N.x + N.z)) {
weight.x = N.x/(N.x + N.z);
- weight.x = clamp((weight.x - 0.5f*(1.0f - blend))/blend, 0.0f, 1.0f);
+ weight.x = saturate((weight.x - 0.5f*(1.0f - blend))/blend);
weight.z = 1.0f - weight.x;
}
else {
diff --git a/intern/cycles/kernel/svm/svm_invert.h b/intern/cycles/kernel/svm/svm_invert.h
index 152b49174e0..5ce858e2e5d 100644
--- a/intern/cycles/kernel/svm/svm_invert.h
+++ b/intern/cycles/kernel/svm/svm_invert.h
@@ -30,7 +30,7 @@ ccl_device void svm_node_invert(ShaderData *sd, float *stack, uint in_fac, uint
color.y = invert(color.y, factor);
color.z = invert(color.z, factor);
- if (stack_valid(out_color))
+ if(stack_valid(out_color))
stack_store_float3(stack, out_color, color);
}
diff --git a/intern/cycles/kernel/svm/svm_light_path.h b/intern/cycles/kernel/svm/svm_light_path.h
index 677d139c5d4..a235dd35224 100644
--- a/intern/cycles/kernel/svm/svm_light_path.h
+++ b/intern/cycles/kernel/svm/svm_light_path.h
@@ -31,10 +31,10 @@ ccl_device void svm_node_light_path(ShaderData *sd, float *stack, uint type, uin
case NODE_LP_reflection: info = (path_flag & PATH_RAY_REFLECT)? 1.0f: 0.0f; break;
case NODE_LP_transmission: info = (path_flag & PATH_RAY_TRANSMIT)? 1.0f: 0.0f; break;
case NODE_LP_volume_scatter: info = (path_flag & PATH_RAY_VOLUME_SCATTER)? 1.0f: 0.0f; break;
- case NODE_LP_backfacing: info = (sd->flag & SD_BACKFACING)? 1.0f: 0.0f; break;
- case NODE_LP_ray_length: info = sd->ray_length; break;
- case NODE_LP_ray_depth: info = (float)sd->ray_depth; break;
- case NODE_LP_ray_transparent: info = sd->transparent_depth; break;
+ case NODE_LP_backfacing: info = (ccl_fetch(sd, flag) & SD_BACKFACING)? 1.0f: 0.0f; break;
+ case NODE_LP_ray_length: info = ccl_fetch(sd, ray_length); break;
+ case NODE_LP_ray_depth: info = (float)ccl_fetch(sd, ray_depth); break;
+ case NODE_LP_ray_transparent: info = (float)ccl_fetch(sd, transparent_depth); break;
}
stack_store_float(stack, out_offset, info);
@@ -53,14 +53,14 @@ ccl_device void svm_node_light_falloff(ShaderData *sd, float *stack, uint4 node)
switch(type) {
case NODE_LIGHT_FALLOFF_QUADRATIC: break;
- case NODE_LIGHT_FALLOFF_LINEAR: strength *= sd->ray_length; break;
- case NODE_LIGHT_FALLOFF_CONSTANT: strength *= sd->ray_length*sd->ray_length; break;
+ case NODE_LIGHT_FALLOFF_LINEAR: strength *= ccl_fetch(sd, ray_length); break;
+ case NODE_LIGHT_FALLOFF_CONSTANT: strength *= ccl_fetch(sd, ray_length)*ccl_fetch(sd, ray_length); break;
}
float smooth = stack_load_float(stack, smooth_offset);
if(smooth > 0.0f) {
- float squared = sd->ray_length*sd->ray_length;
+ float squared = ccl_fetch(sd, ray_length)*ccl_fetch(sd, ray_length);
strength *= squared/(smooth + squared);
}
diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h
index 39cc14d5e8e..645cbd3fc73 100644
--- a/intern/cycles/kernel/svm/svm_math_util.h
+++ b/intern/cycles/kernel/svm/svm_math_util.h
@@ -97,12 +97,74 @@ ccl_device float svm_math(NodeMath type, float Fac1, float Fac2)
else if(type == NODE_MATH_ABSOLUTE)
Fac = fabsf(Fac1);
else if(type == NODE_MATH_CLAMP)
- Fac = clamp(Fac1, 0.0f, 1.0f);
+ Fac = saturate(Fac1);
else
Fac = 0.0f;
return Fac;
}
+ccl_device float3 svm_math_blackbody_color(float t) {
+ /* Calculate color in range 800..12000 using an approximation
+ * a/x+bx+c for R and G and ((at + b)t + c)t + d) for B
+ * Max absolute error for RGB is (0.00095, 0.00077, 0.00057),
+ * which is enough to get the same 8 bit/channel color.
+ */
+
+ const float rc[6][3] = {
+ { 2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f },
+ { 3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f },
+ { 4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f },
+ { 4.66849800e+03f, 2.85655028e-05f, 1.29075375e-01f },
+ { 4.60124770e+03f, 2.89727618e-05f, 1.48001316e-01f },
+ { 3.78765709e+03f, 9.36026367e-06f, 3.98995841e-01f },
+ };
+
+ const float gc[6][3] = {
+ { -7.50343014e+02f, 3.15679613e-04f, 4.73464526e-01f },
+ { -1.00402363e+03f, 1.29189794e-04f, 9.08181524e-01f },
+ { -1.22075471e+03f, 2.56245413e-05f, 1.20753416e+00f },
+ { -1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f },
+ { -1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f },
+ { -5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f },
+ };
+
+ const float bc[6][4] = {
+ { 0.0f, 0.0f, 0.0f, 0.0f }, /* zeros should be optimized by compiler */
+ { 0.0f, 0.0f, 0.0f, 0.0f },
+ { 0.0f, 0.0f, 0.0f, 0.0f },
+ { -2.02524603e-11f, 1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f },
+ { -2.22463426e-13f, -1.55078698e-08f, 3.81675160e-04f, -7.30646033e-01f },
+ { 6.72595954e-13f, -2.73059993e-08f, 4.24068546e-04f, -7.52204323e-01f },
+ };
+
+ if(t >= 12000.0f)
+ return make_float3(0.826270103f, 0.994478524f, 1.56626022f);
+
+ /* Define a macro to reduce stack usage for nvcc */
+#define MAKE_BB_RGB(i) make_float3(\
+ rc[i][0] / t + rc[i][1] * t + rc[i][2],\
+ gc[i][0] / t + gc[i][1] * t + gc[i][2],\
+ ((bc[i][0] * t + bc[i][1]) * t + bc[i][2]) * t + bc[i][3])
+
+ if(t >= 6365.0f)
+ return MAKE_BB_RGB(5);
+ if(t >= 3315.0f)
+ return MAKE_BB_RGB(4);
+ if(t >= 1902.0f)
+ return MAKE_BB_RGB(3);
+ if(t >= 1449.0f)
+ return MAKE_BB_RGB(2);
+ if(t >= 1167.0f)
+ return MAKE_BB_RGB(1);
+ if(t >= 965.0f)
+ return MAKE_BB_RGB(0);
+
+#undef MAKE_BB_RGB
+
+ /* For 800 <= t < 965 color does not change in OSL implementation, so keep color the same */
+ return make_float3(4.70366907f, 0.0f, 0.0f);
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_mix.h b/intern/cycles/kernel/svm/svm_mix.h
index b6b1966cb3b..6111214acba 100644
--- a/intern/cycles/kernel/svm/svm_mix.h
+++ b/intern/cycles/kernel/svm/svm_mix.h
@@ -254,16 +254,16 @@ ccl_device float3 svm_mix_clamp(float3 col)
{
float3 outcol = col;
- outcol.x = clamp(col.x, 0.0f, 1.0f);
- outcol.y = clamp(col.y, 0.0f, 1.0f);
- outcol.z = clamp(col.z, 0.0f, 1.0f);
+ outcol.x = saturate(col.x);
+ outcol.y = saturate(col.y);
+ outcol.z = saturate(col.z);
return outcol;
}
ccl_device float3 svm_mix(NodeMix type, float fac, float3 c1, float3 c2)
{
- float t = clamp(fac, 0.0f, 1.0f);
+ float t = saturate(fac);
switch(type) {
case NODE_MIX_BLEND: return svm_mix_blend(t, c1, c2);
diff --git a/intern/cycles/kernel/svm/svm_musgrave.h b/intern/cycles/kernel/svm/svm_musgrave.h
index 2f81ddaa74c..09eba31945e 100644
--- a/intern/cycles/kernel/svm/svm_musgrave.h
+++ b/intern/cycles/kernel/svm/svm_musgrave.h
@@ -25,7 +25,7 @@ CCL_NAMESPACE_BEGIN
* from "Texturing and Modelling: A procedural approach"
*/
-ccl_device_noinline float noise_musgrave_fBm(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves)
+ccl_device_noinline float noise_musgrave_fBm(float3 p, float H, float lacunarity, float octaves)
{
float rmd;
float value = 0.0f;
@@ -53,7 +53,7 @@ ccl_device_noinline float noise_musgrave_fBm(float3 p, NodeNoiseBasis basis, flo
* octaves: number of frequencies in the fBm
*/
-ccl_device_noinline float noise_musgrave_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves)
+ccl_device_noinline float noise_musgrave_multi_fractal(float3 p, float H, float lacunarity, float octaves)
{
float rmd;
float value = 1.0f;
@@ -82,7 +82,7 @@ ccl_device_noinline float noise_musgrave_multi_fractal(float3 p, NodeNoiseBasis
* offset: raises the terrain from `sea level'
*/
-ccl_device_noinline float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset)
+ccl_device_noinline float noise_musgrave_hetero_terrain(float3 p, float H, float lacunarity, float octaves, float offset)
{
float value, increment, rmd;
float pwHL = powf(lacunarity, -H);
@@ -117,7 +117,7 @@ ccl_device_noinline float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis
* offset: raises the terrain from `sea level'
*/
-ccl_device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset, float gain)
+ccl_device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, float H, float lacunarity, float octaves, float offset, float gain)
{
float result, signal, weight, rmd;
float pwHL = powf(lacunarity, -H);
@@ -154,7 +154,7 @@ ccl_device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNois
* offset: raises the terrain from `sea level'
*/
-ccl_device_noinline float noise_musgrave_ridged_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset, float gain)
+ccl_device_noinline float noise_musgrave_ridged_multi_fractal(float3 p, float H, float lacunarity, float octaves, float offset, float gain)
{
float result, signal, weight;
float pwHL = powf(lacunarity, -H);
@@ -168,7 +168,7 @@ ccl_device_noinline float noise_musgrave_ridged_multi_fractal(float3 p, NodeNois
for(i = 1; i < float_to_int(octaves); i++) {
p *= lacunarity;
- weight = clamp(signal * gain, 0.0f, 1.0f);
+ weight = saturate(signal * gain);
signal = offset - fabsf(snoise(p));
signal *= signal;
signal *= weight;
@@ -183,18 +183,16 @@ ccl_device_noinline float noise_musgrave_ridged_multi_fractal(float3 p, NodeNois
ccl_device float svm_musgrave(NodeMusgraveType type, float dimension, float lacunarity, float octaves, float offset, float intensity, float gain, float3 p)
{
- NodeNoiseBasis basis = NODE_NOISE_PERLIN;
-
if(type == NODE_MUSGRAVE_MULTIFRACTAL)
- return intensity*noise_musgrave_multi_fractal(p, basis, dimension, lacunarity, octaves);
+ return intensity*noise_musgrave_multi_fractal(p, dimension, lacunarity, octaves);
else if(type == NODE_MUSGRAVE_FBM)
- return intensity*noise_musgrave_fBm(p, basis, dimension, lacunarity, octaves);
+ return intensity*noise_musgrave_fBm(p, dimension, lacunarity, octaves);
else if(type == NODE_MUSGRAVE_HYBRID_MULTIFRACTAL)
- return intensity*noise_musgrave_hybrid_multi_fractal(p, basis, dimension, lacunarity, octaves, offset, gain);
+ return intensity*noise_musgrave_hybrid_multi_fractal(p, dimension, lacunarity, octaves, offset, gain);
else if(type == NODE_MUSGRAVE_RIDGED_MULTIFRACTAL)
- return intensity*noise_musgrave_ridged_multi_fractal(p, basis, dimension, lacunarity, octaves, offset, gain);
+ return intensity*noise_musgrave_ridged_multi_fractal(p, dimension, lacunarity, octaves, offset, gain);
else if(type == NODE_MUSGRAVE_HETERO_TERRAIN)
- return intensity*noise_musgrave_hetero_terrain(p, basis, dimension, lacunarity, octaves, offset);
+ return intensity*noise_musgrave_hetero_terrain(p, dimension, lacunarity, octaves, offset);
return 0.0f;
}
diff --git a/intern/cycles/kernel/svm/svm_noisetex.h b/intern/cycles/kernel/svm/svm_noisetex.h
index eccd119b74f..62ff38cf1c5 100644
--- a/intern/cycles/kernel/svm/svm_noisetex.h
+++ b/intern/cycles/kernel/svm/svm_noisetex.h
@@ -20,23 +20,22 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline void svm_noise(float3 p, float detail, float distortion, float *fac, float3 *color)
{
- NodeNoiseBasis basis = NODE_NOISE_PERLIN;
int hard = 0;
if(distortion != 0.0f) {
float3 r, offset = make_float3(13.5f, 13.5f, 13.5f);
- r.x = noise_basis(p + offset, basis) * distortion;
- r.y = noise_basis(p, basis) * distortion;
- r.z = noise_basis(p - offset, basis) * distortion;
+ r.x = noise(p + offset) * distortion;
+ r.y = noise(p) * distortion;
+ r.z = noise(p - offset) * distortion;
p += r;
}
- *fac = noise_turbulence(p, basis, detail, hard);
+ *fac = noise_turbulence(p, detail, hard);
*color = make_float3(*fac,
- noise_turbulence(make_float3(p.y, p.x, p.z), basis, detail, hard),
- noise_turbulence(make_float3(p.y, p.z, p.x), basis, detail, hard));
+ noise_turbulence(make_float3(p.y, p.x, p.z), detail, hard),
+ noise_turbulence(make_float3(p.y, p.z, p.x), detail, hard));
}
ccl_device void svm_node_tex_noise(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
diff --git a/intern/cycles/kernel/svm/svm_normal.h b/intern/cycles/kernel/svm/svm_normal.h
index 67b5321e0de..53abef71012 100644
--- a/intern/cycles/kernel/svm/svm_normal.h
+++ b/intern/cycles/kernel/svm/svm_normal.h
@@ -28,10 +28,10 @@ ccl_device void svm_node_normal(KernelGlobals *kg, ShaderData *sd, float *stack,
direction.z = __int_as_float(node1.z);
direction = normalize(direction);
- if (stack_valid(out_normal_offset))
+ if(stack_valid(out_normal_offset))
stack_store_float3(stack, out_normal_offset, direction);
- if (stack_valid(out_dot_offset))
+ if(stack_valid(out_dot_offset))
stack_store_float(stack, out_dot_offset, dot(direction, normalize(normal)));
}
diff --git a/intern/cycles/kernel/svm/svm_ramp.h b/intern/cycles/kernel/svm/svm_ramp.h
index 998f649a571..062ab013b1f 100644
--- a/intern/cycles/kernel/svm/svm_ramp.h
+++ b/intern/cycles/kernel/svm/svm_ramp.h
@@ -21,7 +21,7 @@ CCL_NAMESPACE_BEGIN
ccl_device float4 rgb_ramp_lookup(KernelGlobals *kg, int offset, float f, bool interpolate)
{
- f = clamp(f, 0.0f, 1.0f)*(RAMP_TABLE_SIZE-1);
+ f = saturate(f)*(RAMP_TABLE_SIZE-1);
/* clamp int as well in case of NaN */
int i = clamp(float_to_int(f), 0, RAMP_TABLE_SIZE-1);
diff --git a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
index 68f9fea02f0..6f51b163756 100644
--- a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
+++ b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
@@ -28,7 +28,7 @@ ccl_device void svm_node_combine_hsv(KernelGlobals *kg, ShaderData *sd, float *s
/* Combine, and convert back to RGB */
float3 color = hsv_to_rgb(make_float3(hue, saturation, value));
- if (stack_valid(color_out))
+ if(stack_valid(color_out))
stack_store_float3(stack, color_out, color);
}
@@ -42,11 +42,11 @@ ccl_device void svm_node_separate_hsv(KernelGlobals *kg, ShaderData *sd, float *
/* Convert to HSV */
color = rgb_to_hsv(color);
- if (stack_valid(hue_out))
+ if(stack_valid(hue_out))
stack_store_float(stack, hue_out, color.x);
- if (stack_valid(saturation_out))
+ if(stack_valid(saturation_out))
stack_store_float(stack, saturation_out, color.y);
- if (stack_valid(value_out))
+ if(stack_valid(value_out))
stack_store_float(stack, value_out, color.z);
}
diff --git a/intern/cycles/kernel/svm/svm_sepcomb_vector.h b/intern/cycles/kernel/svm/svm_sepcomb_vector.h
index 7a5a69f6dff..63570dd6942 100644
--- a/intern/cycles/kernel/svm/svm_sepcomb_vector.h
+++ b/intern/cycles/kernel/svm/svm_sepcomb_vector.h
@@ -22,7 +22,7 @@ ccl_device void svm_node_combine_vector(ShaderData *sd, float *stack, uint in_of
{
float vector = stack_load_float(stack, in_offset);
- if (stack_valid(out_offset))
+ if(stack_valid(out_offset))
stack_store_float(stack, out_offset+vector_index, vector);
}
@@ -30,10 +30,10 @@ ccl_device void svm_node_separate_vector(ShaderData *sd, float *stack, uint ivec
{
float3 vector = stack_load_float3(stack, ivector_offset);
- if (stack_valid(out_offset)) {
- if (vector_index == 0)
+ if(stack_valid(out_offset)) {
+ if(vector_index == 0)
stack_store_float(stack, out_offset, vector.x);
- else if (vector_index == 1)
+ else if(vector_index == 1)
stack_store_float(stack, out_offset, vector.y);
else
stack_store_float(stack, out_offset, vector.z);
diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h
index a399acf3c0f..eebd9bee420 100644
--- a/intern/cycles/kernel/svm/svm_tex_coord.h
+++ b/intern/cycles/kernel/svm/svm_tex_coord.h
@@ -31,9 +31,9 @@ ccl_device void svm_node_tex_coord(KernelGlobals *kg,
switch(type) {
case NODE_TEXCO_OBJECT: {
- data = sd->P;
+ data = ccl_fetch(sd, P);
if(node.w == 0) {
- if(sd->object != OBJECT_NONE) {
+ if(ccl_fetch(sd, object) != OBJECT_NONE) {
object_inverse_position_transform(kg, sd, &data);
}
}
@@ -48,48 +48,48 @@ ccl_device void svm_node_tex_coord(KernelGlobals *kg,
break;
}
case NODE_TEXCO_NORMAL: {
- data = sd->N;
- if(sd->object != OBJECT_NONE)
+ data = ccl_fetch(sd, N);
+ if(ccl_fetch(sd, object) != OBJECT_NONE)
object_inverse_normal_transform(kg, sd, &data);
break;
}
case NODE_TEXCO_CAMERA: {
Transform tfm = kernel_data.cam.worldtocamera;
- if(sd->object != OBJECT_NONE)
- data = transform_point(&tfm, sd->P);
+ if(ccl_fetch(sd, object) != OBJECT_NONE)
+ data = transform_point(&tfm, ccl_fetch(sd, P));
else
- data = transform_point(&tfm, sd->P + camera_position(kg));
+ data = transform_point(&tfm, ccl_fetch(sd, P) + camera_position(kg));
break;
}
case NODE_TEXCO_WINDOW: {
- if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
- data = camera_world_to_ndc(kg, sd, sd->ray_P);
+ if((path_flag & PATH_RAY_CAMERA) && ccl_fetch(sd, object) == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
+ data = camera_world_to_ndc(kg, sd, ccl_fetch(sd, ray_P));
else
- data = camera_world_to_ndc(kg, sd, sd->P);
+ data = camera_world_to_ndc(kg, sd, ccl_fetch(sd, P));
data.z = 0.0f;
break;
}
case NODE_TEXCO_REFLECTION: {
- if(sd->object != OBJECT_NONE)
- data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I;
+ if(ccl_fetch(sd, object) != OBJECT_NONE)
+ data = 2.0f*dot(ccl_fetch(sd, N), ccl_fetch(sd, I))*ccl_fetch(sd, N) - ccl_fetch(sd, I);
else
- data = sd->I;
+ data = ccl_fetch(sd, I);
break;
}
case NODE_TEXCO_DUPLI_GENERATED: {
- data = object_dupli_generated(kg, sd->object);
+ data = object_dupli_generated(kg, ccl_fetch(sd, object));
break;
}
case NODE_TEXCO_DUPLI_UV: {
- data = object_dupli_uv(kg, sd->object);
+ data = object_dupli_uv(kg, ccl_fetch(sd, object));
break;
}
case NODE_TEXCO_VOLUME_GENERATED: {
- data = sd->P;
+ data = ccl_fetch(sd, P);
#ifdef __VOLUME__
- if(sd->object != OBJECT_NONE)
+ if(ccl_fetch(sd, object) != OBJECT_NONE)
data = volume_normalized_position(kg, sd, data);
#endif
break;
@@ -113,9 +113,9 @@ ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
switch(type) {
case NODE_TEXCO_OBJECT: {
- data = sd->P + sd->dP.dx;
+ data = ccl_fetch(sd, P) + ccl_fetch(sd, dP).dx;
if(node.w == 0) {
- if(sd->object != OBJECT_NONE) {
+ if(ccl_fetch(sd, object) != OBJECT_NONE) {
object_inverse_position_transform(kg, sd, &data);
}
}
@@ -130,48 +130,48 @@ ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
break;
}
case NODE_TEXCO_NORMAL: {
- data = sd->N;
- if(sd->object != OBJECT_NONE)
+ data = ccl_fetch(sd, N);
+ if(ccl_fetch(sd, object) != OBJECT_NONE)
object_inverse_normal_transform(kg, sd, &data);
break;
}
case NODE_TEXCO_CAMERA: {
Transform tfm = kernel_data.cam.worldtocamera;
- if(sd->object != OBJECT_NONE)
- data = transform_point(&tfm, sd->P + sd->dP.dx);
+ if(ccl_fetch(sd, object) != OBJECT_NONE)
+ data = transform_point(&tfm, ccl_fetch(sd, P) + ccl_fetch(sd, dP).dx);
else
- data = transform_point(&tfm, sd->P + sd->dP.dx + camera_position(kg));
+ data = transform_point(&tfm, ccl_fetch(sd, P) + ccl_fetch(sd, dP).dx + camera_position(kg));
break;
}
case NODE_TEXCO_WINDOW: {
- if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
- data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dx);
+ if((path_flag & PATH_RAY_CAMERA) && ccl_fetch(sd, object) == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
+ data = camera_world_to_ndc(kg, sd, ccl_fetch(sd, ray_P) + ccl_fetch(sd, ray_dP).dx);
else
- data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx);
+ data = camera_world_to_ndc(kg, sd, ccl_fetch(sd, P) + ccl_fetch(sd, dP).dx);
data.z = 0.0f;
break;
}
case NODE_TEXCO_REFLECTION: {
- if(sd->object != OBJECT_NONE)
- data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I;
+ if(ccl_fetch(sd, object) != OBJECT_NONE)
+ data = 2.0f*dot(ccl_fetch(sd, N), ccl_fetch(sd, I))*ccl_fetch(sd, N) - ccl_fetch(sd, I);
else
- data = sd->I;
+ data = ccl_fetch(sd, I);
break;
}
case NODE_TEXCO_DUPLI_GENERATED: {
- data = object_dupli_generated(kg, sd->object);
+ data = object_dupli_generated(kg, ccl_fetch(sd, object));
break;
}
case NODE_TEXCO_DUPLI_UV: {
- data = object_dupli_uv(kg, sd->object);
+ data = object_dupli_uv(kg, ccl_fetch(sd, object));
break;
}
case NODE_TEXCO_VOLUME_GENERATED: {
- data = sd->P + sd->dP.dx;
+ data = ccl_fetch(sd, P) + ccl_fetch(sd, dP).dx;
#ifdef __VOLUME__
- if(sd->object != OBJECT_NONE)
+ if(ccl_fetch(sd, object) != OBJECT_NONE)
data = volume_normalized_position(kg, sd, data);
#endif
break;
@@ -198,9 +198,9 @@ ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg,
switch(type) {
case NODE_TEXCO_OBJECT: {
- data = sd->P + sd->dP.dy;
+ data = ccl_fetch(sd, P) + ccl_fetch(sd, dP).dy;
if(node.w == 0) {
- if(sd->object != OBJECT_NONE) {
+ if(ccl_fetch(sd, object) != OBJECT_NONE) {
object_inverse_position_transform(kg, sd, &data);
}
}
@@ -215,48 +215,48 @@ ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg,
break;
}
case NODE_TEXCO_NORMAL: {
- data = sd->N;
- if(sd->object != OBJECT_NONE)
+ data = ccl_fetch(sd, N);
+ if(ccl_fetch(sd, object) != OBJECT_NONE)
object_inverse_normal_transform(kg, sd, &data);
break;
}
case NODE_TEXCO_CAMERA: {
Transform tfm = kernel_data.cam.worldtocamera;
- if(sd->object != OBJECT_NONE)
- data = transform_point(&tfm, sd->P + sd->dP.dy);
+ if(ccl_fetch(sd, object) != OBJECT_NONE)
+ data = transform_point(&tfm, ccl_fetch(sd, P) + ccl_fetch(sd, dP).dy);
else
- data = transform_point(&tfm, sd->P + sd->dP.dy + camera_position(kg));
+ data = transform_point(&tfm, ccl_fetch(sd, P) + ccl_fetch(sd, dP).dy + camera_position(kg));
break;
}
case NODE_TEXCO_WINDOW: {
- if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
- data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dy);
+ if((path_flag & PATH_RAY_CAMERA) && ccl_fetch(sd, object) == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
+ data = camera_world_to_ndc(kg, sd, ccl_fetch(sd, ray_P) + ccl_fetch(sd, ray_dP).dy);
else
- data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy);
+ data = camera_world_to_ndc(kg, sd, ccl_fetch(sd, P) + ccl_fetch(sd, dP).dy);
data.z = 0.0f;
break;
}
case NODE_TEXCO_REFLECTION: {
- if(sd->object != OBJECT_NONE)
- data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I;
+ if(ccl_fetch(sd, object) != OBJECT_NONE)
+ data = 2.0f*dot(ccl_fetch(sd, N), ccl_fetch(sd, I))*ccl_fetch(sd, N) - ccl_fetch(sd, I);
else
- data = sd->I;
+ data = ccl_fetch(sd, I);
break;
}
case NODE_TEXCO_DUPLI_GENERATED: {
- data = object_dupli_generated(kg, sd->object);
+ data = object_dupli_generated(kg, ccl_fetch(sd, object));
break;
}
case NODE_TEXCO_DUPLI_UV: {
- data = object_dupli_uv(kg, sd->object);
+ data = object_dupli_uv(kg, ccl_fetch(sd, object));
break;
}
case NODE_TEXCO_VOLUME_GENERATED: {
- data = sd->P + sd->dP.dy;
+ data = ccl_fetch(sd, P) + ccl_fetch(sd, dP).dy;
#ifdef __VOLUME__
- if(sd->object != OBJECT_NONE)
+ if(ccl_fetch(sd, object) != OBJECT_NONE)
data = volume_normalized_position(kg, sd, data);
#endif
break;
@@ -281,7 +281,7 @@ ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *st
if(space == NODE_NORMAL_MAP_TANGENT) {
/* tangent space */
- if(sd->object == OBJECT_NONE) {
+ if(ccl_fetch(sd, object) == OBJECT_NONE) {
stack_store_float3(stack, normal_offset, make_float3(0.0f, 0.0f, 0.0f));
return;
}
@@ -302,11 +302,11 @@ ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *st
float sign = primitive_attribute_float(kg, sd, attr_sign_elem, attr_sign_offset, NULL, NULL);
float3 normal;
- if(sd->shader & SHADER_SMOOTH_NORMAL) {
+ if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) {
normal = primitive_attribute_float3(kg, sd, attr_normal_elem, attr_normal_offset, NULL, NULL);
}
else {
- normal = sd->Ng;
+ normal = ccl_fetch(sd, Ng);
object_inverse_normal_transform(kg, sd, &normal);
}
@@ -337,7 +337,7 @@ ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *st
if(strength != 1.0f) {
strength = max(strength, 0.0f);
- N = normalize(sd->N + (N - sd->N)*strength);
+ N = normalize(ccl_fetch(sd, N) + (N - ccl_fetch(sd, N))*strength);
}
stack_store_float3(stack, normal_offset, N);
@@ -367,7 +367,7 @@ ccl_device void svm_node_tangent(KernelGlobals *kg, ShaderData *sd, float *stack
float3 generated;
if(attr_offset == ATTR_STD_NOT_FOUND)
- generated = sd->P;
+ generated = ccl_fetch(sd, P);
else
generated = primitive_attribute_float3(kg, sd, attr_elem, attr_offset, NULL, NULL);
@@ -380,7 +380,7 @@ ccl_device void svm_node_tangent(KernelGlobals *kg, ShaderData *sd, float *stack
}
object_normal_transform(kg, sd, &tangent);
- tangent = cross(sd->N, normalize(cross(tangent, sd->N)));
+ tangent = cross(ccl_fetch(sd, N), normalize(cross(tangent, ccl_fetch(sd, N))));
stack_store_float3(stack, tangent_offset, tangent);
}
diff --git a/intern/cycles/kernel/svm/svm_texture.h b/intern/cycles/kernel/svm/svm_texture.h
index c5dc213c82d..dcb00f7dd55 100644
--- a/intern/cycles/kernel/svm/svm_texture.h
+++ b/intern/cycles/kernel/svm/svm_texture.h
@@ -16,261 +16,9 @@
CCL_NAMESPACE_BEGIN
-/* Voronoi Distances */
-
-#if 0
-ccl_device float voronoi_distance(NodeDistanceMetric distance_metric, float3 d, float e)
-{
-#if 0
- if(distance_metric == NODE_VORONOI_DISTANCE_SQUARED)
-#endif
- return dot(d, d);
-#if 0
- if(distance_metric == NODE_VORONOI_ACTUAL_DISTANCE)
- return len(d);
- if(distance_metric == NODE_VORONOI_MANHATTAN)
- return fabsf(d.x) + fabsf(d.y) + fabsf(d.z);
- if(distance_metric == NODE_VORONOI_CHEBYCHEV)
- return fmaxf(fabsf(d.x), fmaxf(fabsf(d.y), fabsf(d.z)));
- if(distance_metric == NODE_VORONOI_MINKOVSKY_H)
- return sqrtf(fabsf(d.x)) + sqrtf(fabsf(d.y)) + sqrtf(fabsf(d.y));
- if(distance_metric == NODE_VORONOI_MINKOVSKY_4)
- return sqrtf(sqrtf(dot(d*d, d*d)));
- if(distance_metric == NODE_VORONOI_MINKOVSKY)
- return powf(powf(fabsf(d.x), e) + powf(fabsf(d.y), e) + powf(fabsf(d.z), e), 1.0f/e);
-
- return 0.0f;
-#endif
-}
-
-/* Voronoi / Worley like */
-ccl_device_inline float4 voronoi_Fn(float3 p, float e, int n1, int n2)
-{
- float da[4];
- float3 pa[4];
- NodeDistanceMetric distance_metric = NODE_VORONOI_DISTANCE_SQUARED;
-
- /* returns distances in da and point coords in pa */
- int xx, yy, zz, xi, yi, zi;
-
- xi = floor_to_int(p.x);
- yi = floor_to_int(p.y);
- zi = floor_to_int(p.z);
-
- da[0] = 1e10f;
- da[1] = 1e10f;
- da[2] = 1e10f;
- da[3] = 1e10f;
-
- pa[0] = make_float3(0.0f, 0.0f, 0.0f);
- pa[1] = make_float3(0.0f, 0.0f, 0.0f);
- pa[2] = make_float3(0.0f, 0.0f, 0.0f);
- pa[3] = make_float3(0.0f, 0.0f, 0.0f);
-
- for(xx = xi-1; xx <= xi+1; xx++) {
- for(yy = yi-1; yy <= yi+1; yy++) {
- for(zz = zi-1; zz <= zi+1; zz++) {
- float3 ip = make_float3((float)xx, (float)yy, (float)zz);
- float3 vp = cellnoise_color(ip);
- float3 pd = p - (vp + ip);
- float d = voronoi_distance(distance_metric, pd, e);
-
- vp += ip;
-
- if(d < da[0]) {
- da[3] = da[2];
- da[2] = da[1];
- da[1] = da[0];
- da[0] = d;
-
- pa[3] = pa[2];
- pa[2] = pa[1];
- pa[1] = pa[0];
- pa[0] = vp;
- }
- else if(d < da[1]) {
- da[3] = da[2];
- da[2] = da[1];
- da[1] = d;
-
- pa[3] = pa[2];
- pa[2] = pa[1];
- pa[1] = vp;
- }
- else if(d < da[2]) {
- da[3] = da[2];
- da[2] = d;
-
- pa[3] = pa[2];
- pa[2] = vp;
- }
- else if(d < da[3]) {
- da[3] = d;
- pa[3] = vp;
- }
- }
- }
- }
-
- float4 result = make_float4(pa[n1].x, pa[n1].y, pa[n1].z, da[n1]);
-
- if(n2 != -1)
- result = make_float4(pa[n2].x, pa[n2].y, pa[n2].z, da[n2]) - result;
-
- return result;
-}
-#endif
-
-ccl_device float voronoi_F1_distance(float3 p)
-{
- /* returns squared distance in da */
- float da = 1e10f;
-
-#ifndef __KERNEL_SSE2__
- int ix = floor_to_int(p.x), iy = floor_to_int(p.y), iz = floor_to_int(p.z);
-
- for (int xx = -1; xx <= 1; xx++) {
- for (int yy = -1; yy <= 1; yy++) {
- for (int zz = -1; zz <= 1; zz++) {
- float3 ip = make_float3(ix + xx, iy + yy, iz + zz);
- float3 vp = ip + cellnoise_color(ip);
- float d = len_squared(p - vp);
- da = min(d, da);
- }
- }
- }
-#else
- ssef vec_p = load4f(p);
- ssei xyzi = quick_floor_sse(vec_p);
-
- for (int xx = -1; xx <= 1; xx++) {
- for (int yy = -1; yy <= 1; yy++) {
- for (int zz = -1; zz <= 1; zz++) {
- ssef ip = ssef(xyzi + ssei(xx, yy, zz, 0));
- ssef vp = ip + cellnoise_color(ip);
- float d = len_squared<1, 1, 1, 0>(vec_p - vp);
- da = min(d, da);
- }
- }
- }
-#endif
-
- return da;
-}
-
-ccl_device float3 voronoi_F1_color(float3 p)
-{
- /* returns color of the nearest point */
- float da = 1e10f;
-
-#ifndef __KERNEL_SSE2__
- float3 pa;
- int ix = floor_to_int(p.x), iy = floor_to_int(p.y), iz = floor_to_int(p.z);
-
- for (int xx = -1; xx <= 1; xx++) {
- for (int yy = -1; yy <= 1; yy++) {
- for (int zz = -1; zz <= 1; zz++) {
- float3 ip = make_float3(ix + xx, iy + yy, iz + zz);
- float3 vp = ip + cellnoise_color(ip);
- float d = len_squared(p - vp);
-
- if(d < da) {
- da = d;
- pa = vp;
- }
- }
- }
- }
-
- return cellnoise_color(pa);
-#else
- ssef pa, vec_p = load4f(p);
- ssei xyzi = quick_floor_sse(vec_p);
-
- for (int xx = -1; xx <= 1; xx++) {
- for (int yy = -1; yy <= 1; yy++) {
- for (int zz = -1; zz <= 1; zz++) {
- ssef ip = ssef(xyzi + ssei(xx, yy, zz, 0));
- ssef vp = ip + cellnoise_color(ip);
- float d = len_squared<1, 1, 1, 0>(vec_p - vp);
-
- if(d < da) {
- da = d;
- pa = vp;
- }
- }
- }
- }
-
- ssef color = cellnoise_color(pa);
- return (float3 &)color;
-#endif
-}
-
-#if 0
-ccl_device float voronoi_F1(float3 p) { return voronoi_Fn(p, 0.0f, 0, -1).w; }
-ccl_device float voronoi_F2(float3 p) { return voronoi_Fn(p, 0.0f, 1, -1).w; }
-ccl_device float voronoi_F3(float3 p) { return voronoi_Fn(p, 0.0f, 2, -1).w; }
-ccl_device float voronoi_F4(float3 p) { return voronoi_Fn(p, 0.0f, 3, -1).w; }
-ccl_device float voronoi_F1F2(float3 p) { return voronoi_Fn(p, 0.0f, 0, 1).w; }
-
-ccl_device float voronoi_Cr(float3 p)
-{
- /* crackle type pattern, just a scale/clamp of F2-F1 */
- float t = 10.0f*voronoi_F1F2(p);
- return (t > 1.0f)? 1.0f: t;
-}
-
-ccl_device float voronoi_F1S(float3 p) { return 2.0f*voronoi_F1(p) - 1.0f; }
-ccl_device float voronoi_F2S(float3 p) { return 2.0f*voronoi_F2(p) - 1.0f; }
-ccl_device float voronoi_F3S(float3 p) { return 2.0f*voronoi_F3(p) - 1.0f; }
-ccl_device float voronoi_F4S(float3 p) { return 2.0f*voronoi_F4(p) - 1.0f; }
-ccl_device float voronoi_F1F2S(float3 p) { return 2.0f*voronoi_F1F2(p) - 1.0f; }
-ccl_device float voronoi_CrS(float3 p) { return 2.0f*voronoi_Cr(p) - 1.0f; }
-#endif
-
-/* Noise Bases */
-
-ccl_device float noise_basis(float3 p, NodeNoiseBasis basis)
-{
- /* Only Perlin enabled for now, others break CUDA compile by making kernel
- * too big, with compile using > 4GB, due to everything being inlined. */
-
-#if 0
- if(basis == NODE_NOISE_PERLIN)
-#endif
- return noise(p);
-#if 0
- if(basis == NODE_NOISE_VORONOI_F1)
- return voronoi_F1S(p);
- if(basis == NODE_NOISE_VORONOI_F2)
- return voronoi_F2S(p);
- if(basis == NODE_NOISE_VORONOI_F3)
- return voronoi_F3S(p);
- if(basis == NODE_NOISE_VORONOI_F4)
- return voronoi_F4S(p);
- if(basis == NODE_NOISE_VORONOI_F2_F1)
- return voronoi_F1F2S(p);
- if(basis == NODE_NOISE_VORONOI_CRACKLE)
- return voronoi_CrS(p);
- if(basis == NODE_NOISE_CELL_NOISE)
- return cellnoise(p);
-
- return 0.0f;
-#endif
-}
-
-/* Soft/Hard Noise */
-
-ccl_device float noise_basis_hard(float3 p, NodeNoiseBasis basis, int hard)
-{
- float t = noise_basis(p, basis);
- return (hard)? fabsf(2.0f*t - 1.0f): t;
-}
-
/* Turbulence */
-ccl_device_noinline float noise_turbulence(float3 p, NodeNoiseBasis basis, float octaves, int hard)
+ccl_device_noinline float noise_turbulence(float3 p, float octaves, int hard)
{
float fscale = 1.0f;
float amp = 1.0f;
@@ -281,7 +29,7 @@ ccl_device_noinline float noise_turbulence(float3 p, NodeNoiseBasis basis, float
n = float_to_int(octaves);
for(i = 0; i <= n; i++) {
- float t = noise_basis(fscale*p, basis);
+ float t = noise(fscale*p);
if(hard)
t = fabsf(2.0f*t - 1.0f);
@@ -294,7 +42,7 @@ ccl_device_noinline float noise_turbulence(float3 p, NodeNoiseBasis basis, float
float rmd = octaves - floorf(octaves);
if(rmd != 0.0f) {
- float t = noise_basis(fscale*p, basis);
+ float t = noise(fscale*p);
if(hard)
t = fabsf(2.0f*t - 1.0f);
diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h
index 7130b14a426..009e91192eb 100644
--- a/intern/cycles/kernel/svm/svm_types.h
+++ b/intern/cycles/kernel/svm/svm_types.h
@@ -28,6 +28,29 @@ CCL_NAMESPACE_BEGIN
/* Nodes */
+/* Known frequencies of used nodes, used for selective nodes compilation
+ * in the kernel. Currently only affects split OpenCL kernel.
+ *
+ * Keep as defines so it's easy to check which nodes are to be compiled
+ * from preprocessor.
+ *
+ * Lower the number of group more often the node is used.
+ */
+#define NODE_GROUP_LEVEL_0 0
+#define NODE_GROUP_LEVEL_1 1
+#define NODE_GROUP_LEVEL_2 2
+#define NODE_GROUP_LEVEL_3 3
+#define NODE_GROUP_LEVEL_MAX NODE_GROUP_LEVEL_3
+
+#define NODE_FEATURE_VOLUME (1 << 0)
+#define NODE_FEATURE_HAIR (1 << 1)
+#define NODE_FEATURE_BUMP (1 << 2)
+/* TODO(sergey): Consider using something like ((uint)(-1)).
+ * Need to ceck carefully operand types around usage of this
+ * define first.
+ */
+#define NODE_FEATURE_ALL (NODE_FEATURE_VOLUME|NODE_FEATURE_HAIR|NODE_FEATURE_BUMP)
+
typedef enum NodeType {
NODE_END = 0,
NODE_CLOSURE_BSDF,
@@ -256,27 +279,6 @@ typedef enum NodeConvert {
NODE_CONVERT_IV
} NodeConvert;
-typedef enum NodeDistanceMetric {
- NODE_VORONOI_DISTANCE_SQUARED,
- NODE_VORONOI_ACTUAL_DISTANCE,
- NODE_VORONOI_MANHATTAN,
- NODE_VORONOI_CHEBYCHEV,
- NODE_VORONOI_MINKOVSKY_H,
- NODE_VORONOI_MINKOVSKY_4,
- NODE_VORONOI_MINKOVSKY
-} NodeDistanceMetric;
-
-typedef enum NodeNoiseBasis {
- NODE_NOISE_PERLIN,
- NODE_NOISE_VORONOI_F1,
- NODE_NOISE_VORONOI_F2,
- NODE_NOISE_VORONOI_F3,
- NODE_NOISE_VORONOI_F4,
- NODE_NOISE_VORONOI_F2_F1,
- NODE_NOISE_VORONOI_CRACKLE,
- NODE_NOISE_CELL_NOISE
-} NodeNoiseBasis;
-
typedef enum NodeMusgraveType {
NODE_MUSGRAVE_MULTIFRACTAL,
NODE_MUSGRAVE_FBM,
@@ -426,6 +428,7 @@ typedef enum ClosureType {
#define CLOSURE_IS_BACKGROUND(type) (type == CLOSURE_BACKGROUND_ID)
#define CLOSURE_IS_AMBIENT_OCCLUSION(type) (type == CLOSURE_AMBIENT_OCCLUSION_ID)
#define CLOSURE_IS_PHASE(type) (type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID)
+#define CLOSURE_IS_GLASS(type) (type >= CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID && type <= CLOSURE_BSDF_SHARP_GLASS_ID)
#define CLOSURE_WEIGHT_CUTOFF 1e-5f
diff --git a/intern/cycles/kernel/svm/svm_vector_transform.h b/intern/cycles/kernel/svm/svm_vector_transform.h
index a16786f3ed3..4c32130d06d 100644
--- a/intern/cycles/kernel/svm/svm_vector_transform.h
+++ b/intern/cycles/kernel/svm/svm_vector_transform.h
@@ -33,7 +33,7 @@ ccl_device void svm_node_vector_transform(KernelGlobals *kg, ShaderData *sd, flo
NodeVectorTransformConvertSpace to = (NodeVectorTransformConvertSpace)ito;
Transform tfm;
- bool is_object = (sd->object != OBJECT_NONE);
+ bool is_object = (ccl_fetch(sd, object) != OBJECT_NONE);
bool is_direction = (type == NODE_VECTOR_TRANSFORM_TYPE_VECTOR || type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL);
/* From world */
@@ -45,7 +45,7 @@ ccl_device void svm_node_vector_transform(KernelGlobals *kg, ShaderData *sd, flo
else
in = transform_point(&tfm, in);
}
- else if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT && is_object) {
+ else if(to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT && is_object) {
if(is_direction)
object_inverse_dir_transform(kg, sd, &in);
else
@@ -54,7 +54,7 @@ ccl_device void svm_node_vector_transform(KernelGlobals *kg, ShaderData *sd, flo
}
/* From camera */
- else if (from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) {
+ else if(from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) {
if(to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD || to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT) {
tfm = kernel_data.cam.cameratoworld;
if(is_direction)
diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h
index 5a2e6e97dd3..d612d7e973f 100644
--- a/intern/cycles/kernel/svm/svm_voronoi.h
+++ b/intern/cycles/kernel/svm/svm_voronoi.h
@@ -18,6 +18,92 @@ CCL_NAMESPACE_BEGIN
/* Voronoi */
+ccl_device float voronoi_F1_distance(float3 p)
+{
+ /* returns squared distance in da */
+ float da = 1e10f;
+
+#ifndef __KERNEL_SSE2__
+ int ix = floor_to_int(p.x), iy = floor_to_int(p.y), iz = floor_to_int(p.z);
+
+ for(int xx = -1; xx <= 1; xx++) {
+ for(int yy = -1; yy <= 1; yy++) {
+ for(int zz = -1; zz <= 1; zz++) {
+ float3 ip = make_float3(ix + xx, iy + yy, iz + zz);
+ float3 vp = ip + cellnoise_color(ip);
+ float d = len_squared(p - vp);
+ da = min(d, da);
+ }
+ }
+ }
+#else
+ ssef vec_p = load4f(p);
+ ssei xyzi = quick_floor_sse(vec_p);
+
+ for(int xx = -1; xx <= 1; xx++) {
+ for(int yy = -1; yy <= 1; yy++) {
+ for(int zz = -1; zz <= 1; zz++) {
+ ssef ip = ssef(xyzi + ssei(xx, yy, zz, 0));
+ ssef vp = ip + cellnoise_color(ip);
+ float d = len_squared<1, 1, 1, 0>(vec_p - vp);
+ da = min(d, da);
+ }
+ }
+ }
+#endif
+
+ return da;
+}
+
+ccl_device float3 voronoi_F1_color(float3 p)
+{
+ /* returns color of the nearest point */
+ float da = 1e10f;
+
+#ifndef __KERNEL_SSE2__
+ float3 pa;
+ int ix = floor_to_int(p.x), iy = floor_to_int(p.y), iz = floor_to_int(p.z);
+
+ for(int xx = -1; xx <= 1; xx++) {
+ for(int yy = -1; yy <= 1; yy++) {
+ for(int zz = -1; zz <= 1; zz++) {
+ float3 ip = make_float3(ix + xx, iy + yy, iz + zz);
+ float3 vp = ip + cellnoise_color(ip);
+ float d = len_squared(p - vp);
+
+ if(d < da) {
+ da = d;
+ pa = vp;
+ }
+ }
+ }
+ }
+
+ return cellnoise_color(pa);
+#else
+ ssef pa, vec_p = load4f(p);
+ ssei xyzi = quick_floor_sse(vec_p);
+
+ for(int xx = -1; xx <= 1; xx++) {
+ for(int yy = -1; yy <= 1; yy++) {
+ for(int zz = -1; zz <= 1; zz++) {
+ ssef ip = ssef(xyzi + ssei(xx, yy, zz, 0));
+ ssef vp = ip + cellnoise_color(ip);
+ float d = len_squared<1, 1, 1, 0>(vec_p - vp);
+
+ if(d < da) {
+ da = d;
+ pa = vp;
+ }
+ }
+ }
+ }
+
+ ssef color = cellnoise_color(pa);
+ return (float3 &)color;
+#endif
+}
+
ccl_device_noinline float4 svm_voronoi(NodeVoronoiColoring coloring, float3 p)
{
if(coloring == NODE_VORONOI_INTENSITY) {
diff --git a/intern/cycles/kernel/svm/svm_wave.h b/intern/cycles/kernel/svm/svm_wave.h
index 36b59c3684c..6eaddaf301c 100644
--- a/intern/cycles/kernel/svm/svm_wave.h
+++ b/intern/cycles/kernel/svm/svm_wave.h
@@ -28,7 +28,7 @@ ccl_device_noinline float svm_wave(NodeWaveType type, float3 p, float detail, fl
n = len(p) * 20.0f;
if(distortion != 0.0f)
- n += distortion * noise_turbulence(p*dscale, NODE_NOISE_PERLIN, detail, 0);
+ n += distortion * noise_turbulence(p*dscale, detail, 0);
return 0.5f + 0.5f * sinf(n);
}
diff --git a/intern/cycles/kernel/svm/svm_wavelength.h b/intern/cycles/kernel/svm/svm_wavelength.h
index 9e57c470c0f..57030f3979d 100644
--- a/intern/cycles/kernel/svm/svm_wavelength.h
+++ b/intern/cycles/kernel/svm/svm_wavelength.h
@@ -77,7 +77,7 @@ ccl_device void svm_node_wavelength(ShaderData *sd, float *stack, uint wavelengt
int i = float_to_int(ii);
float3 color;
- if (i < 0 || i >= 80) {
+ if(i < 0 || i >= 80) {
color = make_float3(0.0f, 0.0f, 0.0f);
}
else {
diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h
index 42fe3e8e429..30ccd523add 100644
--- a/intern/cycles/kernel/svm/svm_wireframe.h
+++ b/intern/cycles/kernel/svm/svm_wireframe.h
@@ -41,9 +41,9 @@ ccl_device float wireframe(KernelGlobals *kg,
float3 *P)
{
#ifdef __HAIR__
- if (sd->prim != PRIM_NONE && sd->type & PRIMITIVE_ALL_TRIANGLE)
+ if(ccl_fetch(sd, prim) != PRIM_NONE && ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)
#else
- if (sd->prim != PRIM_NONE)
+ if(ccl_fetch(sd, prim) != PRIM_NONE)
#endif
{
float3 Co[3];
@@ -52,12 +52,12 @@ ccl_device float wireframe(KernelGlobals *kg,
/* Triangles */
int np = 3;
- if(sd->type & PRIMITIVE_TRIANGLE)
- triangle_vertices(kg, sd->prim, Co);
+ if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE)
+ triangle_vertices(kg, ccl_fetch(sd, prim), Co);
else
- motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, Co);
+ motion_triangle_vertices(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), Co);
- if(!(sd->flag & SD_TRANSFORM_APPLIED)) {
+ if(!(ccl_fetch(sd, flag) & SD_TRANSFORM_APPLIED)) {
object_position_transform(kg, sd, &Co[0]);
object_position_transform(kg, sd, &Co[1]);
object_position_transform(kg, sd, &Co[2]);
@@ -66,8 +66,8 @@ ccl_device float wireframe(KernelGlobals *kg,
if(pixel_size) {
// Project the derivatives of P to the viewing plane defined
// by I so we have a measure of how big is a pixel at this point
- float pixelwidth_x = len(sd->dP.dx - dot(sd->dP.dx, sd->I) * sd->I);
- float pixelwidth_y = len(sd->dP.dy - dot(sd->dP.dy, sd->I) * sd->I);
+ float pixelwidth_x = len(ccl_fetch(sd, dP).dx - dot(ccl_fetch(sd, dP).dx, ccl_fetch(sd, I)) * ccl_fetch(sd, I));
+ float pixelwidth_y = len(ccl_fetch(sd, dP).dy - dot(ccl_fetch(sd, dP).dy, ccl_fetch(sd, I)) * ccl_fetch(sd, I));
// Take the average of both axis' length
pixelwidth = (pixelwidth_x + pixelwidth_y) * 0.5f;
}
@@ -76,7 +76,7 @@ ccl_device float wireframe(KernelGlobals *kg,
// other half. And take the square for fast comparison
pixelwidth *= 0.5f * size;
pixelwidth *= pixelwidth;
- for (int i = 0; i < np; i++) {
+ for(int i = 0; i < np; i++) {
int i2 = i ? i - 1 : np - 1;
float3 dir = *P - Co[i];
float3 edge = Co[i] - Co[i2];
@@ -84,7 +84,7 @@ ccl_device float wireframe(KernelGlobals *kg,
// At this point dot(crs, crs) / dot(edge, edge) is
// the square of area / length(edge) == square of the
// distance to the edge.
- if (dot(crs, crs) < (dot(edge, edge) * pixelwidth))
+ if(dot(crs, crs) < (dot(edge, edge) * pixelwidth))
return 1.0f;
}
}
@@ -106,19 +106,30 @@ ccl_device void svm_node_wireframe(KernelGlobals *kg,
int pixel_size = (int)use_pixel_size;
/* Calculate wireframe */
- float f = wireframe(kg, sd, size, pixel_size, &sd->P);
+#ifdef __SPLIT_KERNEL__
+ /* TODO(sergey): This is because sd is actually a global space,
+ * which makes it difficult to re-use same wireframe() function.
+ *
+ * With OpenCL 2.0 it's possible to avoid this change, but for until
+ * then we'll be living with such an exception.
+ */
+ float3 P = ccl_fetch(sd, P);
+ float f = wireframe(kg, sd, size, pixel_size, &P);
+#else
+ float f = wireframe(kg, sd, size, pixel_size, &ccl_fetch(sd, P));
+#endif
/* TODO(sergey): Think of faster way to calculate derivatives. */
if(bump_offset == NODE_BUMP_OFFSET_DX) {
- float3 Px = sd->P - sd->dP.dx;
- f += (f - wireframe(kg, sd, size, pixel_size, &Px)) / len(sd->dP.dx);
+ float3 Px = ccl_fetch(sd, P) - ccl_fetch(sd, dP).dx;
+ f += (f - wireframe(kg, sd, size, pixel_size, &Px)) / len(ccl_fetch(sd, dP).dx);
}
- else if (bump_offset == NODE_BUMP_OFFSET_DY) {
- float3 Py = sd->P - sd->dP.dy;
- f += (f - wireframe(kg, sd, size, pixel_size, &Py)) / len(sd->dP.dy);
+ else if(bump_offset == NODE_BUMP_OFFSET_DY) {
+ float3 Py = ccl_fetch(sd, P) - ccl_fetch(sd, dP).dy;
+ f += (f - wireframe(kg, sd, size, pixel_size, &Py)) / len(ccl_fetch(sd, dP).dy);
}
- if (stack_valid(out_fac))
+ if(stack_valid(out_fac))
stack_store_float(stack, out_fac, f);
}