Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Genrich <daniel.genrich@gmx.net>2014-10-23 17:12:28 +0400
committerDaniel Genrich <daniel.genrich@gmx.net>2014-10-23 17:12:28 +0400
commit9ff1ebed52e0f858a395eeea4caf89304e068b2d (patch)
treeb05d0f4b229de61b088a128ad412dd7bba347928 /intern/cycles/kernel
parenta2ed11c6eeab5fab8cb81e32e1c68fdafdd5dbbc (diff)
parenteaaeae469968c5c78a5d7e6d202f1af00b382a79 (diff)
Merge remote-tracking branch 'origin/master' into soc-2014-fluid
Conflicts: .gitignore intern/cycles/CMakeLists.txt source/blender/blenkernel/intern/smoke.c source/blender/python/intern/bpy_interface.c source/creator/CMakeLists.txt
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r--intern/cycles/kernel/CMakeLists.txt95
-rw-r--r--intern/cycles/kernel/SConscript62
-rw-r--r--intern/cycles/kernel/closure/bsdf.h71
-rw-r--r--intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h210
-rw-r--r--intern/cycles/kernel/closure/bsdf_hair.h8
-rw-r--r--intern/cycles/kernel/closure/bsdf_microfacet.h943
-rw-r--r--intern/cycles/kernel/closure/bsdf_util.h18
-rw-r--r--intern/cycles/kernel/closure/bsdf_ward.h189
-rw-r--r--intern/cycles/kernel/closure/bsdf_westin.h180
-rw-r--r--intern/cycles/kernel/geom/geom_bvh.h116
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_shadow.h57
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_subsurface.h54
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_traversal.h72
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_volume.h322
-rw-r--r--intern/cycles/kernel/geom/geom_curve.h146
-rw-r--r--intern/cycles/kernel/geom/geom_motion_triangle.h36
-rw-r--r--intern/cycles/kernel/geom/geom_primitive.h2
-rw-r--r--intern/cycles/kernel/geom/geom_triangle.h76
-rw-r--r--intern/cycles/kernel/geom/geom_volume.h20
-rw-r--r--intern/cycles/kernel/kernel.cl31
-rw-r--r--intern/cycles/kernel/kernel.cpp9
-rw-r--r--intern/cycles/kernel/kernel.cu37
-rw-r--r--intern/cycles/kernel/kernel.h21
-rw-r--r--intern/cycles/kernel/kernel_accumulate.h49
-rw-r--r--intern/cycles/kernel/kernel_avx.cpp10
-rw-r--r--intern/cycles/kernel/kernel_avx2.cpp87
-rw-r--r--intern/cycles/kernel/kernel_bake.h (renamed from intern/cycles/kernel/kernel_displace.h)182
-rw-r--r--intern/cycles/kernel/kernel_camera.h27
-rw-r--r--intern/cycles/kernel/kernel_compat_cpu.h116
-rw-r--r--intern/cycles/kernel/kernel_compat_cuda.h13
-rw-r--r--intern/cycles/kernel/kernel_compat_opencl.h66
-rw-r--r--intern/cycles/kernel/kernel_debug.h38
-rw-r--r--intern/cycles/kernel/kernel_emission.h77
-rw-r--r--intern/cycles/kernel/kernel_jitter.h24
-rw-r--r--intern/cycles/kernel/kernel_light.h171
-rw-r--r--intern/cycles/kernel/kernel_path.h1076
-rw-r--r--intern/cycles/kernel/kernel_path_state.h14
-rw-r--r--intern/cycles/kernel/kernel_path_surface.h299
-rw-r--r--intern/cycles/kernel/kernel_path_volume.h267
-rw-r--r--intern/cycles/kernel/kernel_random.h29
-rw-r--r--intern/cycles/kernel/kernel_shader.h24
-rw-r--r--intern/cycles/kernel/kernel_shadow.h31
-rw-r--r--intern/cycles/kernel/kernel_sse2.cpp9
-rw-r--r--intern/cycles/kernel/kernel_sse3.cpp9
-rw-r--r--intern/cycles/kernel/kernel_sse41.cpp9
-rw-r--r--intern/cycles/kernel/kernel_textures.h7
-rw-r--r--intern/cycles/kernel/kernel_types.h252
-rw-r--r--intern/cycles/kernel/kernel_volume.h507
-rw-r--r--intern/cycles/kernel/osl/SConscript3
-rw-r--r--intern/cycles/kernel/osl/osl_bssrdf.cpp24
-rw-r--r--intern/cycles/kernel/osl/osl_closures.cpp75
-rw-r--r--intern/cycles/kernel/osl/osl_closures.h17
-rw-r--r--intern/cycles/kernel/osl/osl_globals.h1
-rw-r--r--intern/cycles/kernel/osl/osl_services.cpp76
-rw-r--r--intern/cycles/kernel/osl/osl_services.h92
-rw-r--r--intern/cycles/kernel/osl/osl_shader.cpp20
-rw-r--r--intern/cycles/kernel/shaders/CMakeLists.txt4
-rw-r--r--intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl (renamed from intern/cycles/kernel/shaders/node_ward_bsdf.osl)12
-rw-r--r--intern/cycles/kernel/shaders/node_brick_texture.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_checker_texture.osl6
-rw-r--r--intern/cycles/kernel/shaders/node_combine_xyz.osl27
-rw-r--r--intern/cycles/kernel/shaders/node_emission.osl6
-rw-r--r--intern/cycles/kernel/shaders/node_fresnel.h13
-rw-r--r--intern/cycles/kernel/shaders/node_geometry.osl8
-rw-r--r--intern/cycles/kernel/shaders/node_glossy_bsdf.osl4
-rw-r--r--intern/cycles/kernel/shaders/node_image_texture.osl4
-rw-r--r--intern/cycles/kernel/shaders/node_musgrave_texture.osl24
-rw-r--r--intern/cycles/kernel/shaders/node_separate_xyz.osl28
-rw-r--r--intern/cycles/kernel/shaders/node_texture.h6
-rw-r--r--intern/cycles/kernel/shaders/stdosl.h12
-rw-r--r--intern/cycles/kernel/svm/svm.h27
-rw-r--r--intern/cycles/kernel/svm/svm_blackbody.h6
-rw-r--r--intern/cycles/kernel/svm/svm_checker.h6
-rw-r--r--intern/cycles/kernel/svm/svm_closure.h114
-rw-r--r--intern/cycles/kernel/svm/svm_convert.h4
-rw-r--r--intern/cycles/kernel/svm/svm_image.h16
-rw-r--r--intern/cycles/kernel/svm/svm_noise.h193
-rw-r--r--intern/cycles/kernel/svm/svm_sepcomb_rgb.h42
-rw-r--r--intern/cycles/kernel/svm/svm_sepcomb_vector.h44
-rw-r--r--intern/cycles/kernel/svm/svm_texture.h22
-rw-r--r--intern/cycles/kernel/svm/svm_types.h14
81 files changed, 4614 insertions, 2506 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index d18f4fa2998..c521e1383a4 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -19,12 +19,13 @@ set(SRC
set(SRC_HEADERS
kernel.h
kernel_accumulate.h
+ kernel_bake.h
kernel_camera.h
kernel_compat_cpu.h
kernel_compat_cuda.h
kernel_compat_opencl.h
+ kernel_debug.h
kernel_differential.h
- kernel_displace.h
kernel_emission.h
kernel_film.h
kernel_globals.h
@@ -35,6 +36,8 @@ set(SRC_HEADERS
kernel_passes.h
kernel_path.h
kernel_path_state.h
+ kernel_path_surface.h
+ kernel_path_volume.h
kernel_projection.h
kernel_random.h
kernel_shader.h
@@ -58,8 +61,7 @@ set(SRC_CLOSURE_HEADERS
closure/bsdf_toon.h
closure/bsdf_transparent.h
closure/bsdf_util.h
- closure/bsdf_ward.h
- closure/bsdf_westin.h
+ closure/bsdf_ashikhmin_shirley.h
closure/bsdf_hair.h
closure/bssrdf.h
closure/emissive.h
@@ -95,8 +97,8 @@ set(SRC_SVM_HEADERS
svm/svm_noisetex.h
svm/svm_normal.h
svm/svm_ramp.h
- svm/svm_sepcomb_rgb.h
svm/svm_sepcomb_hsv.h
+ svm/svm_sepcomb_vector.h
svm/svm_sky.h
svm/svm_tex_coord.h
svm/svm_texture.h
@@ -111,8 +113,10 @@ set(SRC_GEOM_HEADERS
geom/geom.h
geom/geom_attribute.h
geom/geom_bvh.h
+ geom/geom_bvh_shadow.h
geom/geom_bvh_subsurface.h
geom/geom_bvh_traversal.h
+ geom/geom_bvh_volume.h
geom/geom_curve.h
geom/geom_motion_curve.h
geom/geom_motion_triangle.h
@@ -146,50 +150,69 @@ if(WITH_CYCLES_CUDA_BINARIES)
set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}")
# warn for other versions
- if(CUDA_VERSION MATCHES "60")
+ if(CUDA_VERSION MATCHES "65")
else()
message(WARNING
"CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, "
- "build may succeed but only CUDA 6.0 is officially supported")
+ "build may succeed but only CUDA 6.5 is officially supported")
endif()
# build for each arch
set(cuda_sources kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS})
set(cuda_cubins)
- foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
- set(cuda_cubin kernel_${arch}.cubin)
+ macro(CYCLES_CUDA_KERNEL_ADD arch experimental)
+ if(${experimental})
+ set(cuda_extra_flags "-D__KERNEL_CUDA_EXPERIMENTAL__")
+ set(cuda_cubin kernel_experimental_${arch}.cubin)
+ else()
+ set(cuda_extra_flags "")
+ set(cuda_cubin kernel_${arch}.cubin)
+ endif()
+
+ if(WITH_CYCLES_DEBUG)
+ set(cuda_debug_flags "-D__KERNEL_DEBUG__")
+ else()
+ set(cuda_debug_flags "")
+ endif()
set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}")
set(cuda_math_flags "--use_fast_math")
- if(CUDA_VERSION LESS 60 AND ${arch} MATCHES "sm_50")
- message(WARNING "Can't build kernel for CUDA sm_50 architecture, skipping")
- elseif(CUDA_VERSION LESS 50 AND ${arch} MATCHES "sm_35")
- message(WARNING "Can't build kernel for CUDA sm_35 architecture, skipping")
- else()
- add_custom_command(
- OUTPUT ${cuda_cubin}
- COMMAND ${CUDA_NVCC_EXECUTABLE}
- -arch=${arch}
- -m${CUDA_BITS}
- --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu
- -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}
- --ptxas-options="-v"
- ${cuda_arch_flags}
- ${cuda_version_flags}
- ${cuda_math_flags}
- -I${CMAKE_CURRENT_SOURCE_DIR}/../util
- -I${CMAKE_CURRENT_SOURCE_DIR}/svm
- -DCCL_NAMESPACE_BEGIN=
- -DCCL_NAMESPACE_END=
- -DNVCC
-
- DEPENDS ${cuda_sources})
-
- delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
- list(APPEND cuda_cubins ${cuda_cubin})
- endif()
+ add_custom_command(
+ OUTPUT ${cuda_cubin}
+ COMMAND ${CUDA_NVCC_EXECUTABLE}
+ -arch=${arch}
+ -m${CUDA_BITS}
+ --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu
+ -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}
+ --ptxas-options="-v"
+ ${cuda_arch_flags}
+ ${cuda_version_flags}
+ ${cuda_math_flags}
+ ${cuda_extra_flags}
+ ${cuda_debug_flags}
+ -I${CMAKE_CURRENT_SOURCE_DIR}/../util
+ -I${CMAKE_CURRENT_SOURCE_DIR}/svm
+ -DCCL_NAMESPACE_BEGIN=
+ -DCCL_NAMESPACE_END=
+ -DNVCC
+
+ DEPENDS ${cuda_sources})
+
+ delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
+ list(APPEND cuda_cubins ${cuda_cubin})
+
+ unset(cuda_extra_flags)
+ unset(cuda_debug_flags)
+ endmacro()
+
+ foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
+ # Compile regular kernel
+ CYCLES_CUDA_KERNEL_ADD(${arch} FALSE)
+
+ # Compile experimental kernel
+ CYCLES_CUDA_KERNEL_ADD(${arch} TRUE)
endforeach()
add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins})
@@ -213,12 +236,14 @@ if(CXX_HAS_SSE)
kernel_sse3.cpp
kernel_sse41.cpp
kernel_avx.cpp
+ kernel_avx2.cpp
)
set_source_files_properties(kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
set_source_files_properties(kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
set_source_files_properties(kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
set_source_files_properties(kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
+ set_source_files_properties(kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
endif()
diff --git a/intern/cycles/kernel/SConscript b/intern/cycles/kernel/SConscript
index 04e1bad7538..c0d969e24ae 100644
--- a/intern/cycles/kernel/SConscript
+++ b/intern/cycles/kernel/SConscript
@@ -30,6 +30,7 @@ import subprocess
import sys
import os
import Blender as B
+import btools
def normpath(path):
return os.path.abspath(os.path.normpath(path))
@@ -64,49 +65,56 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']:
closure_dir = os.path.join(source_dir, "../closure")
# get CUDA version
- nvcc_pipe = subprocess.Popen([nvcc, "--version"],stdout=subprocess.PIPE,stderr=subprocess.PIPE)
- output, erroroutput = nvcc_pipe.communicate()
+ output = btools.get_command_output([nvcc, "--version"])
cuda_major_minor = re.findall(r'release (\d+).(\d+)', output)[0]
cuda_version = int(cuda_major_minor[0])*10 + int(cuda_major_minor[1])
- if cuda_version != 60:
- print("CUDA version %d.%d detected, build may succeed but only CUDA 6.0 is officially supported." % (cuda_version/10, cuda_version%10))
+ if cuda_version != 65:
+ print("CUDA version %d.%d detected, build may succeed but only CUDA 6.5 is officially supported." % (cuda_version/10, cuda_version%10))
# nvcc flags
nvcc_flags = "-m%s" % (bits)
- nvcc_flags += " --cubin --ptxas-options=\"-v\""
+ nvcc_flags += " --cubin --ptxas-options=\"-v\" --use_fast_math"
nvcc_flags += " -D__KERNEL_CUDA_VERSION__=%d" % (cuda_version)
nvcc_flags += " -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC"
nvcc_flags += " -I \"%s\" -I \"%s\" -I \"%s\" -I \"%s\"" % (util_dir, svm_dir, geom_dir, closure_dir)
+ if env['WITH_BF_CYCLES_DEBUG']:
+ nvcc_flags += " -D__KERNEL_DEBUG__"
+
# dependencies
dependencies = ['kernel.cu'] + kernel.Glob('*.h') + kernel.Glob('../util/*.h') + kernel.Glob('svm/*.h') + kernel.Glob('geom/*.h') + kernel.Glob('closure/*.h')
last_cubin_file = None
+ configs = (("kernel_%s.cubin", ''),
+ ("kernel_experimental_%s.cubin", ' -D__KERNEL_CUDA_EXPERIMENTAL__'))
+
# add command for each cuda architecture
for arch in cuda_archs:
- if cuda_version < 60 and arch == "sm_50":
- print("Can't build kernel for CUDA sm_50 architecture, skipping")
- continue
-
- cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch)
-
- if env['BF_CYCLES_CUDA_ENV']:
- MS_SDK = "C:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\Bin\\SetEnv.cmd"
- command = "\"%s\" & \"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (MS_SDK, nvcc, arch, nvcc_flags, kernel_file, cubin_file)
- else:
- command = "\"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (nvcc, arch, nvcc_flags, kernel_file, cubin_file)
-
- kernel.Command(cubin_file, 'kernel.cu', command)
- kernel.Depends(cubin_file, dependencies)
-
- kernel_binaries.append(cubin_file)
-
- if not env['WITH_BF_CYCLES_CUDA_THREADED_COMPILE']:
- # trick to compile one kernel at a time to reduce memory usage
- if last_cubin_file:
- kernel.Depends(cubin_file, last_cubin_file)
- last_cubin_file = cubin_file
+ for config in configs:
+ # TODO(sergey): Use dict instead ocouple in order to increase readability?
+ name = config[0]
+ extra_flags = config[1]
+
+ cubin_file = os.path.join(build_dir, name % arch)
+ current_flags = nvcc_flags + extra_flags
+
+ if env['BF_CYCLES_CUDA_ENV']:
+ MS_SDK = "C:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\Bin\\SetEnv.cmd"
+ command = "\"%s\" & \"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (MS_SDK, nvcc, arch, current_flags, kernel_file, cubin_file)
+ else:
+ command = "\"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (nvcc, arch, current_flags, kernel_file, cubin_file)
+
+ kernel.Command(cubin_file, 'kernel.cu', command)
+ kernel.Depends(cubin_file, dependencies)
+
+ kernel_binaries.append(cubin_file)
+
+ if not env['WITH_BF_CYCLES_CUDA_THREADED_COMPILE']:
+ # trick to compile one kernel at a time to reduce memory usage
+ if last_cubin_file:
+ kernel.Depends(cubin_file, last_cubin_file)
+ last_cubin_file = cubin_file
Return('kernel_binaries')
diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index 24b54cd9d9e..7d4783b0f3c 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -23,10 +23,7 @@
#include "../closure/bsdf_reflection.h"
#include "../closure/bsdf_refraction.h"
#include "../closure/bsdf_transparent.h"
-#ifdef __ANISOTROPIC__
-#include "../closure/bsdf_ward.h"
-#endif
-#include "../closure/bsdf_westin.h"
+#include "../closure/bsdf_ashikhmin_shirley.h"
#include "../closure/bsdf_toon.h"
#include "../closure/bsdf_hair.h"
#ifdef __SUBSURFACE__
@@ -83,21 +80,22 @@ ccl_device int bsdf_sample(KernelGlobals *kg, const ShaderData *sd, const Shader
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_MICROFACET_GGX_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
- label = bsdf_microfacet_ggx_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+ label = bsdf_microfacet_ggx_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
- label = bsdf_microfacet_beckmann_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+ label = bsdf_microfacet_beckmann_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
-#ifdef __ANISOTROPIC__
- case CLOSURE_BSDF_WARD_ID:
- label = bsdf_ward_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
+ label = bsdf_ashikhmin_shirley_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
-#endif
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
label = bsdf_ashikhmin_velvet_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
@@ -110,14 +108,6 @@ ccl_device int bsdf_sample(KernelGlobals *kg, const ShaderData *sd, const Shader
label = bsdf_glossy_toon_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
- case CLOSURE_BSDF_WESTIN_BACKSCATTER_ID:
- label = bsdf_westin_backscatter_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
- case CLOSURE_BSDF_WESTIN_SHEEN_ID:
- label = bsdf_westin_sheen_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
- eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
- break;
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
label = bsdf_hair_reflection_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
@@ -178,18 +168,19 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const Shade
eval = bsdf_transparent_eval_reflect(sc, sd->I, omega_in, pdf);
break;
case CLOSURE_BSDF_MICROFACET_GGX_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
eval = bsdf_microfacet_ggx_eval_reflect(sc, sd->I, omega_in, pdf);
break;
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
eval = bsdf_microfacet_beckmann_eval_reflect(sc, sd->I, omega_in, pdf);
break;
-#ifdef __ANISOTROPIC__
- case CLOSURE_BSDF_WARD_ID:
- eval = bsdf_ward_eval_reflect(sc, sd->I, omega_in, pdf);
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
+ eval = bsdf_ashikhmin_shirley_eval_reflect(sc, sd->I, omega_in, pdf);
break;
-#endif
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
eval = bsdf_ashikhmin_velvet_eval_reflect(sc, sd->I, omega_in, pdf);
break;
@@ -199,12 +190,6 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const Shade
case CLOSURE_BSDF_GLOSSY_TOON_ID:
eval = bsdf_glossy_toon_eval_reflect(sc, sd->I, omega_in, pdf);
break;
- case CLOSURE_BSDF_WESTIN_BACKSCATTER_ID:
- eval = bsdf_westin_backscatter_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_WESTIN_SHEEN_ID:
- eval = bsdf_westin_sheen_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
eval = bsdf_hair_reflection_eval_reflect(sc, sd->I, omega_in, pdf);
break;
@@ -245,18 +230,19 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const Shade
eval = bsdf_transparent_eval_transmit(sc, sd->I, omega_in, pdf);
break;
case CLOSURE_BSDF_MICROFACET_GGX_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
eval = bsdf_microfacet_ggx_eval_transmit(sc, sd->I, omega_in, pdf);
break;
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
eval = bsdf_microfacet_beckmann_eval_transmit(sc, sd->I, omega_in, pdf);
break;
-#ifdef __ANISOTROPIC__
- case CLOSURE_BSDF_WARD_ID:
- eval = bsdf_ward_eval_transmit(sc, sd->I, omega_in, pdf);
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
+ eval = bsdf_ashikhmin_shirley_eval_transmit(sc, sd->I, omega_in, pdf);
break;
-#endif
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
eval = bsdf_ashikhmin_velvet_eval_transmit(sc, sd->I, omega_in, pdf);
break;
@@ -266,12 +252,6 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const Shade
case CLOSURE_BSDF_GLOSSY_TOON_ID:
eval = bsdf_glossy_toon_eval_transmit(sc, sd->I, omega_in, pdf);
break;
- case CLOSURE_BSDF_WESTIN_BACKSCATTER_ID:
- eval = bsdf_westin_backscatter_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_WESTIN_SHEEN_ID:
- eval = bsdf_westin_sheen_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
eval = bsdf_hair_reflection_eval_transmit(sc, sd->I, omega_in, pdf);
break;
@@ -330,18 +310,19 @@ ccl_device void bsdf_blur(KernelGlobals *kg, ShaderClosure *sc, float roughness)
bsdf_transparent_blur(sc, roughness);
break;
case CLOSURE_BSDF_MICROFACET_GGX_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
bsdf_microfacet_ggx_blur(sc, roughness);
break;
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
bsdf_microfacet_beckmann_blur(sc, roughness);
break;
-#ifdef __ANISOTROPIC__
- case CLOSURE_BSDF_WARD_ID:
- bsdf_ward_blur(sc, roughness);
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID:
+ bsdf_ashikhmin_shirley_blur(sc, roughness);
break;
-#endif
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
bsdf_ashikhmin_velvet_blur(sc, roughness);
break;
@@ -351,12 +332,6 @@ ccl_device void bsdf_blur(KernelGlobals *kg, ShaderClosure *sc, float roughness)
case CLOSURE_BSDF_GLOSSY_TOON_ID:
bsdf_glossy_toon_blur(sc, roughness);
break;
- case CLOSURE_BSDF_WESTIN_BACKSCATTER_ID:
- bsdf_westin_backscatter_blur(sc, roughness);
- break;
- case CLOSURE_BSDF_WESTIN_SHEEN_ID:
- bsdf_westin_sheen_blur(sc, roughness);
- break;
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
bsdf_hair_reflection_blur(sc, roughness);
diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
new file mode 100644
index 00000000000..ad7864cb8ea
--- /dev/null
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
@@ -0,0 +1,210 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+#ifndef __BSDF_ASHIKHMIN_SHIRLEY_H__
+#define __BSDF_ASHIKHMIN_SHIRLEY_H__
+
+/*
+ASHIKHMIN SHIRLEY BSDF
+
+Implementation of
+Michael Ashikhmin and Peter Shirley: "An Anisotropic Phong BRDF Model" (2000)
+
+The Fresnel factor is missing to get a separable bsdf (intensity*color), as is
+the case with all other microfacet-based BSDF implementations in Cycles.
+
+Other than that, the implementation directly follows the paper.
+*/
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device int bsdf_ashikhmin_shirley_setup(ShaderClosure *sc)
+{
+ /* store roughness. could already convert to exponent to save some cycles
+ * in eval, but this is more consistent with other bsdfs and shader_blur. */
+ sc->data0 = clamp(sc->data0, 1e-4f, 1.0f);
+ sc->data1 = sc->data0;
+
+ sc->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID;
+ return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_GLOSSY;
+}
+
+ccl_device int bsdf_ashikhmin_shirley_aniso_setup(ShaderClosure *sc)
+{
+ /* store roughness. could already convert to exponent to save some cycles
+ * in eval, but this is more consistent with other bsdfs and shader_blur. */
+ sc->data0 = clamp(sc->data0, 1e-4f, 1.0f);
+ sc->data1 = clamp(sc->data1, 1e-4f, 1.0f);
+
+ sc->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID;
+ return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_GLOSSY;
+}
+
+ccl_device void bsdf_ashikhmin_shirley_blur(ShaderClosure *sc, float roughness)
+{
+ sc->data0 = fmaxf(roughness, sc->data0); /* clamp roughness */
+ sc->data1 = fmaxf(roughness, sc->data1);
+}
+
+ccl_device_inline float bsdf_ashikhmin_shirley_roughness_to_exponent(float roughness)
+{
+ return 2.0f / (roughness*roughness) - 2.0f;
+}
+
+ccl_device float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+{
+ float3 N = sc->N;
+
+ float NdotI = dot(N, I); /* in Cycles/OSL convention I is omega_out */
+ float NdotO = dot(N, omega_in); /* and consequently we use for O omaga_in ;) */
+
+ float out = 0.0f;
+
+ if (NdotI > 0.0f && NdotO > 0.0f) {
+ NdotI = fmaxf(NdotI, 1e-6f);
+ NdotO = fmaxf(NdotO, 1e-6f);
+ float3 H = normalize(omega_in + I);
+ float HdotI = fmaxf(fabsf(dot(H, I)), 1e-6f);
+ float HdotN = fmaxf(dot(H, N), 1e-6f);
+
+ float pump = 1.0f / fmaxf(1e-6f, (HdotI*fmaxf(NdotO, NdotI))); /* pump from original paper (first derivative disc., but cancels the HdotI in the pdf nicely) */
+ /*float pump = 1.0f / fmaxf(1e-4f, ((NdotO + NdotI) * (NdotO*NdotI))); */ /* pump from d-brdf paper */
+
+ float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(sc->data0);
+ float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(sc->data1);
+
+ if (n_x == n_y) { /* => isotropic case */
+ float e = n_x;
+ float lobe = powf(HdotN, e);
+ float norm = (n_x + 1.0f) / (8.0f * M_PI_F);
+
+ out = NdotO * norm * lobe * pump;
+ *pdf = norm * lobe / HdotI; /* this is p_h / 4(H.I) (conversion from 'wh measure' to 'wi measure', eq. 8 in paper) */
+ }
+ else { /* => ANisotropic case */
+ float3 X, Y;
+ make_orthonormals_tangent(N, sc->T, &X, &Y);
+
+ float HdotX = dot(H, X);
+ float HdotY = dot(H, Y);
+ float e = (n_x * HdotX*HdotX + n_y * HdotY*HdotY) / (1.0f - HdotN*HdotN);
+ float lobe = powf(HdotN, e);
+ float norm = sqrtf((n_x + 1.0f)*(n_y + 1.0f)) / (8.0f * M_PI_F);
+
+ out = NdotO * norm * lobe * pump;
+ *pdf = norm * lobe / HdotI;
+ }
+ }
+
+ return make_float3(out, out, out);
+}
+
+ccl_device float3 bsdf_ashikhmin_shirley_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+{
+ return make_float3(0.0f, 0.0f, 0.0f);
+}
+
+ccl_device_inline void bsdf_ashikhmin_shirley_sample_first_quadrant(float n_x, float n_y, float randu, float randv, float *phi, float *cos_theta)
+{
+ *phi = atanf(sqrtf((n_x + 1.0f) / (n_y + 1.0f)) * tanf(M_PI_2_F * randu));
+ float cos_phi = cosf(*phi);
+ float sin_phi = sinf(*phi);
+ *cos_theta = powf(randv, 1.0f / (n_x * cos_phi*cos_phi + n_y * sin_phi*sin_phi + 1.0f));
+}
+
+ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+{
+ float3 N = sc->N;
+
+ float NdotI = dot(N, I);
+ if (NdotI > 0.0f) {
+
+ float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(sc->data0);
+ float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(sc->data1);
+
+ /* get x,y basis on the surface for anisotropy */
+ float3 X, Y;
+
+ if(n_x == n_y)
+ make_orthonormals(N, &X, &Y);
+ else
+ make_orthonormals_tangent(N, sc->T, &X, &Y);
+
+ /* sample spherical coords for h in tangent space */
+ float phi;
+ float cos_theta;
+ if (n_x == n_y) { /* => simple isotropic sampling */
+ phi = M_2PI_F * randu;
+ cos_theta = powf(randv, 1.0f / (n_x + 1.0f));
+ }
+ else { /* => more complex anisotropic sampling */
+ if (randu < 0.25f) { /* first quadrant */
+ float remapped_randu = 4.0f * randu;
+ bsdf_ashikhmin_shirley_sample_first_quadrant(n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
+ }
+ else if (randu < 0.5f) { /* second quadrant */
+ float remapped_randu = 4.0f * (.5f - randu);
+ bsdf_ashikhmin_shirley_sample_first_quadrant(n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
+ phi = M_PI_F - phi;
+ }
+ else if (randu < 0.75f) { /* third quadrant */
+ float remapped_randu = 4.0f * (randu - 0.5f);
+ bsdf_ashikhmin_shirley_sample_first_quadrant(n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
+ phi = M_PI_F + phi;
+ }
+ else { /* fourth quadrant */
+ float remapped_randu = 4.0f * (1.0f - randu);
+ bsdf_ashikhmin_shirley_sample_first_quadrant(n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
+ phi = 2.0f * M_PI_F - phi;
+ }
+ }
+
+ /* get half vector in tangent space */
+ float sin_theta = sqrtf(fmaxf(0.0f, 1.0f - cos_theta*cos_theta));
+ float cos_phi = cosf(phi);
+ float sin_phi = sinf(phi); /* no sqrt(1-cos^2) here b/c it causes artifacts */
+ float3 h = make_float3(
+ sin_theta * cos_phi,
+ sin_theta * sin_phi,
+ cos_theta
+ );
+
+ /* half vector to world space */
+ float3 H = h.x*X + h.y*Y + h.z*N;
+ float HdotI = dot(H, I);
+ if (HdotI < 0.0f) H = -H;
+
+ /* reflect I on H to get omega_in */
+ *omega_in = -I + (2.0f * HdotI) * H;
+
+ /* leave the rest to eval_reflect */
+ /* (could maybe optimize a few things by manual inlining, but I doubt it would make much difference) */
+ *eval = bsdf_ashikhmin_shirley_eval_reflect(sc, I, *omega_in, pdf);
+
+#ifdef __RAY_DIFFERENTIALS__
+ /* just do the reflection thing for now */
+ *domega_in_dx = (2.0f * dot(N, dIdx)) * N - dIdx;
+ *domega_in_dy = (2.0f * dot(N, dIdy)) * N - dIdy;
+#endif
+ }
+
+ return LABEL_REFLECT | LABEL_GLOSSY;
+}
+
+
+CCL_NAMESPACE_END
+
+#endif /* __BSDF_ASHIKHMIN_SHIRLEY_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h
index 19cdb773255..e0b5454592b 100644
--- a/intern/cycles/kernel/closure/bsdf_hair.h
+++ b/intern/cycles/kernel/closure/bsdf_hair.h
@@ -63,7 +63,7 @@ ccl_device int bsdf_hair_transmission_setup(ShaderClosure *sc)
ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
{
#ifdef __HAIR__
- float offset = sc->offset;
+ float offset = sc->data2;
float3 Tg = sc->T;
#else
float offset = 0.0f;
@@ -120,7 +120,7 @@ ccl_device float3 bsdf_hair_reflection_eval_transmit(const ShaderClosure *sc, co
ccl_device float3 bsdf_hair_transmission_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
{
#ifdef __HAIR__
- float offset = sc->offset;
+ float offset = sc->data2;
float3 Tg = sc->T;
#else
float offset = 0.0f;
@@ -166,7 +166,7 @@ ccl_device float3 bsdf_hair_transmission_eval_transmit(const ShaderClosure *sc,
ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
{
#ifdef __HAIR__
- float offset = sc->offset;
+ float offset = sc->data2;
float3 Tg = sc->T;
#else
float offset = 0.0f;
@@ -221,7 +221,7 @@ ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc, float3 Ng, f
ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
{
#ifdef __HAIR__
- float offset = sc->offset;
+ float offset = sc->data2;
float3 Tg = sc->T;
#else
float offset = 0.0f;
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index 1ec35e444fe..8737b0e2d94 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -35,20 +35,293 @@
CCL_NAMESPACE_BEGIN
-/* GGX */
+/* Approximate erf and erfinv implementations.
+ * Implementation comes straight from Wikipedia:
+ *
+ * http://en.wikipedia.org/wiki/Error_function
+ *
+ * Some constants are baked into the code.
+ */
+
+ccl_device_inline float approx_erff_do(float x)
+{
+ /* Such a clamp doesn't give much distortion to the output value
+ * and gives quite a few of the speedup.
+ */
+ if(x > 3.0f) {
+ return 1.0f;
+ }
+ float t = 1.0f / (1.0f + 0.47047f*x);
+ return (1.0f -
+ t*(0.3480242f + t*(-0.0958798f + t*0.7478556f)) * expf(-x*x));
+}
+
+ccl_device_inline float approx_erff(float x)
+{
+ if(x >= 0.0f) {
+ return approx_erff_do(x);
+ }
+ else {
+ return -approx_erff_do(-x);
+ }
+}
+
+ccl_device_inline float approx_erfinvf_do(float x)
+{
+ if(x <= 0.7f) {
+ const float x2 = x * x;
+ const float a1 = 0.886226899f;
+ const float a2 = -1.645349621f;
+ const float a3 = 0.914624893f;
+ const float a4 = -0.140543331f;
+ const float b1 = -2.118377725f;
+ const float b2 = 1.442710462f;
+ const float b3 = -0.329097515f;
+ const float b4 = 0.012229801f;
+ return x * (((a4 * x2 + a3) * x2 + a2) * x2 + a1) /
+ ((((b4 * x2 + b3) * x2 + b2) * x2 + b1) * x2 + 1.0f);
+ }
+ else {
+ const float c1 = -1.970840454f;
+ const float c2 = -1.624906493f;
+ const float c3 = 3.429567803f;
+ const float c4 = 1.641345311f;
+ const float d1 = 3.543889200f;
+ const float d2 = 1.637067800f;
+ const float z = sqrtf(-logf((1.0f - x) * 0.5f));
+ return (((c4 * z + c3) * z + c2) * z + c1) /
+ ((d2 * z + d1) * z + 1.0f);
+ }
+}
+
+ccl_device_inline float approx_erfinvf(float x)
+{
+ if(x >= 0.0f) {
+ return approx_erfinvf_do(x);
+ }
+ else {
+ return -approx_erfinvf_do(-x);
+ }
+}
+
+/* Beckmann and GGX microfacet importance sampling from:
+ *
+ * Importance Sampling Microfacet-Based BSDFs using the Distribution of Visible Normals.
+ * E. Heitz and E. d'Eon, EGSR 2014 */
+
+ccl_device_inline void microfacet_beckmann_sample_slopes(
+ KernelGlobals *kg,
+ const float cos_theta_i, const float sin_theta_i,
+ float randu, float randv, float *slope_x, float *slope_y,
+ float *G1i)
+{
+ /* special case (normal incidence) */
+ if(cos_theta_i >= 0.99999f) {
+ const float r = sqrtf(-logf(randu));
+ const float phi = M_2PI_F * randv;
+ *slope_x = r * cosf(phi);
+ *slope_y = r * sinf(phi);
+ *G1i = 1.0f;
+ return;
+ }
+
+ /* precomputations */
+ const float tan_theta_i = sin_theta_i/cos_theta_i;
+ const float inv_a = tan_theta_i;
+ const float a = 1.0f/inv_a;
+ const float erf_a = approx_erff(a);
+ const float exp_a2 = expf(-a*a);
+ const float SQRT_PI_INV = 0.56418958354f;
+ const float Lambda = 0.5f*(erf_a - 1.0f) + (0.5f*SQRT_PI_INV)*(exp_a2*inv_a);
+ const float G1 = 1.0f/(1.0f + Lambda); /* masking */
+
+ *G1i = G1;
+
+#if 0
+ const float C = 1.0f - G1 * erf_a;
+
+ /* sample slope X */
+ if(randu < C) {
+ /* rescale randu */
+ randu = randu / C;
+ const float w_1 = 0.5f * SQRT_PI_INV * sin_theta_i * exp_a2;
+ const float w_2 = cos_theta_i * (0.5f - 0.5f*erf_a);
+ const float p = w_1 / (w_1 + w_2);
+
+ if(randu < p) {
+ randu = randu / p;
+ *slope_x = -sqrtf(-logf(randu*exp_a2));
+ }
+ else {
+ randu = (randu - p) / (1.0f - p);
+ *slope_x = approx_erfinvf(randu - 1.0f - randu*erf_a);
+ }
+ }
+ else {
+ /* rescale randu */
+ randu = (randu - C) / (1.0f - C);
+ *slope_x = approx_erfinvf((-1.0f + 2.0f*randu)*erf_a);
+
+ const float p = (-(*slope_x)*sin_theta_i + cos_theta_i) / (2.0f*cos_theta_i);
+
+ if(randv > p) {
+ *slope_x = -(*slope_x);
+ randv = (randv - p) / (1.0f - p);
+ }
+ else
+ randv = randv / p;
+ }
+
+ /* sample slope Y */
+ *slope_y = approx_erfinvf(2.0f*randv - 1.0f);
+#else
+ /* use precomputed table, because it better preserves stratification
+ * of the random number pattern */
+ int beckmann_table_offset = kernel_data.tables.beckmann_offset;
+
+ *slope_x = lookup_table_read_2D(kg, randu, cos_theta_i,
+ beckmann_table_offset, BECKMANN_TABLE_SIZE, BECKMANN_TABLE_SIZE);
+ *slope_y = approx_erfinvf(2.0f*randv - 1.0f);
+#endif
+
+}
+
+ccl_device_inline void microfacet_ggx_sample_slopes(
+ const float cos_theta_i, const float sin_theta_i,
+ float randu, float randv, float *slope_x, float *slope_y,
+ float *G1i)
+{
+ /* special case (normal incidence) */
+ if(cos_theta_i >= 0.99999f) {
+ const float r = sqrtf(randu/(1.0f - randu));
+ const float phi = M_2PI_F * randv;
+ *slope_x = r * cosf(phi);
+ *slope_y = r * sinf(phi);
+ *G1i = 1.0f;
+
+ return;
+ }
+
+ /* precomputations */
+ const float tan_theta_i = sin_theta_i/cos_theta_i;
+ const float G1_inv = 0.5f * (1.0f + safe_sqrtf(1.0f + tan_theta_i*tan_theta_i));
+
+ *G1i = 1.0f/G1_inv;
+
+ /* sample slope_x */
+ const float A = 2.0f*randu*G1_inv - 1.0f;
+ const float AA = A*A;
+ const float tmp = 1.0f/(AA - 1.0f);
+ const float B = tan_theta_i;
+ const float BB = B*B;
+ const float D = safe_sqrtf(BB*(tmp*tmp) - (AA - BB)*tmp);
+ const float slope_x_1 = B*tmp - D;
+ const float slope_x_2 = B*tmp + D;
+ *slope_x = (A < 0.0f || slope_x_2*tan_theta_i > 1.0f)? slope_x_1: slope_x_2;
+
+ /* sample slope_y */
+ float S;
+
+ if(randv > 0.5f) {
+ S = 1.0f;
+ randv = 2.0f*(randv - 0.5f);
+ }
+ else {
+ S = -1.0f;
+ randv = 2.0f*(0.5f - randv);
+ }
+
+ const float z = (randv*(randv*(randv*0.27385f - 0.73369f) + 0.46341f)) / (randv*(randv*(randv*0.093073f + 0.309420f) - 1.000000f) + 0.597999f);
+ *slope_y = S * z * safe_sqrtf(1.0f + (*slope_x)*(*slope_x));
+}
+
+ccl_device_inline float3 microfacet_sample_stretched(
+ KernelGlobals *kg, const float3 omega_i,
+ const float alpha_x, const float alpha_y,
+ const float randu, const float randv,
+ bool beckmann, float *G1i)
+{
+ /* 1. stretch omega_i */
+ float3 omega_i_ = make_float3(alpha_x * omega_i.x, alpha_y * omega_i.y, omega_i.z);
+ omega_i_ = normalize(omega_i_);
+
+ /* get polar coordinates of omega_i_ */
+ float costheta_ = 1.0f;
+ float sintheta_ = 0.0f;
+ float cosphi_ = 1.0f;
+ float sinphi_ = 0.0f;
+
+ if(omega_i_.z < 0.99999f) {
+ costheta_ = omega_i_.z;
+ sintheta_ = safe_sqrtf(1.0f - costheta_*costheta_);
+
+ float invlen = 1.0f/sintheta_;
+ cosphi_ = omega_i_.x * invlen;
+ sinphi_ = omega_i_.y * invlen;
+ }
+
+ /* 2. sample P22_{omega_i}(x_slope, y_slope, 1, 1) */
+ float slope_x, slope_y;
+
+ if(beckmann) {
+ microfacet_beckmann_sample_slopes(kg, costheta_, sintheta_,
+ randu, randv, &slope_x, &slope_y, G1i);
+ }
+ else {
+ microfacet_ggx_sample_slopes(costheta_, sintheta_,
+ randu, randv, &slope_x, &slope_y, G1i);
+ }
+
+ /* 3. rotate */
+ float tmp = cosphi_*slope_x - sinphi_*slope_y;
+ slope_y = sinphi_*slope_x + cosphi_*slope_y;
+ slope_x = tmp;
+
+ /* 4. unstretch */
+ slope_x = alpha_x * slope_x;
+ slope_y = alpha_y * slope_y;
+
+ /* 5. compute normal */
+ return normalize(make_float3(-slope_x, -slope_y, 1.0f));
+}
+
+/* GGX microfacet with Smith shadow-masking from:
+ *
+ * Microfacet Models for Refraction through Rough Surfaces
+ * B. Walter, S. R. Marschner, H. Li, K. E. Torrance, EGSR 2007
+ *
+ * Anisotropic from:
+ *
+ * Understanding the Masking-Shadowing Function in Microfacet-Based BRDFs
+ * E. Heitz, Research Report 2014
+ *
+ * Anisotropy is only supported for reflection currently, but adding it for
+ * transmission is just a matter of copying code from reflection if needed. */
ccl_device int bsdf_microfacet_ggx_setup(ShaderClosure *sc)
{
- sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* m_ag */
+ sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* alpha_x */
+ sc->data1 = sc->data0; /* alpha_y */
sc->type = CLOSURE_BSDF_MICROFACET_GGX_ID;
return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY;
}
+ccl_device int bsdf_microfacet_ggx_aniso_setup(ShaderClosure *sc)
+{
+ sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* alpha_x */
+ sc->data1 = clamp(sc->data1, 0.0f, 1.0f); /* alpha_y */
+
+ sc->type = CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID;
+
+ return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY;
+}
+
ccl_device int bsdf_microfacet_ggx_refraction_setup(ShaderClosure *sc)
{
- sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* m_ag */
+ sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* alpha_x */
+ sc->data1 = sc->data0; /* alpha_y */
sc->type = CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
@@ -57,136 +330,250 @@ ccl_device int bsdf_microfacet_ggx_refraction_setup(ShaderClosure *sc)
ccl_device void bsdf_microfacet_ggx_blur(ShaderClosure *sc, float roughness)
{
- sc->data0 = fmaxf(roughness, sc->data0); /* m_ag */
+ sc->data0 = fmaxf(roughness, sc->data0); /* alpha_x */
+ sc->data1 = fmaxf(roughness, sc->data1); /* alpha_y */
}
ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
{
- float m_ag = max(sc->data0, 1e-4f);
+ float alpha_x = sc->data0;
+ float alpha_y = sc->data1;
int m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
float3 N = sc->N;
- if(m_refractive || m_ag <= 1e-4f)
- return make_float3 (0, 0, 0);
+ if(m_refractive || fmaxf(alpha_x, alpha_y) <= 1e-4f)
+ return make_float3(0, 0, 0);
+
float cosNO = dot(N, I);
float cosNI = dot(N, omega_in);
+
if(cosNI > 0 && cosNO > 0) {
- // get half vector
- float3 Hr = normalize(omega_in + I);
- // eq. 20: (F*G*D)/(4*in*on)
- // eq. 33: first we calculate D(m) with m=Hr:
- float alpha2 = m_ag * m_ag;
- float cosThetaM = dot(N, Hr);
- float cosThetaM2 = cosThetaM * cosThetaM;
- float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
- float cosThetaM4 = cosThetaM2 * cosThetaM2;
- float D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
- // eq. 34: now calculate G1(i,m) and G1(o,m)
- float G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO)));
- float G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
+ /* get half vector */
+ float3 m = normalize(omega_in + I);
+ float alpha2 = alpha_x * alpha_y;
+ float D, G1o, G1i;
+
+ if(alpha_x == alpha_y) {
+ /* isotropic
+ * eq. 20: (F*G*D)/(4*in*on)
+ * eq. 33: first we calculate D(m) */
+ float cosThetaM = dot(N, m);
+ float cosThetaM2 = cosThetaM * cosThetaM;
+ float cosThetaM4 = cosThetaM2 * cosThetaM2;
+ float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
+ D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
+
+ /* eq. 34: now calculate G1(i,m) and G1(o,m) */
+ G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO)));
+ G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
+ }
+ else {
+ /* anisotropic */
+ float3 X, Y, Z = N;
+ make_orthonormals_tangent(Z, sc->T, &X, &Y);
+
+ /* distribution */
+ float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
+ float slope_x = -local_m.x/(local_m.z*alpha_x);
+ float slope_y = -local_m.y/(local_m.z*alpha_y);
+ float slope_len = 1 + slope_x*slope_x + slope_y*slope_y;
+
+ float cosThetaM = local_m.z;
+ float cosThetaM2 = cosThetaM * cosThetaM;
+ float cosThetaM4 = cosThetaM2 * cosThetaM2;
+
+ D = 1 / ((slope_len * slope_len) * M_PI_F * alpha2 * cosThetaM4);
+
+ /* G1(i,m) and G1(o,m) */
+ float tanThetaO2 = (1 - cosNO * cosNO) / (cosNO * cosNO);
+ float cosPhiO = dot(I, X);
+ float sinPhiO = dot(I, Y);
+
+ float alphaO2 = (cosPhiO*cosPhiO)*(alpha_x*alpha_x) + (sinPhiO*sinPhiO)*(alpha_y*alpha_y);
+ alphaO2 /= cosPhiO*cosPhiO + sinPhiO*sinPhiO;
+
+ G1o = 2 / (1 + safe_sqrtf(1 + alphaO2 * tanThetaO2));
+
+ float tanThetaI2 = (1 - cosNI * cosNI) / (cosNI * cosNI);
+ float cosPhiI = dot(omega_in, X);
+ float sinPhiI = dot(omega_in, Y);
+
+ float alphaI2 = (cosPhiI*cosPhiI)*(alpha_x*alpha_x) + (sinPhiI*sinPhiI)*(alpha_y*alpha_y);
+ alphaI2 /= cosPhiI*cosPhiI + sinPhiI*sinPhiI;
+
+ G1i = 2 / (1 + safe_sqrtf(1 + alphaI2 * tanThetaI2));
+ }
+
float G = G1o * G1i;
- float out = (G * D) * 0.25f / cosNO;
- // eq. 24
- float pm = D * cosThetaM;
- // convert into pdf of the sampled direction
- // eq. 38 - but see also:
- // eq. 17 in http://www.graphics.cornell.edu/~bjw/wardnotes.pdf
- *pdf = pm * 0.25f / dot(Hr, I);
- return make_float3 (out, out, out);
+
+ /* eq. 20 */
+ float common = D * 0.25f / cosNO;
+ float out = G * common;
+
+ /* eq. 2 in distribution of visible normals sampling
+ * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
+
+ /* eq. 38 - but see also:
+ * eq. 17 in http://www.graphics.cornell.edu/~bjw/wardnotes.pdf
+ * pdf = pm * 0.25 / dot(m, I); */
+ *pdf = G1o * common;
+
+ return make_float3(out, out, out);
}
- return make_float3 (0, 0, 0);
+
+ return make_float3(0, 0, 0);
}
ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
{
- float m_ag = max(sc->data0, 1e-4f);
- float m_eta = sc->data1;
+ float alpha_x = sc->data0;
+ float alpha_y = sc->data1;
+ float m_eta = sc->data2;
int m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
float3 N = sc->N;
- if(!m_refractive || m_ag <= 1e-4f)
- return make_float3 (0, 0, 0);
+ if(!m_refractive || fmaxf(alpha_x, alpha_y) <= 1e-4f)
+ return make_float3(0, 0, 0);
+
float cosNO = dot(N, I);
float cosNI = dot(N, omega_in);
+
if(cosNO <= 0 || cosNI >= 0)
- return make_float3 (0, 0, 0); // vectors on same side -- not possible
- // compute half-vector of the refraction (eq. 16)
+ return make_float3(0, 0, 0); /* vectors on same side -- not possible */
+
+ /* compute half-vector of the refraction (eq. 16) */
float3 ht = -(m_eta * omega_in + I);
float3 Ht = normalize(ht);
float cosHO = dot(Ht, I);
-
float cosHI = dot(Ht, omega_in);
- // eq. 33: first we calculate D(m) with m=Ht:
- float alpha2 = m_ag * m_ag;
+
+ /* those situations makes chi+ terms in eq. 33, 34 be zero */
+ if(dot(Ht, N) <= 0.0f || cosHO * cosNO <= 0.0f || cosHI * cosNI <= 0.0f)
+ return make_float3(0.0f, 0.0f, 0.0f);
+
+ float D, G1o, G1i;
+
+ /* eq. 33: first we calculate D(m) with m=Ht: */
+ float alpha2 = alpha_x * alpha_y;
float cosThetaM = dot(N, Ht);
float cosThetaM2 = cosThetaM * cosThetaM;
float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
float cosThetaM4 = cosThetaM2 * cosThetaM2;
- float D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
- // eq. 34: now calculate G1(i,m) and G1(o,m)
- float G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO)));
- float G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
+ D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
+
+ /* eq. 34: now calculate G1(i,m) and G1(o,m) */
+ G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO)));
+ G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
+
float G = G1o * G1i;
- // probability
- float invHt2 = 1 / dot(ht, ht);
- *pdf = D * fabsf(cosThetaM) * (fabsf(cosHI) * (m_eta * m_eta)) * invHt2;
- float out = (fabsf(cosHI * cosHO) * (m_eta * m_eta) * (G * D) * invHt2) / cosNO;
- return make_float3 (out, out, out);
+
+ /* probability */
+ float Ht2 = dot(ht, ht);
+
+ /* eq. 2 in distribution of visible normals sampling
+ * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
+
+ /* out = fabsf(cosHI * cosHO) * (m_eta * m_eta) * G * D / (cosNO * Ht2)
+ * pdf = pm * (m_eta * m_eta) * fabsf(cosHI) / Ht2 */
+ float common = D * (m_eta * m_eta) / (cosNO * Ht2);
+ float out = G * fabsf(cosHI * cosHO) * common;
+ *pdf = G1o * cosHO * fabsf(cosHI) * common;
+
+ return make_float3(out, out, out);
}
-ccl_device int bsdf_microfacet_ggx_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
{
- float m_ag = sc->data0;
+ float alpha_x = sc->data0;
+ float alpha_y = sc->data1;
int m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
float3 N = sc->N;
float cosNO = dot(N, I);
if(cosNO > 0) {
float3 X, Y, Z = N;
- make_orthonormals(Z, &X, &Y);
- // generate a random microfacet normal m
- // eq. 35,36:
- // we take advantage of cos(atan(x)) == 1/sqrt(1+x^2)
- //tttt and sin(atan(x)) == x/sqrt(1+x^2)
- float alpha2 = m_ag * m_ag;
- float tanThetaM2 = alpha2 * randu / (1 - randu);
- float cosThetaM = 1 / safe_sqrtf(1 + tanThetaM2);
- float sinThetaM = cosThetaM * safe_sqrtf(tanThetaM2);
- float phiM = M_2PI_F * randv;
- float3 m = (cosf(phiM) * sinThetaM) * X +
- (sinf(phiM) * sinThetaM) * Y +
- ( cosThetaM) * Z;
+
+ if(alpha_x == alpha_y)
+ make_orthonormals(Z, &X, &Y);
+ else
+ make_orthonormals_tangent(Z, sc->T, &X, &Y);
+
+ /* importance sampling with distribution of visible normals. vectors are
+ * transformed to local space before and after */
+ float3 local_I = make_float3(dot(X, I), dot(Y, I), cosNO);
+ float3 local_m;
+ float G1o;
+
+ local_m = microfacet_sample_stretched(kg, local_I, alpha_x, alpha_y,
+ randu, randv, false, &G1o);
+
+ float3 m = X*local_m.x + Y*local_m.y + Z*local_m.z;
+ float cosThetaM = local_m.z;
+
+ /* reflection or refraction? */
if(!m_refractive) {
float cosMO = dot(m, I);
+
if(cosMO > 0) {
- // eq. 39 - compute actual reflected direction
+ /* eq. 39 - compute actual reflected direction */
*omega_in = 2 * cosMO * m - I;
+
if(dot(Ng, *omega_in) > 0) {
- if (m_ag <= 1e-4f) {
- // some high number for MIS
+ if(fmaxf(alpha_x, alpha_y) <= 1e-4f) {
+ /* some high number for MIS */
*pdf = 1e6f;
*eval = make_float3(1e6f, 1e6f, 1e6f);
}
else {
- // microfacet normal is visible to this ray
- // eq. 33
- float cosThetaM2 = cosThetaM * cosThetaM;
- float cosThetaM4 = cosThetaM2 * cosThetaM2;
- float D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
- // eq. 24
- float pm = D * cosThetaM;
- // convert into pdf of the sampled direction
- // eq. 38 - but see also:
- // eq. 17 in http://www.graphics.cornell.edu/~bjw/wardnotes.pdf
- *pdf = pm * 0.25f / cosMO;
- // eval BRDF*cosNI
- float cosNI = dot(N, *omega_in);
- // eq. 34: now calculate G1(i,m) and G1(o,m)
- float G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO)));
- float G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
- float G = G1o * G1i;
- // eq. 20: (F*G*D)/(4*in*on)
- float out = (G * D) * 0.25f / cosNO;
+ /* microfacet normal is visible to this ray */
+ /* eq. 33 */
+ float alpha2 = alpha_x * alpha_y;
+ float D, G1i;
+
+ if(alpha_x == alpha_y) {
+ /* isotropic */
+ float cosThetaM2 = cosThetaM * cosThetaM;
+ float cosThetaM4 = cosThetaM2 * cosThetaM2;
+ float tanThetaM2 = 1/(cosThetaM2) - 1;
+ D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
+
+ /* eval BRDF*cosNI */
+ float cosNI = dot(N, *omega_in);
+
+ /* eq. 34: now calculate G1(i,m) */
+ G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
+ }
+ else {
+ /* anisotropic distribution */
+ float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
+ float slope_x = -local_m.x/(local_m.z*alpha_x);
+ float slope_y = -local_m.y/(local_m.z*alpha_y);
+ float slope_len = 1 + slope_x*slope_x + slope_y*slope_y;
+
+ float cosThetaM = local_m.z;
+ float cosThetaM2 = cosThetaM * cosThetaM;
+ float cosThetaM4 = cosThetaM2 * cosThetaM2;
+
+ D = 1 / ((slope_len * slope_len) * M_PI_F * alpha2 * cosThetaM4);
+
+ /* calculate G1(i,m) */
+ float cosNI = dot(N, *omega_in);
+
+ float tanThetaI2 = (1 - cosNI * cosNI) / (cosNI * cosNI);
+ float cosPhiI = dot(*omega_in, X);
+ float sinPhiI = dot(*omega_in, Y);
+
+ float alphaI2 = (cosPhiI*cosPhiI)*(alpha_x*alpha_x) + (sinPhiI*sinPhiI)*(alpha_y*alpha_y);
+ alphaI2 /= cosPhiI*cosPhiI + sinPhiI*sinPhiI;
+
+ G1i = 2 / (1 + safe_sqrtf(1 + alphaI2 * tanThetaI2));
+ }
+
+ /* see eval function for derivation */
+ float common = (G1o * D) * 0.25f / cosNO;
+ float out = G1i * common;
+ *pdf = common;
+
*eval = make_float3(out, out, out);
}
@@ -198,14 +585,15 @@ ccl_device int bsdf_microfacet_ggx_sample(const ShaderClosure *sc, float3 Ng, fl
}
}
else {
- // CAUTION: the i and o variables are inverted relative to the paper
- // eq. 39 - compute actual refractive direction
+ /* CAUTION: the i and o variables are inverted relative to the paper
+ * eq. 39 - compute actual refractive direction */
float3 R, T;
#ifdef __RAY_DIFFERENTIALS__
float3 dRdx, dRdy, dTdx, dTdy;
#endif
- float m_eta = sc->data1;
+ float m_eta = sc->data2;
bool inside;
+
fresnel_dielectric(m_eta, m, I, &R, &T,
#ifdef __RAY_DIFFERENTIALS__
dIdx, dIdy, &dRdx, &dRdy, &dTdx, &dTdy,
@@ -213,38 +601,43 @@ ccl_device int bsdf_microfacet_ggx_sample(const ShaderClosure *sc, float3 Ng, fl
&inside);
if(!inside) {
+
*omega_in = T;
#ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = dTdx;
*domega_in_dy = dTdy;
#endif
- if (m_ag <= 1e-4f || fabsf(m_eta - 1.0f) < 1e-4f) {
- // some high number for MIS
+ if(fmaxf(alpha_x, alpha_y) <= 1e-4f || fabsf(m_eta - 1.0f) < 1e-4f) {
+ /* some high number for MIS */
*pdf = 1e6f;
*eval = make_float3(1e6f, 1e6f, 1e6f);
}
else {
- // eq. 33
+ /* eq. 33 */
+ float alpha2 = alpha_x * alpha_y;
float cosThetaM2 = cosThetaM * cosThetaM;
float cosThetaM4 = cosThetaM2 * cosThetaM2;
+ float tanThetaM2 = 1/(cosThetaM2) - 1;
float D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2));
- // eq. 24
- float pm = D * cosThetaM;
- // eval BRDF*cosNI
+
+ /* eval BRDF*cosNI */
float cosNI = dot(N, *omega_in);
- // eq. 34: now calculate G1(i,m) and G1(o,m)
- float G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO)));
+
+ /* eq. 34: now calculate G1(i,m) */
float G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI)));
- float G = G1o * G1i;
- // eq. 21
+
+ /* eq. 21 */
float cosHI = dot(m, *omega_in);
float cosHO = dot(m, I);
float Ht2 = m_eta * cosHI + cosHO;
Ht2 *= Ht2;
- float out = (fabsf(cosHI * cosHO) * (m_eta * m_eta) * (G * D)) / (cosNO * Ht2);
- // eq. 38 and eq. 17
- *pdf = pm * (m_eta * m_eta) * fabsf(cosHI) / Ht2;
+
+ /* see eval function for derivation */
+ float common = (G1o * D) * (m_eta * m_eta) / (cosNO * Ht2);
+ float out = G1i * fabsf(cosHI * cosHO) * common;
+ *pdf = cosHO * fabsf(cosHI) * common;
+
*eval = make_float3(out, out, out);
}
}
@@ -253,19 +646,33 @@ ccl_device int bsdf_microfacet_ggx_sample(const ShaderClosure *sc, float3 Ng, fl
return (m_refractive) ? LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY;
}
-/* BECKMANN */
+/* Beckmann microfacet with Smith shadow-masking from:
+ *
+ * Microfacet Models for Refraction through Rough Surfaces
+ * B. Walter, S. R. Marschner, H. Li, K. E. Torrance, EGSR 2007 */
ccl_device int bsdf_microfacet_beckmann_setup(ShaderClosure *sc)
{
- sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* m_ab */
+ sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* alpha_x */
+ sc->data1 = sc->data0; /* alpha_y */
sc->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ID;
return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY;
}
+ccl_device int bsdf_microfacet_beckmann_aniso_setup(ShaderClosure *sc)
+{
+ sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* alpha_x */
+ sc->data1 = clamp(sc->data1, 0.0f, 1.0f); /* alpha_y */
+
+ sc->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID;
+ return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY;
+}
+
ccl_device int bsdf_microfacet_beckmann_refraction_setup(ShaderClosure *sc)
{
- sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* m_ab */
+ sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* alpha_x */
+ sc->data1 = sc->data0; /* alpha_y */
sc->type = CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY;
@@ -273,155 +680,257 @@ ccl_device int bsdf_microfacet_beckmann_refraction_setup(ShaderClosure *sc)
ccl_device void bsdf_microfacet_beckmann_blur(ShaderClosure *sc, float roughness)
{
- sc->data0 = fmaxf(roughness, sc->data0); /* m_ab */
+ sc->data0 = fmaxf(roughness, sc->data0); /* alpha_x */
+ sc->data1 = fmaxf(roughness, sc->data1); /* alpha_y */
}
ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
{
- float m_ab = max(sc->data0, 1e-4f);
+ float alpha_x = sc->data0;
+ float alpha_y = sc->data1;
int m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
float3 N = sc->N;
- if(m_refractive || m_ab <= 1e-4f)
- return make_float3 (0, 0, 0);
+ if(m_refractive || fmaxf(alpha_x, alpha_y) <= 1e-4f)
+ return make_float3(0, 0, 0);
+
float cosNO = dot(N, I);
float cosNI = dot(N, omega_in);
+
if(cosNO > 0 && cosNI > 0) {
- // get half vector
- float3 Hr = normalize(omega_in + I);
- // eq. 20: (F*G*D)/(4*in*on)
- // eq. 25: first we calculate D(m) with m=Hr:
- float alpha2 = m_ab * m_ab;
- float cosThetaM = dot(N, Hr);
- float cosThetaM2 = cosThetaM * cosThetaM;
- float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
- float cosThetaM4 = cosThetaM2 * cosThetaM2;
- float D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4);
- // eq. 26, 27: now calculate G1(i,m) and G1(o,m)
- float ao = 1 / (m_ab * safe_sqrtf((1 - cosNO * cosNO) / (cosNO * cosNO)));
- float ai = 1 / (m_ab * safe_sqrtf((1 - cosNI * cosNI) / (cosNI * cosNI)));
- float G1o = ao < 1.6f ? (3.535f * ao + 2.181f * ao * ao) / (1 + 2.276f * ao + 2.577f * ao * ao) : 1.0f;
- float G1i = ai < 1.6f ? (3.535f * ai + 2.181f * ai * ai) / (1 + 2.276f * ai + 2.577f * ai * ai) : 1.0f;
- float G = G1o * G1i;
- float out = (G * D) * 0.25f / cosNO;
- // eq. 24
- float pm = D * cosThetaM;
- // convert into pdf of the sampled direction
- // eq. 38 - but see also:
- // eq. 17 in http://www.graphics.cornell.edu/~bjw/wardnotes.pdf
- *pdf = pm * 0.25f / dot(Hr, I);
- return make_float3 (out, out, out);
+ /* get half vector */
+ float3 m = normalize(omega_in + I);
+
+ float alpha2 = alpha_x * alpha_y;
+ float D, G1o, G1i;
+
+ if(alpha_x == alpha_y) {
+ /* isotropic
+ * eq. 20: (F*G*D)/(4*in*on)
+ * eq. 25: first we calculate D(m) */
+ float cosThetaM = dot(N, m);
+ float cosThetaM2 = cosThetaM * cosThetaM;
+ float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
+ float cosThetaM4 = cosThetaM2 * cosThetaM2;
+ D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4);
+
+ /* eq. 26, 27: now calculate G1(i,m) and G1(o,m) */
+ float ao = 1 / (alpha_x * safe_sqrtf((1 - cosNO * cosNO) / (cosNO * cosNO)));
+ float ai = 1 / (alpha_x * safe_sqrtf((1 - cosNI * cosNI) / (cosNI * cosNI)));
+ G1o = ao < 1.6f ? (3.535f * ao + 2.181f * ao * ao) / (1 + 2.276f * ao + 2.577f * ao * ao) : 1.0f;
+ G1i = ai < 1.6f ? (3.535f * ai + 2.181f * ai * ai) / (1 + 2.276f * ai + 2.577f * ai * ai) : 1.0f;
+ }
+ else {
+ /* anisotropic */
+ float3 X, Y, Z = N;
+ make_orthonormals_tangent(Z, sc->T, &X, &Y);
+
+ /* distribution */
+ float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
+ float slope_x = -local_m.x/(local_m.z*alpha_x);
+ float slope_y = -local_m.y/(local_m.z*alpha_y);
+
+ float cosThetaM = local_m.z;
+ float cosThetaM2 = cosThetaM * cosThetaM;
+ float cosThetaM4 = cosThetaM2 * cosThetaM2;
+
+ D = expf(-slope_x*slope_x - slope_y*slope_y) / (M_PI_F * alpha2 * cosThetaM4);
+
+ /* G1(i,m) and G1(o,m) */
+ float tanThetaO2 = (1 - cosNO * cosNO) / (cosNO * cosNO);
+ float cosPhiO = dot(I, X);
+ float sinPhiO = dot(I, Y);
+
+ float alphaO2 = (cosPhiO*cosPhiO)*(alpha_x*alpha_x) + (sinPhiO*sinPhiO)*(alpha_y*alpha_y);
+ alphaO2 /= cosPhiO*cosPhiO + sinPhiO*sinPhiO;
+
+ float tanThetaI2 = (1 - cosNI * cosNI) / (cosNI * cosNI);
+ float cosPhiI = dot(omega_in, X);
+ float sinPhiI = dot(omega_in, Y);
+
+ float alphaI2 = (cosPhiI*cosPhiI)*(alpha_x*alpha_x) + (sinPhiI*sinPhiI)*(alpha_y*alpha_y);
+ alphaI2 /= cosPhiI*cosPhiI + sinPhiI*sinPhiI;
+
+ float ao = 1 / (safe_sqrtf(alphaO2 * tanThetaO2));
+ float ai = 1 / (safe_sqrtf(alphaI2 * tanThetaI2));
+ G1o = ao < 1.6f ? (3.535f * ao + 2.181f * ao * ao) / (1 + 2.276f * ao + 2.577f * ao * ao) : 1.0f;
+ G1i = ai < 1.6f ? (3.535f * ai + 2.181f * ai * ai) / (1 + 2.276f * ai + 2.577f * ai * ai) : 1.0f;
+ }
+
+ float G = G1o * G1i;
+
+ /* eq. 20 */
+ float common = D * 0.25f / cosNO;
+ float out = G * common;
+
+ /* eq. 2 in distribution of visible normals sampling
+ * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
+
+ /* eq. 38 - but see also:
+ * eq. 17 in http://www.graphics.cornell.edu/~bjw/wardnotes.pdf
+ * pdf = pm * 0.25 / dot(m, I); */
+ *pdf = G1o * common;
+
+ return make_float3(out, out, out);
}
- return make_float3 (0, 0, 0);
+
+ return make_float3(0, 0, 0);
}
ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
{
- float m_ab = max(sc->data0, 1e-4f);
- float m_eta = sc->data1;
+ float alpha_x = sc->data0;
+ float alpha_y = sc->data1;
+ float m_eta = sc->data2;
int m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
float3 N = sc->N;
- if(!m_refractive || m_ab <= 1e-4f)
- return make_float3 (0, 0, 0);
+ if(!m_refractive || fmaxf(alpha_x, alpha_y) <= 1e-4f)
+ return make_float3(0, 0, 0);
+
float cosNO = dot(N, I);
float cosNI = dot(N, omega_in);
+
if(cosNO <= 0 || cosNI >= 0)
- return make_float3 (0, 0, 0);
- // compute half-vector of the refraction (eq. 16)
+ return make_float3(0, 0, 0);
+
+ /* compute half-vector of the refraction (eq. 16) */
float3 ht = -(m_eta * omega_in + I);
float3 Ht = normalize(ht);
float cosHO = dot(Ht, I);
-
float cosHI = dot(Ht, omega_in);
- // eq. 33: first we calculate D(m) with m=Ht:
- float alpha2 = m_ab * m_ab;
+
+ /* those situations makes chi+ terms in eq. 25, 27 be zero */
+ if(dot(Ht, N) <= 0.0f || cosHO * cosNO <= 0.0f || cosHI * cosNI <= 0.0f)
+ return make_float3(0.0f, 0.0f, 0.0f);
+
+ /* eq. 25: first we calculate D(m) with m=Ht: */
+ float alpha2 = alpha_x * alpha_y;
float cosThetaM = min(dot(N, Ht), 1.0f);
float cosThetaM2 = cosThetaM * cosThetaM;
float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
float cosThetaM4 = cosThetaM2 * cosThetaM2;
float D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4);
- // eq. 26, 27: now calculate G1(i,m) and G1(o,m)
- float ao = 1 / (m_ab * safe_sqrtf((1 - cosNO * cosNO) / (cosNO * cosNO)));
- float ai = 1 / (m_ab * safe_sqrtf((1 - cosNI * cosNI) / (cosNI * cosNI)));
+
+ /* eq. 26, 27: now calculate G1(i,m) and G1(o,m) */
+ float ao = 1 / (alpha_x * safe_sqrtf((1 - cosNO * cosNO) / (cosNO * cosNO)));
+ float ai = 1 / (alpha_x * safe_sqrtf((1 - cosNI * cosNI) / (cosNI * cosNI)));
float G1o = ao < 1.6f ? (3.535f * ao + 2.181f * ao * ao) / (1 + 2.276f * ao + 2.577f * ao * ao) : 1.0f;
float G1i = ai < 1.6f ? (3.535f * ai + 2.181f * ai * ai) / (1 + 2.276f * ai + 2.577f * ai * ai) : 1.0f;
float G = G1o * G1i;
- // probability
- float invHt2 = 1 / dot(ht, ht);
- *pdf = D * fabsf(cosThetaM) * (fabsf(cosHI) * (m_eta * m_eta)) * invHt2;
- float out = (fabsf(cosHI * cosHO) * (m_eta * m_eta) * (G * D) * invHt2) / cosNO;
- return make_float3 (out, out, out);
+
+ /* probability */
+ float Ht2 = dot(ht, ht);
+
+ /* eq. 2 in distribution of visible normals sampling
+ * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
+
+ /* out = fabsf(cosHI * cosHO) * (m_eta * m_eta) * G * D / (cosNO * Ht2)
+ * pdf = pm * (m_eta * m_eta) * fabsf(cosHI) / Ht2 */
+ float common = D * (m_eta * m_eta) / (cosNO * Ht2);
+ float out = G * fabsf(cosHI * cosHO) * common;
+ *pdf = G1o * cosHO * fabsf(cosHI) * common;
+
+ return make_float3(out, out, out);
}
-ccl_device int bsdf_microfacet_beckmann_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
{
- float m_ab = sc->data0;
+ float alpha_x = sc->data0;
+ float alpha_y = sc->data1;
int m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
float3 N = sc->N;
float cosNO = dot(N, I);
if(cosNO > 0) {
float3 X, Y, Z = N;
- make_orthonormals(Z, &X, &Y);
- // generate a random microfacet normal m
- // eq. 35,36:
- // we take advantage of cos(atan(x)) == 1/sqrt(1+x^2)
- //tttt and sin(atan(x)) == x/sqrt(1+x^2)
- float alpha2 = m_ab * m_ab;
- float tanThetaM, cosThetaM;
-
- if(alpha2 == 0.0f) {
- tanThetaM = 0.0f;
- cosThetaM = 1.0f;
- }
- else {
- tanThetaM = safe_sqrtf(-alpha2 * logf(1 - randu));
- cosThetaM = 1 / safe_sqrtf(1 + tanThetaM * tanThetaM);
- }
- float sinThetaM = cosThetaM * tanThetaM;
- float phiM = M_2PI_F * randv;
- float3 m = (cosf(phiM) * sinThetaM) * X +
- (sinf(phiM) * sinThetaM) * Y +
- ( cosThetaM) * Z;
+ if(alpha_x == alpha_y)
+ make_orthonormals(Z, &X, &Y);
+ else
+ make_orthonormals_tangent(Z, sc->T, &X, &Y);
+
+ /* importance sampling with distribution of visible normals. vectors are
+ * transformed to local space before and after */
+ float3 local_I = make_float3(dot(X, I), dot(Y, I), cosNO);
+ float3 local_m;
+ float G1o;
+ local_m = microfacet_sample_stretched(kg, local_I, alpha_x, alpha_x,
+ randu, randv, true, &G1o);
+
+ float3 m = X*local_m.x + Y*local_m.y + Z*local_m.z;
+ float cosThetaM = local_m.z;
+
+ /* reflection or refraction? */
if(!m_refractive) {
float cosMO = dot(m, I);
+
if(cosMO > 0) {
- // eq. 39 - compute actual reflected direction
+ /* eq. 39 - compute actual reflected direction */
*omega_in = 2 * cosMO * m - I;
+
if(dot(Ng, *omega_in) > 0) {
- if (m_ab <= 1e-4f) {
- // some high number for MIS
+ if(fmaxf(alpha_x, alpha_y) <= 1e-4f) {
+ /* some high number for MIS */
*pdf = 1e6f;
*eval = make_float3(1e6f, 1e6f, 1e6f);
}
else {
- // microfacet normal is visible to this ray
- // eq. 25
- float cosThetaM2 = cosThetaM * cosThetaM;
- float tanThetaM2 = tanThetaM * tanThetaM;
- float cosThetaM4 = cosThetaM2 * cosThetaM2;
- float D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4);
- // eq. 24
- float pm = D * cosThetaM;
- // convert into pdf of the sampled direction
- // eq. 38 - but see also:
- // eq. 17 in http://www.graphics.cornell.edu/~bjw/wardnotes.pdf
- *pdf = pm * 0.25f / cosMO;
- // Eval BRDF*cosNI
- float cosNI = dot(N, *omega_in);
- // eq. 26, 27: now calculate G1(i,m) and G1(o,m)
- float ao = 1 / (m_ab * safe_sqrtf((1 - cosNO * cosNO) / (cosNO * cosNO)));
- float ai = 1 / (m_ab * safe_sqrtf((1 - cosNI * cosNI) / (cosNI * cosNI)));
- float G1o = ao < 1.6f ? (3.535f * ao + 2.181f * ao * ao) / (1 + 2.276f * ao + 2.577f * ao * ao) : 1.0f;
- float G1i = ai < 1.6f ? (3.535f * ai + 2.181f * ai * ai) / (1 + 2.276f * ai + 2.577f * ai * ai) : 1.0f;
+ /* microfacet normal is visible to this ray
+ * eq. 25 */
+ float alpha2 = alpha_x * alpha_y;
+ float D, G1i;
+
+ if(alpha_x == alpha_y) {
+ /* istropic distribution */
+ float cosThetaM2 = cosThetaM * cosThetaM;
+ float cosThetaM4 = cosThetaM2 * cosThetaM2;
+ float tanThetaM2 = 1/(cosThetaM2) - 1;
+ D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4);
+
+ /* eval BRDF*cosNI */
+ float cosNI = dot(N, *omega_in);
+
+ /* eq. 26, 27: now calculate G1(i,m) */
+ float ai = 1 / (alpha_x * safe_sqrtf((1 - cosNI * cosNI) / (cosNI * cosNI)));
+ G1i = ai < 1.6f ? (3.535f * ai + 2.181f * ai * ai) / (1 + 2.276f * ai + 2.577f * ai * ai) : 1.0f;
+ }
+ else {
+ /* anisotropic distribution */
+ float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
+ float slope_x = -local_m.x/(local_m.z*alpha_x);
+ float slope_y = -local_m.y/(local_m.z*alpha_y);
+
+ float cosThetaM = local_m.z;
+ float cosThetaM2 = cosThetaM * cosThetaM;
+ float cosThetaM4 = cosThetaM2 * cosThetaM2;
+
+ D = expf(-slope_x*slope_x - slope_y*slope_y) / (M_PI_F * alpha2 * cosThetaM4);
+
+ /* G1(i,m) */
+ float cosNI = dot(N, *omega_in);
+ float tanThetaI2 = (1 - cosNI * cosNI) / (cosNI * cosNI);
+ float cosPhiI = dot(*omega_in, X);
+ float sinPhiI = dot(*omega_in, Y);
+
+ float alphaI2 = (cosPhiI*cosPhiI)*(alpha_x*alpha_x) + (sinPhiI*sinPhiI)*(alpha_y*alpha_y);
+ alphaI2 /= cosPhiI*cosPhiI + sinPhiI*sinPhiI;
+
+ float ai = 1 / (safe_sqrtf(alphaI2 * tanThetaI2));
+ G1i = ai < 1.6f ? (3.535f * ai + 2.181f * ai * ai) / (1 + 2.276f * ai + 2.577f * ai * ai) : 1.0f;
+ }
+
float G = G1o * G1i;
- // eq. 20: (F*G*D)/(4*in*on)
- float out = (G * D) * 0.25f / cosNO;
+
+ /* see eval function for derivation */
+ float common = D * 0.25f / cosNO;
+ float out = G * common;
+ *pdf = G1o * common;
+
*eval = make_float3(out, out, out);
}
+
#ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = (2 * dot(m, dIdx)) * m - dIdx;
*domega_in_dy = (2 * dot(m, dIdy)) * m - dIdy;
@@ -430,14 +939,15 @@ ccl_device int bsdf_microfacet_beckmann_sample(const ShaderClosure *sc, float3 N
}
}
else {
- // CAUTION: the i and o variables are inverted relative to the paper
- // eq. 39 - compute actual refractive direction
+ /* CAUTION: the i and o variables are inverted relative to the paper
+ * eq. 39 - compute actual refractive direction */
float3 R, T;
#ifdef __RAY_DIFFERENTIALS__
float3 dRdx, dRdy, dTdx, dTdy;
#endif
- float m_eta = sc->data1;
+ float m_eta = sc->data2;
bool inside;
+
fresnel_dielectric(m_eta, m, I, &R, &T,
#ifdef __RAY_DIFFERENTIALS__
dIdx, dIdy, &dRdx, &dRdy, &dTdx, &dTdy,
@@ -446,39 +956,44 @@ ccl_device int bsdf_microfacet_beckmann_sample(const ShaderClosure *sc, float3 N
if(!inside) {
*omega_in = T;
+
#ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = dTdx;
*domega_in_dy = dTdy;
#endif
- if (m_ab <= 1e-4f || fabsf(m_eta - 1.0f) < 1e-4f) {
- // some high number for MIS
+
+ if(fmaxf(alpha_x, alpha_y) <= 1e-4f || fabsf(m_eta - 1.0f) < 1e-4f) {
+ /* some high number for MIS */
*pdf = 1e6f;
*eval = make_float3(1e6f, 1e6f, 1e6f);
}
else {
- // eq. 33
+ /* eq. 33 */
+ float alpha2 = alpha_x * alpha_y;
float cosThetaM2 = cosThetaM * cosThetaM;
- float tanThetaM2 = tanThetaM * tanThetaM;
float cosThetaM4 = cosThetaM2 * cosThetaM2;
+ float tanThetaM2 = 1/(cosThetaM2) - 1;
float D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4);
- // eq. 24
- float pm = D * cosThetaM;
- // eval BRDF*cosNI
+
+ /* eval BRDF*cosNI */
float cosNI = dot(N, *omega_in);
- // eq. 26, 27: now calculate G1(i,m) and G1(o,m)
- float ao = 1 / (m_ab * safe_sqrtf((1 - cosNO * cosNO) / (cosNO * cosNO)));
- float ai = 1 / (m_ab * safe_sqrtf((1 - cosNI * cosNI) / (cosNI * cosNI)));
- float G1o = ao < 1.6f ? (3.535f * ao + 2.181f * ao * ao) / (1 + 2.276f * ao + 2.577f * ao * ao) : 1.0f;
+
+ /* eq. 26, 27: now calculate G1(i,m) */
+ float ai = 1 / (alpha_x * safe_sqrtf((1 - cosNI * cosNI) / (cosNI * cosNI)));
float G1i = ai < 1.6f ? (3.535f * ai + 2.181f * ai * ai) / (1 + 2.276f * ai + 2.577f * ai * ai) : 1.0f;
float G = G1o * G1i;
- // eq. 21
+
+ /* eq. 21 */
float cosHI = dot(m, *omega_in);
float cosHO = dot(m, I);
float Ht2 = m_eta * cosHI + cosHO;
Ht2 *= Ht2;
- float out = (fabsf(cosHI * cosHO) * (m_eta * m_eta) * (G * D)) / (cosNO * Ht2);
- // eq. 38 and eq. 17
- *pdf = pm * (m_eta * m_eta) * fabsf(cosHI) / Ht2;
+
+ /* see eval function for derivation */
+ float common = D * (m_eta * m_eta) / (cosNO * Ht2);
+ float out = G * fabsf(cosHI * cosHO) * common;
+ *pdf = G1o * cosHO * fabsf(cosHI) * common;
+
*eval = make_float3(out, out, out);
}
}
diff --git a/intern/cycles/kernel/closure/bsdf_util.h b/intern/cycles/kernel/closure/bsdf_util.h
index b3dcb9dcc38..05816bac2c1 100644
--- a/intern/cycles/kernel/closure/bsdf_util.h
+++ b/intern/cycles/kernel/closure/bsdf_util.h
@@ -111,16 +111,20 @@ ccl_device float fresnel_dielectric_cos(float cosi, float eta)
return 1.0f; // TIR(no refracted component)
}
-ccl_device float fresnel_conductor(float cosi, float eta, float k)
+#if 0
+ccl_device float3 fresnel_conductor(float cosi, const float3 eta, const float3 k)
{
- float tmp_f = eta * eta + k * k;
- float tmp = tmp_f * cosi * cosi;
- float Rparl2 = (tmp - (2.0f * eta * cosi) + 1)/
- (tmp + (2.0f * eta * cosi) + 1);
- float Rperp2 = (tmp_f - (2.0f * eta * cosi) + cosi * cosi)/
- (tmp_f + (2.0f * eta * cosi) + cosi * cosi);
+ float3 cosi2 = make_float3(cosi*cosi);
+ float3 one = make_float3(1.0f, 1.0f, 1.0f);
+ float3 tmp_f = eta * eta + k * k;
+ float3 tmp = tmp_f * cosi2;
+ float3 Rparl2 = (tmp - (2.0f * eta * cosi) + one) /
+ (tmp + (2.0f * eta * cosi) + one);
+ float3 Rperp2 = (tmp_f - (2.0f * eta * cosi) + cosi2) /
+ (tmp_f + (2.0f * eta * cosi) + cosi2);
return(Rparl2 + Rperp2) * 0.5f;
}
+#endif
ccl_device float smooth_step(float edge0, float edge1, float x)
{
diff --git a/intern/cycles/kernel/closure/bsdf_ward.h b/intern/cycles/kernel/closure/bsdf_ward.h
deleted file mode 100644
index c9de615a011..00000000000
--- a/intern/cycles/kernel/closure/bsdf_ward.h
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- * Adapted from Open Shading Language with this license:
- *
- * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al.
- * All Rights Reserved.
- *
- * Modifications Copyright 2011, Blender Foundation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Sony Pictures Imageworks nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __BSDF_WARD_H__
-#define __BSDF_WARD_H__
-
-CCL_NAMESPACE_BEGIN
-
-/* WARD */
-
-ccl_device int bsdf_ward_setup(ShaderClosure *sc)
-{
- sc->data0 = clamp(sc->data0, 1e-4f, 1.0f); /* m_ax */
- sc->data1 = clamp(sc->data1, 1e-4f, 1.0f); /* m_ay */
-
- sc->type = CLOSURE_BSDF_WARD_ID;
- return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY;
-}
-
-ccl_device void bsdf_ward_blur(ShaderClosure *sc, float roughness)
-{
- sc->data0 = fmaxf(roughness, sc->data0); /* m_ax */
- sc->data1 = fmaxf(roughness, sc->data1); /* m_ay */
-}
-
-ccl_device float3 bsdf_ward_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
-{
- float m_ax = sc->data0;
- float m_ay = sc->data1;
- float3 N = sc->N;
- float3 T = sc->T;
-
- float cosNO = dot(N, I);
- float cosNI = dot(N, omega_in);
-
- if(cosNI > 0.0f && cosNO > 0.0f) {
- cosNO = max(cosNO, 1e-4f);
- cosNI = max(cosNI, 1e-4f);
-
- // get half vector and get x,y basis on the surface for anisotropy
- float3 H = normalize(omega_in + I); // normalize needed for pdf
- float3 X, Y;
- make_orthonormals_tangent(N, T, &X, &Y);
- // eq. 4
- float dotx = dot(H, X) / m_ax;
- float doty = dot(H, Y) / m_ay;
- float dotn = dot(H, N);
- float exp_arg = (dotx * dotx + doty * doty) / (dotn * dotn);
- float denom = (M_4PI_F * m_ax * m_ay * sqrtf(cosNO * cosNI));
- float exp_val = expf(-exp_arg);
- float out = cosNI * exp_val / denom;
- float oh = dot(H, I);
- denom = M_4PI_F * m_ax * m_ay * oh * dotn * dotn * dotn;
- *pdf = exp_val / denom;
- return make_float3 (out, out, out);
- }
-
- return make_float3 (0, 0, 0);
-}
-
-ccl_device float3 bsdf_ward_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
-{
- return make_float3(0.0f, 0.0f, 0.0f);
-}
-
-ccl_device int bsdf_ward_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
-{
- float m_ax = sc->data0;
- float m_ay = sc->data1;
- float3 N = sc->N;
- float3 T = sc->T;
-
- float cosNO = dot(N, I);
- if(cosNO > 0.0f) {
- // get x,y basis on the surface for anisotropy
- float3 X, Y;
- make_orthonormals_tangent(N, T, &X, &Y);
- // generate random angles for the half vector
- // eq. 7 (taking care around discontinuities to keep
- //ttoutput angle in the right quadrant)
- // we take advantage of cos(atan(x)) == 1/sqrt(1+x^2)
- //tttt and sin(atan(x)) == x/sqrt(1+x^2)
- float alphaRatio = m_ay / m_ax;
- float cosPhi, sinPhi;
- if(randu < 0.25f) {
- float val = 4 * randu;
- float tanPhi = alphaRatio * tanf(M_PI_2_F * val);
- cosPhi = 1 / sqrtf(1 + tanPhi * tanPhi);
- sinPhi = tanPhi * cosPhi;
- }
- else if(randu < 0.5f) {
- float val = 1 - 4 * (0.5f - randu);
- float tanPhi = alphaRatio * tanf(M_PI_2_F * val);
- // phi = M_PI_F - phi;
- cosPhi = -1 / sqrtf(1 + tanPhi * tanPhi);
- sinPhi = -tanPhi * cosPhi;
- }
- else if(randu < 0.75f) {
- float val = 4 * (randu - 0.5f);
- float tanPhi = alphaRatio * tanf(M_PI_2_F * val);
- //phi = M_PI_F + phi;
- cosPhi = -1 / sqrtf(1 + tanPhi * tanPhi);
- sinPhi = tanPhi * cosPhi;
- }
- else {
- float val = 1 - 4 * (1 - randu);
- float tanPhi = alphaRatio * tanf(M_PI_2_F * val);
- // phi = M_2PI_F - phi;
- cosPhi = 1 / sqrtf(1 + tanPhi * tanPhi);
- sinPhi = -tanPhi * cosPhi;
- }
- // eq. 6
- // we take advantage of cos(atan(x)) == 1/sqrt(1+x^2)
- //tttt and sin(atan(x)) == x/sqrt(1+x^2)
- float thetaDenom = (cosPhi * cosPhi) / (m_ax * m_ax) + (sinPhi * sinPhi) / (m_ay * m_ay);
- float tanTheta2 = -logf(1 - randv) / thetaDenom;
- float cosTheta = 1 / sqrtf(1 + tanTheta2);
- float sinTheta = cosTheta * sqrtf(tanTheta2);
-
- float3 h; // already normalized becaused expressed from spherical coordinates
- h.x = sinTheta * cosPhi;
- h.y = sinTheta * sinPhi;
- h.z = cosTheta;
- // compute terms that are easier in local space
- float dotx = h.x / m_ax;
- float doty = h.y / m_ay;
- float dotn = h.z;
- // transform to world space
- h = h.x * X + h.y * Y + h.z * N;
- // generate the final sample
- float oh = dot(h, I);
- *omega_in = 2.0f * oh * h - I;
- if(dot(Ng, *omega_in) > 0) {
- float cosNI = dot(N, *omega_in);
- if(cosNI > 0) {
- cosNO = max(cosNO, 1e-4f);
- cosNI = max(cosNI, 1e-4f);
-
- // eq. 9
- float exp_arg = (dotx * dotx + doty * doty) / (dotn * dotn);
- float denom = M_4PI_F * m_ax * m_ay * oh * dotn * dotn * dotn;
- *pdf = expf(-exp_arg) / denom;
- // compiler will reuse expressions already computed
- denom = (M_4PI_F * m_ax * m_ay * sqrtf(cosNO * cosNI));
- float power = cosNI * expf(-exp_arg) / denom;
- *eval = make_float3(power, power, power);
-#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx;
- *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy;
-#endif
- }
- }
- }
- return LABEL_REFLECT|LABEL_GLOSSY;
-}
-
-CCL_NAMESPACE_END
-
-#endif /* __BSDF_WARD_H__ */
-
diff --git a/intern/cycles/kernel/closure/bsdf_westin.h b/intern/cycles/kernel/closure/bsdf_westin.h
deleted file mode 100644
index 9dc1c00bb3d..00000000000
--- a/intern/cycles/kernel/closure/bsdf_westin.h
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Adapted from Open Shading Language with this license:
- *
- * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al.
- * All Rights Reserved.
- *
- * Modifications Copyright 2011, Blender Foundation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Sony Pictures Imageworks nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __BSDF_WESTIN_H__
-#define __BSDF_WESTIN_H__
-
-CCL_NAMESPACE_BEGIN
-
-/* WESTIN BACKSCATTER */
-
-ccl_device int bsdf_westin_backscatter_setup(ShaderClosure *sc)
-{
- float roughness = sc->data0;
- roughness = clamp(roughness, 1e-5f, 1.0f);
- float m_invroughness = 1.0f/roughness;
-
- sc->type = CLOSURE_BSDF_WESTIN_BACKSCATTER_ID;
- sc->data0 = m_invroughness;
-
- return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY;
-}
-
-ccl_device void bsdf_westin_backscatter_blur(ShaderClosure *sc, float roughness)
-{
- float m_invroughness = sc->data0;
- m_invroughness = min(1.0f/roughness, m_invroughness);
- sc->data0 = m_invroughness;
-}
-
-ccl_device float3 bsdf_westin_backscatter_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
-{
- float m_invroughness = sc->data0;
- float3 N = sc->N;
-
- // pdf is implicitly 0 (no indirect sampling)
- float cosNO = dot(N, I);
- float cosNI = dot(N, omega_in);
- if(cosNO > 0 && cosNI > 0) {
- float cosine = dot(I, omega_in);
- *pdf = cosine > 0 ? (m_invroughness + 1) * powf(cosine, m_invroughness) : 0;
- *pdf *= 0.5f * M_1_PI_F;
- return make_float3 (*pdf, *pdf, *pdf);
- }
- return make_float3 (0, 0, 0);
-}
-
-ccl_device float3 bsdf_westin_backscatter_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
-{
- return make_float3(0.0f, 0.0f, 0.0f);
-}
-
-ccl_device int bsdf_westin_backscatter_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
-{
- float m_invroughness = sc->data0;
- float3 N = sc->N;
-
- float cosNO = dot(N, I);
- if(cosNO > 0) {
-#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = dIdx;
- *domega_in_dy = dIdy;
-#endif
- float3 T, B;
- make_orthonormals (I, &T, &B);
- float phi = M_2PI_F * randu;
- float cosTheta = powf(randv, 1 / (m_invroughness + 1));
- float sinTheta2 = 1 - cosTheta * cosTheta;
- float sinTheta = sinTheta2 > 0 ? sqrtf(sinTheta2) : 0;
- *omega_in = (cosf(phi) * sinTheta) * T +
- (sinf(phi) * sinTheta) * B +
- (cosTheta) * I;
- if(dot(Ng, *omega_in) > 0) {
- // common terms for pdf and eval
- float cosNI = dot(N, *omega_in);
- // make sure the direction we chose is still in the right hemisphere
- if(cosNI > 0)
- {
- *pdf = 0.5f * M_1_PI_F * powf(cosTheta, m_invroughness);
- *pdf = (m_invroughness + 1) * (*pdf);
- *eval = make_float3(*pdf, *pdf, *pdf);
- }
- }
- }
- return LABEL_REFLECT|LABEL_GLOSSY;
-}
-
-/* WESTIN SHEEN */
-
-ccl_device int bsdf_westin_sheen_setup(ShaderClosure *sc)
-{
- /* float edginess = sc->data0; */
- sc->type = CLOSURE_BSDF_WESTIN_SHEEN_ID;
- return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY;
-}
-
-ccl_device void bsdf_westin_sheen_blur(ShaderClosure *sc, float roughness)
-{
-}
-
-ccl_device float3 bsdf_westin_sheen_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
-{
- float m_edginess = sc->data0;
- float3 N = sc->N;
-
- // pdf is implicitly 0 (no indirect sampling)
- float cosNO = dot(N, I);
- float cosNI = dot(N, omega_in);
- if(cosNO > 0 && cosNI > 0) {
- float sinNO2 = 1 - cosNO * cosNO;
- *pdf = cosNI * M_1_PI_F;
- float westin = sinNO2 > 0 ? powf(sinNO2, 0.5f * m_edginess) * (*pdf) : 0;
- return make_float3 (westin, westin, westin);
- }
- return make_float3 (0, 0, 0);
-}
-
-ccl_device float3 bsdf_westin_sheen_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
-{
- return make_float3(0.0f, 0.0f, 0.0f);
-}
-
-ccl_device int bsdf_westin_sheen_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
-{
- float m_edginess = sc->data0;
- float3 N = sc->N;
-
- // we are viewing the surface from the right side - send a ray out with cosine
- // distribution over the hemisphere
- sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
- if(dot(Ng, *omega_in) > 0) {
- // TODO: account for sheen when sampling
- float cosNO = dot(N, I);
- float sinNO2 = 1 - cosNO * cosNO;
- float westin = sinNO2 > 0 ? powf(sinNO2, 0.5f * m_edginess) * (*pdf) : 0;
- *eval = make_float3(westin, westin, westin);
-#ifdef __RAY_DIFFERENTIALS__
- // TODO: find a better approximation for the diffuse bounce
- *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx;
- *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy;
-#endif
- }
- else {
- pdf = 0;
- }
- return LABEL_REFLECT|LABEL_DIFFUSE;
-}
-
-CCL_NAMESPACE_END
-
-#endif /* __BSDF_WESTIN_H__ */
-
diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h
index dd7c25d581d..c5336e086b7 100644
--- a/intern/cycles/kernel/geom/geom_bvh.h
+++ b/intern/cycles/kernel/geom/geom_bvh.h
@@ -28,6 +28,13 @@
CCL_NAMESPACE_BEGIN
+/* Don't inline intersect functions on GPU, this is faster */
+#ifdef __KERNEL_GPU__
+#define ccl_device_intersect ccl_device_noinline
+#else
+#define ccl_device_intersect ccl_device_inline
+#endif
+
/* BVH intersection function variations */
#define BVH_INSTANCING 1
@@ -35,6 +42,8 @@ CCL_NAMESPACE_BEGIN
#define BVH_HAIR 4
#define BVH_HAIR_MINIMUM_WIDTH 8
+/* Regular BVH traversal */
+
#define BVH_FUNCTION_NAME bvh_intersect
#define BVH_FUNCTION_FEATURES 0
#include "geom_bvh_traversal.h"
@@ -63,6 +72,8 @@ CCL_NAMESPACE_BEGIN
#include "geom_bvh_traversal.h"
#endif
+/* Subsurface scattering BVH traversal */
+
#if defined(__SUBSURFACE__)
#define BVH_FUNCTION_NAME bvh_intersect_subsurface
#define BVH_FUNCTION_FEATURES 0
@@ -93,47 +104,72 @@ CCL_NAMESPACE_BEGIN
#include "geom_bvh_subsurface.h"
#endif
+/* Record all BVH intersection for shadows */
+
#if defined(__SHADOW_RECORD_ALL__)
#define BVH_FUNCTION_NAME bvh_intersect_shadow_all
#define BVH_FUNCTION_FEATURES 0
#include "geom_bvh_shadow.h"
#endif
-#if defined(__SUBSURFACE__) && defined(__INSTANCING__)
+#if defined(__SHADOW_RECORD_ALL__) && defined(__INSTANCING__)
#define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing
#define BVH_FUNCTION_FEATURES BVH_INSTANCING
#include "geom_bvh_shadow.h"
#endif
-#if defined(__SUBSURFACE__) && defined(__HAIR__)
+#if defined(__SHADOW_RECORD_ALL__) && defined(__HAIR__)
#define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
#include "geom_bvh_shadow.h"
#endif
-#if defined(__SUBSURFACE__) && defined(__OBJECT_MOTION__)
+#if defined(__SHADOW_RECORD_ALL__) && defined(__OBJECT_MOTION__)
#define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
#include "geom_bvh_shadow.h"
#endif
-#if defined(__SUBSURFACE__) && defined(__HAIR__) && defined(__OBJECT_MOTION__)
+#if defined(__SHADOW_RECORD_ALL__) && defined(__HAIR__) && defined(__OBJECT_MOTION__)
#define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION
#include "geom_bvh_shadow.h"
#endif
-/* to work around titan bug when using arrays instead of textures */
-#if !defined(__KERNEL_CUDA__) || defined(__KERNEL_CUDA_TEX_STORAGE__)
-ccl_device_inline
-#else
-ccl_device_noinline
+/* Camera inside Volume BVH intersection */
+
+#if defined(__VOLUME__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume
+#define BVH_FUNCTION_FEATURES 0
+#include "geom_bvh_volume.h"
#endif
-#ifdef __HAIR__
-bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect, uint *lcg_state, float difl, float extmax)
-#else
-bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect)
+
+#if defined(__VOLUME__) && defined(__INSTANCING__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING
+#include "geom_bvh_volume.h"
+#endif
+
+#if defined(__VOLUME__) && defined(__HAIR__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume_hair
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
+#include "geom_bvh_volume.h"
+#endif
+
+#if defined(__VOLUME__) && defined(__OBJECT_MOTION__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume_motion
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
+#include "geom_bvh_volume.h"
+#endif
+
+#if defined(__VOLUME__) && defined(__HAIR__) && defined(__OBJECT_MOTION__)
+#define BVH_FUNCTION_NAME bvh_intersect_volume_hair_motion
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
+#include "geom_bvh_volume.h"
#endif
+
+ccl_device_intersect bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect,
+ uint *lcg_state, float difl, float extmax)
{
#ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
@@ -170,14 +206,8 @@ bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, I
#endif /* __KERNEL_CPU__ */
}
-/* to work around titan bug when using arrays instead of textures */
#ifdef __SUBSURFACE__
-#if !defined(__KERNEL_CUDA__) || defined(__KERNEL_CUDA_TEX_STORAGE__)
-ccl_device_inline
-#else
-ccl_device_noinline
-#endif
-uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
+ccl_device_intersect uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
{
#ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
@@ -215,14 +245,8 @@ uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection
}
#endif
-/* to work around titan bug when using arrays instead of textures */
#ifdef __SHADOW_RECORD_ALL__
-#if !defined(__KERNEL_CUDA__) || defined(__KERNEL_CUDA_TEX_STORAGE__)
-ccl_device_inline
-#else
-ccl_device_noinline
-#endif
-uint scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits)
+ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits)
{
#ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
@@ -240,20 +264,50 @@ uint scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection
return bvh_intersect_shadow_all_hair(kg, ray, isect, max_hits, num_hits);
#endif /* __HAIR__ */
-#ifdef __KERNEL_CPU__
-
#ifdef __INSTANCING__
if(kernel_data.bvh.have_instancing)
return bvh_intersect_shadow_all_instancing(kg, ray, isect, max_hits, num_hits);
#endif /* __INSTANCING__ */
return bvh_intersect_shadow_all(kg, ray, isect, max_hits, num_hits);
+}
+#endif
+
+#ifdef __VOLUME__
+ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
+ const Ray *ray,
+ Intersection *isect)
+{
+#ifdef __OBJECT_MOTION__
+ if(kernel_data.bvh.have_motion) {
+#ifdef __HAIR__
+ if(kernel_data.bvh.have_curves)
+ return bvh_intersect_volume_hair_motion(kg, ray, isect);
+#endif /* __HAIR__ */
+
+ return bvh_intersect_volume_motion(kg, ray, isect);
+ }
+#endif /* __OBJECT_MOTION__ */
+
+#ifdef __HAIR__
+ if(kernel_data.bvh.have_curves)
+ return bvh_intersect_volume_hair(kg, ray, isect);
+#endif /* __HAIR__ */
+
+#ifdef __KERNEL_CPU__
+
+#ifdef __INSTANCING__
+ if(kernel_data.bvh.have_instancing)
+ return bvh_intersect_volume_instancing(kg, ray, isect);
+#endif /* __INSTANCING__ */
+
+ return bvh_intersect_volume(kg, ray, isect);
#else /* __KERNEL_CPU__ */
#ifdef __INSTANCING__
- return bvh_intersect_shadow_all_instancing(kg, ray, isect, max_hits, num_hits);
+ return bvh_intersect_volume_instancing(kg, ray, isect);
#else
- return bvh_intersect_shadow_all(kg, ray, isect, max_hits, num_hits);
+ return bvh_intersect_volume(kg, ray, isect);
#endif /* __INSTANCING__ */
#endif /* __KERNEL_CPU__ */
diff --git a/intern/cycles/kernel/geom/geom_bvh_shadow.h b/intern/cycles/kernel/geom/geom_bvh_shadow.h
index 98bf82b3b2d..aee4097d77e 100644
--- a/intern/cycles/kernel/geom/geom_bvh_shadow.h
+++ b/intern/cycles/kernel/geom/geom_bvh_shadow.h
@@ -68,15 +68,15 @@ ccl_device bool BVH_FUNCTION_NAME
const shuffle_swap_t shuf_identity = shuffle_swap_identity();
const shuffle_swap_t shuf_swap = shuffle_swap_swap();
- const __m128 pn = _mm_castsi128_ps(_mm_set_epi32(0x80000000, 0x80000000, 0, 0));
- __m128 Psplat[3], idirsplat[3];
+ const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+ ssef Psplat[3], idirsplat[3];
shuffle_swap_t shufflexyz[3];
- Psplat[0] = _mm_set_ps1(P.x);
- Psplat[1] = _mm_set_ps1(P.y);
- Psplat[2] = _mm_set_ps1(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- __m128 tsplat = _mm_set_ps(-isect_t, -isect_t, 0.0f, 0.0f);
+ ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
@@ -132,27 +132,27 @@ ccl_device bool BVH_FUNCTION_NAME
/* Intersect two child bounding boxes, SSE3 version adapted from Embree */
/* fetch node data */
- const __m128 *bvh_nodes = (__m128*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE;
+ const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE;
const float4 cnodes = ((float4*)bvh_nodes)[3];
/* intersect ray against child nodes */
- const __m128 tminmaxx = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[0], shufflexyz[0]), Psplat[0]), idirsplat[0]);
- const __m128 tminmaxy = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[1], shufflexyz[1]), Psplat[1]), idirsplat[1]);
- const __m128 tminmaxz = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[2], shufflexyz[2]), Psplat[2]), idirsplat[2]);
+ const ssef tminmaxx = (shuffle_swap(bvh_nodes[0], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
+ const ssef tminmaxy = (shuffle_swap(bvh_nodes[1], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
+ const ssef tminmaxz = (shuffle_swap(bvh_nodes[2], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
/* calculate { c0min, c1min, -c0max, -c1max} */
- __m128 minmax = _mm_max_ps(_mm_max_ps(tminmaxx, tminmaxy), _mm_max_ps(tminmaxz, tsplat));
- const __m128 tminmax = _mm_xor_ps(minmax, pn);
- const __m128 lrhit = _mm_cmple_ps(tminmax, shuffle<2, 3, 0, 1>(tminmax));
+ const ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
+ const ssef tminmax = minmax ^ pn;
+ const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
/* decide which nodes to traverse next */
#ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
- traverseChild0 = (_mm_movemask_ps(lrhit) & 1) && (__float_as_uint(cnodes.z) & PATH_RAY_SHADOW);
- traverseChild1 = (_mm_movemask_ps(lrhit) & 2) && (__float_as_uint(cnodes.w) & PATH_RAY_SHADOW);
+ traverseChild0 = (movemask(lrhit) & 1) && (__float_as_uint(cnodes.z) & PATH_RAY_SHADOW);
+ traverseChild1 = (movemask(lrhit) & 2) && (__float_as_uint(cnodes.w) & PATH_RAY_SHADOW);
#else
- traverseChild0 = (_mm_movemask_ps(lrhit) & 1);
- traverseChild1 = (_mm_movemask_ps(lrhit) & 2);
+ traverseChild0 = (movemask(lrhit) & 1);
+ traverseChild1 = (movemask(lrhit) & 2);
#endif
#endif // __KERNEL_SSE2__
@@ -164,9 +164,7 @@ ccl_device bool BVH_FUNCTION_NAME
#if !defined(__KERNEL_SSE2__)
bool closestChild1 = (c1min < c0min);
#else
- union { __m128 m128; float v[4]; } uminmax;
- uminmax.m128 = tminmax;
- bool closestChild1 = uminmax.v[1] < uminmax.v[0];
+ bool closestChild1 = tminmax[1] < tminmax[0];
#endif
if(closestChild1) {
@@ -254,8 +252,7 @@ ccl_device bool BVH_FUNCTION_NAME
if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
#endif
{
- float4 Ns = kernel_tex_fetch(__tri_normal, prim);
- shader = __float_as_int(Ns.w);
+ shader = kernel_tex_fetch(__tri_shader, prim);
}
#ifdef __HAIR__
else {
@@ -301,12 +298,12 @@ ccl_device bool BVH_FUNCTION_NAME
num_hits_in_instance = 0;
#if defined(__KERNEL_SSE2__)
- Psplat[0] = _mm_set_ps1(P.x);
- Psplat[1] = _mm_set_ps1(P.y);
- Psplat[2] = _mm_set_ps1(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
isect_array->t = isect_t;
- tsplat = _mm_set_ps(-isect_t, -isect_t, 0.0f, 0.0f);
+ tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
@@ -348,13 +345,13 @@ ccl_device bool BVH_FUNCTION_NAME
}
#if defined(__KERNEL_SSE2__)
- Psplat[0] = _mm_set_ps1(P.x);
- Psplat[1] = _mm_set_ps1(P.y);
- Psplat[2] = _mm_set_ps1(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
isect_t = tmax;
isect_array->t = isect_t;
- tsplat = _mm_set_ps(-isect_t, -isect_t, 0.0f, 0.0f);
+ tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
index a19f05dd371..a8f57cffa78 100644
--- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h
+++ b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
@@ -65,15 +65,15 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
const shuffle_swap_t shuf_identity = shuffle_swap_identity();
const shuffle_swap_t shuf_swap = shuffle_swap_swap();
- const __m128 pn = _mm_castsi128_ps(_mm_set_epi32(0x80000000, 0x80000000, 0, 0));
- __m128 Psplat[3], idirsplat[3];
+ const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+ ssef Psplat[3], idirsplat[3];
shuffle_swap_t shufflexyz[3];
- Psplat[0] = _mm_set_ps1(P.x);
- Psplat[1] = _mm_set_ps1(P.y);
- Psplat[2] = _mm_set_ps1(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- __m128 tsplat = _mm_set_ps(-isect_t, -isect_t, 0.0f, 0.0f);
+ ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
@@ -131,25 +131,27 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
/* Intersect two child bounding boxes, SSE3 version adapted from Embree */
/* fetch node data */
- const __m128 *bvh_nodes = (__m128*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE;
+ const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE;
const float4 cnodes = ((float4*)bvh_nodes)[3];
/* intersect ray against child nodes */
- const __m128 tminmaxx = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[0], shufflexyz[0]), Psplat[0]), idirsplat[0]);
- const __m128 tminmaxy = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[1], shufflexyz[1]), Psplat[1]), idirsplat[1]);
- const __m128 tminmaxz = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[2], shufflexyz[2]), Psplat[2]), idirsplat[2]);
+ const ssef tminmaxx = (shuffle_swap(bvh_nodes[0], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
+ const ssef tminmaxy = (shuffle_swap(bvh_nodes[1], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
+ const ssef tminmaxz = (shuffle_swap(bvh_nodes[2], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
- const __m128 tminmax = _mm_xor_ps(_mm_max_ps(_mm_max_ps(tminmaxx, tminmaxy), _mm_max_ps(tminmaxz, tsplat)), pn);
- const __m128 lrhit = _mm_cmple_ps(tminmax, shuffle<2, 3, 0, 1>(tminmax));
+ /* calculate { c0min, c1min, -c0max, -c1max} */
+ const ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
+ const ssef tminmax = minmax ^ pn;
+ const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
/* decide which nodes to traverse next */
#ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
- traverseChild0 = (_mm_movemask_ps(lrhit) & 1) && (__float_as_uint(cnodes.z) & visibility);
- traverseChild1 = (_mm_movemask_ps(lrhit) & 2) && (__float_as_uint(cnodes.w) & visibility);
+ traverseChild0 = (movemask(lrhit) & 1) && (__float_as_uint(cnodes.z) & visibility);
+ traverseChild1 = (movemask(lrhit) & 2) && (__float_as_uint(cnodes.w) & visibility);
#else
- traverseChild0 = (_mm_movemask_ps(lrhit) & 1);
- traverseChild1 = (_mm_movemask_ps(lrhit) & 2);
+ traverseChild0 = (movemask(lrhit) & 1);
+ traverseChild1 = (movemask(lrhit) & 2);
#endif
#endif // __KERNEL_SSE2__
@@ -161,9 +163,7 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
#if !defined(__KERNEL_SSE2__)
bool closestChild1 = (c1min < c0min);
#else
- union { __m128 m128; float v[4]; } uminmax;
- uminmax.m128 = tminmax;
- bool closestChild1 = uminmax.v[1] < uminmax.v[0];
+ bool closestChild1 = tminmax[1] < tminmax[0];
#endif
if(closestChild1) {
@@ -243,11 +243,11 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
#endif
#if defined(__KERNEL_SSE2__)
- Psplat[0] = _mm_set_ps1(P.x);
- Psplat[1] = _mm_set_ps1(P.y);
- Psplat[2] = _mm_set_ps1(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- tsplat = _mm_set_ps(-isect_t, -isect_t, 0.0f, 0.0f);
+ tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
@@ -279,11 +279,11 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
#endif
#if defined(__KERNEL_SSE2__)
- Psplat[0] = _mm_set_ps1(P.x);
- Psplat[1] = _mm_set_ps1(P.y);
- Psplat[2] = _mm_set_ps1(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- tsplat = _mm_set_ps(-isect_t, -isect_t, 0.0f, 0.0f);
+ tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
diff --git a/intern/cycles/kernel/geom/geom_bvh_traversal.h b/intern/cycles/kernel/geom/geom_bvh_traversal.h
index 9fd40f91471..114d30a479d 100644
--- a/intern/cycles/kernel/geom/geom_bvh_traversal.h
+++ b/intern/cycles/kernel/geom/geom_bvh_traversal.h
@@ -63,24 +63,28 @@ ccl_device bool BVH_FUNCTION_NAME
#endif
isect->t = ray->t;
- isect->object = OBJECT_NONE;
- isect->prim = PRIM_NONE;
isect->u = 0.0f;
isect->v = 0.0f;
+ isect->prim = PRIM_NONE;
+ isect->object = OBJECT_NONE;
+
+#if defined(__KERNEL_DEBUG__)
+ isect->num_traversal_steps = 0;
+#endif
#if defined(__KERNEL_SSE2__)
const shuffle_swap_t shuf_identity = shuffle_swap_identity();
const shuffle_swap_t shuf_swap = shuffle_swap_swap();
- const __m128 pn = _mm_castsi128_ps(_mm_set_epi32(0x80000000, 0x80000000, 0, 0));
- __m128 Psplat[3], idirsplat[3];
+ const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+ ssef Psplat[3], idirsplat[3];
shuffle_swap_t shufflexyz[3];
- Psplat[0] = _mm_set_ps1(P.x);
- Psplat[1] = _mm_set_ps1(P.y);
- Psplat[2] = _mm_set_ps1(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- __m128 tsplat = _mm_set_ps(-isect->t, -isect->t, 0.0f, 0.0f);
+ ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
@@ -151,17 +155,17 @@ ccl_device bool BVH_FUNCTION_NAME
/* Intersect two child bounding boxes, SSE3 version adapted from Embree */
/* fetch node data */
- const __m128 *bvh_nodes = (__m128*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE;
+ const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE;
const float4 cnodes = ((float4*)bvh_nodes)[3];
/* intersect ray against child nodes */
- const __m128 tminmaxx = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[0], shufflexyz[0]), Psplat[0]), idirsplat[0]);
- const __m128 tminmaxy = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[1], shufflexyz[1]), Psplat[1]), idirsplat[1]);
- const __m128 tminmaxz = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[2], shufflexyz[2]), Psplat[2]), idirsplat[2]);
+ const ssef tminmaxx = (shuffle_swap(bvh_nodes[0], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
+ const ssef tminmaxy = (shuffle_swap(bvh_nodes[1], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
+ const ssef tminmaxz = (shuffle_swap(bvh_nodes[2], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
/* calculate { c0min, c1min, -c0max, -c1max} */
- __m128 minmax = _mm_max_ps(_mm_max_ps(tminmaxx, tminmaxy), _mm_max_ps(tminmaxz, tsplat));
- const __m128 tminmax = _mm_xor_ps(minmax, pn);
+ ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
+ const ssef tminmax = minmax ^ pn;
#if FEATURE(BVH_HAIR_MINIMUM_WIDTH)
if(difl != 0.0f) {
@@ -182,16 +186,16 @@ ccl_device bool BVH_FUNCTION_NAME
}
#endif
- const __m128 lrhit = _mm_cmple_ps(tminmax, shuffle<2, 3, 0, 1>(tminmax));
+ const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
/* decide which nodes to traverse next */
#ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
- traverseChild0 = (_mm_movemask_ps(lrhit) & 1) && (__float_as_uint(cnodes.z) & visibility);
- traverseChild1 = (_mm_movemask_ps(lrhit) & 2) && (__float_as_uint(cnodes.w) & visibility);
+ traverseChild0 = (movemask(lrhit) & 1) && (__float_as_uint(cnodes.z) & visibility);
+ traverseChild1 = (movemask(lrhit) & 2) && (__float_as_uint(cnodes.w) & visibility);
#else
- traverseChild0 = (_mm_movemask_ps(lrhit) & 1);
- traverseChild1 = (_mm_movemask_ps(lrhit) & 2);
+ traverseChild0 = (movemask(lrhit) & 1);
+ traverseChild1 = (movemask(lrhit) & 2);
#endif
#endif // __KERNEL_SSE2__
@@ -203,9 +207,7 @@ ccl_device bool BVH_FUNCTION_NAME
#if !defined(__KERNEL_SSE2__)
bool closestChild1 = (c1min < c0min);
#else
- union { __m128 m128; float v[4]; } uminmax;
- uminmax.m128 = tminmax;
- bool closestChild1 = uminmax.v[1] < uminmax.v[0];
+ bool closestChild1 = tminmax[1] < tminmax[0];
#endif
if(closestChild1) {
@@ -228,6 +230,10 @@ ccl_device bool BVH_FUNCTION_NAME
--stackPtr;
}
}
+
+#if defined(__KERNEL_DEBUG__)
+ isect->num_traversal_steps++;
+#endif
}
/* if node is leaf, fetch triangle list */
@@ -276,13 +282,17 @@ ccl_device bool BVH_FUNCTION_NAME
}
}
+#if defined(__KERNEL_DEBUG__)
+ isect->num_traversal_steps++;
+#endif
+
/* shadow ray early termination */
#if defined(__KERNEL_SSE2__)
if(hit) {
if(visibility == PATH_RAY_SHADOW_OPAQUE)
return true;
- tsplat = _mm_set_ps(-isect->t, -isect->t, 0.0f, 0.0f);
+ tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
}
#else
if(hit && visibility == PATH_RAY_SHADOW_OPAQUE)
@@ -304,11 +314,11 @@ ccl_device bool BVH_FUNCTION_NAME
#endif
#if defined(__KERNEL_SSE2__)
- Psplat[0] = _mm_set_ps1(P.x);
- Psplat[1] = _mm_set_ps1(P.y);
- Psplat[2] = _mm_set_ps1(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- tsplat = _mm_set_ps(-isect->t, -isect->t, 0.0f, 0.0f);
+ tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
@@ -334,11 +344,11 @@ ccl_device bool BVH_FUNCTION_NAME
#endif
#if defined(__KERNEL_SSE2__)
- Psplat[0] = _mm_set_ps1(P.x);
- Psplat[1] = _mm_set_ps1(P.y);
- Psplat[2] = _mm_set_ps1(P.z);
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
- tsplat = _mm_set_ps(-isect->t, -isect->t, 0.0f, 0.0f);
+ tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
diff --git a/intern/cycles/kernel/geom/geom_bvh_volume.h b/intern/cycles/kernel/geom/geom_bvh_volume.h
new file mode 100644
index 00000000000..9dd8d226f5b
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom_bvh_volume.h
@@ -0,0 +1,322 @@
+/*
+ * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
+ * and code copyright 2009-2012 Intel Corporation
+ *
+ * Modifications Copyright 2011-2014, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This is a template BVH traversal function for volumes, where
+ * various features can be enabled/disabled. This way we can compile optimized
+ * versions for each case without new features slowing things down.
+ *
+ * BVH_INSTANCING: object instancing
+ * BVH_HAIR: hair curve rendering
+ * BVH_MOTION: motion blur rendering
+ *
+ */
+
+#define FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0)
+
+ccl_device bool BVH_FUNCTION_NAME(KernelGlobals *kg,
+ const Ray *ray,
+ Intersection *isect)
+{
+ /* todo:
+ * - test if pushing distance on the stack helps (for non shadow rays)
+ * - separate version for shadow rays
+ * - likely and unlikely for if() statements
+ * - test restrict attribute for pointers
+ */
+
+ /* traversal stack in CUDA thread-local memory */
+ int traversalStack[BVH_STACK_SIZE];
+ traversalStack[0] = ENTRYPOINT_SENTINEL;
+
+ /* traversal variables in registers */
+ int stackPtr = 0;
+ int nodeAddr = kernel_data.bvh.root;
+
+ /* ray parameters in registers */
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
+
+ const uint visibility = PATH_RAY_ALL_VISIBILITY;
+
+#if FEATURE(BVH_MOTION)
+ Transform ob_tfm;
+#endif
+
+ isect->t = ray->t;
+ isect->u = 0.0f;
+ isect->v = 0.0f;
+ isect->prim = PRIM_NONE;
+ isect->object = OBJECT_NONE;
+
+#if defined(__KERNEL_SSE2__)
+ const shuffle_swap_t shuf_identity = shuffle_swap_identity();
+ const shuffle_swap_t shuf_swap = shuffle_swap_swap();
+
+ const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
+ ssef Psplat[3], idirsplat[3];
+ shuffle_swap_t shufflexyz[3];
+
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
+
+ ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
+
+ gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+#endif
+
+ /* traversal loop */
+ do {
+ do {
+ /* traverse internal nodes */
+ while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
+ bool traverseChild0, traverseChild1;
+ int nodeAddrChild1;
+
+#if !defined(__KERNEL_SSE2__)
+ /* Intersect two child bounding boxes, non-SSE version */
+ float t = isect->t;
+
+ /* fetch node data */
+ float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0);
+ float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1);
+ float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2);
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
+
+ /* intersect ray against child nodes */
+ NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
+ NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
+ NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
+ NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
+ NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
+ NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
+ NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
+ NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
+
+ NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
+ NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
+ NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
+ NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
+ NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
+ NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
+ NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
+ NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
+
+ /* decide which nodes to traverse next */
+#ifdef __VISIBILITY_FLAG__
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & visibility);
+ traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & visibility);
+#else
+ traverseChild0 = (c0max >= c0min);
+ traverseChild1 = (c1max >= c1min);
+#endif
+
+#else // __KERNEL_SSE2__
+ /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
+
+ /* fetch node data */
+ const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE;
+ const float4 cnodes = ((float4*)bvh_nodes)[3];
+
+ /* intersect ray against child nodes */
+ const ssef tminmaxx = (shuffle_swap(bvh_nodes[0], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
+ const ssef tminmaxy = (shuffle_swap(bvh_nodes[1], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
+ const ssef tminmaxz = (shuffle_swap(bvh_nodes[2], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
+
+ /* calculate { c0min, c1min, -c0max, -c1max} */
+ ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
+ const ssef tminmax = minmax ^ pn;
+
+ const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
+
+ /* decide which nodes to traverse next */
+#ifdef __VISIBILITY_FLAG__
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ traverseChild0 = (movemask(lrhit) & 1) && (__float_as_uint(cnodes.z) & visibility);
+ traverseChild1 = (movemask(lrhit) & 2) && (__float_as_uint(cnodes.w) & visibility);
+#else
+ traverseChild0 = (movemask(lrhit) & 1);
+ traverseChild1 = (movemask(lrhit) & 2);
+#endif
+#endif // __KERNEL_SSE2__
+
+ nodeAddr = __float_as_int(cnodes.x);
+ nodeAddrChild1 = __float_as_int(cnodes.y);
+
+ if(traverseChild0 && traverseChild1) {
+ /* both children were intersected, push the farther one */
+#if !defined(__KERNEL_SSE2__)
+ bool closestChild1 = (c1min < c0min);
+#else
+ bool closestChild1 = tminmax[1] < tminmax[0];
+#endif
+
+ if(closestChild1) {
+ int tmp = nodeAddr;
+ nodeAddr = nodeAddrChild1;
+ nodeAddrChild1 = tmp;
+ }
+
+ ++stackPtr;
+ traversalStack[stackPtr] = nodeAddrChild1;
+ }
+ else {
+ /* one child was intersected */
+ if(traverseChild1) {
+ nodeAddr = nodeAddrChild1;
+ }
+ else if(!traverseChild0) {
+ /* neither child was intersected */
+ nodeAddr = traversalStack[stackPtr];
+ --stackPtr;
+ }
+ }
+ }
+
+ /* if node is leaf, fetch triangle list */
+ if(nodeAddr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+(BVH_NODE_SIZE-1));
+ int primAddr = __float_as_int(leaf.x);
+
+#if FEATURE(BVH_INSTANCING)
+ if(primAddr >= 0) {
+#endif
+ int primAddr2 = __float_as_int(leaf.y);
+
+ /* pop */
+ nodeAddr = traversalStack[stackPtr];
+ --stackPtr;
+
+ /* primitive intersection */
+ for(; primAddr < primAddr2; primAddr++) {
+ /* only primitives from volume object */
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
+ int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+
+ if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ continue;
+ }
+
+ /* intersect ray against primitive */
+ uint type = kernel_tex_fetch(__prim_type, primAddr);
+
+ switch(type & PRIMITIVE_ALL) {
+ case PRIMITIVE_TRIANGLE: {
+ triangle_intersect(kg, isect, P, dir, visibility, object, primAddr);
+ break;
+ }
+#if FEATURE(BVH_MOTION)
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr);
+ break;
+ }
+#endif
+#if FEATURE(BVH_HAIR)
+ case PRIMITIVE_CURVE:
+ case PRIMITIVE_MOTION_CURVE: {
+ if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
+ bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0);
+ else
+ bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0);
+ break;
+ }
+#endif
+ default: {
+ break;
+ }
+ }
+ }
+ }
+#if FEATURE(BVH_INSTANCING)
+ else {
+ /* instance push */
+ object = kernel_tex_fetch(__prim_object, -primAddr-1);
+ int object_flag = kernel_tex_fetch(__object_flag, object);
+
+ if(object_flag & SD_OBJECT_HAS_VOLUME) {
+
+#if FEATURE(BVH_MOTION)
+ bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_tfm);
+#else
+ bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
+#endif
+
+#if defined(__KERNEL_SSE2__)
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
+
+ tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+
+ gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+#endif
+
+ ++stackPtr;
+ traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
+
+ nodeAddr = kernel_tex_fetch(__object_node, object);
+ }
+ else {
+ /* pop */
+ nodeAddr = traversalStack[stackPtr];
+ --stackPtr;
+ }
+ }
+ }
+#endif
+ } while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+#if FEATURE(BVH_INSTANCING)
+ if(stackPtr >= 0) {
+ kernel_assert(object != OBJECT_NONE);
+
+ /* instance pop */
+#if FEATURE(BVH_MOTION)
+ bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_tfm);
+#else
+ bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
+#endif
+
+#if defined(__KERNEL_SSE2__)
+ Psplat[0] = ssef(P.x);
+ Psplat[1] = ssef(P.y);
+ Psplat[2] = ssef(P.z);
+
+ tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+
+ gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+#endif
+
+ object = OBJECT_NONE;
+ nodeAddr = traversalStack[stackPtr];
+ --stackPtr;
+ }
+#endif
+ } while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+ return (isect->prim != PRIM_NONE);
+}
+
+#undef FEATURE
+#undef BVH_FUNCTION_NAME
+#undef BVH_FUNCTION_FEATURES
+
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h
index e1d225436a6..b6d21c91916 100644
--- a/intern/cycles/kernel/geom/geom_curve.h
+++ b/intern/cycles/kernel/geom/geom_curve.h
@@ -214,9 +214,9 @@ ccl_device_inline void curvebounds(float *lower, float *upper, float *extremta,
}
#ifdef __KERNEL_SSE2__
-ccl_device_inline __m128 transform_point_T3(const __m128 t[3], const __m128 &a)
+ccl_device_inline ssef transform_point_T3(const ssef t[3], const ssef &a)
{
- return fma(broadcast<0>(a), t[0], fma(broadcast<1>(a), t[1], _mm_mul_ps(broadcast<2>(a), t[2])));
+ return madd(shuffle<0>(a), t[0], madd(shuffle<1>(a), t[1], shuffle<2>(a) * t[2]));
}
#endif
@@ -238,16 +238,16 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
int prim = kernel_tex_fetch(__prim_index, curveAddr);
#ifdef __KERNEL_SSE2__
- __m128 vdir = load_m128(dir);
- __m128 vcurve_coef[4];
+ ssef vdir = load4f(dir);
+ ssef vcurve_coef[4];
const float3 *curve_coef = (float3 *)vcurve_coef;
{
- __m128 dtmp = _mm_mul_ps(vdir, vdir);
- __m128 d_ss = _mm_sqrt_ss(_mm_add_ss(dtmp, broadcast<2>(dtmp)));
- __m128 rd_ss = _mm_div_ss(_mm_set_ss(1.0f), d_ss);
+ ssef dtmp = vdir * vdir;
+ ssef d_ss = mm_sqrt(dtmp + shuffle<2>(dtmp));
+ ssef rd_ss = load1f_first(1.0f) / d_ss;
- __m128i v00vec = _mm_load_si128((__m128i *)&kg->__curves.data[prim]);
+ ssei v00vec = load4i((ssei *)&kg->__curves.data[prim]);
int2 &v00 = (int2 &)v00vec;
int k0 = v00.x + segment;
@@ -255,44 +255,44 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
int ka = max(k0 - 1, v00.x);
int kb = min(k1 + 1, v00.x + v00.y - 1);
- __m128 P_curve[4];
+ ssef P_curve[4];
if(type & PRIMITIVE_CURVE) {
- P_curve[0] = _mm_load_ps(&kg->__curve_keys.data[ka].x);
- P_curve[1] = _mm_load_ps(&kg->__curve_keys.data[k0].x);
- P_curve[2] = _mm_load_ps(&kg->__curve_keys.data[k1].x);
- P_curve[3] = _mm_load_ps(&kg->__curve_keys.data[kb].x);
+ P_curve[0] = load4f(&kg->__curve_keys.data[ka].x);
+ P_curve[1] = load4f(&kg->__curve_keys.data[k0].x);
+ P_curve[2] = load4f(&kg->__curve_keys.data[k1].x);
+ P_curve[3] = load4f(&kg->__curve_keys.data[kb].x);
}
else {
int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object;
motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, (float4*)&P_curve);
}
- __m128 rd_sgn = set_sign_bit<0, 1, 1, 1>(broadcast<0>(rd_ss));
- __m128 mul_zxxy = _mm_mul_ps(shuffle<2, 0, 0, 1>(vdir), rd_sgn);
- __m128 mul_yz = _mm_mul_ps(shuffle<1, 2, 1, 2>(vdir), mul_zxxy);
- __m128 mul_shuf = shuffle<0, 1, 2, 3>(mul_zxxy, mul_yz);
- __m128 vdir0 = _mm_and_ps(vdir, _mm_castsi128_ps(_mm_setr_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0)));
+ ssef rd_sgn = set_sign_bit<0, 1, 1, 1>(shuffle<0>(rd_ss));
+ ssef mul_zxxy = shuffle<2, 0, 0, 1>(vdir) * rd_sgn;
+ ssef mul_yz = shuffle<1, 2, 1, 2>(vdir) * mul_zxxy;
+ ssef mul_shuf = shuffle<0, 1, 2, 3>(mul_zxxy, mul_yz);
+ ssef vdir0 = vdir & cast(ssei(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0));
- __m128 htfm0 = shuffle<0, 2, 0, 3>(mul_shuf, vdir0);
- __m128 htfm1 = shuffle<1, 0, 1, 3>(_mm_set_ss(_mm_cvtss_f32(d_ss)), vdir0);
- __m128 htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0);
+ ssef htfm0 = shuffle<0, 2, 0, 3>(mul_shuf, vdir0);
+ ssef htfm1 = shuffle<1, 0, 1, 3>(load1f_first(extract<0>(d_ss)), vdir0);
+ ssef htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0);
- __m128 htfm[] = { htfm0, htfm1, htfm2 };
- __m128 vP = load_m128(P);
- __m128 p0 = transform_point_T3(htfm, _mm_sub_ps(P_curve[0], vP));
- __m128 p1 = transform_point_T3(htfm, _mm_sub_ps(P_curve[1], vP));
- __m128 p2 = transform_point_T3(htfm, _mm_sub_ps(P_curve[2], vP));
- __m128 p3 = transform_point_T3(htfm, _mm_sub_ps(P_curve[3], vP));
+ ssef htfm[] = { htfm0, htfm1, htfm2 };
+ ssef vP = load4f(P);
+ ssef p0 = transform_point_T3(htfm, P_curve[0] - vP);
+ ssef p1 = transform_point_T3(htfm, P_curve[1] - vP);
+ ssef p2 = transform_point_T3(htfm, P_curve[2] - vP);
+ ssef p3 = transform_point_T3(htfm, P_curve[3] - vP);
float fc = 0.71f;
- __m128 vfc = _mm_set1_ps(fc);
- __m128 vfcxp3 = _mm_mul_ps(vfc, p3);
+ ssef vfc = ssef(fc);
+ ssef vfcxp3 = vfc * p3;
vcurve_coef[0] = p1;
- vcurve_coef[1] = _mm_mul_ps(vfc, _mm_sub_ps(p2, p0));
- vcurve_coef[2] = fma(_mm_set1_ps(fc * 2.0f), p0, fma(_mm_set1_ps(fc - 3.0f), p1, fms(_mm_set1_ps(3.0f - 2.0f * fc), p2, vfcxp3)));
- vcurve_coef[3] = fms(_mm_set1_ps(fc - 2.0f), _mm_sub_ps(p2, p1), fms(vfc, p0, vfcxp3));
+ vcurve_coef[1] = vfc * (p2 - p0);
+ vcurve_coef[2] = madd(ssef(fc * 2.0f), p0, madd(ssef(fc - 3.0f), p1, msub(ssef(3.0f - 2.0f * fc), p2, vfcxp3)));
+ vcurve_coef[3] = msub(ssef(fc - 2.0f), p2 - p1, msub(vfc, p0, vfcxp3));
r_st = ((float4 &)P_curve[1]).w;
r_en = ((float4 &)P_curve[2]).w;
@@ -386,12 +386,12 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
float i_st = tree * resol;
float i_en = i_st + (level * resol);
#ifdef __KERNEL_SSE2__
- __m128 vi_st = _mm_set1_ps(i_st), vi_en = _mm_set1_ps(i_en);
- __m128 vp_st = fma(fma(fma(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]), vi_st, vcurve_coef[0]);
- __m128 vp_en = fma(fma(fma(vcurve_coef[3], vi_en, vcurve_coef[2]), vi_en, vcurve_coef[1]), vi_en, vcurve_coef[0]);
+ ssef vi_st = ssef(i_st), vi_en = ssef(i_en);
+ ssef vp_st = madd(madd(madd(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]), vi_st, vcurve_coef[0]);
+ ssef vp_en = madd(madd(madd(vcurve_coef[3], vi_en, vcurve_coef[2]), vi_en, vcurve_coef[1]), vi_en, vcurve_coef[0]);
- __m128 vbmin = _mm_min_ps(vp_st, vp_en);
- __m128 vbmax = _mm_max_ps(vp_st, vp_en);
+ ssef vbmin = min(vp_st, vp_en);
+ ssef vbmax = max(vp_st, vp_en);
float3 &bmin = (float3 &)vbmin, &bmax = (float3 &)vbmax;
float &bminx = bmin.x, &bminy = bmin.y, &bminz = bmin.z;
@@ -600,13 +600,12 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
#endif
{
/* record intersection */
+ isect->t = t;
+ isect->u = u;
+ isect->v = gd;
isect->prim = curveAddr;
isect->object = object;
isect->type = type;
- isect->u = u;
- isect->v = gd;
- /*isect->transparency = 1.0f - coverage; */
- isect->t = t;
hit = true;
}
@@ -679,38 +678,38 @@ ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isec
float sphere_b_tmp = dot3(dir, sphere_dif1);
float3 sphere_dif2 = sphere_dif1 - sphere_b_tmp * dir;
#else
- __m128 P_curve[2];
+ ssef P_curve[2];
if(type & PRIMITIVE_CURVE) {
- P_curve[0] = _mm_load_ps(&kg->__curve_keys.data[k0].x);
- P_curve[1] = _mm_load_ps(&kg->__curve_keys.data[k1].x);
+ P_curve[0] = load4f(&kg->__curve_keys.data[k0].x);
+ P_curve[1] = load4f(&kg->__curve_keys.data[k1].x);
}
else {
int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object;
motion_curve_keys(kg, fobject, prim, time, k0, k1, (float4*)&P_curve);
}
- const __m128 or12 = shuffle<3, 3, 3, 3>(P_curve[0], P_curve[1]);
+ const ssef or12 = shuffle<3, 3, 3, 3>(P_curve[0], P_curve[1]);
- __m128 r12 = or12;
- const __m128 vP = load_m128(P);
- const __m128 dif = _mm_sub_ps(vP, P_curve[0]);
- const __m128 dif_second = _mm_sub_ps(vP, P_curve[1]);
+ ssef r12 = or12;
+ const ssef vP = load4f(P);
+ const ssef dif = vP - P_curve[0];
+ const ssef dif_second = vP - P_curve[1];
if(difl != 0.0f) {
- const __m128 len1_sq = len3_squared_splat(dif);
- const __m128 len2_sq = len3_squared_splat(dif_second);
- const __m128 len12 = _mm_sqrt_ps(shuffle<0, 0, 0, 0>(len1_sq, len2_sq));
- const __m128 pixelsize12 = _mm_min_ps(_mm_mul_ps(len12, _mm_set1_ps(difl)), _mm_set1_ps(extmax));
- r12 = _mm_max_ps(or12, pixelsize12);
+ const ssef len1_sq = len3_squared_splat(dif);
+ const ssef len2_sq = len3_squared_splat(dif_second);
+ const ssef len12 = mm_sqrt(shuffle<0, 0, 0, 0>(len1_sq, len2_sq));
+ const ssef pixelsize12 = min(len12 * difl, ssef(extmax));
+ r12 = max(or12, pixelsize12);
}
- float or1 = _mm_cvtss_f32(or12), or2 = _mm_cvtss_f32(broadcast<2>(or12));
- float r1 = _mm_cvtss_f32(r12), r2 = _mm_cvtss_f32(broadcast<2>(r12));
-
- const __m128 p21_diff = _mm_sub_ps(P_curve[1], P_curve[0]);
- const __m128 sphere_dif1 = _mm_mul_ps(_mm_add_ps(dif, dif_second), _mm_set1_ps(0.5f));
- const __m128 dir = load_m128(direction);
- const __m128 sphere_b_tmp = dot3_splat(dir, sphere_dif1);
- const __m128 sphere_dif2 = fnma(sphere_b_tmp, dir, sphere_dif1);
+ float or1 = extract<0>(or12), or2 = extract<0>(shuffle<2>(or12));
+ float r1 = extract<0>(r12), r2 = extract<0>(shuffle<2>(r12));
+
+ const ssef p21_diff = P_curve[1] - P_curve[0];
+ const ssef sphere_dif1 = (dif + dif_second) * 0.5f;
+ const ssef dir = load4f(direction);
+ const ssef sphere_b_tmp = dot3_splat(dir, sphere_dif1);
+ const ssef sphere_dif2 = nmsub(sphere_b_tmp, dir, sphere_dif1);
#endif
float mr = max(r1, r2);
@@ -728,7 +727,7 @@ ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isec
#ifndef __KERNEL_SSE2__
float3 tg = p21_diff * invl;
#else
- const __m128 tg = _mm_mul_ps(p21_diff, _mm_set1_ps(invl));
+ const ssef tg = p21_diff * invl;
#endif
float gd = (r2 - r1) * invl;
@@ -752,7 +751,7 @@ ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isec
float3 cprod = cross(tg, dir);
float cprod2sq = len3_squared(cross(tg, dif));
#else
- const __m128 cprod = cross(tg, dir);
+ const ssef cprod = cross(tg, dir);
float cprod2sq = len3_squared(cross_zxy(tg, dif));
#endif
float cprodsq = len3_squared(cprod);
@@ -770,7 +769,7 @@ ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isec
#ifndef __KERNEL_SSE2__
float3 tdif = dif + tcentre * dir;
#else
- const __m128 tdif = fma(_mm_set1_ps(tcentre), dir, dif);
+ const ssef tdif = madd(ssef(tcentre), dir, dif);
#endif
float tdifz = dot3(tdif, tg);
float tdifma = tdifz*gd + r1;
@@ -836,13 +835,12 @@ ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isec
#endif
{
/* record intersection */
+ isect->t = t;
+ isect->u = z*invl;
+ isect->v = gd;
isect->prim = curveAddr;
isect->object = object;
isect->type = type;
- isect->u = z*invl;
- isect->v = gd;
- /*isect->transparency = 1.0f - adjradius;*/
- isect->t = t;
return true;
}
@@ -938,9 +936,10 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
sd->u = isect->u;
sd->v = 0.0f;
#endif
-
+
+ tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3]));
+
if(kernel_data.curve.curveflags & CURVE_KN_RIBBONS) {
- tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3]));
sd->Ng = normalize(-(D - tg * (dot(tg, D))));
}
else {
@@ -952,7 +951,6 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
float gd = isect->v;
if(gd != 0.0f) {
- tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3]));
sd->Ng = sd->Ng - gd * tg;
sd->Ng = normalize(sd->Ng);
}
@@ -1012,10 +1010,6 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con
sd->dPdv = cross(tg, sd->Ng);
#endif
- /*add fading parameter for minimum pixel width with transparency bsdf*/
- /*sd->curve_transparency = isect->transparency;*/
- /*sd->curve_radius = sd->u * gd * l + r1;*/
-
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
Transform tfm = sd->ob_tfm;
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h
index 73338bb6b3b..3a4b20e61aa 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle.h
@@ -233,8 +233,7 @@ ccl_device_inline float3 motion_triangle_refine_subsurface(KernelGlobals *kg, Sh
ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, bool subsurface)
{
/* get shader */
- float4 Ns = kernel_tex_fetch(__tri_normal, sd->prim);
- sd->shader = __float_as_int(Ns.w);
+ sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
/* get motion info */
int numsteps, numverts;
@@ -273,7 +272,11 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderD
#endif
/* compute face normal */
- float3 Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0]));
+ float3 Ng;
+ if(sd->flag & SD_NEGATIVE_SCALE_APPLIED)
+ Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0]));
+ else
+ Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0]));
sd->Ng = Ng;
sd->N = Ng;
@@ -327,14 +330,21 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, Intersection
float t, u, v;
if(ray_triangle_intersect_uv(P, dir, isect->t, verts[2], verts[0], verts[1], &u, &v, &t)) {
- isect->prim = triAddr;
- isect->object = object;
- isect->type = PRIMITIVE_MOTION_TRIANGLE;
- isect->u = u;
- isect->v = v;
- isect->t = t;
+#ifdef __VISIBILITY_FLAG__
+ /* visibility flag test. we do it here under the assumption
+ * that most triangles are culled by node flags */
+ if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility)
+#endif
+ {
+ isect->t = t;
+ isect->u = u;
+ isect->v = v;
+ isect->prim = triAddr;
+ isect->object = object;
+ isect->type = PRIMITIVE_MOTION_TRIANGLE;
- return true;
+ return true;
+ }
}
return false;
@@ -378,12 +388,12 @@ ccl_device_inline void motion_triangle_intersect_subsurface(KernelGlobals *kg, I
/* record intersection */
Intersection *isect = &isect_array[hit];
+ isect->t = t;
+ isect->u = u;
+ isect->v = v;
isect->prim = triAddr;
isect->object = object;
isect->type = PRIMITIVE_MOTION_TRIANGLE;
- isect->u = u;
- isect->v = v;
- isect->t = t;
}
}
#endif
diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h
index 533973621d7..5df6c75df86 100644
--- a/intern/cycles/kernel/geom/geom_primitive.h
+++ b/intern/cycles/kernel/geom/geom_primitive.h
@@ -143,6 +143,7 @@ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd)
/* center position */
float3 center;
+#ifdef __HAIR__
if(sd->type & PRIMITIVE_ALL_CURVE) {
center = curve_motion_center_location(kg, sd);
@@ -150,6 +151,7 @@ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd)
object_position_transform(kg, sd, &center);
}
else
+#endif
center = sd->P;
float3 motion_pre = center, motion_post = center;
diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h
index 355e36fef0c..c08a82ee038 100644
--- a/intern/cycles/kernel/geom/geom_triangle.h
+++ b/intern/cycles/kernel/geom/geom_triangle.h
@@ -18,7 +18,7 @@
/* Triangle Primitive
*
* Basic triangle with 3 vertices is used to represent mesh surfaces. For BVH
- * ray intersection we use a precomputed triangle storage to accelarate
+ * ray intersection we use a precomputed triangle storage to accelerate
* intersection at the cost of more memory usage */
CCL_NAMESPACE_BEGIN
@@ -116,11 +116,28 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, ShaderDat
#endif
}
+/* normal on triangle */
+ccl_device_inline float3 triangle_normal(KernelGlobals *kg, ShaderData *sd)
+{
+ /* load triangle vertices */
+ float4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
+
+ float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
+ float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
+ float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
+
+ /* return normal */
+ if(sd->flag & SD_NEGATIVE_SCALE_APPLIED)
+ return normalize(cross(v2 - v0, v1 - v0));
+ else
+ return normalize(cross(v1 - v0, v2 - v0));
+}
+
/* point and normal on triangle */
-ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int prim, float u, float v, float3 *P, float3 *Ng, int *shader)
+ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int object, int prim, float u, float v, float3 *P, float3 *Ng, int *shader)
{
/* load triangle vertices */
- float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim));
+ float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
@@ -130,16 +147,24 @@ ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int prim, float
float t = 1.0f - u - v;
*P = (u*v0 + v*v1 + t*v2);
- float4 Nm = kernel_tex_fetch(__tri_normal, prim);
- *Ng = make_float3(Nm.x, Nm.y, Nm.z);
- *shader = __float_as_int(Nm.w);
+ /* get object flags, instance-aware */
+ int object_flag = kernel_tex_fetch(__object_flag, object >= 0 ? object : ~object);
+
+ /* compute normal */
+ if(object_flag & SD_NEGATIVE_SCALE_APPLIED)
+ *Ng = normalize(cross(v2 - v0, v1 - v0));
+ else
+ *Ng = normalize(cross(v1 - v0, v2 - v0));
+
+ /* shader`*/
+ *shader = kernel_tex_fetch(__tri_shader, prim);
}
/* Triangle vertex locations */
ccl_device_inline void triangle_vertices(KernelGlobals *kg, int prim, float3 P[3])
{
- float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim));
+ float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
@@ -151,7 +176,7 @@ ccl_device_inline void triangle_vertices(KernelGlobals *kg, int prim, float3 P[3
ccl_device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int prim, float u, float v)
{
/* load triangle vertices */
- float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim));
+ float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.x)));
float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.y)));
@@ -165,7 +190,7 @@ ccl_device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int prim, flo
ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, int prim, float3 *dPdu, float3 *dPdv)
{
/* fetch triangle vertex coordinates */
- float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim));
+ float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
float3 p0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
float3 p1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
@@ -187,7 +212,7 @@ ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *s
return kernel_tex_fetch(__attributes_float, offset + sd->prim);
}
else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) {
- float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim));
+ float4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
float f0 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.x));
float f1 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.y));
@@ -230,7 +255,7 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData
return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim));
}
else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) {
- float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim));
+ float4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x)));
float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y)));
@@ -243,11 +268,20 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData
return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
}
- else if(elem == ATTR_ELEMENT_CORNER) {
+ else if(elem == ATTR_ELEMENT_CORNER || elem == ATTR_ELEMENT_CORNER_BYTE) {
int tri = offset + sd->prim*3;
- float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0));
- float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1));
- float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2));
+ float3 f0, f1, f2;
+
+ if(elem == ATTR_ELEMENT_CORNER) {
+ f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0));
+ f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1));
+ f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2));
+ }
+ else {
+ f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 0));
+ f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 1));
+ f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 2));
+ }
#ifdef __RAY_DIFFERENTIALS__
if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
@@ -300,12 +334,12 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg, Intersection *isect
#endif
{
/* record intersection */
+ isect->t = t;
+ isect->u = u;
+ isect->v = v;
isect->prim = triAddr;
isect->object = object;
isect->type = PRIMITIVE_TRIANGLE;
- isect->u = u;
- isect->v = v;
- isect->t = t;
return true;
}
}
@@ -363,12 +397,12 @@ ccl_device_inline void triangle_intersect_subsurface(KernelGlobals *kg, Intersec
/* record intersection */
Intersection *isect = &isect_array[hit];
+ isect->t = t;
+ isect->u = u;
+ isect->v = v;
isect->prim = triAddr;
isect->object = object;
isect->type = PRIMITIVE_TRIANGLE;
- isect->u = u;
- isect->v = v;
- isect->t = t;
}
}
}
diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h
index 963d6cbee9c..3cb6d168f80 100644
--- a/intern/cycles/kernel/geom/geom_volume.h
+++ b/intern/cycles/kernel/geom/geom_volume.h
@@ -49,7 +49,15 @@ ccl_device float3 volume_normalized_position(KernelGlobals *kg, const ShaderData
ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int id, float *dx, float *dy)
{
float3 P = volume_normalized_position(kg, sd, sd->P);
- float4 r = kernel_tex_image_interp_3d(id, P.x, P.y, P.z);
+#ifdef __KERNEL_GPU__
+ float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+#else
+ float4 r;
+ if(sd->flag & SD_VOLUME_CUBIC)
+ r = kernel_tex_image_interp_3d_ex(id, P.x, P.y, P.z, INTERPOLATION_CUBIC);
+ else
+ r = kernel_tex_image_interp_3d(id, P.x, P.y, P.z);
+#endif
if(dx) *dx = 0.0f;
if(dx) *dy = 0.0f;
@@ -61,7 +69,15 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd,
ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int id, float3 *dx, float3 *dy)
{
float3 P = volume_normalized_position(kg, sd, sd->P);
- float4 r = kernel_tex_image_interp_3d(id, P.x, P.y, P.z);
+#ifdef __KERNEL_GPU__
+ float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+#else
+ float4 r;
+ if(sd->flag & SD_VOLUME_CUBIC)
+ r = kernel_tex_image_interp_3d_ex(id, P.x, P.y, P.z, INTERPOLATION_CUBIC);
+ else
+ r = kernel_tex_image_interp_3d(id, P.x, P.y, P.z);
+#endif
if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernel.cl
index 6988ad6027f..4f20ef9ca15 100644
--- a/intern/cycles/kernel/kernel.cl
+++ b/intern/cycles/kernel/kernel.cl
@@ -23,7 +23,7 @@
#include "kernel_film.h"
#include "kernel_path.h"
-#include "kernel_displace.h"
+#include "kernel_bake.h"
__kernel void kernel_ocl_path_trace(
ccl_constant KernelData *data,
@@ -115,7 +115,7 @@ __kernel void kernel_ocl_shader(
ccl_global type *name,
#include "kernel_textures.h"
- int type, int sx, int sw)
+ int type, int sx, int sw, int offset, int sample)
{
KernelGlobals kglobals, *kg = &kglobals;
@@ -128,6 +128,31 @@ __kernel void kernel_ocl_shader(
int x = sx + get_global_id(0);
if(x < sx + sw)
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, x);
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, x, sample);
+}
+
+__kernel void kernel_ocl_bake(
+ ccl_constant KernelData *data,
+ ccl_global uint4 *input,
+ ccl_global float4 *output,
+
+#define KERNEL_TEX(type, ttype, name) \
+ ccl_global type *name,
+#include "kernel_textures.h"
+
+ int type, int sx, int sw, int offset, int sample)
+{
+ KernelGlobals kglobals, *kg = &kglobals;
+
+ kg->data = data;
+
+#define KERNEL_TEX(type, ttype, name) \
+ kg->name = name;
+#include "kernel_textures.h"
+
+ int x = sx + get_global_id(0);
+
+ if(x < sx + sw)
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, offset, sample);
}
diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp
index 173028d50c8..fa2113fbb46 100644
--- a/intern/cycles/kernel/kernel.cpp
+++ b/intern/cycles/kernel/kernel.cpp
@@ -23,7 +23,7 @@
#include "kernel_globals.h"
#include "kernel_film.h"
#include "kernel_path.h"
-#include "kernel_displace.h"
+#include "kernel_bake.h"
CCL_NAMESPACE_BEGIN
@@ -120,9 +120,12 @@ void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *bu
/* Shader Evaluation */
-void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
+void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
{
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ if(type >= SHADER_EVAL_BAKE)
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
+ else
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu
index 636e48b5456..489daacddde 100644
--- a/intern/cycles/kernel/kernel.cu
+++ b/intern/cycles/kernel/kernel.cu
@@ -22,7 +22,7 @@
#include "kernel_globals.h"
#include "kernel_film.h"
#include "kernel_path.h"
-#include "kernel_displace.h"
+#include "kernel_bake.h"
/* device data taken from CUDA occupancy calculator */
@@ -52,8 +52,20 @@
#define CUDA_KERNEL_MAX_REGISTERS 63
#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
-/* 5.0 */
-#elif __CUDA_ARCH__ == 500
+/* 3.2 */
+#elif __CUDA_ARCH__ == 320
+#define CUDA_MULTIPRESSOR_MAX_REGISTERS 32768
+#define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16
+#define CUDA_BLOCK_MAX_THREADS 1024
+#define CUDA_THREAD_MAX_REGISTERS 63
+
+/* tunable parameters */
+#define CUDA_THREADS_BLOCK_WIDTH 16
+#define CUDA_KERNEL_MAX_REGISTERS 63
+#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
+
+/* 5.0 and 5.2 */
+#elif __CUDA_ARCH__ == 500 || __CUDA_ARCH__ == 520
#define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
#define CUDA_MULTIPROCESSOR_MAX_BLOCKS 32
#define CUDA_BLOCK_MAX_THREADS 1024
@@ -61,12 +73,12 @@
/* tunable parameters */
#define CUDA_THREADS_BLOCK_WIDTH 16
-#define CUDA_KERNEL_MAX_REGISTERS 63
+#define CUDA_KERNEL_MAX_REGISTERS 40
#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
/* unknown architecture */
#else
-#error "Unknown or unuspported CUDA architecture, can't determine launch bounds"
+#error "Unknown or unsupported CUDA architecture, can't determine launch bounds"
#endif
/* compute number of threads per block and minimum blocks per multiprocessor
@@ -146,11 +158,22 @@ kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scal
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx)
+kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw, int offset, int sample)
+{
+ int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
+
+ if(x < sx + sw)
+ kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x, sample);
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw, int offset, int sample)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
- kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x);
+ if(x < sx + sw)
+ kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x, offset, sample);
}
#endif
diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h
index c4a08646bab..19e06b88797 100644
--- a/intern/cycles/kernel/kernel.h
+++ b/intern/cycles/kernel/kernel.h
@@ -41,7 +41,7 @@ void kernel_cpu_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i);
+ int type, int i, int offset, int sample);
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
@@ -51,7 +51,7 @@ void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf
void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i);
+ int type, int i, int offset, int sample);
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
@@ -62,7 +62,7 @@ void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf
void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i);
+ int type, int i, int offset, int sample);
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
@@ -73,7 +73,7 @@ void kernel_cpu_sse41_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *bu
void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i);
+ int type, int i, int offset, int sample);
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
@@ -84,7 +84,18 @@ void kernel_cpu_avx_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buff
void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i);
+ int type, int i, int offset, int sample);
+#endif
+
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
+void kernel_cpu_avx2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
+ int sample, int x, int y, int offset, int stride);
+void kernel_cpu_avx2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
+ float sample_scale, int x, int y, int offset, int stride);
+void kernel_cpu_avx2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
+ float sample_scale, int x, int y, int offset, int stride);
+void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output,
+ int type, int i, int offset, int sample);
#endif
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index b4f6dcdace9..b0efcdc66a7 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -32,10 +32,11 @@ ccl_device_inline void bsdf_eval_init(BsdfEval *eval, ClosureType type, float3 v
eval->transmission = make_float3(0.0f, 0.0f, 0.0f);
eval->transparent = make_float3(0.0f, 0.0f, 0.0f);
eval->subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ eval->scatter = make_float3(0.0f, 0.0f, 0.0f);
if(type == CLOSURE_BSDF_TRANSPARENT_ID)
eval->transparent = value;
- else if(CLOSURE_IS_BSDF_DIFFUSE(type) || CLOSURE_IS_PHASE(type))
+ else if(CLOSURE_IS_BSDF_DIFFUSE(type))
eval->diffuse = value;
else if(CLOSURE_IS_BSDF_GLOSSY(type))
eval->glossy = value;
@@ -43,6 +44,8 @@ ccl_device_inline void bsdf_eval_init(BsdfEval *eval, ClosureType type, float3 v
eval->transmission = value;
else if(CLOSURE_IS_BSDF_BSSRDF(type))
eval->subsurface = value;
+ else if(CLOSURE_IS_PHASE(type))
+ eval->scatter = value;
}
else
eval->diffuse = value;
@@ -51,11 +54,17 @@ ccl_device_inline void bsdf_eval_init(BsdfEval *eval, ClosureType type, float3 v
#endif
}
-ccl_device_inline void bsdf_eval_accum(BsdfEval *eval, ClosureType type, float3 value)
+/* TODO(sergey): This is just a workaround for annoying 6.5 compiler bug. */
+#if !defined(__KERNEL_CUDA__) || __CUDA_ARCH__ < 500
+ccl_device_inline
+#else
+ccl_device_noinline
+#endif
+void bsdf_eval_accum(BsdfEval *eval, ClosureType type, float3 value)
{
#ifdef __PASSES__
if(eval->use_light_pass) {
- if(CLOSURE_IS_BSDF_DIFFUSE(type) || CLOSURE_IS_PHASE(type))
+ if(CLOSURE_IS_BSDF_DIFFUSE(type))
eval->diffuse += value;
else if(CLOSURE_IS_BSDF_GLOSSY(type))
eval->glossy += value;
@@ -63,6 +72,8 @@ ccl_device_inline void bsdf_eval_accum(BsdfEval *eval, ClosureType type, float3
eval->transmission += value;
else if(CLOSURE_IS_BSDF_BSSRDF(type))
eval->subsurface += value;
+ else if(CLOSURE_IS_PHASE(type))
+ eval->scatter += value;
/* skipping transparent, this function is used by for eval(), will be zero then */
}
@@ -81,7 +92,8 @@ ccl_device_inline bool bsdf_eval_is_zero(BsdfEval *eval)
&& is_zero(eval->glossy)
&& is_zero(eval->transmission)
&& is_zero(eval->transparent)
- && is_zero(eval->subsurface);
+ && is_zero(eval->subsurface)
+ && is_zero(eval->scatter);
}
else
return is_zero(eval->diffuse);
@@ -98,6 +110,7 @@ ccl_device_inline void bsdf_eval_mul(BsdfEval *eval, float3 value)
eval->glossy *= value;
eval->transmission *= value;
eval->subsurface *= value;
+ eval->scatter *= value;
/* skipping transparent, this function is used by for eval(), will be zero then */
}
@@ -111,7 +124,7 @@ ccl_device_inline void bsdf_eval_mul(BsdfEval *eval, float3 value)
/* Path Radiance
*
* We accumulate different render passes separately. After summing at the end
- * to get the combined result, it should be identical. We definte directly
+ * to get the combined result, it should be identical. We definite directly
* visible as the first non-transparent hit, while indirectly visible are the
* bounces after that. */
@@ -130,21 +143,25 @@ ccl_device_inline void path_radiance_init(PathRadiance *L, int use_light_pass)
L->color_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->color_transmission = make_float3(0.0f, 0.0f, 0.0f);
L->color_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ L->color_scatter = make_float3(0.0f, 0.0f, 0.0f);
L->direct_diffuse = make_float3(0.0f, 0.0f, 0.0f);
L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f);
L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f);
L->path_diffuse = make_float3(0.0f, 0.0f, 0.0f);
L->path_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->path_transmission = make_float3(0.0f, 0.0f, 0.0f);
L->path_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ L->path_scatter = make_float3(0.0f, 0.0f, 0.0f);
L->emission = make_float3(0.0f, 0.0f, 0.0f);
L->background = make_float3(0.0f, 0.0f, 0.0f);
@@ -174,14 +191,16 @@ ccl_device_inline void path_radiance_bsdf_bounce(PathRadiance *L, float3 *throug
L->path_glossy = bsdf_eval->glossy*value;
L->path_transmission = bsdf_eval->transmission*value;
L->path_subsurface = bsdf_eval->subsurface*value;
+ L->path_scatter = bsdf_eval->scatter*value;
- *throughput = L->path_diffuse + L->path_glossy + L->path_transmission + L->path_subsurface;
+ *throughput = L->path_diffuse + L->path_glossy + L->path_transmission + L->path_subsurface + L->path_scatter;
L->direct_throughput = *throughput;
}
else {
/* transparent bounce before first hit, or indirectly visible through BSDF */
- float3 sum = (bsdf_eval->diffuse + bsdf_eval->glossy + bsdf_eval->transmission + bsdf_eval->transparent + bsdf_eval->subsurface)*inverse_pdf;
+ float3 sum = (bsdf_eval->diffuse + bsdf_eval->glossy + bsdf_eval->transmission + bsdf_eval->transparent +
+ bsdf_eval->subsurface + bsdf_eval->scatter) * inverse_pdf;
*throughput *= sum;
}
}
@@ -241,6 +260,7 @@ ccl_device_inline void path_radiance_accum_light(PathRadiance *L, float3 through
L->direct_glossy += throughput*bsdf_eval->glossy*shadow;
L->direct_transmission += throughput*bsdf_eval->transmission*shadow;
L->direct_subsurface += throughput*bsdf_eval->subsurface*shadow;
+ L->direct_scatter += throughput*bsdf_eval->scatter*shadow;
if(is_lamp) {
L->shadow.x += shadow.x*shadow_fac;
@@ -250,7 +270,7 @@ ccl_device_inline void path_radiance_accum_light(PathRadiance *L, float3 through
}
else {
/* indirectly visible lighting after BSDF bounce */
- float3 sum = bsdf_eval->diffuse + bsdf_eval->glossy + bsdf_eval->transmission + bsdf_eval->subsurface;
+ float3 sum = bsdf_eval->diffuse + bsdf_eval->glossy + bsdf_eval->transmission + bsdf_eval->subsurface + bsdf_eval->scatter;
L->indirect += throughput*sum*shadow;
}
}
@@ -291,12 +311,14 @@ ccl_device_inline void path_radiance_sum_indirect(PathRadiance *L)
L->direct_glossy += L->path_glossy*L->direct_emission;
L->direct_transmission += L->path_transmission*L->direct_emission;
L->direct_subsurface += L->path_subsurface*L->direct_emission;
+ L->direct_scatter += L->path_scatter*L->direct_emission;
L->indirect = safe_divide_color(L->indirect, L->direct_throughput);
L->indirect_diffuse += L->path_diffuse*L->indirect;
L->indirect_glossy += L->path_glossy*L->indirect;
L->indirect_transmission += L->path_transmission*L->indirect;
L->indirect_subsurface += L->path_subsurface*L->indirect;
+ L->indirect_scatter += L->path_scatter*L->indirect;
}
#endif
}
@@ -309,6 +331,7 @@ ccl_device_inline void path_radiance_reset_indirect(PathRadiance *L)
L->path_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->path_transmission = make_float3(0.0f, 0.0f, 0.0f);
L->path_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ L->path_scatter = make_float3(0.0f, 0.0f, 0.0f);
L->direct_emission = make_float3(0.0f, 0.0f, 0.0f);
L->indirect = make_float3(0.0f, 0.0f, 0.0f);
@@ -327,8 +350,8 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadi
if(L->use_light_pass) {
path_radiance_sum_indirect(L);
- L_direct = L->direct_diffuse + L->direct_glossy + L->direct_transmission + L->direct_subsurface + L->emission;
- L_indirect = L->indirect_diffuse + L->indirect_glossy + L->indirect_transmission + L->indirect_subsurface;
+ L_direct = L->direct_diffuse + L->direct_glossy + L->direct_transmission + L->direct_subsurface + L->direct_scatter + L->emission;
+ L_indirect = L->indirect_diffuse + L->indirect_glossy + L->indirect_transmission + L->indirect_subsurface + L->indirect_scatter;
if(!kernel_data.background.transparent)
L_direct += L->background;
@@ -344,11 +367,13 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadi
L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f);
L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f);
L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f);
+ L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f);
L->emission = make_float3(0.0f, 0.0f, 0.0f);
}
@@ -368,6 +393,7 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadi
L->direct_glossy *= scale;
L->direct_transmission *= scale;
L->direct_subsurface *= scale;
+ L->direct_scatter *= scale;
L->emission *= scale;
L->background *= scale;
}
@@ -382,6 +408,7 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadi
L->indirect_glossy *= scale;
L->indirect_transmission *= scale;
L->indirect_subsurface *= scale;
+ L->indirect_scatter *= scale;
}
/* Sum again, after clamping */
@@ -416,11 +443,13 @@ ccl_device_inline void path_radiance_accum_sample(PathRadiance *L, PathRadiance
L->direct_glossy += L_sample->direct_glossy*fac;
L->direct_transmission += L_sample->direct_transmission*fac;
L->direct_subsurface += L_sample->direct_subsurface*fac;
+ L->direct_scatter += L_sample->direct_scatter*fac;
L->indirect_diffuse += L_sample->indirect_diffuse*fac;
L->indirect_glossy += L_sample->indirect_glossy*fac;
L->indirect_transmission += L_sample->indirect_transmission*fac;
L->indirect_subsurface += L_sample->indirect_subsurface*fac;
+ L->indirect_scatter += L_sample->indirect_scatter*fac;
L->emission += L_sample->emission*fac;
L->background += L_sample->background*fac;
diff --git a/intern/cycles/kernel/kernel_avx.cpp b/intern/cycles/kernel/kernel_avx.cpp
index 354214c406e..e7ff21a6f09 100644
--- a/intern/cycles/kernel/kernel_avx.cpp
+++ b/intern/cycles/kernel/kernel_avx.cpp
@@ -24,6 +24,7 @@
#define __KERNEL_SSE3__
#define __KERNEL_SSSE3__
#define __KERNEL_SSE41__
+#define __KERNEL_AVX__
#endif
#include "util_optimization.h"
@@ -37,7 +38,7 @@
#include "kernel_globals.h"
#include "kernel_film.h"
#include "kernel_path.h"
-#include "kernel_displace.h"
+#include "kernel_bake.h"
CCL_NAMESPACE_BEGIN
@@ -67,9 +68,12 @@ void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float
/* Shader Evaluate */
-void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
+void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
{
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ if(type >= SHADER_EVAL_BAKE)
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
+ else
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_avx2.cpp b/intern/cycles/kernel/kernel_avx2.cpp
new file mode 100644
index 00000000000..cb1662bbfbe
--- /dev/null
+++ b/intern/cycles/kernel/kernel_avx2.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2011-2014 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+/* Optimized CPU kernel entry points. This file is compiled with AVX2
+ * optimization flags and nearly all functions inlined, while kernel.cpp
+ * is compiled without for other CPU's. */
+
+/* SSE optimization disabled for now on 32 bit, see bug #36316 */
+#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
+#define __KERNEL_SSE2__
+#define __KERNEL_SSE3__
+#define __KERNEL_SSSE3__
+#define __KERNEL_SSE41__
+#define __KERNEL_AVX__
+#define __KERNEL_AVX2__
+#endif
+
+#include "util_optimization.h"
+
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
+
+#include "kernel.h"
+#include "kernel_compat_cpu.h"
+#include "kernel_math.h"
+#include "kernel_types.h"
+#include "kernel_globals.h"
+#include "kernel_film.h"
+#include "kernel_path.h"
+#include "kernel_bake.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Path Tracing */
+
+void kernel_cpu_avx2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
+{
+#ifdef __BRANCHED_PATH__
+ if(kernel_data.integrator.branched)
+ kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
+ else
+#endif
+ kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
+}
+
+/* Film */
+
+void kernel_cpu_avx2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride)
+{
+ kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
+}
+
+void kernel_cpu_avx2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride)
+{
+ kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
+}
+
+/* Shader Evaluate */
+
+void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
+{
+ if(type >= SHADER_EVAL_BAKE)
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
+ else
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
+}
+
+CCL_NAMESPACE_END
+#else
+
+/* needed for some linkers in combination with scons making empty compilation unit in a library */
+void __dummy_function_cycles_avx2(void);
+void __dummy_function_cycles_avx2(void) {}
+
+#endif
diff --git a/intern/cycles/kernel/kernel_displace.h b/intern/cycles/kernel/kernel_bake.h
index b8c64af658f..a1ec080e3d3 100644
--- a/intern/cycles/kernel/kernel_displace.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -17,65 +17,125 @@
CCL_NAMESPACE_BEGIN
ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, RNG rng,
- bool is_combined, bool is_ao, bool is_sss)
+ const bool is_combined, const bool is_ao, const bool is_sss, int sample)
{
- int samples = kernel_data.integrator.aa_samples;
-
/* initialize master radiance accumulator */
kernel_assert(kernel_data.film.use_light_pass);
path_radiance_init(L, kernel_data.film.use_light_pass);
- /* take multiple samples */
- for(int sample = 0; sample < samples; sample++) {
- PathRadiance L_sample;
- PathState state;
- Ray ray;
- float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+ PathRadiance L_sample;
+ PathState state;
+ Ray ray;
+ float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+ bool is_sss_sample = is_sss;
- /* init radiance */
- path_radiance_init(&L_sample, kernel_data.film.use_light_pass);
+ /* init radiance */
+ path_radiance_init(&L_sample, kernel_data.film.use_light_pass);
- /* init path state */
- path_state_init(kg, &state, &rng, sample);
- state.num_samples = samples;
+ /* init path state */
+ path_state_init(kg, &state, &rng, sample, NULL);
- /* evaluate surface shader */
- float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF);
- shader_eval_surface(kg, sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
+ /* evaluate surface shader */
+ float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF);
+ shader_eval_surface(kg, sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
- /* TODO, disable the closures we won't need */
+ /* TODO, disable the closures we won't need */
+
+#ifdef __BRANCHED_PATH__
+ if(!kernel_data.integrator.branched) {
+ /* regular path tracer */
+#endif
/* sample ambient occlusion */
if(is_combined || is_ao) {
kernel_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
}
- /* sample subsurface scattering */
- if((is_combined || is_sss) && (sd->flag & SD_BSSRDF)) {
#ifdef __SUBSURFACE__
+ /* sample subsurface scattering */
+ if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) {
/* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
if (kernel_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, &throughput))
- is_sss = true;
-#endif
+ is_sss_sample = true;
}
+#endif
/* sample light and BSDF */
- if((!is_sss) && (!is_ao)) {
- if(kernel_path_integrate_lighting(kg, &rng, sd, &throughput, &state, &L_sample, &ray)) {
+ if((!is_sss_sample) && (!is_ao)) {
+
+ if(sd->flag & SD_EMISSION) {
+ float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
+ path_radiance_accum_emission(&L_sample, throughput, emission, state.bounce);
+ }
+
+ kernel_path_surface_connect_light(kg, &rng, sd, throughput, &state, &L_sample);
+
+ if(kernel_path_surface_bounce(kg, &rng, sd, &throughput, &state, &L_sample, &ray)) {
#ifdef __LAMP_MIS__
state.ray_t = 0.0f;
#endif
/* compute indirect light */
- kernel_path_indirect(kg, &rng, ray, throughput, state.num_samples, state, &L_sample);
+ kernel_path_indirect(kg, &rng, ray, throughput, 1, state, &L_sample);
/* sum and reset indirect light pass variables for the next samples */
path_radiance_sum_indirect(&L_sample);
path_radiance_reset_indirect(&L_sample);
}
}
+#ifdef __BRANCHED_PATH__
+ }
+ else {
+ /* branched path tracer */
+
+ /* sample ambient occlusion */
+ if(is_combined || is_ao) {
+ kernel_branched_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
+ }
+
+#ifdef __SUBSURFACE__
+ /* sample subsurface scattering */
+ if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) {
+ /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
+ kernel_branched_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, throughput);
+ }
+#endif
+
+ /* sample light and BSDF */
+ if((!is_sss_sample) && (!is_ao)) {
+
+ if(sd->flag & SD_EMISSION) {
+ float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
+ path_radiance_accum_emission(&L_sample, throughput, emission, state.bounce);
+ }
+
+#if defined(__EMISSION__)
+ /* direct light */
+ if(kernel_data.integrator.use_direct_light) {
+ bool all = kernel_data.integrator.sample_all_lights_direct;
+ kernel_branched_path_surface_connect_light(kg, &rng,
+ sd, &state, throughput, 1.0f, &L_sample, all);
+ }
+#endif
+
+ /* indirect light */
+ kernel_branched_path_surface_indirect_light(kg, &rng,
+ sd, throughput, 1.0f, &state, &L_sample);
+ }
+ }
+#endif
+
+ /* accumulate into master L */
+ path_radiance_accum_sample(L, &L_sample, 1);
+}
- /* accumulate into master L */
- path_radiance_accum_sample(L, &L_sample, samples);
+ccl_device bool is_aa_pass(ShaderEvalType type)
+{
+ switch(type) {
+ case SHADER_EVAL_UV:
+ case SHADER_EVAL_NORMAL:
+ return false;
+ default:
+ return true;
}
}
@@ -99,7 +159,21 @@ ccl_device bool is_light_pass(ShaderEvalType type)
}
}
-ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i)
+#if 0
+ccl_device_inline float bake_clamp_mirror_repeat(float u)
+{
+ /* use mirror repeat (like opengl texture) so that if the barycentric
+ * coordinate goes past the end of the triangle it is not always clamped
+ * to the same value, gives ugly patterns */
+ float fu = floorf(u);
+ u = u - fu;
+
+ return (((int)fu) & 1)? 1.0f - u: u;
+}
+#endif
+
+ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output,
+ ShaderEvalType type, int i, int offset, int sample)
{
ShaderData sd;
uint4 in = input[i * 2];
@@ -121,10 +195,28 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
float dvdx = __uint_as_float(diff.z);
float dvdy = __uint_as_float(diff.w);
+ int num_samples = kernel_data.integrator.aa_samples;
+
+ /* random number generator */
+ RNG rng = cmj_hash(offset + i, 0);
+
+#if 0
+ uint rng_state = cmj_hash(i, 0);
+ float filter_x, filter_y;
+ path_rng_init(kg, &rng_state, sample, num_samples, &rng, 0, 0, &filter_x, &filter_y);
+
+ /* subpixel u/v offset */
+ if(sample > 0) {
+ u = bake_clamp_mirror_repeat(u + dudx*(filter_x - 0.5f) + dudy*(filter_y - 0.5f));
+ v = bake_clamp_mirror_repeat(v + dvdx*(filter_x - 0.5f) + dvdy*(filter_y - 0.5f));
+ }
+#endif
+
+ /* triangle */
int shader;
float3 P, Ng;
- triangle_point_normal(kg, prim, u, v, &P, &Ng, &shader);
+ triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
/* dummy initilizations copied from SHADER_EVAL_DISPLACE */
float3 I = Ng;
@@ -147,12 +239,14 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
sd.dv.dx = dvdx;
sd.dv.dy = dvdy;
+ /* light passes */
if(is_light_pass(type)) {
- RNG rng = cmj_hash(i, 0);
- compute_light_pass(kg, &sd, &L, rng, (type == SHADER_EVAL_COMBINED),
- (type == SHADER_EVAL_AO),
- (type == SHADER_EVAL_SUBSURFACE_DIRECT ||
- type == SHADER_EVAL_SUBSURFACE_INDIRECT));
+ compute_light_pass(kg, &sd, &L, rng,
+ (type == SHADER_EVAL_COMBINED),
+ (type == SHADER_EVAL_AO),
+ (type == SHADER_EVAL_SUBSURFACE_DIRECT ||
+ type == SHADER_EVAL_SUBSURFACE_INDIRECT),
+ sample);
}
switch (type) {
@@ -307,17 +401,16 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
}
/* write output */
- output[i] = make_float4(out.x, out.y, out.z, 1.0f);
- return;
+ float output_fac = is_aa_pass(type)? 1.0f/num_samples: 1.0f;
+
+ if(sample == 0)
+ output[i] = make_float4(out.x, out.y, out.z, 1.0f) * output_fac;
+ else
+ output[i] += make_float4(out.x, out.y, out.z, 1.0f) * output_fac;
}
-ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i)
+ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i, int sample)
{
- if(type >= SHADER_EVAL_BAKE) {
- kernel_bake_evaluate(kg, input, output, type, i);
- return;
- }
-
ShaderData sd;
uint4 in = input[i];
float3 out;
@@ -363,7 +456,10 @@ ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *inpu
}
/* write output */
- output[i] = make_float4(out.x, out.y, out.z, 0.0f);
+ if(sample == 0)
+ output[i] = make_float4(out.x, out.y, out.z, 0.0f);
+ else
+ output[i] += make_float4(out.x, out.y, out.z, 0.0f);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h
index 7fc66a9fdee..5c83358a56d 100644
--- a/intern/cycles/kernel/kernel_camera.h
+++ b/intern/cycles/kernel/kernel_camera.h
@@ -21,16 +21,22 @@ CCL_NAMESPACE_BEGIN
ccl_device float2 camera_sample_aperture(KernelGlobals *kg, float u, float v)
{
float blades = kernel_data.cam.blades;
+ float2 bokeh;
if(blades == 0.0f) {
/* sample disk */
- return concentric_sample_disk(u, v);
+ bokeh = concentric_sample_disk(u, v);
}
else {
/* sample polygon */
float rotation = kernel_data.cam.bladesrotation;
- return regular_polygon_sample(blades, rotation, u, v);
+ bokeh = regular_polygon_sample(blades, rotation, u, v);
}
+
+ /* anamorphic lens bokeh */
+ bokeh.x *= kernel_data.cam.inv_aperture_ratio;
+
+ return bokeh;
}
ccl_device void camera_sample_perspective(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, Ray *ray)
@@ -183,7 +189,8 @@ ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float
/* calculate orthonormal coordinates perpendicular to D */
float3 U, V;
- make_orthonormals(D, &U, &V);
+ U = normalize(make_float3(1.0f, 0.0f, 0.0f) - D.x * D);
+ V = normalize(cross(D, U));
/* update ray for effect of lens */
ray->P = U * lensuv.x + V * lensuv.y;
@@ -262,6 +269,20 @@ ccl_device_inline float camera_distance(KernelGlobals *kg, float3 P)
return len(P - camP);
}
+ccl_device_inline float3 camera_direction_from_point(KernelGlobals *kg, float3 P)
+{
+ Transform cameratoworld = kernel_data.cam.cameratoworld;
+
+ if(kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) {
+ float3 camD = make_float3(cameratoworld.x.z, cameratoworld.y.z, cameratoworld.z.z);
+ return -camD;
+ }
+ else {
+ float3 camP = make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w);
+ return normalize(camP - P);
+ }
+}
+
ccl_device_inline float3 camera_world_to_ndc(KernelGlobals *kg, ShaderData *sd, float3 P)
{
if(kernel_data.cam.type != CAMERA_PANORAMA) {
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index d027bb62ebe..37cba03ff97 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -25,6 +25,13 @@
#include "util_half.h"
#include "util_types.h"
+/* On 64bit linux single precision exponent is really slow comparing to the
+ * double precision version, even with float<->double conversion involved.
+ */
+#if !defined(__KERNEL_GPU__) && defined(__linux__) && defined(__x86_64__)
+# define expf(x) ((float)exp((double)(x)))
+#endif
+
CCL_NAMESPACE_BEGIN
/* Assertions inside the kernel only work for the CPU device, so we wrap it in
@@ -44,16 +51,16 @@ template<typename T> struct texture {
}
#if 0
- ccl_always_inline __m128 fetch_m128(int index)
+ ccl_always_inline ssef fetch_ssef(int index)
{
kernel_assert(index >= 0 && index < width);
- return ((__m128*)data)[index];
+ return ((ssef*)data)[index];
}
- ccl_always_inline __m128i fetch_m128i(int index)
+ ccl_always_inline ssei fetch_ssei(int index)
{
kernel_assert(index >= 0 && index < width);
- return ((__m128i*)data)[index];
+ return ((ssei*)data)[index];
}
#endif
@@ -144,6 +151,13 @@ template<typename T> struct texture_image {
ccl_always_inline float4 interp_3d(float x, float y, float z, bool periodic = false)
{
+ return interp_3d_ex(x, y, z, interpolation, periodic);
+ }
+
+ ccl_always_inline float4 interp_3d_ex(float x, float y, float z,
+ int interpolation = INTERPOLATION_LINEAR,
+ bool periodic = false)
+ {
if(UNLIKELY(!data))
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
@@ -167,7 +181,7 @@ template<typename T> struct texture_image {
return read(data[ix + iy*width + iz*width*height]);
}
- else {
+ else if(interpolation == INTERPOLATION_LINEAR) {
float tx = frac(x*(float)width - 0.5f, &ix);
float ty = frac(y*(float)height - 0.5f, &iy);
float tz = frac(z*(float)depth - 0.5f, &iz);
@@ -205,6 +219,93 @@ template<typename T> struct texture_image {
return r;
}
+ else {
+ /* Tricubic b-spline interpolation. */
+ const float tx = frac(x*(float)width - 0.5f, &ix);
+ const float ty = frac(y*(float)height - 0.5f, &iy);
+ const float tz = frac(z*(float)depth - 0.5f, &iz);
+ int pix, piy, piz, nnix, nniy, nniz;
+
+ if(periodic) {
+ ix = wrap_periodic(ix, width);
+ iy = wrap_periodic(iy, height);
+ iz = wrap_periodic(iz, depth);
+
+ pix = wrap_periodic(ix-1, width);
+ piy = wrap_periodic(iy-1, height);
+ piz = wrap_periodic(iz-1, depth);
+
+ nix = wrap_periodic(ix+1, width);
+ niy = wrap_periodic(iy+1, height);
+ niz = wrap_periodic(iz+1, depth);
+
+ nnix = wrap_periodic(ix+2, width);
+ nniy = wrap_periodic(iy+2, height);
+ nniz = wrap_periodic(iz+2, depth);
+ }
+ else {
+ ix = wrap_clamp(ix, width);
+ iy = wrap_clamp(iy, height);
+ iz = wrap_clamp(iz, depth);
+
+ pix = wrap_clamp(ix-1, width);
+ piy = wrap_clamp(iy-1, height);
+ piz = wrap_clamp(iz-1, depth);
+
+ nix = wrap_clamp(ix+1, width);
+ niy = wrap_clamp(iy+1, height);
+ niz = wrap_clamp(iz+1, depth);
+
+ nnix = wrap_clamp(ix+2, width);
+ nniy = wrap_clamp(iy+2, height);
+ nniz = wrap_clamp(iz+2, depth);
+ }
+
+ const int xc[4] = {pix, ix, nix, nnix};
+ const int yc[4] = {width * piy,
+ width * iy,
+ width * niy,
+ width * nniy};
+ const int zc[4] = {width * height * piz,
+ width * height * iz,
+ width * height * niz,
+ width * height * nniz};
+ float u[4], v[4], w[4];
+
+ /* Some helper macro to keep code reasonable size,
+ * let compiler to inline all the matrix multiplications.
+ */
+#define SET_SPLINE_WEIGHTS(u, t) \
+ { \
+ u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \
+ u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \
+ u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \
+ u[3] = (1.0f / 6.0f) * t * t * t; \
+ } (void)0
+#define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]]))
+#define COL_TERM(col, row) \
+ (v[col] * (u[0] * DATA(0, col, row) + \
+ u[1] * DATA(1, col, row) + \
+ u[2] * DATA(2, col, row) + \
+ u[3] * DATA(3, col, row)))
+#define ROW_TERM(row) \
+ (w[row] * (COL_TERM(0, row) + \
+ COL_TERM(1, row) + \
+ COL_TERM(2, row) + \
+ COL_TERM(3, row)))
+
+ SET_SPLINE_WEIGHTS(u, tx);
+ SET_SPLINE_WEIGHTS(v, ty);
+ SET_SPLINE_WEIGHTS(w, tz);
+
+ /* Actual interpolation. */
+ return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
+
+#undef COL_TERM
+#undef ROW_TERM
+#undef DATA
+#undef SET_SPLINE_WEIGHTS
+ }
}
ccl_always_inline void dimensions_set(int width_, int height_, int depth_)
@@ -232,11 +333,12 @@ typedef texture_image<uchar4> texture_image_uchar4;
/* Macros to handle different memory storage on different devices */
#define kernel_tex_fetch(tex, index) (kg->tex.fetch(index))
-#define kernel_tex_fetch_m128(tex, index) (kg->tex.fetch_m128(index))
-#define kernel_tex_fetch_m128i(tex, index) (kg->tex.fetch_m128i(index))
+#define kernel_tex_fetch_ssef(tex, index) (kg->tex.fetch_ssef(index))
+#define kernel_tex_fetch_ssei(tex, index) (kg->tex.fetch_ssei(index))
#define kernel_tex_lookup(tex, t, offset, size) (kg->tex.lookup(t, offset, size))
#define kernel_tex_image_interp(tex, x, y) ((tex < MAX_FLOAT_IMAGES) ? kg->texture_float_images[tex].interp(x, y) : kg->texture_byte_images[tex - MAX_FLOAT_IMAGES].interp(x, y))
#define kernel_tex_image_interp_3d(tex, x, y, z) ((tex < MAX_FLOAT_IMAGES) ? kg->texture_float_images[tex].interp_3d(x, y, z) : kg->texture_byte_images[tex - MAX_FLOAT_IMAGES].interp_3d(x, y, z))
+#define kernel_tex_image_interp_3d_ex(tex, x, y, z, interpolation) ((tex < MAX_FLOAT_IMAGES) ? kg->texture_float_images[tex].interp_3d_ex(x, y, z, interpolation) : kg->texture_byte_images[tex - MAX_FLOAT_IMAGES].interp_3d_ex(x, y, z, interpolation))
#define kernel_data (kg->__data)
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index e4c20d26ff1..f14f3262274 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -75,12 +75,11 @@ typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;
/* Use fast math functions */
-#define cosf(x) __cosf(((float)x))
-#define sinf(x) __sinf(((float)x))
-#define powf(x, y) __powf(((float)x), ((float)y))
-#define tanf(x) __tanf(((float)x))
-#define logf(x) __logf(((float)x))
-#define expf(x) __expf(((float)x))
+#define cosf(x) __cosf(((float)(x)))
+#define sinf(x) __sinf(((float)(x)))
+#define powf(x, y) __powf(((float)(x)), ((float)(y)))
+#define tanf(x) __tanf(((float)(x)))
+#define logf(x) __logf(((float)(x)))
+#define expf(x) __expf(((float)(x)))
#endif /* __KERNEL_COMPAT_CUDA_H__ */
-
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index 8346b09619e..58031a41b78 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -24,14 +24,6 @@
#define CCL_NAMESPACE_BEGIN
#define CCL_NAMESPACE_END
-#ifdef __KERNEL_OPENCL_AMD__
-#define __CL_NO_FLOAT3__
-#endif
-
-#ifdef __CL_NO_FLOAT3__
-#define float3 float4
-#endif
-
#ifdef __CL_NOINLINE__
#define ccl_noinline __attribute__((noinline))
#else
@@ -68,51 +60,51 @@
#ifdef make_int4
#undef make_int4
#endif
+#ifdef make_uchar4
+#undef make_uchar4
+#endif
#define make_float2(x, y) ((float2)(x, y))
-#ifdef __CL_NO_FLOAT3__
-#define make_float3(x, y, z) ((float4)(x, y, z, 0.0f))
-#else
#define make_float3(x, y, z) ((float3)(x, y, z))
-#endif
#define make_float4(x, y, z, w) ((float4)(x, y, z, w))
#define make_int2(x, y) ((int2)(x, y))
#define make_int3(x, y, z) ((int3)(x, y, z))
#define make_int4(x, y, z, w) ((int4)(x, y, z, w))
+#define make_uchar4(x, y, z, w) ((uchar4)(x, y, z, w))
/* math functions */
#define __uint_as_float(x) as_float(x)
#define __float_as_uint(x) as_uint(x)
#define __int_as_float(x) as_float(x)
#define __float_as_int(x) as_int(x)
-#define powf(x, y) pow(((float)x), ((float)y))
-#define fabsf(x) fabs(((float)x))
-#define copysignf(x, y) copysign(((float)x), ((float)y))
-#define asinf(x) asin(((float)x))
-#define acosf(x) acos(((float)x))
-#define atanf(x) atan(((float)x))
-#define floorf(x) floor(((float)x))
-#define ceilf(x) ceil(((float)x))
-#define hypotf(x, y) hypot(((float)x), ((float)y))
-#define atan2f(x, y) atan2(((float)x), ((float)y))
-#define fmaxf(x, y) fmax(((float)x), ((float)y))
-#define fminf(x, y) fmin(((float)x), ((float)y))
-#define fmodf(x, y) fmod((float)x, (float)y)
+#define powf(x, y) pow(((float)(x)), ((float)(y)))
+#define fabsf(x) fabs(((float)(x)))
+#define copysignf(x, y) copysign(((float)(x)), ((float)(y)))
+#define asinf(x) asin(((float)(x)))
+#define acosf(x) acos(((float)(x)))
+#define atanf(x) atan(((float)(x)))
+#define floorf(x) floor(((float)(x)))
+#define ceilf(x) ceil(((float)(x)))
+#define hypotf(x, y) hypot(((float)(x)), ((float)(y)))
+#define atan2f(x, y) atan2(((float)(x)), ((float)(y)))
+#define fmaxf(x, y) fmax(((float)(x)), ((float)(y)))
+#define fminf(x, y) fmin(((float)(x)), ((float)(y)))
+#define fmodf(x, y) fmod((float)(x), (float)(y))
#ifndef __CL_USE_NATIVE__
-#define sinf(x) native_sin(((float)x))
-#define cosf(x) native_cos(((float)x))
-#define tanf(x) native_tan(((float)x))
-#define expf(x) native_exp(((float)x))
-#define sqrtf(x) native_sqrt(((float)x))
-#define logf(x) native_log(((float)x))
+#define sinf(x) native_sin(((float)(x)))
+#define cosf(x) native_cos(((float)(x)))
+#define tanf(x) native_tan(((float)(x)))
+#define expf(x) native_exp(((float)(x)))
+#define sqrtf(x) native_sqrt(((float)(x)))
+#define logf(x) native_log(((float)(x)))
#else
-#define sinf(x) sin(((float)x))
-#define cosf(x) cos(((float)x))
-#define tanf(x) tan(((float)x))
-#define expf(x) exp(((float)x))
-#define sqrtf(x) sqrt(((float)x))
-#define logf(x) log(((float)x))
+#define sinf(x) sin(((float)(x)))
+#define cosf(x) cos(((float)(x)))
+#define tanf(x) tan(((float)(x)))
+#define expf(x) exp(((float)(x)))
+#define sqrtf(x) sqrt(((float)(x)))
+#define logf(x) log(((float)(x)))
#endif
/* data lookup defines */
diff --git a/intern/cycles/kernel/kernel_debug.h b/intern/cycles/kernel/kernel_debug.h
new file mode 100644
index 00000000000..bf1bc0e9db8
--- /dev/null
+++ b/intern/cycles/kernel/kernel_debug.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2011-2014 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device_inline void debug_data_init(DebugData *debug_data)
+{
+ debug_data->num_bvh_traversal_steps = 0;
+}
+
+ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg,
+ ccl_global float *buffer,
+ PathState *state,
+ DebugData *debug_data,
+ int sample)
+{
+ int flag = kernel_data.film.pass_flag;
+ if(flag & PASS_BVH_TRAVERSAL_STEPS) {
+ kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversal_steps,
+ sample,
+ debug_data->num_bvh_traversal_steps);
+ }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index deffa7f2ba2..4b2bb723ab6 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -63,32 +63,18 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
return eval;
}
-ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd, int lindex,
- float randt, float randu, float randv, Ray *ray, BsdfEval *eval,
- bool *is_lamp, int bounce, int transparent_bounce)
+ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd,
+ LightSample *ls, Ray *ray, BsdfEval *eval, bool *is_lamp,
+ int bounce, int transparent_bounce)
{
- LightSample ls;
-
-#ifdef __BRANCHED_PATH__
- if(lindex != LAMP_NONE) {
- /* sample position on a specified light */
- light_select(kg, lindex, randu, randv, sd->P, &ls);
- }
- else
-#endif
- {
- /* sample a light and position on int */
- light_sample(kg, randt, randu, randv, sd->time, sd->P, &ls);
- }
-
- if(ls.pdf == 0.0f)
+ if(ls->pdf == 0.0f)
return false;
/* todo: implement */
differential3 dD = differential3_zero();
/* evaluate closure */
- float3 light_eval = direct_emissive_eval(kg, &ls, -ls.D, dD, ls.t, sd->time, bounce, transparent_bounce);
+ float3 light_eval = direct_emissive_eval(kg, ls, -ls->D, dD, ls->t, sd->time, bounce, transparent_bounce);
if(is_zero(light_eval))
return false;
@@ -98,49 +84,51 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd, int
#ifdef __VOLUME__
if(sd->prim != PRIM_NONE)
- shader_bsdf_eval(kg, sd, ls.D, eval, &bsdf_pdf);
+ shader_bsdf_eval(kg, sd, ls->D, eval, &bsdf_pdf);
else
- shader_volume_phase_eval(kg, sd, ls.D, eval, &bsdf_pdf);
+ shader_volume_phase_eval(kg, sd, ls->D, eval, &bsdf_pdf);
#else
- shader_bsdf_eval(kg, sd, ls.D, eval, &bsdf_pdf);
+ shader_bsdf_eval(kg, sd, ls->D, eval, &bsdf_pdf);
#endif
- if(ls.shader & SHADER_USE_MIS) {
+ if(ls->shader & SHADER_USE_MIS) {
/* multiple importance sampling */
- float mis_weight = power_heuristic(ls.pdf, bsdf_pdf);
+ float mis_weight = power_heuristic(ls->pdf, bsdf_pdf);
light_eval *= mis_weight;
}
- bsdf_eval_mul(eval, light_eval/ls.pdf);
+ bsdf_eval_mul(eval, light_eval/ls->pdf);
#ifdef __PASSES__
/* use visibility flag to skip lights */
- if(ls.shader & SHADER_EXCLUDE_ANY) {
- if(ls.shader & SHADER_EXCLUDE_DIFFUSE)
+ if(ls->shader & SHADER_EXCLUDE_ANY) {
+ if(ls->shader & SHADER_EXCLUDE_DIFFUSE)
eval->diffuse = make_float3(0.0f, 0.0f, 0.0f);
- if(ls.shader & SHADER_EXCLUDE_GLOSSY)
+ if(ls->shader & SHADER_EXCLUDE_GLOSSY)
eval->glossy = make_float3(0.0f, 0.0f, 0.0f);
- if(ls.shader & SHADER_EXCLUDE_TRANSMIT)
+ if(ls->shader & SHADER_EXCLUDE_TRANSMIT)
eval->transmission = make_float3(0.0f, 0.0f, 0.0f);
+ if(ls->shader & SHADER_EXCLUDE_SCATTER)
+ eval->scatter = make_float3(0.0f, 0.0f, 0.0f);
}
#endif
if(bsdf_eval_is_zero(eval))
return false;
- if(ls.shader & SHADER_CAST_SHADOW) {
+ if(ls->shader & SHADER_CAST_SHADOW) {
/* setup ray */
- bool transmit = (dot(sd->Ng, ls.D) < 0.0f);
+ bool transmit = (dot(sd->Ng, ls->D) < 0.0f);
ray->P = ray_offset(sd->P, (transmit)? -sd->Ng: sd->Ng);
- if(ls.t == FLT_MAX) {
+ if(ls->t == FLT_MAX) {
/* distant light */
- ray->D = ls.D;
- ray->t = ls.t;
+ ray->D = ls->D;
+ ray->t = ls->t;
}
else {
/* other lights, avoid self-intersection */
- ray->D = ray_offset(ls.P, ls.Ng) - ray->P;
+ ray->D = ray_offset(ls->P, ls->Ng) - ray->P;
ray->D = normalize_len(ray->D, &ray->t);
}
@@ -153,7 +141,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd, int
}
/* return if it's a lamp for shadow pass */
- *is_lamp = (ls.prim == PRIM_NONE && ls.type != LIGHT_BACKGROUND);
+ *is_lamp = (ls->prim == PRIM_NONE && ls->type != LIGHT_BACKGROUND);
return true;
}
@@ -201,13 +189,25 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, PathState *st
if(ls.shader & SHADER_EXCLUDE_ANY) {
if(((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) ||
((ls.shader & SHADER_EXCLUDE_GLOSSY) && (state->flag & PATH_RAY_GLOSSY)) ||
- ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)))
+ ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) ||
+ ((ls.shader & SHADER_EXCLUDE_SCATTER) && (state->flag & PATH_RAY_VOLUME_SCATTER)))
continue;
}
#endif
float3 L = direct_emissive_eval(kg, &ls, -ray->D, ray->dD, ls.t, ray->time, state->bounce, state->transparent_bounce);
+#ifdef __VOLUME__
+ if(state->volume_stack[0].shader != SHADER_NONE) {
+ /* shadow attenuation */
+ Ray volume_ray = *ray;
+ volume_ray.t = ls.t;
+ float3 volume_tp = make_float3(1.0f, 1.0f, 1.0f);
+ kernel_volume_shadow(kg, state, &volume_ray, &volume_tp);
+ L *= volume_tp;
+ }
+#endif
+
if(!(state->flag & PATH_RAY_MIS_SKIP)) {
/* multiple importance sampling, get regular light pdf,
* and compute weight with respect to BSDF pdf */
@@ -234,7 +234,8 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg, PathState *sta
if(((shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) ||
((shader & SHADER_EXCLUDE_GLOSSY) && (state->flag & PATH_RAY_GLOSSY)) ||
((shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) ||
- ((shader & SHADER_EXCLUDE_CAMERA) && (state->flag & PATH_RAY_CAMERA)))
+ ((shader & SHADER_EXCLUDE_CAMERA) && (state->flag & PATH_RAY_CAMERA)) ||
+ ((shader & SHADER_EXCLUDE_SCATTER) && (state->flag & PATH_RAY_VOLUME_SCATTER)))
return make_float3(0.0f, 0.0f, 0.0f);
}
diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h
index 7a850844bf2..2a5b7689e57 100644
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -14,6 +14,8 @@
* limitations under the License
*/
+/* TODO(sergey): Consider moving portable ctz/clz stuff to util. */
+
CCL_NAMESPACE_BEGIN
/* "Correlated Multi-Jittered Sampling"
@@ -35,8 +37,16 @@ ccl_device_inline int cmj_fast_mod_pow2(int a, int b)
/* a must be > 0 and b must be > 1 */
ccl_device_inline int cmj_fast_div_pow2(int a, int b)
{
-#if defined(__KERNEL_SSE2__) && !defined(_MSC_VER)
+ kernel_assert(a > 0);
+ kernel_assert(b > 1);
+#if defined(__KERNEL_SSE2__)
+# ifdef _MSC_VER
+ unsigned long ctz;
+ _BitScanForward(&ctz, b);
+ return a >> ctz;
+# else
return a >> __builtin_ctz(b);
+# endif
#else
return a/b;
#endif
@@ -44,8 +54,15 @@ ccl_device_inline int cmj_fast_div_pow2(int a, int b)
ccl_device_inline uint cmj_w_mask(uint w)
{
-#if defined(__KERNEL_SSE2__) && !defined(_MSC_VER)
+ kernel_assert(w > 1);
+#if defined(__KERNEL_SSE2__)
+# ifdef _MSC_VER
+ unsigned long leading_zero;
+ _BitScanReverse(&leading_zero, w);
+ return ((1 << (1 + leading_zero)) - 1);
+# else
return ((1 << (32 - __builtin_clz(w))) - 1);
+# endif
#else
w |= w >> 1;
w |= w >> 2;
@@ -165,7 +182,8 @@ ccl_device void cmj_sample_2D(int s, int N, int p, float *fx, float *fy)
smodm = cmj_fast_mod_pow2(s, m);
}
else {
- sdivm = float_to_int(s * invm);
+ /* Doing s*inmv gives precision issues here. */
+ sdivm = s / m;
smodm = s - sdivm*m;
}
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index ac432d3fe04..b18f67ad524 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -27,7 +27,7 @@ typedef struct LightSample {
float pdf; /* light sampling probability density function */
float eval_fac; /* intensity multiplier */
int object; /* object id for triangle/curve lights */
- int prim; /* primitive id for triangle/curve ligths */
+ int prim; /* primitive id for triangle/curve lights */
int shader; /* shader id */
int lamp; /* lamp id */
LightType type; /* type of light */
@@ -167,12 +167,137 @@ ccl_device float3 sphere_light_sample(float3 P, float3 center, float radius, flo
return disk_light_sample(normalize(P - center), randu, randv)*radius;
}
-ccl_device float3 area_light_sample(float3 axisu, float3 axisv, float randu, float randv)
+/* Uses the following paper:
+ *
+ * Carlos Urena et al.
+ * An Area-Preserving Parametrization for Spherical Rectangles.
+ *
+ * https://www.solidangle.com/research/egsr2013_spherical_rectangle.pdf
+ */
+ccl_device float3 area_light_sample(float3 P,
+ float3 light_p,
+ float3 axisu, float3 axisv,
+ float randu, float randv,
+ float *pdf)
{
- randu = randu - 0.5f;
- randv = randv - 0.5f;
+ /* In our name system we're using P for the center,
+ * which is o in the paper.
+ */
+
+ float3 corner = light_p - axisu * 0.5f - axisv * 0.5f;
+ float axisu_len, axisv_len;
+ /* Compute local reference system R. */
+ float3 x = normalize_len(axisu, &axisu_len);
+ float3 y = normalize_len(axisv, &axisv_len);
+ float3 z = cross(x, y);
+ /* Compute rectangle coords in local reference system. */
+ float3 dir = corner - P;
+ float z0 = dot(dir, z);
+ /* Flip 'z' to make it point against Q. */
+ if(z0 > 0.0f) {
+ z *= -1.0f;
+ z0 *= -1.0f;
+ }
+ float z0sq = z0 * z0;
+ float x0 = dot(dir, x);
+ float y0 = dot(dir, y);
+ float x1 = x0 + axisu_len;
+ float y1 = y0 + axisv_len;
+ float y0sq = y0 * y0;
+ float y1sq = y1 * y1;
+ /* Create vectors to four vertices. */
+ float3 v00 = make_float3(x0, y0, z0);
+ float3 v01 = make_float3(x0, y1, z0);
+ float3 v10 = make_float3(x1, y0, z0);
+ float3 v11 = make_float3(x1, y1, z0);
+ /* Compute normals to edges. */
+ float3 n0 = normalize(cross(v00, v10));
+ float3 n1 = normalize(cross(v10, v11));
+ float3 n2 = normalize(cross(v11, v01));
+ float3 n3 = normalize(cross(v01, v00));
+ /* Compute internal angles (gamma_i). */
+ float g0 = acosf(-dot(n0, n1));
+ float g1 = acosf(-dot(n1, n2));
+ float g2 = acosf(-dot(n2, n3));
+ float g3 = acosf(-dot(n3, n0));
+ /* Compute predefined constants. */
+ float b0 = n0.z;
+ float b1 = n2.z;
+ float b0sq = b0 * b0;
+ float k = M_2PI_F - g2 - g3;
+ /* Compute solid angle from internal angles. */
+ float S = g0 + g1 - k;
+
+ /* Compute cu. */
+ float au = randu * S + k;
+ float fu = (cosf(au) * b0 - b1) / sinf(au);
+ float cu = 1.0f / sqrtf(fu * fu + b0sq) * (fu > 0.0f ? 1.0f : -1.0f);
+ cu = clamp(cu, -1.0f, 1.0f);
+ /* Compute xu. */
+ float xu = -(cu * z0) / sqrtf(1.0f - cu * cu);
+ xu = clamp(xu, x0, x1);
+ /* Compute yv. */
+ float d = sqrtf(xu * xu + z0sq);
+ float h0 = y0 / sqrtf(d * d + y0sq);
+ float h1 = y1 / sqrtf(d * d + y1sq);
+ float hv = h0 + randv * (h1 - h0), hv2 = hv * hv;
+ float yv = (hv2 < 1.0f - 1e-6f) ? (hv * d) / sqrtf(1.0f - hv2) : y1;
+
+ *pdf = 1.0f / S;
+
+ /* Transform (xu, yv, z0) to world coords. */
+ return P + xu * x + yv * y + z0 * z;
+}
- return axisu*randu + axisv*randv;
+/* TODO(sergey): This is actually a duplicated code from above, but how to avoid
+ * this without having some nasty function with loads of parameters?
+ */
+ccl_device float area_light_pdf(float3 P,
+ float3 light_p,
+ float3 axisu, float3 axisv)
+{
+ /* In our name system we're using P for the center,
+ * which is o in the paper.
+ */
+
+ float3 corner = light_p - axisu * 0.5f - axisv * 0.5f;
+ float axisu_len, axisv_len;
+ /* Compute local reference system R. */
+ float3 x = normalize_len(axisu, &axisu_len);
+ float3 y = normalize_len(axisv, &axisv_len);
+ float3 z = cross(x, y);
+ /* Compute rectangle coords in local reference system. */
+ float3 dir = corner - P;
+ float z0 = dot(dir, z);
+ /* Flip 'z' to make it point against Q. */
+ if(z0 > 0.0f) {
+ z *= -1.0f;
+ z0 *= -1.0f;
+ }
+ float x0 = dot(dir, x);
+ float y0 = dot(dir, y);
+ float x1 = x0 + axisu_len;
+ float y1 = y0 + axisv_len;
+ /* Create vectors to four vertices. */
+ float3 v00 = make_float3(x0, y0, z0);
+ float3 v01 = make_float3(x0, y1, z0);
+ float3 v10 = make_float3(x1, y0, z0);
+ float3 v11 = make_float3(x1, y1, z0);
+ /* Compute normals to edges. */
+ float3 n0 = normalize(cross(v00, v10));
+ float3 n1 = normalize(cross(v10, v11));
+ float3 n2 = normalize(cross(v11, v01));
+ float3 n3 = normalize(cross(v01, v00));
+ /* Compute internal angles (gamma_i). */
+ float g0 = acosf(-dot(n0, n1));
+ float g1 = acosf(-dot(n1, n2));
+ float g2 = acosf(-dot(n2, n3));
+ float g3 = acosf(-dot(n3, n0));
+ /* Compute predefined constants. */
+ float k = M_2PI_F - g2 - g3;
+ /* Compute solid angle from internal angles. */
+ float S = g0 + g1 - k;
+ return 1.0f / S;
}
ccl_device float spot_light_attenuation(float4 data1, float4 data2, LightSample *ls)
@@ -276,6 +401,7 @@ ccl_device void lamp_light_sample(KernelGlobals *kg, int lamp,
float4 data2 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 2);
ls->eval_fac *= spot_light_attenuation(data1, data2, ls);
}
+ ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t);
}
else {
/* area light */
@@ -286,18 +412,22 @@ ccl_device void lamp_light_sample(KernelGlobals *kg, int lamp,
float3 axisv = make_float3(data2.y, data2.z, data2.w);
float3 D = make_float3(data3.y, data3.z, data3.w);
- ls->P += area_light_sample(axisu, axisv, randu, randv);
+ ls->P = area_light_sample(P, ls->P,
+ axisu, axisv,
+ randu, randv,
+ &ls->pdf);
+
ls->Ng = D;
ls->D = normalize_len(ls->P - P, &ls->t);
float invarea = data2.x;
-
ls->eval_fac = 0.25f*invarea;
- ls->pdf = invarea;
+
+ if(dot(ls->D, D) > 0.0f)
+ ls->pdf = 0.0f;
}
ls->eval_fac *= kernel_data.integrator.inv_pdf_lights;
- ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t);
}
}
@@ -355,8 +485,12 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D,
ls->D = D;
ls->t = FLT_MAX;
+ /* compute pdf */
float invarea = data1.w;
ls->pdf = invarea/(costheta*costheta*costheta);
+ if(ls->t != FLT_MAX)
+ ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t);
+
ls->eval_fac = ls->pdf;
}
else if(type == LIGHT_POINT || type == LIGHT_SPOT) {
@@ -386,6 +520,10 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D,
if(ls->eval_fac == 0.0f)
return false;
}
+
+ /* compute pdf */
+ if(ls->t != FLT_MAX)
+ ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t);
}
else if(type == LIGHT_AREA) {
/* area light */
@@ -412,16 +550,12 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D,
ls->D = D;
ls->Ng = Ng;
- ls->pdf = invarea;
- ls->eval_fac = 0.25f*ls->pdf;
+ ls->pdf = area_light_pdf(P, ls->P, axisu, axisv);
+ ls->eval_fac = 0.25f*invarea;
}
else
return false;
- /* compute pdf */
- if(ls->t != FLT_MAX)
- ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t);
-
return true;
}
@@ -457,7 +591,7 @@ ccl_device void triangle_light_sample(KernelGlobals *kg, int prim, int object,
v = randv*randu;
/* triangle, so get position, normal, shader */
- triangle_point_normal(kg, prim, u, v, &ls->P, &ls->Ng, &ls->shader);
+ triangle_point_normal(kg, object, prim, u, v, &ls->P, &ls->Ng, &ls->shader);
ls->object = object;
ls->prim = prim;
ls->lamp = LAMP_NONE;
@@ -546,11 +680,6 @@ ccl_device int light_select_num_samples(KernelGlobals *kg, int index)
return __float_as_int(data3.x);
}
-ccl_device void light_select(KernelGlobals *kg, int index, float randu, float randv, float3 P, LightSample *ls)
-{
- lamp_light_sample(kg, index, randu, randv, P, ls);
-}
-
ccl_device int lamp_light_eval_sample(KernelGlobals *kg, float randt)
{
/* sample index */
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index a80a0033712..c03229f0a3a 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -29,7 +29,6 @@
#include "kernel_accumulate.h"
#include "kernel_shader.h"
#include "kernel_light.h"
-#include "kernel_emission.h"
#include "kernel_passes.h"
#ifdef __SUBSURFACE__
@@ -42,177 +41,15 @@
#include "kernel_path_state.h"
#include "kernel_shadow.h"
+#include "kernel_emission.h"
+#include "kernel_path_surface.h"
+#include "kernel_path_volume.h"
-CCL_NAMESPACE_BEGIN
-
-#ifdef __VOLUME__
-
-ccl_device_inline bool kernel_path_integrate_scatter_lighting(KernelGlobals *kg, RNG *rng,
- ShaderData *sd, float3 *throughput, PathState *state, PathRadiance *L, Ray *ray,
- float num_samples_adjust)
-{
-#ifdef __EMISSION__
- if(kernel_data.integrator.use_direct_light) {
- /* sample illumination from lights to find path contribution */
- if(sd->flag & SD_BSDF_HAS_EVAL) {
- float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
- float light_u, light_v;
- path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
-
- Ray light_ray;
- BsdfEval L_light;
- bool is_lamp;
-
-#ifdef __OBJECT_MOTION__
- light_ray.time = sd->time;
-#endif
-
- if(direct_emission(kg, sd, LAMP_NONE, light_t, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, *throughput * num_samples_adjust, &L_light, shadow, 1.0f, state->bounce, is_lamp);
- }
- }
- }
- }
-#endif
-
- /* sample phase function */
- float phase_pdf;
- BsdfEval phase_eval;
- float3 phase_omega_in;
- differential3 phase_domega_in;
- float phase_u, phase_v;
- path_state_rng_2D(kg, rng, state, PRNG_PHASE_U, &phase_u, &phase_v);
- int label;
-
- label = shader_volume_phase_sample(kg, sd, phase_u, phase_v, &phase_eval,
- &phase_omega_in, &phase_domega_in, &phase_pdf);
-
- if(phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval))
- return false;
-
- /* modify throughput */
- path_radiance_bsdf_bounce(L, throughput, &phase_eval, phase_pdf, state->bounce, label);
-
- /* set labels */
- state->ray_pdf = phase_pdf;
-#ifdef __LAMP_MIS__
- state->ray_t = 0.0f;
-#endif
- state->min_ray_pdf = fminf(phase_pdf, state->min_ray_pdf);
-
- /* update path state */
- path_state_next(kg, state, label);
-
- /* setup ray */
- ray->P = sd->P;
- ray->D = phase_omega_in;
- ray->t = FLT_MAX;
-
-#ifdef __RAY_DIFFERENTIALS__
- ray->dP = sd->dP;
- ray->dD = phase_domega_in;
-#endif
-
- return true;
-}
-
+#ifdef __KERNEL_DEBUG__
+#include "kernel_debug.h"
#endif
-#if defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__)
-
-ccl_device void kernel_branched_path_integrate_direct_lighting(KernelGlobals *kg, RNG *rng,
- ShaderData *sd, PathState *state, float3 throughput, float num_samples_adjust, PathRadiance *L, bool sample_all_lights)
-{
- /* sample illumination from lights to find path contribution */
- if(sd->flag & SD_BSDF_HAS_EVAL) {
- Ray light_ray;
- BsdfEval L_light;
- bool is_lamp;
-
-#ifdef __OBJECT_MOTION__
- light_ray.time = sd->time;
-#endif
-
- if(sample_all_lights) {
- /* lamp sampling */
- for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
- int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i));
- float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights);
- RNG lamp_rng = cmj_hash(*rng, i);
-
- if(kernel_data.integrator.pdf_triangles != 0.0f)
- num_samples_inv *= 0.5f;
-
- for(int j = 0; j < num_samples; j++) {
- float light_u, light_v;
- path_branched_rng_2D(kg, &lamp_rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
-
- if(direct_emission(kg, sd, i, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
- }
- }
- }
- }
-
- /* mesh light sampling */
- if(kernel_data.integrator.pdf_triangles != 0.0f) {
- int num_samples = ceil_to_int(num_samples_adjust*kernel_data.integrator.mesh_light_samples);
- float num_samples_inv = num_samples_adjust/num_samples;
-
- if(kernel_data.integrator.num_all_lights)
- num_samples_inv *= 0.5f;
-
- for(int j = 0; j < num_samples; j++) {
- float light_t = path_branched_rng_1D(kg, rng, state, j, num_samples, PRNG_LIGHT);
- float light_u, light_v;
- path_branched_rng_2D(kg, rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
-
- /* only sample triangle lights */
- if(kernel_data.integrator.num_all_lights)
- light_t = 0.5f*light_t;
-
- if(direct_emission(kg, sd, LAMP_NONE, light_t, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
- }
- }
- }
- }
- }
- else {
- float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
- float light_u, light_v;
- path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
-
- /* sample random light */
- if(direct_emission(kg, sd, LAMP_NONE, light_t, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
- }
- }
- }
- }
-}
-
-#endif
+CCL_NAMESPACE_BEGIN
ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
float3 throughput, int num_samples, PathState state, PathRadiance *L)
@@ -222,11 +59,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
/* intersect scene */
Intersection isect;
uint visibility = path_state_ray_visibility(kg, &state);
-#ifdef __HAIR__
bool hit = scene_intersect(kg, &ray, visibility, &isect, NULL, 0.0f, 0.0f);
-#else
- bool hit = scene_intersect(kg, &ray, visibility, &isect);
-#endif
#ifdef __LAMP_MIS__
if(kernel_data.integrator.use_lamp_mis && !(state.flag & PATH_RAY_CAMERA)) {
@@ -255,15 +88,81 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
Ray volume_ray = ray;
volume_ray.t = (hit)? isect.t: FLT_MAX;
- ShaderData volume_sd;
- VolumeIntegrateResult result = kernel_volume_integrate(kg, &state,
- &volume_sd, &volume_ray, L, &throughput, rng);
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
- if(result == VOLUME_PATH_SCATTERED) {
- if(kernel_path_integrate_scatter_lighting(kg, rng, &volume_sd, &throughput, &state, L, &ray, 1.0f))
- continue;
- else
- break;
+#ifdef __VOLUME_DECOUPLED__
+ int sampling_method = volume_stack_sampling_method(kg, state.volume_stack);
+ bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, false, sampling_method);
+
+ if(decoupled) {
+ /* cache steps along volume for repeated sampling */
+ VolumeSegment volume_segment;
+ ShaderData volume_sd;
+
+ shader_setup_from_volume(kg, &volume_sd, &volume_ray, state.bounce, state.transparent_bounce);
+ kernel_volume_decoupled_record(kg, &state,
+ &volume_ray, &volume_sd, &volume_segment, heterogeneous);
+
+ volume_segment.sampling_method = sampling_method;
+
+ /* emission */
+ if(volume_segment.closure_flag & SD_EMISSION)
+ path_radiance_accum_emission(L, throughput, volume_segment.accum_emission, state.bounce);
+
+ /* scattering */
+ VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
+
+ if(volume_segment.closure_flag & SD_SCATTER) {
+ bool all = kernel_data.integrator.sample_all_lights_indirect;
+
+ /* direct light sampling */
+ kernel_branched_path_volume_connect_light(kg, rng, &volume_sd,
+ throughput, &state, L, 1.0f, all, &volume_ray, &volume_segment);
+
+ /* indirect sample. if we use distance sampling and take just
+ * one sample for direct and indirect light, we could share
+ * this computation, but makes code a bit complex */
+ float rphase = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_PHASE);
+ float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE);
+
+ result = kernel_volume_decoupled_scatter(kg,
+ &state, &volume_ray, &volume_sd, &throughput,
+ rphase, rscatter, &volume_segment, NULL, true);
+ }
+
+ if(result != VOLUME_PATH_SCATTERED)
+ throughput *= volume_segment.accum_transmittance;
+
+ /* free cached steps */
+ kernel_volume_decoupled_free(kg, &volume_segment);
+
+ if(result == VOLUME_PATH_SCATTERED) {
+ if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, L, &ray))
+ continue;
+ else
+ break;
+ }
+ }
+ else
+#endif
+ {
+ /* integrate along volume segment with distance sampling */
+ ShaderData volume_sd;
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, &state, &volume_sd, &volume_ray, L, &throughput, rng, heterogeneous);
+
+#ifdef __VOLUME_SCATTER__
+ if(result == VOLUME_PATH_SCATTERED) {
+ /* direct lighting */
+ kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, L);
+
+ /* indirect light bounce */
+ if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, L, &ray))
+ continue;
+ else
+ break;
+ }
+#endif
}
}
#endif
@@ -281,7 +180,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
/* setup shading */
ShaderData sd;
shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce, state.transparent_bounce);
- float rbsdf = path_state_rng_1D(kg, rng, &state, PRNG_BSDF);
+ float rbsdf = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_BSDF);
shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_INDIRECT);
#ifdef __BRANCHED_PATH__
shader_merge_closures(&sd);
@@ -315,7 +214,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
break;
}
else if(probability != 1.0f) {
- float terminate = path_state_rng_1D(kg, rng, &state, PRNG_TERMINATE);
+ float terminate = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_TERMINATE);
if(terminate >= probability)
break;
@@ -383,187 +282,12 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
#if defined(__EMISSION__) && defined(__BRANCHED_PATH__)
if(kernel_data.integrator.use_direct_light) {
bool all = kernel_data.integrator.sample_all_lights_indirect;
- kernel_branched_path_integrate_direct_lighting(kg, rng, &sd, &state, throughput, 1.0f, L, all);
- }
-#endif
-
- /* no BSDF? we can stop here */
- if(sd.flag & SD_BSDF) {
- /* sample BSDF */
- float bsdf_pdf;
- BsdfEval bsdf_eval;
- float3 bsdf_omega_in;
- differential3 bsdf_domega_in;
- float bsdf_u, bsdf_v;
- path_state_rng_2D(kg, rng, &state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
- int label;
-
- label = shader_bsdf_sample(kg, &sd, bsdf_u, bsdf_v, &bsdf_eval,
- &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
-
- if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval))
- break;
-
- /* modify throughput */
- path_radiance_bsdf_bounce(L, &throughput, &bsdf_eval, bsdf_pdf, state.bounce, label);
-
- /* set labels */
- if(!(label & LABEL_TRANSPARENT)) {
- state.ray_pdf = bsdf_pdf;
-#ifdef __LAMP_MIS__
- state.ray_t = 0.0f;
-#endif
- state.min_ray_pdf = fminf(bsdf_pdf, state.min_ray_pdf);
- }
-
- /* update path state */
- path_state_next(kg, &state, label);
-
- /* setup ray */
- ray.P = ray_offset(sd.P, (label & LABEL_TRANSMIT)? -sd.Ng: sd.Ng);
- ray.D = bsdf_omega_in;
- ray.t = FLT_MAX;
-#ifdef __RAY_DIFFERENTIALS__
- ray.dP = sd.dP;
- ray.dD = bsdf_domega_in;
-#endif
-
-#ifdef __VOLUME__
- /* enter/exit volume */
- if(label & LABEL_TRANSMIT)
- kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack);
-#endif
+ kernel_branched_path_surface_connect_light(kg, rng, &sd, &state, throughput, 1.0f, L, all);
}
-#ifdef __VOLUME__
- else if(sd.flag & SD_HAS_ONLY_VOLUME) {
- /* no surface shader but have a volume shader? act transparent */
-
- /* update path state, count as transparent */
- path_state_next(kg, &state, LABEL_TRANSPARENT);
-
- /* setup ray position, direction stays unchanged */
- ray.P = ray_offset(sd.P, -sd.Ng);
-#ifdef __RAY_DIFFERENTIALS__
- ray.dP = sd.dP;
#endif
- /* enter/exit volume */
- kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack);
- }
-#endif
- else {
- /* no bsdf or volume? we're done */
+ if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, &state, L, &ray))
break;
- }
- }
-}
-
-ccl_device_inline bool kernel_path_integrate_lighting(KernelGlobals *kg, RNG *rng,
- ShaderData *sd, float3 *throughput, PathState *state, PathRadiance *L, Ray *ray)
-{
-#ifdef __EMISSION__
- if(kernel_data.integrator.use_direct_light) {
- /* sample illumination from lights to find path contribution */
- if(sd->flag & SD_BSDF_HAS_EVAL) {
- float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
- float light_u, light_v;
- path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
-
- Ray light_ray;
- BsdfEval L_light;
- bool is_lamp;
-
-#ifdef __OBJECT_MOTION__
- light_ray.time = sd->time;
-#endif
-
- if(direct_emission(kg, sd, LAMP_NONE, light_t, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, *throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
- }
- }
- }
- }
-#endif
-
- /* no BSDF? we can stop here */
- if(sd->flag & SD_BSDF) {
- /* sample BSDF */
- float bsdf_pdf;
- BsdfEval bsdf_eval;
- float3 bsdf_omega_in;
- differential3 bsdf_domega_in;
- float bsdf_u, bsdf_v;
- path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
- int label;
-
- label = shader_bsdf_sample(kg, sd, bsdf_u, bsdf_v, &bsdf_eval,
- &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
-
- if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval))
- return false;
-
- /* modify throughput */
- path_radiance_bsdf_bounce(L, throughput, &bsdf_eval, bsdf_pdf, state->bounce, label);
-
- /* set labels */
- if(!(label & LABEL_TRANSPARENT)) {
- state->ray_pdf = bsdf_pdf;
-#ifdef __LAMP_MIS__
- state->ray_t = 0.0f;
-#endif
- state->min_ray_pdf = fminf(bsdf_pdf, state->min_ray_pdf);
- }
-
- /* update path state */
- path_state_next(kg, state, label);
-
- /* setup ray */
- ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT)? -sd->Ng: sd->Ng);
- ray->D = bsdf_omega_in;
-
- if(state->bounce == 0)
- ray->t -= sd->ray_length; /* clipping works through transparent */
- else
- ray->t = FLT_MAX;
-
-#ifdef __RAY_DIFFERENTIALS__
- ray->dP = sd->dP;
- ray->dD = bsdf_domega_in;
-#endif
-
-#ifdef __VOLUME__
- /* enter/exit volume */
- if(label & LABEL_TRANSMIT)
- kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
-#endif
- return true;
- }
-#ifdef __VOLUME__
- else if(sd->flag & SD_HAS_ONLY_VOLUME) {
- /* no surface shader but have a volume shader? act transparent */
-
- /* update path state, count as transparent */
- path_state_next(kg, state, LABEL_TRANSPARENT);
-
- /* setup ray position, direction stays unchanged */
- ray->P = ray_offset(sd->P, -sd->Ng);
-#ifdef __RAY_DIFFERENTIALS__
- ray->dP = sd->dP;
-#endif
-
- /* enter/exit volume */
- kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
- return true;
- }
-#endif
- else {
- /* no bsdf or volume? */
- return false;
}
}
@@ -601,7 +325,68 @@ ccl_device void kernel_path_ao(KernelGlobals *kg, ShaderData *sd, PathRadiance *
}
}
+ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, PathState *state, RNG *rng, float3 throughput)
+{
+ int num_samples = kernel_data.integrator.ao_samples;
+ float num_samples_inv = 1.0f/num_samples;
+ float ao_factor = kernel_data.background.ao_factor;
+ float3 ao_N;
+ float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
+ float3 ao_alpha = shader_bsdf_alpha(kg, sd);
+
+ for(int j = 0; j < num_samples; j++) {
+ float bsdf_u, bsdf_v;
+ path_branched_rng_2D(kg, rng, state, j, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+
+ float3 ao_D;
+ float ao_pdf;
+
+ sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
+
+ if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
+ Ray light_ray;
+ float3 ao_shadow;
+
+ light_ray.P = ray_offset(sd->P, sd->Ng);
+ light_ray.D = ao_D;
+ light_ray.t = kernel_data.background.ao_distance;
+#ifdef __OBJECT_MOTION__
+ light_ray.time = sd->time;
+#endif
+ light_ray.dP = sd->dP;
+ light_ray.dD = differential3_zero();
+
+ if(!shadow_blocked(kg, state, &light_ray, &ao_shadow))
+ path_radiance_accum_ao(L, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
+ }
+ }
+}
+
#ifdef __SUBSURFACE__
+
+#ifdef __VOLUME__
+ccl_device void kernel_path_subsurface_update_volume_stack(KernelGlobals *kg,
+ Ray *ray,
+ VolumeStack *stack)
+{
+ kernel_assert(kernel_data.integrator.use_volumes);
+
+ Ray volume_ray = *ray;
+ Intersection isect;
+
+ while(scene_intersect_volume(kg, &volume_ray, &isect))
+ {
+ ShaderData sd;
+ shader_setup_from_ray(kg, &sd, &isect, &volume_ray, 0, 0);
+ kernel_volume_stack_enter_exit(kg, &sd, stack);
+
+ /* Move ray forward. */
+ volume_ray.P = ray_offset(sd.P, -sd.Ng);
+ volume_ray.t -= sd.ray_length;
+ }
+}
+#endif
+
ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, PathState *state, RNG *rng, Ray *ray, float3 *throughput)
{
float bssrdf_probability;
@@ -618,6 +403,11 @@ ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd
float bssrdf_u, bssrdf_v;
path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
int num_hits = subsurface_scatter_multi_step(kg, sd, bssrdf_sd, state->flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false);
+#ifdef __VOLUME__
+ Ray volume_ray = *ray;
+ bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
+ sd->flag & SD_OBJECT_INTERSECTS_VOLUME;
+#endif
/* compute lighting with the BSDF closure */
for(int hit = 0; hit < num_hits; hit++) {
@@ -627,12 +417,30 @@ ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd
hit_state.flag |= PATH_RAY_BSSRDF_ANCESTOR;
hit_state.rng_offset += PRNG_BOUNCE_NUM;
+
+ kernel_path_surface_connect_light(kg, rng, &bssrdf_sd[hit], tp, state, L);
- if(kernel_path_integrate_lighting(kg, rng, &bssrdf_sd[hit], &tp, &hit_state, L, &hit_ray)) {
+ if(kernel_path_surface_bounce(kg, rng, &bssrdf_sd[hit], &tp, &hit_state, L, &hit_ray)) {
#ifdef __LAMP_MIS__
hit_state.ray_t = 0.0f;
#endif
+#ifdef __VOLUME__
+ if(need_update_volume_stack) {
+ /* Setup ray from previous surface point to the new one. */
+ volume_ray.D = normalize_len(hit_ray.P - volume_ray.P,
+ &volume_ray.t);
+
+ kernel_path_subsurface_update_volume_stack(
+ kg,
+ &volume_ray,
+ hit_state.volume_stack);
+
+ /* Move volume ray forward. */
+ volume_ray.P = hit_ray.P;
+ }
+#endif
+
kernel_path_indirect(kg, rng, hit_ray, tp, state->num_samples, hit_state, L);
/* for render passes, sum and reset indirect light pass variables
@@ -657,7 +465,12 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
path_radiance_init(&L, kernel_data.film.use_light_pass);
PathState state;
- path_state_init(kg, &state, rng, sample);
+ path_state_init(kg, &state, rng, sample, &ray);
+
+#ifdef __KERNEL_DEBUG__
+ DebugData debug_data;
+ debug_data_init(&debug_data);
+#endif
/* path iteration */
for(;;) {
@@ -682,7 +495,13 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
bool hit = scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax);
#else
- bool hit = scene_intersect(kg, &ray, visibility, &isect);
+ bool hit = scene_intersect(kg, &ray, visibility, &isect, NULL, 0.0f, 0.0f);
+#endif
+
+#ifdef __KERNEL_DEBUG__
+ if(state.flag & PATH_RAY_CAMERA) {
+ debug_data.num_bvh_traversal_steps += isect.num_traversal_steps;
+ }
#endif
#ifdef __LAMP_MIS__
@@ -712,15 +531,81 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
Ray volume_ray = ray;
volume_ray.t = (hit)? isect.t: FLT_MAX;
- ShaderData volume_sd;
- VolumeIntegrateResult result = kernel_volume_integrate(kg, &state,
- &volume_sd, &volume_ray, &L, &throughput, rng);
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
- if(result == VOLUME_PATH_SCATTERED) {
- if(kernel_path_integrate_scatter_lighting(kg, rng, &volume_sd, &throughput, &state, &L, &ray, 1.0f))
- continue;
- else
- break;
+#ifdef __VOLUME_DECOUPLED__
+ int sampling_method = volume_stack_sampling_method(kg, state.volume_stack);
+ bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, true, sampling_method);
+
+ if(decoupled) {
+ /* cache steps along volume for repeated sampling */
+ VolumeSegment volume_segment;
+ ShaderData volume_sd;
+
+ shader_setup_from_volume(kg, &volume_sd, &volume_ray, state.bounce, state.transparent_bounce);
+ kernel_volume_decoupled_record(kg, &state,
+ &volume_ray, &volume_sd, &volume_segment, heterogeneous);
+
+ volume_segment.sampling_method = sampling_method;
+
+ /* emission */
+ if(volume_segment.closure_flag & SD_EMISSION)
+ path_radiance_accum_emission(&L, throughput, volume_segment.accum_emission, state.bounce);
+
+ /* scattering */
+ VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
+
+ if(volume_segment.closure_flag & SD_SCATTER) {
+ bool all = false;
+
+ /* direct light sampling */
+ kernel_branched_path_volume_connect_light(kg, rng, &volume_sd,
+ throughput, &state, &L, 1.0f, all, &volume_ray, &volume_segment);
+
+ /* indirect sample. if we use distance sampling and take just
+ * one sample for direct and indirect light, we could share
+ * this computation, but makes code a bit complex */
+ float rphase = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_PHASE);
+ float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE);
+
+ result = kernel_volume_decoupled_scatter(kg,
+ &state, &volume_ray, &volume_sd, &throughput,
+ rphase, rscatter, &volume_segment, NULL, true);
+ }
+
+ if(result != VOLUME_PATH_SCATTERED)
+ throughput *= volume_segment.accum_transmittance;
+
+ /* free cached steps */
+ kernel_volume_decoupled_free(kg, &volume_segment);
+
+ if(result == VOLUME_PATH_SCATTERED) {
+ if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray))
+ continue;
+ else
+ break;
+ }
+ }
+ else
+#endif
+ {
+ /* integrate along volume segment with distance sampling */
+ ShaderData volume_sd;
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, &state, &volume_sd, &volume_ray, &L, &throughput, rng, heterogeneous);
+
+#ifdef __VOLUME_SCATTER__
+ if(result == VOLUME_PATH_SCATTERED) {
+ /* direct lighting */
+ kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, &L);
+
+ /* indirect light bounce */
+ if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray))
+ continue;
+ else
+ break;
+ }
+#endif
}
}
#endif
@@ -748,7 +633,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
/* setup shading */
ShaderData sd;
shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce, state.transparent_bounce);
- float rbsdf = path_state_rng_1D(kg, rng, &state, PRNG_BSDF);
+ float rbsdf = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_BSDF);
shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
/* holdout */
@@ -803,7 +688,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
break;
}
else if(probability != 1.0f) {
- float terminate = path_state_rng_1D(kg, rng, &state, PRNG_TERMINATE);
+ float terminate = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_TERMINATE);
if(terminate >= probability)
break;
@@ -826,134 +711,33 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
break;
}
#endif
-
- /* Same as kernel_path_integrate_lighting(kg, rng, &sd, &throughput, &state, &L, &ray),
- but for CUDA the function call is slower. */
-#ifdef __EMISSION__
- if(kernel_data.integrator.use_direct_light) {
- /* sample illumination from lights to find path contribution */
- if(sd.flag & SD_BSDF_HAS_EVAL) {
- float light_t = path_state_rng_1D(kg, rng, &state, PRNG_LIGHT);
- float light_u, light_v;
- path_state_rng_2D(kg, rng, &state, PRNG_LIGHT_U, &light_u, &light_v);
-
- Ray light_ray;
- BsdfEval L_light;
- bool is_lamp;
-
-#ifdef __OBJECT_MOTION__
- light_ray.time = sd.time;
-#endif
-
- if(direct_emission(kg, &sd, LAMP_NONE, light_t, light_u, light_v, &light_ray, &L_light, &is_lamp, state.bounce, state.transparent_bounce)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, &state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(&L, throughput, &L_light, shadow, 1.0f, state.bounce, is_lamp);
- }
- }
- }
- }
-#endif
-
- if(sd.flag & SD_BSDF) {
- /* sample BSDF */
- float bsdf_pdf;
- BsdfEval bsdf_eval;
- float3 bsdf_omega_in;
- differential3 bsdf_domega_in;
- float bsdf_u, bsdf_v;
- path_state_rng_2D(kg, rng, &state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
- int label;
-
- label = shader_bsdf_sample(kg, &sd, bsdf_u, bsdf_v, &bsdf_eval,
- &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
-
- if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval))
- break;
-
- /* modify throughput */
- path_radiance_bsdf_bounce(&L, &throughput, &bsdf_eval, bsdf_pdf, state.bounce, label);
-
- /* set labels */
- if(!(label & LABEL_TRANSPARENT)) {
- state.ray_pdf = bsdf_pdf;
-#ifdef __LAMP_MIS__
- state.ray_t = 0.0f;
-#endif
- state.min_ray_pdf = fminf(bsdf_pdf, state.min_ray_pdf);
- }
-
- /* update path state */
- path_state_next(kg, &state, label);
-
- /* setup ray */
- ray.P = ray_offset(sd.P, (label & LABEL_TRANSMIT)? -sd.Ng: sd.Ng);
- ray.D = bsdf_omega_in;
-
-#ifdef __RAY_DIFFERENTIALS__
- ray.dP = sd.dP;
- ray.dD = bsdf_domega_in;
-#endif
-
-#ifdef __VOLUME__
- /* enter/exit volume */
- if(label & LABEL_TRANSMIT)
- kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack);
-#endif
- }
-#ifdef __VOLUME__
- else if(sd.flag & SD_HAS_ONLY_VOLUME) {
- /* no surface shader but have a volume shader? act transparent */
+ /* direct lighting */
+ kernel_path_surface_connect_light(kg, rng, &sd, throughput, &state, &L);
- /* update path state, count as transparent */
- path_state_next(kg, &state, LABEL_TRANSPARENT);
-
- /* setup ray position, direction stays unchanged */
- ray.P = ray_offset(sd.P, -sd.Ng);
-#ifdef __RAY_DIFFERENTIALS__
- ray.dP = sd.dP;
-#endif
-
- /* enter/exit volume */
- kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack);
- }
-#endif
- else {
- /* no bsdf or volume? we're done */
+ /* compute direct lighting and next bounce */
+ if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
break;
- }
-
- /* adjust ray distance for clipping */
- if(state.bounce == 0)
- ray.t -= sd.ray_length; /* clipping works through transparent */
- else
- ray.t = FLT_MAX;
}
float3 L_sum = path_radiance_clamp_and_sum(kg, &L);
kernel_write_light_passes(kg, buffer, &L, sample);
+#ifdef __KERNEL_DEBUG__
+ kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample);
+#endif
+
return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent);
}
#ifdef __BRANCHED_PATH__
-ccl_device_noinline void kernel_branched_path_integrate_lighting(KernelGlobals *kg,
+/* branched path tracing: bounce off surface and integrate indirect light */
+ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
RNG *rng, ShaderData *sd, float3 throughput, float num_samples_adjust,
- PathState *state, PathRadiance *L, ccl_global float *buffer)
+ PathState *state, PathRadiance *L)
{
-#ifdef __EMISSION__
- if(kernel_data.integrator.use_direct_light) {
- bool all = kernel_data.integrator.sample_all_lights_direct;
- kernel_branched_path_integrate_direct_lighting(kg, rng, sd, state, throughput, num_samples_adjust, L, all);
- }
-#endif
-
for(int i = 0; i< sd->num_closure; i++) {
const ShaderClosure *sc = &sd->closure[i];
@@ -980,68 +764,102 @@ ccl_device_noinline void kernel_branched_path_integrate_lighting(KernelGlobals *
RNG bsdf_rng = cmj_hash(*rng, i);
for(int j = 0; j < num_samples; j++) {
- /* sample BSDF */
- float bsdf_pdf;
- BsdfEval bsdf_eval;
- float3 bsdf_omega_in;
- differential3 bsdf_domega_in;
- float bsdf_u, bsdf_v;
- path_branched_rng_2D(kg, &bsdf_rng, state, j, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
- int label;
-
- label = shader_bsdf_sample_closure(kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval,
- &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
+ PathState ps = *state;
+ float3 tp = throughput;
+ Ray bsdf_ray;
- if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval))
+ if(!kernel_branched_path_surface_bounce(kg, &bsdf_rng, sd, sc, j, num_samples, &tp, &ps, L, &bsdf_ray))
continue;
- /* modify throughput */
- float3 tp = throughput;
- path_radiance_bsdf_bounce(L, &tp, &bsdf_eval, bsdf_pdf, state->bounce, label);
+ kernel_path_indirect(kg, rng, bsdf_ray, tp*num_samples_inv, num_samples, ps, L);
- /* modify path state */
- PathState ps = *state;
- path_state_next(kg, &ps, label);
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(L);
+ path_radiance_reset_indirect(L);
+ }
+ }
+}
- /* setup ray */
- Ray bsdf_ray;
+#ifdef __SUBSURFACE__
+ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
+ ShaderData *sd,
+ PathRadiance *L,
+ PathState *state,
+ RNG *rng,
+ Ray *ray,
+ float3 throughput)
+{
+ for(int i = 0; i< sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
- bsdf_ray.P = ray_offset(sd->P, (label & LABEL_TRANSMIT)? -sd->Ng: sd->Ng);
- bsdf_ray.D = bsdf_omega_in;
- bsdf_ray.t = FLT_MAX;
-#ifdef __RAY_DIFFERENTIALS__
- bsdf_ray.dP = sd->dP;
- bsdf_ray.dD = bsdf_domega_in;
-#endif
-#ifdef __OBJECT_MOTION__
- bsdf_ray.time = sd->time;
-#endif
+ if(!CLOSURE_IS_BSSRDF(sc->type))
+ continue;
+
+ /* set up random number generator */
+ uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
+ int num_samples = kernel_data.integrator.subsurface_samples;
+ float num_samples_inv = 1.0f/num_samples;
+ RNG bssrdf_rng = cmj_hash(*rng, i);
+
+ state->flag |= PATH_RAY_BSSRDF_ANCESTOR;
+ /* do subsurface scatter step with copy of shader data, this will
+ * replace the BSSRDF with a diffuse BSDF closure */
+ for(int j = 0; j < num_samples; j++) {
+ ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
+ float bssrdf_u, bssrdf_v;
+ path_branched_rng_2D(kg, &bssrdf_rng, state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+ int num_hits = subsurface_scatter_multi_step(kg, sd, bssrdf_sd, state->flag, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
#ifdef __VOLUME__
- /* enter/exit volume */
- if(label & LABEL_TRANSMIT)
- kernel_volume_stack_enter_exit(kg, sd, ps.volume_stack);
+ Ray volume_ray = *ray;
+ bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
+ sd->flag & SD_OBJECT_INTERSECTS_VOLUME;
#endif
- /* branch RNG state */
- path_state_branch(&ps, j, num_samples);
+ /* compute lighting with the BSDF closure */
+ for(int hit = 0; hit < num_hits; hit++) {
+ PathState hit_state = *state;
- /* set MIS state */
- ps.min_ray_pdf = fminf(bsdf_pdf, FLT_MAX);
- ps.ray_pdf = bsdf_pdf;
-#ifdef __LAMP_MIS__
- ps.ray_t = 0.0f;
+ path_state_branch(&hit_state, j, num_samples);
+
+#ifdef __VOLUME__
+ if(need_update_volume_stack) {
+ /* Setup ray from previous surface point to the new one. */
+ float3 P = ray_offset(bssrdf_sd[hit].P, -bssrdf_sd[hit].Ng);
+ volume_ray.D = normalize_len(P - volume_ray.P,
+ &volume_ray.t);
+
+ kernel_path_subsurface_update_volume_stack(
+ kg,
+ &volume_ray,
+ hit_state.volume_stack);
+
+ /* Move volume ray forward. */
+ volume_ray.P = P;
+ }
#endif
- kernel_path_indirect(kg, rng, bsdf_ray, tp*num_samples_inv, num_samples, ps, L);
+#if defined(__EMISSION__) && defined(__BRANCHED_PATH__)
+ /* direct light */
+ if(kernel_data.integrator.use_direct_light) {
+ bool all = kernel_data.integrator.sample_all_lights_direct;
+ kernel_branched_path_surface_connect_light(kg, rng,
+ &bssrdf_sd[hit], &hit_state, throughput, num_samples_inv, L, all);
+ }
+#endif
- /* for render passes, sum and reset indirect light pass variables
- * for the next samples */
- path_radiance_sum_indirect(L);
- path_radiance_reset_indirect(L);
+ /* indirect light */
+ kernel_branched_path_surface_indirect_light(kg, rng,
+ &bssrdf_sd[hit], throughput, num_samples_inv,
+ &hit_state, L);
+ }
}
+
+ state->flag &= ~PATH_RAY_BSSRDF_ANCESTOR;
}
}
+#endif
ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer)
{
@@ -1053,7 +871,12 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
path_radiance_init(&L, kernel_data.film.use_light_pass);
PathState state;
- path_state_init(kg, &state, rng, sample);
+ path_state_init(kg, &state, rng, sample, &ray);
+
+#ifdef __KERNEL_DEBUG__
+ DebugData debug_data;
+ debug_data_init(&debug_data);
+#endif
for(;;) {
/* intersect scene */
@@ -1077,7 +900,13 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
bool hit = scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax);
#else
- bool hit = scene_intersect(kg, &ray, visibility, &isect);
+ bool hit = scene_intersect(kg, &ray, visibility, &isect, NULL, 0.0f, 0.0f);
+#endif
+
+#ifdef __KERNEL_DEBUG__
+ if(state.flag & PATH_RAY_CAMERA) {
+ debug_data.num_bvh_traversal_steps += isect.num_traversal_steps;
+ }
#endif
#ifdef __VOLUME__
@@ -1085,10 +914,11 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
if(state.volume_stack[0].shader != SHADER_NONE) {
Ray volume_ray = ray;
volume_ray.t = (hit)? isect.t: FLT_MAX;
+
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
-#ifdef __KERNEL_CPU__
+#ifdef __VOLUME_DECOUPLED__
/* decoupled ray marching only supported on CPU */
- bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
/* cache steps along volume for repeated sampling */
VolumeSegment volume_segment;
@@ -1098,29 +928,45 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
kernel_volume_decoupled_record(kg, &state,
&volume_ray, &volume_sd, &volume_segment, heterogeneous);
- /* sample scattering */
- int num_samples = kernel_data.integrator.volume_samples;
- float num_samples_inv = 1.0f/num_samples;
+ /* direct light sampling */
+ if(volume_segment.closure_flag & SD_SCATTER) {
+ volume_segment.sampling_method = volume_stack_sampling_method(kg, state.volume_stack);
- for(int j = 0; j < num_samples; j++) {
- /* workaround to fix correlation bug in T38710, can find better solution
- * in random number generator later, for now this is done here to not impact
- * performance of rendering without volumes */
- RNG tmp_rng = cmj_hash(*rng, state.rng_offset);
+ bool all = kernel_data.integrator.sample_all_lights_direct;
- PathState ps = state;
- Ray pray = ray;
- float3 tp = throughput;
+ kernel_branched_path_volume_connect_light(kg, rng, &volume_sd,
+ throughput, &state, &L, 1.0f, all, &volume_ray, &volume_segment);
- /* branch RNG state */
- path_state_branch(&ps, j, num_samples);
+ /* indirect light sampling */
+ int num_samples = kernel_data.integrator.volume_samples;
+ float num_samples_inv = 1.0f/num_samples;
- VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
- &ps, &volume_ray, &volume_sd, &tp, &tmp_rng, &volume_segment);
-
- if(result == VOLUME_PATH_SCATTERED) {
- /* todo: use all-light sampling */
- if(kernel_path_integrate_scatter_lighting(kg, rng, &volume_sd, &tp, &ps, &L, &pray, num_samples_inv)) {
+ for(int j = 0; j < num_samples; j++) {
+ /* workaround to fix correlation bug in T38710, can find better solution
+ * in random number generator later, for now this is done here to not impact
+ * performance of rendering without volumes */
+ RNG tmp_rng = cmj_hash(*rng, state.rng_offset);
+
+ PathState ps = state;
+ Ray pray = ray;
+ float3 tp = throughput;
+
+ /* branch RNG state */
+ path_state_branch(&ps, j, num_samples);
+
+ /* scatter sample. if we use distance sampling and take just one
+ * sample for direct and indirect light, we could share this
+ * computation, but makes code a bit complex */
+ float rphase = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_PHASE);
+ float rscatter = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_SCATTER_DISTANCE);
+
+ VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
+ &ps, &pray, &volume_sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
+
+ (void)result;
+ kernel_assert(result == VOLUME_PATH_SCATTERED);
+
+ if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray)) {
kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L);
/* for render passes, sum and reset indirect light pass variables
@@ -1150,18 +996,22 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
PathState ps = state;
Ray pray = ray;
ShaderData volume_sd;
- float3 tp = throughput;
+ float3 tp = throughput * num_samples_inv;
/* branch RNG state */
path_state_branch(&ps, j, num_samples);
- VolumeIntegrateResult result = kernel_volume_integrate(kg, &ps,
- &volume_sd, &volume_ray, &L, &tp, rng);
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, &ps, &volume_sd, &volume_ray, &L, &tp, rng, heterogeneous);
+#ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
- /* todo: use all-light sampling */
- if(kernel_path_integrate_scatter_lighting(kg, rng, &volume_sd, &tp, &ps, &L, &pray, num_samples_inv)) {
- kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L);
+ /* todo: support equiangular, MIS and all light sampling.
+ * alternatively get decoupled ray marching working on the GPU */
+ kernel_path_volume_connect_light(kg, rng, &volume_sd, tp, &state, &L);
+
+ if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray)) {
+ kernel_path_indirect(kg, rng, pray, tp, num_samples, ps, &L);
/* for render passes, sum and reset indirect light pass variables
* for the next samples */
@@ -1169,6 +1019,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
path_radiance_reset_indirect(&L);
}
}
+#endif
}
/* todo: avoid this calculation using decoupled ray marching */
@@ -1205,7 +1056,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
/* holdout */
#ifdef __HOLDOUT__
- if((sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK))) {
+ if(sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK)) {
if(kernel_data.background.transparent) {
float3 holdout_weight;
@@ -1245,7 +1096,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
break;
}
else if(probability != 1.0f) {
- float terminate = path_state_rng_1D(kg, rng, &state, PRNG_TERMINATE);
+ float terminate = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_TERMINATE);
if(terminate >= probability)
break;
@@ -1257,90 +1108,33 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
#ifdef __AO__
/* ambient occlusion */
if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
- int num_samples = kernel_data.integrator.ao_samples;
- float num_samples_inv = 1.0f/num_samples;
- float ao_factor = kernel_data.background.ao_factor;
- float3 ao_N;
- float3 ao_bsdf = shader_bsdf_ao(kg, &sd, ao_factor, &ao_N);
- float3 ao_alpha = shader_bsdf_alpha(kg, &sd);
-
- for(int j = 0; j < num_samples; j++) {
- float bsdf_u, bsdf_v;
- path_branched_rng_2D(kg, rng, &state, j, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
-
- float3 ao_D;
- float ao_pdf;
-
- sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
-
- if(dot(sd.Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
- Ray light_ray;
- float3 ao_shadow;
-
- light_ray.P = ray_offset(sd.P, sd.Ng);
- light_ray.D = ao_D;
- light_ray.t = kernel_data.background.ao_distance;
-#ifdef __OBJECT_MOTION__
- light_ray.time = sd.time;
-#endif
- light_ray.dP = sd.dP;
- light_ray.dD = differential3_zero();
-
- if(!shadow_blocked(kg, &state, &light_ray, &ao_shadow))
- path_radiance_accum_ao(&L, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow, state.bounce);
- }
- }
+ kernel_branched_path_ao(kg, &sd, &L, &state, rng, throughput);
}
#endif
#ifdef __SUBSURFACE__
/* bssrdf scatter to a different location on the same object */
if(sd.flag & SD_BSSRDF) {
- for(int i = 0; i< sd.num_closure; i++) {
- ShaderClosure *sc = &sd.closure[i];
-
- if(!CLOSURE_IS_BSSRDF(sc->type))
- continue;
-
- /* set up random number generator */
- uint lcg_state = lcg_state_init(rng, &state, 0x68bc21eb);
- int num_samples = kernel_data.integrator.subsurface_samples;
- float num_samples_inv = 1.0f/num_samples;
- RNG bssrdf_rng = cmj_hash(*rng, i);
-
- state.flag |= PATH_RAY_BSSRDF_ANCESTOR;
-
- /* do subsurface scatter step with copy of shader data, this will
- * replace the BSSRDF with a diffuse BSDF closure */
- for(int j = 0; j < num_samples; j++) {
- ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
- float bssrdf_u, bssrdf_v;
- path_branched_rng_2D(kg, &bssrdf_rng, &state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
- int num_hits = subsurface_scatter_multi_step(kg, &sd, bssrdf_sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
-
- /* compute lighting with the BSDF closure */
- for(int hit = 0; hit < num_hits; hit++) {
- PathState hit_state = state;
-
- path_state_branch(&hit_state, j, num_samples);
-
- kernel_branched_path_integrate_lighting(kg, rng,
- &bssrdf_sd[hit], throughput, num_samples_inv,
- &hit_state, &L, buffer);
- }
- }
-
- state.flag &= ~PATH_RAY_BSSRDF_ANCESTOR;
- }
+ kernel_branched_path_subsurface_scatter(kg, &sd, &L, &state,
+ rng, &ray, throughput);
}
#endif
if(!(sd.flag & SD_HAS_ONLY_VOLUME)) {
PathState hit_state = state;
- /* lighting */
- kernel_branched_path_integrate_lighting(kg, rng,
- &sd, throughput, 1.0f, &hit_state, &L, buffer);
+#ifdef __EMISSION__
+ /* direct light */
+ if(kernel_data.integrator.use_direct_light) {
+ bool all = kernel_data.integrator.sample_all_lights_direct;
+ kernel_branched_path_surface_connect_light(kg, rng,
+ &sd, &hit_state, throughput, 1.0f, &L, all);
+ }
+#endif
+
+ /* indirect light */
+ kernel_branched_path_surface_indirect_light(kg, rng,
+ &sd, throughput, 1.0f, &hit_state, &L);
/* continue in case of transparency */
throughput *= shader_bsdf_transparency(kg, &sd);
@@ -1353,6 +1147,13 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
ray.P = ray_offset(sd.P, -sd.Ng);
ray.t -= sd.ray_length; /* clipping works through transparent */
+
+#ifdef __RAY_DIFFERENTIALS__
+ ray.dP = sd.dP;
+ ray.dD.dx = -sd.dI.dx;
+ ray.dD.dy = -sd.dI.dy;
+#endif
+
#ifdef __VOLUME__
/* enter/exit volume */
kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack);
@@ -1363,6 +1164,10 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
kernel_write_light_passes(kg, buffer, &L, sample);
+#ifdef __KERNEL_DEBUG__
+ kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample);
+#endif
+
return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent);
}
@@ -1372,11 +1177,8 @@ ccl_device_inline void kernel_path_trace_setup(KernelGlobals *kg, ccl_global uin
{
float filter_u;
float filter_v;
-#ifdef __CMJ__
+
int num_samples = kernel_data.integrator.aa_samples;
-#else
- int num_samples = 0;
-#endif
path_rng_init(kg, rng_state, sample, num_samples, rng, x, y, &filter_u, &filter_v);
diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h
index 406654c1741..f29168642a4 100644
--- a/intern/cycles/kernel/kernel_path_state.h
+++ b/intern/cycles/kernel/kernel_path_state.h
@@ -16,17 +16,13 @@
CCL_NAMESPACE_BEGIN
-ccl_device_inline void path_state_init(KernelGlobals *kg, PathState *state, RNG *rng, int sample)
+ccl_device_inline void path_state_init(KernelGlobals *kg, PathState *state, RNG *rng, int sample, Ray *ray)
{
- state->flag = PATH_RAY_CAMERA|PATH_RAY_SINGULAR|PATH_RAY_MIS_SKIP;
+ state->flag = PATH_RAY_CAMERA|PATH_RAY_MIS_SKIP;
state->rng_offset = PRNG_BASE_NUM;
state->sample = sample;
-#ifdef __CMJ__
state->num_samples = kernel_data.integrator.aa_samples;
-#else
- state->num_samples = 0;
-#endif
state->bounce = 0;
state->diffuse_bounce = 0;
@@ -45,7 +41,7 @@ ccl_device_inline void path_state_init(KernelGlobals *kg, PathState *state, RNG
if(kernel_data.integrator.use_volumes) {
/* initialize volume stack with volume we are inside of */
- kernel_volume_stack_init(kg, state->volume_stack);
+ kernel_volume_stack_init(kg, ray, state->volume_stack);
/* seed RNG for cases where we can't use stratified samples */
state->rng_congruential = lcg_init(*rng + sample*0x51633e2d);
}
@@ -63,8 +59,8 @@ ccl_device_inline void path_state_next(KernelGlobals *kg, PathState *state, int
state->flag |= PATH_RAY_TRANSPARENT;
state->transparent_bounce++;
- /* random number generator next bounce */
- state->rng_offset += PRNG_BOUNCE_NUM;
+ /* don't increase random number generator offset here, to avoid some
+ * unwanted patterns, see path_state_rng_1D_for_decision */
if(!kernel_data.integrator.transparent_shadows)
state->flag |= PATH_RAY_MIS_SKIP;
diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h
new file mode 100644
index 00000000000..9553c2da0df
--- /dev/null
+++ b/intern/cycles/kernel/kernel_path_surface.h
@@ -0,0 +1,299 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+CCL_NAMESPACE_BEGIN
+
+#if defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__)
+
+/* branched path tracing: connect path directly to position on one or more lights and add it to L */
+ccl_device void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RNG *rng,
+ ShaderData *sd, PathState *state, float3 throughput, float num_samples_adjust, PathRadiance *L, bool sample_all_lights)
+{
+#ifdef __EMISSION__
+ /* sample illumination from lights to find path contribution */
+ if(!(sd->flag & SD_BSDF_HAS_EVAL))
+ return;
+
+ Ray light_ray;
+ BsdfEval L_light;
+ bool is_lamp;
+
+#ifdef __OBJECT_MOTION__
+ light_ray.time = sd->time;
+#endif
+
+ if(sample_all_lights) {
+ /* lamp sampling */
+ for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
+ int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i));
+ float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights);
+ RNG lamp_rng = cmj_hash(*rng, i);
+
+ if(kernel_data.integrator.pdf_triangles != 0.0f)
+ num_samples_inv *= 0.5f;
+
+ for(int j = 0; j < num_samples; j++) {
+ float light_u, light_v;
+ path_branched_rng_2D(kg, &lamp_rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+
+ LightSample ls;
+ lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls);
+
+ if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
+ }
+ }
+ }
+ }
+
+ /* mesh light sampling */
+ if(kernel_data.integrator.pdf_triangles != 0.0f) {
+ int num_samples = ceil_to_int(num_samples_adjust*kernel_data.integrator.mesh_light_samples);
+ float num_samples_inv = num_samples_adjust/num_samples;
+
+ if(kernel_data.integrator.num_all_lights)
+ num_samples_inv *= 0.5f;
+
+ for(int j = 0; j < num_samples; j++) {
+ float light_t = path_branched_rng_1D(kg, rng, state, j, num_samples, PRNG_LIGHT);
+ float light_u, light_v;
+ path_branched_rng_2D(kg, rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+
+ /* only sample triangle lights */
+ if(kernel_data.integrator.num_all_lights)
+ light_t = 0.5f*light_t;
+
+ LightSample ls;
+ light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls);
+
+ if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
+ }
+ }
+ }
+ }
+ }
+ else {
+ /* sample one light at random */
+ float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
+ float light_u, light_v;
+ path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
+
+ LightSample ls;
+ light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls);
+
+ /* sample random light */
+ if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L, throughput*num_samples_adjust, &L_light, shadow, num_samples_adjust, state->bounce, is_lamp);
+ }
+ }
+ }
+#endif
+}
+
+/* branched path tracing: bounce off or through surface to with new direction stored in ray */
+ccl_device bool kernel_branched_path_surface_bounce(KernelGlobals *kg, RNG *rng,
+ ShaderData *sd, const ShaderClosure *sc, int sample, int num_samples,
+ float3 *throughput, PathState *state, PathRadiance *L, Ray *ray)
+{
+ /* sample BSDF */
+ float bsdf_pdf;
+ BsdfEval bsdf_eval;
+ float3 bsdf_omega_in;
+ differential3 bsdf_domega_in;
+ float bsdf_u, bsdf_v;
+ path_branched_rng_2D(kg, rng, state, sample, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+ int label;
+
+ label = shader_bsdf_sample_closure(kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval,
+ &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
+
+ if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval))
+ return false;
+
+ /* modify throughput */
+ path_radiance_bsdf_bounce(L, throughput, &bsdf_eval, bsdf_pdf, state->bounce, label);
+
+ /* modify path state */
+ path_state_next(kg, state, label);
+
+ /* setup ray */
+ ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT)? -sd->Ng: sd->Ng);
+ ray->D = bsdf_omega_in;
+ ray->t = FLT_MAX;
+#ifdef __RAY_DIFFERENTIALS__
+ ray->dP = sd->dP;
+ ray->dD = bsdf_domega_in;
+#endif
+#ifdef __OBJECT_MOTION__
+ ray->time = sd->time;
+#endif
+
+#ifdef __VOLUME__
+ /* enter/exit volume */
+ if(label & LABEL_TRANSMIT)
+ kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
+#endif
+
+ /* branch RNG state */
+ path_state_branch(state, sample, num_samples);
+
+ /* set MIS state */
+ state->min_ray_pdf = fminf(bsdf_pdf, FLT_MAX);
+ state->ray_pdf = bsdf_pdf;
+#ifdef __LAMP_MIS__
+ state->ray_t = 0.0f;
+#endif
+
+ return true;
+}
+
+#endif
+
+/* path tracing: connect path directly to position on a light and add it to L */
+ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, RNG *rng,
+ ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L)
+{
+#ifdef __EMISSION__
+ if(!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL)))
+ return;
+
+ /* sample illumination from lights to find path contribution */
+ float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
+ float light_u, light_v;
+ path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
+
+ Ray light_ray;
+ BsdfEval L_light;
+ bool is_lamp;
+
+#ifdef __OBJECT_MOTION__
+ light_ray.time = sd->time;
+#endif
+
+ LightSample ls;
+ light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls);
+
+ if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
+ }
+ }
+#endif
+}
+
+/* path tracing: bounce off or through surface to with new direction stored in ray */
+ccl_device_inline bool kernel_path_surface_bounce(KernelGlobals *kg, RNG *rng,
+ ShaderData *sd, float3 *throughput, PathState *state, PathRadiance *L, Ray *ray)
+{
+ /* no BSDF? we can stop here */
+ if(sd->flag & SD_BSDF) {
+ /* sample BSDF */
+ float bsdf_pdf;
+ BsdfEval bsdf_eval;
+ float3 bsdf_omega_in;
+ differential3 bsdf_domega_in;
+ float bsdf_u, bsdf_v;
+ path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+ int label;
+
+ label = shader_bsdf_sample(kg, sd, bsdf_u, bsdf_v, &bsdf_eval,
+ &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
+
+ if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval))
+ return false;
+
+ /* modify throughput */
+ path_radiance_bsdf_bounce(L, throughput, &bsdf_eval, bsdf_pdf, state->bounce, label);
+
+ /* set labels */
+ if(!(label & LABEL_TRANSPARENT)) {
+ state->ray_pdf = bsdf_pdf;
+#ifdef __LAMP_MIS__
+ state->ray_t = 0.0f;
+#endif
+ state->min_ray_pdf = fminf(bsdf_pdf, state->min_ray_pdf);
+ }
+
+ /* update path state */
+ path_state_next(kg, state, label);
+
+ /* setup ray */
+ ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT)? -sd->Ng: sd->Ng);
+ ray->D = bsdf_omega_in;
+
+ if(state->bounce == 0)
+ ray->t -= sd->ray_length; /* clipping works through transparent */
+ else
+ ray->t = FLT_MAX;
+
+#ifdef __RAY_DIFFERENTIALS__
+ ray->dP = sd->dP;
+ ray->dD = bsdf_domega_in;
+#endif
+
+#ifdef __VOLUME__
+ /* enter/exit volume */
+ if(label & LABEL_TRANSMIT)
+ kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
+#endif
+ return true;
+ }
+#ifdef __VOLUME__
+ else if(sd->flag & SD_HAS_ONLY_VOLUME) {
+ /* no surface shader but have a volume shader? act transparent */
+
+ /* update path state, count as transparent */
+ path_state_next(kg, state, LABEL_TRANSPARENT);
+
+ /* setup ray position, direction stays unchanged */
+ ray->P = ray_offset(sd->P, -sd->Ng);
+#ifdef __RAY_DIFFERENTIALS__
+ ray->dP = sd->dP;
+#endif
+
+ /* enter/exit volume */
+ kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
+ return true;
+ }
+#endif
+ else {
+ /* no bsdf or volume? */
+ return false;
+ }
+}
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/kernel/kernel_path_volume.h b/intern/cycles/kernel/kernel_path_volume.h
new file mode 100644
index 00000000000..d8143832294
--- /dev/null
+++ b/intern/cycles/kernel/kernel_path_volume.h
@@ -0,0 +1,267 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+CCL_NAMESPACE_BEGIN
+
+#ifdef __VOLUME_SCATTER__
+
+ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
+ ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L)
+{
+#ifdef __EMISSION__
+ if(!kernel_data.integrator.use_direct_light)
+ return;
+
+ /* sample illumination from lights to find path contribution */
+ float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
+ float light_u, light_v;
+ path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
+
+ Ray light_ray;
+ BsdfEval L_light;
+ LightSample ls;
+ bool is_lamp;
+
+ /* connect to light from given point where shader has been evaluated */
+#ifdef __OBJECT_MOTION__
+ light_ray.time = sd->time;
+#endif
+
+ light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls);
+ if(ls.pdf == 0.0f)
+ return;
+
+ if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
+ }
+ }
+#endif
+}
+
+#ifdef __KERNEL_GPU__
+ccl_device_noinline
+#else
+ccl_device
+#endif
+bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng,
+ ShaderData *sd, float3 *throughput, PathState *state, PathRadiance *L, Ray *ray)
+{
+ /* sample phase function */
+ float phase_pdf;
+ BsdfEval phase_eval;
+ float3 phase_omega_in;
+ differential3 phase_domega_in;
+ float phase_u, phase_v;
+ path_state_rng_2D(kg, rng, state, PRNG_PHASE_U, &phase_u, &phase_v);
+ int label;
+
+ label = shader_volume_phase_sample(kg, sd, phase_u, phase_v, &phase_eval,
+ &phase_omega_in, &phase_domega_in, &phase_pdf);
+
+ if(phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval))
+ return false;
+
+ /* modify throughput */
+ path_radiance_bsdf_bounce(L, throughput, &phase_eval, phase_pdf, state->bounce, label);
+
+ /* set labels */
+ state->ray_pdf = phase_pdf;
+#ifdef __LAMP_MIS__
+ state->ray_t = 0.0f;
+#endif
+ state->min_ray_pdf = fminf(phase_pdf, state->min_ray_pdf);
+
+ /* update path state */
+ path_state_next(kg, state, label);
+
+ /* setup ray */
+ ray->P = sd->P;
+ ray->D = phase_omega_in;
+ ray->t = FLT_MAX;
+
+#ifdef __RAY_DIFFERENTIALS__
+ ray->dP = sd->dP;
+ ray->dD = phase_domega_in;
+#endif
+
+ return true;
+}
+
+ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
+ ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L,
+ float num_samples_adjust, bool sample_all_lights, Ray *ray, const VolumeSegment *segment)
+{
+#ifdef __EMISSION__
+ if(!kernel_data.integrator.use_direct_light)
+ return;
+
+ Ray light_ray;
+ BsdfEval L_light;
+ bool is_lamp;
+
+#ifdef __OBJECT_MOTION__
+ light_ray.time = sd->time;
+#endif
+
+ if(sample_all_lights) {
+ /* lamp sampling */
+ for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
+ int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i));
+ float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights);
+ RNG lamp_rng = cmj_hash(*rng, i);
+
+ if(kernel_data.integrator.pdf_triangles != 0.0f)
+ num_samples_inv *= 0.5f;
+
+ for(int j = 0; j < num_samples; j++) {
+ /* sample random position on given light */
+ float light_u, light_v;
+ path_branched_rng_2D(kg, &lamp_rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+
+ LightSample ls;
+ lamp_light_sample(kg, i, light_u, light_v, ray->P, &ls);
+
+ float3 tp = throughput;
+
+ /* sample position on volume segment */
+ float rphase = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_PHASE);
+ float rscatter = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_SCATTER_DISTANCE);
+
+ VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
+ state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false);
+
+ (void)result;
+ kernel_assert(result == VOLUME_PATH_SCATTERED);
+
+ /* todo: split up light_sample so we don't have to call it again with new position */
+ lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls);
+
+ if(ls.pdf == 0.0f)
+ continue;
+
+ if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L, tp*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
+ }
+ }
+ }
+ }
+
+ /* mesh light sampling */
+ if(kernel_data.integrator.pdf_triangles != 0.0f) {
+ int num_samples = ceil_to_int(num_samples_adjust*kernel_data.integrator.mesh_light_samples);
+ float num_samples_inv = num_samples_adjust/num_samples;
+
+ if(kernel_data.integrator.num_all_lights)
+ num_samples_inv *= 0.5f;
+
+ for(int j = 0; j < num_samples; j++) {
+ /* sample random position on random triangle */
+ float light_t = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_LIGHT);
+ float light_u, light_v;
+ path_branched_rng_2D(kg, rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+
+ /* only sample triangle lights */
+ if(kernel_data.integrator.num_all_lights)
+ light_t = 0.5f*light_t;
+
+ LightSample ls;
+ light_sample(kg, light_t, light_u, light_v, sd->time, ray->P, &ls);
+
+ float3 tp = throughput;
+
+ /* sample position on volume segment */
+ float rphase = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_PHASE);
+ float rscatter = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_SCATTER_DISTANCE);
+
+ VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
+ state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false);
+
+ (void)result;
+ kernel_assert(result == VOLUME_PATH_SCATTERED);
+
+ /* todo: split up light_sample so we don't have to call it again with new position */
+ light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls);
+
+ if(ls.pdf == 0.0f)
+ continue;
+
+ if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L, tp*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
+ }
+ }
+ }
+ }
+ }
+ else {
+ /* sample random position on random light */
+ float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
+ float light_u, light_v;
+ path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
+
+ LightSample ls;
+ light_sample(kg, light_t, light_u, light_v, sd->time, ray->P, &ls);
+
+ float3 tp = throughput;
+
+ /* sample position on volume segment */
+ float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
+ float rscatter = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
+
+ VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
+ state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false);
+
+ (void)result;
+ kernel_assert(result == VOLUME_PATH_SCATTERED);
+
+ /* todo: split up light_sample so we don't have to call it again with new position */
+ light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls);
+
+ if(ls.pdf == 0.0f)
+ return;
+
+ /* sample random light */
+ if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L, tp, &L_light, shadow, 1.0f, state->bounce, is_lamp);
+ }
+ }
+ }
+#endif
+}
+
+#endif
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index 31cb6ff6abd..236f74c0a82 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -261,22 +261,41 @@ ccl_device uint lcg_init(uint seed)
* For branches in the path we must be careful not to reuse the same number
* in a sequence and offset accordingly. */
-ccl_device_inline float path_state_rng_1D(KernelGlobals *kg, RNG *rng, PathState *state, int dimension)
+ccl_device_inline float path_state_rng_1D(KernelGlobals *kg, RNG *rng, const PathState *state, int dimension)
{
return path_rng_1D(kg, rng, state->sample, state->num_samples, state->rng_offset + dimension);
}
-ccl_device_inline void path_state_rng_2D(KernelGlobals *kg, RNG *rng, PathState *state, int dimension, float *fx, float *fy)
+ccl_device_inline float path_state_rng_1D_for_decision(KernelGlobals *kg, RNG *rng, const PathState *state, int dimension)
+{
+ /* the rng_offset is not increased for transparent bounces. if we do then
+ * fully transparent objects can become subtly visible by the different
+ * sampling patterns used where the transparent object is.
+ *
+ * however for some random numbers that will determine if we next bounce
+ * is transparent we do need to increase the offset to avoid always making
+ * the same decision */
+ int rng_offset = state->rng_offset + state->transparent_bounce*PRNG_BOUNCE_NUM;
+ return path_rng_1D(kg, rng, state->sample, state->num_samples, rng_offset + dimension);
+}
+
+ccl_device_inline void path_state_rng_2D(KernelGlobals *kg, RNG *rng, const PathState *state, int dimension, float *fx, float *fy)
{
path_rng_2D(kg, rng, state->sample, state->num_samples, state->rng_offset + dimension, fx, fy);
}
-ccl_device_inline float path_branched_rng_1D(KernelGlobals *kg, RNG *rng, PathState *state, int branch, int num_branches, int dimension)
+ccl_device_inline float path_branched_rng_1D(KernelGlobals *kg, RNG *rng, const PathState *state, int branch, int num_branches, int dimension)
{
return path_rng_1D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, state->rng_offset + dimension);
}
-ccl_device_inline void path_branched_rng_2D(KernelGlobals *kg, RNG *rng, PathState *state, int branch, int num_branches, int dimension, float *fx, float *fy)
+ccl_device_inline float path_branched_rng_1D_for_decision(KernelGlobals *kg, RNG *rng, const PathState *state, int branch, int num_branches, int dimension)
+{
+ int rng_offset = state->rng_offset + state->transparent_bounce*PRNG_BOUNCE_NUM;
+ return path_rng_1D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, rng_offset + dimension);
+}
+
+ccl_device_inline void path_branched_rng_2D(KernelGlobals *kg, RNG *rng, const PathState *state, int branch, int num_branches, int dimension, float *fx, float *fy)
{
path_rng_2D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, state->rng_offset + dimension, fx, fy);
}
@@ -290,7 +309,7 @@ ccl_device_inline void path_state_branch(PathState *state, int branch, int num_b
state->num_samples = state->num_samples*num_branches;
}
-ccl_device_inline uint lcg_state_init(RNG *rng, PathState *state, uint scramble)
+ccl_device_inline uint lcg_state_init(RNG *rng, const PathState *state, uint scramble)
{
return lcg_init(*rng + state->rng_offset + state->sample*scramble);
}
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index 58cec090410..db08c328d7e 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -86,9 +86,8 @@ ccl_device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
#endif
if(sd->type & PRIMITIVE_TRIANGLE) {
/* static triangle */
- float4 Ns = kernel_tex_fetch(__tri_normal, sd->prim);
- float3 Ng = make_float3(Ns.x, Ns.y, Ns.z);
- sd->shader = __float_as_int(Ns.w);
+ float3 Ng = triangle_normal(kg, sd);
+ sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
/* vectors */
sd->P = triangle_refine(kg, sd, isect, ray);
@@ -166,9 +165,8 @@ ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderDat
/* fetch triangle data */
if(sd->type == PRIMITIVE_TRIANGLE) {
- float4 Ns = kernel_tex_fetch(__tri_normal, sd->prim);
- float3 Ng = make_float3(Ns.x, Ns.y, Ns.z);
- sd->shader = __float_as_int(Ns.w);
+ float3 Ng = triangle_normal(kg, sd);
+ sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
/* static triangle */
sd->P = triangle_refine_subsurface(kg, sd, isect, ray);
@@ -342,7 +340,7 @@ ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f);
int shader;
- triangle_point_normal(kg, prim, u, v, &P, &Ng, &shader);
+ triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
/* force smooth shading for displacement */
shader |= SHADER_SMOOTH_NORMAL;
@@ -609,6 +607,9 @@ ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughn
ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, ShaderData *sd)
{
+ if(sd->flag & SD_HAS_ONLY_VOLUME)
+ return make_float3(1.0f, 1.0f, 1.0f);
+
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
for(int i = 0; i< sd->num_closure; i++) {
@@ -797,8 +798,8 @@ ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd,
#ifdef __SVM__
svm_eval_nodes(kg, sd, SHADER_TYPE_SURFACE, path_flag);
#else
- sd->closure.weight = make_float3(0.8f, 0.8f, 0.8f);
- sd->closure.N = sd->N;
+ sd->closure->weight = make_float3(0.8f, 0.8f, 0.8f);
+ sd->closure->N = sd->N;
sd->flag |= bsdf_diffuse_setup(&sd->closure);
#endif
}
@@ -857,7 +858,7 @@ ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd, con
if(phase_pdf != 0.0f) {
bsdf_eval_accum(result_eval, sc->type, eval);
- sum_pdf += phase_pdf;
+ sum_pdf += phase_pdf*sc->sample_weight;
}
sum_sample_weight += sc->sample_weight;
@@ -1025,8 +1026,7 @@ ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect
#ifdef __HAIR__
if(kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
#endif
- float4 Ns = kernel_tex_fetch(__tri_normal, prim);
- shader = __float_as_int(Ns.w);
+ shader = kernel_tex_fetch(__tri_shader, prim);
#ifdef __HAIR__
}
else {
diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h
index ab7524c411a..61954282c28 100644
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@ -64,18 +64,21 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
bool blocked;
if(kernel_data.integrator.transparent_shadows) {
+ /* check transparent bounces here, for volume scatter which can do
+ * lighting before surface path termination is checked */
+ if(state->transparent_bounce >= kernel_data.integrator.transparent_max_bounce)
+ return true;
+
/* intersect to find an opaque surface, or record all transparent surface hits */
Intersection hits_stack[STACK_MAX_HITS];
- Intersection *hits;
+ Intersection *hits = hits_stack;
uint max_hits = kernel_data.integrator.transparent_max_bounce - state->transparent_bounce - 1;
/* prefer to use stack but use dynamic allocation if too deep max hits
* we need max_hits + 1 storage space due to the logic in
* scene_intersect_shadow_all which will first store and then check if
* the limit is exceeded */
- if(max_hits + 1 <= STACK_MAX_HITS)
- hits = hits_stack;
- else
+ if(max_hits + 1 > STACK_MAX_HITS)
hits = (Intersection*)malloc(sizeof(Intersection)*(max_hits + 1));
uint num_hits;
@@ -152,7 +155,11 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
kernel_volume_shadow(kg, &ps, ray, &throughput);
#endif
- *shadow *= throughput;
+ *shadow = throughput;
+
+ if(hits != hits_stack)
+ free(hits);
+ return is_zero(throughput);
}
/* free dynamic storage */
@@ -161,11 +168,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
}
else {
Intersection isect;
-#ifdef __HAIR__
blocked = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f);
-#else
- blocked = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect);
-#endif
}
#ifdef __VOLUME__
@@ -178,6 +181,8 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
return blocked;
}
+#undef STACK_MAX_HITS
+
#else
/* Shadow function to compute how much light is blocked, GPU variation.
@@ -196,11 +201,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
return false;
Intersection isect;
-#ifdef __HAIR__
bool blocked = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f);
-#else
- bool blocked = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect);
-#endif
#ifdef __TRANSPARENT_SHADOWS__
if(blocked && kernel_data.integrator.transparent_shadows) {
@@ -216,11 +217,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
if(bounce >= kernel_data.integrator.transparent_max_bounce)
return true;
-#ifdef __HAIR__
if(!scene_intersect(kg, ray, PATH_RAY_SHADOW_TRANSPARENT, &isect, NULL, 0.0f, 0.0f))
-#else
- if(!scene_intersect(kg, ray, PATH_RAY_SHADOW_TRANSPARENT, &isect))
-#endif
{
#ifdef __VOLUME__
diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp
index 2d5f6091908..740998e8c92 100644
--- a/intern/cycles/kernel/kernel_sse2.cpp
+++ b/intern/cycles/kernel/kernel_sse2.cpp
@@ -34,7 +34,7 @@
#include "kernel_globals.h"
#include "kernel_film.h"
#include "kernel_path.h"
-#include "kernel_displace.h"
+#include "kernel_bake.h"
CCL_NAMESPACE_BEGIN
@@ -64,9 +64,12 @@ void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa
/* Shader Evaluate */
-void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
+void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
{
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ if(type >= SHADER_EVAL_BAKE)
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
+ else
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp
index 1062fd0c990..da73a3a1c97 100644
--- a/intern/cycles/kernel/kernel_sse3.cpp
+++ b/intern/cycles/kernel/kernel_sse3.cpp
@@ -36,7 +36,7 @@
#include "kernel_globals.h"
#include "kernel_film.h"
#include "kernel_path.h"
-#include "kernel_displace.h"
+#include "kernel_bake.h"
CCL_NAMESPACE_BEGIN
@@ -66,9 +66,12 @@ void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa
/* Shader Evaluate */
-void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
+void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
{
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ if(type >= SHADER_EVAL_BAKE)
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
+ else
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_sse41.cpp b/intern/cycles/kernel/kernel_sse41.cpp
index ba3b4887650..5704f60e138 100644
--- a/intern/cycles/kernel/kernel_sse41.cpp
+++ b/intern/cycles/kernel/kernel_sse41.cpp
@@ -37,7 +37,7 @@
#include "kernel_globals.h"
#include "kernel_film.h"
#include "kernel_path.h"
-#include "kernel_displace.h"
+#include "kernel_bake.h"
CCL_NAMESPACE_BEGIN
@@ -67,9 +67,12 @@ void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, flo
/* Shader Evaluate */
-void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
+void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
{
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ if(type >= SHADER_EVAL_BAKE)
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
+ else
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h
index b07075c6c95..ef46b2f707f 100644
--- a/intern/cycles/kernel/kernel_textures.h
+++ b/intern/cycles/kernel/kernel_textures.h
@@ -36,7 +36,7 @@ KERNEL_TEX(float4, texture_float4, __objects)
KERNEL_TEX(float4, texture_float4, __objects_vector)
/* triangles */
-KERNEL_TEX(float4, texture_float4, __tri_normal)
+KERNEL_TEX(uint, texture_uint, __tri_shader)
KERNEL_TEX(float4, texture_float4, __tri_vnormal)
KERNEL_TEX(float4, texture_float4, __tri_vindex)
KERNEL_TEX(float4, texture_float4, __tri_verts)
@@ -49,6 +49,7 @@ KERNEL_TEX(float4, texture_float4, __curve_keys)
KERNEL_TEX(uint4, texture_uint4, __attributes_map)
KERNEL_TEX(float, texture_float, __attributes_float)
KERNEL_TEX(float4, texture_float4, __attributes_float3)
+KERNEL_TEX(uchar4, texture_uchar4, __attributes_uchar4)
/* lights */
KERNEL_TEX(float4, texture_float4, __light_distribution)
@@ -172,10 +173,9 @@ KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_095)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_096)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_097)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_098)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_099)
/* Kepler and above */
-#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_099)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_100)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_101)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_102)
@@ -227,7 +227,6 @@ KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_147)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_148)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_149)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_150)
-#endif
/* packed image (opencl) */
KERNEL_TEX(uchar4, texture_uchar4, __tex_image_packed)
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 11445aa1c93..cfac8d1e905 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -38,12 +38,14 @@ CCL_NAMESPACE_BEGIN
#define BSSRDF_MIN_RADIUS 1e-8f
#define BSSRDF_MAX_HITS 4
-#define BB_DRAPPER 800.0f
+#define BB_DRAPER 800.0f
#define BB_MAX_TABLE_RANGE 12000.0f
#define BB_TABLE_XPOWER 1.5f
#define BB_TABLE_YPOWER 5.0f
#define BB_TABLE_SPACING 2.0f
+#define BECKMANN_TABLE_SIZE 256
+
#define TEX_NUM_FLOAT_IMAGES 5
#define SHADER_NONE (~0)
@@ -64,6 +66,8 @@ CCL_NAMESPACE_BEGIN
#define __SUBSURFACE__
#define __CMJ__
#define __VOLUME__
+#define __VOLUME_DECOUPLED__
+#define __VOLUME_SCATTER__
#define __SHADOW_RECORD_ALL__
#endif
@@ -71,10 +75,15 @@ CCL_NAMESPACE_BEGIN
#define __KERNEL_SHADING__
#define __KERNEL_ADV_SHADING__
#define __BRANCHED_PATH__
+#define __VOLUME__
+#define __VOLUME_SCATTER__
/* Experimental on GPU */
-//#define __VOLUME__
-//#define __SUBSURFACE__
+#ifdef __KERNEL_CUDA_EXPERIMENTAL__
+#define __SUBSURFACE__
+#define __CMJ__
+#endif
+
#endif
#ifdef __KERNEL_OPENCL__
@@ -101,7 +110,6 @@ CCL_NAMESPACE_BEGIN
#define __BACKGROUND_MIS__
#define __LAMP_MIS__
#define __AO__
-#define __ANISOTROPIC__
//#define __CAMERA_MOTION__
//#define __OBJECT_MOTION__
//#define __HAIR__
@@ -132,11 +140,9 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_SHADING__
#define __SVM__
#define __EMISSION__
-#define __PROCEDURAL_TEXTURES__
-#define __IMAGE_TEXTURES__
+#define __TEXTURES__
#define __EXTRA_NODES__
#define __HOLDOUT__
-#define __NORMAL_MAP__
#endif
#ifdef __KERNEL_ADV_SHADING__
@@ -146,12 +152,15 @@ CCL_NAMESPACE_BEGIN
#define __BACKGROUND_MIS__
#define __LAMP_MIS__
#define __AO__
-#define __ANISOTROPIC__
#define __CAMERA_MOTION__
#define __OBJECT_MOTION__
#define __HAIR__
#endif
+#ifdef WITH_CYCLES_DEBUG
+# define __KERNEL_DEBUG__
+#endif
+
/* Random Numbers */
typedef uint RNG;
@@ -221,10 +230,9 @@ enum PathTraceDimension {
PRNG_PHASE_V = 9,
PRNG_PHASE = 10,
PRNG_SCATTER_DISTANCE = 11,
- PRNG_BOUNCE_NUM = 12,
-#else
- PRNG_BOUNCE_NUM = 8,
#endif
+
+ PRNG_BOUNCE_NUM = 12,
};
enum SamplingPattern {
@@ -250,17 +258,17 @@ enum PathRayFlag {
PATH_RAY_SHADOW_TRANSPARENT = 256,
PATH_RAY_SHADOW = (PATH_RAY_SHADOW_OPAQUE|PATH_RAY_SHADOW_TRANSPARENT),
- PATH_RAY_CURVE = 512, /* visibility flag to define curve segments*/
+ PATH_RAY_CURVE = 512, /* visibility flag to define curve segments */
+ PATH_RAY_VOLUME_SCATTER = 1024, /* volume scattering */
/* note that these can use maximum 12 bits, the other are for layers */
- PATH_RAY_ALL_VISIBILITY = (1|2|4|8|16|32|64|128|256|512),
+ PATH_RAY_ALL_VISIBILITY = (1|2|4|8|16|32|64|128|256|512|1024),
- PATH_RAY_MIS_SKIP = 1024,
- PATH_RAY_DIFFUSE_ANCESTOR = 2048,
- PATH_RAY_GLOSSY_ANCESTOR = 4096,
- PATH_RAY_BSSRDF_ANCESTOR = 8192,
- PATH_RAY_SINGLE_PASS_DONE = 16384,
- PATH_RAY_VOLUME_SCATTER = 32768,
+ PATH_RAY_MIS_SKIP = 2048,
+ PATH_RAY_DIFFUSE_ANCESTOR = 4096,
+ PATH_RAY_GLOSSY_ANCESTOR = 8192,
+ PATH_RAY_BSSRDF_ANCESTOR = 16384,
+ PATH_RAY_SINGLE_PASS_DONE = 32768,
/* we need layer member flags to be the 20 upper bits */
PATH_RAY_LAYER_SHIFT = (32-20)
@@ -283,32 +291,35 @@ typedef enum ClosureLabel {
typedef enum PassType {
PASS_NONE = 0,
- PASS_COMBINED = 1,
- PASS_DEPTH = 2,
- PASS_NORMAL = 4,
- PASS_UV = 8,
- PASS_OBJECT_ID = 16,
- PASS_MATERIAL_ID = 32,
- PASS_DIFFUSE_COLOR = 64,
- PASS_GLOSSY_COLOR = 128,
- PASS_TRANSMISSION_COLOR = 256,
- PASS_DIFFUSE_INDIRECT = 512,
- PASS_GLOSSY_INDIRECT = 1024,
- PASS_TRANSMISSION_INDIRECT = 2048,
- PASS_DIFFUSE_DIRECT = 4096,
- PASS_GLOSSY_DIRECT = 8192,
- PASS_TRANSMISSION_DIRECT = 16384,
- PASS_EMISSION = 32768,
- PASS_BACKGROUND = 65536,
- PASS_AO = 131072,
- PASS_SHADOW = 262144,
- PASS_MOTION = 524288,
- PASS_MOTION_WEIGHT = 1048576,
- PASS_MIST = 2097152,
- PASS_SUBSURFACE_DIRECT = 4194304,
- PASS_SUBSURFACE_INDIRECT = 8388608,
- PASS_SUBSURFACE_COLOR = 16777216,
- PASS_LIGHT = 33554432, /* no real pass, used to force use_light_pass */
+ PASS_COMBINED = (1 << 0),
+ PASS_DEPTH = (1 << 1),
+ PASS_NORMAL = (1 << 2),
+ PASS_UV = (1 << 3),
+ PASS_OBJECT_ID = (1 << 4),
+ PASS_MATERIAL_ID = (1 << 5),
+ PASS_DIFFUSE_COLOR = (1 << 6),
+ PASS_GLOSSY_COLOR = (1 << 7),
+ PASS_TRANSMISSION_COLOR = (1 << 8),
+ PASS_DIFFUSE_INDIRECT = (1 << 9),
+ PASS_GLOSSY_INDIRECT = (1 << 10),
+ PASS_TRANSMISSION_INDIRECT = (1 << 11),
+ PASS_DIFFUSE_DIRECT = (1 << 12),
+ PASS_GLOSSY_DIRECT = (1 << 13),
+ PASS_TRANSMISSION_DIRECT = (1 << 14),
+ PASS_EMISSION = (1 << 15),
+ PASS_BACKGROUND = (1 << 16),
+ PASS_AO = (1 << 17),
+ PASS_SHADOW = (1 << 18),
+ PASS_MOTION = (1 << 19),
+ PASS_MOTION_WEIGHT = (1 << 20),
+ PASS_MIST = (1 << 21),
+ PASS_SUBSURFACE_DIRECT = (1 << 22),
+ PASS_SUBSURFACE_INDIRECT = (1 << 23),
+ PASS_SUBSURFACE_COLOR = (1 << 24),
+ PASS_LIGHT = (1 << 25), /* no real pass, used to force use_light_pass */
+#ifdef __KERNEL_DEBUG__
+ PASS_BVH_TRAVERSAL_STEPS = (1 << 26),
+#endif
} PassType;
#define PASS_ALL (~0)
@@ -330,21 +341,25 @@ typedef struct PathRadiance {
float3 color_glossy;
float3 color_transmission;
float3 color_subsurface;
+ float3 color_scatter;
float3 direct_diffuse;
float3 direct_glossy;
float3 direct_transmission;
float3 direct_subsurface;
+ float3 direct_scatter;
float3 indirect_diffuse;
float3 indirect_glossy;
float3 indirect_transmission;
float3 indirect_subsurface;
+ float3 indirect_scatter;
float3 path_diffuse;
float3 path_glossy;
float3 path_transmission;
float3 path_subsurface;
+ float3 path_scatter;
float4 shadow;
float mist;
@@ -358,6 +373,7 @@ typedef struct BsdfEval {
float3 transmission;
float3 transparent;
float3 subsurface;
+ float3 scatter;
} BsdfEval;
#else
@@ -378,7 +394,8 @@ typedef enum ShaderFlag {
SHADER_EXCLUDE_GLOSSY = (1 << 26),
SHADER_EXCLUDE_TRANSMIT = (1 << 25),
SHADER_EXCLUDE_CAMERA = (1 << 24),
- SHADER_EXCLUDE_ANY = (SHADER_EXCLUDE_DIFFUSE|SHADER_EXCLUDE_GLOSSY|SHADER_EXCLUDE_TRANSMIT|SHADER_EXCLUDE_CAMERA),
+ SHADER_EXCLUDE_SCATTER = (1 << 23),
+ SHADER_EXCLUDE_ANY = (SHADER_EXCLUDE_DIFFUSE|SHADER_EXCLUDE_GLOSSY|SHADER_EXCLUDE_TRANSMIT|SHADER_EXCLUDE_CAMERA|SHADER_EXCLUDE_SCATTER),
SHADER_MASK = ~(SHADER_SMOOTH_NORMAL|SHADER_CAST_SHADOW|SHADER_AREA_LIGHT|SHADER_USE_MIS|SHADER_EXCLUDE_ANY)
} ShaderFlag;
@@ -390,10 +407,8 @@ typedef enum LightType {
LIGHT_DISTANT,
LIGHT_BACKGROUND,
LIGHT_AREA,
- LIGHT_AO,
LIGHT_SPOT,
- LIGHT_TRIANGLE,
- LIGHT_STRAND
+ LIGHT_TRIANGLE
} LightType;
/* Camera Type */
@@ -445,6 +460,10 @@ typedef struct Intersection {
int prim;
int object;
int type;
+
+#ifdef __KERNEL_DEBUG__
+ int num_traversal_steps;
+#endif
} Intersection;
/* Primitives */
@@ -478,6 +497,7 @@ typedef enum AttributeElement {
ATTR_ELEMENT_VERTEX,
ATTR_ELEMENT_VERTEX_MOTION,
ATTR_ELEMENT_CORNER,
+ ATTR_ELEMENT_CORNER_BYTE,
ATTR_ELEMENT_CURVE,
ATTR_ELEMENT_CURVE_KEY,
ATTR_ELEMENT_CURVE_KEY_MOTION,
@@ -519,24 +539,32 @@ typedef enum AttributeStandard {
#define MAX_CLOSURE 1
#endif
+/* TODO(sergey): This is rather nasty bug happening in here, which
+ * could be simply a compilers bug for which we can't find a generic
+ * platform independent workaround. Also even if it's a compiler
+ * issue, it's not so simple to upgrade the compiler in the release
+ * environment for linux and doing it so closer to the release is
+ * rather a risky business.
+ *
+ * For this release it's probably safer to stick with such a rather
+ * dirty solution, and look for a cleaner fix during the next release
+ * cycle.
+ */
typedef struct ShaderClosure {
ClosureType type;
float3 weight;
-
+#ifndef __APPLE__
float sample_weight;
-
+#endif
float data0;
float data1;
+ float data2;
float3 N;
-#if defined(__ANISOTROPIC__) || defined(__SUBSURFACE__) || defined(__HAIR__)
float3 T;
+#ifdef __APPLE__
+ float sample_weight;
#endif
-
-#ifdef __HAIR__
- float offset;
-#endif
-
#ifdef __OSL__
void *prim;
#endif
@@ -563,37 +591,49 @@ typedef enum ShaderContext {
enum ShaderDataFlag {
/* runtime flags */
- SD_BACKFACING = 1, /* backside of surface? */
- SD_EMISSION = 2, /* have emissive closure? */
- SD_BSDF = 4, /* have bsdf closure? */
- SD_BSDF_HAS_EVAL = 8, /* have non-singular bsdf closure? */
- SD_PHASE_HAS_EVAL = 8, /* have non-singular phase closure? */
- SD_BSDF_GLOSSY = 16, /* have glossy bsdf */
- SD_BSSRDF = 32, /* have bssrdf */
- SD_HOLDOUT = 64, /* have holdout closure? */
- SD_ABSORPTION = 128, /* have volume absorption closure? */
- SD_SCATTER = 256, /* have volume phase closure? */
- SD_AO = 512, /* have ao closure? */
- SD_TRANSPARENT = 1024, /* have transparent closure? */
-
- SD_CLOSURE_FLAGS = (SD_EMISSION|SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY|SD_BSSRDF|SD_HOLDOUT|SD_ABSORPTION|SD_SCATTER|SD_AO),
+ SD_BACKFACING = (1 << 0), /* backside of surface? */
+ SD_EMISSION = (1 << 1), /* have emissive closure? */
+ SD_BSDF = (1 << 2), /* have bsdf closure? */
+ SD_BSDF_HAS_EVAL = (1 << 3), /* have non-singular bsdf closure? */
+ SD_PHASE_HAS_EVAL = (1 << 3), /* have non-singular phase closure? */
+ SD_BSDF_GLOSSY = (1 << 4), /* have glossy bsdf */
+ SD_BSSRDF = (1 << 5), /* have bssrdf */
+ SD_HOLDOUT = (1 << 6), /* have holdout closure? */
+ SD_ABSORPTION = (1 << 7), /* have volume absorption closure? */
+ SD_SCATTER = (1 << 8), /* have volume phase closure? */
+ SD_AO = (1 << 9), /* have ao closure? */
+ SD_TRANSPARENT = (1 << 10), /* have transparent closure? */
+
+ SD_CLOSURE_FLAGS = (SD_EMISSION|SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY|
+ SD_BSSRDF|SD_HOLDOUT|SD_ABSORPTION|SD_SCATTER|SD_AO),
/* shader flags */
- SD_USE_MIS = 2048, /* direct light sample */
- SD_HAS_TRANSPARENT_SHADOW = 4096, /* has transparent shadow */
- SD_HAS_VOLUME = 8192, /* has volume shader */
- SD_HAS_ONLY_VOLUME = 16384, /* has only volume shader, no surface */
- SD_HETEROGENEOUS_VOLUME = 32768, /* has heterogeneous volume */
- SD_HAS_BSSRDF_BUMP = 65536, /* bssrdf normal uses bump */
-
- SD_SHADER_FLAGS = (SD_USE_MIS|SD_HAS_TRANSPARENT_SHADOW|SD_HAS_VOLUME|SD_HAS_ONLY_VOLUME|SD_HETEROGENEOUS_VOLUME|SD_HAS_BSSRDF_BUMP),
+ SD_USE_MIS = (1 << 11), /* direct light sample */
+ SD_HAS_TRANSPARENT_SHADOW = (1 << 12), /* has transparent shadow */
+ SD_HAS_VOLUME = (1 << 13), /* has volume shader */
+ SD_HAS_ONLY_VOLUME = (1 << 14), /* has only volume shader, no surface */
+ SD_HETEROGENEOUS_VOLUME = (1 << 15), /* has heterogeneous volume */
+ SD_HAS_BSSRDF_BUMP = (1 << 16), /* bssrdf normal uses bump */
+ SD_VOLUME_EQUIANGULAR = (1 << 17), /* use equiangular sampling */
+ SD_VOLUME_MIS = (1 << 18), /* use multiple importance sampling */
+ SD_VOLUME_CUBIC = (1 << 19), /* use cubic interpolation for voxels */
+
+ SD_SHADER_FLAGS = (SD_USE_MIS|SD_HAS_TRANSPARENT_SHADOW|SD_HAS_VOLUME|
+ SD_HAS_ONLY_VOLUME|SD_HETEROGENEOUS_VOLUME|
+ SD_HAS_BSSRDF_BUMP|SD_VOLUME_EQUIANGULAR|SD_VOLUME_MIS|
+ SD_VOLUME_CUBIC),
/* object flags */
- SD_HOLDOUT_MASK = 131072, /* holdout for camera rays */
- SD_OBJECT_MOTION = 262144, /* has object motion blur */
- SD_TRANSFORM_APPLIED = 524288, /* vertices have transform applied */
-
- SD_OBJECT_FLAGS = (SD_HOLDOUT_MASK|SD_OBJECT_MOTION|SD_TRANSFORM_APPLIED)
+ SD_HOLDOUT_MASK = (1 << 20), /* holdout for camera rays */
+ SD_OBJECT_MOTION = (1 << 21), /* has object motion blur */
+ SD_TRANSFORM_APPLIED = (1 << 22), /* vertices have transform applied */
+ SD_NEGATIVE_SCALE_APPLIED = (1 << 23), /* vertices have negative scale applied */
+ SD_OBJECT_HAS_VOLUME = (1 << 24), /* object has a volume shader */
+ SD_OBJECT_INTERSECTS_VOLUME = (1 << 25), /* object intersects AABB of an object with volume shader */
+
+ SD_OBJECT_FLAGS = (SD_HOLDOUT_MASK|SD_OBJECT_MOTION|SD_TRANSFORM_APPLIED|
+ SD_NEGATIVE_SCALE_APPLIED|SD_OBJECT_HAS_VOLUME|
+ SD_OBJECT_INTERSECTS_VOLUME)
};
struct KernelGlobals;
@@ -686,9 +726,10 @@ typedef struct PathState {
int flag;
/* random number generator state */
- int rng_offset; /* dimension offset */
- int sample; /* path sample number */
- int num_samples; /* total number of times this path will be sampled */
+ int rng_offset; /* dimension offset */
+ int rng_offset_bsdf; /* dimension offset for picking bsdf */
+ int sample; /* path sample number */
+ int num_samples; /* total number of times this path will be sampled */
/* bounce counting */
int bounce;
@@ -756,9 +797,12 @@ typedef struct KernelCamera {
/* render size */
float width, height;
int resolution;
- int pad1;
+
+ /* anamorphic lens bokeh */
+ float inv_aperture_ratio;
+
+ int is_inside_volume;
int pad2;
- int pad3;
/* more matrices */
Transform screentoworld;
@@ -819,6 +863,11 @@ typedef struct KernelFilm {
float mist_start;
float mist_inv_depth;
float mist_falloff;
+
+#ifdef __KERNEL_DEBUG__
+ int pass_bvh_traversal_steps;
+ int pass_pad3, pass_pad4, pass_pad5;
+#endif
} KernelFilm;
typedef struct KernelBackground {
@@ -860,7 +909,8 @@ typedef struct KernelIntegrator {
int transparent_shadows;
/* caustics */
- int no_caustics;
+ int caustics_reflective;
+ int caustics_refractive;
float filter_glossy;
/* seed */
@@ -892,7 +942,6 @@ typedef struct KernelIntegrator {
int aa_samples;
/* volume render */
- int volume_homogeneous_sampling;
int use_volumes;
int volume_max_steps;
float volume_step_size;
@@ -922,7 +971,6 @@ typedef enum CurveFlag {
} CurveFlag;
typedef struct KernelCurves {
- /* strand intersect and normal parameters - many can be changed to flags */
int curveflags;
int subdivisions;
@@ -930,11 +978,11 @@ typedef struct KernelCurves {
float maximum_width;
} KernelCurves;
-typedef struct KernelBlackbody {
- int table_offset;
- int pad1, pad2, pad3;
-} KernelBlackbody;
-
+typedef struct KernelTables {
+ int blackbody_offset;
+ int beckmann_offset;
+ int pad1, pad2;
+} KernelTables;
typedef struct KernelData {
KernelCamera cam;
@@ -943,9 +991,17 @@ typedef struct KernelData {
KernelIntegrator integrator;
KernelBVH bvh;
KernelCurves curve;
- KernelBlackbody blackbody;
+ KernelTables tables;
} KernelData;
+#ifdef __KERNEL_DEBUG__
+typedef struct DebugData {
+ // Total number of BVH node travesal steps and primitives intersections
+ // for the camera rays.
+ int num_bvh_traversal_steps;
+} DebugData;
+#endif
+
CCL_NAMESPACE_END
#endif /* __KERNEL_TYPES_H__ */
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index faaa68e3309..ce20f20e75a 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -116,6 +116,36 @@ ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, VolumeStack *st
return false;
}
+ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stack)
+{
+ if(kernel_data.integrator.num_all_lights == 0)
+ return 0;
+
+ int method = -1;
+
+ for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
+ int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*2);
+
+ if(shader_flag & SD_VOLUME_MIS) {
+ return SD_VOLUME_MIS;
+ }
+ else if(shader_flag & SD_VOLUME_EQUIANGULAR) {
+ if(method == 0)
+ return SD_VOLUME_MIS;
+
+ method = SD_VOLUME_EQUIANGULAR;
+ }
+ else {
+ if(method == SD_VOLUME_EQUIANGULAR)
+ return SD_VOLUME_MIS;
+
+ method = 0;
+ }
+ }
+
+ return method;
+}
+
/* Volume Shadows
*
* These functions are used to attenuate shadow rays to lights. Both absorption
@@ -136,7 +166,7 @@ ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg, PathState *s
ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, float3 *throughput)
{
float3 tp = *throughput;
- const float tp_eps = 1e-10f; /* todo: this is likely not the right value */
+ const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
/* prepare for stepping */
int max_steps = kernel_data.integrator.volume_max_steps;
@@ -146,6 +176,8 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState
/* compute extinction at the start */
float t = 0.0f;
+ float3 sum = make_float3(0.0f, 0.0f, 0.0f);
+
for(int i = 0; i < max_steps; i++) {
/* advance to new position */
float new_t = min(ray->t, (i+1) * step);
@@ -160,20 +192,26 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState
/* compute attenuation over segment */
if(volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) {
- /* todo: we could avoid computing expf() for each step by summing,
- * because exp(a)*exp(b) = exp(a+b), but we still want a quick
- * tp_eps check too */
- tp *= volume_color_transmittance(sigma_t, new_t - t);
-
- /* stop if nearly all light blocked */
- if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps)
- break;
+ /* Compute expf() only for every Nth step, to save some calculations
+ * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. */
+
+ sum += (-sigma_t * (new_t - t));
+ if((i & 0x07) == 0) { /* ToDo: Other interval? */
+ tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z));
+
+ /* stop if nearly all light is blocked */
+ if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps)
+ break;
+ }
}
/* stop if at the end of the volume */
t = new_t;
- if(t == ray->t)
+ if(t == ray->t) {
+ /* Update throughput in case we haven't done it above */
+ tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z));
break;
+ }
}
*throughput = tp;
@@ -226,33 +264,6 @@ ccl_device float kernel_volume_equiangular_pdf(Ray *ray, float3 light_P, float s
return pdf;
}
-ccl_device bool kernel_volume_equiangular_light_position(KernelGlobals *kg, PathState *state, Ray *ray, RNG *rng, float3 *light_P)
-{
- /* light RNGs */
- float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
- float light_u, light_v;
- path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
-
- /* light sample */
- LightSample ls;
- light_sample(kg, light_t, light_u, light_v, ray->time, ray->P, &ls);
- if(ls.pdf == 0.0f)
- return false;
-
- *light_P = ls.P;
- return true;
-}
-
-ccl_device float kernel_volume_decoupled_equiangular_pdf(KernelGlobals *kg, PathState *state, Ray *ray, RNG *rng, float sample_t)
-{
- float3 light_P;
-
- if(!kernel_volume_equiangular_light_position(kg, state, ray, rng, &light_P))
- return 0.0f;
-
- return kernel_volume_equiangular_pdf(ray, light_P, sample_t);
-}
-
/* Distance sampling */
ccl_device float kernel_volume_distance_sample(float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
@@ -312,7 +323,7 @@ ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coe
* the volume shading coefficient for the entire line segment */
ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGlobals *kg,
PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput,
- RNG *rng)
+ RNG *rng, bool probalistic_scatter)
{
VolumeShaderCoefficients coeff;
@@ -323,6 +334,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba
float t = ray->t;
float3 new_tp;
+#ifdef __VOLUME_SCATTER__
/* randomly scatter, and if we do t is shortened */
if(closure_flag & SD_SCATTER) {
/* extinction coefficient */
@@ -330,43 +342,41 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba
/* pick random color channel, we use the Veach one-sample
* model with balance heuristic for the channels */
- float rphase = path_state_rng_1D(kg, rng, state, PRNG_PHASE);
+ float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
int channel = (int)(rphase*3.0f);
sd->randb_closure = rphase*3.0f - channel;
- float xi = path_state_rng_1D(kg, rng, state, PRNG_SCATTER_DISTANCE);
-
/* decide if we will hit or miss */
- float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
- float sample_transmittance = expf(-sample_sigma_t * t);
+ bool scatter = true;
+ float xi = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
+
+ if(probalistic_scatter) {
+ float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
+ float sample_transmittance = expf(-sample_sigma_t * t);
+
+ if(1.0f - xi >= sample_transmittance) {
+ scatter = true;
+
+ /* rescale random number so we can reuse it */
+ xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
- if(xi >= sample_transmittance) {
+ }
+ else
+ scatter = false;
+ }
+
+ if(scatter) {
/* scattering */
float3 pdf;
float3 transmittance;
float sample_t;
- /* rescale random number so we can reuse it */
- xi = (xi - sample_transmittance)/(1.0f - sample_transmittance);
-
- if(kernel_data.integrator.volume_homogeneous_sampling == 0 || !kernel_data.integrator.num_all_lights) {
- /* distance sampling */
- sample_t = kernel_volume_distance_sample(ray->t, sigma_t, channel, xi, &transmittance, &pdf);
- }
- else {
- /* equiangular sampling */
- float3 light_P;
- float equi_pdf;
- if(!kernel_volume_equiangular_light_position(kg, state, ray, rng, &light_P))
- return VOLUME_PATH_MISSED;
-
- sample_t = kernel_volume_equiangular_sample(ray, light_P, xi, &equi_pdf);
- transmittance = volume_color_transmittance(sigma_t, sample_t);
- pdf = make_float3(equi_pdf, equi_pdf, equi_pdf);
- }
+ /* distance sampling */
+ sample_t = kernel_volume_distance_sample(ray->t, sigma_t, channel, xi, &transmittance, &pdf);
/* modifiy pdf for hit/miss decision */
- pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(sigma_t, t);
+ if(probalistic_scatter)
+ pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(sigma_t, t);
new_tp = *throughput * coeff.sigma_s * transmittance / average(pdf);
t = sample_t;
@@ -378,14 +388,16 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba
new_tp = *throughput * transmittance / pdf;
}
}
- else if(closure_flag & SD_ABSORPTION) {
+ else
+#endif
+ if(closure_flag & SD_ABSORPTION) {
/* absorption only, no sampling needed */
float3 transmittance = volume_color_transmittance(coeff.sigma_a, t);
new_tp = *throughput * transmittance;
}
/* integrate emission attenuated by extinction */
- if(closure_flag & SD_EMISSION) {
+ if(L && (closure_flag & SD_EMISSION)) {
float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
float3 transmittance = volume_color_transmittance(sigma_t, ray->t);
float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, ray->t);
@@ -408,13 +420,15 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba
return VOLUME_PATH_ATTENUATED;
}
-/* heterogeneous volume: integrate stepping through the volume until we
- * reach the end, get absorbed entirely, or run out of iterations */
-ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlobals *kg,
+/* heterogeneous volume distance sampling: integrate stepping through the
+ * volume until we reach the end, get absorbed entirely, or run out of
+ * iterations. this does probalistically scatter or get transmitted through
+ * for path tracing where we don't want to branch. */
+ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput, RNG *rng)
{
float3 tp = *throughput;
- const float tp_eps = 1e-10f; /* todo: this is likely not the right value */
+ const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
/* prepare for stepping */
int max_steps = kernel_data.integrator.volume_max_steps;
@@ -425,9 +439,12 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlo
float t = 0.0f;
float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
- /* cache some constant variables */
- float xi;
- int channel = -1;
+ /* pick random color channel, we use the Veach one-sample
+ * model with balance heuristic for the channels */
+ float xi = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
+ float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
+ int channel = (int)(rphase*3.0f);
+ sd->randb_closure = rphase*3.0f - channel;
bool has_scatter = false;
for(int i = 0; i < max_steps; i++) {
@@ -449,25 +466,14 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlo
float3 transmittance;
bool scatter = false;
- /* randomly scatter, and if we do dt and new_t are shortened */
+ /* distance sampling */
+#ifdef __VOLUME_SCATTER__
if((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_ABSORPTION))) {
has_scatter = true;
- /* average sigma_t and sigma_s over segment */
float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
float3 sigma_s = coeff.sigma_s;
- /* lazily set up variables for sampling */
- if(channel == -1) {
- /* pick random color channel, we use the Veach one-sample
- * model with balance heuristic for the channels */
- xi = path_state_rng_1D(kg, rng, state, PRNG_SCATTER_DISTANCE);
-
- float rphase = path_state_rng_1D(kg, rng, state, PRNG_PHASE);
- channel = (int)(rphase*3.0f);
- sd->randb_closure = rphase*3.0f - channel;
- }
-
/* compute transmittance over full step */
transmittance = volume_color_transmittance(sigma_t, dt);
@@ -480,10 +486,12 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlo
float new_dt = -logf(1.0f - xi)/sample_sigma_t;
new_t = t + new_dt;
- /* transmittance, throughput */
+ /* transmittance and pdf */
float3 new_transmittance = volume_color_transmittance(sigma_t, new_dt);
- float pdf = average(sigma_t * new_transmittance);
- new_tp = tp * sigma_s * new_transmittance / pdf;
+ float3 pdf = sigma_t * new_transmittance;
+
+ /* throughput */
+ new_tp = tp * sigma_s * new_transmittance / average(pdf);
scatter = true;
}
else {
@@ -495,7 +503,9 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlo
xi = 1.0f - (1.0f - xi)/sample_transmittance;
}
}
- else if(closure_flag & SD_ABSORPTION) {
+ else
+#endif
+ if(closure_flag & SD_ABSORPTION) {
/* absorption only, no sampling needed */
float3 sigma_a = coeff.sigma_a;
@@ -504,7 +514,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlo
}
/* integrate emission attenuated by absorption */
- if(closure_flag & SD_EMISSION) {
+ if(L && (closure_flag & SD_EMISSION)) {
float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
path_radiance_accum_emission(L, tp, emission, state->bounce);
}
@@ -518,19 +528,19 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlo
tp = make_float3(0.0f, 0.0f, 0.0f);
break;
}
+ }
- /* prepare to scatter to new direction */
- if(scatter) {
- /* adjust throughput and move to new location */
- sd->P = ray->P + new_t*ray->D;
- *throughput = tp;
+ /* prepare to scatter to new direction */
+ if(scatter) {
+ /* adjust throughput and move to new location */
+ sd->P = ray->P + new_t*ray->D;
+ *throughput = tp;
- return VOLUME_PATH_SCATTERED;
- }
- else {
- /* accumulate transmittance */
- accum_transmittance *= transmittance;
- }
+ return VOLUME_PATH_SCATTERED;
+ }
+ else {
+ /* accumulate transmittance */
+ accum_transmittance *= transmittance;
}
}
@@ -545,14 +555,34 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlo
return VOLUME_PATH_ATTENUATED;
}
+/* get the volume attenuation and emission over line segment defined by
+ * ray, with the assumption that there are no surfaces blocking light
+ * between the endpoints. distance sampling is used to decide if we will
+ * scatter or not. */
+ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg,
+ PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng, bool heterogeneous)
+{
+ /* workaround to fix correlation bug in T38710, can find better solution
+ * in random number generator later, for now this is done here to not impact
+ * performance of rendering without volumes */
+ RNG tmp_rng = cmj_hash(*rng, state->rng_offset);
+
+ shader_setup_from_volume(kg, sd, ray, state->bounce, state->transparent_bounce);
+
+ if(heterogeneous)
+ return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput, &tmp_rng);
+ else
+ return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, &tmp_rng, true);
+}
+
/* Decoupled Volume Sampling
*
* VolumeSegment is list of coefficients and transmittance stored at all steps
* through a volume. This can then latter be used for decoupled sampling as in:
- * "Importance Sampling Techniques for Path Tracing in Participating Media" */
-
-/* CPU only because of malloc/free */
-#ifdef __KERNEL_CPU__
+ * "Importance Sampling Techniques for Path Tracing in Participating Media"
+ *
+ * On the GPU this is only supported for homogeneous volumes (1 step), due to
+ * no support for malloc/free and too much stack usage with a fix size array. */
typedef struct VolumeStep {
float3 sigma_s; /* scatter coefficient */
@@ -571,6 +601,8 @@ typedef struct VolumeSegment {
float3 accum_emission; /* accumulated emission at end of segment */
float3 accum_transmittance; /* accumulated transmittance at end of segment */
+
+ int sampling_method; /* volume sampling method */
} VolumeSegment;
/* record volume steps to the end of the volume.
@@ -578,10 +610,12 @@ typedef struct VolumeSegment {
* it would be nice if we could only record up to the point that we need to scatter,
* but the entire segment is needed to do always scattering, rather than probalistically
* hitting or missing the volume. if we don't know the transmittance at the end of the
- * volume we can't generate stratitied distance samples up to that transmittance */
+ * volume we can't generate stratified distance samples up to that transmittance */
ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state,
Ray *ray, ShaderData *sd, VolumeSegment *segment, bool heterogeneous)
{
+ const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
+
/* prepare for volume stepping */
int max_steps;
float step_size, random_jitter_offset;
@@ -608,6 +642,7 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta
segment->closure_flag = 0;
segment->numsteps = 0;
+
segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
VolumeStep *step = segment->steps;
@@ -669,6 +704,10 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta
t = new_t;
if(t == ray->t)
break;
+
+ /* stop if nearly all light blocked */
+ if(accum_transmittance.x < tp_eps && accum_transmittance.y < tp_eps && accum_transmittance.z < tp_eps)
+ break;
}
/* store total emission and transmittance */
@@ -698,35 +737,70 @@ ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *s
* scattering, they always scatter if there is any non-zero scattering
* coefficient.
*
- * these also do not do emission or modify throughput. */
+ * these also do not do emission or modify throughput.
+ *
+ * function is expected to return VOLUME_PATH_SCATTERED when probalistic_scatter is false */
ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd,
- float3 *throughput, RNG *rng, VolumeSegment *segment)
+ float3 *throughput, float rphase, float rscatter,
+ const VolumeSegment *segment, const float3 *light_P, bool probalistic_scatter)
{
- int closure_flag = segment->closure_flag;
-
- if(!(closure_flag & SD_SCATTER))
- return VOLUME_PATH_MISSED;
+ kernel_assert(segment->closure_flag & SD_SCATTER);
/* pick random color channel, we use the Veach one-sample
* model with balance heuristic for the channels */
- float rphase = path_state_rng_1D(kg, rng, state, PRNG_PHASE);
int channel = (int)(rphase*3.0f);
sd->randb_closure = rphase*3.0f - channel;
+ float xi = rscatter;
- float xi = path_state_rng_1D(kg, rng, state, PRNG_SCATTER_DISTANCE);
+ /* probalistic scattering decision based on transmittance */
+ if(probalistic_scatter) {
+ float sample_transmittance = kernel_volume_channel_get(segment->accum_transmittance, channel);
+
+ if(1.0f - xi >= sample_transmittance) {
+ /* rescale random number so we can reuse it */
+ xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
+ }
+ else {
+ *throughput /= sample_transmittance;
+ return VOLUME_PATH_MISSED;
+ }
+ }
VolumeStep *step;
float3 transmittance;
float pdf, sample_t;
+ float mis_weight = 1.0f;
+ bool distance_sample = true;
+ bool use_mis = false;
+
+ if(segment->sampling_method && light_P) {
+ if(segment->sampling_method == SD_VOLUME_MIS) {
+ /* multiple importance sample: randomly pick between
+ * equiangular and distance sampling strategy */
+ if(xi < 0.5f) {
+ xi *= 2.0f;
+ }
+ else {
+ xi = (xi - 0.5f)*2.0f;
+ distance_sample = false;
+ }
+
+ use_mis = true;
+ }
+ else {
+ /* only equiangular sampling */
+ distance_sample = false;
+ }
+ }
/* distance sampling */
- if(kernel_data.integrator.volume_homogeneous_sampling == 0 || !kernel_data.integrator.num_all_lights) {
+ if(distance_sample) {
/* find step in cdf */
step = segment->steps;
float prev_t = 0.0f;
- float3 step_pdf = make_float3(1.0f, 1.0f, 1.0f);
+ float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
if(segment->numsteps > 1) {
float prev_cdf = 0.0f;
@@ -749,7 +823,7 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
xi = (xi - prev_cdf)/(step_cdf - prev_cdf);
/* pdf for picking step */
- step_pdf = step->cdf_distance - prev_cdf_distance;
+ step_pdf_distance = step->cdf_distance - prev_cdf_distance;
}
/* determine range in which we will sample */
@@ -758,35 +832,77 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
/* sample distance and compute transmittance */
float3 distance_pdf;
sample_t = prev_t + kernel_volume_distance_sample(step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf);
- pdf = average(distance_pdf * step_pdf);
+
+ /* modifiy pdf for hit/miss decision */
+ if(probalistic_scatter)
+ distance_pdf *= make_float3(1.0f, 1.0f, 1.0f) - segment->accum_transmittance;
+
+ pdf = average(distance_pdf * step_pdf_distance);
+
+ /* multiple importance sampling */
+ if(use_mis) {
+ float equi_pdf = kernel_volume_equiangular_pdf(ray, *light_P, sample_t);
+ mis_weight = 2.0f*power_heuristic(pdf, equi_pdf);
+ }
}
/* equi-angular sampling */
else {
- /* pick position on light */
- float3 light_P;
- if(!kernel_volume_equiangular_light_position(kg, state, ray, rng, &light_P))
- return VOLUME_PATH_MISSED;
-
/* sample distance */
- sample_t = kernel_volume_equiangular_sample(ray, light_P, xi, &pdf);
+ sample_t = kernel_volume_equiangular_sample(ray, *light_P, xi, &pdf);
/* find step in which sampled distance is located */
step = segment->steps;
float prev_t = 0.0f;
+ float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
if(segment->numsteps > 1) {
- /* todo: optimize using binary search */
- for(int i = 0; i < segment->numsteps-1; i++, step++) {
- if(sample_t < step->t)
+ float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
+
+ int numsteps = segment->numsteps;
+ int high = numsteps - 1;
+ int low = 0;
+ int mid;
+
+ while(low < high) {
+ mid = (low + high) >> 1;
+
+ if(sample_t < step[mid].t)
+ high = mid;
+ else if(sample_t >= step[mid + 1].t)
+ low = mid + 1;
+ else {
+ /* found our interval in step[mid] .. step[mid+1] */
+ prev_t = step[mid].t;
+ prev_cdf_distance = step[mid].cdf_distance;
+ step += mid+1;
break;
+ }
+ }
- prev_t = step->t;
+ if(low >= numsteps - 1) {
+ prev_t = step[numsteps - 1].t;
+ prev_cdf_distance = step[numsteps-1].cdf_distance;
+ step += numsteps - 1;
}
+
+ /* pdf for picking step with distance sampling */
+ step_pdf_distance = step->cdf_distance - prev_cdf_distance;
}
-
+
+ /* determine range in which we will sample */
+ float step_t = step->t - prev_t;
+ float step_sample_t = sample_t - prev_t;
+
/* compute transmittance */
- transmittance = volume_color_transmittance(step->sigma_t, sample_t - prev_t);
+ transmittance = volume_color_transmittance(step->sigma_t, step_sample_t);
+
+ /* multiple importance sampling */
+ if(use_mis) {
+ float3 distance_pdf3 = kernel_volume_distance_pdf(step_t, step->sigma_t, step_sample_t);
+ float distance_pdf = average(distance_pdf3 * step_pdf_distance);
+ mis_weight = 2.0f*power_heuristic(pdf, distance_pdf);
+ }
}
/* compute transmittance up to this step */
@@ -794,7 +910,7 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
transmittance *= (step-1)->accum_transmittance;
/* modify throughput */
- *throughput *= step->sigma_s * transmittance / pdf;
+ *throughput *= step->sigma_s * transmittance * (mis_weight / pdf);
/* evaluate shader to create closures at shading point */
if(segment->numsteps > 1) {
@@ -810,40 +926,27 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
return VOLUME_PATH_SCATTERED;
}
-#endif
-
-/* get the volume attenuation and emission over line segment defined by
- * ray, with the assumption that there are no surfaces blocking light
- * between the endpoints */
-ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg,
- PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng)
+/* decide if we need to use decoupled or not */
+ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneous, bool direct, int sampling_method)
{
- /* workaround to fix correlation bug in T38710, can find better solution
- * in random number generator later, for now this is done here to not impact
- * performance of rendering without volumes */
- RNG tmp_rng = cmj_hash(*rng, state->rng_offset);
- bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
-
-#if 0
- /* debugging code to compare decoupled ray marching */
- VolumeSegment segment;
-
- shader_setup_from_volume(kg, sd, ray, state->bounce, state->transparent_bounce);
- kernel_volume_decoupled_record(kg, state, ray, sd, &segment, heterogeneous);
-
- VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, state, ray, sd, throughput, &tmp_rng, &segment);
-
- kernel_volume_decoupled_free(kg, &segment);
+ /* decoupled ray marching for heterogenous volumes not supported on the GPU,
+ * which also means equiangular and multiple importance sampling is not
+ * support for that case */
+#ifdef __KERNEL_GPU__
+ if(heterogeneous)
+ return false;
+#endif
- return result;
-#else
- shader_setup_from_volume(kg, sd, ray, state->bounce, state->transparent_bounce);
+ /* equiangular and multiple importance sampling only implemented for decoupled */
+ if(sampling_method != 0)
+ return true;
- if(heterogeneous)
- return kernel_volume_integrate_heterogeneous(kg, state, ray, sd, L, throughput, &tmp_rng);
+ /* for all light sampling use decoupled, reusing shader evaluations is
+ * typically faster in that case */
+ if(direct)
+ return kernel_data.integrator.sample_all_lights_direct;
else
- return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, &tmp_rng);
-#endif
+ return kernel_data.integrator.sample_all_lights_indirect;
}
/* Volume Stack
@@ -851,17 +954,88 @@ ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals
* This is an array of object/shared ID's that the current segment of the path
* is inside of. */
-ccl_device void kernel_volume_stack_init(KernelGlobals *kg, VolumeStack *stack)
+ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
+ Ray *ray,
+ VolumeStack *stack)
{
- /* todo: this assumes camera is always in air, need to detect when it isn't */
- if(kernel_data.background.volume_shader == SHADER_NONE) {
- stack[0].shader = SHADER_NONE;
+ /* NULL ray happens in the baker, does it need proper initialization of
+ * camera in volume?
+ */
+ if(!kernel_data.cam.is_inside_volume || ray == NULL) {
+ /* Camera is guaranteed to be in the air, only take background volume
+ * into account in this case.
+ */
+ if(kernel_data.background.volume_shader != SHADER_NONE) {
+ stack[0].shader = kernel_data.background.volume_shader;
+ stack[0].object = PRIM_NONE;
+ stack[1].shader = SHADER_NONE;
+ }
+ else {
+ stack[0].shader = SHADER_NONE;
+ }
+ return;
}
- else {
+
+ Ray volume_ray = *ray;
+ volume_ray.t = FLT_MAX;
+
+ int stack_index = 0, enclosed_index = 0;
+ int enclosed_volumes[VOLUME_STACK_SIZE];
+
+ while(stack_index < VOLUME_STACK_SIZE - 1 &&
+ enclosed_index < VOLUME_STACK_SIZE - 1)
+ {
+ Intersection isect;
+ if(!scene_intersect_volume(kg, &volume_ray, &isect)) {
+ break;
+ }
+
+ ShaderData sd;
+ shader_setup_from_ray(kg, &sd, &isect, &volume_ray, 0, 0);
+ if(sd.flag & SD_HAS_VOLUME) {
+ if(sd.flag & SD_BACKFACING) {
+ /* If ray exited the volume and never entered to that volume
+ * it means that camera is inside such a volume.
+ */
+ bool is_enclosed = false;
+ for(int i = 0; i < enclosed_index; ++i) {
+ if(enclosed_volumes[i] == sd.object) {
+ is_enclosed = true;
+ break;
+ }
+ }
+ if(is_enclosed == false) {
+ stack[stack_index].object = sd.object;
+ stack[stack_index].shader = sd.shader;
+ ++stack_index;
+ }
+ }
+ else {
+ /* If ray from camera enters the volume, this volume shouldn't
+ * be added to the stak on exit.
+ */
+ enclosed_volumes[enclosed_index++] = sd.object;
+ }
+ }
+
+ /* Move ray forward. */
+ volume_ray.P = ray_offset(sd.P, -sd.Ng);
+ }
+ /* stack_index of 0 means quick checks outside of the kernel gave false
+ * positive, nothing to worry about, just we've wasted quite a few of
+ * ticks just to come into conclusion that camera is in the air.
+ *
+ * In this case we're doing the same above -- check whether background has
+ * volume.
+ */
+ if(stack_index == 0 && kernel_data.background.volume_shader == SHADER_NONE) {
stack[0].shader = kernel_data.background.volume_shader;
stack[0].object = PRIM_NONE;
stack[1].shader = SHADER_NONE;
}
+ else {
+ stack[stack_index].shader = SHADER_NONE;
+ }
}
ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd, VolumeStack *stack)
@@ -910,4 +1084,3 @@ ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd
}
CCL_NAMESPACE_END
-
diff --git a/intern/cycles/kernel/osl/SConscript b/intern/cycles/kernel/osl/SConscript
index 4685bb7753e..d721edbaf6e 100644
--- a/intern/cycles/kernel/osl/SConscript
+++ b/intern/cycles/kernel/osl/SConscript
@@ -43,6 +43,9 @@ defs.append('CCL_NAMESPACE_BEGIN=namespace ccl {')
defs.append('CCL_NAMESPACE_END=}')
defs.append('WITH_OSL')
+if env['WITH_BF_CYCLES_DEBUG']:
+ defs.append('WITH_CYCLES_DEBUG')
+
if env['OURPLATFORM'] in ('win32-vc', 'win64-vc'):
cxxflags.append('-DBOOST_NO_RTTI -DBOOST_NO_TYPEID /fp:fast'.split())
incs.append(env['BF_PTHREADS_INC'])
diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp b/intern/cycles/kernel/osl/osl_bssrdf.cpp
index 94337290d20..84ef85e089d 100644
--- a/intern/cycles/kernel/osl/osl_bssrdf.cpp
+++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp
@@ -66,18 +66,6 @@ ClosureParam *closure_bssrdf_cubic_params()
static ClosureParam params[] = {
CLOSURE_FLOAT3_PARAM(CubicBSSRDFClosure, sc.N),
CLOSURE_FLOAT3_PARAM(CubicBSSRDFClosure, radius),
- //CLOSURE_FLOAT_PARAM(CubicBSSRDFClosure, sc.data1),
- CLOSURE_STRING_KEYPARAM("label"),
- CLOSURE_FINISH_PARAM(CubicBSSRDFClosure)
- };
- return params;
-}
-
-ClosureParam *closure_bssrdf_cubic_extended_params()
-{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(CubicBSSRDFClosure, sc.N),
- CLOSURE_FLOAT3_PARAM(CubicBSSRDFClosure, radius),
CLOSURE_FLOAT_PARAM(CubicBSSRDFClosure, sc.data1),
CLOSURE_FLOAT_PARAM(CubicBSSRDFClosure, sc.T.x),
CLOSURE_STRING_KEYPARAM("label"),
@@ -107,18 +95,6 @@ ClosureParam *closure_bssrdf_gaussian_params()
static ClosureParam params[] = {
CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, sc.N),
CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, radius),
- //CLOSURE_FLOAT_PARAM(GaussianBSSRDFClosure, sc.data1),
- CLOSURE_STRING_KEYPARAM("label"),
- CLOSURE_FINISH_PARAM(GaussianBSSRDFClosure)
- };
- return params;
-}
-
-ClosureParam *closure_bssrdf_gaussian_extended_params()
-{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, sc.N),
- CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, radius),
CLOSURE_FLOAT_PARAM(GaussianBSSRDFClosure, sc.data1),
CLOSURE_STRING_KEYPARAM("label"),
CLOSURE_FINISH_PARAM(GaussianBSSRDFClosure)
diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp
index a96c0e2b1fb..cc9942b024e 100644
--- a/intern/cycles/kernel/osl/osl_closures.cpp
+++ b/intern/cycles/kernel/osl/osl_closures.cpp
@@ -41,6 +41,8 @@
#include "util_param.h"
#include "kernel_types.h"
+#include "kernel_compat_cpu.h"
+#include "kernel_globals.h"
#include "kernel_montecarlo.h"
#include "closure/bsdf_util.h"
@@ -51,8 +53,7 @@
#include "closure/bsdf_reflection.h"
#include "closure/bsdf_refraction.h"
#include "closure/bsdf_transparent.h"
-#include "closure/bsdf_ward.h"
-#include "closure/bsdf_westin.h"
+#include "closure/bsdf_ashikhmin_shirley.h"
#include "closure/bsdf_toon.h"
#include "closure/bsdf_hair.h"
#include "closure/volume.h"
@@ -85,16 +86,6 @@ BSDF_CLOSURE_CLASS_BEGIN(Refraction, refraction, refraction, LABEL_SINGULAR)
CLOSURE_FLOAT_PARAM(RefractionClosure, sc.data0),
BSDF_CLOSURE_CLASS_END(Refraction, refraction)
-BSDF_CLOSURE_CLASS_BEGIN(WestinBackscatter, westin_backscatter, westin_backscatter, LABEL_GLOSSY)
- CLOSURE_FLOAT3_PARAM(WestinBackscatterClosure, sc.N),
- CLOSURE_FLOAT_PARAM(WestinBackscatterClosure, sc.data0),
-BSDF_CLOSURE_CLASS_END(WestinBackscatter, westin_backscatter)
-
-BSDF_CLOSURE_CLASS_BEGIN(WestinSheen, westin_sheen, westin_sheen, LABEL_DIFFUSE)
- CLOSURE_FLOAT3_PARAM(WestinSheenClosure, sc.N),
- CLOSURE_FLOAT_PARAM(WestinSheenClosure, sc.data0),
-BSDF_CLOSURE_CLASS_END(WestinSheen, westin_sheen)
-
BSDF_CLOSURE_CLASS_BEGIN(Transparent, transparent, transparent, LABEL_SINGULAR)
BSDF_CLOSURE_CLASS_END(Transparent, transparent)
@@ -103,12 +94,12 @@ BSDF_CLOSURE_CLASS_BEGIN(AshikhminVelvet, ashikhmin_velvet, ashikhmin_velvet, LA
CLOSURE_FLOAT_PARAM(AshikhminVelvetClosure, sc.data0),
BSDF_CLOSURE_CLASS_END(AshikhminVelvet, ashikhmin_velvet)
-BSDF_CLOSURE_CLASS_BEGIN(Ward, ward, ward, LABEL_GLOSSY)
- CLOSURE_FLOAT3_PARAM(WardClosure, sc.N),
- CLOSURE_FLOAT3_PARAM(WardClosure, sc.T),
- CLOSURE_FLOAT_PARAM(WardClosure, sc.data0),
- CLOSURE_FLOAT_PARAM(WardClosure, sc.data1),
-BSDF_CLOSURE_CLASS_END(Ward, ward)
+BSDF_CLOSURE_CLASS_BEGIN(AshikhminShirley, ashikhmin_shirley_aniso, ashikhmin_shirley, LABEL_GLOSSY|LABEL_REFLECT)
+ CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, sc.N),
+ CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, sc.T),
+ CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, sc.data0),
+ CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, sc.data1),
+BSDF_CLOSURE_CLASS_END(AshikhminShirley, ashikhmin_shirley_aniso)
BSDF_CLOSURE_CLASS_BEGIN(DiffuseToon, diffuse_toon, diffuse_toon, LABEL_DIFFUSE)
CLOSURE_FLOAT3_PARAM(DiffuseToonClosure, sc.N),
@@ -122,26 +113,40 @@ BSDF_CLOSURE_CLASS_BEGIN(GlossyToon, glossy_toon, glossy_toon, LABEL_GLOSSY)
CLOSURE_FLOAT_PARAM(GlossyToonClosure, sc.data1),
BSDF_CLOSURE_CLASS_END(GlossyToon, glossy_toon)
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGX, microfacet_ggx, microfacet_ggx, LABEL_GLOSSY)
+BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGX, microfacet_ggx, microfacet_ggx, LABEL_GLOSSY|LABEL_REFLECT)
CLOSURE_FLOAT3_PARAM(MicrofacetGGXClosure, sc.N),
CLOSURE_FLOAT_PARAM(MicrofacetGGXClosure, sc.data0),
BSDF_CLOSURE_CLASS_END(MicrofacetGGX, microfacet_ggx)
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmann, microfacet_beckmann, microfacet_beckmann, LABEL_GLOSSY)
+BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXAniso, microfacet_ggx_aniso, microfacet_ggx, LABEL_GLOSSY|LABEL_REFLECT)
+ CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, sc.N),
+ CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, sc.T),
+ CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, sc.data0),
+ CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, sc.data1),
+BSDF_CLOSURE_CLASS_END(MicrofacetGGXAniso, microfacet_ggx_aniso)
+
+BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmann, microfacet_beckmann, microfacet_beckmann, LABEL_GLOSSY|LABEL_REFLECT)
CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannClosure, sc.N),
CLOSURE_FLOAT_PARAM(MicrofacetBeckmannClosure, sc.data0),
BSDF_CLOSURE_CLASS_END(MicrofacetBeckmann, microfacet_beckmann)
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXRefraction, microfacet_ggx_refraction, microfacet_ggx, LABEL_GLOSSY)
+BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannAniso, microfacet_beckmann_aniso, microfacet_beckmann, LABEL_GLOSSY|LABEL_REFLECT)
+ CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, sc.N),
+ CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, sc.T),
+ CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, sc.data0),
+ CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, sc.data1),
+BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannAniso, microfacet_beckmann_aniso)
+
+BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXRefraction, microfacet_ggx_refraction, microfacet_ggx, LABEL_GLOSSY|LABEL_TRANSMIT)
CLOSURE_FLOAT3_PARAM(MicrofacetGGXRefractionClosure, sc.N),
CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, sc.data0),
- CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, sc.data1),
+ CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, sc.data2),
BSDF_CLOSURE_CLASS_END(MicrofacetGGXRefraction, microfacet_ggx_refraction)
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction, microfacet_beckmann, LABEL_GLOSSY)
+BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction, microfacet_beckmann, LABEL_GLOSSY|LABEL_TRANSMIT)
CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannRefractionClosure, sc.N),
CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, sc.data0),
- CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, sc.data1),
+ CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, sc.data2),
BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction)
BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, hair_reflection, LABEL_GLOSSY)
@@ -150,7 +155,7 @@ BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, hair_reflection, LABEL
CLOSURE_FLOAT_PARAM(HairReflectionClosure, sc.data1),
#ifdef __HAIR__
CLOSURE_FLOAT3_PARAM(HairReflectionClosure, sc.T),
- CLOSURE_FLOAT_PARAM(HairReflectionClosure, sc.offset),
+ CLOSURE_FLOAT_PARAM(HairReflectionClosure, sc.data2),
#else
CLOSURE_FLOAT3_PARAM(HairReflectionClosure, sc.N),
CLOSURE_FLOAT_PARAM(HairReflectionClosure, sc.data1),
@@ -163,7 +168,7 @@ BSDF_CLOSURE_CLASS_BEGIN(HairTransmission, hair_transmission, hair_transmission,
CLOSURE_FLOAT_PARAM(HairTransmissionClosure, sc.data1),
#ifdef __HAIR__
CLOSURE_FLOAT3_PARAM(HairReflectionClosure, sc.T),
- CLOSURE_FLOAT_PARAM(HairReflectionClosure, sc.offset),
+ CLOSURE_FLOAT_PARAM(HairReflectionClosure, sc.data2),
#else
CLOSURE_FLOAT3_PARAM(HairReflectionClosure, sc.N),
CLOSURE_FLOAT_PARAM(HairReflectionClosure, sc.data1),
@@ -210,26 +215,24 @@ void OSLShader::register_closures(OSLShadingSystem *ss_)
bsdf_transparent_params(), bsdf_transparent_prepare);
register_closure(ss, "microfacet_ggx", id++,
bsdf_microfacet_ggx_params(), bsdf_microfacet_ggx_prepare);
+ register_closure(ss, "microfacet_ggx_aniso", id++,
+ bsdf_microfacet_ggx_aniso_params(), bsdf_microfacet_ggx_aniso_prepare);
register_closure(ss, "microfacet_ggx_refraction", id++,
bsdf_microfacet_ggx_refraction_params(), bsdf_microfacet_ggx_refraction_prepare);
register_closure(ss, "microfacet_beckmann", id++,
bsdf_microfacet_beckmann_params(), bsdf_microfacet_beckmann_prepare);
+ register_closure(ss, "microfacet_beckmann_aniso", id++,
+ bsdf_microfacet_beckmann_aniso_params(), bsdf_microfacet_beckmann_aniso_prepare);
register_closure(ss, "microfacet_beckmann_refraction", id++,
bsdf_microfacet_beckmann_refraction_params(), bsdf_microfacet_beckmann_refraction_prepare);
- register_closure(ss, "ward", id++,
- bsdf_ward_params(), bsdf_ward_prepare);
+ register_closure(ss, "ashikhmin_shirley", id++,
+ bsdf_ashikhmin_shirley_aniso_params(), bsdf_ashikhmin_shirley_aniso_prepare);
register_closure(ss, "ashikhmin_velvet", id++,
bsdf_ashikhmin_velvet_params(), bsdf_ashikhmin_velvet_prepare);
register_closure(ss, "diffuse_toon", id++,
bsdf_diffuse_toon_params(), bsdf_diffuse_toon_prepare);
register_closure(ss, "glossy_toon", id++,
bsdf_glossy_toon_params(), bsdf_glossy_toon_prepare);
- register_closure(ss, "specular_toon", id++,
- bsdf_glossy_toon_params(), bsdf_glossy_toon_prepare);
- register_closure(ss, "westin_backscatter", id++,
- bsdf_westin_backscatter_params(), bsdf_westin_backscatter_prepare);
- register_closure(ss, "westin_sheen", id++,
- bsdf_westin_sheen_params(), bsdf_westin_sheen_prepare);
register_closure(ss, "emission", id++,
closure_emission_params(), closure_emission_prepare);
@@ -247,10 +250,6 @@ void OSLShader::register_closures(OSLShadingSystem *ss_)
closure_bssrdf_cubic_params(), closure_bssrdf_cubic_prepare);
register_closure(ss, "bssrdf_gaussian", id++,
closure_bssrdf_gaussian_params(), closure_bssrdf_gaussian_prepare);
- register_closure(ss, "bssrdf_cubic", id++,
- closure_bssrdf_cubic_extended_params(), closure_bssrdf_cubic_prepare);
- register_closure(ss, "bssrdf_gaussian", id++,
- closure_bssrdf_gaussian_extended_params(), closure_bssrdf_gaussian_prepare);
register_closure(ss, "hair_reflection", id++,
bsdf_hair_reflection_params(), bsdf_hair_reflection_prepare);
diff --git a/intern/cycles/kernel/osl/osl_closures.h b/intern/cycles/kernel/osl/osl_closures.h
index 218cf1c19cc..5e833d738d8 100644
--- a/intern/cycles/kernel/osl/osl_closures.h
+++ b/intern/cycles/kernel/osl/osl_closures.h
@@ -48,12 +48,8 @@ OSL::ClosureParam *closure_holdout_params();
OSL::ClosureParam *closure_ambient_occlusion_params();
OSL::ClosureParam *closure_bsdf_diffuse_ramp_params();
OSL::ClosureParam *closure_bsdf_phong_ramp_params();
-OSL::ClosureParam *closure_westin_backscatter_params();
-OSL::ClosureParam *closure_westin_sheen_params();
OSL::ClosureParam *closure_bssrdf_cubic_params();
OSL::ClosureParam *closure_bssrdf_gaussian_params();
-OSL::ClosureParam *closure_bssrdf_cubic_extended_params();
-OSL::ClosureParam *closure_bssrdf_gaussian_extended_params();
OSL::ClosureParam *closure_henyey_greenstein_volume_params();
void closure_emission_prepare(OSL::RendererServices *, int id, void *data);
@@ -62,8 +58,6 @@ void closure_holdout_prepare(OSL::RendererServices *, int id, void *data);
void closure_ambient_occlusion_prepare(OSL::RendererServices *, int id, void *data);
void closure_bsdf_diffuse_ramp_prepare(OSL::RendererServices *, int id, void *data);
void closure_bsdf_phong_ramp_prepare(OSL::RendererServices *, int id, void *data);
-void closure_westin_backscatter_prepare(OSL::RendererServices *, int id, void *data);
-void closure_westin_sheen_prepare(OSL::RendererServices *, int id, void *data);
void closure_bssrdf_cubic_prepare(OSL::RendererServices *, int id, void *data);
void closure_bssrdf_gaussian_prepare(OSL::RendererServices *, int id, void *data);
void closure_henyey_greenstein_volume_prepare(OSL::RendererServices *, int id, void *data);
@@ -149,17 +143,18 @@ public: \
\
void blur(float roughness) \
{ \
- bsdf_##svmlower##_blur(&sc, roughness); \
} \
\
float3 eval_reflect(const float3 &omega_out, const float3 &omega_in, float& pdf) const \
{ \
- return bsdf_##svmlower##_eval_reflect(&sc, omega_out, omega_in, &pdf); \
+ pdf = 0; \
+ return make_float3(0, 0, 0); \
} \
\
float3 eval_transmit(const float3 &omega_out, const float3 &omega_in, float& pdf) const \
{ \
- return bsdf_##svmlower##_eval_transmit(&sc, omega_out, omega_in, &pdf); \
+ pdf = 0; \
+ return make_float3(0, 0, 0); \
} \
\
int sample(const float3 &Ng, \
@@ -168,8 +163,8 @@ public: \
float3 &omega_in, float3 &domega_in_dx, float3 &domega_in_dy, \
float &pdf, float3 &eval) const \
{ \
- return bsdf_##svmlower##_sample(&sc, Ng, omega_out, domega_out_dx, domega_out_dy, \
- randu, randv, &eval, &omega_in, &domega_in_dx, &domega_in_dy, &pdf); \
+ pdf = 0; \
+ return LABEL_NONE; \
} \
}; \
\
diff --git a/intern/cycles/kernel/osl/osl_globals.h b/intern/cycles/kernel/osl/osl_globals.h
index 5a658d8244a..9c3134e41c9 100644
--- a/intern/cycles/kernel/osl/osl_globals.h
+++ b/intern/cycles/kernel/osl/osl_globals.h
@@ -20,7 +20,6 @@
#ifdef WITH_OSL
#include <OSL/oslexec.h>
-#include <cmath>
#include "util_map.h"
#include "util_param.h"
diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp
index 54894ea19eb..a9694651e14 100644
--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@@ -126,7 +126,7 @@ void OSLRenderServices::thread_init(KernelGlobals *kernel_globals_, OSL::Texture
osl_ts = osl_ts_;
}
-bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform, float time)
+bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time)
{
/* this is only used for shader and object space, we don't really have
* a concept of shader space, so we just use object space for both. */
@@ -156,7 +156,7 @@ bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr
return false;
}
-bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform, float time)
+bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time)
{
/* this is only used for shader and object space, we don't really have
* a concept of shader space, so we just use object space for both. */
@@ -186,7 +186,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, OSL::Transform
return false;
}
-bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, ustring from, float time)
+bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from, float time)
{
KernelGlobals *kg = kernel_globals;
@@ -218,7 +218,7 @@ bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, ustring from, float ti
return false;
}
-bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, ustring to, float time)
+bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring to, float time)
{
KernelGlobals *kg = kernel_globals;
@@ -250,7 +250,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, ustring to, fl
return false;
}
-bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform)
+bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform)
{
/* this is only used for shader and object space, we don't really have
* a concept of shader space, so we just use object space for both. */
@@ -275,7 +275,7 @@ bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr
return false;
}
-bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform)
+bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform)
{
/* this is only used for shader and object space, we don't really have
* a concept of shader space, so we just use object space for both. */
@@ -300,7 +300,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, OSL::Transform
return false;
}
-bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, ustring from)
+bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from)
{
KernelGlobals *kg = kernel_globals;
@@ -328,7 +328,7 @@ bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, ustring from)
return false;
}
-bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, ustring to)
+bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring to)
{
KernelGlobals *kg = kernel_globals;
@@ -356,7 +356,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, ustring to)
return false;
}
-bool OSLRenderServices::get_array_attribute(void *renderstate, bool derivatives,
+bool OSLRenderServices::get_array_attribute(OSL::ShaderGlobals *sg, bool derivatives,
ustring object, TypeDesc type, ustring name,
int index, void *val)
{
@@ -479,7 +479,7 @@ static bool set_attribute_int(int i, TypeDesc type, bool derivatives, void *val)
static bool set_attribute_string(ustring str, TypeDesc type, bool derivatives, void *val)
{
- if(type.basetype == TypeDesc::INT && type.aggregate == TypeDesc::SCALAR && type.arraylen == 0) {
+ if(type.basetype == TypeDesc::STRING && type.aggregate == TypeDesc::SCALAR && type.arraylen == 0) {
ustring *sval = (ustring *)val;
sval[0] = str;
@@ -718,7 +718,7 @@ bool OSLRenderServices::get_background_attribute(KernelGlobals *kg, ShaderData *
return set_attribute_int(f, type, derivatives, val);
}
else if (name == u_path_transparent_depth) {
- /* Ray Depth */
+ /* Transparent Ray Depth */
int f = sd->transparent_depth;
return set_attribute_int(f, type, derivatives, val);
}
@@ -751,14 +751,22 @@ bool OSLRenderServices::get_background_attribute(KernelGlobals *kg, ShaderData *
return false;
}
-bool OSLRenderServices::get_attribute(void *renderstate, bool derivatives, ustring object_name,
+bool OSLRenderServices::get_attribute(OSL::ShaderGlobals *sg, bool derivatives, ustring object_name,
+ TypeDesc type, ustring name, void *val)
+{
+ if (sg->renderstate == NULL)
+ return false;
+
+ ShaderData *sd = (ShaderData *)(sg->renderstate);
+ return get_attribute(sd, derivatives, object_name, type, name, val);
+}
+
+bool OSLRenderServices::get_attribute(ShaderData *sd, bool derivatives, ustring object_name,
TypeDesc type, ustring name, void *val)
{
- ShaderData *sd = (ShaderData *)renderstate;
KernelGlobals *kg = sd->osl_globals;
bool is_curve;
int object;
- // int prim;
/* lookup of attribute on another object */
if (object_name != u_empty) {
@@ -768,12 +776,10 @@ bool OSLRenderServices::get_attribute(void *renderstate, bool derivatives, ustri
return false;
object = it->second;
- // prim = PRIM_NONE;
is_curve = false;
}
else {
object = sd->object;
- // prim = sd->prim;
is_curve = (sd->type & PRIMITIVE_ALL_CURVE) != 0;
if (object == OBJECT_NONE)
@@ -815,12 +821,12 @@ bool OSLRenderServices::get_attribute(void *renderstate, bool derivatives, ustri
}
bool OSLRenderServices::get_userdata(bool derivatives, ustring name, TypeDesc type,
- void *renderstate, void *val)
+ OSL::ShaderGlobals *sg, void *val)
{
return false; /* disabled by lockgeom */
}
-bool OSLRenderServices::has_userdata(ustring name, TypeDesc type, void *renderstate)
+bool OSLRenderServices::has_userdata(ustring name, TypeDesc type, OSL::ShaderGlobals *sg)
{
return false; /* never called by OSL */
}
@@ -871,14 +877,30 @@ bool OSLRenderServices::texture(ustring filename, TextureOpt &options,
return true;
}
#endif
+ bool status;
- OSLThreadData *tdata = kg->osl_tdata;
- OIIO::TextureSystem::Perthread *thread_info = tdata->oiio_thread_info;
+ if(filename[0] == '@' && filename.find('.') == -1) {
+ int slot = atoi(filename.c_str() + 1);
+ float4 rgba = kernel_tex_image_interp(slot, s, 1.0f - t);
- OIIO::TextureSystem::TextureHandle *th = ts->get_texture_handle(filename, thread_info);
+ result[0] = rgba[0];
+ if(options.nchannels > 1)
+ result[1] = rgba[1];
+ if(options.nchannels > 2)
+ result[2] = rgba[2];
+ if(options.nchannels > 3)
+ result[3] = rgba[3];
+ status = true;
+ }
+ else {
+ OSLThreadData *tdata = kg->osl_tdata;
+ OIIO::TextureSystem::Perthread *thread_info = tdata->oiio_thread_info;
- bool status = ts->texture(th, thread_info,
- options, s, t, dsdx, dtdx, dsdy, dtdy, result);
+ OIIO::TextureSystem::TextureHandle *th = ts->get_texture_handle(filename, thread_info);
+
+ status = ts->texture(th, thread_info,
+ options, s, t, dsdx, dtdx, dsdy, dtdy, result);
+ }
if(!status) {
if(options.nchannels == 3 || options.nchannels == 4) {
@@ -953,7 +975,7 @@ bool OSLRenderServices::environment(ustring filename, TextureOpt &options,
return status;
}
-bool OSLRenderServices::get_texture_info(ustring filename, int subimage,
+bool OSLRenderServices::get_texture_info(OSL::ShaderGlobals *sg, ustring filename, int subimage,
ustring dataname,
TypeDesc datatype, void *data)
{
@@ -996,7 +1018,7 @@ bool OSLRenderServices::trace(TraceOpt &options, OSL::ShaderGlobals *sg,
ray.P = TO_FLOAT3(P);
ray.D = TO_FLOAT3(R);
- ray.t = (options.maxdist == 1.0e30)? FLT_MAX: options.maxdist - options.mindist;
+ ray.t = (options.maxdist == 1.0e30f)? FLT_MAX: options.maxdist - options.mindist;
ray.time = sd->time;
if(options.mindist == 0.0f) {
@@ -1025,11 +1047,7 @@ bool OSLRenderServices::trace(TraceOpt &options, OSL::ShaderGlobals *sg,
tracedata->sd.osl_globals = sd->osl_globals;
/* raytrace */
-#ifdef __HAIR__
return scene_intersect(sd->osl_globals, &ray, PATH_RAY_ALL_VISIBILITY, &tracedata->isect, NULL, 0.0f, 0.0f);
-#else
- return scene_intersect(sd->osl_globals, &ray, PATH_RAY_ALL_VISIBILITY, &tracedata->isect);
-#endif
}
diff --git a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h
index 069722d81b6..6f928a0d103 100644
--- a/intern/cycles/kernel/osl/osl_services.h
+++ b/intern/cycles/kernel/osl/osl_services.h
@@ -49,27 +49,29 @@ public:
void thread_init(KernelGlobals *kernel_globals, OSL::TextureSystem *ts);
- bool get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform, float time);
- bool get_inverse_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform, float time);
+ bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time);
+ bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time);
- bool get_matrix(OSL::Matrix44 &result, ustring from, float time);
- bool get_inverse_matrix(OSL::Matrix44 &result, ustring to, float time);
+ bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from, float time);
+ bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring to, float time);
- bool get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform);
- bool get_inverse_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform);
+ bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform);
+ bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform);
- bool get_matrix(OSL::Matrix44 &result, ustring from);
- bool get_inverse_matrix(OSL::Matrix44 &result, ustring from);
+ bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from);
+ bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from);
- bool get_array_attribute(void *renderstate, bool derivatives,
+ bool get_array_attribute(OSL::ShaderGlobals *sg, bool derivatives,
ustring object, TypeDesc type, ustring name,
int index, void *val);
- bool get_attribute(void *renderstate, bool derivatives, ustring object,
+ bool get_attribute(OSL::ShaderGlobals *sg, bool derivatives, ustring object,
+ TypeDesc type, ustring name, void *val);
+ bool get_attribute(ShaderData *sd, bool derivatives, ustring object_name,
TypeDesc type, ustring name, void *val);
bool get_userdata(bool derivatives, ustring name, TypeDesc type,
- void *renderstate, void *val);
- bool has_userdata(ustring name, TypeDesc type, void *renderstate);
+ OSL::ShaderGlobals *sg, void *val);
+ bool has_userdata(ustring name, TypeDesc type, OSL::ShaderGlobals *sg);
int pointcloud_search(OSL::ShaderGlobals *sg, ustring filename, const OSL::Vec3 &center,
float radius, int max_points, bool sort, size_t *out_indices,
@@ -106,7 +108,7 @@ public:
OSL::ShaderGlobals *sg, const OSL::Vec3 &R,
const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy, float *result);
- bool get_texture_info(ustring filename, int subimage,
+ bool get_texture_info(OSL::ShaderGlobals *sg, ustring filename, int subimage,
ustring dataname, TypeDesc datatype, void *data);
static bool get_background_attribute(KernelGlobals *kg, ShaderData *sd, ustring name,
@@ -157,6 +159,70 @@ public:
static ustring u_v;
static ustring u_empty;
+#if OSL_LIBRARY_VERSION_CODE < 10500
+ bool get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform, float time) {
+ return get_matrix(NULL, result, xform, time);
+ }
+
+ bool get_inverse_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform, float time) {
+ return get_inverse_matrix(NULL, result, xform, time);
+ }
+
+ bool get_matrix(OSL::Matrix44 &result, ustring from, float time) {
+ return get_matrix(NULL, result, from, time);
+ }
+
+ bool get_inverse_matrix(OSL::Matrix44 &result, ustring to, float time) {
+ return get_inverse_matrix(NULL, result, to, time);
+ }
+
+ bool get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform) {
+ return get_matrix(NULL, result, xform);
+ }
+
+ bool get_inverse_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform) {
+ return get_inverse_matrix(NULL, result, xform);
+ }
+
+ bool get_matrix(OSL::Matrix44 &result, ustring from) {
+ return get_matrix(NULL, result, from);
+ }
+
+ bool get_inverse_matrix(OSL::Matrix44 &result, ustring to) {
+ return get_inverse_matrix(NULL, result, to);
+ }
+
+ bool get_array_attribute(void *renderstate, bool derivatives,
+ ustring object, TypeDesc type, ustring name,
+ int index, void *val) {
+ OSL::ShaderGlobals sg;
+ sg.renderstate = renderstate;
+ return get_array_attribute(&sg, derivatives,
+ object, type, name,
+ index, val);
+ }
+
+ bool get_attribute(void *renderstate, bool derivatives, ustring object_name,
+ TypeDesc type, ustring name, void *val) {
+ OSL::ShaderGlobals sg;
+ sg.renderstate = renderstate;
+ return get_attribute(&sg, derivatives, object_name, type, name, val);
+ }
+
+ bool has_userdata(ustring name, TypeDesc type, void *renderstate) {
+ return has_userdata(name, type, (OSL::ShaderGlobals *) renderstate);
+ }
+
+ bool get_userdata(bool derivatives, ustring name, TypeDesc type,
+ void *renderstate, void *val) {
+ return get_userdata(derivatives, name, type, (OSL::ShaderGlobals *) renderstate, val);
+ }
+
+ bool get_texture_info(ustring filename, int subimage,
+ ustring dataname, TypeDesc datatype, void *data) {
+ return get_texture_info(NULL, filename, subimage, dataname, datatype, data);
+ }
+#endif
private:
KernelGlobals *kernel_globals;
OSL::TextureSystem *osl_ts;
diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp
index 843dcdd0985..ca0c2cc4415 100644
--- a/intern/cycles/kernel/osl/osl_shader.cpp
+++ b/intern/cycles/kernel/osl/osl_shader.cpp
@@ -14,6 +14,8 @@
* limitations under the License
*/
+#include <OSL/oslexec.h>
+
#include "kernel_compat_cpu.h"
#include "kernel_montecarlo.h"
#include "kernel_types.h"
@@ -34,7 +36,6 @@
#include "attribute.h"
-#include <OSL/oslexec.h>
CCL_NAMESPACE_BEGIN
@@ -164,11 +165,14 @@ static void flatten_surface_closure_tree(ShaderData *sd, int path_flag,
CBSDFClosure *bsdf = (CBSDFClosure *)prim;
int scattering = bsdf->scattering();
- /* no caustics option */
- if(scattering == LABEL_GLOSSY && (path_flag & PATH_RAY_DIFFUSE)) {
+ /* caustic options */
+ if((scattering & LABEL_GLOSSY) && (path_flag & PATH_RAY_DIFFUSE)) {
KernelGlobals *kg = sd->osl_globals;
- if(kernel_data.integrator.no_caustics)
+
+ if((!kernel_data.integrator.caustics_reflective && (scattering & LABEL_REFLECT)) ||
+ (!kernel_data.integrator.caustics_refractive && (scattering & LABEL_TRANSMIT))) {
return;
+ }
}
/* sample weight */
@@ -181,12 +185,9 @@ static void flatten_surface_closure_tree(ShaderData *sd, int path_flag,
sc.T = bsdf->sc.T;
sc.data0 = bsdf->sc.data0;
sc.data1 = bsdf->sc.data1;
+ sc.data2 = bsdf->sc.data2;
sc.prim = bsdf->sc.prim;
-#ifdef __HAIR__
- sc.offset = bsdf->sc.offset;
-#endif
-
/* add */
if(sc.sample_weight > CLOSURE_WEIGHT_CUTOFF && sd->num_closure < MAX_CLOSURE) {
sd->closure[sd->num_closure++] = sc;
@@ -202,6 +203,7 @@ static void flatten_surface_closure_tree(ShaderData *sd, int path_flag,
sc.type = CLOSURE_EMISSION_ID;
sc.data0 = 0.0f;
sc.data1 = 0.0f;
+ sc.data2 = 0.0f;
sc.prim = NULL;
/* flag */
@@ -219,6 +221,7 @@ static void flatten_surface_closure_tree(ShaderData *sd, int path_flag,
sc.type = CLOSURE_AMBIENT_OCCLUSION_ID;
sc.data0 = 0.0f;
sc.data1 = 0.0f;
+ sc.data2 = 0.0f;
sc.prim = NULL;
if(sd->num_closure < MAX_CLOSURE) {
@@ -232,6 +235,7 @@ static void flatten_surface_closure_tree(ShaderData *sd, int path_flag,
sc.type = CLOSURE_HOLDOUT_ID;
sc.data0 = 0.0f;
sc.data1 = 0.0f;
+ sc.data2 = 0.0f;
sc.prim = NULL;
if(sd->num_closure < MAX_CLOSURE) {
diff --git a/intern/cycles/kernel/shaders/CMakeLists.txt b/intern/cycles/kernel/shaders/CMakeLists.txt
index 5518d652bf9..0b735ede701 100644
--- a/intern/cycles/kernel/shaders/CMakeLists.txt
+++ b/intern/cycles/kernel/shaders/CMakeLists.txt
@@ -4,6 +4,7 @@
set(SRC_OSL
node_add_closure.osl
node_ambient_occlusion.osl
+ node_anisotropic_bsdf.osl
node_attribute.osl
node_background.osl
node_brick_texture.osl
@@ -13,6 +14,7 @@ set(SRC_OSL
node_checker_texture.osl
node_combine_rgb.osl
node_combine_hsv.osl
+ node_combine_xyz.osl
node_convert_from_color.osl
node_convert_from_float.osl
node_convert_from_int.osl
@@ -57,6 +59,7 @@ set(SRC_OSL
node_rgb_ramp.osl
node_separate_rgb.osl
node_separate_hsv.osl
+ node_separate_xyz.osl
node_set_normal.osl
node_sky_texture.osl
node_subsurface_scattering.osl
@@ -71,7 +74,6 @@ set(SRC_OSL
node_vector_transform.osl
node_velvet_bsdf.osl
node_voronoi_texture.osl
- node_ward_bsdf.osl
node_wavelength.osl
node_blackbody.osl
node_wave_texture.osl
diff --git a/intern/cycles/kernel/shaders/node_ward_bsdf.osl b/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl
index 2d360d594f2..da1e4f77107 100644
--- a/intern/cycles/kernel/shaders/node_ward_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl
@@ -16,8 +16,9 @@
#include "stdosl.h"
-shader node_ward_bsdf(
+shader node_anisotropic_bsdf(
color Color = 0.0,
+ string distribution = "GGX",
float Roughness = 0.0,
float Anisotropy = 0.0,
float Rotation = 0.0,
@@ -44,6 +45,13 @@ shader node_ward_bsdf(
RoughnessV = Roughness / (1.0 - aniso);
}
- BSDF = Color * ward(Normal, T, RoughnessU, RoughnessV);
+ if (distribution == "Sharp")
+ BSDF = Color * reflection(Normal);
+ else if (distribution == "Beckmann")
+ BSDF = Color * microfacet_beckmann_aniso(Normal, T, RoughnessU, RoughnessV);
+ else if (distribution == "GGX")
+ BSDF = Color * microfacet_ggx_aniso(Normal, T, RoughnessU, RoughnessV);
+ else
+ BSDF = Color * ashikhmin_shirley(Normal, T, RoughnessU, RoughnessV);
}
diff --git a/intern/cycles/kernel/shaders/node_brick_texture.osl b/intern/cycles/kernel/shaders/node_brick_texture.osl
index 70a6a6ea7ce..c9fb3542aef 100644
--- a/intern/cycles/kernel/shaders/node_brick_texture.osl
+++ b/intern/cycles/kernel/shaders/node_brick_texture.osl
@@ -93,6 +93,6 @@ shader node_brick_texture(
Col[2] = facm * (Color1[2]) + tint * Color2[2];
}
- Color = (Fac == 1.0) ? Mortar: Col;
+ Color = (Fac == 1.0) ? Mortar : Col;
}
diff --git a/intern/cycles/kernel/shaders/node_checker_texture.osl b/intern/cycles/kernel/shaders/node_checker_texture.osl
index 6723076723c..a6d21fd36f3 100644
--- a/intern/cycles/kernel/shaders/node_checker_texture.osl
+++ b/intern/cycles/kernel/shaders/node_checker_texture.osl
@@ -21,9 +21,9 @@
float checker(point p)
{
- p[0] = (p[0] + 0.00001) * 0.9999;
- p[1] = (p[1] + 0.00001) * 0.9999;
- p[2] = (p[2] + 0.00001) * 0.9999;
+ p[0] = (p[0] + 0.000001) * 0.999999;
+ p[1] = (p[1] + 0.000001) * 0.999999;
+ p[2] = (p[2] + 0.000001) * 0.999999;
int xi = (int)fabs(floor(p[0]));
int yi = (int)fabs(floor(p[1]));
diff --git a/intern/cycles/kernel/shaders/node_combine_xyz.osl b/intern/cycles/kernel/shaders/node_combine_xyz.osl
new file mode 100644
index 00000000000..933dee5bd78
--- /dev/null
+++ b/intern/cycles/kernel/shaders/node_combine_xyz.osl
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2011-2014 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+#include "stdosl.h"
+
+shader node_combine_xyz(
+ float X = 0.0,
+ float Y = 0.0,
+ float Z = 0.0,
+ output vector Vector = 0.8)
+{
+ Vector = vector(X, Y, Z);
+}
+
diff --git a/intern/cycles/kernel/shaders/node_emission.osl b/intern/cycles/kernel/shaders/node_emission.osl
index 2428da5ef4e..b28d731c19f 100644
--- a/intern/cycles/kernel/shaders/node_emission.osl
+++ b/intern/cycles/kernel/shaders/node_emission.osl
@@ -17,14 +17,10 @@
#include "stdosl.h"
shader node_emission(
- int TotalPower = 0,
color Color = 0.8,
float Strength = 1.0,
output closure color Emission = 0)
{
- if (TotalPower)
- Emission = ((Strength / surfacearea()) * Color) * emission();
- else
- Emission = (Strength * Color) * emission();
+ Emission = (Strength * Color) * emission();
}
diff --git a/intern/cycles/kernel/shaders/node_fresnel.h b/intern/cycles/kernel/shaders/node_fresnel.h
index 447a84255ef..d192c5d02de 100644
--- a/intern/cycles/kernel/shaders/node_fresnel.h
+++ b/intern/cycles/kernel/shaders/node_fresnel.h
@@ -34,3 +34,16 @@ float fresnel_dielectric_cos(float cosi, float eta)
return result;
}
+color fresnel_conductor(float cosi, color eta, color k)
+{
+ color cosi2 = color(cosi * cosi);
+ color one = color(1, 1, 1);
+ color tmp_f = eta * eta + k * k;
+ color tmp = tmp_f * cosi2;
+ color Rparl2 = (tmp - (2.0 * eta * cosi) + one) /
+ (tmp + (2.0 * eta * cosi) + one);
+ color Rperp2 = (tmp_f - (2.0 * eta * cosi) + cosi2) /
+ (tmp_f + (2.0 * eta * cosi) + cosi2);
+ return (Rparl2 + Rperp2) * 0.5;
+}
+
diff --git a/intern/cycles/kernel/shaders/node_geometry.osl b/intern/cycles/kernel/shaders/node_geometry.osl
index dbdf55802ae..cd68f07b21e 100644
--- a/intern/cycles/kernel/shaders/node_geometry.osl
+++ b/intern/cycles/kernel/shaders/node_geometry.osl
@@ -49,12 +49,8 @@ shader node_geometry(
/* try to create spherical tangent from generated coordinates */
if (getattribute("geom:generated", generated)) {
- matrix project = matrix(0.0, 1.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0,
- 0.0, 0.0, 0.0, 0.0,
- 0.5, -0.5, 0.0, 1.0);
-
- vector T = transform("object", "world", transform(project, generated));
+ normal data = normal(-(generated[1] - 0.5), (generated[0] - 0.5), 0.0);
+ vector T = transform("object", "world", data);
Tangent = cross(Normal, normalize(cross(T, Normal)));
}
else {
diff --git a/intern/cycles/kernel/shaders/node_glossy_bsdf.osl b/intern/cycles/kernel/shaders/node_glossy_bsdf.osl
index b4e0fe62223..5c727ca6917 100644
--- a/intern/cycles/kernel/shaders/node_glossy_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_glossy_bsdf.osl
@@ -19,7 +19,7 @@
shader node_glossy_bsdf(
color Color = 0.8,
- string distribution = "Beckmann",
+ string distribution = "GGX",
float Roughness = 0.2,
normal Normal = N,
output closure color BSDF = 0)
@@ -30,6 +30,8 @@ shader node_glossy_bsdf(
BSDF = Color * microfacet_beckmann(Normal, Roughness);
else if (distribution == "GGX")
BSDF = Color * microfacet_ggx(Normal, Roughness);
+ else
+ BSDF = Color * ashikhmin_shirley(Normal, vector(0, 0, 0), Roughness, Roughness);
}
diff --git a/intern/cycles/kernel/shaders/node_image_texture.osl b/intern/cycles/kernel/shaders/node_image_texture.osl
index 7238a1e8862..18b5fb4b31f 100644
--- a/intern/cycles/kernel/shaders/node_image_texture.osl
+++ b/intern/cycles/kernel/shaders/node_image_texture.osl
@@ -113,6 +113,10 @@ shader node_image_texture(
weight[2] = ((2.0 - limit) * Nob[2] + (limit - 1.0)) / (2.0 * limit - 1.0);
}
}
+ else {
+ /* Desperate mode, no valid choice anyway, fallback to one side.*/
+ weight[0] = 1.0;
+ }
Color = color(0.0, 0.0, 0.0);
Alpha = 0.0;
diff --git a/intern/cycles/kernel/shaders/node_musgrave_texture.osl b/intern/cycles/kernel/shaders/node_musgrave_texture.osl
index 60762539002..a32c3d4b1b8 100644
--- a/intern/cycles/kernel/shaders/node_musgrave_texture.osl
+++ b/intern/cycles/kernel/shaders/node_musgrave_texture.osl
@@ -35,14 +35,14 @@ float noise_musgrave_fBm(point p, string basis, float H, float lacunarity, float
int i;
for (i = 0; i < (int)octaves; i++) {
- value += safe_noise(p, 0) * pwr;
+ value += safe_noise(p, "signed") * pwr;
pwr *= pwHL;
p *= lacunarity;
}
rmd = octaves - floor(octaves);
if (rmd != 0.0)
- value += rmd * safe_noise(p, 0) * pwr;
+ value += rmd * safe_noise(p, "signed") * pwr;
return value;
}
@@ -63,14 +63,14 @@ float noise_musgrave_multi_fractal(point p, string basis, float H, float lacunar
int i;
for (i = 0; i < (int)octaves; i++) {
- value *= (pwr * safe_noise(p, 0) + 1.0);
+ value *= (pwr * safe_noise(p, "signed") + 1.0);
pwr *= pwHL;
p *= lacunarity;
}
rmd = octaves - floor(octaves);
if (rmd != 0.0)
- value *= (rmd * pwr * safe_noise(p, 0) + 1.0); /* correct? */
+ value *= (rmd * pwr * safe_noise(p, "signed") + 1.0); /* correct? */
return value;
}
@@ -91,11 +91,11 @@ float noise_musgrave_hetero_terrain(point p, string basis, float H, float lacuna
int i;
/* first unscaled octave of function; later octaves are scaled */
- value = offset + safe_noise(p, 0);
+ value = offset + safe_noise(p, "signed");
p *= lacunarity;
for (i = 1; i < (int)octaves; i++) {
- increment = (safe_noise(p, 0) + offset) * pwr * value;
+ increment = (safe_noise(p, "signed") + offset) * pwr * value;
value += increment;
pwr *= pwHL;
p *= lacunarity;
@@ -103,7 +103,7 @@ float noise_musgrave_hetero_terrain(point p, string basis, float H, float lacuna
rmd = octaves - floor(octaves);
if (rmd != 0.0) {
- increment = (safe_noise(p, 0) + offset) * pwr * value;
+ increment = (safe_noise(p, "signed") + offset) * pwr * value;
value += rmd * increment;
}
@@ -126,7 +126,7 @@ float noise_musgrave_hybrid_multi_fractal(point p, string basis, float H,
float pwr = pwHL;
int i;
- result = safe_noise(p, 0) + offset;
+ result = safe_noise(p, "signed") + offset;
weight = gain * result;
p *= lacunarity;
@@ -134,7 +134,7 @@ float noise_musgrave_hybrid_multi_fractal(point p, string basis, float H,
if (weight > 1.0)
weight = 1.0;
- signal = (safe_noise(p, 0) + offset) * pwr;
+ signal = (safe_noise(p, "signed") + offset) * pwr;
pwr *= pwHL;
result += weight * signal;
weight *= gain * signal;
@@ -143,7 +143,7 @@ float noise_musgrave_hybrid_multi_fractal(point p, string basis, float H,
rmd = octaves - floor(octaves);
if (rmd != 0.0)
- result += rmd * ((safe_noise(p, 0) + offset) * pwr);
+ result += rmd * ((safe_noise(p, "signed") + offset) * pwr);
return result;
}
@@ -164,7 +164,7 @@ float noise_musgrave_ridged_multi_fractal(point p, string basis, float H,
float pwr = pwHL;
int i;
- signal = offset - fabs(safe_noise(p, 0));
+ signal = offset - fabs(safe_noise(p, "signed"));
signal *= signal;
result = signal;
weight = 1.0;
@@ -172,7 +172,7 @@ float noise_musgrave_ridged_multi_fractal(point p, string basis, float H,
for (i = 1; i < (int)octaves; i++) {
p *= lacunarity;
weight = clamp(signal * gain, 0.0, 1.0);
- signal = offset - fabs(safe_noise(p, 0));
+ signal = offset - fabs(safe_noise(p, "signed"));
signal *= signal;
signal *= weight;
result += signal * pwr;
diff --git a/intern/cycles/kernel/shaders/node_separate_xyz.osl b/intern/cycles/kernel/shaders/node_separate_xyz.osl
new file mode 100644
index 00000000000..63725cb9995
--- /dev/null
+++ b/intern/cycles/kernel/shaders/node_separate_xyz.osl
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+#include "stdosl.h"
+
+shader node_separate_xyz(
+ vector Vector = 0.8,
+ output float X = 0.0,
+ output float Y = 0.0,
+ output float Z = 0.0)
+{
+ X = Vector[0];
+ Y = Vector[1];
+ Z = Vector[2];
+}
diff --git a/intern/cycles/kernel/shaders/node_texture.h b/intern/cycles/kernel/shaders/node_texture.h
index de51559f297..2710eed414a 100644
--- a/intern/cycles/kernel/shaders/node_texture.h
+++ b/intern/cycles/kernel/shaders/node_texture.h
@@ -153,12 +153,12 @@ float voronoi_CrS(point p) { return 2.0 * voronoi_Cr(p) - 1.0; }
/* Noise Bases */
-float safe_noise(point p, int type)
+float safe_noise(point p, string type)
{
float f = 0.0;
/* Perlin noise in range -1..1 */
- if (type == 0)
+ if (type == "signed")
f = noise("perlin", p);
/* Perlin noise in range 0..1 */
@@ -175,7 +175,7 @@ float safe_noise(point p, int type)
float noise_basis(point p, string basis)
{
if (basis == "Perlin")
- return safe_noise(p, 1);
+ return safe_noise(p, "unsigned");
if (basis == "Voronoi F1")
return voronoi_F1S(p);
if (basis == "Voronoi F2")
diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h
index 6f824ea8ebd..1ff8f363b49 100644
--- a/intern/cycles/kernel/shaders/stdosl.h
+++ b/intern/cycles/kernel/shaders/stdosl.h
@@ -476,17 +476,17 @@ closure color diffuse_ramp(normal N, color colors[8]) BUILTIN;
closure color phong_ramp(normal N, float exponent, color colors[8]) BUILTIN;
closure color diffuse_toon(normal N, float size, float smooth) BUILTIN;
closure color glossy_toon(normal N, float size, float smooth) BUILTIN;
-closure color westin_backscatter(normal N, float roughness) BUILTIN;
-closure color westin_sheen(normal N, float edginess) BUILTIN;
closure color translucent(normal N) BUILTIN;
closure color reflection(normal N) BUILTIN;
closure color refraction(normal N, float eta) BUILTIN;
closure color transparent() BUILTIN;
closure color microfacet_ggx(normal N, float ag) BUILTIN;
+closure color microfacet_ggx_aniso(normal N, vector T, float ax, float ay) BUILTIN;
closure color microfacet_ggx_refraction(normal N, float ag, float eta) BUILTIN;
closure color microfacet_beckmann(normal N, float ab) BUILTIN;
+closure color microfacet_beckmann_aniso(normal N, vector T, float ax, float ay) BUILTIN;
closure color microfacet_beckmann_refraction(normal N, float ab, float eta) BUILTIN;
-closure color ward(normal N, vector T,float ax, float ay) BUILTIN;
+closure color ashikhmin_shirley(normal N, vector T,float ax, float ay) BUILTIN;
closure color ashikhmin_velvet(normal N, float sigma) BUILTIN;
closure color emission() BUILTIN;
closure color background() BUILTIN;
@@ -505,12 +505,8 @@ closure color hair_transmission(normal N, float roughnessu, float roughnessv, ve
closure color henyey_greenstein(float g) BUILTIN;
closure color absorption() BUILTIN;
-// Backwards compatibility
-closure color bssrdf_cubic(normal N, vector radius) BUILTIN;
-closure color bssrdf_gaussian(normal N, vector radius) BUILTIN;
-closure color specular_toon(normal N, float size, float smooth) BUILTIN;
-
// Renderer state
+int backfacing () BUILTIN;
int raytype (string typename) BUILTIN;
// the individual 'isFOOray' functions are deprecated
int iscameraray () { return raytype("camera"); }
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index dbf59c60cb0..c13eae813d6 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -167,8 +167,8 @@ CCL_NAMESPACE_END
#include "svm_math.h"
#include "svm_mix.h"
#include "svm_ramp.h"
-#include "svm_sepcomb_rgb.h"
#include "svm_sepcomb_hsv.h"
+#include "svm_sepcomb_vector.h"
#include "svm_musgrave.h"
#include "svm_sky.h"
#include "svm_tex_coord.h"
@@ -236,7 +236,7 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade
if(stack_load_float(stack, node.z) == 1.0f)
offset += node.y;
break;
-#ifdef __IMAGE_TEXTURES__
+#ifdef __TEXTURES__
case NODE_TEX_IMAGE:
svm_node_tex_image(kg, sd, stack, node);
break;
@@ -246,8 +246,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade
case NODE_TEX_ENVIRONMENT:
svm_node_tex_environment(kg, sd, stack, node);
break;
-#endif
-#ifdef __PROCEDURAL_TEXTURES__
case NODE_TEX_SKY:
svm_node_tex_sky(kg, sd, stack, node, &offset);
break;
@@ -327,11 +325,11 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade
case NODE_MIX:
svm_node_mix(kg, sd, stack, node.y, node.z, node.w, &offset);
break;
- case NODE_SEPARATE_RGB:
- svm_node_separate_rgb(sd, stack, node.y, node.z, node.w);
+ case NODE_SEPARATE_VECTOR:
+ svm_node_separate_vector(sd, stack, node.y, node.z, node.w);
break;
- case NODE_COMBINE_RGB:
- svm_node_combine_rgb(sd, stack, node.y, node.z, node.w);
+ case NODE_COMBINE_VECTOR:
+ svm_node_combine_vector(sd, stack, node.y, node.z, node.w);
break;
case NODE_SEPARATE_HSV:
svm_node_separate_hsv(kg, sd, stack, node.y, node.z, node.w, &offset);
@@ -407,12 +405,7 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade
break;
case NODE_CLOSURE_SET_NORMAL:
svm_node_set_normal(kg, sd, stack, node.y, node.z );
- break;
-#endif
- case NODE_EMISSION_SET_WEIGHT_TOTAL:
- svm_node_emission_set_weight_total(kg, sd, node.y, node.z, node.w);
break;
-#ifdef __EXTRA_NODES__
case NODE_RGB_RAMP:
svm_node_rgb_ramp(kg, sd, stack, node, &offset);
break;
@@ -425,17 +418,13 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade
case NODE_LIGHT_FALLOFF:
svm_node_light_falloff(sd, stack, node);
break;
-#endif
-#ifdef __ANISOTROPIC__
+#endif
case NODE_TANGENT:
svm_node_tangent(kg, sd, stack, node);
break;
-#endif
-#ifdef __NORMAL_MAP__
case NODE_NORMAL_MAP:
svm_node_normal_map(kg, sd, stack, node);
- break;
-#endif
+ break;
case NODE_END:
default:
return;
diff --git a/intern/cycles/kernel/svm/svm_blackbody.h b/intern/cycles/kernel/svm/svm_blackbody.h
index 63dbf27d35e..1e40e868e14 100644
--- a/intern/cycles/kernel/svm/svm_blackbody.h
+++ b/intern/cycles/kernel/svm/svm_blackbody.h
@@ -42,7 +42,7 @@ ccl_device void svm_node_blackbody(KernelGlobals *kg, ShaderData *sd, float *sta
/* Input */
float temperature = stack_load_float(stack, temperature_offset);
- if (temperature < BB_DRAPPER) {
+ if (temperature < BB_DRAPER) {
/* just return very very dim red */
color_rgb = make_float3(1.0e-6f,0.0f,0.0f);
}
@@ -53,9 +53,9 @@ ccl_device void svm_node_blackbody(KernelGlobals *kg, ShaderData *sd, float *sta
/* reconstruct a proper index for the table lookup, compared to OSL we don't look up two colors
just one (the OSL-lerp is also automatically done for us by "lookup_table_read") */
- float t = powf((temperature - BB_DRAPPER) * (1.0f / BB_TABLE_SPACING), (1.0f / BB_TABLE_XPOWER));
+ float t = powf((temperature - BB_DRAPER) * (1.0f / BB_TABLE_SPACING), (1.0f / BB_TABLE_XPOWER));
- int blackbody_table_offset = kernel_data.blackbody.table_offset;
+ int blackbody_table_offset = kernel_data.tables.blackbody_offset;
/* Retrieve colors from the lookup table */
float lutval = t*lookuptablenormalize;
diff --git a/intern/cycles/kernel/svm/svm_checker.h b/intern/cycles/kernel/svm/svm_checker.h
index 8d1a1a40449..e0408ad334a 100644
--- a/intern/cycles/kernel/svm/svm_checker.h
+++ b/intern/cycles/kernel/svm/svm_checker.h
@@ -21,9 +21,9 @@ CCL_NAMESPACE_BEGIN
ccl_device_noinline float svm_checker(float3 p)
{
/* avoid precision issues on unit coordinates */
- p.x = (p.x + 0.00001f)*0.9999f;
- p.y = (p.y + 0.00001f)*0.9999f;
- p.z = (p.z + 0.00001f)*0.9999f;
+ p.x = (p.x + 0.000001f)*0.999999f;
+ p.y = (p.y + 0.000001f)*0.999999f;
+ p.z = (p.z + 0.000001f)*0.999999f;
int xi = float_to_int(fabsf(floorf(p.x)));
int yi = float_to_int(fabsf(floorf(p.y)));
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index a3770877544..30110db3ef9 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -24,6 +24,7 @@ ccl_device void svm_node_glass_setup(ShaderData *sd, ShaderClosure *sc, int type
if(refract) {
sc->data0 = eta;
sc->data1 = 0.0f;
+ sc->data2 = 0.0f;
sd->flag |= bsdf_refraction_setup(sc);
}
else
@@ -31,7 +32,8 @@ ccl_device void svm_node_glass_setup(ShaderData *sd, ShaderClosure *sc, int type
}
else if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID) {
sc->data0 = roughness;
- sc->data1 = eta;
+ sc->data1 = roughness;
+ sc->data2 = eta;
if(refract)
sd->flag |= bsdf_microfacet_beckmann_refraction_setup(sc);
@@ -40,7 +42,8 @@ ccl_device void svm_node_glass_setup(ShaderData *sd, ShaderClosure *sc, int type
}
else {
sc->data0 = roughness;
- sc->data1 = eta;
+ sc->data1 = roughness;
+ sc->data2 = eta;
if(refract)
sd->flag |= bsdf_microfacet_ggx_refraction_setup(sc);
@@ -135,11 +138,13 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
if(roughness == 0.0f) {
sc->data0 = 0.0f;
sc->data1 = 0.0f;
+ sc->data2 = 0.0f;
sd->flag |= bsdf_diffuse_setup(sc);
}
else {
sc->data0 = roughness;
sc->data1 = 0.0f;
+ sc->data2 = 0.0f;
sd->flag |= bsdf_oren_nayar_setup(sc);
}
}
@@ -151,6 +156,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
if(sc) {
sc->data0 = 0.0f;
sc->data1 = 0.0f;
+ sc->data2 = 0.0f;
sc->N = N;
sd->flag |= bsdf_translucent_setup(sc);
}
@@ -162,6 +168,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
if(sc) {
sc->data0 = 0.0f;
sc->data1 = 0.0f;
+ sc->data2 = 0.0f;
sc->N = N;
sd->flag |= bsdf_transparent_setup(sc);
}
@@ -169,9 +176,10 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
}
case CLOSURE_BSDF_REFLECTION_ID:
case CLOSURE_BSDF_MICROFACET_GGX_ID:
- case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: {
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: {
#ifdef __CAUSTICS_TRICKS__
- if(kernel_data.integrator.no_caustics && (path_flag & PATH_RAY_DIFFUSE))
+ if(!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE))
break;
#endif
ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight);
@@ -179,15 +187,18 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
if(sc) {
sc->N = N;
sc->data0 = param1;
- sc->data1 = 0.0f;
+ sc->data1 = param1;
+ sc->data2 = 0.0f;
/* setup bsdf */
if(type == CLOSURE_BSDF_REFLECTION_ID)
sd->flag |= bsdf_reflection_setup(sc);
else if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_ID)
sd->flag |= bsdf_microfacet_beckmann_setup(sc);
- else
+ else if(type == CLOSURE_BSDF_MICROFACET_GGX_ID)
sd->flag |= bsdf_microfacet_ggx_setup(sc);
+ else
+ sd->flag |= bsdf_ashikhmin_shirley_setup(sc);
}
break;
@@ -196,25 +207,35 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: {
#ifdef __CAUSTICS_TRICKS__
- if(kernel_data.integrator.no_caustics && (path_flag & PATH_RAY_DIFFUSE))
+ if(!kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE))
break;
#endif
ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight);
if(sc) {
sc->N = N;
- sc->data0 = param1;
float eta = fmaxf(param2, 1e-5f);
- sc->data1 = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta;
+ eta = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta;
/* setup bsdf */
- if(type == CLOSURE_BSDF_REFRACTION_ID)
+ if(type == CLOSURE_BSDF_REFRACTION_ID) {
+ sc->data0 = eta;
+ sc->data1 = 0.0f;
+ sc->data2 = 0.0f;
+
sd->flag |= bsdf_refraction_setup(sc);
- else if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID)
- sd->flag |= bsdf_microfacet_beckmann_refraction_setup(sc);
- else
- sd->flag |= bsdf_microfacet_ggx_refraction_setup(sc);
+ }
+ else {
+ sc->data0 = param1;
+ sc->data1 = param1;
+ sc->data2 = eta;
+
+ if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID)
+ sd->flag |= bsdf_microfacet_beckmann_refraction_setup(sc);
+ else
+ sd->flag |= bsdf_microfacet_ggx_refraction_setup(sc);
+ }
}
break;
@@ -223,8 +244,10 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
case CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID: {
#ifdef __CAUSTICS_TRICKS__
- if(kernel_data.integrator.no_caustics && (path_flag & PATH_RAY_DIFFUSE))
+ if(!kernel_data.integrator.caustics_reflective &&
+ !kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) {
break;
+ }
#endif
/* index of refraction */
float eta = fmaxf(param2, 1e-5f);
@@ -241,12 +264,21 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
float sample_weight = sc->sample_weight;
sc = svm_node_closure_get_bsdf(sd, mix_weight*fresnel);
-
- if(sc) {
- sc->N = N;
- svm_node_glass_setup(sd, sc, type, eta, roughness, false);
+#ifdef __CAUSTICS_TRICKS__
+ if(kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0)
+#endif
+ {
+ if(sc) {
+ sc->N = N;
+ svm_node_glass_setup(sd, sc, type, eta, roughness, false);
+ }
}
+#ifdef __CAUSTICS_TRICKS__
+ if(!kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE))
+ break;
+#endif
+
/* refraction */
sc = &sd->closure[sd->num_closure];
sc->weight = weight;
@@ -261,9 +293,11 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
break;
}
- case CLOSURE_BSDF_WARD_ID: {
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: {
#ifdef __CAUSTICS_TRICKS__
- if(kernel_data.integrator.no_caustics && (path_flag & PATH_RAY_DIFFUSE))
+ if(!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE))
break;
#endif
ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight);
@@ -271,7 +305,6 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
if(sc) {
sc->N = N;
-#ifdef __ANISOTROPIC__
sc->T = stack_load_float3(stack, data_node.y);
/* rotate tangent */
@@ -293,10 +326,14 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->data1 = roughness/(1.0f - anisotropy);
}
- sd->flag |= bsdf_ward_setup(sc);
-#else
- sd->flag |= bsdf_diffuse_setup(sc);
-#endif
+ sc->data2 = 0.0f;
+
+ if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID)
+ sd->flag |= bsdf_microfacet_beckmann_aniso_setup(sc);
+ else if (type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID)
+ sd->flag |= bsdf_microfacet_ggx_aniso_setup(sc);
+ else
+ sd->flag |= bsdf_ashikhmin_shirley_aniso_setup(sc);
}
break;
}
@@ -309,6 +346,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
/* sigma */
sc->data0 = clamp(param1, 0.0f, 1.0f);
sc->data1 = 0.0f;
+ sc->data2 = 0.0f;
sd->flag |= bsdf_ashikhmin_velvet_setup(sc);
}
break;
@@ -322,6 +360,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->N = N;
sc->data0 = param1;
sc->data1 = param2;
+ sc->data2 = 0.0f;
if (type == CLOSURE_BSDF_DIFFUSE_TOON_ID)
sd->flag |= bsdf_diffuse_toon_setup(sc);
@@ -339,7 +378,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
if(sc) {
/* todo: giving a fixed weight here will cause issues when
- * mixing multiple BSDFS. energey will not be conserved and
+ * mixing multiple BSDFS. energy will not be conserved and
* the throughput can blow up after multiple bounces. we
* better figure out a way to skip backfaces from rays
* spawned by transmission from the front */
@@ -356,11 +395,11 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->N = N;
sc->data0 = param1;
sc->data1 = param2;
- sc->offset = -stack_load_float(stack, data_node.z);
+ sc->data2 = -stack_load_float(stack, data_node.z);
if(!(sd->type & PRIMITIVE_ALL_CURVE)) {
sc->T = normalize(sd->dPdv);
- sc->offset = 0.0f;
+ sc->data2 = 0.0f;
}
else
sc->T = sd->dPdu;
@@ -405,6 +444,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->sample_weight = sample_weight;
sc->data0 = radius.x;
sc->data1 = texture_blur;
+ sc->data2 = 0.0f;
sc->T.x = sharpness;
#ifdef __OSL__
sc->prim = NULL;
@@ -421,6 +461,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->sample_weight = sample_weight;
sc->data0 = radius.y;
sc->data1 = texture_blur;
+ sc->data2 = 0.0f;
sc->T.x = sharpness;
#ifdef __OSL__
sc->prim = NULL;
@@ -437,6 +478,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->sample_weight = sample_weight;
sc->data0 = radius.z;
sc->data1 = texture_blur;
+ sc->data2 = 0.0f;
sc->T.x = sharpness;
#ifdef __OSL__
sc->prim = NULL;
@@ -582,16 +624,6 @@ ccl_device void svm_node_closure_set_weight(ShaderData *sd, uint r, uint g, uint
svm_node_closure_store_weight(sd, weight);
}
-ccl_device void svm_node_emission_set_weight_total(KernelGlobals *kg, ShaderData *sd, uint r, uint g, uint b)
-{
- float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b));
-
- if(sd->object != OBJECT_NONE)
- weight /= object_surface_area(kg, sd->object);
-
- svm_node_closure_store_weight(sd, weight);
-}
-
ccl_device void svm_node_closure_weight(ShaderData *sd, float *stack, uint weight_offset)
{
float3 weight = stack_load_float3(stack, weight_offset);
@@ -603,14 +635,10 @@ ccl_device void svm_node_emission_weight(KernelGlobals *kg, ShaderData *sd, floa
{
uint color_offset = node.y;
uint strength_offset = node.z;
- uint total_power = node.w;
float strength = stack_load_float(stack, strength_offset);
float3 weight = stack_load_float3(stack, color_offset)*strength;
- if(total_power && sd->object != OBJECT_NONE)
- weight /= object_surface_area(kg, sd->object);
-
svm_node_closure_store_weight(sd, weight);
}
diff --git a/intern/cycles/kernel/svm/svm_convert.h b/intern/cycles/kernel/svm/svm_convert.h
index 2503912c5c6..b221e0728ec 100644
--- a/intern/cycles/kernel/svm/svm_convert.h
+++ b/intern/cycles/kernel/svm/svm_convert.h
@@ -45,13 +45,13 @@ ccl_device void svm_node_convert(ShaderData *sd, float *stack, uint type, uint f
}
case NODE_CONVERT_VF: {
float3 f = stack_load_float3(stack, from);
- float g = (f.x + f.y + f.z)*(1.0f/3.0f);
+ float g = average(f);
stack_store_float(stack, to, g);
break;
}
case NODE_CONVERT_VI: {
float3 f = stack_load_float3(stack, from);
- int i = (int)((f.x + f.y + f.z)*(1.0f/3.0f));
+ int i = (int)average(f);
stack_store_int(stack, to, i);
break;
}
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index daf7c6652d2..8a256c9bda5 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -134,8 +134,8 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
{
#ifdef __KERNEL_CPU__
#ifdef __KERNEL_SSE2__
- __m128 r_m128;
- float4 &r = (float4 &)r_m128;
+ ssef r_ssef;
+ float4 &r = (float4 &)r_ssef;
r = kernel_tex_image_interp(id, x, y);
#else
float4 r = kernel_tex_image_interp(id, x, y);
@@ -252,9 +252,9 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
case 96: r = kernel_tex_image_interp(__tex_image_096, x, y); break;
case 97: r = kernel_tex_image_interp(__tex_image_097, x, y); break;
case 98: r = kernel_tex_image_interp(__tex_image_098, x, y); break;
- case 99: r = kernel_tex_image_interp(__tex_image_099, x, y); break;
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
+ case 99: r = kernel_tex_image_interp(__tex_image_099, x, y); break;
case 100: r = kernel_tex_image_interp(__tex_image_100, x, y); break;
case 101: r = kernel_tex_image_interp(__tex_image_101, x, y); break;
case 102: r = kernel_tex_image_interp(__tex_image_102, x, y); break;
@@ -318,14 +318,14 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
float alpha = r.w;
if(use_alpha && alpha != 1.0f && alpha != 0.0f) {
- r_m128 = _mm_div_ps(r_m128, _mm_set1_ps(alpha));
+ r_ssef = r_ssef / ssef(alpha);
if(id >= TEX_NUM_FLOAT_IMAGES)
- r_m128 = _mm_min_ps(r_m128, _mm_set1_ps(1.0f));
+ r_ssef = min(r_ssef, ssef(1.0f));
r.w = alpha;
}
if(srgb) {
- r_m128 = color_srgb_to_scene_linear(r_m128);
+ r_ssef = color_srgb_to_scene_linear(r_ssef);
r.w = alpha;
}
#else
@@ -435,6 +435,10 @@ ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float
weight.z = ((2.0f - limit)*N.z + (limit - 1.0f))/(2.0f*limit - 1.0f);
}
}
+ else {
+ /* Desperate mode, no valid choice anyway, fallback to one side.*/
+ weight.x = 1.0f;
+ }
/* now fetch textures */
uint co_offset, out_offset, alpha_offset, srgb;
diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h
index 91dda8972f9..c77c2a1c482 100644
--- a/intern/cycles/kernel/svm/svm_noise.h
+++ b/intern/cycles/kernel/svm/svm_noise.h
@@ -38,11 +38,11 @@ ccl_device int quick_floor(float x)
return float_to_int(x) - ((x < 0) ? 1 : 0);
}
#else
-ccl_device_inline __m128i quick_floor_sse(const __m128& x)
+ccl_device_inline ssei quick_floor_sse(const ssef& x)
{
- __m128i b = _mm_cvttps_epi32(x);
- __m128i isneg = _mm_castps_si128(_mm_cmplt_ps(x, _mm_set1_ps(0.0f)));
- return _mm_add_epi32(b, isneg); // unsaturated add 0xffffffff is the same as subtract -1
+ ssei b = truncatei(x);
+ ssei isneg = cast((x < ssef(0.0f)).m128);
+ return b + isneg; // unsaturated add 0xffffffff is the same as subtract -1
}
#endif
@@ -52,9 +52,9 @@ ccl_device float bits_to_01(uint bits)
return bits * (1.0f/(float)0xFFFFFFFF);
}
#else
-ccl_device_inline __m128 bits_to_01_sse(const __m128i& bits)
+ccl_device_inline ssef bits_to_01_sse(const ssei& bits)
{
- return _mm_mul_ps(uint32_to_float(bits), _mm_set1_ps(1.0f/(float)0xFFFFFFFF));
+ return uint32_to_float(bits) * ssef(1.0f/(float)0xFFFFFFFF);
}
#endif
@@ -88,16 +88,16 @@ ccl_device uint hash(uint kx, uint ky, uint kz)
}
#ifdef __KERNEL_SSE2__
-ccl_device_inline __m128i hash_sse(const __m128i& kx, const __m128i& ky, const __m128i& kz)
+ccl_device_inline ssei hash_sse(const ssei& kx, const ssei& ky, const ssei& kz)
{
-#define rot(x,k) _mm_or_si128(_mm_slli_epi32((x), (k)), _mm_srli_epi32((x), 32-(k)))
-#define xor_rot(a, b, c) do {a = _mm_xor_si128(a, b); a = _mm_sub_epi32(a, rot(b, c));} while(0)
+#define rot(x,k) (((x)<<(k)) | (srl(x, 32-(k))))
+#define xor_rot(a, b, c) do {a = a^b; a = a - rot(b, c);} while(0)
uint len = 3;
- __m128i magic = _mm_set1_epi32(0xdeadbeef + (len << 2) + 13);
- __m128i a = _mm_add_epi32(magic, kx);
- __m128i b = _mm_add_epi32(magic, ky);
- __m128i c = _mm_add_epi32(magic, kz);
+ ssei magic = ssei(0xdeadbeef + (len << 2) + 13);
+ ssei a = magic + kx;
+ ssei b = magic + ky;
+ ssei c = magic + kz;
xor_rot(c, b, 14);
xor_rot(a, c, 11);
@@ -133,10 +133,10 @@ ccl_device float floorfrac(float x, int* i)
return x - *i;
}
#else
-ccl_device_inline __m128 floorfrac_sse(const __m128& x, __m128i *i)
+ccl_device_inline ssef floorfrac_sse(const ssef& x, ssei *i)
{
*i = quick_floor_sse(x);
- return _mm_sub_ps(x, _mm_cvtepi32_ps(*i));
+ return x - ssef(*i);
}
#endif
@@ -146,11 +146,11 @@ ccl_device float fade(float t)
return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f);
}
#else
-ccl_device_inline __m128 fade_sse(const __m128 *t)
+ccl_device_inline ssef fade_sse(const ssef *t)
{
- __m128 a = fma(*t, _mm_set1_ps(6.0f), _mm_set1_ps(-15.0f));
- __m128 b = fma(*t, a, _mm_set1_ps(10.0f));
- return _mm_mul_ps(_mm_mul_ps(*t, *t), _mm_mul_ps(*t, b));
+ ssef a = madd(*t, ssef(6.0f), ssef(-15.0f));
+ ssef b = madd(*t, a, ssef(10.0f));
+ return ((*t) * (*t)) * ((*t) * b);
}
#endif
@@ -160,10 +160,10 @@ ccl_device float nerp(float t, float a, float b)
return (1.0f - t) * a + t * b;
}
#else
-ccl_device_inline __m128 nerp_sse(const __m128& t, const __m128& a, const __m128& b)
+ccl_device_inline ssef nerp_sse(const ssef& t, const ssef& a, const ssef& b)
{
- __m128 x1 = _mm_mul_ps(_mm_sub_ps(_mm_set1_ps(1.0f), t), a);
- return fma(t, b, x1);
+ ssef x1 = (ssef(1.0f) - t) * a;
+ return madd(t, b, x1);
}
#endif
@@ -178,35 +178,35 @@ ccl_device float grad(int hash, float x, float y, float z)
return ((h&1) ? -u : u) + ((h&2) ? -v : v);
}
#else
-ccl_device_inline __m128 grad_sse(const __m128i& hash, const __m128& x, const __m128& y, const __m128& z)
+ccl_device_inline ssef grad_sse(const ssei& hash, const ssef& x, const ssef& y, const ssef& z)
{
- __m128i c1 = _mm_set1_epi32(1);
- __m128i c2 = _mm_set1_epi32(2);
+ ssei c1 = ssei(1);
+ ssei c2 = ssei(2);
- __m128i h = _mm_and_si128(hash, _mm_set1_epi32(15)); // h = hash & 15
+ ssei h = hash & ssei(15); // h = hash & 15
- __m128i case_ux = _mm_cmplt_epi32(h, _mm_set1_epi32(8)); // 0xffffffff if h < 8 else 0
+ sseb case_ux = h < ssei(8); // 0xffffffff if h < 8 else 0
- __m128 u = blend(_mm_castsi128_ps(case_ux), x, y); // u = h<8 ? x : y
+ ssef u = select(case_ux, x, y); // u = h<8 ? x : y
- __m128i case_vy = _mm_cmplt_epi32(h, _mm_set1_epi32(4)); // 0xffffffff if h < 4 else 0
+ sseb case_vy = h < ssei(4); // 0xffffffff if h < 4 else 0
- __m128i case_h12 = _mm_cmpeq_epi32(h, _mm_set1_epi32(12)); // 0xffffffff if h == 12 else 0
- __m128i case_h14 = _mm_cmpeq_epi32(h, _mm_set1_epi32(14)); // 0xffffffff if h == 14 else 0
+ sseb case_h12 = h == ssei(12); // 0xffffffff if h == 12 else 0
+ sseb case_h14 = h == ssei(14); // 0xffffffff if h == 14 else 0
- __m128i case_vx = _mm_or_si128(case_h12, case_h14); // 0xffffffff if h == 12 or h == 14 else 0
+ sseb case_vx = case_h12 | case_h14; // 0xffffffff if h == 12 or h == 14 else 0
- __m128 v = blend(_mm_castsi128_ps(case_vy), y, blend(_mm_castsi128_ps(case_vx), x, z)); // v = h<4 ? y : h == 12 || h == 14 ? x : z
+ ssef v = select(case_vy, y, select(case_vx, x, z)); // v = h<4 ? y : h == 12 || h == 14 ? x : z
- __m128i case_uneg = _mm_slli_epi32(_mm_and_si128(h, c1), 31); // 1<<31 if h&1 else 0
- __m128 case_uneg_mask = _mm_castsi128_ps(case_uneg); // -0.0 if h&1 else +0.0
- __m128 ru = _mm_xor_ps(u, case_uneg_mask); // -u if h&1 else u (copy float sign)
+ ssei case_uneg = (h & c1) << 31; // 1<<31 if h&1 else 0
+ ssef case_uneg_mask = cast(case_uneg); // -0.0 if h&1 else +0.0
+ ssef ru = u ^ case_uneg_mask; // -u if h&1 else u (copy float sign)
- __m128i case_vneg = _mm_slli_epi32(_mm_and_si128(h, c2), 30); // 2<<30 if h&2 else 0
- __m128 case_vneg_mask = _mm_castsi128_ps(case_vneg); // -0.0 if h&2 else +0.0
- __m128 rv = _mm_xor_ps(v, case_vneg_mask); // -v if h&2 else v (copy float sign)
+ ssei case_vneg = (h & c2) << 30; // 2<<30 if h&2 else 0
+ ssef case_vneg_mask = cast(case_vneg); // -0.0 if h&2 else +0.0
+ ssef rv = v ^ case_vneg_mask; // -v if h&2 else v (copy float sign)
- __m128 r = _mm_add_ps(ru, rv); // ((h&1) ? -u : u) + ((h&2) ? -v : v)
+ ssef r = ru + rv; // ((h&1) ? -u : u) + ((h&2) ? -v : v)
return r;
}
#endif
@@ -217,9 +217,9 @@ ccl_device float scale3(float result)
return 0.9820f * result;
}
#else
-ccl_device_inline __m128 scale3_sse(const __m128& result)
+ccl_device_inline ssef scale3_sse(const ssef& result)
{
- return _mm_mul_ps(_mm_set1_ps(0.9820f), result);
+ return ssef(0.9820f) * result;
}
#endif
@@ -252,75 +252,41 @@ ccl_device_noinline float perlin(float x, float y, float z)
#else
ccl_device_noinline float perlin(float x, float y, float z)
{
- __m128 xyz = _mm_setr_ps(x, y, z, 0.0f);
- __m128i XYZ;
+ ssef xyz = ssef(x, y, z, 0.0f);
+ ssei XYZ;
- __m128 fxyz = floorfrac_sse(xyz, &XYZ);
+ ssef fxyz = floorfrac_sse(xyz, &XYZ);
- __m128 uvw = fade_sse(&fxyz);
- __m128 u = broadcast<0>(uvw), v = broadcast<1>(uvw), w = broadcast<2>(uvw);
+ ssef uvw = fade_sse(&fxyz);
+ ssef u = shuffle<0>(uvw), v = shuffle<1>(uvw), w = shuffle<2>(uvw);
- __m128i XYZ_ofc = _mm_add_epi32(XYZ, _mm_set1_epi32(1));
- __m128i vdy = shuffle<1, 1, 1, 1>(XYZ, XYZ_ofc); // +0, +0, +1, +1
- __m128i vdz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ_ofc)); // +0, +1, +0, +1
+ ssei XYZ_ofc = XYZ + ssei(1);
+ ssei vdy = shuffle<1, 1, 1, 1>(XYZ, XYZ_ofc); // +0, +0, +1, +1
+ ssei vdz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ_ofc)); // +0, +1, +0, +1
- __m128i h1 = hash_sse(broadcast<0>(XYZ), vdy, vdz); // hash directions 000, 001, 010, 011
- __m128i h2 = hash_sse(broadcast<0>(XYZ_ofc), vdy, vdz); // hash directions 100, 101, 110, 111
+ ssei h1 = hash_sse(shuffle<0>(XYZ), vdy, vdz); // hash directions 000, 001, 010, 011
+ ssei h2 = hash_sse(shuffle<0>(XYZ_ofc), vdy, vdz); // hash directions 100, 101, 110, 111
- __m128 fxyz_ofc = _mm_sub_ps(fxyz, _mm_set1_ps(1.0f));
- __m128 vfy = shuffle<1, 1, 1, 1>(fxyz, fxyz_ofc);
- __m128 vfz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz_ofc));
+ ssef fxyz_ofc = fxyz - ssef(1.0f);
+ ssef vfy = shuffle<1, 1, 1, 1>(fxyz, fxyz_ofc);
+ ssef vfz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz_ofc));
- __m128 g1 = grad_sse(h1, broadcast<0>(fxyz), vfy, vfz);
- __m128 g2 = grad_sse(h2, broadcast<0>(fxyz_ofc), vfy, vfz);
- __m128 n1 = nerp_sse(u, g1, g2);
+ ssef g1 = grad_sse(h1, shuffle<0>(fxyz), vfy, vfz);
+ ssef g2 = grad_sse(h2, shuffle<0>(fxyz_ofc), vfy, vfz);
+ ssef n1 = nerp_sse(u, g1, g2);
- __m128 n1_half = shuffle<2, 3, 2, 3>(n1); // extract 2 floats to a separate vector
- __m128 n2 = nerp_sse(v, n1, n1_half); // process nerp([a, b, _, _], [c, d, _, _]) -> [a', b', _, _]
+ ssef n1_half = shuffle<2, 3, 2, 3>(n1); // extract 2 floats to a separate vector
+ ssef n2 = nerp_sse(v, n1, n1_half); // process nerp([a, b, _, _], [c, d, _, _]) -> [a', b', _, _]
- __m128 n2_second = broadcast<1>(n2); // extract b to a separate vector
- __m128 result = nerp_sse(w, n2, n2_second); // process nerp([a', _, _, _], [b', _, _, _]) -> [a'', _, _, _]
+ ssef n2_second = shuffle<1>(n2); // extract b to a separate vector
+ ssef result = nerp_sse(w, n2, n2_second); // process nerp([a', _, _, _], [b', _, _, _]) -> [a'', _, _, _]
- __m128 r = scale3_sse(result);
+ ssef r = scale3_sse(result);
- __m128 infmask = _mm_castsi128_ps(_mm_set1_epi32(0x7f800000));
- __m128 rinfmask = _mm_cmpeq_ps(_mm_and_ps(r, infmask), infmask); // 0xffffffff if r is inf/-inf/nan else 0
- __m128 rfinite = _mm_andnot_ps(rinfmask, r); // 0 if r is inf/-inf/nan else r
- return _mm_cvtss_f32(rfinite);
-}
-#endif
-
-#if 0 // unused
-ccl_device_noinline float perlin_periodic(float x, float y, float z, float3 pperiod)
-{
- int X; float fx = floorfrac(x, &X);
- int Y; float fy = floorfrac(y, &Y);
- int Z; float fz = floorfrac(z, &Z);
-
- int3 p;
-
- p.x = max(quick_floor(pperiod.x), 1);
- p.y = max(quick_floor(pperiod.y), 1);
- p.z = max(quick_floor(pperiod.z), 1);
-
- float u = fade(fx);
- float v = fade(fy);
- float w = fade(fz);
-
- float result;
-
- result = nerp (w, nerp (v, nerp (u, grad (phash (X , Y , Z , p), fx , fy , fz ),
- grad (phash (X+1, Y , Z , p), fx-1.0f, fy , fz )),
- nerp (u, grad (phash (X , Y+1, Z , p), fx , fy-1.0f, fz ),
- grad (phash (X+1, Y+1, Z , p), fx-1.0f, fy-1.0f, fz ))),
- nerp (v, nerp (u, grad (phash (X , Y , Z+1, p), fx , fy , fz-1.0f ),
- grad (phash (X+1, Y , Z+1, p), fx-1.0f, fy , fz-1.0f )),
- nerp (u, grad (phash (X , Y+1, Z+1, p), fx , fy-1.0f, fz-1.0f ),
- grad (phash (X+1, Y+1, Z+1, p), fx-1.0f, fy-1.0f, fz-1.0f ))));
- float r = scale3(result);
-
- /* can happen for big coordinates, things even out to 0.0 then anyway */
- return (isfinite(r))? r: 0.0f;
+ ssef infmask = cast(ssei(0x7f800000));
+ ssef rinfmask = ((r & infmask) == infmask).m128; // 0xffffffff if r is inf/-inf/nan else 0
+ ssef rfinite = andnot(rinfmask, r); // 0 if r is inf/-inf/nan else r
+ return extract<0>(rfinite);
}
#endif
@@ -357,30 +323,15 @@ ccl_device float3 cellnoise_color(float3 p)
return make_float3(r, g, b);
}
#else
-ccl_device __m128 cellnoise_color(const __m128& p)
+ccl_device ssef cellnoise_color(const ssef& p)
{
- __m128i ip = quick_floor_sse(p);
- __m128i ip_yxz = shuffle<1, 0, 2, 3>(ip);
- __m128i ip_xyy = shuffle<0, 1, 1, 3>(ip);
- __m128i ip_zzx = shuffle<2, 2, 0, 3>(ip);
+ ssei ip = quick_floor_sse(p);
+ ssei ip_yxz = shuffle<1, 0, 2, 3>(ip);
+ ssei ip_xyy = shuffle<0, 1, 1, 3>(ip);
+ ssei ip_zzx = shuffle<2, 2, 0, 3>(ip);
return bits_to_01_sse(hash_sse(ip_xyy, ip_yxz, ip_zzx));
}
#endif
-#if 0 // unused
-/* periodic perlin noise in range 0..1 */
-ccl_device float pnoise(float3 p, float3 pperiod)
-{
- float r = perlin_periodic(p.x, p.y, p.z, pperiod);
- return 0.5f*r + 0.5f;
-}
-
-/* periodic perlin noise in range -1..1 */
-ccl_device float psnoise(float3 p, float3 pperiod)
-{
- return perlin_periodic(p.x, p.y, p.z, pperiod);
-}
-#endif
-
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_sepcomb_rgb.h b/intern/cycles/kernel/svm/svm_sepcomb_rgb.h
deleted file mode 100644
index 34c4449ecdb..00000000000
--- a/intern/cycles/kernel/svm/svm_sepcomb_rgb.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License
- */
-
-CCL_NAMESPACE_BEGIN
-
-ccl_device void svm_node_combine_rgb(ShaderData *sd, float *stack, uint in_offset, uint color_index, uint out_offset)
-{
- float color = stack_load_float(stack, in_offset);
-
- if (stack_valid(out_offset))
- stack_store_float(stack, out_offset+color_index, color);
-}
-
-ccl_device void svm_node_separate_rgb(ShaderData *sd, float *stack, uint icolor_offset, uint color_index, uint out_offset)
-{
- float3 color = stack_load_float3(stack, icolor_offset);
-
- if (stack_valid(out_offset)) {
- if (color_index == 0)
- stack_store_float(stack, out_offset, color.x);
- else if (color_index == 1)
- stack_store_float(stack, out_offset, color.y);
- else
- stack_store_float(stack, out_offset, color.z);
- }
-}
-
-CCL_NAMESPACE_END
-
diff --git a/intern/cycles/kernel/svm/svm_sepcomb_vector.h b/intern/cycles/kernel/svm/svm_sepcomb_vector.h
new file mode 100644
index 00000000000..c8e7e34f87d
--- /dev/null
+++ b/intern/cycles/kernel/svm/svm_sepcomb_vector.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2011-2014 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Vector combine / separate, used for the RGB and XYZ nodes */
+
+ccl_device void svm_node_combine_vector(ShaderData *sd, float *stack, uint in_offset, uint vector_index, uint out_offset)
+{
+ float vector = stack_load_float(stack, in_offset);
+
+ if (stack_valid(out_offset))
+ stack_store_float(stack, out_offset+vector_index, vector);
+}
+
+ccl_device void svm_node_separate_vector(ShaderData *sd, float *stack, uint ivector_offset, uint vector_index, uint out_offset)
+{
+ float3 vector = stack_load_float3(stack, ivector_offset);
+
+ if (stack_valid(out_offset)) {
+ if (vector_index == 0)
+ stack_store_float(stack, out_offset, vector.x);
+ else if (vector_index == 1)
+ stack_store_float(stack, out_offset, vector.y);
+ else
+ stack_store_float(stack, out_offset, vector.z);
+ }
+}
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/kernel/svm/svm_texture.h b/intern/cycles/kernel/svm/svm_texture.h
index 5fd9204cbf6..d97c85db36a 100644
--- a/intern/cycles/kernel/svm/svm_texture.h
+++ b/intern/cycles/kernel/svm/svm_texture.h
@@ -140,15 +140,15 @@ ccl_device float voronoi_F1_distance(float3 p)
}
}
#else
- __m128 vec_p = load_m128(p);
- __m128i xyzi = quick_floor_sse(vec_p);
+ ssef vec_p = load4f(p);
+ ssei xyzi = quick_floor_sse(vec_p);
for (int xx = -1; xx <= 1; xx++) {
for (int yy = -1; yy <= 1; yy++) {
for (int zz = -1; zz <= 1; zz++) {
- __m128 ip = _mm_cvtepi32_ps(_mm_add_epi32(xyzi, _mm_setr_epi32(xx, yy, zz, 0)));
- __m128 vp = _mm_add_ps(ip, cellnoise_color(ip));
- float d = len_squared<1, 1, 1, 0>(_mm_sub_ps(vec_p, vp));
+ ssef ip = ssef(xyzi + ssei(xx, yy, zz, 0));
+ ssef vp = ip + cellnoise_color(ip);
+ float d = len_squared<1, 1, 1, 0>(vec_p - vp);
da = min(d, da);
}
}
@@ -184,15 +184,15 @@ ccl_device float3 voronoi_F1_color(float3 p)
return cellnoise_color(pa);
#else
- __m128 pa, vec_p = load_m128(p);
- __m128i xyzi = quick_floor_sse(vec_p);
+ ssef pa, vec_p = load4f(p);
+ ssei xyzi = quick_floor_sse(vec_p);
for (int xx = -1; xx <= 1; xx++) {
for (int yy = -1; yy <= 1; yy++) {
for (int zz = -1; zz <= 1; zz++) {
- __m128 ip = _mm_cvtepi32_ps(_mm_add_epi32(xyzi, _mm_setr_epi32(xx, yy, zz, 0)));
- __m128 vp = _mm_add_ps(ip, cellnoise_color(ip));
- float d = len_squared<1, 1, 1, 0>(_mm_sub_ps(vec_p, vp));
+ ssef ip = ssef(xyzi + ssei(xx, yy, zz, 0));
+ ssef vp = ip + cellnoise_color(ip);
+ float d = len_squared<1, 1, 1, 0>(vec_p - vp);
if(d < da) {
da = d;
@@ -202,7 +202,7 @@ ccl_device float3 voronoi_F1_color(float3 p)
}
}
- __m128 color = cellnoise_color(pa);
+ ssef color = cellnoise_color(pa);
return (float3 &)color;
#endif
}
diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h
index 80972ec82bc..fbe669c1fab 100644
--- a/intern/cycles/kernel/svm/svm_types.h
+++ b/intern/cycles/kernel/svm/svm_types.h
@@ -72,15 +72,14 @@ typedef enum NodeType {
NODE_TEX_COORD,
NODE_TEX_COORD_BUMP_DX,
NODE_TEX_COORD_BUMP_DY,
- NODE_EMISSION_SET_WEIGHT_TOTAL,
NODE_ATTR_BUMP_DX,
NODE_ATTR_BUMP_DY,
NODE_TEX_ENVIRONMENT,
NODE_CLOSURE_HOLDOUT,
NODE_LAYER_WEIGHT,
NODE_CLOSURE_VOLUME,
- NODE_SEPARATE_RGB,
- NODE_COMBINE_RGB,
+ NODE_SEPARATE_VECTOR,
+ NODE_COMBINE_VECTOR,
NODE_SEPARATE_HSV,
NODE_COMBINE_HSV,
NODE_HSV,
@@ -349,7 +348,6 @@ typedef enum ClosureType {
/* Diffuse */
CLOSURE_BSDF_DIFFUSE_ID,
CLOSURE_BSDF_OREN_NAYAR_ID,
- CLOSURE_BSDF_WESTIN_SHEEN_ID,
CLOSURE_BSDF_DIFFUSE_RAMP_ID,
CLOSURE_BSDF_DIFFUSE_TOON_ID,
@@ -358,9 +356,11 @@ typedef enum ClosureType {
CLOSURE_BSDF_REFLECTION_ID,
CLOSURE_BSDF_MICROFACET_GGX_ID,
CLOSURE_BSDF_MICROFACET_BECKMANN_ID,
- CLOSURE_BSDF_WARD_ID,
+ CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID,
+ CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID,
+ CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID,
+ CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID,
CLOSURE_BSDF_ASHIKHMIN_VELVET_ID,
- CLOSURE_BSDF_WESTIN_BACKSCATTER_ID,
CLOSURE_BSDF_PHONG_RAMP_ID,
CLOSURE_BSDF_GLOSSY_TOON_ID,
CLOSURE_BSDF_HAIR_REFLECTION_ID,
@@ -404,7 +404,7 @@ typedef enum ClosureType {
#define CLOSURE_IS_BSDF_GLOSSY(type) (type >= CLOSURE_BSDF_GLOSSY_ID && type <= CLOSURE_BSDF_HAIR_REFLECTION_ID)
#define CLOSURE_IS_BSDF_TRANSMISSION(type) (type >= CLOSURE_BSDF_TRANSMISSION_ID && type <= CLOSURE_BSDF_HAIR_TRANSMISSION_ID)
#define CLOSURE_IS_BSDF_BSSRDF(type) (type == CLOSURE_BSDF_BSSRDF_ID)
-#define CLOSURE_IS_BSDF_ANISOTROPIC(type) (type == CLOSURE_BSDF_WARD_ID)
+#define CLOSURE_IS_BSDF_ANISOTROPIC(type) (type >= CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID && type <= CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID)
#define CLOSURE_IS_BSDF_OR_BSSRDF(type) (type <= CLOSURE_BSSRDF_GAUSSIAN_ID)
#define CLOSURE_IS_BSSRDF(type) (type >= CLOSURE_BSSRDF_CUBIC_ID && type <= CLOSURE_BSSRDF_GAUSSIAN_ID)
#define CLOSURE_IS_VOLUME(type) (type >= CLOSURE_VOLUME_ID && type <= CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID)