diff options
Diffstat (limited to 'intern/cycles/kernel')
241 files changed, 12237 insertions, 4224 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 8857f86890c..b44e91751ad 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -1,3 +1,4 @@ +remove_extra_strict_flags() set(INC . @@ -11,9 +12,20 @@ set(INC_SYS ) set(SRC - kernel.cpp - kernel.cl - kernel.cu + kernels/cpu/kernel.cpp + kernels/opencl/kernel.cl + kernels/opencl/kernel_data_init.cl + kernels/opencl/kernel_queue_enqueue.cl + kernels/opencl/kernel_scene_intersect.cl + kernels/opencl/kernel_lamp_emission.cl + kernels/opencl/kernel_background_buffer_update.cl + kernels/opencl/kernel_shader_eval.cl + kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl + kernels/opencl/kernel_direct_lighting.cl + kernels/opencl/kernel_shadow_blocked.cl + kernels/opencl/kernel_next_iteration_setup.cl + kernels/opencl/kernel_sum_all_radiance.cl + kernels/cuda/kernel.cu ) set(SRC_HEADERS @@ -24,6 +36,7 @@ set(SRC_HEADERS kernel_compat_cpu.h kernel_compat_cuda.h kernel_compat_opencl.h + kernel_debug.h kernel_differential.h kernel_emission.h kernel_film.h @@ -34,17 +47,22 @@ set(SRC_HEADERS kernel_montecarlo.h kernel_passes.h kernel_path.h + kernel_path_branched.h + kernel_path_common.h kernel_path_state.h kernel_path_surface.h kernel_path_volume.h kernel_projection.h + kernel_queues.h kernel_random.h kernel_shader.h + kernel_shaderdata_vars.h kernel_shadow.h kernel_subsurface.h kernel_textures.h kernel_types.h kernel_volume.h + kernel_work_stealing.h ) set(SRC_CLOSURE_HEADERS @@ -61,12 +79,12 @@ set(SRC_CLOSURE_HEADERS closure/bsdf_transparent.h closure/bsdf_util.h closure/bsdf_ashikhmin_shirley.h - closure/bsdf_westin.h closure/bsdf_hair.h closure/bssrdf.h closure/emissive.h closure/volume.h ) + set(SRC_SVM_HEADERS svm/svm.h svm/svm_attribute.h @@ -91,6 +109,7 @@ set(SRC_SVM_HEADERS svm/svm_magic.h svm/svm_mapping.h svm/svm_math.h + svm/svm_math_util.h svm/svm_mix.h svm/svm_musgrave.h svm/svm_noise.h @@ -106,6 +125,7 @@ set(SRC_SVM_HEADERS svm/svm_value.h svm/svm_vector_transform.h svm/svm_voronoi.h + svm/svm_voxel.h svm/svm_wave.h ) @@ -116,22 +136,48 @@ set(SRC_GEOM_HEADERS geom/geom_bvh_shadow.h geom/geom_bvh_subsurface.h geom/geom_bvh_traversal.h + geom/geom_bvh_volume.h + geom/geom_bvh_volume_all.h geom/geom_curve.h geom/geom_motion_curve.h geom/geom_motion_triangle.h geom/geom_object.h geom/geom_primitive.h + geom/geom_qbvh.h + geom/geom_qbvh_shadow.h + geom/geom_qbvh_subsurface.h + geom/geom_qbvh_traversal.h + geom/geom_qbvh_volume.h + geom/geom_qbvh_volume_all.h geom/geom_triangle.h + geom/geom_triangle_intersect.h geom/geom_volume.h ) set(SRC_UTIL_HEADERS + ../util/util_atomic.h ../util/util_color.h ../util/util_half.h ../util/util_math.h + ../util/util_math_fast.h ../util/util_transform.h ../util/util_types.h ) + +set(SRC_SPLIT_HEADERS + split/kernel_background_buffer_update.h + split/kernel_data_init.h + split/kernel_direct_lighting.h + split/kernel_holdout_emission_blurring_pathtermination_ao.h + split/kernel_lamp_emission.h + split/kernel_next_iteration_setup.h + split/kernel_scene_intersect.h + split/kernel_shader_eval.h + split/kernel_shadow_blocked.h + split/kernel_split_common.h + split/kernel_sum_all_radiance.h +) + # CUDA module if(WITH_CYCLES_CUDA_BINARIES) @@ -143,7 +189,7 @@ if(WITH_CYCLES_CUDA_BINARIES) endif() # CUDA version - execute_process (COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT) + execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT) string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${NVCC_OUT}) string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${NVCC_OUT}) set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}") @@ -157,18 +203,24 @@ if(WITH_CYCLES_CUDA_BINARIES) endif() # build for each arch - set(cuda_sources kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS}) + set(cuda_sources kernels/cuda/kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS}) set(cuda_cubins) macro(CYCLES_CUDA_KERNEL_ADD arch experimental) if(${experimental}) - set(cuda_extra_flags "-D__KERNEL_CUDA_EXPERIMENTAL__") + set(cuda_extra_flags "-D__KERNEL_EXPERIMENTAL__") set(cuda_cubin kernel_experimental_${arch}.cubin) else() set(cuda_extra_flags "") set(cuda_cubin kernel_${arch}.cubin) endif() + if(WITH_CYCLES_DEBUG) + set(cuda_debug_flags "-D__KERNEL_DEBUG__") + else() + set(cuda_debug_flags "") + endif() + set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}") set(cuda_math_flags "--use_fast_math") @@ -177,13 +229,14 @@ if(WITH_CYCLES_CUDA_BINARIES) COMMAND ${CUDA_NVCC_EXECUTABLE} -arch=${arch} -m${CUDA_BITS} - --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu + --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda/kernel.cu -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} --ptxas-options="-v" ${cuda_arch_flags} ${cuda_version_flags} ${cuda_math_flags} ${cuda_extra_flags} + ${cuda_debug_flags} -I${CMAKE_CURRENT_SOURCE_DIR}/../util -I${CMAKE_CURRENT_SOURCE_DIR}/svm -DCCL_NAMESPACE_BEGIN= @@ -196,6 +249,7 @@ if(WITH_CYCLES_CUDA_BINARIES) list(APPEND cuda_cubins ${cuda_cubin}) unset(cuda_extra_flags) + unset(cuda_debug_flags) endmacro() foreach(arch ${CYCLES_CUDA_BINARIES_ARCH}) @@ -223,20 +277,29 @@ include_directories(SYSTEM ${INC_SYS}) if(CXX_HAS_SSE) list(APPEND SRC - kernel_sse2.cpp - kernel_sse3.cpp - kernel_sse41.cpp - kernel_avx.cpp - kernel_avx2.cpp + kernels/cpu/kernel_sse2.cpp + kernels/cpu/kernel_sse3.cpp + kernels/cpu/kernel_sse41.cpp ) - set_source_files_properties(kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") - set_source_files_properties(kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") - set_source_files_properties(kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") - set_source_files_properties(kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") - set_source_files_properties(kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") endif() +if(CXX_HAS_AVX) + list(APPEND SRC + kernels/cpu/kernel_avx.cpp + ) + set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") +endif() + +if(CXX_HAS_AVX2) + list(APPEND SRC + kernels/cpu/kernel_avx2.cpp + ) + set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") +endif() add_library(cycles_kernel ${SRC} ${SRC_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS}) @@ -254,11 +317,23 @@ endif() #add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED}) #delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel.cl" ${CYCLES_INSTALL_PATH}/kernel) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel.cu" ${CYCLES_INSTALL_PATH}/kernel) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_data_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_intersect.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_background_buffer_update.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_eval.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_sum_all_radiance.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/closure) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/svm) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/geom) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_UTIL_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SPLIT_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/split) diff --git a/intern/cycles/kernel/SConscript b/intern/cycles/kernel/SConscript index 5a9e57c5342..e8d51013924 100644 --- a/intern/cycles/kernel/SConscript +++ b/intern/cycles/kernel/SConscript @@ -57,8 +57,9 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']: build_dir = os.path.join(root_build_dir, 'intern/cycles/kernel') # source directories and files + kernel_file_rel = os.path.join("kernels", "cuda", "kernel.cu") source_dir = Dir('.').srcnode().path - kernel_file = os.path.join(source_dir, "kernel.cu") + kernel_file = os.path.join(source_dir, kernel_file_rel) util_dir = os.path.join(source_dir, "../util") svm_dir = os.path.join(source_dir, "../svm") geom_dir = os.path.join(source_dir, "../geom") @@ -79,12 +80,15 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']: nvcc_flags += " -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC" nvcc_flags += " -I \"%s\" -I \"%s\" -I \"%s\" -I \"%s\"" % (util_dir, svm_dir, geom_dir, closure_dir) + if env['WITH_BF_CYCLES_DEBUG']: + nvcc_flags += " -D__KERNEL_DEBUG__" + # dependencies - dependencies = ['kernel.cu'] + kernel.Glob('*.h') + kernel.Glob('../util/*.h') + kernel.Glob('svm/*.h') + kernel.Glob('geom/*.h') + kernel.Glob('closure/*.h') + dependencies = [kernel_file_rel] + kernel.Glob('*.h') + kernel.Glob('../util/*.h') + kernel.Glob('svm/*.h') + kernel.Glob('geom/*.h') + kernel.Glob('closure/*.h') last_cubin_file = None configs = (("kernel_%s.cubin", ''), - ("kernel_experimental_%s.cubin", ' -D__KERNEL_CUDA_EXPERIMENTAL__')) + ("kernel_experimental_%s.cubin", ' -D__KERNEL_EXPERIMENTAL__')) # add command for each cuda architecture for arch in cuda_archs: @@ -102,7 +106,7 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']: else: command = "\"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (nvcc, arch, current_flags, kernel_file, cubin_file) - kernel.Command(cubin_file, 'kernel.cu', command) + kernel.Command(cubin_file, kernel_file_rel, command) kernel.Depends(cubin_file, dependencies) kernel_binaries.append(cubin_file) diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h index 9961071c2ac..558aa0dc6a9 100644 --- a/intern/cycles/kernel/closure/bsdf.h +++ b/intern/cycles/kernel/closure/bsdf.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "../closure/bsdf_ashikhmin_velvet.h" @@ -24,7 +24,6 @@ #include "../closure/bsdf_refraction.h" #include "../closure/bsdf_transparent.h" #include "../closure/bsdf_ashikhmin_shirley.h" -#include "../closure/bsdf_westin.h" #include "../closure/bsdf_toon.h" #include "../closure/bsdf_hair.h" #ifdef __SUBSURFACE__ @@ -48,87 +47,79 @@ ccl_device int bsdf_sample(KernelGlobals *kg, const ShaderData *sd, const Shader switch(sc->type) { case CLOSURE_BSDF_DIFFUSE_ID: case CLOSURE_BSDF_BSSRDF_ID: - label = bsdf_diffuse_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, + label = bsdf_diffuse_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; #ifdef __SVM__ case CLOSURE_BSDF_OREN_NAYAR_ID: - label = bsdf_oren_nayar_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, + label = bsdf_oren_nayar_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; /*case CLOSURE_BSDF_PHONG_RAMP_ID: - label = bsdf_phong_ramp_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, + label = bsdf_phong_ramp_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_DIFFUSE_RAMP_ID: - label = bsdf_diffuse_ramp_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, + label = bsdf_diffuse_ramp_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break;*/ case CLOSURE_BSDF_TRANSLUCENT_ID: - label = bsdf_translucent_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, + label = bsdf_translucent_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_REFLECTION_ID: - label = bsdf_reflection_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, + label = bsdf_reflection_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_REFRACTION_ID: - label = bsdf_refraction_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, + label = bsdf_refraction_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_TRANSPARENT_ID: - label = bsdf_transparent_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, + label = bsdf_transparent_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_MICROFACET_GGX_ID: case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID: case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: - label = bsdf_microfacet_ggx_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, + label = bsdf_microfacet_ggx_sample(kg, sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID: case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: - label = bsdf_microfacet_beckmann_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, + label = bsdf_microfacet_beckmann_sample(kg, sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: - label = bsdf_ashikhmin_shirley_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, + label = bsdf_ashikhmin_shirley_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: - label = bsdf_ashikhmin_velvet_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, + label = bsdf_ashikhmin_velvet_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_DIFFUSE_TOON_ID: - label = bsdf_diffuse_toon_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, + label = bsdf_diffuse_toon_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_GLOSSY_TOON_ID: - label = bsdf_glossy_toon_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; - case CLOSURE_BSDF_WESTIN_BACKSCATTER_ID: - label = bsdf_westin_backscatter_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; - case CLOSURE_BSDF_WESTIN_SHEEN_ID: - label = bsdf_westin_sheen_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, + label = bsdf_glossy_toon_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_HAIR_REFLECTION_ID: - label = bsdf_hair_reflection_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, + label = bsdf_hair_reflection_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: - label = bsdf_hair_transmission_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, + label = bsdf_hair_transmission_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; #endif #ifdef __VOLUME__ case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID: - label = volume_henyey_greenstein_sample(sc, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); + label = volume_henyey_greenstein_sample(sc, ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; #endif default: @@ -148,73 +139,67 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const Shade return OSLShader::bsdf_eval(sd, sc, omega_in, *pdf); #endif - if(dot(sd->Ng, omega_in) >= 0.0f) { + if(dot(ccl_fetch(sd, Ng), omega_in) >= 0.0f) { switch(sc->type) { case CLOSURE_BSDF_DIFFUSE_ID: case CLOSURE_BSDF_BSSRDF_ID: - eval = bsdf_diffuse_eval_reflect(sc, sd->I, omega_in, pdf); + eval = bsdf_diffuse_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); break; #ifdef __SVM__ case CLOSURE_BSDF_OREN_NAYAR_ID: - eval = bsdf_oren_nayar_eval_reflect(sc, sd->I, omega_in, pdf); + eval = bsdf_oren_nayar_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); break; /*case CLOSURE_BSDF_PHONG_RAMP_ID: - eval = bsdf_phong_ramp_eval_reflect(sc, sd->I, omega_in, pdf); + eval = bsdf_phong_ramp_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_DIFFUSE_RAMP_ID: - eval = bsdf_diffuse_ramp_eval_reflect(sc, sd->I, omega_in, pdf); + eval = bsdf_diffuse_ramp_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); break;*/ case CLOSURE_BSDF_TRANSLUCENT_ID: - eval = bsdf_translucent_eval_reflect(sc, sd->I, omega_in, pdf); + eval = bsdf_translucent_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_REFLECTION_ID: - eval = bsdf_reflection_eval_reflect(sc, sd->I, omega_in, pdf); + eval = bsdf_reflection_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_REFRACTION_ID: - eval = bsdf_refraction_eval_reflect(sc, sd->I, omega_in, pdf); + eval = bsdf_refraction_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_TRANSPARENT_ID: - eval = bsdf_transparent_eval_reflect(sc, sd->I, omega_in, pdf); + eval = bsdf_transparent_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_MICROFACET_GGX_ID: case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID: case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: - eval = bsdf_microfacet_ggx_eval_reflect(sc, sd->I, omega_in, pdf); + eval = bsdf_microfacet_ggx_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID: case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: - eval = bsdf_microfacet_beckmann_eval_reflect(sc, sd->I, omega_in, pdf); + eval = bsdf_microfacet_beckmann_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: - eval = bsdf_ashikhmin_shirley_eval_reflect(sc, sd->I, omega_in, pdf); + eval = bsdf_ashikhmin_shirley_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: - eval = bsdf_ashikhmin_velvet_eval_reflect(sc, sd->I, omega_in, pdf); + eval = bsdf_ashikhmin_velvet_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_DIFFUSE_TOON_ID: - eval = bsdf_diffuse_toon_eval_reflect(sc, sd->I, omega_in, pdf); + eval = bsdf_diffuse_toon_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_GLOSSY_TOON_ID: - eval = bsdf_glossy_toon_eval_reflect(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_WESTIN_BACKSCATTER_ID: - eval = bsdf_westin_backscatter_eval_reflect(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_WESTIN_SHEEN_ID: - eval = bsdf_westin_sheen_eval_reflect(sc, sd->I, omega_in, pdf); + eval = bsdf_glossy_toon_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_HAIR_REFLECTION_ID: - eval = bsdf_hair_reflection_eval_reflect(sc, sd->I, omega_in, pdf); + eval = bsdf_hair_reflection_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: - eval = bsdf_hair_transmission_eval_reflect(sc, sd->I, omega_in, pdf); + eval = bsdf_hair_transmission_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); break; #endif #ifdef __VOLUME__ case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID: - eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf); + eval = volume_henyey_greenstein_eval_phase(sc, ccl_fetch(sd, I), omega_in, pdf); break; #endif default: @@ -226,63 +211,57 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const Shade switch(sc->type) { case CLOSURE_BSDF_DIFFUSE_ID: case CLOSURE_BSDF_BSSRDF_ID: - eval = bsdf_diffuse_eval_transmit(sc, sd->I, omega_in, pdf); + eval = bsdf_diffuse_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); break; #ifdef __SVM__ case CLOSURE_BSDF_OREN_NAYAR_ID: - eval = bsdf_oren_nayar_eval_transmit(sc, sd->I, omega_in, pdf); + eval = bsdf_oren_nayar_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_TRANSLUCENT_ID: - eval = bsdf_translucent_eval_transmit(sc, sd->I, omega_in, pdf); + eval = bsdf_translucent_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_REFLECTION_ID: - eval = bsdf_reflection_eval_transmit(sc, sd->I, omega_in, pdf); + eval = bsdf_reflection_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_REFRACTION_ID: - eval = bsdf_refraction_eval_transmit(sc, sd->I, omega_in, pdf); + eval = bsdf_refraction_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_TRANSPARENT_ID: - eval = bsdf_transparent_eval_transmit(sc, sd->I, omega_in, pdf); + eval = bsdf_transparent_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_MICROFACET_GGX_ID: case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID: case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: - eval = bsdf_microfacet_ggx_eval_transmit(sc, sd->I, omega_in, pdf); + eval = bsdf_microfacet_ggx_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID: case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: - eval = bsdf_microfacet_beckmann_eval_transmit(sc, sd->I, omega_in, pdf); + eval = bsdf_microfacet_beckmann_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: - eval = bsdf_ashikhmin_shirley_eval_transmit(sc, sd->I, omega_in, pdf); + eval = bsdf_ashikhmin_shirley_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: - eval = bsdf_ashikhmin_velvet_eval_transmit(sc, sd->I, omega_in, pdf); + eval = bsdf_ashikhmin_velvet_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_DIFFUSE_TOON_ID: - eval = bsdf_diffuse_toon_eval_transmit(sc, sd->I, omega_in, pdf); + eval = bsdf_diffuse_toon_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_GLOSSY_TOON_ID: - eval = bsdf_glossy_toon_eval_transmit(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_WESTIN_BACKSCATTER_ID: - eval = bsdf_westin_backscatter_eval_transmit(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_WESTIN_SHEEN_ID: - eval = bsdf_westin_sheen_eval_transmit(sc, sd->I, omega_in, pdf); + eval = bsdf_glossy_toon_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_HAIR_REFLECTION_ID: - eval = bsdf_hair_reflection_eval_transmit(sc, sd->I, omega_in, pdf); + eval = bsdf_hair_reflection_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); break; case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: - eval = bsdf_hair_transmission_eval_transmit(sc, sd->I, omega_in, pdf); + eval = bsdf_hair_transmission_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); break; #endif #ifdef __VOLUME__ case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID: - eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf); + eval = volume_henyey_greenstein_eval_phase(sc, ccl_fetch(sd, I), omega_in, pdf); break; #endif default: @@ -296,6 +275,8 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const Shade ccl_device void bsdf_blur(KernelGlobals *kg, ShaderClosure *sc, float roughness) { +/* ToDo: do we want to blur volume closures? */ + #ifdef __OSL__ if(kg->osl && sc->prim) { OSLShader::bsdf_blur(sc, roughness); @@ -303,33 +284,8 @@ ccl_device void bsdf_blur(KernelGlobals *kg, ShaderClosure *sc, float roughness) } #endif - switch(sc->type) { - case CLOSURE_BSDF_DIFFUSE_ID: - case CLOSURE_BSDF_BSSRDF_ID: - bsdf_diffuse_blur(sc, roughness); - break; #ifdef __SVM__ - case CLOSURE_BSDF_OREN_NAYAR_ID: - bsdf_oren_nayar_blur(sc, roughness); - break; - /*case CLOSURE_BSDF_PHONG_RAMP_ID: - bsdf_phong_ramp_blur(sc, roughness); - break; - case CLOSURE_BSDF_DIFFUSE_RAMP_ID: - bsdf_diffuse_ramp_blur(sc, roughness); - break;*/ - case CLOSURE_BSDF_TRANSLUCENT_ID: - bsdf_translucent_blur(sc, roughness); - break; - case CLOSURE_BSDF_REFLECTION_ID: - bsdf_reflection_blur(sc, roughness); - break; - case CLOSURE_BSDF_REFRACTION_ID: - bsdf_refraction_blur(sc, roughness); - break; - case CLOSURE_BSDF_TRANSPARENT_ID: - bsdf_transparent_blur(sc, roughness); - break; + switch(sc->type) { case CLOSURE_BSDF_MICROFACET_GGX_ID: case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID: case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: @@ -344,30 +300,10 @@ ccl_device void bsdf_blur(KernelGlobals *kg, ShaderClosure *sc, float roughness) case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: bsdf_ashikhmin_shirley_blur(sc, roughness); break; - case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: - bsdf_ashikhmin_velvet_blur(sc, roughness); - break; - case CLOSURE_BSDF_DIFFUSE_TOON_ID: - bsdf_diffuse_toon_blur(sc, roughness); - break; - case CLOSURE_BSDF_GLOSSY_TOON_ID: - bsdf_glossy_toon_blur(sc, roughness); - break; - case CLOSURE_BSDF_WESTIN_BACKSCATTER_ID: - bsdf_westin_backscatter_blur(sc, roughness); - break; - case CLOSURE_BSDF_WESTIN_SHEEN_ID: - bsdf_westin_sheen_blur(sc, roughness); - break; - case CLOSURE_BSDF_HAIR_REFLECTION_ID: - case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: - bsdf_hair_reflection_blur(sc, roughness); - break; -#endif - /* todo: do we want to blur volume closures? */ default: break; } +#endif } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h index ad7864cb8ea..8d7d533d6f8 100644 --- a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h +++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h @@ -1,5 +1,5 @@ /* - * Copyright 2011-2013 Blender Foundation + * Copyright 2011-2014 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __BSDF_ASHIKHMIN_SHIRLEY_H__ @@ -33,24 +33,20 @@ CCL_NAMESPACE_BEGIN ccl_device int bsdf_ashikhmin_shirley_setup(ShaderClosure *sc) { - /* store roughness. could already convert to exponent to save some cycles - * in eval, but this is more consistent with other bsdfs and shader_blur. */ sc->data0 = clamp(sc->data0, 1e-4f, 1.0f); sc->data1 = sc->data0; sc->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID; - return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_GLOSSY; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device int bsdf_ashikhmin_shirley_aniso_setup(ShaderClosure *sc) { - /* store roughness. could already convert to exponent to save some cycles - * in eval, but this is more consistent with other bsdfs and shader_blur. */ sc->data0 = clamp(sc->data0, 1e-4f, 1.0f); sc->data1 = clamp(sc->data1, 1e-4f, 1.0f); sc->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID; - return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_GLOSSY; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device void bsdf_ashikhmin_shirley_blur(ShaderClosure *sc, float roughness) @@ -73,7 +69,10 @@ ccl_device float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc, c float out = 0.0f; - if (NdotI > 0.0f && NdotO > 0.0f) { + if(fmaxf(sc->data0, sc->data1) <= 1e-4f) + return make_float3(0.0f, 0.0f, 0.0f); + + if(NdotI > 0.0f && NdotO > 0.0f) { NdotI = fmaxf(NdotI, 1e-6f); NdotO = fmaxf(NdotO, 1e-6f); float3 H = normalize(omega_in + I); @@ -86,7 +85,8 @@ ccl_device float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc, c float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(sc->data0); float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(sc->data1); - if (n_x == n_y) { /* => isotropic case */ + if(n_x == n_y) { + /* isotropic */ float e = n_x; float lobe = powf(HdotN, e); float norm = (n_x + 1.0f) / (8.0f * M_PI_F); @@ -94,7 +94,8 @@ ccl_device float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc, c out = NdotO * norm * lobe * pump; *pdf = norm * lobe / HdotI; /* this is p_h / 4(H.I) (conversion from 'wh measure' to 'wi measure', eq. 8 in paper) */ } - else { /* => ANisotropic case */ + else { + /* anisotropic */ float3 X, Y; make_orthonormals_tangent(N, sc->T, &X, &Y); @@ -130,7 +131,7 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng, float3 N = sc->N; float NdotI = dot(N, I); - if (NdotI > 0.0f) { + if(NdotI > 0.0f) { float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(sc->data0); float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(sc->data1); @@ -146,21 +147,23 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng, /* sample spherical coords for h in tangent space */ float phi; float cos_theta; - if (n_x == n_y) { /* => simple isotropic sampling */ + if(n_x == n_y) { + /* isotropic sampling */ phi = M_2PI_F * randu; cos_theta = powf(randv, 1.0f / (n_x + 1.0f)); } - else { /* => more complex anisotropic sampling */ - if (randu < 0.25f) { /* first quadrant */ + else { + /* anisotropic sampling */ + if(randu < 0.25f) { /* first quadrant */ float remapped_randu = 4.0f * randu; bsdf_ashikhmin_shirley_sample_first_quadrant(n_x, n_y, remapped_randu, randv, &phi, &cos_theta); } - else if (randu < 0.5f) { /* second quadrant */ + else if(randu < 0.5f) { /* second quadrant */ float remapped_randu = 4.0f * (.5f - randu); bsdf_ashikhmin_shirley_sample_first_quadrant(n_x, n_y, remapped_randu, randv, &phi, &cos_theta); phi = M_PI_F - phi; } - else if (randu < 0.75f) { /* third quadrant */ + else if(randu < 0.75f) { /* third quadrant */ float remapped_randu = 4.0f * (randu - 0.5f); bsdf_ashikhmin_shirley_sample_first_quadrant(n_x, n_y, remapped_randu, randv, &phi, &cos_theta); phi = M_PI_F + phi; @@ -185,14 +188,20 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng, /* half vector to world space */ float3 H = h.x*X + h.y*Y + h.z*N; float HdotI = dot(H, I); - if (HdotI < 0.0f) H = -H; + if(HdotI < 0.0f) H = -H; /* reflect I on H to get omega_in */ *omega_in = -I + (2.0f * HdotI) * H; - /* leave the rest to eval_reflect */ - /* (could maybe optimize a few things by manual inlining, but I doubt it would make much difference) */ - *eval = bsdf_ashikhmin_shirley_eval_reflect(sc, I, *omega_in, pdf); + if(fmaxf(sc->data0, sc->data1) <= 1e-4f) { + /* Some high number for MIS. */ + *pdf = 1e6f; + *eval = make_float3(1e6f, 1e6f, 1e6f); + } + else { + /* leave the rest to eval_reflect */ + *eval = bsdf_ashikhmin_shirley_eval_reflect(sc, I, *omega_in, pdf); + } #ifdef __RAY_DIFFERENTIALS__ /* just do the reflection thing for now */ @@ -201,7 +210,7 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng, #endif } - return LABEL_REFLECT | LABEL_GLOSSY; + return LABEL_REFLECT|LABEL_GLOSSY; } diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h index 3631f90bf8c..f1a26650078 100644 --- a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h +++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h @@ -45,10 +45,6 @@ ccl_device int bsdf_ashikhmin_velvet_setup(ShaderClosure *sc) return SD_BSDF|SD_BSDF_HAS_EVAL; } -ccl_device void bsdf_ashikhmin_velvet_blur(ShaderClosure *sc, float roughness) -{ -} - ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { float m_invsigma2 = sc->data0; @@ -63,7 +59,7 @@ ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(const ShaderClosure *sc, co float cosHO = fabsf(dot(I, H)); if(!(fabsf(cosNH) < 1.0f-1e-5f && cosHO > 1e-5f)) - return make_float3(0, 0, 0); + return make_float3(0.0f, 0.0f, 0.0f); float cosNHdivHO = cosNH / cosHO; cosNHdivHO = fmaxf(cosNHdivHO, 1e-5f); @@ -84,7 +80,7 @@ ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(const ShaderClosure *sc, co return make_float3(out, out, out); } - return make_float3(0, 0, 0); + return make_float3(0.0f, 0.0f, 0.0f); } ccl_device float3 bsdf_ashikhmin_velvet_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) @@ -118,7 +114,7 @@ ccl_device int bsdf_ashikhmin_velvet_sample(const ShaderClosure *sc, float3 Ng, float sinNH2 = 1 - cosNH * cosNH; float sinNH4 = sinNH2 * sinNH2; - float cotangent2 = (cosNH * cosNH) / sinNH2; + float cotangent2 = (cosNH * cosNH) / sinNH2; float D = expf(-cotangent2 * m_invsigma2) * m_invsigma2 * M_1_PI_F / sinNH4; float G = min(1.0f, min(fac1, fac2)); // TODO: derive G from D analytically diff --git a/intern/cycles/kernel/closure/bsdf_diffuse.h b/intern/cycles/kernel/closure/bsdf_diffuse.h index 949fe869549..4b29bb096d1 100644 --- a/intern/cycles/kernel/closure/bsdf_diffuse.h +++ b/intern/cycles/kernel/closure/bsdf_diffuse.h @@ -43,10 +43,6 @@ ccl_device int bsdf_diffuse_setup(ShaderClosure *sc) return SD_BSDF|SD_BSDF_HAS_EVAL; } -ccl_device void bsdf_diffuse_blur(ShaderClosure *sc, float roughness) -{ -} - ccl_device float3 bsdf_diffuse_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { float3 N = sc->N; @@ -90,10 +86,6 @@ ccl_device int bsdf_translucent_setup(ShaderClosure *sc) return SD_BSDF|SD_BSDF_HAS_EVAL; } -ccl_device void bsdf_translucent_blur(ShaderClosure *sc, float roughness) -{ -} - ccl_device float3 bsdf_translucent_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { return make_float3(0.0f, 0.0f, 0.0f); @@ -108,11 +100,6 @@ ccl_device float3 bsdf_translucent_eval_transmit(const ShaderClosure *sc, const return make_float3 (cos_pi, cos_pi, cos_pi); } -ccl_device float bsdf_translucent_albedo(const ShaderClosure *sc, const float3 I) -{ - return 1.0f; -} - ccl_device int bsdf_translucent_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) { float3 N = sc->N; diff --git a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h index b856774375f..e0287e7655a 100644 --- a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h +++ b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h @@ -41,9 +41,9 @@ ccl_device float3 bsdf_diffuse_ramp_get_color(const ShaderClosure *sc, const flo float npos = pos * (float)(MAXCOLORS - 1); int ipos = float_to_int(npos); - if (ipos < 0) + if(ipos < 0) return colors[0]; - if (ipos >= (MAXCOLORS - 1)) + if(ipos >= (MAXCOLORS - 1)) return colors[MAXCOLORS - 1]; float offset = npos - (float)ipos; return colors[ipos] * (1.0f - offset) + colors[ipos+1] * offset; @@ -52,7 +52,9 @@ ccl_device float3 bsdf_diffuse_ramp_get_color(const ShaderClosure *sc, const flo ccl_device int bsdf_diffuse_ramp_setup(ShaderClosure *sc) { sc->type = CLOSURE_BSDF_DIFFUSE_RAMP_ID; - return SD_BSDF | SD_BSDF_HAS_EVAL; + sc->data0 = 0.0f; + sc->data1 = 0.0f; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device void bsdf_diffuse_ramp_blur(ShaderClosure *sc, float roughness) diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h index e0b5454592b..1e81617a7d3 100644 --- a/intern/cycles/kernel/closure/bsdf_hair.h +++ b/intern/cycles/kernel/closure/bsdf_hair.h @@ -36,20 +36,12 @@ CCL_NAMESPACE_BEGIN -ccl_device void bsdf_hair_reflection_blur(ShaderClosure *sc, float roughness) -{ -} - -ccl_device void bsdf_hair_transmission_blur(ShaderClosure *sc, float roughness) -{ -} - ccl_device int bsdf_hair_reflection_setup(ShaderClosure *sc) { sc->type = CLOSURE_BSDF_HAIR_REFLECTION_ID; sc->data0 = clamp(sc->data0, 0.001f, 1.0f); sc->data1 = clamp(sc->data1, 0.001f, 1.0f); - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device int bsdf_hair_transmission_setup(ShaderClosure *sc) @@ -57,31 +49,25 @@ ccl_device int bsdf_hair_transmission_setup(ShaderClosure *sc) sc->type = CLOSURE_BSDF_HAIR_TRANSMISSION_ID; sc->data0 = clamp(sc->data0, 0.001f, 1.0f); sc->data1 = clamp(sc->data1, 0.001f, 1.0f); - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { -#ifdef __HAIR__ float offset = sc->data2; float3 Tg = sc->T; -#else - float offset = 0.0f; - float3 Tg = make_float3(1.0f, 0.0f, 0.0f); -#endif float roughness1 = sc->data0; float roughness2 = sc->data1; float Iz = dot(Tg, I); float3 locy = normalize(I - Tg * Iz); - //float3 locx = cross(locy, Tg); - float theta_r = M_PI_2_F - safe_acosf(Iz); + float theta_r = M_PI_2_F - fast_acosf(Iz); float omega_in_z = dot(Tg, omega_in); float3 omega_in_y = normalize(omega_in - Tg * omega_in_z); - float theta_i = M_PI_2_F - safe_acosf(omega_in_z); + float theta_i = M_PI_2_F - fast_acosf(omega_in_z); float cosphi_i = dot(omega_in_y, locy); if(M_PI_2_F - fabsf(theta_i) < 0.001f || cosphi_i < 0.0f) { @@ -89,17 +75,19 @@ ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc, con return make_float3(*pdf, *pdf, *pdf); } - float phi_i = safe_acosf(cosphi_i) / roughness2; + float roughness1_inv = 1.0f / roughness1; + float roughness2_inv = 1.0f / roughness2; + float phi_i = fast_acosf(cosphi_i) * roughness2_inv; phi_i = fabsf(phi_i) < M_PI_F ? phi_i : M_PI_F; - float costheta_i = cosf(theta_i); + float costheta_i = fast_cosf(theta_i); - float a_R = atan2f(((M_PI_2_F + theta_r) * 0.5f - offset) / roughness1, 1.0f); - float b_R = atan2f(((-M_PI_2_F + theta_r) * 0.5f - offset) / roughness1, 1.0f); + float a_R = fast_atan2f(((M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f); + float b_R = fast_atan2f(((-M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f); float theta_h = (theta_i + theta_r) * 0.5f; float t = theta_h - offset; - float phi_pdf = cosf(phi_i * 0.5f) * 0.25f / roughness2; + float phi_pdf = fast_cosf(phi_i * 0.5f) * 0.25f * roughness2_inv; float theta_pdf = roughness1 / (2 * (t*t + roughness1*roughness1) * (a_R - b_R)* costheta_i); *pdf = phi_pdf * theta_pdf; @@ -119,37 +107,32 @@ ccl_device float3 bsdf_hair_reflection_eval_transmit(const ShaderClosure *sc, co ccl_device float3 bsdf_hair_transmission_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { -#ifdef __HAIR__ float offset = sc->data2; float3 Tg = sc->T; -#else - float offset = 0.0f; - float3 Tg = make_float3(1.0f, 0.0f, 0.0f); -#endif float roughness1 = sc->data0; float roughness2 = sc->data1; float Iz = dot(Tg, I); float3 locy = normalize(I - Tg * Iz); - //float3 locx = cross(locy, Tg); - float theta_r = M_PI_2_F - safe_acosf(Iz); + float theta_r = M_PI_2_F - fast_acosf(Iz); float omega_in_z = dot(Tg, omega_in); float3 omega_in_y = normalize(omega_in - Tg * omega_in_z); - float theta_i = M_PI_2_F - safe_acosf(omega_in_z); - float phi_i = safe_acosf(dot(omega_in_y, locy)); + float theta_i = M_PI_2_F - fast_acosf(omega_in_z); + float phi_i = fast_acosf(dot(omega_in_y, locy)); if(M_PI_2_F - fabsf(theta_i) < 0.001f) { *pdf = 0.0f; return make_float3(*pdf, *pdf, *pdf); } - float costheta_i = cosf(theta_i); + float costheta_i = fast_cosf(theta_i); - float a_TT = atan2f(((M_PI_2_F + theta_r)/2 - offset) / roughness1, 1.0f); - float b_TT = atan2f(((-M_PI_2_F + theta_r)/2 - offset) / roughness1, 1.0f); - float c_TT = 2 * atan2f(M_PI_2_F / roughness2, 1.0f); + float roughness1_inv = 1.0f / roughness1; + float a_TT = fast_atan2f(((M_PI_2_F + theta_r)/2 - offset) * roughness1_inv, 1.0f); + float b_TT = fast_atan2f(((-M_PI_2_F + theta_r)/2 - offset) * roughness1_inv, 1.0f); + float c_TT = 2 * fast_atan2f(M_PI_2_F / roughness2, 1.0f); float theta_h = (theta_i + theta_r) / 2; float t = theta_h - offset; @@ -165,39 +148,38 @@ ccl_device float3 bsdf_hair_transmission_eval_transmit(const ShaderClosure *sc, ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) { -#ifdef __HAIR__ float offset = sc->data2; float3 Tg = sc->T; -#else - float offset = 0.0f; - float3 Tg = make_float3(1.0f, 0.0f, 0.0f); -#endif float roughness1 = sc->data0; float roughness2 = sc->data1; float Iz = dot(Tg, I); float3 locy = normalize(I - Tg * Iz); float3 locx = cross(locy, Tg); - float theta_r = M_PI_2_F - safe_acosf(Iz); + float theta_r = M_PI_2_F - fast_acosf(Iz); - float a_R = atan2f(((M_PI_2_F + theta_r) * 0.5f - offset) / roughness1, 1.0f); - float b_R = atan2f(((-M_PI_2_F + theta_r) * 0.5f - offset) / roughness1, 1.0f); + float roughness1_inv = 1.0f / roughness1; + float a_R = fast_atan2f(((M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f); + float b_R = fast_atan2f(((-M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f); float t = roughness1 * tanf(randu * (a_R - b_R) + b_R); float theta_h = t + offset; float theta_i = 2 * theta_h - theta_r; - float costheta_i = cosf(theta_i); - float sintheta_i = sinf(theta_i); + + float costheta_i, sintheta_i; + fast_sincosf(theta_i, &sintheta_i, &costheta_i); float phi = 2 * safe_asinf(1 - 2 * randv) * roughness2; - float phi_pdf = cosf(phi * 0.5f) * 0.25f / roughness2; + float phi_pdf = fast_cosf(phi * 0.5f) * 0.25f / roughness2; float theta_pdf = roughness1 / (2 * (t*t + roughness1*roughness1) * (a_R - b_R)*costheta_i); - *omega_in =(cosf(phi) * costheta_i) * locy - - (sinf(phi) * costheta_i) * locx + - ( sintheta_i) * Tg; + float sinphi, cosphi; + fast_sincosf(phi, &sinphi, &cosphi); + *omega_in =(cosphi * costheta_i) * locy - + (sinphi * costheta_i) * locx + + ( sintheta_i) * Tg; //differentials - TODO: find a better approximation for the reflective bounce #ifdef __RAY_DIFFERENTIALS__ @@ -211,48 +193,43 @@ ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc, float3 Ng, f *eval = make_float3(*pdf, *pdf, *pdf); - if(dot(locy, *omega_in) < 0.0f) { - return LABEL_REFLECT|LABEL_TRANSMIT|LABEL_GLOSSY; - } - return LABEL_REFLECT|LABEL_GLOSSY; } ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) { -#ifdef __HAIR__ float offset = sc->data2; float3 Tg = sc->T; -#else - float offset = 0.0f; - float3 Tg = make_float3(1.0f, 0.0f, 0.0f); -#endif float roughness1 = sc->data0; float roughness2 = sc->data1; float Iz = dot(Tg, I); float3 locy = normalize(I - Tg * Iz); float3 locx = cross(locy, Tg); - float theta_r = M_PI_2_F - safe_acosf(Iz); + float theta_r = M_PI_2_F - fast_acosf(Iz); - float a_TT = atan2f(((M_PI_2_F + theta_r)/2 - offset) / roughness1, 1.0f); - float b_TT = atan2f(((-M_PI_2_F + theta_r)/2 - offset) / roughness1, 1.0f); - float c_TT = 2 * atan2f(M_PI_2_F / roughness2, 1.0f); + float roughness1_inv = 1.0f / roughness1; + float a_TT = fast_atan2f(((M_PI_2_F + theta_r)/2 - offset) * roughness1_inv, 1.0f); + float b_TT = fast_atan2f(((-M_PI_2_F + theta_r)/2 - offset) * roughness1_inv, 1.0f); + float c_TT = 2 * fast_atan2f(M_PI_2_F / roughness2, 1.0f); float t = roughness1 * tanf(randu * (a_TT - b_TT) + b_TT); float theta_h = t + offset; float theta_i = 2 * theta_h - theta_r; - float costheta_i = cosf(theta_i); - float sintheta_i = sinf(theta_i); + + float costheta_i, sintheta_i; + fast_sincosf(theta_i, &sintheta_i, &costheta_i); float p = roughness2 * tanf(c_TT * (randv - 0.5f)); float phi = p + M_PI_F; float theta_pdf = roughness1 / (2 * (t*t + roughness1*roughness1) * (a_TT - b_TT) * costheta_i); float phi_pdf = roughness2 / (c_TT * (p * p + roughness2 * roughness2)); - *omega_in =(cosf(phi) * costheta_i) * locy - - (sinf(phi) * costheta_i) * locx + - ( sintheta_i) * Tg; + float sinphi, cosphi; + fast_sincosf(phi, &sinphi, &cosphi); + *omega_in =(cosphi * costheta_i) * locy - + (sinphi * costheta_i) * locx + + ( sintheta_i) * Tg; //differentials - TODO: find a better approximation for the transmission bounce #ifdef __RAY_DIFFERENTIALS__ @@ -267,10 +244,9 @@ ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, float3 Ng, *eval = make_float3(*pdf, *pdf, *pdf); - if(dot(locy, *omega_in) < 0.0f) - return LABEL_TRANSMIT|LABEL_GLOSSY; - - return LABEL_GLOSSY; + kernel_assert(dot(locy, *omega_in) < 0.0f); + + return LABEL_TRANSMIT|LABEL_GLOSSY; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h index a0c59e6cbc0..2a0e8f62e7c 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet.h @@ -35,145 +35,7 @@ CCL_NAMESPACE_BEGIN -/* Approximate erf and erfinv implementations - * - * Adapted from code (C) Copyright John Maddock 2006. - * Use, modification and distribution are subject to the - * Boost Software License, Version 1.0. (See accompanying file - * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) */ - -ccl_device float approx_erff_impl(float z) -{ - float result; - - if(z < 0.5f) { - if(z < 1e-10f) { - if(z == 0) { - result = 0; - } - else { - float c = 0.0033791670f; - result = z * 1.125f + z * c; - } - } - else { - float Y = 1.044948577f; - - float zz = z * z; - float num = (((-0.007727583f * zz) + -0.050999073f)*zz + -0.338165134f)*zz + 0.083430589f; - float denom = (((0.000370900f * zz) + 0.008585719f)*zz + 0.087522260f)*zz + 0.455004033f; - result = z * (Y + num / denom); - } - } - else if(z < 2.5f) { - if(z < 1.5f) { - float Y = 0.4059357643f; - float fz = z - 0.5f; - - float num = (((0.088890036f * fz) + 0.191003695f)*fz + 0.178114665f)*fz + -0.098090592f; - float denom = (((0.123850974f * fz) + 0.578052804f)*fz + 1.426280048f)*fz + 1.847590709f; - - result = Y + num / denom; - result *= expf(-z * z) / z; - } - else { - float Y = 0.506728172f; - float fz = z - 1.5f; - float num = (((0.017567943f * fz) + 0.043948189f)*fz + 0.038654037f)*fz + -0.024350047f; - float denom = (((0.325732924f * fz) + 0.982403709f)*fz + 1.539914949f)*fz + 1; - - result = Y + num / denom; - result *= expf(-z * z) / z; - } - - result = 1 - result; - } - else { - result = 1; - } - - return result; -} - -ccl_device float approx_erff(float z) -{ - float s = 1.0f; - - if(z < 0.0f) { - s = -1.0f; - z = -z; - } - - return s * approx_erff_impl(z); -} - -ccl_device float approx_erfinvf_impl(float p, float q) -{ - float result = 0; - - if(p <= 0.5f) { - float Y = 0.089131474f; - float g = p * (p + 10); - float num = (((-0.012692614f * p) + 0.033480662f)*p + -0.008368748f)*p + -0.000508781f; - float denom = (((1.562215583f * p) + -1.565745582f)*p + -0.970005043f)*p + 1.0f; - float r = num / denom; - result = g * Y + g * r; - } - else if(q >= 0.25f) { - float Y = 2.249481201f; - float g = sqrtf(-2 * logf(q)); - float xs = q - 0.25f; - float num = (((17.644729840f * xs) + 8.370503283f)*xs + 0.105264680f)*xs + -0.202433508f; - float denom = (((-28.660818049f * xs) + 3.971343795f)*xs + 6.242641248f)*xs + 1.0f; - float r = num / denom; - result = g / (Y + r); - } - else { - float x = sqrtf(-logf(q)); - - if(x < 3) { - float Y = 0.807220458f; - float xs = x - 1.125f; - float num = (((0.387079738f * xs) + 0.117030156f)*xs + -0.163794047f)*xs + -0.131102781f; - float denom = (((4.778465929f * xs) + 5.381683457f)*xs + 3.466254072f)*xs + 1.0f; - float R = num / denom; - result = Y * x + R * x; - } - else { - float Y = 0.939955711f; - float xs = x - 3; - float num = (((0.009508047f * xs) + 0.018557330f)*xs + -0.002224265f)*xs + -0.035035378f; - float denom = (((0.220091105f * xs) + 0.762059164f)*xs + 1.365334981f)*xs + 1.0f; - float R = num / denom; - result = Y * x + R * x; - } - } - - return result; -} - -ccl_device float approx_erfinvf(float z) -{ - float p, q, s; - - if(z < 0) { - p = -z; - q = 1 - p; - s = -1; - } - else { - p = z; - q = 1 - z; - s = 1; - } - - return s * approx_erfinvf_impl(p, q); -} - -/* Beckmann and GGX microfacet importance sampling from: - * - * Importance Sampling Microfacet-Based BSDFs using the Distribution of Visible Normals. - * E. Heitz and E. d'Eon, EGSR 2014 */ +/* Beckmann and GGX microfacet importance sampling. */ ccl_device_inline void microfacet_beckmann_sample_slopes( KernelGlobals *kg, @@ -194,64 +56,71 @@ ccl_device_inline void microfacet_beckmann_sample_slopes( /* precomputations */ const float tan_theta_i = sin_theta_i/cos_theta_i; const float inv_a = tan_theta_i; - const float a = 1.0f/inv_a; - const float erf_a = approx_erff(a); - const float exp_a2 = expf(-a*a); + const float cot_theta_i = 1.0f/tan_theta_i; + const float erf_a = fast_erff(cot_theta_i); + const float exp_a2 = expf(-cot_theta_i*cot_theta_i); const float SQRT_PI_INV = 0.56418958354f; const float Lambda = 0.5f*(erf_a - 1.0f) + (0.5f*SQRT_PI_INV)*(exp_a2*inv_a); const float G1 = 1.0f/(1.0f + Lambda); /* masking */ *G1i = G1; -#if 0 - const float C = 1.0f - G1 * erf_a; - - /* sample slope X */ - if(randu < C) { - /* rescale randu */ - randu = randu / C; - const float w_1 = 0.5f * SQRT_PI_INV * sin_theta_i * exp_a2; - const float w_2 = cos_theta_i * (0.5f - 0.5f*erf_a); - const float p = w_1 / (w_1 + w_2); - - if(randu < p) { - randu = randu / p; - *slope_x = -sqrtf(-logf(randu*exp_a2)); - } - else { - randu = (randu - p) / (1.0f - p); - *slope_x = approx_erfinvf(randu - 1.0f - randu*erf_a); - } +#if defined(__KERNEL_GPU__) + /* Based on paper from Wenzel Jakob + * An Improved Visible Normal Sampling Routine for the Beckmann Distribution + * + * http://www.mitsuba-renderer.org/~wenzel/files/visnormal.pdf + * + * Reformulation from OpenShadingLanguage which avoids using inverse + * trigonometric functions. + */ + + /* Sample slope X. + * + * Compute a coarse approximation using the approximation: + * exp(-ierf(x)^2) ~= 1 - x * x + * solve y = 1 + b + K * (1 - b * b) + */ + float K = tan_theta_i * SQRT_PI_INV; + float y_approx = randu * (1.0f + erf_a + K * (1 - erf_a * erf_a)); + float y_exact = randu * (1.0f + erf_a + K * exp_a2); + float b = K > 0 ? (0.5f - sqrtf(K * (K - y_approx + 1.0f) + 0.25f)) / K : y_approx - 1.0f; + + /* Perform newton step to refine toward the true root. */ + float inv_erf = fast_ierff(b); + float value = 1.0f + b + K * expf(-inv_erf * inv_erf) - y_exact; + /* Check if we are close enough already, + * this also avoids NaNs as we get close to the root. + */ + if(fabsf(value) > 1e-6f) { + b -= value / (1.0f - inv_erf * tan_theta_i); /* newton step 1. */ + inv_erf = fast_ierff(b); + value = 1.0f + b + K * expf(-inv_erf * inv_erf) - y_exact; + b -= value / (1.0f - inv_erf * tan_theta_i); /* newton step 2. */ + /* Compute the slope from the refined value. */ + *slope_x = fast_ierff(b); } else { - /* rescale randu */ - randu = (randu - C) / (1.0f - C); - *slope_x = approx_erfinvf((-1.0f + 2.0f*randu)*erf_a); - - const float p = (-(*slope_x)*sin_theta_i + cos_theta_i) / (2.0f*cos_theta_i); - - if(randv > p) { - *slope_x = -(*slope_x); - randv = (randv - p) / (1.0f - p); - } - else - randv = randv / p; + /* We are close enough already. */ + *slope_x = inv_erf; } - - /* sample slope Y */ - *slope_y = approx_erfinvf(2.0f*randv - 1.0f); + *slope_y = fast_ierff(2.0f*randv - 1.0f); #else - /* use precomputed table, because it better preserves stratification - * of the random number pattern */ + /* Use precomputed table on CPU, it gives better perfomance. */ int beckmann_table_offset = kernel_data.tables.beckmann_offset; *slope_x = lookup_table_read_2D(kg, randu, cos_theta_i, beckmann_table_offset, BECKMANN_TABLE_SIZE, BECKMANN_TABLE_SIZE); - *slope_y = approx_erfinvf(2.0f*randv - 1.0f); + *slope_y = fast_ierff(2.0f*randv - 1.0f); #endif - } +/* GGX microfacet importance sampling from: + * + * Importance Sampling Microfacet-Based BSDFs using the Distribution of Visible Normals. + * E. Heitz and E. d'Eon, EGSR 2014 + */ + ccl_device_inline void microfacet_ggx_sample_slopes( const float cos_theta_i, const float sin_theta_i, float randu, float randv, float *slope_x, float *slope_y, @@ -366,32 +235,32 @@ ccl_device_inline float3 microfacet_sample_stretched( ccl_device int bsdf_microfacet_ggx_setup(ShaderClosure *sc) { - sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* alpha_x */ + sc->data0 = saturate(sc->data0); /* alpha_x */ sc->data1 = sc->data0; /* alpha_y */ sc->type = CLOSURE_BSDF_MICROFACET_GGX_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device int bsdf_microfacet_ggx_aniso_setup(ShaderClosure *sc) { - sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* alpha_x */ - sc->data1 = clamp(sc->data1, 0.0f, 1.0f); /* alpha_y */ + sc->data0 = saturate(sc->data0); /* alpha_x */ + sc->data1 = saturate(sc->data1); /* alpha_y */ sc->type = CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device int bsdf_microfacet_ggx_refraction_setup(ShaderClosure *sc) { - sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* alpha_x */ + sc->data0 = saturate(sc->data0); /* alpha_x */ sc->data1 = sc->data0; /* alpha_y */ sc->type = CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device void bsdf_microfacet_ggx_blur(ShaderClosure *sc, float roughness) @@ -404,11 +273,11 @@ ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc, cons { float alpha_x = sc->data0; float alpha_y = sc->data1; - int m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; + bool m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; float3 N = sc->N; if(m_refractive || fmaxf(alpha_x, alpha_y) <= 1e-4f) - return make_float3(0, 0, 0); + return make_float3(0.0f, 0.0f, 0.0f); float cosNO = dot(N, I); float cosNI = dot(N, omega_in); @@ -487,7 +356,7 @@ ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc, cons return make_float3(out, out, out); } - return make_float3(0, 0, 0); + return make_float3(0.0f, 0.0f, 0.0f); } ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) @@ -495,17 +364,17 @@ ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc, con float alpha_x = sc->data0; float alpha_y = sc->data1; float m_eta = sc->data2; - int m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; + bool m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; float3 N = sc->N; if(!m_refractive || fmaxf(alpha_x, alpha_y) <= 1e-4f) - return make_float3(0, 0, 0); + return make_float3(0.0f, 0.0f, 0.0f); float cosNO = dot(N, I); float cosNI = dot(N, omega_in); if(cosNO <= 0 || cosNI >= 0) - return make_float3(0, 0, 0); /* vectors on same side -- not possible */ + return make_float3(0.0f, 0.0f, 0.0f); /* vectors on same side -- not possible */ /* compute half-vector of the refraction (eq. 16) */ float3 ht = -(m_eta * omega_in + I); @@ -513,10 +382,6 @@ ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc, con float cosHO = dot(Ht, I); float cosHI = dot(Ht, omega_in); - /* those situations makes chi+ terms in eq. 33, 34 be zero */ - if(dot(Ht, N) <= 0.0f || cosHO * cosNO <= 0.0f || cosHI * cosNI <= 0.0f) - return make_float3(0.0f, 0.0f, 0.0f); - float D, G1o, G1i; /* eq. 33: first we calculate D(m) with m=Ht: */ @@ -543,7 +408,7 @@ ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc, con * pdf = pm * (m_eta * m_eta) * fabsf(cosHI) / Ht2 */ float common = D * (m_eta * m_eta) / (cosNO * Ht2); float out = G * fabsf(cosHI * cosHO) * common; - *pdf = G1o * cosHO * fabsf(cosHI) * common; + *pdf = G1o * fabsf(cosHO * cosHI) * common; return make_float3(out, out, out); } @@ -552,7 +417,7 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure { float alpha_x = sc->data0; float alpha_y = sc->data1; - int m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; + bool m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; float3 N = sc->N; float cosNO = dot(N, I); @@ -657,16 +522,16 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure #ifdef __RAY_DIFFERENTIALS__ float3 dRdx, dRdy, dTdx, dTdy; #endif - float m_eta = sc->data2; + float m_eta = sc->data2, fresnel; bool inside; - fresnel_dielectric(m_eta, m, I, &R, &T, + fresnel = fresnel_dielectric(m_eta, m, I, &R, &T, #ifdef __RAY_DIFFERENTIALS__ dIdx, dIdy, &dRdx, &dRdy, &dTdx, &dTdy, #endif &inside); - if(!inside) { + if(!inside && fresnel != 1.0f) { *omega_in = T; #ifdef __RAY_DIFFERENTIALS__ @@ -719,29 +584,29 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure ccl_device int bsdf_microfacet_beckmann_setup(ShaderClosure *sc) { - sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* alpha_x */ + sc->data0 = saturate(sc->data0); /* alpha_x */ sc->data1 = sc->data0; /* alpha_y */ sc->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device int bsdf_microfacet_beckmann_aniso_setup(ShaderClosure *sc) { - sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* alpha_x */ - sc->data1 = clamp(sc->data1, 0.0f, 1.0f); /* alpha_y */ + sc->data0 = saturate(sc->data0); /* alpha_x */ + sc->data1 = saturate(sc->data1); /* alpha_y */ sc->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device int bsdf_microfacet_beckmann_refraction_setup(ShaderClosure *sc) { - sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* alpha_x */ + sc->data0 = saturate(sc->data0); /* alpha_x */ sc->data1 = sc->data0; /* alpha_y */ sc->type = CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device void bsdf_microfacet_beckmann_blur(ShaderClosure *sc, float roughness) @@ -754,11 +619,11 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(const ShaderClosure *sc, { float alpha_x = sc->data0; float alpha_y = sc->data1; - int m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; + bool m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; float3 N = sc->N; if(m_refractive || fmaxf(alpha_x, alpha_y) <= 1e-4f) - return make_float3(0, 0, 0); + return make_float3(0.0f, 0.0f, 0.0f); float cosNO = dot(N, I); float cosNI = dot(N, omega_in); @@ -840,7 +705,7 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(const ShaderClosure *sc, return make_float3(out, out, out); } - return make_float3(0, 0, 0); + return make_float3(0.0f, 0.0f, 0.0f); } ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) @@ -848,17 +713,17 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc float alpha_x = sc->data0; float alpha_y = sc->data1; float m_eta = sc->data2; - int m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; + bool m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; float3 N = sc->N; if(!m_refractive || fmaxf(alpha_x, alpha_y) <= 1e-4f) - return make_float3(0, 0, 0); + return make_float3(0.0f, 0.0f, 0.0f); float cosNO = dot(N, I); float cosNI = dot(N, omega_in); if(cosNO <= 0 || cosNI >= 0) - return make_float3(0, 0, 0); + return make_float3(0.0f, 0.0f, 0.0f); /* compute half-vector of the refraction (eq. 16) */ float3 ht = -(m_eta * omega_in + I); @@ -866,10 +731,6 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc float cosHO = dot(Ht, I); float cosHI = dot(Ht, omega_in); - /* those situations makes chi+ terms in eq. 25, 27 be zero */ - if(dot(Ht, N) <= 0.0f || cosHO * cosNO <= 0.0f || cosHI * cosNI <= 0.0f) - return make_float3(0.0f, 0.0f, 0.0f); - /* eq. 25: first we calculate D(m) with m=Ht: */ float alpha2 = alpha_x * alpha_y; float cosThetaM = min(dot(N, Ht), 1.0f); @@ -895,7 +756,7 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc * pdf = pm * (m_eta * m_eta) * fabsf(cosHI) / Ht2 */ float common = D * (m_eta * m_eta) / (cosNO * Ht2); float out = G * fabsf(cosHI * cosHO) * common; - *pdf = G1o * cosHO * fabsf(cosHI) * common; + *pdf = G1o * fabsf(cosHO * cosHI) * common; return make_float3(out, out, out); } @@ -904,7 +765,7 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl { float alpha_x = sc->data0; float alpha_y = sc->data1; - int m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; + bool m_refractive = sc->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; float3 N = sc->N; float cosNO = dot(N, I); @@ -1011,16 +872,16 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl #ifdef __RAY_DIFFERENTIALS__ float3 dRdx, dRdy, dTdx, dTdy; #endif - float m_eta = sc->data2; + float m_eta = sc->data2, fresnel; bool inside; - fresnel_dielectric(m_eta, m, I, &R, &T, + fresnel = fresnel_dielectric(m_eta, m, I, &R, &T, #ifdef __RAY_DIFFERENTIALS__ dIdx, dIdy, &dRdx, &dRdy, &dTdx, &dTdy, #endif &inside); - if(!inside) { + if(!inside && fresnel != 1.0f) { *omega_in = T; #ifdef __RAY_DIFFERENTIALS__ diff --git a/intern/cycles/kernel/closure/bsdf_oren_nayar.h b/intern/cycles/kernel/closure/bsdf_oren_nayar.h index 6f685d5eeea..61b7cb11b02 100644 --- a/intern/cycles/kernel/closure/bsdf_oren_nayar.h +++ b/intern/cycles/kernel/closure/bsdf_oren_nayar.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __BSDF_OREN_NAYAR_H__ @@ -25,7 +25,7 @@ ccl_device float3 bsdf_oren_nayar_get_intensity(const ShaderClosure *sc, float3 float nv = max(dot(n, v), 0.0f); float t = dot(l, v) - nl * nv; - if (t > 0.0f) + if(t > 0.0f) t /= max(nl, nv) + FLT_MIN; float is = nl * (sc->data0 + sc->data1 * t); return make_float3(is, is, is); @@ -37,23 +37,19 @@ ccl_device int bsdf_oren_nayar_setup(ShaderClosure *sc) sc->type = CLOSURE_BSDF_OREN_NAYAR_ID; - sigma = clamp(sigma, 0.0f, 1.0f); + sigma = saturate(sigma); float div = 1.0f / (M_PI_F + ((3.0f * M_PI_F - 4.0f) / 6.0f) * sigma); sc->data0 = 1.0f * div; sc->data1 = sigma * div; - return SD_BSDF | SD_BSDF_HAS_EVAL; -} - -ccl_device void bsdf_oren_nayar_blur(ShaderClosure *sc, float roughness) -{ + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device float3 bsdf_oren_nayar_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { - if (dot(sc->N, omega_in) > 0.0f) { + if(dot(sc->N, omega_in) > 0.0f) { *pdf = 0.5f * M_1_PI_F; return bsdf_oren_nayar_get_intensity(sc, sc->N, I, omega_in); } @@ -72,7 +68,7 @@ ccl_device int bsdf_oren_nayar_sample(const ShaderClosure *sc, float3 Ng, float3 { sample_uniform_hemisphere(sc->N, randu, randv, omega_in, pdf); - if (dot(Ng, *omega_in) > 0.0f) { + if(dot(Ng, *omega_in) > 0.0f) { *eval = bsdf_oren_nayar_get_intensity(sc, sc->N, I, *omega_in); #ifdef __RAY_DIFFERENTIALS__ @@ -86,7 +82,7 @@ ccl_device int bsdf_oren_nayar_sample(const ShaderClosure *sc, float3 Ng, float3 *eval = make_float3(0.0f, 0.0f, 0.0f); } - return LABEL_REFLECT | LABEL_DIFFUSE; + return LABEL_REFLECT|LABEL_DIFFUSE; } diff --git a/intern/cycles/kernel/closure/bsdf_phong_ramp.h b/intern/cycles/kernel/closure/bsdf_phong_ramp.h index 2b4e1c68640..1ab15eee954 100644 --- a/intern/cycles/kernel/closure/bsdf_phong_ramp.h +++ b/intern/cycles/kernel/closure/bsdf_phong_ramp.h @@ -41,9 +41,9 @@ ccl_device float3 bsdf_phong_ramp_get_color(const ShaderClosure *sc, const float float npos = pos * (float)(MAXCOLORS - 1); int ipos = float_to_int(npos); - if (ipos < 0) + if(ipos < 0) return colors[0]; - if (ipos >= (MAXCOLORS - 1)) + if(ipos >= (MAXCOLORS - 1)) return colors[MAXCOLORS - 1]; float offset = npos - (float)ipos; return colors[ipos] * (1.0f - offset) + colors[ipos+1] * offset; @@ -51,10 +51,10 @@ ccl_device float3 bsdf_phong_ramp_get_color(const ShaderClosure *sc, const float ccl_device int bsdf_phong_ramp_setup(ShaderClosure *sc) { - sc->data0 = max(sc->data0, 0.0f); - sc->type = CLOSURE_BSDF_PHONG_RAMP_ID; - return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_GLOSSY; + sc->data0 = max(sc->data0, 0.0f); + sc->data1 = 0.0f; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device void bsdf_phong_ramp_blur(ShaderClosure *sc, float roughness) @@ -67,11 +67,11 @@ ccl_device float3 bsdf_phong_ramp_eval_reflect(const ShaderClosure *sc, const fl float cosNI = dot(sc->N, omega_in); float cosNO = dot(sc->N, I); - if (cosNI > 0 && cosNO > 0) { + if(cosNI > 0 && cosNO > 0) { // reflect the view vector float3 R = (2 * cosNO) * sc->N - I; float cosRI = dot(R, omega_in); - if (cosRI > 0) { + if(cosRI > 0) { float cosp = powf(cosRI, m_exponent); float common = 0.5f * M_1_PI_F * cosp; float out = cosNI * (m_exponent + 2) * common; @@ -93,7 +93,7 @@ ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc, const float3 colo float cosNO = dot(sc->N, I); float m_exponent = sc->data0; - if (cosNO > 0) { + if(cosNO > 0) { // reflect the view vector float3 R = (2 * cosNO) * sc->N - I; @@ -111,12 +111,12 @@ ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc, const float3 colo *omega_in = (cosf(phi) * sinTheta) * T + (sinf(phi) * sinTheta) * B + ( cosTheta) * R; - if (dot(Ng, *omega_in) > 0.0f) + if(dot(Ng, *omega_in) > 0.0f) { // common terms for pdf and eval float cosNI = dot(sc->N, *omega_in); // make sure the direction we chose is still in the right hemisphere - if (cosNI > 0) + if(cosNI > 0) { float cosp = powf(cosTheta, m_exponent); float common = 0.5f * M_1_PI_F * cosp; diff --git a/intern/cycles/kernel/closure/bsdf_reflection.h b/intern/cycles/kernel/closure/bsdf_reflection.h index 0baccdf155c..303f4c9ce34 100644 --- a/intern/cycles/kernel/closure/bsdf_reflection.h +++ b/intern/cycles/kernel/closure/bsdf_reflection.h @@ -43,10 +43,6 @@ ccl_device int bsdf_reflection_setup(ShaderClosure *sc) return SD_BSDF; } -ccl_device void bsdf_reflection_blur(ShaderClosure *sc, float roughness) -{ -} - ccl_device float3 bsdf_reflection_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { return make_float3(0.0f, 0.0f, 0.0f); @@ -70,8 +66,9 @@ ccl_device int bsdf_reflection_sample(const ShaderClosure *sc, float3 Ng, float3 *domega_in_dx = 2 * dot(N, dIdx) * N - dIdx; *domega_in_dy = 2 * dot(N, dIdy) * N - dIdy; #endif - *pdf = 1; - *eval = make_float3(1, 1, 1); + /* Some high number for MIS. */ + *pdf = 1e6f; + *eval = make_float3(1e6f, 1e6f, 1e6f); } } return LABEL_REFLECT|LABEL_SINGULAR; diff --git a/intern/cycles/kernel/closure/bsdf_refraction.h b/intern/cycles/kernel/closure/bsdf_refraction.h index c4698b42060..c78a4b67134 100644 --- a/intern/cycles/kernel/closure/bsdf_refraction.h +++ b/intern/cycles/kernel/closure/bsdf_refraction.h @@ -43,10 +43,6 @@ ccl_device int bsdf_refraction_setup(ShaderClosure *sc) return SD_BSDF; } -ccl_device void bsdf_refraction_blur(ShaderClosure *sc, float roughness) -{ -} - ccl_device float3 bsdf_refraction_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { return make_float3(0.0f, 0.0f, 0.0f); @@ -67,15 +63,17 @@ ccl_device int bsdf_refraction_sample(const ShaderClosure *sc, float3 Ng, float3 float3 dRdx, dRdy, dTdx, dTdy; #endif bool inside; - fresnel_dielectric(m_eta, N, I, &R, &T, + float fresnel; + fresnel = fresnel_dielectric(m_eta, N, I, &R, &T, #ifdef __RAY_DIFFERENTIALS__ dIdx, dIdy, &dRdx, &dRdy, &dTdx, &dTdy, #endif &inside); - - if(!inside) { - *pdf = 1.0f; - *eval = make_float3(1.0f, 1.0f, 1.0f); + + if(!inside && fresnel != 1.0f) { + /* Some high number for MIS. */ + *pdf = 1e6f; + *eval = make_float3(1e6f, 1e6f, 1e6f); *omega_in = T; #ifdef __RAY_DIFFERENTIALS__ *domega_in_dx = dTdx; diff --git a/intern/cycles/kernel/closure/bsdf_toon.h b/intern/cycles/kernel/closure/bsdf_toon.h index 797fa4227ae..e5b6ab93a64 100644 --- a/intern/cycles/kernel/closure/bsdf_toon.h +++ b/intern/cycles/kernel/closure/bsdf_toon.h @@ -40,16 +40,12 @@ CCL_NAMESPACE_BEGIN ccl_device int bsdf_diffuse_toon_setup(ShaderClosure *sc) { sc->type = CLOSURE_BSDF_DIFFUSE_TOON_ID; - sc->data0 = clamp(sc->data0, 0.0f, 1.0f); - sc->data1 = clamp(sc->data1, 0.0f, 1.0f); + sc->data0 = saturate(sc->data0); + sc->data1 = saturate(sc->data1); return SD_BSDF|SD_BSDF_HAS_EVAL; } -ccl_device void bsdf_diffuse_toon_blur(ShaderClosure *sc, float roughness) -{ -} - ccl_device float3 bsdf_toon_get_intensity(float max_angle, float smooth, float angle) { float is; @@ -124,16 +120,12 @@ ccl_device int bsdf_diffuse_toon_sample(const ShaderClosure *sc, float3 Ng, floa ccl_device int bsdf_glossy_toon_setup(ShaderClosure *sc) { sc->type = CLOSURE_BSDF_GLOSSY_TOON_ID; - sc->data0 = clamp(sc->data0, 0.0f, 1.0f); - sc->data1 = clamp(sc->data1, 0.0f, 1.0f); + sc->data0 = saturate(sc->data0); + sc->data1 = saturate(sc->data1); return SD_BSDF|SD_BSDF_HAS_EVAL; } -ccl_device void bsdf_glossy_toon_blur(ShaderClosure *sc, float roughness) -{ -} - ccl_device float3 bsdf_glossy_toon_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { float max_angle = sc->data0*M_PI_2_F; diff --git a/intern/cycles/kernel/closure/bsdf_transparent.h b/intern/cycles/kernel/closure/bsdf_transparent.h index 73601d20c3a..3c2fd8004df 100644 --- a/intern/cycles/kernel/closure/bsdf_transparent.h +++ b/intern/cycles/kernel/closure/bsdf_transparent.h @@ -41,10 +41,6 @@ ccl_device int bsdf_transparent_setup(ShaderClosure *sc) return SD_BSDF|SD_TRANSPARENT; } -ccl_device void bsdf_transparent_blur(ShaderClosure *sc, float roughness) -{ -} - ccl_device float3 bsdf_transparent_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { return make_float3(0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/closure/bsdf_westin.h b/intern/cycles/kernel/closure/bsdf_westin.h deleted file mode 100644 index 9dc1c00bb3d..00000000000 --- a/intern/cycles/kernel/closure/bsdf_westin.h +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Adapted from Open Shading Language with this license: - * - * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. - * All Rights Reserved. - * - * Modifications Copyright 2011, Blender Foundation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Sony Pictures Imageworks nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __BSDF_WESTIN_H__ -#define __BSDF_WESTIN_H__ - -CCL_NAMESPACE_BEGIN - -/* WESTIN BACKSCATTER */ - -ccl_device int bsdf_westin_backscatter_setup(ShaderClosure *sc) -{ - float roughness = sc->data0; - roughness = clamp(roughness, 1e-5f, 1.0f); - float m_invroughness = 1.0f/roughness; - - sc->type = CLOSURE_BSDF_WESTIN_BACKSCATTER_ID; - sc->data0 = m_invroughness; - - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY; -} - -ccl_device void bsdf_westin_backscatter_blur(ShaderClosure *sc, float roughness) -{ - float m_invroughness = sc->data0; - m_invroughness = min(1.0f/roughness, m_invroughness); - sc->data0 = m_invroughness; -} - -ccl_device float3 bsdf_westin_backscatter_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) -{ - float m_invroughness = sc->data0; - float3 N = sc->N; - - // pdf is implicitly 0 (no indirect sampling) - float cosNO = dot(N, I); - float cosNI = dot(N, omega_in); - if(cosNO > 0 && cosNI > 0) { - float cosine = dot(I, omega_in); - *pdf = cosine > 0 ? (m_invroughness + 1) * powf(cosine, m_invroughness) : 0; - *pdf *= 0.5f * M_1_PI_F; - return make_float3 (*pdf, *pdf, *pdf); - } - return make_float3 (0, 0, 0); -} - -ccl_device float3 bsdf_westin_backscatter_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) -{ - return make_float3(0.0f, 0.0f, 0.0f); -} - -ccl_device int bsdf_westin_backscatter_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) -{ - float m_invroughness = sc->data0; - float3 N = sc->N; - - float cosNO = dot(N, I); - if(cosNO > 0) { -#ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = dIdx; - *domega_in_dy = dIdy; -#endif - float3 T, B; - make_orthonormals (I, &T, &B); - float phi = M_2PI_F * randu; - float cosTheta = powf(randv, 1 / (m_invroughness + 1)); - float sinTheta2 = 1 - cosTheta * cosTheta; - float sinTheta = sinTheta2 > 0 ? sqrtf(sinTheta2) : 0; - *omega_in = (cosf(phi) * sinTheta) * T + - (sinf(phi) * sinTheta) * B + - (cosTheta) * I; - if(dot(Ng, *omega_in) > 0) { - // common terms for pdf and eval - float cosNI = dot(N, *omega_in); - // make sure the direction we chose is still in the right hemisphere - if(cosNI > 0) - { - *pdf = 0.5f * M_1_PI_F * powf(cosTheta, m_invroughness); - *pdf = (m_invroughness + 1) * (*pdf); - *eval = make_float3(*pdf, *pdf, *pdf); - } - } - } - return LABEL_REFLECT|LABEL_GLOSSY; -} - -/* WESTIN SHEEN */ - -ccl_device int bsdf_westin_sheen_setup(ShaderClosure *sc) -{ - /* float edginess = sc->data0; */ - sc->type = CLOSURE_BSDF_WESTIN_SHEEN_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY; -} - -ccl_device void bsdf_westin_sheen_blur(ShaderClosure *sc, float roughness) -{ -} - -ccl_device float3 bsdf_westin_sheen_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) -{ - float m_edginess = sc->data0; - float3 N = sc->N; - - // pdf is implicitly 0 (no indirect sampling) - float cosNO = dot(N, I); - float cosNI = dot(N, omega_in); - if(cosNO > 0 && cosNI > 0) { - float sinNO2 = 1 - cosNO * cosNO; - *pdf = cosNI * M_1_PI_F; - float westin = sinNO2 > 0 ? powf(sinNO2, 0.5f * m_edginess) * (*pdf) : 0; - return make_float3 (westin, westin, westin); - } - return make_float3 (0, 0, 0); -} - -ccl_device float3 bsdf_westin_sheen_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) -{ - return make_float3(0.0f, 0.0f, 0.0f); -} - -ccl_device int bsdf_westin_sheen_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) -{ - float m_edginess = sc->data0; - float3 N = sc->N; - - // we are viewing the surface from the right side - send a ray out with cosine - // distribution over the hemisphere - sample_cos_hemisphere(N, randu, randv, omega_in, pdf); - if(dot(Ng, *omega_in) > 0) { - // TODO: account for sheen when sampling - float cosNO = dot(N, I); - float sinNO2 = 1 - cosNO * cosNO; - float westin = sinNO2 > 0 ? powf(sinNO2, 0.5f * m_edginess) * (*pdf) : 0; - *eval = make_float3(westin, westin, westin); -#ifdef __RAY_DIFFERENTIALS__ - // TODO: find a better approximation for the diffuse bounce - *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx; - *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy; -#endif - } - else { - pdf = 0; - } - return LABEL_REFLECT|LABEL_DIFFUSE; -} - -CCL_NAMESPACE_END - -#endif /* __BSDF_WESTIN_H__ */ - diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h index 3849dedc3b6..f817dcd5f2d 100644 --- a/intern/cycles/kernel/closure/bssrdf.h +++ b/intern/cycles/kernel/closure/bssrdf.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __KERNEL_BSSRDF_H__ @@ -30,8 +30,8 @@ ccl_device int bssrdf_setup(ShaderClosure *sc, ClosureType type) return flag; } else { - sc->data1 = clamp(sc->data1, 0.0f, 1.0f); /* texture blur */ - sc->T.x = clamp(sc->T.x, 0.0f, 1.0f); /* sharpness */ + sc->data1 = saturate(sc->data1); /* texture blur */ + sc->T.x = saturate(sc->T.x); /* sharpness */ sc->type = type; return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSSRDF; @@ -157,7 +157,7 @@ ccl_device float bssrdf_cubic_quintic_root_find(float xi) float x = 0.25f; int i; - for (i = 0; i < max_iteration_count; i++) { + for(i = 0; i < max_iteration_count; i++) { float x2 = x*x; float x3 = x2*x; float nx = (1.0f - x); @@ -168,7 +168,7 @@ ccl_device float bssrdf_cubic_quintic_root_find(float xi) if(fabsf(f) < tolerance || f_ == 0.0f) break; - x = clamp(x - f/f_, 0.0f, 1.0f); + x = saturate(x - f/f_); } return x; diff --git a/intern/cycles/kernel/closure/volume.h b/intern/cycles/kernel/closure/volume.h index 058c4b8408f..4d71ba50ec3 100644 --- a/intern/cycles/kernel/closure/volume.h +++ b/intern/cycles/kernel/closure/volume.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __VOLUME_H__ @@ -26,9 +26,6 @@ CCL_NAMESPACE_BEGIN * uniform sphere. g=0 uniform diffuse-like, g=1 close to sharp single ray. */ ccl_device float single_peaked_henyey_greenstein(float cos_theta, float g) { - if(fabsf(g) < 1e-3f) - return M_1_PI_F * 0.25f; - return ((1.0f - g * g) / safe_powf(1.0f + g * g - 2.0f * g * cos_theta, 1.5f)) * (M_1_PI_F * 0.25f); }; @@ -39,7 +36,7 @@ ccl_device int volume_henyey_greenstein_setup(ShaderClosure *sc) /* clamp anisotropy to avoid delta function */ sc->data0 = signf(sc->data0) * min(fabsf(sc->data0), 1.0f - 1e-3f); - return SD_SCATTER|SD_PHASE_HAS_EVAL; + return SD_SCATTER; } ccl_device float3 volume_henyey_greenstein_eval_phase(const ShaderClosure *sc, const float3 I, float3 omega_in, float *pdf) @@ -47,9 +44,13 @@ ccl_device float3 volume_henyey_greenstein_eval_phase(const ShaderClosure *sc, c float g = sc->data0; /* note that I points towards the viewer */ - float cos_theta = dot(-I, omega_in); - - *pdf = single_peaked_henyey_greenstein(cos_theta, g); + if(fabsf(g) < 1e-3f) { + *pdf = M_1_PI_F * 0.25f; + } + else { + float cos_theta = dot(-I, omega_in); + *pdf = single_peaked_henyey_greenstein(cos_theta, g); + } return make_float3(*pdf, *pdf, *pdf); } @@ -63,10 +64,12 @@ ccl_device int volume_henyey_greenstein_sample(const ShaderClosure *sc, float3 I /* match pdf for small g */ if(fabsf(g) < 1e-3f) { cos_theta = (1.0f - 2.0f * randu); + *pdf = M_1_PI_F * 0.25f; } else { float k = (1.0f - g * g) / (1.0f - g + 2.0f * g * randu); cos_theta = (1.0f + g * g - k * k) / (2.0f * g); + *pdf = single_peaked_henyey_greenstein(cos_theta, g); } float sin_theta = safe_sqrtf(1.0f - cos_theta * cos_theta); @@ -80,7 +83,6 @@ ccl_device int volume_henyey_greenstein_sample(const ShaderClosure *sc, float3 I make_orthonormals(-I, &T, &B); *omega_in = sin_theta * cos_phi * T + sin_theta * sin_phi * B + cos_theta * (-I); - *pdf = single_peaked_henyey_greenstein(cos_theta, g); *eval = make_float3(*pdf, *pdf, *pdf); /* perfect importance sampling */ #ifdef __RAY_DIFFERENTIALS__ @@ -105,18 +107,9 @@ ccl_device int volume_absorption_setup(ShaderClosure *sc) ccl_device float3 volume_phase_eval(const ShaderData *sd, const ShaderClosure *sc, float3 omega_in, float *pdf) { - float3 eval; - - switch(sc->type) { - case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID: - eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf); - break; - default: - eval = make_float3(0.0f, 0.0f, 0.0f); - break; - } + kernel_assert(sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID); - return eval; + return volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf); } ccl_device int volume_phase_sample(const ShaderData *sd, const ShaderClosure *sc, float randu, diff --git a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h index 9495a2541f9..5ab900d47aa 100644 --- a/intern/cycles/kernel/geom/geom.h +++ b/intern/cycles/kernel/geom/geom.h @@ -20,7 +20,11 @@ /* 64 object BVH + 64 mesh BVH + 64 object node splitting */ #define BVH_STACK_SIZE 192 +#define BVH_QSTACK_SIZE 384 #define BVH_NODE_SIZE 4 +#define BVH_NODE_LEAF_SIZE 1 +#define BVH_QNODE_SIZE 7 +#define BVH_QNODE_LEAF_SIZE 1 #define TRI_NODE_SIZE 3 /* silly workaround for float extended precision that happens when compiling @@ -35,6 +39,7 @@ #include "geom_attribute.h" #include "geom_object.h" #include "geom_triangle.h" +#include "geom_triangle_intersect.h" #include "geom_motion_triangle.h" #include "geom_motion_curve.h" #include "geom_curve.h" diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h index 63ce31c492f..c7364e9edac 100644 --- a/intern/cycles/kernel/geom/geom_attribute.h +++ b/intern/cycles/kernel/geom/geom_attribute.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -29,24 +29,27 @@ CCL_NAMESPACE_BEGIN ccl_device_inline int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeElement *elem) { - if(sd->object == PRIM_NONE) + if(ccl_fetch(sd, object) == PRIM_NONE) return (int)ATTR_STD_NOT_FOUND; /* for SVM, find attribute by unique id */ - uint attr_offset = sd->object*kernel_data.bvh.attributes_map_stride; + uint attr_offset = ccl_fetch(sd, object)*kernel_data.bvh.attributes_map_stride; #ifdef __HAIR__ - attr_offset = (sd->type & PRIMITIVE_ALL_CURVE)? attr_offset + ATTR_PRIM_CURVE: attr_offset; + attr_offset = (ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE)? attr_offset + ATTR_PRIM_CURVE: attr_offset; #endif uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); while(attr_map.x != id) { + if(UNLIKELY(attr_map.x == ATTR_STD_NONE)) { + return ATTR_STD_NOT_FOUND; + } attr_offset += ATTR_PRIM_TYPES; attr_map = kernel_tex_fetch(__attributes_map, attr_offset); } *elem = (AttributeElement)attr_map.y; - if(sd->prim == PRIM_NONE && (AttributeElement)attr_map.y != ATTR_ELEMENT_MESH) + if(ccl_fetch(sd, prim) == PRIM_NONE && (AttributeElement)attr_map.y != ATTR_ELEMENT_MESH) return ATTR_STD_NOT_FOUND; /* return result */ diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h index dd6abe32fec..3d0d406dd0b 100644 --- a/intern/cycles/kernel/geom/geom_bvh.h +++ b/intern/cycles/kernel/geom/geom_bvh.h @@ -28,6 +28,13 @@ CCL_NAMESPACE_BEGIN +/* Don't inline intersect functions on GPU, this is faster */ +#ifdef __KERNEL_GPU__ +#define ccl_device_intersect ccl_device_noinline +#else +#define ccl_device_intersect ccl_device_inline +#endif + /* BVH intersection function variations */ #define BVH_INSTANCING 1 @@ -35,6 +42,19 @@ CCL_NAMESPACE_BEGIN #define BVH_HAIR 4 #define BVH_HAIR_MINIMUM_WIDTH 8 +#define BVH_NAME_JOIN(x,y) x ## _ ## y +#define BVH_NAME_EVAL(x,y) BVH_NAME_JOIN(x,y) +#define BVH_FUNCTION_FULL_NAME(prefix) BVH_NAME_EVAL(prefix, BVH_FUNCTION_NAME) + +#define BVH_FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0) + +/* Common QBVH functions. */ +#ifdef __QBVH__ +#include "geom_qbvh.h" +#endif + +/* Regular BVH traversal */ + #define BVH_FUNCTION_NAME bvh_intersect #define BVH_FUNCTION_FEATURES 0 #include "geom_bvh_traversal.h" @@ -63,6 +83,8 @@ CCL_NAMESPACE_BEGIN #include "geom_bvh_traversal.h" #endif +/* Subsurface scattering BVH traversal */ + #if defined(__SUBSURFACE__) #define BVH_FUNCTION_NAME bvh_intersect_subsurface #define BVH_FUNCTION_FEATURES 0 @@ -93,43 +115,108 @@ CCL_NAMESPACE_BEGIN #include "geom_bvh_subsurface.h" #endif +/* Volume BVH traversal */ + +#if defined(__VOLUME__) +#define BVH_FUNCTION_NAME bvh_intersect_volume +#define BVH_FUNCTION_FEATURES 0 +#include "geom_bvh_volume.h" +#endif + +#if defined(__VOLUME__) && defined(__INSTANCING__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_instancing +#define BVH_FUNCTION_FEATURES BVH_INSTANCING +#include "geom_bvh_volume.h" +#endif + +#if defined(__VOLUME__) && defined(__HAIR__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_hair +#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH +#include "geom_bvh_volume.h" +#endif + +#if defined(__VOLUME__) && defined(__OBJECT_MOTION__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_motion +#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION +#include "geom_bvh_volume.h" +#endif + +#if defined(__VOLUME__) && defined(__HAIR__) && defined(__OBJECT_MOTION__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_hair_motion +#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION +#include "geom_bvh_volume.h" +#endif + +/* Record all intersections - Shadow BVH traversal */ + #if defined(__SHADOW_RECORD_ALL__) #define BVH_FUNCTION_NAME bvh_intersect_shadow_all #define BVH_FUNCTION_FEATURES 0 #include "geom_bvh_shadow.h" #endif -#if defined(__SUBSURFACE__) && defined(__INSTANCING__) +#if defined(__SHADOW_RECORD_ALL__) && defined(__INSTANCING__) #define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing #define BVH_FUNCTION_FEATURES BVH_INSTANCING #include "geom_bvh_shadow.h" #endif -#if defined(__SUBSURFACE__) && defined(__HAIR__) +#if defined(__SHADOW_RECORD_ALL__) && defined(__HAIR__) #define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair #define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR #include "geom_bvh_shadow.h" #endif -#if defined(__SUBSURFACE__) && defined(__OBJECT_MOTION__) +#if defined(__SHADOW_RECORD_ALL__) && defined(__OBJECT_MOTION__) #define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion #define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION #include "geom_bvh_shadow.h" #endif -#if defined(__SUBSURFACE__) && defined(__HAIR__) && defined(__OBJECT_MOTION__) +#if defined(__SHADOW_RECORD_ALL__) && defined(__HAIR__) && defined(__OBJECT_MOTION__) #define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion #define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION #include "geom_bvh_shadow.h" #endif -/* to work around titan bug when using arrays instead of textures */ -#if !defined(__KERNEL_CUDA__) || defined(__KERNEL_CUDA_TEX_STORAGE__) -ccl_device_inline -#else -ccl_device_noinline +/* Record all intersections - Volume BVH traversal */ + +#if defined(__VOLUME_RECORD_ALL__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_all +#define BVH_FUNCTION_FEATURES 0 +#include "geom_bvh_volume_all.h" +#endif + +#if defined(__VOLUME_RECORD_ALL__) && defined(__INSTANCING__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing +#define BVH_FUNCTION_FEATURES BVH_INSTANCING +#include "geom_bvh_volume_all.h" +#endif + +#if defined(__VOLUME_RECORD_ALL__) && defined(__HAIR__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_all_hair +#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH +#include "geom_bvh_volume_all.h" #endif -bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect, + +#if defined(__VOLUME_RECORD_ALL__) && defined(__OBJECT_MOTION__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion +#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION +#include "geom_bvh_volume_all.h" +#endif + +#if defined(__VOLUME_RECORD_ALL__) && defined(__HAIR__) && defined(__OBJECT_MOTION__) +#define BVH_FUNCTION_NAME bvh_intersect_volume_all_hair_motion +#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION +#include "geom_bvh_volume_all.h" +#endif + +#undef BVH_FEATURE +#undef BVH_NAME_JOIN +#undef BVH_NAME_EVAL +#undef BVH_FUNCTION_FULL_NAME + +ccl_device_intersect bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect, uint *lcg_state, float difl, float extmax) { #ifdef __OBJECT_MOTION__ @@ -167,14 +254,8 @@ bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, I #endif /* __KERNEL_CPU__ */ } -/* to work around titan bug when using arrays instead of textures */ #ifdef __SUBSURFACE__ -#if !defined(__KERNEL_CUDA__) || defined(__KERNEL_CUDA_TEX_STORAGE__) -ccl_device_inline -#else -ccl_device_noinline -#endif -uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits) +ccl_device_intersect uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits) { #ifdef __OBJECT_MOTION__ if(kernel_data.bvh.have_motion) { @@ -212,14 +293,8 @@ uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection } #endif -/* to work around titan bug when using arrays instead of textures */ #ifdef __SHADOW_RECORD_ALL__ -#if !defined(__KERNEL_CUDA__) || defined(__KERNEL_CUDA_TEX_STORAGE__) -ccl_device_inline -#else -ccl_device_noinline -#endif -uint scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits) +ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits) { #ifdef __OBJECT_MOTION__ if(kernel_data.bvh.have_motion) { @@ -237,26 +312,87 @@ uint scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection return bvh_intersect_shadow_all_hair(kg, ray, isect, max_hits, num_hits); #endif /* __HAIR__ */ -#ifdef __KERNEL_CPU__ - #ifdef __INSTANCING__ if(kernel_data.bvh.have_instancing) return bvh_intersect_shadow_all_instancing(kg, ray, isect, max_hits, num_hits); #endif /* __INSTANCING__ */ return bvh_intersect_shadow_all(kg, ray, isect, max_hits, num_hits); +} +#endif + +#ifdef __VOLUME__ +ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg, + const Ray *ray, + Intersection *isect) +{ +#ifdef __OBJECT_MOTION__ + if(kernel_data.bvh.have_motion) { +#ifdef __HAIR__ + if(kernel_data.bvh.have_curves) + return bvh_intersect_volume_hair_motion(kg, ray, isect); +#endif /* __HAIR__ */ + + return bvh_intersect_volume_motion(kg, ray, isect); + } +#endif /* __OBJECT_MOTION__ */ + +#ifdef __HAIR__ + if(kernel_data.bvh.have_curves) + return bvh_intersect_volume_hair(kg, ray, isect); +#endif /* __HAIR__ */ + +#ifdef __KERNEL_CPU__ + +#ifdef __INSTANCING__ + if(kernel_data.bvh.have_instancing) + return bvh_intersect_volume_instancing(kg, ray, isect); +#endif /* __INSTANCING__ */ + + return bvh_intersect_volume(kg, ray, isect); #else /* __KERNEL_CPU__ */ #ifdef __INSTANCING__ - return bvh_intersect_shadow_all_instancing(kg, ray, isect, max_hits, num_hits); + return bvh_intersect_volume_instancing(kg, ray, isect); #else - return bvh_intersect_shadow_all(kg, ray, isect, max_hits, num_hits); + return bvh_intersect_volume(kg, ray, isect); #endif /* __INSTANCING__ */ #endif /* __KERNEL_CPU__ */ } #endif +#ifdef __VOLUME_RECORD_ALL__ +ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg, + const Ray *ray, + Intersection *isect, + const uint max_hits) +{ +#ifdef __OBJECT_MOTION__ + if(kernel_data.bvh.have_motion) { +#ifdef __HAIR__ + if(kernel_data.bvh.have_curves) + return bvh_intersect_volume_all_hair_motion(kg, ray, isect, max_hits); +#endif /* __HAIR__ */ + + return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits); + } +#endif /* __OBJECT_MOTION__ */ + +#ifdef __HAIR__ + if(kernel_data.bvh.have_curves) + return bvh_intersect_volume_all_hair(kg, ray, isect, max_hits); +#endif /* __HAIR__ */ + +#ifdef __INSTANCING__ + if(kernel_data.bvh.have_instancing) + return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits); +#endif /* __INSTANCING__ */ + + return bvh_intersect_volume_all(kg, ray, isect, max_hits); +} +#endif + /* Ray offset to avoid self intersection. * @@ -311,5 +447,21 @@ ccl_device_inline float3 ray_offset(float3 P, float3 Ng) #endif } +#if defined(__SHADOW_RECORD_ALL__) || defined (__VOLUME_RECORD_ALL__) +/* ToDo: Move to another file? */ +ccl_device int intersections_compare(const void *a, const void *b) +{ + const Intersection *isect_a = (const Intersection*)a; + const Intersection *isect_b = (const Intersection*)b; + + if(isect_a->t < isect_b->t) + return -1; + else if(isect_a->t > isect_b->t) + return 1; + else + return 0; +} +#endif + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_bvh_shadow.h b/intern/cycles/kernel/geom/geom_bvh_shadow.h index aee4097d77e..e4cba99dc96 100644 --- a/intern/cycles/kernel/geom/geom_bvh_shadow.h +++ b/intern/cycles/kernel/geom/geom_bvh_shadow.h @@ -17,6 +17,10 @@ * limitations under the License. */ +#ifdef __QBVH__ +#include "geom_qbvh_shadow.h" +#endif + /* This is a template BVH traversal function, where various features can be * enabled/disabled. This way we can compile optimized versions for each case * without new features slowing things down. @@ -27,10 +31,11 @@ * */ -#define FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0) - -ccl_device bool BVH_FUNCTION_NAME -(KernelGlobals *kg, const Ray *ray, Intersection *isect_array, const uint max_hits, uint *num_hits) +ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint max_hits, + uint *num_hits) { /* todo: * - likely and unlikely for if() statements @@ -53,11 +58,11 @@ ccl_device bool BVH_FUNCTION_NAME int object = OBJECT_NONE; float isect_t = tmax; -#if FEATURE(BVH_MOTION) +#if BVH_FEATURE(BVH_MOTION) Transform ob_tfm; #endif -#if FEATURE(BVH_INSTANCING) +#if BVH_FEATURE(BVH_INSTANCING) int num_hits_in_instance = 0; #endif @@ -81,6 +86,9 @@ ccl_device bool BVH_FUNCTION_NAME gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); #endif + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + /* traversal loop */ do { do { @@ -174,6 +182,7 @@ ccl_device bool BVH_FUNCTION_NAME } ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); traversalStack[stackPtr] = nodeAddrChild1; } else { @@ -191,13 +200,15 @@ ccl_device bool BVH_FUNCTION_NAME /* if node is leaf, fetch triangle list */ if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+(BVH_NODE_SIZE-1)); + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE); int primAddr = __float_as_int(leaf.x); -#if FEATURE(BVH_INSTANCING) +#if BVH_FEATURE(BVH_INSTANCING) if(primAddr >= 0) { #endif - int primAddr2 = __float_as_int(leaf.y); + const int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + const uint p_type = type & PRIMITIVE_ALL; /* pop */ nodeAddr = traversalStack[stackPtr]; @@ -205,25 +216,26 @@ ccl_device bool BVH_FUNCTION_NAME /* primitive intersection */ while(primAddr < primAddr2) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + bool hit; - uint type = kernel_tex_fetch(__prim_type, primAddr); /* todo: specialized intersect functions which don't fill in * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW? * might give a few % performance improvement */ - switch(type & PRIMITIVE_ALL) { + switch(p_type) { case PRIMITIVE_TRIANGLE: { - hit = triangle_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr); + hit = triangle_intersect(kg, &isect_precalc, isect_array, P, PATH_RAY_SHADOW, object, primAddr); break; } -#if FEATURE(BVH_MOTION) +#if BVH_FEATURE(BVH_MOTION) case PRIMITIVE_MOTION_TRIANGLE: { hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, primAddr); break; } #endif -#if FEATURE(BVH_HAIR) +#if BVH_FEATURE(BVH_HAIR) case PRIMITIVE_CURVE: case PRIMITIVE_MOTION_CURVE: { if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) @@ -252,7 +264,7 @@ ccl_device bool BVH_FUNCTION_NAME if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE) #endif { - shader = kernel_tex_fetch(__tri_shader, prim); + shader = kernel_tex_fetch(__tri_shader, prim); } #ifdef __HAIR__ else { @@ -274,7 +286,7 @@ ccl_device bool BVH_FUNCTION_NAME /* move on to next entry in intersections array */ isect_array++; (*num_hits)++; -#if FEATURE(BVH_INSTANCING) +#if BVH_FEATURE(BVH_INSTANCING) num_hits_in_instance++; #endif @@ -284,52 +296,55 @@ ccl_device bool BVH_FUNCTION_NAME primAddr++; } } -#if FEATURE(BVH_INSTANCING) +#if BVH_FEATURE(BVH_INSTANCING) else { /* instance push */ object = kernel_tex_fetch(__prim_object, -primAddr-1); -#if FEATURE(BVH_MOTION) +#if BVH_FEATURE(BVH_MOTION) bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm); #else bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); #endif + triangle_intersect_precalc(dir, &isect_precalc); num_hits_in_instance = 0; + isect_array->t = isect_t; #if defined(__KERNEL_SSE2__) Psplat[0] = ssef(P.x); Psplat[1] = ssef(P.y); Psplat[2] = ssef(P.z); - isect_array->t = isect_t; tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); #endif ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; nodeAddr = kernel_tex_fetch(__object_node, object); } } -#endif +#endif /* FEATURE(BVH_INSTANCING) */ } while(nodeAddr != ENTRYPOINT_SENTINEL); -#if FEATURE(BVH_INSTANCING) +#if BVH_FEATURE(BVH_INSTANCING) if(stackPtr >= 0) { kernel_assert(object != OBJECT_NONE); if(num_hits_in_instance) { float t_fac; -#if FEATURE(BVH_MOTION) +#if BVH_FEATURE(BVH_MOTION) bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_tfm); #else bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); #endif + triangle_intersect_precalc(dir, &isect_precalc); + /* scale isect->t to adjust for instancing */ for(int i = 0; i < num_hits_in_instance; i++) (isect_array-i-1)->t *= t_fac; @@ -337,22 +352,23 @@ ccl_device bool BVH_FUNCTION_NAME else { float ignore_t = FLT_MAX; -#if FEATURE(BVH_MOTION) +#if BVH_FEATURE(BVH_MOTION) bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_tfm); #else bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); #endif + triangle_intersect_precalc(dir, &isect_precalc); } + isect_t = tmax; + isect_array->t = isect_t; + #if defined(__KERNEL_SSE2__) Psplat[0] = ssef(P.x); Psplat[1] = ssef(P.y); Psplat[2] = ssef(P.z); - isect_t = tmax; - isect_array->t = isect_t; tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); #endif @@ -360,13 +376,37 @@ ccl_device bool BVH_FUNCTION_NAME nodeAddr = traversalStack[stackPtr]; --stackPtr; } -#endif +#endif /* FEATURE(BVH_INSTANCING) */ } while(nodeAddr != ENTRYPOINT_SENTINEL); return false; } -#undef FEATURE +ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint max_hits, + uint *num_hits) +{ +#ifdef __QBVH__ + if(kernel_data.bvh.use_qbvh) { + return BVH_FUNCTION_FULL_NAME(QBVH)(kg, + ray, + isect_array, + max_hits, + num_hits); + } + else +#endif + { + kernel_assert(kernel_data.bvh.use_qbvh == false); + return BVH_FUNCTION_FULL_NAME(BVH)(kg, + ray, + isect_array, + max_hits, + num_hits); + } +} + #undef BVH_FUNCTION_NAME #undef BVH_FUNCTION_FEATURES - diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h index a8f57cffa78..a73139f9c88 100644 --- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h +++ b/intern/cycles/kernel/geom/geom_bvh_subsurface.h @@ -17,6 +17,10 @@ * limitations under the License. */ +#ifdef __QBVH__ +#include "geom_qbvh_subsurface.h" +#endif + /* This is a template BVH traversal function for subsurface scattering, where * various features can be enabled/disabled. This way we can compile optimized * versions for each case without new features slowing things down. @@ -26,10 +30,12 @@ * */ -#define FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0) - -ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersection *isect_array, - int subsurface_object, uint *lcg_state, int max_hits) +ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + int subsurface_object, + uint *lcg_state, + int max_hits) { /* todo: * - test if pushing distance on the stack helps (for non shadow rays) @@ -54,10 +60,9 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio int object = OBJECT_NONE; float isect_t = ray->t; - const uint visibility = PATH_RAY_ALL_VISIBILITY; uint num_hits = 0; -#if FEATURE(BVH_MOTION) +#if BVH_FEATURE(BVH_MOTION) Transform ob_tfm; #endif @@ -78,6 +83,9 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); #endif + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + /* traversal loop */ do { do @@ -118,14 +126,8 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t); /* decide which nodes to traverse next */ -#ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & visibility); - traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & visibility); -#else traverseChild0 = (c0max >= c0min); traverseChild1 = (c1max >= c1min); -#endif #else // __KERNEL_SSE2__ /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ @@ -145,14 +147,8 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); /* decide which nodes to traverse next */ -#ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - traverseChild0 = (movemask(lrhit) & 1) && (__float_as_uint(cnodes.z) & visibility); - traverseChild1 = (movemask(lrhit) & 2) && (__float_as_uint(cnodes.w) & visibility); -#else traverseChild0 = (movemask(lrhit) & 1); traverseChild1 = (movemask(lrhit) & 2); -#endif #endif // __KERNEL_SSE2__ nodeAddr = __float_as_int(cnodes.x); @@ -173,6 +169,7 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio } ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); traversalStack[stackPtr] = nodeAddrChild1; } else { @@ -190,57 +187,64 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio /* if node is leaf, fetch triangle list */ if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+(BVH_NODE_SIZE-1)); + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE); int primAddr = __float_as_int(leaf.x); -#if FEATURE(BVH_INSTANCING) +#if BVH_FEATURE(BVH_INSTANCING) if(primAddr >= 0) { #endif - int primAddr2 = __float_as_int(leaf.y); + const int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); /* pop */ nodeAddr = traversalStack[stackPtr]; --stackPtr; /* primitive intersection */ - for(; primAddr < primAddr2; primAddr++) { - /* only primitives from the same object */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; - - if(tri_object != subsurface_object) - continue; - - /* intersect ray against primitive */ - uint type = kernel_tex_fetch(__prim_type, primAddr); - - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - triangle_intersect_subsurface(kg, isect_array, P, dir, object, primAddr, isect_t, &num_hits, lcg_state, max_hits); - break; + switch(type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + /* intersect ray against primitive */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* only primitives from the same object */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + if(tri_object != subsurface_object) + continue; + triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, object, primAddr, isect_t, &num_hits, lcg_state, max_hits); } -#if FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + /* intersect ray against primitive */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* only primitives from the same object */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + if(tri_object != subsurface_object) + continue; motion_triangle_intersect_subsurface(kg, isect_array, P, dir, ray->time, object, primAddr, isect_t, &num_hits, lcg_state, max_hits); - break; } + break; + } #endif - default: { - break; - } + default: { + break; } } } -#if FEATURE(BVH_INSTANCING) +#if BVH_FEATURE(BVH_INSTANCING) else { /* instance push */ if(subsurface_object == kernel_tex_fetch(__prim_object, -primAddr-1)) { object = subsurface_object; -#if FEATURE(BVH_MOTION) +#if BVH_FEATURE(BVH_MOTION) bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm); #else bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); #endif + triangle_intersect_precalc(dir, &isect_precalc); #if defined(__KERNEL_SSE2__) Psplat[0] = ssef(P.x); @@ -253,6 +257,7 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio #endif ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; nodeAddr = kernel_tex_fetch(__object_node, object); @@ -264,20 +269,22 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio } } } -#endif +#endif /* FEATURE(BVH_INSTANCING) */ } while(nodeAddr != ENTRYPOINT_SENTINEL); -#if FEATURE(BVH_INSTANCING) +#if BVH_FEATURE(BVH_INSTANCING) if(stackPtr >= 0) { kernel_assert(object != OBJECT_NONE); /* instance pop */ -#if FEATURE(BVH_MOTION) +#if BVH_FEATURE(BVH_MOTION) bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm); #else bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect_t); #endif + triangle_intersect_precalc(dir, &isect_precalc); + #if defined(__KERNEL_SSE2__) Psplat[0] = ssef(P.x); Psplat[1] = ssef(P.y); @@ -292,13 +299,40 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio nodeAddr = traversalStack[stackPtr]; --stackPtr; } -#endif +#endif /* FEATURE(BVH_INSTANCING) */ } while(nodeAddr != ENTRYPOINT_SENTINEL); return num_hits; } -#undef FEATURE +ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + int subsurface_object, + uint *lcg_state, + int max_hits) +{ +#ifdef __QBVH__ + if(kernel_data.bvh.use_qbvh) { + return BVH_FUNCTION_FULL_NAME(QBVH)(kg, + ray, + isect_array, + subsurface_object, + lcg_state, + max_hits); + } + else +#endif + { + kernel_assert(kernel_data.bvh.use_qbvh == false); + return BVH_FUNCTION_FULL_NAME(BVH)(kg, + ray, + isect_array, + subsurface_object, + lcg_state, + max_hits); + } +} + #undef BVH_FUNCTION_NAME #undef BVH_FUNCTION_FEATURES - diff --git a/intern/cycles/kernel/geom/geom_bvh_traversal.h b/intern/cycles/kernel/geom/geom_bvh_traversal.h index e39228c33de..73d79fd78ee 100644 --- a/intern/cycles/kernel/geom/geom_bvh_traversal.h +++ b/intern/cycles/kernel/geom/geom_bvh_traversal.h @@ -17,6 +17,10 @@ * limitations under the License. */ +#ifdef __QBVH__ +#include "geom_qbvh_traversal.h" +#endif + /* This is a template BVH traversal function, where various features can be * enabled/disabled. This way we can compile optimized versions for each case * without new features slowing things down. @@ -28,14 +32,16 @@ * */ -#define FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0) - -ccl_device bool BVH_FUNCTION_NAME -(KernelGlobals *kg, const Ray *ray, Intersection *isect, const uint visibility -#if FEATURE(BVH_HAIR_MINIMUM_WIDTH) -, uint *lcg_state, float difl, float extmax +ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect, + const uint visibility +#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) + , uint *lcg_state, + float difl, + float extmax #endif -) + ) { /* todo: * - test if pushing distance on the stack helps (for non shadow rays) @@ -58,15 +64,20 @@ ccl_device bool BVH_FUNCTION_NAME float3 idir = bvh_inverse_direction(dir); int object = OBJECT_NONE; -#if FEATURE(BVH_MOTION) +#if BVH_FEATURE(BVH_MOTION) Transform ob_tfm; #endif isect->t = ray->t; - isect->object = OBJECT_NONE; - isect->prim = PRIM_NONE; isect->u = 0.0f; isect->v = 0.0f; + isect->prim = PRIM_NONE; + isect->object = OBJECT_NONE; + +#if defined(__KERNEL_DEBUG__) + isect->num_traversal_steps = 0; + isect->num_traversed_instances = 0; +#endif #if defined(__KERNEL_SSE2__) const shuffle_swap_t shuf_identity = shuffle_swap_identity(); @@ -85,6 +96,9 @@ ccl_device bool BVH_FUNCTION_NAME gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); #endif + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + /* traversal loop */ do { do { @@ -122,7 +136,7 @@ ccl_device bool BVH_FUNCTION_NAME NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f); NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t); -#if FEATURE(BVH_HAIR_MINIMUM_WIDTH) +#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) if(difl != 0.0f) { float hdiff = 1.0f + difl; float ldiff = 1.0f - difl; @@ -163,7 +177,7 @@ ccl_device bool BVH_FUNCTION_NAME ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat)); const ssef tminmax = minmax ^ pn; -#if FEATURE(BVH_HAIR_MINIMUM_WIDTH) +#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) if(difl != 0.0f) { float4 *tminmaxview = (float4*)&tminmax; float &c0min = tminmaxview->x, &c1min = tminmaxview->y; @@ -213,6 +227,7 @@ ccl_device bool BVH_FUNCTION_NAME } ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); traversalStack[stackPtr] = nodeAddrChild1; } else { @@ -226,80 +241,112 @@ ccl_device bool BVH_FUNCTION_NAME --stackPtr; } } + +#if defined(__KERNEL_DEBUG__) + isect->num_traversal_steps++; +#endif } /* if node is leaf, fetch triangle list */ if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+(BVH_NODE_SIZE-1)); + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE); int primAddr = __float_as_int(leaf.x); -#if FEATURE(BVH_INSTANCING) +#if BVH_FEATURE(BVH_INSTANCING) if(primAddr >= 0) { #endif - int primAddr2 = __float_as_int(leaf.y); + const int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); /* pop */ nodeAddr = traversalStack[stackPtr]; --stackPtr; /* primitive intersection */ - while(primAddr < primAddr2) { - bool hit; - uint type = kernel_tex_fetch(__prim_type, primAddr); - - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - hit = triangle_intersect(kg, isect, P, dir, visibility, object, primAddr); - break; + switch(type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + for(; primAddr < primAddr2; primAddr++) { +#if defined(__KERNEL_DEBUG__) + isect->num_traversal_steps++; +#endif + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) { + /* shadow ray early termination */ +#if defined(__KERNEL_SSE2__) + if(visibility == PATH_RAY_SHADOW_OPAQUE) + return true; + tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); +#else + if(visibility == PATH_RAY_SHADOW_OPAQUE) + return true; +#endif + } } -#if FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - hit = motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr); - break; + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + for(; primAddr < primAddr2; primAddr++) { +#if defined(__KERNEL_DEBUG__) + isect->num_traversal_steps++; +#endif + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) { + /* shadow ray early termination */ +#if defined(__KERNEL_SSE2__) + if(visibility == PATH_RAY_SHADOW_OPAQUE) + return true; + tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); +#else + if(visibility == PATH_RAY_SHADOW_OPAQUE) + return true; +#endif + } } + break; + } +#endif /* BVH_FEATURE(BVH_MOTION) */ +#if BVH_FEATURE(BVH_HAIR) + case PRIMITIVE_CURVE: + case PRIMITIVE_MOTION_CURVE: { + for(; primAddr < primAddr2; primAddr++) { +#if defined(__KERNEL_DEBUG__) + isect->num_traversal_steps++; #endif -#if FEATURE(BVH_HAIR) - case PRIMITIVE_CURVE: - case PRIMITIVE_MOTION_CURVE: { - if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + bool hit; + if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax); else hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax); - break; - } -#endif - default: { - hit = false; - break; - } - } - - /* shadow ray early termination */ + if(hit) { + /* shadow ray early termination */ #if defined(__KERNEL_SSE2__) - if(hit) { - if(visibility == PATH_RAY_SHADOW_OPAQUE) - return true; - - tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); - } + if(visibility == PATH_RAY_SHADOW_OPAQUE) + return true; + tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); #else - if(hit && visibility == PATH_RAY_SHADOW_OPAQUE) - return true; + if(visibility == PATH_RAY_SHADOW_OPAQUE) + return true; #endif - - primAddr++; + } + } + break; + } +#endif /* BVH_FEATURE(BVH_HAIR) */ } } -#if FEATURE(BVH_INSTANCING) +#if BVH_FEATURE(BVH_INSTANCING) else { /* instance push */ object = kernel_tex_fetch(__prim_object, -primAddr-1); -#if FEATURE(BVH_MOTION) +#if BVH_FEATURE(BVH_MOTION) bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_tfm); #else bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t); #endif + triangle_intersect_precalc(dir, &isect_precalc); #if defined(__KERNEL_SSE2__) Psplat[0] = ssef(P.x); @@ -312,24 +359,30 @@ ccl_device bool BVH_FUNCTION_NAME #endif ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; nodeAddr = kernel_tex_fetch(__object_node, object); + +#if defined(__KERNEL_DEBUG__) + isect->num_traversed_instances++; +#endif } } -#endif +#endif /* FEATURE(BVH_INSTANCING) */ } while(nodeAddr != ENTRYPOINT_SENTINEL); -#if FEATURE(BVH_INSTANCING) +#if BVH_FEATURE(BVH_INSTANCING) if(stackPtr >= 0) { kernel_assert(object != OBJECT_NONE); /* instance pop */ -#if FEATURE(BVH_MOTION) +#if BVH_FEATURE(BVH_MOTION) bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_tfm); #else bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); #endif + triangle_intersect_precalc(dir, &isect_precalc); #if defined(__KERNEL_SSE2__) Psplat[0] = ssef(P.x); @@ -345,13 +398,52 @@ ccl_device bool BVH_FUNCTION_NAME nodeAddr = traversalStack[stackPtr]; --stackPtr; } -#endif +#endif /* FEATURE(BVH_INSTANCING) */ } while(nodeAddr != ENTRYPOINT_SENTINEL); return (isect->prim != PRIM_NONE); } -#undef FEATURE +ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg, + const Ray *ray, + Intersection *isect, + const uint visibility +#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) + , uint *lcg_state, + float difl, + float extmax +#endif + ) +{ +#ifdef __QBVH__ + if(kernel_data.bvh.use_qbvh) { + return BVH_FUNCTION_FULL_NAME(QBVH)(kg, + ray, + isect, + visibility +#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) + , lcg_state, + difl, + extmax +#endif + ); + } + else +#endif + { + kernel_assert(kernel_data.bvh.use_qbvh == false); + return BVH_FUNCTION_FULL_NAME(BVH)(kg, + ray, + isect, + visibility +#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) + , lcg_state, + difl, + extmax +#endif + ); + } +} + #undef BVH_FUNCTION_NAME #undef BVH_FUNCTION_FEATURES - diff --git a/intern/cycles/kernel/geom/geom_bvh_volume.h b/intern/cycles/kernel/geom/geom_bvh_volume.h new file mode 100644 index 00000000000..41c784869f2 --- /dev/null +++ b/intern/cycles/kernel/geom/geom_bvh_volume.h @@ -0,0 +1,358 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef __QBVH__ +#include "geom_qbvh_volume.h" +#endif + +/* This is a template BVH traversal function for volumes, where + * various features can be enabled/disabled. This way we can compile optimized + * versions for each case without new features slowing things down. + * + * BVH_INSTANCING: object instancing + * BVH_HAIR: hair curve rendering + * BVH_MOTION: motion blur rendering + * + */ + +ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect) +{ + /* todo: + * - test if pushing distance on the stack helps (for non shadow rays) + * - separate version for shadow rays + * - likely and unlikely for if() statements + * - test restrict attribute for pointers + */ + + /* traversal stack in CUDA thread-local memory */ + int traversalStack[BVH_STACK_SIZE]; + traversalStack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + + /* ray parameters in registers */ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + + const uint visibility = PATH_RAY_ALL_VISIBILITY; + +#if BVH_FEATURE(BVH_MOTION) + Transform ob_tfm; +#endif + + isect->t = ray->t; + isect->u = 0.0f; + isect->v = 0.0f; + isect->prim = PRIM_NONE; + isect->object = OBJECT_NONE; + +#if defined(__KERNEL_SSE2__) + const shuffle_swap_t shuf_identity = shuffle_swap_identity(); + const shuffle_swap_t shuf_swap = shuffle_swap_swap(); + + const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); + ssef Psplat[3], idirsplat[3]; + shuffle_swap_t shufflexyz[3]; + + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t); + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +#endif + + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + + /* traversal loop */ + do { + do { + /* traverse internal nodes */ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { + bool traverseChild0, traverseChild1; + int nodeAddrChild1; + +#if !defined(__KERNEL_SSE2__) + /* Intersect two child bounding boxes, non-SSE version */ + float t = isect->t; + + /* fetch node data */ + float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0); + float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1); + float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2); + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3); + + /* intersect ray against child nodes */ + NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x; + NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x; + NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y; + NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y; + NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z; + NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z; + NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f); + NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t); + + NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x; + NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x; + NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y; + NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y; + NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z; + NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z; + NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f); + NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t); + + /* decide which nodes to traverse next */ + traverseChild0 = (c0max >= c0min); + traverseChild1 = (c1max >= c1min); + +#else // __KERNEL_SSE2__ + /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ + + /* fetch node data */ + const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE; + const float4 cnodes = ((float4*)bvh_nodes)[3]; + + /* intersect ray against child nodes */ + const ssef tminmaxx = (shuffle_swap(bvh_nodes[0], shufflexyz[0]) - Psplat[0]) * idirsplat[0]; + const ssef tminmaxy = (shuffle_swap(bvh_nodes[1], shufflexyz[1]) - Psplat[1]) * idirsplat[1]; + const ssef tminmaxz = (shuffle_swap(bvh_nodes[2], shufflexyz[2]) - Psplat[2]) * idirsplat[2]; + + /* calculate { c0min, c1min, -c0max, -c1max} */ + ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat)); + const ssef tminmax = minmax ^ pn; + + const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); + + /* decide which nodes to traverse next */ + traverseChild0 = (movemask(lrhit) & 1); + traverseChild1 = (movemask(lrhit) & 2); +#endif // __KERNEL_SSE2__ + + nodeAddr = __float_as_int(cnodes.x); + nodeAddrChild1 = __float_as_int(cnodes.y); + + if(traverseChild0 && traverseChild1) { + /* both children were intersected, push the farther one */ +#if !defined(__KERNEL_SSE2__) + bool closestChild1 = (c1min < c0min); +#else + bool closestChild1 = tminmax[1] < tminmax[0]; +#endif + + if(closestChild1) { + int tmp = nodeAddr; + nodeAddr = nodeAddrChild1; + nodeAddrChild1 = tmp; + } + + ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); + traversalStack[stackPtr] = nodeAddrChild1; + } + else { + /* one child was intersected */ + if(traverseChild1) { + nodeAddr = nodeAddrChild1; + } + else if(!traverseChild0) { + /* neither child was intersected */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + } + } + + /* if node is leaf, fetch triangle list */ + if(nodeAddr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE); + int primAddr = __float_as_int(leaf.x); + +#if BVH_FEATURE(BVH_INSTANCING) + if(primAddr >= 0) { +#endif + const int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + + /* pop */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + + /* primitive intersection */ + switch(type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + /* intersect ray against primitive */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr); + } + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + /* intersect ray against primitive */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr); + } + break; + } +#endif +#if BVH_FEATURE(BVH_HAIR) + case PRIMITIVE_CURVE: + case PRIMITIVE_MOTION_CURVE: { + /* intersect ray against primitive */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) + bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0); + else + bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0); + } + break; + } +#endif + default: { + break; + } + } + } +#if BVH_FEATURE(BVH_INSTANCING) + else { + /* instance push */ + object = kernel_tex_fetch(__prim_object, -primAddr-1); + int object_flag = kernel_tex_fetch(__object_flag, object); + + if(object_flag & SD_OBJECT_HAS_VOLUME) { + +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_tfm); +#else + bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t); +#endif + + triangle_intersect_precalc(dir, &isect_precalc); + +#if defined(__KERNEL_SSE2__) + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +#endif + + ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); + traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; + + nodeAddr = kernel_tex_fetch(__object_node, object); + } + else { + /* pop */ + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + +#if BVH_FEATURE(BVH_INSTANCING) + if(stackPtr >= 0) { + kernel_assert(object != OBJECT_NONE); + + /* instance pop */ +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_tfm); +#else + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); +#endif + + triangle_intersect_precalc(dir, &isect_precalc); + +#if defined(__KERNEL_SSE2__) + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +#endif + + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } +#endif /* FEATURE(BVH_MOTION) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return (isect->prim != PRIM_NONE); +} + +ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg, + const Ray *ray, + Intersection *isect) +{ +#ifdef __QBVH__ + if(kernel_data.bvh.use_qbvh) { + return BVH_FUNCTION_FULL_NAME(QBVH)(kg, + ray, + isect); + } + else +#endif + { + kernel_assert(kernel_data.bvh.use_qbvh == false); + return BVH_FUNCTION_FULL_NAME(BVH)(kg, + ray, + isect); + } +} + +#undef BVH_FUNCTION_NAME +#undef BVH_FUNCTION_FEATURES diff --git a/intern/cycles/kernel/geom/geom_bvh_volume_all.h b/intern/cycles/kernel/geom/geom_bvh_volume_all.h new file mode 100644 index 00000000000..b6db36f4b17 --- /dev/null +++ b/intern/cycles/kernel/geom/geom_bvh_volume_all.h @@ -0,0 +1,454 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef __QBVH__ +#include "geom_qbvh_volume_all.h" +#endif + +/* This is a template BVH traversal function for volumes, where + * various features can be enabled/disabled. This way we can compile optimized + * versions for each case without new features slowing things down. + * + * BVH_INSTANCING: object instancing + * BVH_HAIR: hair curve rendering + * BVH_MOTION: motion blur rendering + * + */ + +ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint max_hits) +{ + /* todo: + * - test if pushing distance on the stack helps (for non shadow rays) + * - separate version for shadow rays + * - likely and unlikely for if() statements + * - test restrict attribute for pointers + */ + + /* traversal stack in CUDA thread-local memory */ + int traversalStack[BVH_STACK_SIZE]; + traversalStack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + + /* ray parameters in registers */ + const float tmax = ray->t; + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = tmax; + + const uint visibility = PATH_RAY_ALL_VISIBILITY; + +#if BVH_FEATURE(BVH_MOTION) + Transform ob_tfm; +#endif + +#if BVH_FEATURE(BVH_INSTANCING) + int num_hits_in_instance = 0; +#endif + + uint num_hits = 0; + isect_array->t = tmax; + +#if defined(__KERNEL_SSE2__) + const shuffle_swap_t shuf_identity = shuffle_swap_identity(); + const shuffle_swap_t shuf_swap = shuffle_swap_swap(); + + const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); + ssef Psplat[3], idirsplat[3]; + shuffle_swap_t shufflexyz[3]; + + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t); + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +#endif + + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + + /* traversal loop */ + do { + do { + /* traverse internal nodes */ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { + bool traverseChild0, traverseChild1; + int nodeAddrChild1; + +#if !defined(__KERNEL_SSE2__) + /* Intersect two child bounding boxes, non-SSE version */ + float t = isect_array->t; + + /* fetch node data */ + float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0); + float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1); + float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2); + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3); + + /* intersect ray against child nodes */ + NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x; + NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x; + NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y; + NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y; + NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z; + NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z; + NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f); + NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t); + + NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x; + NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x; + NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y; + NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y; + NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z; + NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z; + NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f); + NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t); + + /* decide which nodes to traverse next */ + traverseChild0 = (c0max >= c0min); + traverseChild1 = (c1max >= c1min); + +#else // __KERNEL_SSE2__ + /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ + + /* fetch node data */ + const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE; + const float4 cnodes = ((float4*)bvh_nodes)[3]; + + /* intersect ray against child nodes */ + const ssef tminmaxx = (shuffle_swap(bvh_nodes[0], shufflexyz[0]) - Psplat[0]) * idirsplat[0]; + const ssef tminmaxy = (shuffle_swap(bvh_nodes[1], shufflexyz[1]) - Psplat[1]) * idirsplat[1]; + const ssef tminmaxz = (shuffle_swap(bvh_nodes[2], shufflexyz[2]) - Psplat[2]) * idirsplat[2]; + + /* calculate { c0min, c1min, -c0max, -c1max} */ + ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat)); + const ssef tminmax = minmax ^ pn; + + const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); + + /* decide which nodes to traverse next */ + traverseChild0 = (movemask(lrhit) & 1); + traverseChild1 = (movemask(lrhit) & 2); +#endif // __KERNEL_SSE2__ + + nodeAddr = __float_as_int(cnodes.x); + nodeAddrChild1 = __float_as_int(cnodes.y); + + if(traverseChild0 && traverseChild1) { + /* both children were intersected, push the farther one */ +#if !defined(__KERNEL_SSE2__) + bool closestChild1 = (c1min < c0min); +#else + bool closestChild1 = tminmax[1] < tminmax[0]; +#endif + + if(closestChild1) { + int tmp = nodeAddr; + nodeAddr = nodeAddrChild1; + nodeAddrChild1 = tmp; + } + + ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); + traversalStack[stackPtr] = nodeAddrChild1; + } + else { + /* one child was intersected */ + if(traverseChild1) { + nodeAddr = nodeAddrChild1; + } + else if(!traverseChild0) { + /* neither child was intersected */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + } + } + + /* if node is leaf, fetch triangle list */ + if(nodeAddr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE); + int primAddr = __float_as_int(leaf.x); + +#if BVH_FEATURE(BVH_INSTANCING) + if(primAddr >= 0) { +#endif + const int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + bool hit; + + /* pop */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + + /* primitive intersection */ + switch(type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + /* intersect ray against primitive */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, primAddr); + if(hit) { + /* Move on to next entry in intersections array. */ + isect_array++; + num_hits++; +#if BVH_FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +#endif + isect_array->t = isect_t; + if(num_hits == max_hits) { +#if BVH_FEATURE(BVH_INSTANCING) +#if BVH_FEATURE(BVH_MOTION) + float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir)); +#else + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + float t_fac = len(transform_direction(&tfm, 1.0f/idir)); +#endif + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + } + } + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + /* intersect ray against primitive */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, primAddr); + if(hit) { + /* Move on to next entry in intersections array. */ + isect_array++; + num_hits++; +#if BVH_FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +#endif + isect_array->t = isect_t; + if(num_hits == max_hits) { +#if BVH_FEATURE(BVH_INSTANCING) +# if BVH_FEATURE(BVH_MOTION) + float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir)); +# else + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + float t_fac = len(transform_direction(&tfm, 1.0f/idir)); +#endif + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + } + } + break; + } +#endif +#if BVH_FEATURE(BVH_HAIR) + case PRIMITIVE_CURVE: + case PRIMITIVE_MOTION_CURVE: { + /* intersect ray against primitive */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) + hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0); + else + hit = bvh_curve_intersect(kg, isect_array, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0); + if(hit) { + /* Move on to next entry in intersections array. */ + isect_array++; + num_hits++; +#if BVH_FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +#endif + isect_array->t = isect_t; + if(num_hits == max_hits) { +#if BVH_FEATURE(BVH_INSTANCING) +# if BVH_FEATURE(BVH_MOTION) + float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir)); +# else + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + float t_fac = len(transform_direction(&tfm, 1.0f/idir)); +#endif + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + } + } + break; + } +#endif + default: { + break; + } + } + } +#if BVH_FEATURE(BVH_INSTANCING) + else { + /* instance push */ + object = kernel_tex_fetch(__prim_object, -primAddr-1); + int object_flag = kernel_tex_fetch(__object_flag, object); + + if(object_flag & SD_OBJECT_HAS_VOLUME) { + +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm); +#else + bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); +#endif + + triangle_intersect_precalc(dir, &isect_precalc); + num_hits_in_instance = 0; + isect_array->t = isect_t; + +#if defined(__KERNEL_SSE2__) + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +#endif + + ++stackPtr; + kernel_assert(stackPtr < BVH_STACK_SIZE); + traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; + + nodeAddr = kernel_tex_fetch(__object_node, object); + } + else { + /* pop */ + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + +#if BVH_FEATURE(BVH_INSTANCING) + if(stackPtr >= 0) { + kernel_assert(object != OBJECT_NONE); + + if(num_hits_in_instance) { + float t_fac; +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_tfm); +#else + bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); +#endif + triangle_intersect_precalc(dir, &isect_precalc); + /* Scale isect->t to adjust for instancing. */ + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } + } + else { + float ignore_t = FLT_MAX; +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_tfm); +#else + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); +#endif + triangle_intersect_precalc(dir, &isect_precalc); + } + + isect_t = tmax; + isect_array->t = isect_t; + +#if defined(__KERNEL_SSE2__) + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); + + tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +#endif + + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } +#endif /* FEATURE(BVH_MOTION) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return num_hits; +} + +ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint max_hits) +{ +#ifdef __QBVH__ + if(kernel_data.bvh.use_qbvh) { + return BVH_FUNCTION_FULL_NAME(QBVH)(kg, + ray, + isect_array, + max_hits); + } + else +#endif + { + kernel_assert(kernel_data.bvh.use_qbvh == false); + return BVH_FUNCTION_FULL_NAME(BVH)(kg, + ray, + isect_array, + max_hits); + } +} + +#undef BVH_FUNCTION_NAME +#undef BVH_FUNCTION_FEATURES diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h index c4e9e2ababe..9653ad8f1bb 100644 --- a/intern/cycles/kernel/geom/geom_curve.h +++ b/intern/cycles/kernel/geom/geom_curve.h @@ -32,22 +32,22 @@ ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd, if(dy) *dy = 0.0f; #endif - return kernel_tex_fetch(__attributes_float, offset + sd->prim); + return kernel_tex_fetch(__attributes_float, offset + ccl_fetch(sd, prim)); } else if(elem == ATTR_ELEMENT_CURVE_KEY || elem == ATTR_ELEMENT_CURVE_KEY_MOTION) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); int k1 = k0 + 1; float f0 = kernel_tex_fetch(__attributes_float, offset + k0); float f1 = kernel_tex_fetch(__attributes_float, offset + k1); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*(f1 - f0); + if(dx) *dx = ccl_fetch(sd, du).dx*(f1 - f0); if(dy) *dy = 0.0f; #endif - return (1.0f - sd->u)*f0 + sd->u*f1; + return (1.0f - ccl_fetch(sd, u))*f0 + ccl_fetch(sd, u)*f1; } else { #ifdef __RAY_DIFFERENTIALS__ @@ -71,22 +71,22 @@ ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); #endif - return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim)); + return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + ccl_fetch(sd, prim))); } else if(elem == ATTR_ELEMENT_CURVE_KEY || elem == ATTR_ELEMENT_CURVE_KEY_MOTION) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); int k1 = k0 + 1; float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + k0)); float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + k1)); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*(f1 - f0); + if(dx) *dx = ccl_fetch(sd, du).dx*(f1 - f0); if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); #endif - return (1.0f - sd->u)*f0 + sd->u*f1; + return (1.0f - ccl_fetch(sd, u))*f0 + ccl_fetch(sd, u)*f1; } else { #ifdef __RAY_DIFFERENTIALS__ @@ -104,22 +104,22 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd) { float r = 0.0f; - if(sd->type & PRIMITIVE_ALL_CURVE) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { + float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); int k1 = k0 + 1; float4 P_curve[2]; - if(sd->type & PRIMITIVE_CURVE) { + if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) { P_curve[0]= kernel_tex_fetch(__curve_keys, k0); P_curve[1]= kernel_tex_fetch(__curve_keys, k1); } else { - motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve); + motion_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), k0, k1, P_curve); } - r = (P_curve[1].w - P_curve[0].w) * sd->u + P_curve[0].w; + r = (P_curve[1].w - P_curve[0].w) * ccl_fetch(sd, u) + P_curve[0].w; } return r*2.0f; @@ -130,8 +130,8 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd) ccl_device float3 curve_motion_center_location(KernelGlobals *kg, ShaderData *sd) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); int k1 = k0 + 1; float4 P_curve[2]; @@ -139,7 +139,7 @@ ccl_device float3 curve_motion_center_location(KernelGlobals *kg, ShaderData *sd P_curve[0]= kernel_tex_fetch(__curve_keys, k0); P_curve[1]= kernel_tex_fetch(__curve_keys, k1); - return float4_to_float3(P_curve[1]) * sd->u + float4_to_float3(P_curve[0]) * (1.0f - sd->u); + return float4_to_float3(P_curve[1]) * ccl_fetch(sd, u) + float4_to_float3(P_curve[0]) * (1.0f - ccl_fetch(sd, u)); } /* Curve tangent normal */ @@ -148,14 +148,14 @@ ccl_device float3 curve_tangent_normal(KernelGlobals *kg, ShaderData *sd) { float3 tgN = make_float3(0.0f,0.0f,0.0f); - if(sd->type & PRIMITIVE_ALL_CURVE) { + if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { - tgN = -(-sd->I - sd->dPdu * (dot(sd->dPdu,-sd->I) / len_squared(sd->dPdu))); + tgN = -(-ccl_fetch(sd, I) - ccl_fetch(sd, dPdu) * (dot(ccl_fetch(sd, dPdu),-ccl_fetch(sd, I)) / len_squared(ccl_fetch(sd, dPdu)))); tgN = normalize(tgN); /* need to find suitable scaled gd for corrected normal */ #if 0 - tgN = normalize(tgN - gd * sd->dPdu); + tgN = normalize(tgN - gd * ccl_fetch(sd, dPdu)); #endif } @@ -442,12 +442,12 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect float r_ext = mw_extension + r_curr; float coverage = 1.0f; - if (bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext|| bmaxx < -r_ext|| bminy > r_ext|| bmaxy < -r_ext) { + if(bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext|| bmaxx < -r_ext|| bminy > r_ext|| bmaxy < -r_ext) { /* the bounding box does not overlap the square centered at O */ tree += level; level = tree & -tree; } - else if (level == 1) { + else if(level == 1) { /* the maximum recursion depth is reached. * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0. @@ -459,13 +459,13 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect if(flags & CURVE_KN_RIBBONS) { float3 tg = (p_en - p_st); float w = tg.x * tg.x + tg.y * tg.y; - if (w == 0) { + if(w == 0) { tree++; level = tree & -tree; continue; } w = -(p_st.x * tg.x + p_st.y * tg.y) / w; - w = clamp((float)w, 0.0f, 1.0f); + w = saturate(w); /* compute u on the curve segment */ u = i_st * (1 - w) + i_en * w; @@ -474,17 +474,17 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect float3 p_curr = ((curve_coef[3] * u + curve_coef[2]) * u + curve_coef[1]) * u + curve_coef[0]; float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1]; - if (dot(tg, dp_st)< 0) + if(dot(tg, dp_st)< 0) dp_st *= -1; - if (dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) { + if(dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) { tree++; level = tree & -tree; continue; } float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1]; - if (dot(tg, dp_en) < 0) + if(dot(tg, dp_en) < 0) dp_en *= -1; - if (dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) { + if(dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) { tree++; level = tree & -tree; continue; @@ -500,13 +500,13 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect float d0 = d - r_curr; float d1 = d + r_curr; float inv_mw_extension = 1.0f/mw_extension; - if (d0 >= 0) + if(d0 >= 0) coverage = (min(d1 * inv_mw_extension, 1.0f) - min(d0 * inv_mw_extension, 1.0f)) * 0.5f; else // inside coverage = (min(d1 * inv_mw_extension, 1.0f) + min(-d0 * inv_mw_extension, 1.0f)) * 0.5f; } - if (p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) { + if(p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) { tree++; level = tree & -tree; continue; @@ -548,7 +548,7 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect float tb = 2*(tdif.z - tg.z*(tdifz + gd*(tdifz*gd + or1))); float tc = dot(tdif,tdif) - tdifz * tdifz * (1 + gd*gd) - or1*or1 - 2*or1*tdifz*gd; float td = tb*tb - 4*cyla*tc; - if (td < 0.0f) { + if(td < 0.0f) { tree++; level = tree & -tree; continue; @@ -559,10 +559,10 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect t = tcentre + correction; float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1]; - if (dot(tg, dp_st)< 0) + if(dot(tg, dp_st)< 0) dp_st *= -1; float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1]; - if (dot(tg, dp_en) < 0) + if(dot(tg, dp_en) < 0) dp_en *= -1; if(flags & CURVE_KN_BACKFACING && (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f)) { @@ -570,14 +570,14 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect t = tcentre + correction; } - if (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f) { + if(dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f) { tree++; level = tree & -tree; continue; } float w = (zcentre + (tg.z * correction)) * invl; - w = clamp((float)w, 0.0f, 1.0f); + w = saturate(w); /* compute u on the curve segment */ u = i_st * (1 - w) + i_en * w; @@ -600,12 +600,12 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect #endif { /* record intersection */ + isect->t = t; + isect->u = u; + isect->v = gd; isect->prim = curveAddr; isect->object = object; isect->type = type; - isect->u = u; - isect->v = gd; - isect->t = t; hit = true; } @@ -646,8 +646,8 @@ ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isec float4 P_curve[2]; if(type & PRIMITIVE_CURVE) { - P_curve[0]= kernel_tex_fetch(__curve_keys, k0); - P_curve[1]= kernel_tex_fetch(__curve_keys, k1); + P_curve[0] = kernel_tex_fetch(__curve_keys, k0); + P_curve[1] = kernel_tex_fetch(__curve_keys, k1); } else { int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object; @@ -709,7 +709,7 @@ ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isec const ssef sphere_dif1 = (dif + dif_second) * 0.5f; const ssef dir = load4f(direction); const ssef sphere_b_tmp = dot3_splat(dir, sphere_dif1); - const ssef sphere_dif2 = nmsub(sphere_b_tmp, dir, sphere_dif1); + const ssef sphere_dif2 = nmadd(sphere_b_tmp, dir, sphere_dif1); #endif float mr = max(r1, r2); @@ -777,7 +777,7 @@ ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isec float tc = dot3(tdif, tdif) - tdifz*tdifz - tdifma*tdifma; float td = tb*tb - 4*a*tc; - if (td < 0.0f) + if(td < 0.0f) return false; float rootd = 0.0f; @@ -818,7 +818,7 @@ ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isec if(t > 0.0f && t < isect->t && z >= 0 && z <= l) { - if (flags & CURVE_KN_ENCLOSEFILTER) { + if(flags & CURVE_KN_ENCLOSEFILTER) { float enc_ratio = 1.01f; if((difz > -r1 * enc_ratio) && (dot3(dif_second, tg) < r2 * enc_ratio)) { float a2 = 1.0f - (dirz*dirz*(1 + gd*gd*enc_ratio*enc_ratio)); @@ -835,12 +835,12 @@ ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isec #endif { /* record intersection */ + isect->t = t; + isect->u = z*invl; + isect->v = gd; isect->prim = curveAddr; isect->object = object; isect->type = type; - isect->u = z*invl; - isect->v = gd; - isect->t = t; return true; } @@ -890,7 +890,7 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con if(isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; + Transform tfm = ccl_fetch(sd, ob_itfm); #else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); #endif @@ -903,7 +903,7 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con int prim = kernel_tex_fetch(__prim_index, isect->prim); float4 v00 = kernel_tex_fetch(__curves, prim); - int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); int k1 = k0 + 1; float3 tg; @@ -914,14 +914,14 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con float4 P_curve[4]; - if(sd->type & PRIMITIVE_CURVE) { + if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) { P_curve[0] = kernel_tex_fetch(__curve_keys, ka); P_curve[1] = kernel_tex_fetch(__curve_keys, k0); P_curve[2] = kernel_tex_fetch(__curve_keys, k1); P_curve[3] = kernel_tex_fetch(__curve_keys, kb); } else { - motion_cardinal_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve); + motion_cardinal_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), ka, k0, k1, kb, P_curve); } float3 p[4]; @@ -933,43 +933,43 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con P = P + D*t; #ifdef __UV__ - sd->u = isect->u; - sd->v = 0.0f; + ccl_fetch(sd, u) = isect->u; + ccl_fetch(sd, v) = 0.0f; #endif tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3])); if(kernel_data.curve.curveflags & CURVE_KN_RIBBONS) { - sd->Ng = normalize(-(D - tg * (dot(tg, D)))); + ccl_fetch(sd, Ng) = normalize(-(D - tg * (dot(tg, D)))); } else { /* direction from inside to surface of curve */ float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]); - sd->Ng = normalize(P - p_curr); + ccl_fetch(sd, Ng) = normalize(P - p_curr); /* adjustment for changing radius */ float gd = isect->v; if(gd != 0.0f) { - sd->Ng = sd->Ng - gd * tg; - sd->Ng = normalize(sd->Ng); + ccl_fetch(sd, Ng) = ccl_fetch(sd, Ng) - gd * tg; + ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng)); } } /* todo: sometimes the normal is still so that this is detected as * backfacing even if cull backfaces is enabled */ - sd->N = sd->Ng; + ccl_fetch(sd, N) = ccl_fetch(sd, Ng); } else { float4 P_curve[2]; - if(sd->type & PRIMITIVE_CURVE) { + if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) { P_curve[0]= kernel_tex_fetch(__curve_keys, k0); P_curve[1]= kernel_tex_fetch(__curve_keys, k1); } else { - motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve); + motion_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), k0, k1, P_curve); } float l = 1.0f; @@ -980,39 +980,39 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con float3 dif = P - float4_to_float3(P_curve[0]); #ifdef __UV__ - sd->u = dot(dif,tg)/l; - sd->v = 0.0f; + ccl_fetch(sd, u) = dot(dif,tg)/l; + ccl_fetch(sd, v) = 0.0f; #endif - if (flag & CURVE_KN_TRUETANGENTGNORMAL) { - sd->Ng = -(D - tg * dot(tg, D)); - sd->Ng = normalize(sd->Ng); + if(flag & CURVE_KN_TRUETANGENTGNORMAL) { + ccl_fetch(sd, Ng) = -(D - tg * dot(tg, D)); + ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng)); } else { float gd = isect->v; /* direction from inside to surface of curve */ - sd->Ng = (dif - tg * sd->u * l) / (P_curve[0].w + sd->u * l * gd); + ccl_fetch(sd, Ng) = (dif - tg * ccl_fetch(sd, u) * l) / (P_curve[0].w + ccl_fetch(sd, u) * l * gd); /* adjustment for changing radius */ - if (gd != 0.0f) { - sd->Ng = sd->Ng - gd * tg; - sd->Ng = normalize(sd->Ng); + if(gd != 0.0f) { + ccl_fetch(sd, Ng) = ccl_fetch(sd, Ng) - gd * tg; + ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng)); } } - sd->N = sd->Ng; + ccl_fetch(sd, N) = ccl_fetch(sd, Ng); } #ifdef __DPDU__ /* dPdu/dPdv */ - sd->dPdu = tg; - sd->dPdv = cross(tg, sd->Ng); + ccl_fetch(sd, dPdu) = tg; + ccl_fetch(sd, dPdv) = cross(tg, ccl_fetch(sd, Ng)); #endif if(isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; + Transform tfm = ccl_fetch(sd, ob_tfm); #else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); #endif diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h index 1022a957b05..6de5aa7ea99 100644 --- a/intern/cycles/kernel/geom/geom_motion_curve.h +++ b/intern/cycles/kernel/geom/geom_motion_curve.h @@ -27,17 +27,22 @@ CCL_NAMESPACE_BEGIN ccl_device_inline int find_attribute_curve_motion(KernelGlobals *kg, int object, uint id, AttributeElement *elem) { - /* todo: find a better (faster) solution for this, maybe store offset per object */ + /* todo: find a better (faster) solution for this, maybe store offset per object. + * + * NOTE: currently it's not a bottleneck because in test scenes the loop below runs + * zero iterations and rendering is really slow with motion curves. For until other + * areas are speed up it's probably not so crucial to optimize this out. + */ uint attr_offset = object*kernel_data.bvh.attributes_map_stride + ATTR_PRIM_CURVE; uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - + while(attr_map.x != id) { attr_offset += ATTR_PRIM_TYPES; attr_map = kernel_tex_fetch(__attributes_map, attr_offset); } *elem = (AttributeElement)attr_map.y; - + /* return result */ return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z; } diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h index b275b89a8a4..86f93f242a1 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle.h @@ -130,8 +130,11 @@ ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg, ShaderData *s #ifdef __INTERSECTION_REFINE__ if(isect->object != OBJECT_NONE) { + if(UNLIKELY(t == 0.0f)) { + return P; + } #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; + Transform tfm = ccl_fetch(sd, ob_itfm); #else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); #endif @@ -158,7 +161,7 @@ ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg, ShaderData *s if(isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; + Transform tfm = ccl_fetch(sd, ob_tfm); #else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); #endif @@ -184,7 +187,7 @@ ccl_device_inline float3 motion_triangle_refine_subsurface(KernelGlobals *kg, Sh #ifdef __INTERSECTION_REFINE__ if(isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; + Transform tfm = ccl_fetch(sd, ob_itfm); #else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); #endif @@ -210,7 +213,7 @@ ccl_device_inline float3 motion_triangle_refine_subsurface(KernelGlobals *kg, Sh if(isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; + Transform tfm = ccl_fetch(sd, ob_tfm); #else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); #endif @@ -233,25 +236,25 @@ ccl_device_inline float3 motion_triangle_refine_subsurface(KernelGlobals *kg, Sh ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, bool subsurface) { /* get shader */ - sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); + ccl_fetch(sd, shader) = kernel_tex_fetch(__tri_shader, ccl_fetch(sd, prim)); /* get motion info */ int numsteps, numverts; - object_motion_info(kg, sd->object, &numsteps, &numverts, NULL); + object_motion_info(kg, ccl_fetch(sd, object), &numsteps, &numverts, NULL); /* figure out which steps we need to fetch and their interpolation factor */ int maxstep = numsteps*2; - int step = min((int)(sd->time*maxstep), maxstep-1); - float t = sd->time*maxstep - step; + int step = min((int)(ccl_fetch(sd, time)*maxstep), maxstep-1); + float t = ccl_fetch(sd, time)*maxstep - step; /* find attribute */ AttributeElement elem; - int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + int offset = find_attribute_motion(kg, ccl_fetch(sd, object), ATTR_STD_MOTION_VERTEX_POSITION, &elem); kernel_assert(offset != ATTR_STD_NOT_FOUND); /* fetch vertex coordinates */ float3 verts[3], next_verts[3]; - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim)); + float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim))); motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts); @@ -265,33 +268,33 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderD #ifdef __SUBSURFACE__ if(!subsurface) #endif - sd->P = motion_triangle_refine(kg, sd, isect, ray, verts); + ccl_fetch(sd, P) = motion_triangle_refine(kg, sd, isect, ray, verts); #ifdef __SUBSURFACE__ else - sd->P = motion_triangle_refine_subsurface(kg, sd, isect, ray, verts); + ccl_fetch(sd, P) = motion_triangle_refine_subsurface(kg, sd, isect, ray, verts); #endif /* compute face normal */ float3 Ng; - if(sd->flag & SD_NEGATIVE_SCALE_APPLIED) + if(ccl_fetch(sd, flag) & SD_NEGATIVE_SCALE_APPLIED) Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0])); else Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0])); - sd->Ng = Ng; - sd->N = Ng; + ccl_fetch(sd, Ng) = Ng; + ccl_fetch(sd, N) = Ng; /* compute derivatives of P w.r.t. uv */ #ifdef __DPDU__ - sd->dPdu = (verts[0] - verts[2]); - sd->dPdv = (verts[1] - verts[2]); + ccl_fetch(sd, dPdu) = (verts[0] - verts[2]); + ccl_fetch(sd, dPdv) = (verts[1] - verts[2]); #endif /* compute smooth normal */ - if(sd->shader & SHADER_SMOOTH_NORMAL) { + if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) { /* find attribute */ AttributeElement elem; - int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem); + int offset = find_attribute_motion(kg, ccl_fetch(sd, object), ATTR_STD_MOTION_VERTEX_NORMAL, &elem); kernel_assert(offset != ATTR_STD_NOT_FOUND); /* fetch vertex coordinates */ @@ -305,10 +308,10 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderD normals[2] = (1.0f - t)*normals[2] + t*next_normals[2]; /* interpolate between vertices */ - float u = sd->u; - float v = sd->v; + float u = ccl_fetch(sd, u); + float v = ccl_fetch(sd, v); float w = 1.0f - u - v; - sd->N = (u*normals[0] + v*normals[1] + w*normals[2]); + ccl_fetch(sd, N) = (u*normals[0] + v*normals[1] + w*normals[2]); } } @@ -336,12 +339,12 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, Intersection if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility) #endif { + isect->t = t; + isect->u = u; + isect->v = v; isect->prim = triAddr; isect->object = object; isect->type = PRIMITIVE_MOTION_TRIANGLE; - isect->u = u; - isect->v = v; - isect->t = t; return true; } @@ -388,12 +391,12 @@ ccl_device_inline void motion_triangle_intersect_subsurface(KernelGlobals *kg, I /* record intersection */ Intersection *isect = &isect_array[hit]; + isect->t = t; + isect->u = u; + isect->v = v; isect->prim = triAddr; isect->object = object; isect->type = PRIMITIVE_MOTION_TRIANGLE; - isect->u = u; - isect->v = v; - isect->t = t; } } #endif diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h index 91edd5863ac..9d0a008fff1 100644 --- a/intern/cycles/kernel/geom/geom_object.h +++ b/intern/cycles/kernel/geom/geom_object.h @@ -123,9 +123,9 @@ ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals *kg ccl_device_inline void object_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P) { #ifdef __OBJECT_MOTION__ - *P = transform_point(&sd->ob_tfm, *P); + *P = transform_point_auto(&ccl_fetch(sd, ob_tfm), *P); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM); *P = transform_point(&tfm, *P); #endif } @@ -135,9 +135,9 @@ ccl_device_inline void object_position_transform(KernelGlobals *kg, const Shader ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P) { #ifdef __OBJECT_MOTION__ - *P = transform_point(&sd->ob_itfm, *P); + *P = transform_point_auto(&ccl_fetch(sd, ob_itfm), *P); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM); *P = transform_point(&tfm, *P); #endif } @@ -147,9 +147,9 @@ ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, cons ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N) { #ifdef __OBJECT_MOTION__ - *N = normalize(transform_direction_transposed(&sd->ob_tfm, *N)); + *N = normalize(transform_direction_transposed_auto(&ccl_fetch(sd, ob_tfm), *N)); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM); *N = normalize(transform_direction_transposed(&tfm, *N)); #endif } @@ -159,9 +159,9 @@ ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, const ccl_device_inline void object_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N) { #ifdef __OBJECT_MOTION__ - *N = normalize(transform_direction_transposed(&sd->ob_itfm, *N)); + *N = normalize(transform_direction_transposed_auto(&ccl_fetch(sd, ob_itfm), *N)); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM); *N = normalize(transform_direction_transposed(&tfm, *N)); #endif } @@ -171,9 +171,9 @@ ccl_device_inline void object_normal_transform(KernelGlobals *kg, const ShaderDa ccl_device_inline void object_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D) { #ifdef __OBJECT_MOTION__ - *D = transform_direction(&sd->ob_tfm, *D); + *D = transform_direction_auto(&ccl_fetch(sd, ob_tfm), *D); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM); *D = transform_direction(&tfm, *D); #endif } @@ -183,9 +183,9 @@ ccl_device_inline void object_dir_transform(KernelGlobals *kg, const ShaderData ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D) { #ifdef __OBJECT_MOTION__ - *D = transform_direction(&sd->ob_itfm, *D); + *D = transform_direction_auto(&ccl_fetch(sd, ob_itfm), *D); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM); *D = transform_direction(&tfm, *D); #endif } @@ -194,13 +194,13 @@ ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, const Sha ccl_device_inline float3 object_location(KernelGlobals *kg, const ShaderData *sd) { - if(sd->object == OBJECT_NONE) + if(ccl_fetch(sd, object) == OBJECT_NONE) return make_float3(0.0f, 0.0f, 0.0f); #ifdef __OBJECT_MOTION__ - return make_float3(sd->ob_tfm.x.w, sd->ob_tfm.y.w, sd->ob_tfm.z.w); + return make_float3(ccl_fetch(sd, ob_tfm).x.w, ccl_fetch(sd, ob_tfm).y.w, ccl_fetch(sd, ob_tfm).z.w); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM); return make_float3(tfm.x.w, tfm.y.w, tfm.z.w); #endif } @@ -243,7 +243,7 @@ ccl_device_inline float object_random_number(KernelGlobals *kg, int object) ccl_device_inline int object_particle_id(KernelGlobals *kg, int object) { if(object == OBJECT_NONE) - return 0.0f; + return 0; int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES; float4 f = kernel_tex_fetch(__objects, offset); @@ -296,7 +296,7 @@ ccl_device_inline void object_motion_info(KernelGlobals *kg, int object, int *nu ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd) { - return kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2 + 1); + return kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2 + 1); } /* Particle data from which object was instanced */ @@ -377,7 +377,7 @@ ccl_device_inline float3 bvh_inverse_direction(float3 dir) /* Transform ray into object space to enter static object in BVH */ -ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t) +ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t) { Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); @@ -391,9 +391,41 @@ ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ra *t *= len; } +#ifdef __QBVH__ +/* Same as above, but optimized for QBVH scene intersection, + * which needs to modify two max distances. + * + * TODO(sergey): Investigate if passing NULL instead of t1 gets optimized + * so we can avoid having this duplication. + */ +ccl_device_inline void qbvh_instance_push(KernelGlobals *kg, + int object, + const Ray *ray, + float3 *P, + float3 *dir, + float3 *idir, + float *t, + float *t1) +{ + Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + + *P = transform_point(&tfm, ray->P); + + float len; + *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len)); + *idir = bvh_inverse_direction(*dir); + + if(*t != FLT_MAX) + *t *= len; + + if(*t1 != -FLT_MAX) + *t1 *= len; +} +#endif + /* Transorm ray to exit static object in BVH */ -ccl_device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t) +ccl_device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t) { if(*t != FLT_MAX) { Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); @@ -421,7 +453,7 @@ ccl_device_inline void bvh_instance_pop_factor(KernelGlobals *kg, int object, co #ifdef __OBJECT_MOTION__ /* Transform ray into object space to enter motion blurred object in BVH */ -ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t, Transform *tfm) +ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t, Transform *tfm) { Transform itfm; *tfm = object_fetch_transform_motion_test(kg, object, ray->time, &itfm); @@ -436,9 +468,36 @@ ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, c *t *= len; } +#ifdef __QBVH__ +/* Same as above, but optimized for QBVH scene intersection, + * which needs to modify two max distances. + * + * TODO(sergey): Investigate if passing NULL instead of t1 gets optimized + * so we can avoid having this duplication. + */ +ccl_device_inline void qbvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t, float *t1, Transform *tfm) +{ + Transform itfm; + *tfm = object_fetch_transform_motion_test(kg, object, ray->time, &itfm); + + *P = transform_point(&itfm, ray->P); + + float len; + *dir = bvh_clamp_direction(normalize_len(transform_direction(&itfm, ray->D), &len)); + *idir = bvh_inverse_direction(*dir); + + + if(*t != FLT_MAX) + *t *= len; + + if(*t1 != -FLT_MAX) + *t1 *= len; +} +#endif + /* Transorm ray to exit motion blurred object in BVH */ -ccl_device_inline void bvh_instance_motion_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t, Transform *tfm) +ccl_device_inline void bvh_instance_motion_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t, Transform *tfm) { if(*t != FLT_MAX) *t *= len(transform_direction(tfm, 1.0f/(*idir))); @@ -461,5 +520,38 @@ ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals *kg, int obj #endif +/* TODO(sergey): This is only for until we've got OpenCL 2.0 + * on all devices we consider supported. It'll be replaced with + * generic address space. + */ + +#ifdef __KERNEL_OPENCL__ +ccl_device_inline void object_dir_transform_addrspace(KernelGlobals *kg, + const ShaderData *sd, + ccl_addr_space float3 *D) +{ + float3 private_D = *D; + object_dir_transform(kg, sd, &private_D); + *D = private_D; +} + +ccl_device_inline void object_normal_transform_addrspace(KernelGlobals *kg, + const ShaderData *sd, + ccl_addr_space float3 *N) +{ + float3 private_N = *N; + object_normal_transform(kg, sd, &private_N); + *N = private_N; +} +#endif + +#ifndef __KERNEL_OPENCL__ +# define object_dir_transform_auto object_dir_transform +# define object_normal_transform_auto object_normal_transform +#else +# define object_dir_transform_auto object_dir_transform_addrspace +# define object_normal_transform_auto object_normal_transform_addrspace +#endif + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h index 5df6c75df86..30f12d32355 100644 --- a/intern/cycles/kernel/geom/geom_primitive.h +++ b/intern/cycles/kernel/geom/geom_primitive.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ /* Primitive Utilities @@ -25,16 +25,16 @@ CCL_NAMESPACE_BEGIN ccl_device float primitive_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy) { - if(sd->type & PRIMITIVE_ALL_TRIANGLE) { + if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) { return triangle_attribute_float(kg, sd, elem, offset, dx, dy); } #ifdef __HAIR__ - else if(sd->type & PRIMITIVE_ALL_CURVE) { + else if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { return curve_attribute_float(kg, sd, elem, offset, dx, dy); } #endif #ifdef __VOLUME__ - else if(sd->object != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) { + else if(ccl_fetch(sd, object) != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) { return volume_attribute_float(kg, sd, elem, offset, dx, dy); } #endif @@ -47,16 +47,16 @@ ccl_device float primitive_attribute_float(KernelGlobals *kg, const ShaderData * ccl_device float3 primitive_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy) { - if(sd->type & PRIMITIVE_ALL_TRIANGLE) { + if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) { return triangle_attribute_float3(kg, sd, elem, offset, dx, dy); } #ifdef __HAIR__ - else if(sd->type & PRIMITIVE_ALL_CURVE) { + else if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { return curve_attribute_float3(kg, sd, elem, offset, dx, dy); } #endif #ifdef __VOLUME__ - else if(sd->object != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) { + else if(ccl_fetch(sd, object) != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) { return volume_attribute_float3(kg, sd, elem, offset, dx, dy); } #endif @@ -108,9 +108,9 @@ ccl_device bool primitive_ptex(KernelGlobals *kg, ShaderData *sd, float2 *uv, in ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd) { #ifdef __HAIR__ - if(sd->type & PRIMITIVE_ALL_CURVE) + if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) #ifdef __DPDU__ - return normalize(sd->dPdu); + return normalize(ccl_fetch(sd, dPdu)); #else return make_float3(0.0f, 0.0f, 0.0f); #endif @@ -124,12 +124,12 @@ ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd) float3 data = primitive_attribute_float3(kg, sd, attr_elem, attr_offset, NULL, NULL); data = make_float3(-(data.y - 0.5f), (data.x - 0.5f), 0.0f); object_normal_transform(kg, sd, &data); - return cross(sd->N, normalize(cross(data, sd->N))); + return cross(ccl_fetch(sd, N), normalize(cross(data, ccl_fetch(sd, N)))); } else { /* otherwise use surface derivatives */ #ifdef __DPDU__ - return normalize(sd->dPdu); + return normalize(ccl_fetch(sd, dPdu)); #else return make_float3(0.0f, 0.0f, 0.0f); #endif @@ -144,15 +144,16 @@ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd) float3 center; #ifdef __HAIR__ - if(sd->type & PRIMITIVE_ALL_CURVE) { + bool is_curve_primitive = ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE; + if(is_curve_primitive) { center = curve_motion_center_location(kg, sd); - if(!(sd->flag & SD_TRANSFORM_APPLIED)) + if(!(ccl_fetch(sd, flag) & SD_TRANSFORM_APPLIED)) object_position_transform(kg, sd, ¢er); } else #endif - center = sd->P; + center = ccl_fetch(sd, P); float3 motion_pre = center, motion_post = center; @@ -163,30 +164,37 @@ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd) if(offset != ATTR_STD_NOT_FOUND) { /* get motion info */ int numverts, numkeys; - object_motion_info(kg, sd->object, NULL, &numverts, &numkeys); + object_motion_info(kg, ccl_fetch(sd, object), NULL, &numverts, &numkeys); /* lookup attributes */ - int offset_next = (sd->type & PRIMITIVE_ALL_TRIANGLE)? offset + numverts: offset + numkeys; + int offset_next = (ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)? offset + numverts: offset + numkeys; motion_pre = primitive_attribute_float3(kg, sd, elem, offset, NULL, NULL); motion_post = primitive_attribute_float3(kg, sd, elem, offset_next, NULL, NULL); + +#ifdef __HAIR__ + if(is_curve_primitive && (ccl_fetch(sd, flag) & SD_OBJECT_HAS_VERTEX_MOTION) == 0) { + object_position_transform(kg, sd, &motion_pre); + object_position_transform(kg, sd, &motion_post); + } +#endif } /* object motion. note that depending on the mesh having motion vectors, this * transformation was set match the world/object space of motion_pre/post */ Transform tfm; - tfm = object_fetch_vector_transform(kg, sd->object, OBJECT_VECTOR_MOTION_PRE); + tfm = object_fetch_vector_transform(kg, ccl_fetch(sd, object), OBJECT_VECTOR_MOTION_PRE); motion_pre = transform_point(&tfm, motion_pre); - tfm = object_fetch_vector_transform(kg, sd->object, OBJECT_VECTOR_MOTION_POST); + tfm = object_fetch_vector_transform(kg, ccl_fetch(sd, object), OBJECT_VECTOR_MOTION_POST); motion_post = transform_point(&tfm, motion_post); float3 motion_center; /* camera motion, for perspective/orthographic motion.pre/post will be a * world-to-raster matrix, for panorama it's world-to-camera */ - if (kernel_data.cam.type != CAMERA_PANORAMA) { + if(kernel_data.cam.type != CAMERA_PANORAMA) { tfm = kernel_data.cam.worldtoraster; motion_center = transform_perspective(&tfm, center); diff --git a/intern/cycles/kernel/geom/geom_qbvh.h b/intern/cycles/kernel/geom/geom_qbvh.h new file mode 100644 index 00000000000..37deaac0800 --- /dev/null +++ b/intern/cycles/kernel/geom/geom_qbvh.h @@ -0,0 +1,147 @@ +/* + * Copyright 2011-2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +struct QBVHStackItem { + int addr; + float dist; +}; + +/* TOOD(sergey): Investigate if using instrinsics helps for both + * stack item swap and float comparison. + */ +ccl_device_inline void qbvh_item_swap(QBVHStackItem *__restrict a, + QBVHStackItem *__restrict b) +{ + QBVHStackItem tmp = *a; + *a = *b; + *b = tmp; +} + +ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1, + QBVHStackItem *__restrict s2, + QBVHStackItem *__restrict s3) +{ + if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); } + if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); } + if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); } +} + +ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1, + QBVHStackItem *__restrict s2, + QBVHStackItem *__restrict s3, + QBVHStackItem *__restrict s4) +{ + if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); } + if(s4->dist < s3->dist) { qbvh_item_swap(s4, s3); } + if(s3->dist < s1->dist) { qbvh_item_swap(s3, s1); } + if(s4->dist < s2->dist) { qbvh_item_swap(s4, s2); } + if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); } +} + +ccl_device_inline int qbvh_node_intersect(KernelGlobals *__restrict kg, + const ssef& tnear, + const ssef& tfar, +#ifdef __KERNEL_AVX2__ + const sse3f& org_idir, +#else + const sse3f& org, +#endif + const sse3f& idir, + const int near_x, + const int near_y, + const int near_z, + const int far_x, + const int far_y, + const int far_z, + const int nodeAddr, + ssef *__restrict dist) +{ + const int offset = nodeAddr*BVH_QNODE_SIZE; +#ifdef __KERNEL_AVX2__ + const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, org_idir.x); + const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, org_idir.y); + const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, org_idir.z); + const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, org_idir.x); + const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, org_idir.y); + const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, org_idir.z); +#else + const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - org.x) * idir.x; + const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - org.y) * idir.y; + const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - org.z) * idir.z; + const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - org.x) * idir.x; + const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - org.y) * idir.y; + const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - org.z) * idir.z; +#endif + +#ifdef __KERNEL_SSE41__ + const ssef tNear = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, tnear)); + const ssef tFar = mini(mini(tfar_x, tfar_y), mini(tfar_z, tfar)); + const sseb vmask = cast(tNear) > cast(tFar); + int mask = (int)movemask(vmask)^0xf; +#else + const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear); + const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar); + const sseb vmask = tNear <= tFar; + int mask = (int)movemask(vmask); +#endif + *dist = tNear; + return mask; +} + +ccl_device_inline int qbvh_node_intersect_robust(KernelGlobals *__restrict kg, + const ssef& tnear, + const ssef& tfar, +#ifdef __KERNEL_AVX2__ + const sse3f& P_idir, +#else + const sse3f& P, +#endif + const sse3f& idir, + const int near_x, + const int near_y, + const int near_z, + const int far_x, + const int far_y, + const int far_z, + const int nodeAddr, + const float difl, + ssef *__restrict dist) +{ + const int offset = nodeAddr*BVH_QNODE_SIZE; +#ifdef __KERNEL_AVX2__ + const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, P_idir.x); + const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, P_idir.y); + const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, P_idir.z); + const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, P_idir.x); + const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, P_idir.y); + const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, P_idir.z); +#else + const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - P.x) * idir.x; + const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - P.y) * idir.y; + const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - P.z) * idir.z; + const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - P.x) * idir.x; + const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - P.y) * idir.y; + const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - P.z) * idir.z; +#endif + + const float round_down = 1.0f - difl; + const float round_up = 1.0f + difl; + const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear); + const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar); + const sseb vmask = round_down*tNear <= round_up*tFar; + *dist = tNear; + return (int)movemask(vmask); +} diff --git a/intern/cycles/kernel/geom/geom_qbvh_shadow.h b/intern/cycles/kernel/geom/geom_qbvh_shadow.h new file mode 100644 index 00000000000..dc37e6ecfa4 --- /dev/null +++ b/intern/cycles/kernel/geom/geom_qbvh_shadow.h @@ -0,0 +1,403 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This is a template BVH traversal function, where various features can be + * enabled/disabled. This way we can compile optimized versions for each case + * without new features slowing things down. + * + * BVH_INSTANCING: object instancing + * BVH_HAIR: hair curve rendering + * BVH_MOTION: motion blur rendering + * + */ + +ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint max_hits, + uint *num_hits) +{ + /* TODO(sergey): + * - Likely and unlikely for if() statements. + * - Test restrict attribute for pointers. + */ + + /* Traversal stack in CUDA thread-local memory. */ + QBVHStackItem traversalStack[BVH_QSTACK_SIZE]; + traversalStack[0].addr = ENTRYPOINT_SENTINEL; + + /* Traversal variables in registers. */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + + /* Ray parameters in registers. */ + const float tmax = ray->t; + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = tmax; + +#if BVH_FEATURE(BVH_MOTION) + Transform ob_tfm; +#endif + + *num_hits = 0; + isect_array->t = tmax; + +#ifndef __KERNEL_SSE41__ + if(!isfinite(P.x)) { + return false; + } +#endif + +#if BVH_FEATURE(BVH_INSTANCING) + int num_hits_in_instance = 0; +#endif + + ssef tnear(0.0f), tfar(tmax); + sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + +#ifdef __KERNEL_AVX2__ + float3 P_idir = P*idir; + sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +#else + sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + + /* Offsets to select the side that becomes the lower or upper bound. */ + int near_x, near_y, near_z; + int far_x, far_y, far_z; + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + + /* Traversal loop. */ + do { + do { + /* Traverse internal nodes. */ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { + ssef dist; + int traverseChild = qbvh_node_intersect(kg, + tnear, + tfar, +#ifdef __KERNEL_AVX2__ + P_idir4, +#else + org, +#endif + idir4, + near_x, near_y, near_z, + far_x, far_y, far_z, + nodeAddr, + &dist); + + if(traverseChild != 0) { + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6); + + /* One child is hit, continue with that child. */ + int r = __bscf(traverseChild); + if(traverseChild == 0) { + nodeAddr = __float_as_int(cnodes[r]); + continue; + } + + /* Two children are hit, push far child, and continue with + * closer child. + */ + int c0 = __float_as_int(cnodes[r]); + float d0 = ((float*)&dist)[r]; + r = __bscf(traverseChild); + int c1 = __float_as_int(cnodes[r]); + float d1 = ((float*)&dist)[r]; + if(traverseChild == 0) { + if(d1 < d0) { + nodeAddr = c1; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; + continue; + } + else { + nodeAddr = c0; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; + continue; + } + } + + /* Here starts the slow path for 3 or 4 hit children. We push + * all nodes onto the stack to sort them there. + */ + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; + + /* Three children are hit, push all onto stack and sort 3 + * stack items, continue with closest child. + */ + r = __bscf(traverseChild); + int c2 = __float_as_int(cnodes[r]); + float d2 = ((float*)&dist)[r]; + if(traverseChild == 0) { + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; + qbvh_stack_sort(&traversalStack[stackPtr], + &traversalStack[stackPtr - 1], + &traversalStack[stackPtr - 2]); + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + continue; + } + + /* Four children are hit, push all onto stack and sort 4 + * stack items, continue with closest child. + */ + r = __bscf(traverseChild); + int c3 = __float_as_int(cnodes[r]); + float d3 = ((float*)&dist)[r]; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c3; + traversalStack[stackPtr].dist = d3; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; + qbvh_stack_sort(&traversalStack[stackPtr], + &traversalStack[stackPtr - 1], + &traversalStack[stackPtr - 2], + &traversalStack[stackPtr - 3]); + } + + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } + + /* If node is leaf, fetch triangle list. */ + if(nodeAddr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE); +#ifdef __VISIBILITY_FLAG__ + if((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) { + /* Pop. */ + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + continue; + } +#endif + + int primAddr = __float_as_int(leaf.x); + +#if BVH_FEATURE(BVH_INSTANCING) + if(primAddr >= 0) { +#endif + int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + const uint p_type = type & PRIMITIVE_ALL; + + /* Pop. */ + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + + /* Primitive intersection. */ + while(primAddr < primAddr2) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + + bool hit; + + /* todo: specialized intersect functions which don't fill in + * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW? + * might give a few % performance improvement */ + + switch(p_type) { + case PRIMITIVE_TRIANGLE: { + hit = triangle_intersect(kg, &isect_precalc, isect_array, P, PATH_RAY_SHADOW, object, primAddr); + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, primAddr); + break; + } +#endif +#if BVH_FEATURE(BVH_HAIR) + case PRIMITIVE_CURVE: + case PRIMITIVE_MOTION_CURVE: { + if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) + hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0); + else + hit = bvh_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0); + break; + } +#endif + default: { + hit = false; + break; + } + } + + /* Shadow ray early termination. */ + if(hit) { + /* detect if this surface has a shader with transparent shadows */ + + /* todo: optimize so primitive visibility flag indicates if + * the primitive has a transparent shadow shader? */ + int prim = kernel_tex_fetch(__prim_index, isect_array->prim); + int shader = 0; + +#ifdef __HAIR__ + if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE) +#endif + { + shader = kernel_tex_fetch(__tri_shader, prim); + } +#ifdef __HAIR__ + else { + float4 str = kernel_tex_fetch(__curves, prim); + shader = __float_as_int(str.z); + } +#endif + int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2); + + /* if no transparent shadows, all light is blocked */ + if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) { + return true; + } + /* if maximum number of hits reached, block all light */ + else if(*num_hits == max_hits) { + return true; + } + + /* move on to next entry in intersections array */ + isect_array++; + (*num_hits)++; +#if BVH_FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +#endif + + isect_array->t = isect_t; + } + + primAddr++; + } + } +#if BVH_FEATURE(BVH_INSTANCING) + else { + /* Instance push. */ + object = kernel_tex_fetch(__prim_object, -primAddr-1); + +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm); +#else + bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); +#endif + + num_hits_in_instance = 0; + isect_array->t = isect_t; + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + tfar = ssef(isect_t); + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); +#ifdef __KERNEL_AVX2__ + P_idir = P*idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +#else + org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + triangle_intersect_precalc(dir, &isect_precalc); + + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; + + nodeAddr = kernel_tex_fetch(__object_node, object); + + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + +#if BVH_FEATURE(BVH_INSTANCING) + if(stackPtr >= 0) { + kernel_assert(object != OBJECT_NONE); + + if(num_hits_in_instance) { + float t_fac; + +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_tfm); +#else + bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); +#endif + + /* scale isect->t to adjust for instancing */ + for(int i = 0; i < num_hits_in_instance; i++) + (isect_array-i-1)->t *= t_fac; + } + else { + float ignore_t = FLT_MAX; + +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_tfm); +#else + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); +#endif + } + + isect_t = tmax; + isect_array->t = isect_t; + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + tfar = ssef(tmax); + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); +#ifdef __KERNEL_AVX2__ + P_idir = P*idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +#else + org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + triangle_intersect_precalc(dir, &isect_precalc); + + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return false; +} diff --git a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h new file mode 100644 index 00000000000..d85e1a4691e --- /dev/null +++ b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h @@ -0,0 +1,326 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This is a template BVH traversal function for subsurface scattering, where + * various features can be enabled/disabled. This way we can compile optimized + * versions for each case without new features slowing things down. + * + * BVH_INSTANCING: object instancing + * BVH_MOTION: motion blur rendering + * + */ + +ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + int subsurface_object, + uint *lcg_state, + int max_hits) +{ + /* TODO(sergey): + * - Test if pushing distance on the stack helps (for non shadow rays). + * - Separate version for shadow rays. + * - Likely and unlikely for if() statements. + * - SSE for hair. + * - Test restrict attribute for pointers. + */ + + /* Traversal stack in CUDA thread-local memory. */ + QBVHStackItem traversalStack[BVH_QSTACK_SIZE]; + traversalStack[0].addr = ENTRYPOINT_SENTINEL; + + /* Traversal variables in registers. */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + + /* Ray parameters in registers. */ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = ray->t; + uint num_hits = 0; + +#if BVH_FEATURE(BVH_MOTION) + Transform ob_tfm; +#endif + +#ifndef __KERNEL_SSE41__ + if(!isfinite(P.x)) { + return 0; + } +#endif + + ssef tnear(0.0f), tfar(isect_t); + sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + +#ifdef __KERNEL_AVX2__ + float3 P_idir = P*idir; + sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +#else + sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + + /* Offsets to select the side that becomes the lower or upper bound. */ + int near_x, near_y, near_z; + int far_x, far_y, far_z; + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + + /* Traversal loop. */ + do { + do { + /* Traverse internal nodes. */ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { + ssef dist; + int traverseChild = qbvh_node_intersect(kg, + tnear, + tfar, +#ifdef __KERNEL_AVX2__ + P_idir4, +#else + org, +#endif + idir4, + near_x, near_y, near_z, + far_x, far_y, far_z, + nodeAddr, + &dist); + + if(traverseChild != 0) { + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6); + + /* One child is hit, continue with that child. */ + int r = __bscf(traverseChild); + if(traverseChild == 0) { + nodeAddr = __float_as_int(cnodes[r]); + continue; + } + + /* Two children are hit, push far child, and continue with + * closer child. + */ + int c0 = __float_as_int(cnodes[r]); + float d0 = ((float*)&dist)[r]; + r = __bscf(traverseChild); + int c1 = __float_as_int(cnodes[r]); + float d1 = ((float*)&dist)[r]; + if(traverseChild == 0) { + if(d1 < d0) { + nodeAddr = c1; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; + continue; + } + else { + nodeAddr = c0; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; + continue; + } + } + + /* Here starts the slow path for 3 or 4 hit children. We push + * all nodes onto the stack to sort them there. + */ + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; + + /* Three children are hit, push all onto stack and sort 3 + * stack items, continue with closest child. + */ + r = __bscf(traverseChild); + int c2 = __float_as_int(cnodes[r]); + float d2 = ((float*)&dist)[r]; + if(traverseChild == 0) { + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; + qbvh_stack_sort(&traversalStack[stackPtr], + &traversalStack[stackPtr - 1], + &traversalStack[stackPtr - 2]); + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + continue; + } + + /* Four children are hit, push all onto stack and sort 4 + * stack items, continue with closest child. + */ + r = __bscf(traverseChild); + int c3 = __float_as_int(cnodes[r]); + float d3 = ((float*)&dist)[r]; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c3; + traversalStack[stackPtr].dist = d3; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; + qbvh_stack_sort(&traversalStack[stackPtr], + &traversalStack[stackPtr - 1], + &traversalStack[stackPtr - 2], + &traversalStack[stackPtr - 3]); + } + + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } + + /* If node is leaf, fetch triangle list. */ + if(nodeAddr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE); + int primAddr = __float_as_int(leaf.x); + +#if BVH_FEATURE(BVH_INSTANCING) + if(primAddr >= 0) { +#endif + int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + + /* Pop. */ + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + + /* Primitive intersection. */ + switch(type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + /* Intersect ray against primitive, */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* Only primitives from the same object. */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + if(tri_object != subsurface_object) { + continue; + } + triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, object, primAddr, isect_t, &num_hits, lcg_state, max_hits); + } + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + /* Intersect ray against primitive. */ + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* Only primitives from the same object. */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + if(tri_object != subsurface_object) { + continue; + } + motion_triangle_intersect_subsurface(kg, isect_array, P, dir, ray->time, object, primAddr, isect_t, &num_hits, lcg_state, max_hits); + } + break; + } +#endif + default: + break; + } + } +#if BVH_FEATURE(BVH_INSTANCING) + else { + /* Instance push. */ + if(subsurface_object == kernel_tex_fetch(__prim_object, -primAddr-1)) { + object = subsurface_object; + +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm); +#else + bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); +#endif + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + tfar = ssef(isect_t); + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); +#ifdef __KERNEL_AVX2__ + P_idir = P*idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +#else + org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + triangle_intersect_precalc(dir, &isect_precalc); + + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; + + nodeAddr = kernel_tex_fetch(__object_node, object); + } + else { + /* Pop. */ + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } + + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + +#if BVH_FEATURE(BVH_INSTANCING) + if(stackPtr >= 0) { + kernel_assert(object != OBJECT_NONE); + + /* Instance pop. */ +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm); +#else + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect_t); +#endif + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + tfar = ssef(isect_t); + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); +#ifdef __KERNEL_AVX2__ + P_idir = P*idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +#else + org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + triangle_intersect_precalc(dir, &isect_precalc); + + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return num_hits; +} diff --git a/intern/cycles/kernel/geom/geom_qbvh_traversal.h b/intern/cycles/kernel/geom/geom_qbvh_traversal.h new file mode 100644 index 00000000000..7e356ea062b --- /dev/null +++ b/intern/cycles/kernel/geom/geom_qbvh_traversal.h @@ -0,0 +1,425 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This is a template BVH traversal function, where various features can be + * enabled/disabled. This way we can compile optimized versions for each case + * without new features slowing things down. + * + * BVH_INSTANCING: object instancing + * BVH_HAIR: hair curve rendering + * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width + * BVH_MOTION: motion blur rendering + * + */ + +ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect, + const uint visibility +#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) + ,uint *lcg_state, + float difl, + float extmax +#endif + ) +{ + /* TODO(sergey): + * - Test if pushing distance on the stack helps (for non shadow rays). + * - Separate version for shadow rays. + * - Likely and unlikely for if() statements. + * - Test restrict attribute for pointers. + */ + + /* Traversal stack in CUDA thread-local memory. */ + QBVHStackItem traversalStack[BVH_QSTACK_SIZE]; + traversalStack[0].addr = ENTRYPOINT_SENTINEL; + traversalStack[0].dist = -FLT_MAX; + + /* Traversal variables in registers. */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + float nodeDist = -FLT_MAX; + + /* Ray parameters in registers. */ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + +#if BVH_FEATURE(BVH_MOTION) + Transform ob_tfm; +#endif + +#ifndef __KERNEL_SSE41__ + if(!isfinite(P.x)) { + return false; + } +#endif + + isect->t = ray->t; + isect->u = 0.0f; + isect->v = 0.0f; + isect->prim = PRIM_NONE; + isect->object = OBJECT_NONE; + +#if defined(__KERNEL_DEBUG__) + isect->num_traversal_steps = 0; + isect->num_traversed_instances = 0; +#endif + + ssef tnear(0.0f), tfar(ray->t); + sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + +#ifdef __KERNEL_AVX2__ + float3 P_idir = P*idir; + sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +#else + sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + + /* Offsets to select the side that becomes the lower or upper bound. */ + int near_x, near_y, near_z; + int far_x, far_y, far_z; + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + + /* Traversal loop. */ + do { + do { + /* Traverse internal nodes. */ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { + if(UNLIKELY(nodeDist > isect->t)) { + /* Pop. */ + nodeAddr = traversalStack[stackPtr].addr; + nodeDist = traversalStack[stackPtr].dist; + --stackPtr; + continue; + } + + int traverseChild; + ssef dist; + +#if defined(__KERNEL_DEBUG__) + isect->num_traversal_steps++; +#endif + +#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) + if(difl != 0.0f) { + /* NOTE: We extend all the child BB instead of fetching + * and checking visibility flags for each of the, + * + * Need to test if doing opposite would be any faster. + */ + traverseChild = qbvh_node_intersect_robust(kg, + tnear, + tfar, +#ifdef __KERNEL_AVX2__ + P_idir4, +#else + org, +#endif + idir4, + near_x, near_y, near_z, + far_x, far_y, far_z, + nodeAddr, + difl, + &dist); + } + else +#endif + { + traverseChild = qbvh_node_intersect(kg, + tnear, + tfar, +#ifdef __KERNEL_AVX2__ + P_idir4, +#else + org, +#endif + idir4, + near_x, near_y, near_z, + far_x, far_y, far_z, + nodeAddr, + &dist); + } + + if(traverseChild != 0) { + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6); + + /* One child is hit, continue with that child. */ + int r = __bscf(traverseChild); + float d0 = ((float*)&dist)[r]; + if(traverseChild == 0) { + nodeAddr = __float_as_int(cnodes[r]); + nodeDist = d0; + continue; + } + + /* Two children are hit, push far child, and continue with + * closer child. + */ + int c0 = __float_as_int(cnodes[r]); + r = __bscf(traverseChild); + int c1 = __float_as_int(cnodes[r]); + float d1 = ((float*)&dist)[r]; + if(traverseChild == 0) { + if(d1 < d0) { + nodeAddr = c1; + nodeDist = d1; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; + continue; + } + else { + nodeAddr = c0; + nodeDist = d0; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; + continue; + } + } + + /* Here starts the slow path for 3 or 4 hit children. We push + * all nodes onto the stack to sort them there. + */ + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; + + /* Three children are hit, push all onto stack and sort 3 + * stack items, continue with closest child. + */ + r = __bscf(traverseChild); + int c2 = __float_as_int(cnodes[r]); + float d2 = ((float*)&dist)[r]; + if(traverseChild == 0) { + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; + qbvh_stack_sort(&traversalStack[stackPtr], + &traversalStack[stackPtr - 1], + &traversalStack[stackPtr - 2]); + nodeAddr = traversalStack[stackPtr].addr; + nodeDist = traversalStack[stackPtr].dist; + --stackPtr; + continue; + } + + /* Four children are hit, push all onto stack and sort 4 + * stack items, continue with closest child. + */ + r = __bscf(traverseChild); + int c3 = __float_as_int(cnodes[r]); + float d3 = ((float*)&dist)[r]; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c3; + traversalStack[stackPtr].dist = d3; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; + qbvh_stack_sort(&traversalStack[stackPtr], + &traversalStack[stackPtr - 1], + &traversalStack[stackPtr - 2], + &traversalStack[stackPtr - 3]); + } + + nodeAddr = traversalStack[stackPtr].addr; + nodeDist = traversalStack[stackPtr].dist; + --stackPtr; + } + + /* If node is leaf, fetch triangle list. */ + if(nodeAddr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE); + +#ifdef __VISIBILITY_FLAG__ + if(UNLIKELY((nodeDist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0))) +#else + if(UNLIKELY((nodeDist > isect->t))) +#endif + { + /* Pop. */ + nodeAddr = traversalStack[stackPtr].addr; + nodeDist = traversalStack[stackPtr].dist; + --stackPtr; + continue; + } + + int primAddr = __float_as_int(leaf.x); + +#if BVH_FEATURE(BVH_INSTANCING) + if(primAddr >= 0) { +#endif + int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + + /* Pop. */ + nodeAddr = traversalStack[stackPtr].addr; + nodeDist = traversalStack[stackPtr].dist; + --stackPtr; + + /* Primitive intersection. */ + switch(type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + for(; primAddr < primAddr2; primAddr++) { +#if defined(__KERNEL_DEBUG__) + isect->num_traversal_steps++; +#endif + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) { + tfar = ssef(isect->t); + /* Shadow ray early termination. */ + if(visibility == PATH_RAY_SHADOW_OPAQUE) + return true; + } + } + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + for(; primAddr < primAddr2; primAddr++) { +#if defined(__KERNEL_DEBUG__) + isect->num_traversal_steps++; +#endif + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) { + tfar = ssef(isect->t); + /* Shadow ray early termination. */ + if(visibility == PATH_RAY_SHADOW_OPAQUE) + return true; + } + } + break; + } +#endif /* BVH_FEATURE(BVH_MOTION) */ +#if BVH_FEATURE(BVH_HAIR) + case PRIMITIVE_CURVE: + case PRIMITIVE_MOTION_CURVE: { + for(; primAddr < primAddr2; primAddr++) { +#if defined(__KERNEL_DEBUG__) + isect->num_traversal_steps++; +#endif + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + bool hit; + if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) + hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax); + else + hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax); + if(hit) { + tfar = ssef(isect->t); + /* Shadow ray early termination. */ + if(visibility == PATH_RAY_SHADOW_OPAQUE) + return true; + } + } + break; + } +#endif /* BVH_FEATURE(BVH_HAIR) */ + } + } +#if BVH_FEATURE(BVH_INSTANCING) + else { + /* Instance push. */ + object = kernel_tex_fetch(__prim_object, -primAddr-1); + +#if BVH_FEATURE(BVH_MOTION) + qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist, &ob_tfm); +#else + qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist); +#endif + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + tfar = ssef(isect->t); + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); +#ifdef __KERNEL_AVX2__ + P_idir = P*idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +#else + org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + triangle_intersect_precalc(dir, &isect_precalc); + + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; + traversalStack[stackPtr].dist = -FLT_MAX; + + nodeAddr = kernel_tex_fetch(__object_node, object); + +#if defined(__KERNEL_DEBUG__) + isect->num_traversed_instances++; +#endif + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + +#if BVH_FEATURE(BVH_INSTANCING) + if(stackPtr >= 0) { + kernel_assert(object != OBJECT_NONE); + + /* Instance pop. */ +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_tfm); +#else + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); +#endif + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + tfar = ssef(isect->t); + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); +#ifdef __KERNEL_AVX2__ + P_idir = P*idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +#else + org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + triangle_intersect_precalc(dir, &isect_precalc); + + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr].addr; + nodeDist = traversalStack[stackPtr].dist; + --stackPtr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return (isect->prim != PRIM_NONE); +} diff --git a/intern/cycles/kernel/geom/geom_qbvh_volume.h b/intern/cycles/kernel/geom/geom_qbvh_volume.h new file mode 100644 index 00000000000..d8cfa3a4061 --- /dev/null +++ b/intern/cycles/kernel/geom/geom_qbvh_volume.h @@ -0,0 +1,351 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This is a template BVH traversal function for volumes, where + * various features can be enabled/disabled. This way we can compile optimized + * versions for each case without new features slowing things down. + * + * BVH_INSTANCING: object instancing + * BVH_HAIR: hair curve rendering + * BVH_MOTION: motion blur rendering + * + */ + +ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect) +{ + /* TODO(sergey): + * - Test if pushing distance on the stack helps. + * - Likely and unlikely for if() statements. + * - Test restrict attribute for pointers. + */ + + /* Traversal stack in CUDA thread-local memory. */ + QBVHStackItem traversalStack[BVH_QSTACK_SIZE]; + traversalStack[0].addr = ENTRYPOINT_SENTINEL; + + /* Traversal variables in registers. */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + + /* Ray parameters in registers. */ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + + const uint visibility = PATH_RAY_ALL_VISIBILITY; + +#if BVH_FEATURE(BVH_MOTION) + Transform ob_tfm; +#endif + +#ifndef __KERNEL_SSE41__ + if(!isfinite(P.x)) { + return false; + } +#endif + + isect->t = ray->t; + isect->u = 0.0f; + isect->v = 0.0f; + isect->prim = PRIM_NONE; + isect->object = OBJECT_NONE; + + ssef tnear(0.0f), tfar(ray->t); + sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + +#ifdef __KERNEL_AVX2__ + float3 P_idir = P*idir; + sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +#else + sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + + /* Offsets to select the side that becomes the lower or upper bound. */ + int near_x, near_y, near_z; + int far_x, far_y, far_z; + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + + /* Traversal loop. */ + do { + do { + /* Traverse internal nodes. */ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { + ssef dist; + int traverseChild = qbvh_node_intersect(kg, + tnear, + tfar, +#ifdef __KERNEL_AVX2__ + P_idir4, +#else + org, +#endif + idir4, + near_x, near_y, near_z, + far_x, far_y, far_z, + nodeAddr, + &dist); + + if(traverseChild != 0) { + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6); + + /* One child is hit, continue with that child. */ + int r = __bscf(traverseChild); + if(traverseChild == 0) { + nodeAddr = __float_as_int(cnodes[r]); + continue; + } + + /* Two children are hit, push far child, and continue with + * closer child. + */ + int c0 = __float_as_int(cnodes[r]); + float d0 = ((float*)&dist)[r]; + r = __bscf(traverseChild); + int c1 = __float_as_int(cnodes[r]); + float d1 = ((float*)&dist)[r]; + if(traverseChild == 0) { + if(d1 < d0) { + nodeAddr = c1; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; + continue; + } + else { + nodeAddr = c0; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; + continue; + } + } + + /* Here starts the slow path for 3 or 4 hit children. We push + * all nodes onto the stack to sort them there. + */ + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; + + /* Three children are hit, push all onto stack and sort 3 + * stack items, continue with closest child. + */ + r = __bscf(traverseChild); + int c2 = __float_as_int(cnodes[r]); + float d2 = ((float*)&dist)[r]; + if(traverseChild == 0) { + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; + qbvh_stack_sort(&traversalStack[stackPtr], + &traversalStack[stackPtr - 1], + &traversalStack[stackPtr - 2]); + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + continue; + } + + /* Four children are hit, push all onto stack and sort 4 + * stack items, continue with closest child. + */ + r = __bscf(traverseChild); + int c3 = __float_as_int(cnodes[r]); + float d3 = ((float*)&dist)[r]; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c3; + traversalStack[stackPtr].dist = d3; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; + qbvh_stack_sort(&traversalStack[stackPtr], + &traversalStack[stackPtr - 1], + &traversalStack[stackPtr - 2], + &traversalStack[stackPtr - 3]); + } + + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } + + /* If node is leaf, fetch triangle list. */ + if(nodeAddr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE); + int primAddr = __float_as_int(leaf.x); + +#if BVH_FEATURE(BVH_INSTANCING) + if(primAddr >= 0) { +#endif + int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + const uint p_type = type & PRIMITIVE_ALL; + + /* Pop. */ + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + + /* Primitive intersection. */ + switch(p_type) { + case PRIMITIVE_TRIANGLE: { + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* Only primitives from volume object. */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + /* Intersect ray against primitive. */ + triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr); + } + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* Only primitives from volume object. */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + /* Intersect ray against primitive. */ + motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr); + } + break; + } +#endif +#if BVH_FEATURE(BVH_HAIR) + case PRIMITIVE_CURVE: + case PRIMITIVE_MOTION_CURVE: { + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* Only primitives from volume object. */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + /* Intersect ray against primitive. */ + if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) + bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0); + else + bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0); + } + break; + } +#endif + } + } +#if BVH_FEATURE(BVH_INSTANCING) + else { + /* Instance push. */ + object = kernel_tex_fetch(__prim_object, -primAddr-1); + int object_flag = kernel_tex_fetch(__object_flag, object); + + if(object_flag & SD_OBJECT_HAS_VOLUME) { + +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_tfm); +#else + bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t); +#endif + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + tfar = ssef(isect->t); + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); +#ifdef __KERNEL_AVX2__ + P_idir = P*idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +#else + org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + triangle_intersect_precalc(dir, &isect_precalc); + + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; + + nodeAddr = kernel_tex_fetch(__object_node, object); + } + else { + /* Pop. */ + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + +#if BVH_FEATURE(BVH_INSTANCING) + if(stackPtr >= 0) { + kernel_assert(object != OBJECT_NONE); + + /* Instance pop. */ +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_tfm); +#else + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); +#endif + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + tfar = ssef(isect->t); + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); +#ifdef __KERNEL_AVX2__ + P_idir = P*idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +#else + org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + triangle_intersect_precalc(dir, &isect_precalc); + + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return (isect->prim != PRIM_NONE); +} diff --git a/intern/cycles/kernel/geom/geom_qbvh_volume_all.h b/intern/cycles/kernel/geom/geom_qbvh_volume_all.h new file mode 100644 index 00000000000..056ca9a1ad9 --- /dev/null +++ b/intern/cycles/kernel/geom/geom_qbvh_volume_all.h @@ -0,0 +1,446 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This is a template BVH traversal function for volumes, where + * various features can be enabled/disabled. This way we can compile optimized + * versions for each case without new features slowing things down. + * + * BVH_INSTANCING: object instancing + * BVH_HAIR: hair curve rendering + * BVH_MOTION: motion blur rendering + * + */ + +ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint max_hits) +{ + /* TODO(sergey): + * - Test if pushing distance on the stack helps. + * - Likely and unlikely for if() statements. + * - Test restrict attribute for pointers. + */ + + /* Traversal stack in CUDA thread-local memory. */ + QBVHStackItem traversalStack[BVH_QSTACK_SIZE]; + traversalStack[0].addr = ENTRYPOINT_SENTINEL; + + /* Traversal variables in registers. */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + + /* Ray parameters in registers. */ + const float tmax = ray->t; + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = tmax; + + const uint visibility = PATH_RAY_ALL_VISIBILITY; + +#if BVH_FEATURE(BVH_MOTION) + Transform ob_tfm; +#endif + + uint num_hits = 0; + isect_array->t = tmax; + +#ifndef __KERNEL_SSE41__ + if(!isfinite(P.x)) { + return false; + } +#endif + +#if BVH_FEATURE(BVH_INSTANCING) + int num_hits_in_instance = 0; +#endif + + ssef tnear(0.0f), tfar(isect_t); + sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + +#ifdef __KERNEL_AVX2__ + float3 P_idir = P*idir; + sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +#else + sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + + /* Offsets to select the side that becomes the lower or upper bound. */ + int near_x, near_y, near_z; + int far_x, far_y, far_z; + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + + /* Traversal loop. */ + do { + do { + /* Traverse internal nodes. */ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { + ssef dist; + int traverseChild = qbvh_node_intersect(kg, + tnear, + tfar, +#ifdef __KERNEL_AVX2__ + P_idir4, +#else + org, +#endif + idir4, + near_x, near_y, near_z, + far_x, far_y, far_z, + nodeAddr, + &dist); + + if(traverseChild != 0) { + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6); + + /* One child is hit, continue with that child. */ + int r = __bscf(traverseChild); + if(traverseChild == 0) { + nodeAddr = __float_as_int(cnodes[r]); + continue; + } + + /* Two children are hit, push far child, and continue with + * closer child. + */ + int c0 = __float_as_int(cnodes[r]); + float d0 = ((float*)&dist)[r]; + r = __bscf(traverseChild); + int c1 = __float_as_int(cnodes[r]); + float d1 = ((float*)&dist)[r]; + if(traverseChild == 0) { + if(d1 < d0) { + nodeAddr = c1; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; + continue; + } + else { + nodeAddr = c0; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; + continue; + } + } + + /* Here starts the slow path for 3 or 4 hit children. We push + * all nodes onto the stack to sort them there. + */ + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; + + /* Three children are hit, push all onto stack and sort 3 + * stack items, continue with closest child. + */ + r = __bscf(traverseChild); + int c2 = __float_as_int(cnodes[r]); + float d2 = ((float*)&dist)[r]; + if(traverseChild == 0) { + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; + qbvh_stack_sort(&traversalStack[stackPtr], + &traversalStack[stackPtr - 1], + &traversalStack[stackPtr - 2]); + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + continue; + } + + /* Four children are hit, push all onto stack and sort 4 + * stack items, continue with closest child. + */ + r = __bscf(traverseChild); + int c3 = __float_as_int(cnodes[r]); + float d3 = ((float*)&dist)[r]; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c3; + traversalStack[stackPtr].dist = d3; + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; + qbvh_stack_sort(&traversalStack[stackPtr], + &traversalStack[stackPtr - 1], + &traversalStack[stackPtr - 2], + &traversalStack[stackPtr - 3]); + } + + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } + + /* If node is leaf, fetch triangle list. */ + if(nodeAddr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE); + int primAddr = __float_as_int(leaf.x); + +#if BVH_FEATURE(BVH_INSTANCING) + if(primAddr >= 0) { +#endif + int primAddr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + const uint p_type = type & PRIMITIVE_ALL; + bool hit; + + /* Pop. */ + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + + /* Primitive intersection. */ + switch(p_type) { + case PRIMITIVE_TRIANGLE: { + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* Only primitives from volume object. */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + /* Intersect ray against primitive. */ + hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, primAddr); + if(hit) { + /* Move on to next entry in intersections array. */ + isect_array++; + num_hits++; +#if BVH_FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +#endif + isect_array->t = isect_t; + if(num_hits == max_hits) { +#if BVH_FEATURE(BVH_INSTANCING) +#if BVH_FEATURE(BVH_MOTION) + float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir)); +#else + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + float t_fac = len(transform_direction(&tfm, 1.0f/idir)); +#endif + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + } + } + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* Only primitives from volume object. */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + /* Intersect ray against primitive. */ + hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, primAddr); + if(hit) { + /* Move on to next entry in intersections array. */ + isect_array++; + num_hits++; +#if BVH_FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +#endif + isect_array->t = isect_t; + if(num_hits == max_hits) { +#if BVH_FEATURE(BVH_INSTANCING) +# if BVH_FEATURE(BVH_MOTION) + float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir)); +# else + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + float t_fac = len(transform_direction(&tfm, 1.0f/idir)); +#endif + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + } + } + break; + } +#endif +#if BVH_FEATURE(BVH_HAIR) + case PRIMITIVE_CURVE: + case PRIMITIVE_MOTION_CURVE: { + for(; primAddr < primAddr2; primAddr++) { + kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type); + /* Only primitives from volume object. */ + uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + /* Intersect ray against primitive. */ + if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) + hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0); + else + hit = bvh_curve_intersect(kg, isect_array, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0); + if(hit) { + /* Move on to next entry in intersections array. */ + isect_array++; + num_hits++; +#if BVH_FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +#endif + isect_array->t = isect_t; + if(num_hits == max_hits) { +#if BVH_FEATURE(BVH_INSTANCING) +# if BVH_FEATURE(BVH_MOTION) + float t_fac = len(transform_direction(&ob_tfm, 1.0f/idir)); +# else + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + float t_fac = len(transform_direction(&tfm, 1.0f/idir)); +#endif + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + } + } + break; + } +#endif + } + } +#if BVH_FEATURE(BVH_INSTANCING) + else { + /* Instance push. */ + object = kernel_tex_fetch(__prim_object, -primAddr-1); + int object_flag = kernel_tex_fetch(__object_flag, object); + + if(object_flag & SD_OBJECT_HAS_VOLUME) { + +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm); +#else + bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); +#endif + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + tfar = ssef(isect_t); + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); +#ifdef __KERNEL_AVX2__ + P_idir = P*idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +#else + org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + triangle_intersect_precalc(dir, &isect_precalc); + num_hits_in_instance = 0; + isect_array->t = isect_t; + + ++stackPtr; + kernel_assert(stackPtr < BVH_QSTACK_SIZE); + traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; + + nodeAddr = kernel_tex_fetch(__object_node, object); + } + else { + /* Pop. */ + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + +#if BVH_FEATURE(BVH_INSTANCING) + if(stackPtr >= 0) { + kernel_assert(object != OBJECT_NONE); + + /* Instance pop. */ + if(num_hits_in_instance) { + float t_fac; +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_tfm); +#else + bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); +#endif + triangle_intersect_precalc(dir, &isect_precalc); + /* Scale isect->t to adjust for instancing. */ + for(int i = 0; i < num_hits_in_instance; i++) { + (isect_array-i-1)->t *= t_fac; + } + } + else { + float ignore_t = FLT_MAX; +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_tfm); +#else + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); +#endif + triangle_intersect_precalc(dir, &isect_precalc); + } + + if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } + if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } + if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + tfar = ssef(isect_t); + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); +#ifdef __KERNEL_AVX2__ + P_idir = P*idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); +#else + org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); +#endif + triangle_intersect_precalc(dir, &isect_precalc); + isect_t = tmax; + isect_array->t = isect_t; + + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr].addr; + --stackPtr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return num_hits; +} diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h index 3d3a5e72485..995dfac5b09 100644 --- a/intern/cycles/kernel/geom/geom_triangle.h +++ b/intern/cycles/kernel/geom/geom_triangle.h @@ -23,111 +23,18 @@ CCL_NAMESPACE_BEGIN -/* Refine triangle intersection to more precise hit point. For rays that travel - * far the precision is often not so good, this reintersects the primitive from - * a closer distance. */ - -ccl_device_inline float3 triangle_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray) -{ - float3 P = ray->P; - float3 D = ray->D; - float t = isect->t; - -#ifdef __INTERSECTION_REFINE__ - if(isect->object != OBJECT_NONE) { -#ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; -#else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); -#endif - - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D*t); - D = normalize_len(D, &t); - } - - P = P + D*t; - - float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0); - float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; - float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z); - float rt = Oz * invDz; - - P = P + D*rt; - - if(isect->object != OBJECT_NONE) { -#ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; -#else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); -#endif - - P = transform_point(&tfm, P); - } - - return P; -#else - return P + D*t; -#endif -} - -/* same as above, except that isect->t is assumed to be in object space for instancing */ -ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray) -{ - float3 P = ray->P; - float3 D = ray->D; - float t = isect->t; - -#ifdef __INTERSECTION_REFINE__ - if(isect->object != OBJECT_NONE) { -#ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; -#else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); -#endif - - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D); - D = normalize(D); - } - - P = P + D*t; - - float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0); - float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; - float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z); - float rt = Oz * invDz; - - P = P + D*rt; - - if(isect->object != OBJECT_NONE) { -#ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; -#else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); -#endif - - P = transform_point(&tfm, P); - } - - return P; -#else - return P + D*t; -#endif -} - /* normal on triangle */ ccl_device_inline float3 triangle_normal(KernelGlobals *kg, ShaderData *sd) { /* load triangle vertices */ - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim)); + float4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z))); /* return normal */ - if(sd->flag & SD_NEGATIVE_SCALE_APPLIED) + if(ccl_fetch(sd, flag) & SD_NEGATIVE_SCALE_APPLIED) return normalize(cross(v2 - v0, v1 - v0)); else return normalize(cross(v1 - v0, v2 - v0)); @@ -137,7 +44,7 @@ ccl_device_inline float3 triangle_normal(KernelGlobals *kg, ShaderData *sd) ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int object, int prim, float u, float v, float3 *P, float3 *Ng, int *shader) { /* load triangle vertices */ - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim)); + float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); @@ -164,7 +71,7 @@ ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int object, int ccl_device_inline void triangle_vertices(KernelGlobals *kg, int prim, float3 P[3]) { - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim)); + float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); @@ -176,7 +83,7 @@ ccl_device_inline void triangle_vertices(KernelGlobals *kg, int prim, float3 P[3 ccl_device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int prim, float u, float v) { /* load triangle vertices */ - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim)); + float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.x))); float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.y))); @@ -187,10 +94,10 @@ ccl_device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int prim, flo /* Ray differentials on triangle */ -ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, int prim, float3 *dPdu, float3 *dPdv) +ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, int prim, ccl_addr_space float3 *dPdu, ccl_addr_space float3 *dPdv) { /* fetch triangle vertex coordinates */ - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim)); + float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); float3 p0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); float3 p1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); @@ -209,34 +116,34 @@ ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *s if(dx) *dx = 0.0f; if(dy) *dy = 0.0f; - return kernel_tex_fetch(__attributes_float, offset + sd->prim); + return kernel_tex_fetch(__attributes_float, offset + ccl_fetch(sd, prim)); } else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) { - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim)); + float4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); float f0 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.x)); float f1 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.y)); float f2 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.z)); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; + if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2; + if(dy) *dy = ccl_fetch(sd, du).dy*f0 + ccl_fetch(sd, dv).dy*f1 - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*f2; #endif - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; + return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2; } else if(elem == ATTR_ELEMENT_CORNER) { - int tri = offset + sd->prim*3; + int tri = offset + ccl_fetch(sd, prim)*3; float f0 = kernel_tex_fetch(__attributes_float, tri + 0); float f1 = kernel_tex_fetch(__attributes_float, tri + 1); float f2 = kernel_tex_fetch(__attributes_float, tri + 2); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; + if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2; + if(dy) *dy = ccl_fetch(sd, du).dy*f0 + ccl_fetch(sd, dv).dy*f1 - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*f2; #endif - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; + return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2; } else { if(dx) *dx = 0.0f; @@ -252,24 +159,24 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim)); + return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + ccl_fetch(sd, prim))); } else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) { - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim)); + float4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x))); float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y))); float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z))); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; + if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2; + if(dy) *dy = ccl_fetch(sd, du).dy*f0 + ccl_fetch(sd, dv).dy*f1 - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*f2; #endif - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; + return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2; } else if(elem == ATTR_ELEMENT_CORNER || elem == ATTR_ELEMENT_CORNER_BYTE) { - int tri = offset + sd->prim*3; + int tri = offset + ccl_fetch(sd, prim)*3; float3 f0, f1, f2; if(elem == ATTR_ELEMENT_CORNER) { @@ -284,11 +191,11 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData } #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; + if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2; + if(dy) *dy = ccl_fetch(sd, du).dy*f0 + ccl_fetch(sd, dv).dy*f1 - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*f2; #endif - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; + return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2; } else { if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); @@ -298,116 +205,4 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData } } -/* Ray-Triangle intersection for BVH traversal - * - * Based on Sven Woop's algorithm with precomputed triangle storage */ - -ccl_device_inline bool triangle_intersect(KernelGlobals *kg, Intersection *isect, - float3 P, float3 dir, uint visibility, int object, int triAddr) -{ - /* compute and check intersection t-value */ - float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0); - float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1); - - float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; - float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z); - float t = Oz * invDz; - - if(t > 0.0f && t < isect->t) { - /* compute and check barycentric u */ - float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z; - float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z; - float u = Ox + t*Dx; - - if(u >= 0.0f) { - /* compute and check barycentric v */ - float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2); - float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z; - float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z; - float v = Oy + t*Dy; - - if(v >= 0.0f && u + v <= 1.0f) { -#ifdef __VISIBILITY_FLAG__ - /* visibility flag test. we do it here under the assumption - * that most triangles are culled by node flags */ - if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility) -#endif - { - /* record intersection */ - isect->prim = triAddr; - isect->object = object; - isect->type = PRIMITIVE_TRIANGLE; - isect->u = u; - isect->v = v; - isect->t = t; - return true; - } - } - } - } - - return false; -} - -/* Special ray intersection routines for subsurface scattering. In that case we - * only want to intersect with primitives in the same object, and if case of - * multiple hits we pick a single random primitive as the intersection point. */ - -#ifdef __SUBSURFACE__ -ccl_device_inline void triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect_array, - float3 P, float3 dir, int object, int triAddr, float tmax, uint *num_hits, uint *lcg_state, int max_hits) -{ - /* compute and check intersection t-value */ - float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0); - float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1); - - float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; - float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z); - float t = Oz * invDz; - - if(t > 0.0f && t < tmax) { - /* compute and check barycentric u */ - float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z; - float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z; - float u = Ox + t*Dx; - - if(u >= 0.0f) { - /* compute and check barycentric v */ - float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2); - float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z; - float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z; - float v = Oy + t*Dy; - - if(v >= 0.0f && u + v <= 1.0f) { - (*num_hits)++; - - int hit; - - if(*num_hits <= max_hits) { - hit = *num_hits - 1; - } - else { - /* reservoir sampling: if we are at the maximum number of - * hits, randomly replace element or skip it */ - hit = lcg_step_uint(lcg_state) % *num_hits; - - if(hit >= max_hits) - return; - } - - /* record intersection */ - Intersection *isect = &isect_array[hit]; - isect->prim = triAddr; - isect->object = object; - isect->type = PRIMITIVE_TRIANGLE; - isect->u = u; - isect->v = v; - isect->t = t; - } - } - } -} -#endif - CCL_NAMESPACE_END - diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h new file mode 100644 index 00000000000..ba309a1dc53 --- /dev/null +++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h @@ -0,0 +1,431 @@ +/* + * Copyright 2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Triangle/Ray intersections. + * + * For BVH ray intersection we use a precomputed triangle storage to accelerate + * intersection at the cost of more memory usage. + */ + +CCL_NAMESPACE_BEGIN + +/* Workaround stupidness of CUDA/OpenCL which doesn't allow to access indexed + * component of float3 value. + */ +#ifndef __KERNEL_CPU__ +# define IDX(vec, idx) \ + ((idx == 0) ? ((vec).x) : ( (idx == 1) ? ((vec).y) : ((vec).z) )) +#else +# define IDX(vec, idx) ((vec)[idx]) +#endif + +/* Ray-Triangle intersection for BVH traversal + * + * Sven Woop + * Watertight Ray/Triangle Intersection + * + * http://jcgt.org/published/0002/01/05/paper.pdf + */ + +/* Precalculated data for the ray->tri intersection. */ +typedef struct IsectPrecalc { + /* Maximal dimension kz, and orthogonal dimensions. */ + int kx, ky, kz; + + /* Shear constants. */ + float Sx, Sy, Sz; +} IsectPrecalc; + +#if defined(__KERNEL_CUDA__) +# if (defined(i386) || defined(_M_IX86)) +# if __CUDA_ARCH__ > 500 +ccl_device_noinline +# else /* __CUDA_ARCH__ > 500 */ +ccl_device_inline +# endif /* __CUDA_ARCH__ > 500 */ +# else /* (defined(i386) || defined(_M_IX86)) */ +# if defined(__KERNEL_EXPERIMENTAL__) && (__CUDA_ARCH__ >= 500) +ccl_device_noinline +# else +ccl_device_inline +# endif +# endif /* (defined(i386) || defined(_M_IX86)) */ +#elif defined(__KERNEL_OPENCL_APPLE__) +ccl_device_noinline +#else /* defined(__KERNEL_OPENCL_APPLE__) */ +ccl_device_inline +#endif /* defined(__KERNEL_OPENCL_APPLE__) */ +void triangle_intersect_precalc(float3 dir, + IsectPrecalc *isect_precalc) +{ + /* Calculate dimension where the ray direction is maximal. */ + int kz = util_max_axis(make_float3(fabsf(dir.x), + fabsf(dir.y), + fabsf(dir.z))); + int kx = kz + 1; if(kx == 3) kx = 0; + int ky = kx + 1; if(ky == 3) ky = 0; + + /* Swap kx and ky dimensions to preserve winding direction of triangles. */ + if(IDX(dir, kz) < 0.0f) { + int tmp = kx; + kx = ky; + ky = tmp; + } + + /* Calculate the shear constants. */ + float inv_dir_z = 1.0f / IDX(dir, kz); + isect_precalc->Sx = IDX(dir, kx) * inv_dir_z; + isect_precalc->Sy = IDX(dir, ky) * inv_dir_z; + isect_precalc->Sz = inv_dir_z; + + /* Store the dimensions. */ + isect_precalc->kx = kx; + isect_precalc->ky = ky; + isect_precalc->kz = kz; +} + +/* TODO(sergey): Make it general utility function. */ +ccl_device_inline float xor_signmask(float x, int y) +{ + return __int_as_float(__float_as_int(x) ^ y); +} + +ccl_device_inline bool triangle_intersect(KernelGlobals *kg, + const IsectPrecalc *isect_precalc, + Intersection *isect, + float3 P, + uint visibility, + int object, + int triAddr) +{ + const int kx = isect_precalc->kx; + const int ky = isect_precalc->ky; + const int kz = isect_precalc->kz; + const float Sx = isect_precalc->Sx; + const float Sy = isect_precalc->Sy; + const float Sz = isect_precalc->Sz; + + /* Calculate vertices relative to ray origin. */ + const float4 tri_a = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0), + tri_b = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1), + tri_c = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2); + const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z); + const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z); + const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z); + + const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz); + const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz); + const float C_kx = IDX(C, kx), C_ky = IDX(C, ky), C_kz = IDX(C, kz); + + /* Perform shear and scale of vertices. */ + const float Ax = A_kx - Sx * A_kz; + const float Ay = A_ky - Sy * A_kz; + const float Bx = B_kx - Sx * B_kz; + const float By = B_ky - Sy * B_kz; + const float Cx = C_kx - Sx * C_kz; + const float Cy = C_ky - Sy * C_kz; + + /* Calculate scaled barycentric coordinates. */ + float U = Cx * By - Cy * Bx; + float V = Ax * Cy - Ay * Cx; + float W = Bx * Ay - By * Ax; + const int sign_mask = (__float_as_int(U) & 0x80000000); + /* TODO(sergey): Check if multiplication plus sign check is faster + * or at least same speed (but robust for endian types). + */ + if(sign_mask != (__float_as_int(V) & 0x80000000) || + sign_mask != (__float_as_int(W) & 0x80000000)) + { + return false; + } + + /* Calculate determinant. */ + float det = U + V + W; + if(UNLIKELY(det == 0.0f)) { + return false; + } + + /* Calculate scaled z-coordinates of vertices and use them to calculate + * the hit distance. + */ + const float T = (U * A_kz + V * B_kz + W * C_kz) * Sz; + const float sign_T = xor_signmask(T, sign_mask); + if((sign_T < 0.0f) || + (sign_T > isect->t * xor_signmask(det, sign_mask))) + { + return false; + } + +#ifdef __VISIBILITY_FLAG__ + /* visibility flag test. we do it here under the assumption + * that most triangles are culled by node flags */ + if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility) +#endif + { +#ifdef __KERNEL_GPU__ + float4 a = tri_b - tri_a, b = tri_c - tri_a; + if(len_squared(make_float3(a.y*b.z - a.z*b.y, + a.z*b.x - a.x*b.z, + a.x*b.y - a.y*b.x)) == 0.0f) + { + return false; + } +#endif + + /* Normalize U, V, W, and T. */ + const float inv_det = 1.0f / det; + isect->prim = triAddr; + isect->object = object; + isect->type = PRIMITIVE_TRIANGLE; + isect->u = U * inv_det; + isect->v = V * inv_det; + isect->t = T * inv_det; + return true; + } + return false; +} + +/* Special ray intersection routines for subsurface scattering. In that case we + * only want to intersect with primitives in the same object, and if case of + * multiple hits we pick a single random primitive as the intersection point. + */ + +#ifdef __SUBSURFACE__ +ccl_device_inline void triangle_intersect_subsurface( + KernelGlobals *kg, + const IsectPrecalc *isect_precalc, + Intersection *isect_array, + float3 P, + int object, + int triAddr, + float tmax, + uint *num_hits, + uint *lcg_state, + int max_hits) +{ + const int kx = isect_precalc->kx; + const int ky = isect_precalc->ky; + const int kz = isect_precalc->kz; + const float Sx = isect_precalc->Sx; + const float Sy = isect_precalc->Sy; + const float Sz = isect_precalc->Sz; + + /* Calculate vertices relative to ray origin. */ + const float4 tri_a = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0), + tri_b = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1), + tri_c = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2); + const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z); + const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z); + const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z); + + const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz); + const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz); + const float C_kx = IDX(C, kx), C_ky = IDX(C, ky), C_kz = IDX(C, kz); + + /* Perform shear and scale of vertices. */ + const float Ax = A_kx - Sx * A_kz; + const float Ay = A_ky - Sy * A_kz; + const float Bx = B_kx - Sx * B_kz; + const float By = B_ky - Sy * B_kz; + const float Cx = C_kx - Sx * C_kz; + const float Cy = C_ky - Sy * C_kz; + + /* Calculate scaled barycentric coordinates. */ + float U = Cx * By - Cy * Bx; + int sign_mask = (__float_as_int(U) & 0x80000000); + float V = Ax * Cy - Ay * Cx; + if(sign_mask != (__float_as_int(V) & 0x80000000)) { + return; + } + float W = Bx * Ay - By * Ax; + if(sign_mask != (__float_as_int(W) & 0x80000000)) { + return; + } + + /* Calculate determinant. */ + float det = U + V + W; + if(UNLIKELY(det == 0.0f)) { + return; + } + + /* Calculate scaled z−coordinates of vertices and use them to calculate + * the hit distance. + */ + const float T = (U * A_kz + V * B_kz + W * C_kz) * Sz; + const float sign_T = xor_signmask(T, sign_mask); + if((sign_T < 0.0f) || + (sign_T > tmax * xor_signmask(det, sign_mask))) + { + return; + } + + /* Normalize U, V, W, and T. */ + const float inv_det = 1.0f / det; + + (*num_hits)++; + int hit; + + if(*num_hits <= max_hits) { + hit = *num_hits - 1; + } + else { + /* reservoir sampling: if we are at the maximum number of + * hits, randomly replace element or skip it */ + hit = lcg_step_uint(lcg_state) % *num_hits; + + if(hit >= max_hits) + return; + } + + /* record intersection */ + Intersection *isect = &isect_array[hit]; + isect->prim = triAddr; + isect->object = object; + isect->type = PRIMITIVE_TRIANGLE; + isect->u = U * inv_det; + isect->v = V * inv_det; + isect->t = T * inv_det; +} +#endif + +/* Refine triangle intersection to more precise hit point. For rays that travel + * far the precision is often not so good, this reintersects the primitive from + * a closer distance. */ + +/* Reintersections uses the paper: + * + * Tomas Moeller + * Fast, minimum storage ray/triangle intersection + * http://www.cs.virginia.edu/~gfx/Courses/2003/ImageSynthesis/papers/Acceleration/Fast%20MinimumStorage%20RayTriangle%20Intersection.pdf + */ + +ccl_device_inline float3 triangle_refine(KernelGlobals *kg, + ShaderData *sd, + const Intersection *isect, + const Ray *ray) +{ + float3 P = ray->P; + float3 D = ray->D; + float t = isect->t; + +#ifdef __INTERSECTION_REFINE__ + if(isect->object != OBJECT_NONE) { + if(UNLIKELY(t == 0.0f)) { + return P; + } +#ifdef __OBJECT_MOTION__ + Transform tfm = ccl_fetch(sd, ob_itfm); +#else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); +#endif + + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D*t); + D = normalize_len(D, &t); + } + + P = P + D*t; + + const float4 tri_a = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0), + tri_b = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1), + tri_c = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2); + float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); + float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); + float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); + float3 qvec = cross(tvec, edge1); + float3 pvec = cross(D, edge2); + float rt = dot(edge2, qvec) / dot(edge1, pvec); + + P = P + D*rt; + + if(isect->object != OBJECT_NONE) { +#ifdef __OBJECT_MOTION__ + Transform tfm = ccl_fetch(sd, ob_tfm); +#else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); +#endif + + P = transform_point(&tfm, P); + } + + return P; +#else + return P + D*t; +#endif +} + +/* Same as above, except that isect->t is assumed to be in object space for + * instancing. + */ +ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, + ShaderData *sd, + const Intersection *isect, + const Ray *ray) +{ + float3 P = ray->P; + float3 D = ray->D; + float t = isect->t; + +#ifdef __INTERSECTION_REFINE__ + if(isect->object != OBJECT_NONE) { +#ifdef __OBJECT_MOTION__ + Transform tfm = ccl_fetch(sd, ob_itfm); +#else + Transform tfm = object_fetch_transform(kg, + isect->object, + OBJECT_INVERSE_TRANSFORM); +#endif + + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D); + D = normalize(D); + } + + P = P + D*t; + + const float4 tri_a = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0), + tri_b = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1), + tri_c = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2); + float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); + float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); + float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); + float3 qvec = cross(tvec, edge1); + float3 pvec = cross(D, edge2); + float rt = dot(edge2, qvec) / dot(edge1, pvec); + + P = P + D*rt; + + if(isect->object != OBJECT_NONE) { +#ifdef __OBJECT_MOTION__ + Transform tfm = ccl_fetch(sd, ob_tfm); +#else + Transform tfm = object_fetch_transform(kg, + isect->object, + OBJECT_TRANSFORM); +#endif + + P = transform_point(&tfm, P); + } + + return P; +#else + return P + D*t; +#endif +} + +#undef IDX + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h index 33a20494966..c72afa2a3a4 100644 --- a/intern/cycles/kernel/geom/geom_volume.h +++ b/intern/cycles/kernel/geom/geom_volume.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ /* Volume Primitive @@ -52,11 +52,15 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, #ifdef __KERNEL_GPU__ float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); #else - float4 r = kernel_tex_image_interp_3d(id, P.x, P.y, P.z); + float4 r; + if(sd->flag & SD_VOLUME_CUBIC) + r = kernel_tex_image_interp_3d_ex(id, P.x, P.y, P.z, INTERPOLATION_CUBIC); + else + r = kernel_tex_image_interp_3d(id, P.x, P.y, P.z); #endif if(dx) *dx = 0.0f; - if(dx) *dy = 0.0f; + if(dy) *dy = 0.0f; /* todo: support float textures to lower memory usage for single floats */ return average(float4_to_float3(r)); @@ -68,7 +72,11 @@ ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *s #ifdef __KERNEL_GPU__ float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); #else - float4 r = kernel_tex_image_interp_3d(id, P.x, P.y, P.z); + float4 r; + if(sd->flag & SD_VOLUME_CUBIC) + r = kernel_tex_image_interp_3d_ex(id, P.x, P.y, P.z, INTERPOLATION_CUBIC); + else + r = kernel_tex_image_interp_3d(id, P.x, P.y, P.z); #endif if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h index 19e06b88797..b2596d10ee7 100644 --- a/intern/cycles/kernel/kernel.h +++ b/intern/cycles/kernel/kernel.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __KERNEL_H__ @@ -32,7 +32,14 @@ void *kernel_osl_memory(KernelGlobals *kg); bool kernel_osl_use(KernelGlobals *kg); void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size); -void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t width, size_t height, size_t depth, InterpolationType interpolation=INTERPOLATION_LINEAR); +void kernel_tex_copy(KernelGlobals *kg, + const char *name, + device_ptr mem, + size_t width, + size_t height, + size_t depth, + InterpolationType interpolation=INTERPOLATION_LINEAR, + ExtensionType extension = EXTENSION_REPEAT); void kernel_cpu_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride); diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h index b0efcdc66a7..2dc87fffcbc 100644 --- a/intern/cycles/kernel/kernel_accumulate.h +++ b/intern/cycles/kernel/kernel_accumulate.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -176,7 +176,7 @@ ccl_device_inline void path_radiance_init(PathRadiance *L, int use_light_pass) #endif } -ccl_device_inline void path_radiance_bsdf_bounce(PathRadiance *L, float3 *throughput, +ccl_device_inline void path_radiance_bsdf_bounce(PathRadiance *L, ccl_addr_space float3 *throughput, BsdfEval *bsdf_eval, float bsdf_pdf, int bounce, int bsdf_label) { float inverse_pdf = 1.0f/bsdf_pdf; @@ -341,12 +341,12 @@ ccl_device_inline void path_radiance_reset_indirect(PathRadiance *L) ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadiance *L) { - float3 L_sum, L_direct, L_indirect; - float clamp_direct = kernel_data.integrator.sample_clamp_direct; - float clamp_indirect = kernel_data.integrator.sample_clamp_indirect; - + float3 L_sum; /* Light Passes are used */ #ifdef __PASSES__ + float3 L_direct, L_indirect; + float clamp_direct = kernel_data.integrator.sample_clamp_direct; + float clamp_indirect = kernel_data.integrator.sample_clamp_indirect; if(L->use_light_pass) { path_radiance_sum_indirect(L); diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index a1ec080e3d3..2b305e5488d 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -11,11 +11,13 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN +#undef USE_BAKE_JITTER + ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, RNG rng, const bool is_combined, const bool is_ao, const bool is_sss, int sample) { @@ -29,6 +31,13 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian float3 throughput = make_float3(1.0f, 1.0f, 1.0f); bool is_sss_sample = is_sss; + ray.P = sd->P + sd->Ng; + ray.D = -sd->Ng; + ray.t = FLT_MAX; +#ifdef __CAMERA_MOTION__ + ray.time = TIME_INVALID; +#endif + /* init radiance */ path_radiance_init(&L_sample, kernel_data.film.use_light_pass); @@ -55,7 +64,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian /* sample subsurface scattering */ if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) { /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */ - if (kernel_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, &throughput)) + if(kernel_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, &throughput)) is_sss_sample = true; } #endif @@ -159,7 +168,8 @@ ccl_device bool is_light_pass(ShaderEvalType type) } } -#if 0 +/* this helps with AA but it's not the real solution as it does not AA the geometry + * but it's better than nothing, thus committed */ ccl_device_inline float bake_clamp_mirror_repeat(float u) { /* use mirror repeat (like opengl texture) so that if the barycentric @@ -170,7 +180,6 @@ ccl_device_inline float bake_clamp_mirror_repeat(float u) return (((int)fu) & 1)? 1.0f - u: u; } -#endif ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i, int offset, int sample) @@ -198,12 +207,16 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, int num_samples = kernel_data.integrator.aa_samples; /* random number generator */ - RNG rng = cmj_hash(offset + i, 0); + RNG rng = cmj_hash(offset + i, kernel_data.integrator.seed); -#if 0 - uint rng_state = cmj_hash(i, 0); +#ifdef USE_BAKE_JITTER float filter_x, filter_y; - path_rng_init(kg, &rng_state, sample, num_samples, &rng, 0, 0, &filter_x, &filter_y); + if(sample == 0) { + filter_x = filter_y = 0.5f; + } + else { + path_rng_2D(kg, &rng, sample, num_samples, PRNG_FILTER_U, &filter_x, &filter_y); + } /* subpixel u/v offset */ if(sample > 0) { @@ -253,6 +266,10 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, /* data passes */ case SHADER_EVAL_NORMAL: { + if((sd.flag & SD_HAS_BUMP)) { + shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN); + } + /* compression: normal = (2 * color) - 1 */ out = sd.N * 0.5f + make_float3(0.5f, 0.5f, 0.5f); break; diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h index d1217ae0abc..2d531fdc96e 100644 --- a/intern/cycles/kernel/kernel_camera.h +++ b/intern/cycles/kernel/kernel_camera.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -41,11 +41,34 @@ ccl_device float2 camera_sample_aperture(KernelGlobals *kg, float u, float v) return bokeh; } -ccl_device void camera_sample_perspective(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, Ray *ray) +ccl_device void camera_sample_perspective(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, ccl_addr_space Ray *ray) { /* create ray form raster position */ Transform rastertocamera = kernel_data.cam.rastertocamera; - float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f)); + float3 raster = make_float3(raster_x, raster_y, 0.0f); + float3 Pcamera = transform_perspective(&rastertocamera, raster); + +#ifdef __CAMERA_MOTION__ + if(kernel_data.cam.have_perspective_motion) { + /* TODO(sergey): Currently we interpolate projected coordinate which + * gives nice looking result and which is simple, but is in fact a bit + * different comparing to constructing projective matrix from an + * interpolated field of view. + */ + if(ray->time < 0.5f) { + Transform rastertocamera_pre = kernel_data.cam.perspective_motion.pre; + float3 Pcamera_pre = + transform_perspective(&rastertocamera_pre, raster); + Pcamera = interp(Pcamera_pre, Pcamera, ray->time * 2.0f); + } + else { + Transform rastertocamera_post = kernel_data.cam.perspective_motion.post; + float3 Pcamera_post = + transform_perspective(&rastertocamera_post, raster); + Pcamera = interp(Pcamera, Pcamera_post, (ray->time - 0.5f) * 2.0f); + } + } +#endif ray->P = make_float3(0.0f, 0.0f, 0.0f); ray->D = Pcamera; @@ -70,8 +93,18 @@ ccl_device void camera_sample_perspective(KernelGlobals *kg, float raster_x, flo Transform cameratoworld = kernel_data.cam.cameratoworld; #ifdef __CAMERA_MOTION__ - if(kernel_data.cam.have_motion) - transform_motion_interpolate(&cameratoworld, (const DecompMotionTransform*)&kernel_data.cam.motion, ray->time); + if(kernel_data.cam.have_motion) { +#ifdef __KERNEL_OPENCL__ + const MotionTransform tfm = kernel_data.cam.motion; + transform_motion_interpolate(&cameratoworld, + ((const DecompMotionTransform*)&tfm), + ray->time); +#else + transform_motion_interpolate(&cameratoworld, + ((const DecompMotionTransform*)&kernel_data.cam.motion), + ray->time); +#endif + } #endif ray->P = transform_point(&cameratoworld, ray->P); @@ -90,16 +123,17 @@ ccl_device void camera_sample_perspective(KernelGlobals *kg, float raster_x, flo #ifdef __CAMERA_CLIPPING__ /* clipping */ - ray->P += kernel_data.cam.nearclip*ray->D; - ray->t = kernel_data.cam.cliplength; + float3 Pclip = normalize(Pcamera); + float z_inv = 1.0f / Pclip.z; + ray->P += kernel_data.cam.nearclip*ray->D * z_inv; + ray->t = kernel_data.cam.cliplength * z_inv; #else ray->t = FLT_MAX; #endif } /* Orthographic Camera */ - -ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, Ray *ray) +ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, ccl_addr_space Ray *ray) { /* create ray form raster position */ Transform rastertocamera = kernel_data.cam.rastertocamera; @@ -129,8 +163,18 @@ ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, fl Transform cameratoworld = kernel_data.cam.cameratoworld; #ifdef __CAMERA_MOTION__ - if(kernel_data.cam.have_motion) - transform_motion_interpolate(&cameratoworld, (const DecompMotionTransform*)&kernel_data.cam.motion, ray->time); + if(kernel_data.cam.have_motion) { +#ifdef __KERNEL_OPENCL__ + const MotionTransform tfm = kernel_data.cam.motion; + transform_motion_interpolate(&cameratoworld, + (const DecompMotionTransform*)&tfm, + ray->time); +#else + transform_motion_interpolate(&cameratoworld, + (const DecompMotionTransform*)&kernel_data.cam.motion, + ray->time); +#endif + } #endif ray->P = transform_point(&cameratoworld, ray->P); @@ -155,7 +199,7 @@ ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, fl /* Panorama Camera */ -ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, Ray *ray) +ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, ccl_addr_space Ray *ray) { Transform rastertocamera = kernel_data.cam.rastertocamera; float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f)); @@ -203,8 +247,18 @@ ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float Transform cameratoworld = kernel_data.cam.cameratoworld; #ifdef __CAMERA_MOTION__ - if(kernel_data.cam.have_motion) - transform_motion_interpolate(&cameratoworld, (const DecompMotionTransform*)&kernel_data.cam.motion, ray->time); + if(kernel_data.cam.have_motion) { +#ifdef __KERNEL_OPENCL__ + const MotionTransform tfm = kernel_data.cam.motion; + transform_motion_interpolate(&cameratoworld, + (const DecompMotionTransform*)&tfm, + ray->time); +#else + transform_motion_interpolate(&cameratoworld, + (const DecompMotionTransform*)&kernel_data.cam.motion, + ray->time); +#endif + } #endif ray->P = transform_point(&cameratoworld, ray->P); @@ -215,18 +269,21 @@ ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float /* ray differential */ ray->dP = differential3_zero(); + Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f)); + float3 Ddiff = normalize(transform_direction(&cameratoworld, panorama_to_direction(kg, Pcamera.x, Pcamera.y))); + Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x + 1.0f, raster_y, 0.0f)); - ray->dD.dx = normalize(transform_direction(&cameratoworld, panorama_to_direction(kg, Pcamera.x, Pcamera.y))) - ray->D; + ray->dD.dx = normalize(transform_direction(&cameratoworld, panorama_to_direction(kg, Pcamera.x, Pcamera.y))) - Ddiff; Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y + 1.0f, 0.0f)); - ray->dD.dy = normalize(transform_direction(&cameratoworld, panorama_to_direction(kg, Pcamera.x, Pcamera.y))) - ray->D; + ray->dD.dy = normalize(transform_direction(&cameratoworld, panorama_to_direction(kg, Pcamera.x, Pcamera.y))) - Ddiff; #endif } /* Common */ ccl_device void camera_sample(KernelGlobals *kg, int x, int y, float filter_u, float filter_v, - float lens_u, float lens_v, float time, Ray *ray) + float lens_u, float lens_v, float time, ccl_addr_space Ray *ray) { /* pixel filter */ int filter_table_offset = kernel_data.film.filter_table_offset; @@ -303,7 +360,7 @@ ccl_device_inline float3 camera_world_to_ndc(KernelGlobals *kg, ShaderData *sd, { if(kernel_data.cam.type != CAMERA_PANORAMA) { /* perspective / ortho */ - if(sd->object == PRIM_NONE && kernel_data.cam.type == CAMERA_PERSPECTIVE) + if(ccl_fetch(sd, object) == PRIM_NONE && kernel_data.cam.type == CAMERA_PERSPECTIVE) P += camera_position(kg); Transform tfm = kernel_data.cam.worldtondc; @@ -313,7 +370,7 @@ ccl_device_inline float3 camera_world_to_ndc(KernelGlobals *kg, ShaderData *sd, /* panorama */ Transform tfm = kernel_data.cam.worldtocamera; - if(sd->object != OBJECT_NONE) + if(ccl_fetch(sd, object) != OBJECT_NONE) P = normalize(transform_point(&tfm, P)); else P = normalize(transform_direction(&tfm, P)); @@ -325,4 +382,3 @@ ccl_device_inline float3 camera_world_to_ndc(KernelGlobals *kg, ShaderData *sd, } CCL_NAMESPACE_END - diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index c2aab93c87b..ed145b4a967 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __KERNEL_COMPAT_CPU_H__ @@ -19,12 +19,39 @@ #define __KERNEL_CPU__ +/* Release kernel has too much false-positive maybe-uninitialized warnings, + * which makes it possible to miss actual warnings. + */ +#if defined(__GNUC__) && defined(NDEBUG) +# pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +# pragma GCC diagnostic ignored "-Wuninitialized" +#endif + +/* Selective nodes compilation. */ +#ifndef __NODES_MAX_GROUP__ +# define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX +#endif +#ifndef __NODES_FEATURES__ +# define __NODES_FEATURES__ NODE_FEATURE_ALL +#endif + #include "util_debug.h" #include "util_math.h" #include "util_simd.h" #include "util_half.h" #include "util_types.h" +#define ccl_addr_space + +/* On x86_64, versions of glibc < 2.16 have an issue where expf is + * much slower than the double version. This was fixed in glibc 2.16. + */ +#if !defined(__KERNEL_GPU__) && defined(__x86_64__) && defined(__x86_64__) && \ + defined(__GNU_LIBRARY__) && defined(__GLIBC__ ) && defined(__GLIBC_MINOR__) && \ + (__GLIBC__ <= 2 && __GLIBC_MINOR__ < 16) +# define expf(x) ((float)exp((double)(x))) +#endif + CCL_NAMESPACE_BEGIN /* Assertions inside the kernel only work for the CPU device, so we wrap it in @@ -43,7 +70,7 @@ template<typename T> struct texture { return data[index]; } -#if 0 +#ifdef __KERNEL_SSE2__ ccl_always_inline ssef fetch_ssef(int index) { kernel_assert(index >= 0 && index < width); @@ -62,6 +89,14 @@ template<typename T> struct texture { }; template<typename T> struct texture_image { +#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \ + { \ + u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \ + u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \ + u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \ + u[3] = (1.0f / 6.0f) * t * t * t; \ + } (void)0 + ccl_always_inline float4 read(float4 r) { return r; @@ -93,7 +128,7 @@ template<typename T> struct texture_image { return x - (float)i; } - ccl_always_inline float4 interp(float x, float y, bool periodic = true) + ccl_always_inline float4 interp(float x, float y) { if(UNLIKELY(!data)) return make_float4(0.0f, 0.0f, 0.0f, 0.0f); @@ -103,34 +138,47 @@ template<typename T> struct texture_image { if(interpolation == INTERPOLATION_CLOSEST) { frac(x*(float)width, &ix); frac(y*(float)height, &iy); - if(periodic) { - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - - } - else { - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); + switch(extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + break; + case EXTENSION_CLIP: + if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + /* Fall through. */ + case EXTENSION_EXTEND: + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + break; } return read(data[ix + iy*width]); } - else { + else if(interpolation == INTERPOLATION_LINEAR) { float tx = frac(x*(float)width - 0.5f, &ix); float ty = frac(y*(float)height - 0.5f, &iy); - if(periodic) { - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - - nix = wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); - } - else { - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); + switch(extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + + nix = wrap_periodic(ix+1, width); + niy = wrap_periodic(iy+1, height); + break; + case EXTENSION_CLIP: + if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + /* Fall through. */ + case EXTENSION_EXTEND: + nix = wrap_clamp(ix+1, width); + niy = wrap_clamp(iy+1, height); + + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + break; } float4 r = (1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width]); @@ -140,9 +188,79 @@ template<typename T> struct texture_image { return r; } + else { + /* Bicubic b-spline interpolation. */ + float tx = frac(x*(float)width - 0.5f, &ix); + float ty = frac(y*(float)height - 0.5f, &iy); + int pix, piy, nnix, nniy; + switch(extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + + pix = wrap_periodic(ix-1, width); + piy = wrap_periodic(iy-1, height); + + nix = wrap_periodic(ix+1, width); + niy = wrap_periodic(iy+1, height); + + nnix = wrap_periodic(ix+2, width); + nniy = wrap_periodic(iy+2, height); + break; + case EXTENSION_CLIP: + if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + /* Fall through. */ + case EXTENSION_EXTEND: + pix = wrap_clamp(ix-1, width); + piy = wrap_clamp(iy-1, height); + + nix = wrap_clamp(ix+1, width); + niy = wrap_clamp(iy+1, height); + + nnix = wrap_clamp(ix+2, width); + nniy = wrap_clamp(iy+2, height); + + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + break; + } + + const int xc[4] = {pix, ix, nix, nnix}; + const int yc[4] = {width * piy, + width * iy, + width * niy, + width * nniy}; + float u[4], v[4]; + /* Some helper macro to keep code reasonable size, + * let compiler to inline all the matrix multiplications. + */ +#define DATA(x, y) (read(data[xc[x] + yc[y]])) +#define TERM(col) \ + (v[col] * (u[0] * DATA(0, col) + \ + u[1] * DATA(1, col) + \ + u[2] * DATA(2, col) + \ + u[3] * DATA(3, col))) + + SET_CUBIC_SPLINE_WEIGHTS(u, tx); + SET_CUBIC_SPLINE_WEIGHTS(v, ty); + + /* Actual interpolation. */ + return TERM(0) + TERM(1) + TERM(2) + TERM(3); + +#undef TERM +#undef DATA + } } - ccl_always_inline float4 interp_3d(float x, float y, float z, bool periodic = false) + ccl_always_inline float4 interp_3d(float x, float y, float z) + { + return interp_3d_ex(x, y, z, interpolation); + } + + ccl_always_inline float4 interp_3d_ex(float x, float y, float z, + int interpolation = INTERPOLATION_LINEAR) { if(UNLIKELY(!data)) return make_float4(0.0f, 0.0f, 0.0f, 0.0f); @@ -154,41 +272,55 @@ template<typename T> struct texture_image { frac(y*(float)height, &iy); frac(z*(float)depth, &iz); - if(periodic) { - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); - } - else { - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); + switch(extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + break; + case EXTENSION_CLIP: + if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + /* Fall through. */ + case EXTENSION_EXTEND: + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + break; } return read(data[ix + iy*width + iz*width*height]); } - else { + else if(interpolation == INTERPOLATION_LINEAR) { float tx = frac(x*(float)width - 0.5f, &ix); float ty = frac(y*(float)height - 0.5f, &iy); float tz = frac(z*(float)depth - 0.5f, &iz); - if(periodic) { - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); - - nix = wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); - niz = wrap_periodic(iz+1, depth); - } - else { - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); - - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); - niz = wrap_clamp(iz+1, depth); + switch(extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + + nix = wrap_periodic(ix+1, width); + niy = wrap_periodic(iy+1, height); + niz = wrap_periodic(iz+1, depth); + break; + case EXTENSION_CLIP: + if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + /* Fall through. */ + case EXTENSION_EXTEND: + nix = wrap_clamp(ix+1, width); + niy = wrap_clamp(iy+1, height); + niz = wrap_clamp(iz+1, depth); + + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + break; } float4 r; @@ -205,6 +337,92 @@ template<typename T> struct texture_image { return r; } + else { + /* Tricubic b-spline interpolation. */ + const float tx = frac(x*(float)width - 0.5f, &ix); + const float ty = frac(y*(float)height - 0.5f, &iy); + const float tz = frac(z*(float)depth - 0.5f, &iz); + int pix, piy, piz, nnix, nniy, nniz; + + switch(extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + + pix = wrap_periodic(ix-1, width); + piy = wrap_periodic(iy-1, height); + piz = wrap_periodic(iz-1, depth); + + nix = wrap_periodic(ix+1, width); + niy = wrap_periodic(iy+1, height); + niz = wrap_periodic(iz+1, depth); + + nnix = wrap_periodic(ix+2, width); + nniy = wrap_periodic(iy+2, height); + nniz = wrap_periodic(iz+2, depth); + break; + case EXTENSION_CLIP: + if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + /* Fall through. */ + case EXTENSION_EXTEND: + pix = wrap_clamp(ix-1, width); + piy = wrap_clamp(iy-1, height); + piz = wrap_clamp(iz-1, depth); + + nix = wrap_clamp(ix+1, width); + niy = wrap_clamp(iy+1, height); + niz = wrap_clamp(iz+1, depth); + + nnix = wrap_clamp(ix+2, width); + nniy = wrap_clamp(iy+2, height); + nniz = wrap_clamp(iz+2, depth); + + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + break; + } + + const int xc[4] = {pix, ix, nix, nnix}; + const int yc[4] = {width * piy, + width * iy, + width * niy, + width * nniy}; + const int zc[4] = {width * height * piz, + width * height * iz, + width * height * niz, + width * height * nniz}; + float u[4], v[4], w[4]; + + /* Some helper macro to keep code reasonable size, + * let compiler to inline all the matrix multiplications. + */ +#define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]])) +#define COL_TERM(col, row) \ + (v[col] * (u[0] * DATA(0, col, row) + \ + u[1] * DATA(1, col, row) + \ + u[2] * DATA(2, col, row) + \ + u[3] * DATA(3, col, row))) +#define ROW_TERM(row) \ + (w[row] * (COL_TERM(0, row) + \ + COL_TERM(1, row) + \ + COL_TERM(2, row) + \ + COL_TERM(3, row))) + + SET_CUBIC_SPLINE_WEIGHTS(u, tx); + SET_CUBIC_SPLINE_WEIGHTS(v, ty); + SET_CUBIC_SPLINE_WEIGHTS(w, tz); + + /* Actual interpolation. */ + return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3); + +#undef COL_TERM +#undef ROW_TERM +#undef DATA + } } ccl_always_inline void dimensions_set(int width_, int height_, int depth_) @@ -216,7 +434,9 @@ template<typename T> struct texture_image { T *data; int interpolation; + ExtensionType extension; int width, height, depth; +#undef SET_CUBIC_SPLINE_WEIGHTS }; typedef texture<float4> texture_float4; @@ -237,9 +457,38 @@ typedef texture_image<uchar4> texture_image_uchar4; #define kernel_tex_lookup(tex, t, offset, size) (kg->tex.lookup(t, offset, size)) #define kernel_tex_image_interp(tex, x, y) ((tex < MAX_FLOAT_IMAGES) ? kg->texture_float_images[tex].interp(x, y) : kg->texture_byte_images[tex - MAX_FLOAT_IMAGES].interp(x, y)) #define kernel_tex_image_interp_3d(tex, x, y, z) ((tex < MAX_FLOAT_IMAGES) ? kg->texture_float_images[tex].interp_3d(x, y, z) : kg->texture_byte_images[tex - MAX_FLOAT_IMAGES].interp_3d(x, y, z)) +#define kernel_tex_image_interp_3d_ex(tex, x, y, z, interpolation) ((tex < MAX_FLOAT_IMAGES) ? kg->texture_float_images[tex].interp_3d_ex(x, y, z, interpolation) : kg->texture_byte_images[tex - MAX_FLOAT_IMAGES].interp_3d_ex(x, y, z, interpolation)) #define kernel_data (kg->__data) +#ifdef __KERNEL_SSE2__ +typedef vector3<sseb> sse3b; +typedef vector3<ssef> sse3f; +typedef vector3<ssei> sse3i; + +ccl_device_inline void print_sse3b(const char *label, sse3b& a) +{ + print_sseb(label, a.x); + print_sseb(label, a.y); + print_sseb(label, a.z); +} + +ccl_device_inline void print_sse3f(const char *label, sse3f& a) +{ + print_ssef(label, a.x); + print_ssef(label, a.y); + print_ssef(label, a.z); +} + +ccl_device_inline void print_sse3i(const char *label, sse3i& a) +{ + print_ssei(label, a.x); + print_ssei(label, a.y); + print_ssei(label, a.z); +} + +#endif + CCL_NAMESPACE_END #endif /* __KERNEL_COMPAT_CPU_H__ */ diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index e4c20d26ff1..9fdd3abfec3 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __KERNEL_COMPAT_CUDA_H__ @@ -22,6 +22,14 @@ #define CCL_NAMESPACE_BEGIN #define CCL_NAMESPACE_END +/* Selective nodes compilation. */ +#ifndef __NODES_MAX_GROUP__ +# define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX +#endif +#ifndef __NODES_FEATURES__ +# define __NODES_FEATURES__ NODE_FEATURE_ALL +#endif + #include <cuda.h> #include <float.h> @@ -33,6 +41,7 @@ #define ccl_global #define ccl_constant #define ccl_may_alias +#define ccl_addr_space /* No assert supported for CUDA */ @@ -75,12 +84,11 @@ typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4; /* Use fast math functions */ -#define cosf(x) __cosf(((float)x)) -#define sinf(x) __sinf(((float)x)) -#define powf(x, y) __powf(((float)x), ((float)y)) -#define tanf(x) __tanf(((float)x)) -#define logf(x) __logf(((float)x)) -#define expf(x) __expf(((float)x)) +#define cosf(x) __cosf(((float)(x))) +#define sinf(x) __sinf(((float)(x))) +#define powf(x, y) __powf(((float)(x)), ((float)(y))) +#define tanf(x) __tanf(((float)(x))) +#define logf(x) __logf(((float)(x))) +#define expf(x) __expf(((float)(x))) #endif /* __KERNEL_COMPAT_CUDA_H__ */ - diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h index 9e58ebff599..e8b36d2605d 100644 --- a/intern/cycles/kernel/kernel_compat_opencl.h +++ b/intern/cycles/kernel/kernel_compat_opencl.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __KERNEL_COMPAT_OPENCL_H__ @@ -24,14 +24,6 @@ #define CCL_NAMESPACE_BEGIN #define CCL_NAMESPACE_END -#ifdef __KERNEL_OPENCL_AMD__ -#define __CL_NO_FLOAT3__ -#endif - -#ifdef __CL_NO_FLOAT3__ -#define float3 float4 -#endif - #ifdef __CL_NOINLINE__ #define ccl_noinline __attribute__((noinline)) #else @@ -45,6 +37,22 @@ #define ccl_may_alias #define ccl_constant __constant #define ccl_global __global +#define ccl_local __local +#define ccl_private __private + +#ifdef __SPLIT_KERNEL__ +#define ccl_addr_space __global +#else +#define ccl_addr_space +#endif + +/* Selective nodes compilation. */ +#ifndef __NODES_MAX_GROUP__ +# define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX +#endif +#ifndef __NODES_FEATURES__ +# define __NODES_FEATURES__ NODE_FEATURE_ALL +#endif /* no assert in opencl */ #define kernel_assert(cond) @@ -73,11 +81,7 @@ #endif #define make_float2(x, y) ((float2)(x, y)) -#ifdef __CL_NO_FLOAT3__ -#define make_float3(x, y, z) ((float4)(x, y, z, 0.0f)) -#else #define make_float3(x, y, z) ((float3)(x, y, z)) -#endif #define make_float4(x, y, z, w) ((float4)(x, y, z, w)) #define make_int2(x, y) ((int2)(x, y)) #define make_int3(x, y, z) ((int3)(x, y, z)) @@ -89,34 +93,34 @@ #define __float_as_uint(x) as_uint(x) #define __int_as_float(x) as_float(x) #define __float_as_int(x) as_int(x) -#define powf(x, y) pow(((float)x), ((float)y)) -#define fabsf(x) fabs(((float)x)) -#define copysignf(x, y) copysign(((float)x), ((float)y)) -#define asinf(x) asin(((float)x)) -#define acosf(x) acos(((float)x)) -#define atanf(x) atan(((float)x)) -#define floorf(x) floor(((float)x)) -#define ceilf(x) ceil(((float)x)) -#define hypotf(x, y) hypot(((float)x), ((float)y)) -#define atan2f(x, y) atan2(((float)x), ((float)y)) -#define fmaxf(x, y) fmax(((float)x), ((float)y)) -#define fminf(x, y) fmin(((float)x), ((float)y)) -#define fmodf(x, y) fmod((float)x, (float)y) +#define powf(x, y) pow(((float)(x)), ((float)(y))) +#define fabsf(x) fabs(((float)(x))) +#define copysignf(x, y) copysign(((float)(x)), ((float)(y))) +#define asinf(x) asin(((float)(x))) +#define acosf(x) acos(((float)(x))) +#define atanf(x) atan(((float)(x))) +#define floorf(x) floor(((float)(x))) +#define ceilf(x) ceil(((float)(x))) +#define hypotf(x, y) hypot(((float)(x)), ((float)(y))) +#define atan2f(x, y) atan2(((float)(x)), ((float)(y))) +#define fmaxf(x, y) fmax(((float)(x)), ((float)(y))) +#define fminf(x, y) fmin(((float)(x)), ((float)(y))) +#define fmodf(x, y) fmod((float)(x), (float)(y)) #ifndef __CL_USE_NATIVE__ -#define sinf(x) native_sin(((float)x)) -#define cosf(x) native_cos(((float)x)) -#define tanf(x) native_tan(((float)x)) -#define expf(x) native_exp(((float)x)) -#define sqrtf(x) native_sqrt(((float)x)) -#define logf(x) native_log(((float)x)) +#define sinf(x) native_sin(((float)(x))) +#define cosf(x) native_cos(((float)(x))) +#define tanf(x) native_tan(((float)(x))) +#define expf(x) native_exp(((float)(x))) +#define sqrtf(x) native_sqrt(((float)(x))) +#define logf(x) native_log(((float)(x))) #else -#define sinf(x) sin(((float)x)) -#define cosf(x) cos(((float)x)) -#define tanf(x) tan(((float)x)) -#define expf(x) exp(((float)x)) -#define sqrtf(x) sqrt(((float)x)) -#define logf(x) log(((float)x)) +#define sinf(x) sin(((float)(x))) +#define cosf(x) cos(((float)(x))) +#define tanf(x) tan(((float)(x))) +#define expf(x) exp(((float)(x))) +#define sqrtf(x) sqrt(((float)(x))) +#define logf(x) log(((float)(x))) #endif /* data lookup defines */ diff --git a/intern/cycles/kernel/kernel_debug.h b/intern/cycles/kernel/kernel_debug.h new file mode 100644 index 00000000000..24d6458567e --- /dev/null +++ b/intern/cycles/kernel/kernel_debug.h @@ -0,0 +1,50 @@ +/* + * Copyright 2011-2014 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + +ccl_device_inline void debug_data_init(DebugData *debug_data) +{ + debug_data->num_bvh_traversal_steps = 0; + debug_data->num_bvh_traversed_instances = 0; + debug_data->num_ray_bounces = 0; +} + +ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg, + ccl_global float *buffer, + ccl_addr_space PathState *state, + DebugData *debug_data, + int sample) +{ + int flag = kernel_data.film.pass_flag; + if(flag & PASS_BVH_TRAVERSAL_STEPS) { + kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversal_steps, + sample, + debug_data->num_bvh_traversal_steps); + } + if(flag & PASS_BVH_TRAVERSED_INSTANCES) { + kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_instances, + sample, + debug_data->num_bvh_traversed_instances); + } + if(flag & PASS_RAY_BOUNCES) { + kernel_write_pass_float(buffer + kernel_data.film.pass_ray_bounces, + sample, + debug_data->num_ray_bounces); + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_differential.h b/intern/cycles/kernel/kernel_differential.h index daba2d927b7..ae1e70f0167 100644 --- a/intern/cycles/kernel/kernel_differential.h +++ b/intern/cycles/kernel/kernel_differential.h @@ -11,14 +11,14 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN /* See "Tracing Ray Differentials", Homan Igehy, 1999. */ -ccl_device void differential_transfer(differential3 *dP_, const differential3 dP, float3 D, const differential3 dD, float3 Ng, float t) +ccl_device void differential_transfer(ccl_addr_space differential3 *dP_, const differential3 dP, float3 D, const differential3 dD, float3 Ng, float t) { /* ray differential transfer through homogeneous medium, to * compute dPdx/dy at a shading point from the incoming ray */ @@ -31,7 +31,7 @@ ccl_device void differential_transfer(differential3 *dP_, const differential3 dP dP_->dy = tmpy - dot(tmpy, Ng)*tmp; } -ccl_device void differential_incoming(differential3 *dI, const differential3 dD) +ccl_device void differential_incoming(ccl_addr_space differential3 *dI, const differential3 dD) { /* compute dIdx/dy at a shading point, we just need to negate the * differential of the ray direction */ @@ -40,7 +40,7 @@ ccl_device void differential_incoming(differential3 *dI, const differential3 dD) dI->dy = -dD.dy; } -ccl_device void differential_dudv(differential *du, differential *dv, float3 dPdu, float3 dPdv, differential3 dP, float3 Ng) +ccl_device void differential_dudv(ccl_addr_space differential *du, ccl_addr_space differential *dv, float3 dPdu, float3 dPdv, differential3 dP, float3 Ng) { /* now we have dPdx/dy from the ray differential transfer, and dPdu/dv * from the primitive, we can compute dudx/dy and dvdx/dy. these are diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h index 4b2bb723ab6..de9e8d77ec8 100644 --- a/intern/cycles/kernel/kernel_emission.h +++ b/intern/cycles/kernel/kernel_emission.h @@ -11,18 +11,26 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN /* Direction Emission */ - ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, - LightSample *ls, float3 I, differential3 dI, float t, float time, int bounce, int transparent_bounce) + LightSample *ls, float3 I, differential3 dI, float t, float time, int bounce, int transparent_bounce +#ifdef __SPLIT_KERNEL__ + ,ShaderData *sd_input +#endif +) { /* setup shading at emitter */ - ShaderData sd; +#ifdef __SPLIT_KERNEL__ + ShaderData *sd = sd_input; +#else + ShaderData sd_object; + ShaderData *sd = &sd_object; +#endif float3 eval; #ifdef __BACKGROUND_MIS__ @@ -37,23 +45,23 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, ray.dP = differential3_zero(); ray.dD = dI; - shader_setup_from_background(kg, &sd, &ray, bounce+1, transparent_bounce); - eval = shader_eval_background(kg, &sd, 0, SHADER_CONTEXT_EMISSION); + shader_setup_from_background(kg, sd, &ray, bounce+1, transparent_bounce); + eval = shader_eval_background(kg, sd, 0, SHADER_CONTEXT_EMISSION); } else #endif { - shader_setup_from_sample(kg, &sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time, bounce+1, transparent_bounce); + shader_setup_from_sample(kg, sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time, bounce+1, transparent_bounce); - ls->Ng = sd.Ng; + ls->Ng = ccl_fetch(sd, Ng); /* no path flag, we're evaluating this for all closures. that's weak but * we'd have to do multiple evaluations otherwise */ - shader_eval_surface(kg, &sd, 0.0f, 0, SHADER_CONTEXT_EMISSION); + shader_eval_surface(kg, sd, 0.0f, 0, SHADER_CONTEXT_EMISSION); /* evaluate emissive closure */ - if(sd.flag & SD_EMISSION) - eval = shader_emissive_eval(kg, &sd); + if(ccl_fetch(sd, flag) & SD_EMISSION) + eval = shader_emissive_eval(kg, sd); else eval = make_float3(0.0f, 0.0f, 0.0f); } @@ -65,7 +73,11 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd, LightSample *ls, Ray *ray, BsdfEval *eval, bool *is_lamp, - int bounce, int transparent_bounce) + int bounce, int transparent_bounce +#ifdef __SPLIT_KERNEL__ + , ShaderData *sd_DL +#endif + ) { if(ls->pdf == 0.0f) return false; @@ -74,7 +86,14 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd, differential3 dD = differential3_zero(); /* evaluate closure */ - float3 light_eval = direct_emissive_eval(kg, ls, -ls->D, dD, ls->t, sd->time, bounce, transparent_bounce); + + float3 light_eval = direct_emissive_eval(kg, ls, -ls->D, dD, ls->t, ccl_fetch(sd, time), + bounce, + transparent_bounce +#ifdef __SPLIT_KERNEL__ + ,sd_DL +#endif + ); if(is_zero(light_eval)) return false; @@ -83,7 +102,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd, float bsdf_pdf; #ifdef __VOLUME__ - if(sd->prim != PRIM_NONE) + if(ccl_fetch(sd, prim) != PRIM_NONE) shader_bsdf_eval(kg, sd, ls->D, eval, &bsdf_pdf); else shader_volume_phase_eval(kg, sd, ls->D, eval, &bsdf_pdf); @@ -118,8 +137,8 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd, if(ls->shader & SHADER_CAST_SHADOW) { /* setup ray */ - bool transmit = (dot(sd->Ng, ls->D) < 0.0f); - ray->P = ray_offset(sd->P, (transmit)? -sd->Ng: sd->Ng); + bool transmit = (dot(ccl_fetch(sd, Ng), ls->D) < 0.0f); + ray->P = ray_offset(ccl_fetch(sd, P), (transmit)? -ccl_fetch(sd, Ng): ccl_fetch(sd, Ng)); if(ls->t == FLT_MAX) { /* distant light */ @@ -132,7 +151,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd, ray->D = normalize_len(ray->D, &ray->t); } - ray->dP = sd->dP; + ray->dP = ccl_fetch(sd, dP); ray->dD = differential3_zero(); } else { @@ -154,14 +173,14 @@ ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, Shader float3 L = shader_emissive_eval(kg, sd); #ifdef __HAIR__ - if(!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) && (sd->type & PRIMITIVE_ALL_TRIANGLE)) + if(!(path_flag & PATH_RAY_MIS_SKIP) && (ccl_fetch(sd, flag) & SD_USE_MIS) && (ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)) #else - if(!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS)) + if(!(path_flag & PATH_RAY_MIS_SKIP) && (ccl_fetch(sd, flag) & SD_USE_MIS)) #endif { /* multiple importance sampling, get triangle light pdf, * and compute weight with respect to BSDF pdf */ - float pdf = triangle_light_pdf(kg, sd->Ng, sd->I, t); + float pdf = triangle_light_pdf(kg, ccl_fetch(sd, Ng), ccl_fetch(sd, I), t); float mis_weight = power_heuristic(bsdf_pdf, pdf); return L*mis_weight; @@ -172,7 +191,11 @@ ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, Shader /* Indirect Lamp Emission */ -ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, PathState *state, Ray *ray, float3 *emission) +ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, PathState *state, Ray *ray, float3 *emission +#ifdef __SPLIT_KERNEL__ + ,ShaderData *sd +#endif + ) { bool hit_lamp = false; @@ -188,14 +211,21 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, PathState *st /* use visibility flag to skip lights */ if(ls.shader & SHADER_EXCLUDE_ANY) { if(((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) || - ((ls.shader & SHADER_EXCLUDE_GLOSSY) && (state->flag & PATH_RAY_GLOSSY)) || + ((ls.shader & SHADER_EXCLUDE_GLOSSY) && + ((state->flag & (PATH_RAY_GLOSSY|PATH_RAY_REFLECT)) == (PATH_RAY_GLOSSY|PATH_RAY_REFLECT))) || ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) || ((ls.shader & SHADER_EXCLUDE_SCATTER) && (state->flag & PATH_RAY_VOLUME_SCATTER))) continue; } #endif - float3 L = direct_emissive_eval(kg, &ls, -ray->D, ray->dD, ls.t, ray->time, state->bounce, state->transparent_bounce); + float3 L = direct_emissive_eval(kg, &ls, -ray->D, ray->dD, ls.t, ray->time, + state->bounce, + state->transparent_bounce +#ifdef __SPLIT_KERNEL__ + ,sd +#endif + ); #ifdef __VOLUME__ if(state->volume_stack[0].shader != SHADER_NONE) { @@ -224,7 +254,11 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, PathState *st /* Indirect Background */ -ccl_device_noinline float3 indirect_background(KernelGlobals *kg, PathState *state, Ray *ray) +ccl_device_noinline float3 indirect_background(KernelGlobals *kg, ccl_addr_space PathState *state, ccl_addr_space Ray *ray +#ifdef __SPLIT_KERNEL__ + ,ShaderData *sd_global +#endif + ) { #ifdef __BACKGROUND__ int shader = kernel_data.background.surface_shader; @@ -232,18 +266,25 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg, PathState *sta /* use visibility flag to skip lights */ if(shader & SHADER_EXCLUDE_ANY) { if(((shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) || - ((shader & SHADER_EXCLUDE_GLOSSY) && (state->flag & PATH_RAY_GLOSSY)) || + ((shader & SHADER_EXCLUDE_GLOSSY) && + ((state->flag & (PATH_RAY_GLOSSY|PATH_RAY_REFLECT)) == (PATH_RAY_GLOSSY|PATH_RAY_REFLECT))) || ((shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) || ((shader & SHADER_EXCLUDE_CAMERA) && (state->flag & PATH_RAY_CAMERA)) || ((shader & SHADER_EXCLUDE_SCATTER) && (state->flag & PATH_RAY_VOLUME_SCATTER))) return make_float3(0.0f, 0.0f, 0.0f); } +#ifdef __SPLIT_KERNEL__ /* evaluate background closure */ + Ray priv_ray = *ray; + shader_setup_from_background(kg, sd_global, &priv_ray, state->bounce+1, state->transparent_bounce); + float3 L = shader_eval_background(kg, sd_global, state->flag, SHADER_CONTEXT_EMISSION); +#else ShaderData sd; shader_setup_from_background(kg, &sd, ray, state->bounce+1, state->transparent_bounce); float3 L = shader_eval_background(kg, &sd, state->flag, SHADER_CONTEXT_EMISSION); +#endif #ifdef __BACKGROUND_MIS__ /* check if background light exists or if we should skip pdf */ @@ -252,7 +293,7 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg, PathState *sta if(!(state->flag & PATH_RAY_MIS_SKIP) && res) { /* multiple importance sampling, get background light pdf for ray * direction, and compute weight with respect to BSDF pdf */ - float pdf = background_light_pdf(kg, ray->D); + float pdf = background_light_pdf(kg, ray->P, ray->D); float mis_weight = power_heuristic(state->ray_pdf, pdf); return L*mis_weight; diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h index dc5f6e7ce38..f9e9b413898 100644 --- a/intern/cycles/kernel/kernel_film.h +++ b/intern/cycles/kernel/kernel_film.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -27,7 +27,7 @@ ccl_device float4 film_map(KernelGlobals *kg, float4 irradiance, float scale) result.z = color_scene_linear_to_srgb(result.z*exposure); /* clamp since alpha might be > 1.0 due to russian roulette */ - result.w = clamp(result.w, 0.0f, 1.0f); + result.w = saturate(result.w); return result; } @@ -37,10 +37,10 @@ ccl_device uchar4 film_float_to_byte(float4 color) uchar4 result; /* simple float to byte conversion */ - result.x = (uchar)clamp(color.x*255.0f, 0.0f, 255.0f); - result.y = (uchar)clamp(color.y*255.0f, 0.0f, 255.0f); - result.z = (uchar)clamp(color.z*255.0f, 0.0f, 255.0f); - result.w = (uchar)clamp(color.w*255.0f, 0.0f, 255.0f); + result.x = (uchar)(saturate(color.x)*255.0f); + result.y = (uchar)(saturate(color.y)*255.0f); + result.z = (uchar)(saturate(color.z)*255.0f); + result.w = (uchar)(saturate(color.w)*255.0f); return result; } diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h index 6bd2ec0662c..17fa18909c4 100644 --- a/intern/cycles/kernel/kernel_globals.h +++ b/intern/cycles/kernel/kernel_globals.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ /* Constant Globals */ @@ -80,7 +80,7 @@ typedef struct KernelGlobals {} KernelGlobals; #ifdef __KERNEL_OPENCL__ -typedef struct KernelGlobals { +typedef ccl_addr_space struct KernelGlobals { ccl_constant KernelData *data; #define KERNEL_TEX(type, ttype, name) \ @@ -94,7 +94,7 @@ typedef struct KernelGlobals { ccl_device float lookup_table_read(KernelGlobals *kg, float x, int offset, int size) { - x = clamp(x, 0.0f, 1.0f)*(size-1); + x = saturate(x)*(size-1); int index = min(float_to_int(x), size-1); int nindex = min(index+1, size-1); @@ -110,7 +110,7 @@ ccl_device float lookup_table_read(KernelGlobals *kg, float x, int offset, int s ccl_device float lookup_table_read_2D(KernelGlobals *kg, float x, float y, int offset, int xsize, int ysize) { - y = clamp(y, 0.0f, 1.0f)*(ysize-1); + y = saturate(y)*(ysize-1); int index = min(float_to_int(y), ysize-1); int nindex = min(index+1, ysize-1); diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h index 2a5b7689e57..9ba41635b9e 100644 --- a/intern/cycles/kernel/kernel_jitter.h +++ b/intern/cycles/kernel/kernel_jitter.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ /* TODO(sergey): Consider moving portable ctz/clz stuff to util. */ @@ -47,6 +47,8 @@ ccl_device_inline int cmj_fast_div_pow2(int a, int b) # else return a >> __builtin_ctz(b); # endif +#elif defined(__KERNEL_CUDA__) + return a >> (__ffs(b) - 1); #else return a/b; #endif @@ -63,6 +65,8 @@ ccl_device_inline uint cmj_w_mask(uint w) # else return ((1 << (32 - __builtin_clz(w))) - 1); # endif +#elif defined(__KERNEL_CUDA__) + return ((1 << (32 - __clz(w))) - 1); #else w |= w >> 1; w |= w >> 2; @@ -124,7 +128,7 @@ ccl_device_inline uint cmj_permute(uint i, uint l, uint p) i *= 0xc860a3df; i &= w; i ^= i >> 5; - } while (i >= l); + } while(i >= l); return (i + p) % l; } @@ -167,7 +171,11 @@ ccl_device void cmj_sample_2D(int s, int N, int p, float *fx, float *fy) { kernel_assert(s < N); +#if defined(__KERNEL_CUDA__) + int m = float_to_int(__fsqrt_ru(N)); +#else int m = float_to_int(sqrtf(N)); +#endif int n = (N + m - 1)/m; float invN = 1.0f/N; float invm = 1.0f/m; diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h index e7f62f230f8..7590ec2d706 100644 --- a/intern/cycles/kernel/kernel_light.h +++ b/intern/cycles/kernel/kernel_light.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -33,11 +33,112 @@ typedef struct LightSample { LightType type; /* type of light */ } LightSample; +/* Area light sampling */ + +/* Uses the following paper: + * + * Carlos Urena et al. + * An Area-Preserving Parametrization for Spherical Rectangles. + * + * https://www.solidangle.com/research/egsr2013_spherical_rectangle.pdf + * + * Note: light_p is modified when sample_coord is true. + */ +ccl_device float area_light_sample(float3 P, + float3 *light_p, + float3 axisu, float3 axisv, + float randu, float randv, + bool sample_coord) +{ + /* In our name system we're using P for the center, + * which is o in the paper. + */ + + float3 corner = *light_p - axisu * 0.5f - axisv * 0.5f; + float axisu_len, axisv_len; + /* Compute local reference system R. */ + float3 x = normalize_len(axisu, &axisu_len); + float3 y = normalize_len(axisv, &axisv_len); + float3 z = cross(x, y); + /* Compute rectangle coords in local reference system. */ + float3 dir = corner - P; + float z0 = dot(dir, z); + /* Flip 'z' to make it point against Q. */ + if(z0 > 0.0f) { + z *= -1.0f; + z0 *= -1.0f; + } + float x0 = dot(dir, x); + float y0 = dot(dir, y); + float x1 = x0 + axisu_len; + float y1 = y0 + axisv_len; + /* Create vectors to four vertices. */ + float3 v00 = make_float3(x0, y0, z0); + float3 v01 = make_float3(x0, y1, z0); + float3 v10 = make_float3(x1, y0, z0); + float3 v11 = make_float3(x1, y1, z0); + /* Compute normals to edges. */ + float3 n0 = normalize(cross(v00, v10)); + float3 n1 = normalize(cross(v10, v11)); + float3 n2 = normalize(cross(v11, v01)); + float3 n3 = normalize(cross(v01, v00)); + /* Compute internal angles (gamma_i). */ + float g0 = safe_acosf(-dot(n0, n1)); + float g1 = safe_acosf(-dot(n1, n2)); + float g2 = safe_acosf(-dot(n2, n3)); + float g3 = safe_acosf(-dot(n3, n0)); + /* Compute predefined constants. */ + float b0 = n0.z; + float b1 = n2.z; + float b0sq = b0 * b0; + float k = M_2PI_F - g2 - g3; + /* Compute solid angle from internal angles. */ + float S = g0 + g1 - k; + + if(sample_coord) { + /* Compute cu. */ + float au = randu * S + k; + float fu = (cosf(au) * b0 - b1) / sinf(au); + float cu = 1.0f / sqrtf(fu * fu + b0sq) * (fu > 0.0f ? 1.0f : -1.0f); + cu = clamp(cu, -1.0f, 1.0f); + /* Compute xu. */ + float xu = -(cu * z0) / sqrtf(1.0f - cu * cu); + xu = clamp(xu, x0, x1); + /* Compute yv. */ + float z0sq = z0 * z0; + float y0sq = y0 * y0; + float y1sq = y1 * y1; + float d = sqrtf(xu * xu + z0sq); + float h0 = y0 / sqrtf(d * d + y0sq); + float h1 = y1 / sqrtf(d * d + y1sq); + float hv = h0 + randv * (h1 - h0), hv2 = hv * hv; + float yv = (hv2 < 1.0f - 1e-6f) ? (hv * d) / sqrtf(1.0f - hv2) : y1; + + /* Transform (xu, yv, z0) to world coords. */ + *light_p = P + xu * x + yv * y + z0 * z; + } + + /* return pdf */ + if(S != 0.0f) + return 1.0f / S; + else + return 0.0f; +} + /* Background Light */ #ifdef __BACKGROUND_MIS__ -ccl_device float3 background_light_sample(KernelGlobals *kg, float randu, float randv, float *pdf) +/* TODO(sergey): In theory it should be all fine to use noinline for all + * devices, but we're so close to the release so better not screw things + * up for CPU at least. + */ +#ifdef __KERNEL_GPU__ +ccl_device_noinline +#else +ccl_device +#endif +float3 background_map_sample(KernelGlobals *kg, float randu, float randv, float *pdf) { /* for the following, the CDF values are actually a pair of floats, with the * function value as X and the actual CDF as Y. The last entry's function @@ -107,13 +208,19 @@ ccl_device float3 background_light_sample(KernelGlobals *kg, float randu, float else *pdf = (cdf_u.x * cdf_v.x)/(M_2PI_F * M_PI_F * sin_theta * denom); - *pdf *= kernel_data.integrator.pdf_lights; - /* compute direction */ - return -equirectangular_to_direction(u, v); + return equirectangular_to_direction(u, v); } -ccl_device float background_light_pdf(KernelGlobals *kg, float3 direction) +/* TODO(sergey): Same as above, after the release we should consider using + * 'noinline' for all devices. + */ +#ifdef __KERNEL_GPU__ +ccl_device_noinline +#else +ccl_device +#endif +float background_map_pdf(KernelGlobals *kg, float3 direction) { float2 uv = direction_to_equirectangular(direction); int res = kernel_data.integrator.pdf_background_res; @@ -139,9 +246,223 @@ ccl_device float background_light_pdf(KernelGlobals *kg, float3 direction) float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * (res + 1) + index_u); float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v); - float pdf = (cdf_u.x * cdf_v.x)/(M_2PI_F * M_PI_F * sin_theta * denom); + return (cdf_u.x * cdf_v.x)/(M_2PI_F * M_PI_F * sin_theta * denom); +} + +ccl_device_inline bool background_portal_data_fetch_and_check_side(KernelGlobals *kg, + float3 P, + int index, + float3 *lightpos, + float3 *dir) +{ + float4 data0 = kernel_tex_fetch(__light_data, (index + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 0); + float4 data3 = kernel_tex_fetch(__light_data, (index + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 3); + + *lightpos = make_float3(data0.y, data0.z, data0.w); + *dir = make_float3(data3.y, data3.z, data3.w); + + /* Check whether portal is on the right side. */ + if(dot(*dir, P - *lightpos) > 1e-5f) + return true; + + return false; +} + +ccl_device float background_portal_pdf(KernelGlobals *kg, + float3 P, + float3 direction, + int ignore_portal, + bool *is_possible) +{ + float portal_pdf = 0.0f; + + for(int p = 0; p < kernel_data.integrator.num_portals; p++) { + if(p == ignore_portal) + continue; + + float3 lightpos, dir; + if(!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir)) + continue; + + if(is_possible) { + /* There's a portal that could be sampled from this position. */ + *is_possible = true; + } + + float t = -(dot(P, dir) - dot(lightpos, dir)) / dot(direction, dir); + if(t <= 1e-5f) { + /* Either behind the portal or too close. */ + continue; + } + + float4 data1 = kernel_tex_fetch(__light_data, (p + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 1); + float4 data2 = kernel_tex_fetch(__light_data, (p + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 2); + + float3 axisu = make_float3(data1.y, data1.z, data1.w); + float3 axisv = make_float3(data2.y, data2.z, data2.w); + + float3 hit = P + t*direction; + float3 inplane = hit - lightpos; + /* Skip if the the ray doesn't pass through portal. */ + if(fabsf(dot(inplane, axisu) / dot(axisu, axisu)) > 0.5f) + continue; + if(fabsf(dot(inplane, axisv) / dot(axisv, axisv)) > 0.5f) + continue; + + portal_pdf += area_light_sample(P, &lightpos, axisu, axisv, 0.0f, 0.0f, false); + } - return pdf * kernel_data.integrator.pdf_lights; + return kernel_data.integrator.num_portals? portal_pdf / kernel_data.integrator.num_portals: 0.0f; +} + +ccl_device int background_num_possible_portals(KernelGlobals *kg, float3 P) +{ + int num_possible_portals = 0; + for(int p = 0; p < kernel_data.integrator.num_portals; p++) { + float3 lightpos, dir; + if(background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir)) + num_possible_portals++; + } + return num_possible_portals; +} + +ccl_device float3 background_portal_sample(KernelGlobals *kg, + float3 P, + float randu, + float randv, + int num_possible, + int *sampled_portal, + float *pdf) +{ + /* Pick a portal, then re-normalize randv. */ + randv *= num_possible; + int portal = (int)randv; + randv -= portal; + + /* TODO(sergey): Some smarter way of finding portal to sample + * is welcome. + */ + for(int p = 0; p < kernel_data.integrator.num_portals; p++) { + /* Search for the sampled portal. */ + float3 lightpos, dir; + if(!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir)) + continue; + + if(portal == 0) { + /* p is the portal to be sampled. */ + float4 data1 = kernel_tex_fetch(__light_data, (p + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 1); + float4 data2 = kernel_tex_fetch(__light_data, (p + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 2); + float3 axisu = make_float3(data1.y, data1.z, data1.w); + float3 axisv = make_float3(data2.y, data2.z, data2.w); + + *pdf = area_light_sample(P, &lightpos, + axisu, axisv, + randu, randv, + true); + + *pdf /= num_possible; + *sampled_portal = p; + return normalize(lightpos - P); + } + + portal--; + } + + return make_float3(0.0f, 0.0f, 0.0f); +} + +ccl_device float3 background_light_sample(KernelGlobals *kg, float3 P, float randu, float randv, float *pdf) +{ + /* Probability of sampling portals instead of the map. */ + float portal_sampling_pdf = kernel_data.integrator.portal_pdf; + + /* Check if there are portals in the scene which we can sample. */ + if(portal_sampling_pdf > 0.0f) { + int num_portals = background_num_possible_portals(kg, P); + if(num_portals > 0) { + if(portal_sampling_pdf == 1.0f || randu < portal_sampling_pdf) { + if(portal_sampling_pdf < 1.0f) { + randu /= portal_sampling_pdf; + } + int portal; + float3 D = background_portal_sample(kg, P, randu, randv, num_portals, &portal, pdf); + if(num_portals > 1) { + /* Ignore the chosen portal, its pdf is already included. */ + *pdf += background_portal_pdf(kg, P, D, portal, NULL); + } + /* We could also have sampled the map, so combine with MIS. */ + if(portal_sampling_pdf < 1.0f) { + float cdf_pdf = background_map_pdf(kg, D); + *pdf = (portal_sampling_pdf * (*pdf) + + (1.0f - portal_sampling_pdf) * cdf_pdf); + } + return D; + } else { + /* Sample map, but with nonzero portal_sampling_pdf for MIS. */ + randu = (randu - portal_sampling_pdf) / (1.0f - portal_sampling_pdf); + } + } else { + /* We can't sample a portal. + * Check if we can sample the map instead. + */ + if(portal_sampling_pdf == 1.0f) { + /* Use uniform as a fallback if we can't sample the map. */ + *pdf = 1.0f / M_4PI_F; + return sample_uniform_sphere(randu, randv); + } + else { + portal_sampling_pdf = 0.0f; + } + } + } + + float3 D = background_map_sample(kg, randu, randv, pdf); + /* Use MIS if portals could be sampled as well. */ + if(portal_sampling_pdf > 0.0f) { + float portal_pdf = background_portal_pdf(kg, P, D, -1, NULL); + *pdf = (portal_sampling_pdf * portal_pdf + + (1.0f - portal_sampling_pdf) * (*pdf)); + } + return D; +} + +ccl_device float background_light_pdf(KernelGlobals *kg, float3 P, float3 direction) +{ + /* Probability of sampling portals instead of the map. */ + float portal_sampling_pdf = kernel_data.integrator.portal_pdf; + + if(portal_sampling_pdf > 0.0f) { + bool is_possible = false; + float portal_pdf = background_portal_pdf(kg, P, direction, -1, &is_possible); + if(portal_pdf == 0.0f) { + if(portal_sampling_pdf == 1.0f) { + /* If there are no possible portals at this point, + * the fallback sampling would have been used. + * Otherwise, the direction would not be sampled at all => pdf = 0 + */ + return is_possible? 0.0f: kernel_data.integrator.pdf_lights / M_4PI_F; + } + else { + /* We can only sample the map. */ + return background_map_pdf(kg, direction) * kernel_data.integrator.pdf_lights; + } + } else { + if(portal_sampling_pdf == 1.0f) { + /* We can only sample portals. */ + return portal_pdf * kernel_data.integrator.pdf_lights; + } + else { + /* We can sample both, so combine with MIS. */ + return (background_map_pdf(kg, direction) * (1.0f - portal_sampling_pdf) + + portal_pdf * portal_sampling_pdf) * kernel_data.integrator.pdf_lights; + } + } + } + + /* No portals in the scene, so must sample the map. + * At least one of them is always possible if we have a LIGHT_BACKGROUND. + */ + return background_map_pdf(kg, direction) * kernel_data.integrator.pdf_lights; } #endif @@ -167,14 +488,6 @@ ccl_device float3 sphere_light_sample(float3 P, float3 center, float radius, flo return disk_light_sample(normalize(P - center), randu, randv)*radius; } -ccl_device float3 area_light_sample(float3 axisu, float3 axisv, float randu, float randv) -{ - randu = randu - 0.5f; - randv = randv - 0.5f; - - return axisu*randu + axisv*randv; -} - ccl_device float spot_light_attenuation(float4 data1, float4 data2, LightSample *ls) { float3 dir = make_float3(data2.y, data2.z, data2.w); @@ -245,13 +558,14 @@ ccl_device void lamp_light_sample(KernelGlobals *kg, int lamp, #ifdef __BACKGROUND_MIS__ else if(type == LIGHT_BACKGROUND) { /* infinite area light (e.g. light dome or env light) */ - float3 D = background_light_sample(kg, randu, randv, &ls->pdf); + float3 D = -background_light_sample(kg, P, randu, randv, &ls->pdf); ls->P = D; ls->Ng = D; ls->D = -D; ls->t = FLT_MAX; ls->eval_fac = 1.0f; + ls->pdf *= kernel_data.integrator.pdf_lights; } #endif else { @@ -276,6 +590,7 @@ ccl_device void lamp_light_sample(KernelGlobals *kg, int lamp, float4 data2 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 2); ls->eval_fac *= spot_light_attenuation(data1, data2, ls); } + ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t); } else { /* area light */ @@ -286,22 +601,31 @@ ccl_device void lamp_light_sample(KernelGlobals *kg, int lamp, float3 axisv = make_float3(data2.y, data2.z, data2.w); float3 D = make_float3(data3.y, data3.z, data3.w); - ls->P += area_light_sample(axisu, axisv, randu, randv); + ls->pdf = area_light_sample(P, &ls->P, + axisu, axisv, + randu, randv, + true); + ls->Ng = D; ls->D = normalize_len(ls->P - P, &ls->t); float invarea = data2.x; - ls->eval_fac = 0.25f*invarea; - ls->pdf = invarea; + + if(dot(ls->D, D) > 0.0f) + ls->pdf = 0.0f; } ls->eval_fac *= kernel_data.integrator.inv_pdf_lights; - ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t); } } -ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, float t, LightSample *ls) +#if defined(__KERNEL_CUDA__) && (__CUDA_ARCH__ >= 500) && (defined(i386) || defined(_M_IX86)) +ccl_device_noinline +#else +ccl_device +#endif +bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, float t, LightSample *ls) { float4 data0 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 0); float4 data1 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 1); @@ -355,6 +679,7 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, ls->D = D; ls->t = FLT_MAX; + /* compute pdf */ float invarea = data1.w; ls->pdf = invarea/(costheta*costheta*costheta); ls->eval_fac = ls->pdf; @@ -386,6 +711,10 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, if(ls->eval_fac == 0.0f) return false; } + + /* compute pdf */ + if(ls->t != FLT_MAX) + ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t); } else if(type == LIGHT_AREA) { /* area light */ @@ -404,24 +733,20 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, if(dot(D, Ng) >= 0.0f) return false; - ls->P = make_float3(data0.y, data0.z, data0.w); + float3 light_P = make_float3(data0.y, data0.z, data0.w); if(!ray_quad_intersect(P, D, t, - ls->P, axisu, axisv, &ls->P, &ls->t)) + light_P, axisu, axisv, &ls->P, &ls->t)) return false; ls->D = D; ls->Ng = Ng; - ls->pdf = invarea; - ls->eval_fac = 0.25f*ls->pdf; + ls->pdf = area_light_sample(P, &light_P, axisu, axisv, 0, 0, false); + ls->eval_fac = 0.25f*invarea; } else return false; - /* compute pdf */ - if(ls->t != FLT_MAX) - ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t); - return true; } @@ -514,7 +839,13 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float randt) /* Generic Light */ -ccl_device void light_sample(KernelGlobals *kg, float randt, float randu, float randv, float time, float3 P, LightSample *ls) +ccl_device bool light_select_reached_max_bounces(KernelGlobals *kg, int index, int bounce) +{ + float4 data4 = kernel_tex_fetch(__light_data, index*LIGHT_SIZE + 4); + return (bounce > __float_as_int(data4.x)); +} + +ccl_device void light_sample(KernelGlobals *kg, float randt, float randu, float randv, float time, float3 P, int bounce, LightSample *ls) { /* sample index */ int index = light_distribution_sample(kg, randt); @@ -536,6 +867,12 @@ ccl_device void light_sample(KernelGlobals *kg, float randt, float randu, float } else { int lamp = -prim-1; + + if(UNLIKELY(light_select_reached_max_bounces(kg, lamp, bounce))) { + ls->pdf = 0.0f; + return; + } + lamp_light_sample(kg, lamp, randu, randv, P, ls); } } @@ -546,22 +883,5 @@ ccl_device int light_select_num_samples(KernelGlobals *kg, int index) return __float_as_int(data3.x); } -ccl_device int lamp_light_eval_sample(KernelGlobals *kg, float randt) -{ - /* sample index */ - int index = light_distribution_sample(kg, randt); - - /* fetch light data */ - float4 l = kernel_tex_fetch(__light_distribution, index); - int prim = __float_as_int(l.y); - - if(prim < 0) { - int lamp = -prim-1; - return lamp; - } - else - return LAMP_NONE; -} - CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_math.h b/intern/cycles/kernel/kernel_math.h index d95a5c76309..9e14d8cc7cb 100644 --- a/intern/cycles/kernel/kernel_math.h +++ b/intern/cycles/kernel/kernel_math.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __KERNEL_MATH_H__ @@ -19,6 +19,7 @@ #include "util_color.h" #include "util_math.h" +#include "util_math_fast.h" #include "util_transform.h" #include "util_distort.h" diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h index b3b6fc02894..20cf3fa931b 100644 --- a/intern/cycles/kernel/kernel_passes.h +++ b/intern/cycles/kernel/kernel_passes.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -19,23 +19,49 @@ CCL_NAMESPACE_BEGIN ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, int sample, float value) { ccl_global float *buf = buffer; +#if defined(__SPLIT_KERNEL__) && defined(__WORK_STEALING__) + atomic_add_float(buf, value); +#else *buf = (sample == 0)? value: *buf + value; +#endif // __SPLIT_KERNEL__ && __WORK_STEALING__ } ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, int sample, float3 value) { +#if defined(__SPLIT_KERNEL__) && defined(__WORK_STEALING__) + ccl_global float *buf_x = buffer + 0; + ccl_global float *buf_y = buffer + 1; + ccl_global float *buf_z = buffer + 2; + + atomic_add_float(buf_x, value.x); + atomic_add_float(buf_y, value.y); + atomic_add_float(buf_z, value.z); +#else ccl_global float3 *buf = (ccl_global float3*)buffer; *buf = (sample == 0)? value: *buf + value; +#endif // __SPLIT_KERNEL__ && __WORK_STEALING__ } ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, int sample, float4 value) { +#if defined(__SPLIT_KERNEL__) && defined(__WORK_STEALING__) + ccl_global float *buf_x = buffer + 0; + ccl_global float *buf_y = buffer + 1; + ccl_global float *buf_z = buffer + 2; + ccl_global float *buf_w = buffer + 3; + + atomic_add_float(buf_x, value.x); + atomic_add_float(buf_y, value.y); + atomic_add_float(buf_z, value.z); + atomic_add_float(buf_w, value.w); +#else ccl_global float4 *buf = (ccl_global float4*)buffer; *buf = (sample == 0)? value: *buf + value; +#endif // __SPLIT_KERNEL__ && __WORK_STEALING__ } ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L, - ShaderData *sd, int sample, PathState *state, float3 throughput) + ShaderData *sd, int sample, ccl_addr_space PathState *state, float3 throughput) { #ifdef __PASSES__ int path_flag = state->flag; @@ -49,18 +75,18 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl return; if(!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) { - if(!(sd->flag & SD_TRANSPARENT) || + if(!(ccl_fetch(sd, flag) & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f || average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) { if(sample == 0) { if(flag & PASS_DEPTH) { - float depth = camera_distance(kg, sd->P); + float depth = camera_distance(kg, ccl_fetch(sd, P)); kernel_write_pass_float(buffer + kernel_data.film.pass_depth, sample, depth); } if(flag & PASS_OBJECT_ID) { - float id = object_pass_id(kg, sd->object); + float id = object_pass_id(kg, ccl_fetch(sd, object)); kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, sample, id); } if(flag & PASS_MATERIAL_ID) { @@ -70,7 +96,7 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl } if(flag & PASS_NORMAL) { - float3 normal = sd->N; + float3 normal = ccl_fetch(sd, N); kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, sample, normal); } if(flag & PASS_UV) { @@ -101,8 +127,8 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl float mist_start = kernel_data.film.mist_start; float mist_inv_depth = kernel_data.film.mist_inv_depth; - float depth = camera_distance(kg, sd->P); - float mist = clamp((depth - mist_start)*mist_inv_depth, 0.0f, 1.0f); + float depth = camera_distance(kg, ccl_fetch(sd, P)); + float mist = saturate((depth - mist_start)*mist_inv_depth); /* falloff */ float mist_falloff = kernel_data.film.mist_falloff; diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index b8994d940fd..9794ad1d180 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifdef __OSL__ @@ -42,9 +42,14 @@ #include "kernel_path_state.h" #include "kernel_shadow.h" #include "kernel_emission.h" +#include "kernel_path_common.h" #include "kernel_path_surface.h" #include "kernel_path_volume.h" +#ifdef __KERNEL_DEBUG__ +#include "kernel_debug.h" +#endif + CCL_NAMESPACE_BEGIN ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, @@ -113,7 +118,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, /* direct light sampling */ kernel_branched_path_volume_connect_light(kg, rng, &volume_sd, - throughput, &state, L, 1.0f, all, &volume_ray, &volume_segment); + throughput, &state, L, all, &volume_ray, &volume_segment); /* indirect sample. if we use distance sampling and take just * one sample for direct and indirect light, we could share @@ -126,9 +131,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, rphase, rscatter, &volume_segment, NULL, true); } - if(result != VOLUME_PATH_SCATTERED) - throughput *= volume_segment.accum_transmittance; - /* free cached steps */ kernel_volume_decoupled_free(kg, &volume_segment); @@ -138,6 +140,9 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, else break; } + else { + throughput *= volume_segment.accum_transmittance; + } } else #endif @@ -269,8 +274,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, float bssrdf_u, bssrdf_v; path_state_rng_2D(kg, rng, &state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false); - - state.flag |= PATH_RAY_BSSRDF_ANCESTOR; } } #endif @@ -303,17 +306,17 @@ ccl_device void kernel_path_ao(KernelGlobals *kg, ShaderData *sd, PathRadiance * sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); - if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) { + if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) { Ray light_ray; float3 ao_shadow; - light_ray.P = ray_offset(sd->P, sd->Ng); + light_ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng)); light_ray.D = ao_D; light_ray.t = kernel_data.background.ao_distance; #ifdef __OBJECT_MOTION__ - light_ray.time = sd->time; + light_ray.time = ccl_fetch(sd, time); #endif - light_ray.dP = sd->dP; + light_ray.dP = ccl_fetch(sd, dP); light_ray.dD = differential3_zero(); if(!shadow_blocked(kg, state, &light_ray, &ao_shadow)) @@ -321,78 +324,8 @@ ccl_device void kernel_path_ao(KernelGlobals *kg, ShaderData *sd, PathRadiance * } } -ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, PathState *state, RNG *rng, float3 throughput) -{ - int num_samples = kernel_data.integrator.ao_samples; - float num_samples_inv = 1.0f/num_samples; - float ao_factor = kernel_data.background.ao_factor; - float3 ao_N; - float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N); - float3 ao_alpha = shader_bsdf_alpha(kg, sd); - - for(int j = 0; j < num_samples; j++) { - float bsdf_u, bsdf_v; - path_branched_rng_2D(kg, rng, state, j, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v); - - float3 ao_D; - float ao_pdf; - - sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); - - if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) { - Ray light_ray; - float3 ao_shadow; - - light_ray.P = ray_offset(sd->P, sd->Ng); - light_ray.D = ao_D; - light_ray.t = kernel_data.background.ao_distance; -#ifdef __OBJECT_MOTION__ - light_ray.time = sd->time; -#endif - light_ray.dP = sd->dP; - light_ray.dD = differential3_zero(); - - if(!shadow_blocked(kg, state, &light_ray, &ao_shadow)) - path_radiance_accum_ao(L, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow, state->bounce); - } - } -} - #ifdef __SUBSURFACE__ -#ifdef __VOLUME__ -ccl_device void kernel_path_subsurface_update_volume_stack(KernelGlobals *kg, - Ray *ray, - VolumeStack *stack) -{ - kernel_assert(kernel_data.integrator.use_volumes); - - Ray volume_ray = *ray; - Intersection isect; - const float3 Pend = volume_ray.P + volume_ray.D*volume_ray.t; - - while( - scene_intersect(kg, &volume_ray, PATH_RAY_ALL_VISIBILITY, - &isect, NULL, 0.0f, 0.0f)) { - ShaderData sd; - shader_setup_from_ray(kg, &sd, &isect, &volume_ray, 0, 0); - kernel_volume_stack_enter_exit(kg, &sd, stack); - - /* Move ray forward. */ - volume_ray.P = ray_offset(sd.P, -sd.Ng); - volume_ray.D = normalize_len(Pend - volume_ray.P, - &volume_ray.t); - - /* TODO(sergey): Find a faster way detecting that ray_offset moved - * us pass through the end point. - */ - if(dot(ray->D, volume_ray.D) < 0.0f) { - break; - } - } -} -#endif - ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, PathState *state, RNG *rng, Ray *ray, float3 *throughput) { float bssrdf_probability; @@ -411,6 +344,8 @@ ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd int num_hits = subsurface_scatter_multi_step(kg, sd, bssrdf_sd, state->flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false); #ifdef __VOLUME__ Ray volume_ray = *ray; + bool need_update_volume_stack = kernel_data.integrator.use_volumes && + ccl_fetch(sd, flag) & SD_OBJECT_INTERSECTS_VOLUME; #endif /* compute lighting with the BSDF closure */ @@ -419,7 +354,6 @@ ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd PathState hit_state = *state; Ray hit_ray = *ray; - hit_state.flag |= PATH_RAY_BSSRDF_ANCESTOR; hit_state.rng_offset += PRNG_BOUNCE_NUM; kernel_path_surface_connect_light(kg, rng, &bssrdf_sd[hit], tp, state, L); @@ -430,12 +364,12 @@ ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd #endif #ifdef __VOLUME__ - if(kernel_data.integrator.use_volumes) { + if(need_update_volume_stack) { /* Setup ray from previous surface point to the new one. */ volume_ray.D = normalize_len(hit_ray.P - volume_ray.P, &volume_ray.t); - kernel_path_subsurface_update_volume_stack( + kernel_volume_stack_update_for_subsurface( kg, &volume_ray, hit_state.volume_stack); @@ -471,6 +405,11 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, PathState state; path_state_init(kg, &state, rng, sample, &ray); +#ifdef __KERNEL_DEBUG__ + DebugData debug_data; + debug_data_init(&debug_data); +#endif + /* path iteration */ for(;;) { /* intersect scene */ @@ -497,6 +436,14 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, bool hit = scene_intersect(kg, &ray, visibility, &isect, NULL, 0.0f, 0.0f); #endif +#ifdef __KERNEL_DEBUG__ + if(state.flag & PATH_RAY_CAMERA) { + debug_data.num_bvh_traversal_steps += isect.num_traversal_steps; + debug_data.num_bvh_traversed_instances += isect.num_traversed_instances; + } + debug_data.num_ray_bounces++; +#endif + #ifdef __LAMP_MIS__ if(kernel_data.integrator.use_lamp_mis && !(state.flag & PATH_RAY_CAMERA)) { /* ray starting from previous non-transparent bounce */ @@ -553,7 +500,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, /* direct light sampling */ kernel_branched_path_volume_connect_light(kg, rng, &volume_sd, - throughput, &state, &L, 1.0f, all, &volume_ray, &volume_segment); + throughput, &state, &L, all, &volume_ray, &volume_segment); /* indirect sample. if we use distance sampling and take just * one sample for direct and indirect light, we could share @@ -566,9 +513,6 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, rphase, rscatter, &volume_segment, NULL, true); } - if(result != VOLUME_PATH_SCATTERED) - throughput *= volume_segment.accum_transmittance; - /* free cached steps */ kernel_volume_decoupled_free(kg, &volume_segment); @@ -578,6 +522,9 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, else break; } + else { + throughput *= volume_segment.accum_transmittance; + } } else #endif @@ -717,460 +664,13 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, kernel_write_light_passes(kg, buffer, &L, sample); - return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent); -} - -#ifdef __BRANCHED_PATH__ - -/* branched path tracing: bounce off surface and integrate indirect light */ -ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg, - RNG *rng, ShaderData *sd, float3 throughput, float num_samples_adjust, - PathState *state, PathRadiance *L) -{ - for(int i = 0; i< sd->num_closure; i++) { - const ShaderClosure *sc = &sd->closure[i]; - - if(!CLOSURE_IS_BSDF(sc->type)) - continue; - /* transparency is not handled here, but in outer loop */ - if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) - continue; - - int num_samples; - - if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) - num_samples = kernel_data.integrator.diffuse_samples; - else if(CLOSURE_IS_BSDF_BSSRDF(sc->type)) - num_samples = 1; - else if(CLOSURE_IS_BSDF_GLOSSY(sc->type)) - num_samples = kernel_data.integrator.glossy_samples; - else - num_samples = kernel_data.integrator.transmission_samples; - - num_samples = ceil_to_int(num_samples_adjust*num_samples); - - float num_samples_inv = num_samples_adjust/num_samples; - RNG bsdf_rng = cmj_hash(*rng, i); - - for(int j = 0; j < num_samples; j++) { - PathState ps = *state; - float3 tp = throughput; - Ray bsdf_ray; - - if(!kernel_branched_path_surface_bounce(kg, &bsdf_rng, sd, sc, j, num_samples, &tp, &ps, L, &bsdf_ray)) - continue; - - kernel_path_indirect(kg, rng, bsdf_ray, tp*num_samples_inv, num_samples, ps, L); - - /* for render passes, sum and reset indirect light pass variables - * for the next samples */ - path_radiance_sum_indirect(L); - path_radiance_reset_indirect(L); - } - } -} - -#ifdef __SUBSURFACE__ -ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg, - ShaderData *sd, - PathRadiance *L, - PathState *state, - RNG *rng, - Ray *ray, - float3 throughput) -{ - for(int i = 0; i< sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; - - if(!CLOSURE_IS_BSSRDF(sc->type)) - continue; - - /* set up random number generator */ - uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb); - int num_samples = kernel_data.integrator.subsurface_samples; - float num_samples_inv = 1.0f/num_samples; - RNG bssrdf_rng = cmj_hash(*rng, i); - - state->flag |= PATH_RAY_BSSRDF_ANCESTOR; - - /* do subsurface scatter step with copy of shader data, this will - * replace the BSSRDF with a diffuse BSDF closure */ - for(int j = 0; j < num_samples; j++) { - ShaderData bssrdf_sd[BSSRDF_MAX_HITS]; - float bssrdf_u, bssrdf_v; - path_branched_rng_2D(kg, &bssrdf_rng, state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); - int num_hits = subsurface_scatter_multi_step(kg, sd, bssrdf_sd, state->flag, sc, &lcg_state, bssrdf_u, bssrdf_v, true); -#ifdef __VOLUME__ - Ray volume_ray = *ray; -#endif - - /* compute lighting with the BSDF closure */ - for(int hit = 0; hit < num_hits; hit++) { - PathState hit_state = *state; - - path_state_branch(&hit_state, j, num_samples); - -#ifdef __VOLUME__ - if(kernel_data.integrator.use_volumes) { - /* Setup ray from previous surface point to the new one. */ - float3 P = ray_offset(bssrdf_sd[hit].P, -bssrdf_sd[hit].Ng); - volume_ray.D = normalize_len(P - volume_ray.P, - &volume_ray.t); - - kernel_path_subsurface_update_volume_stack( - kg, - &volume_ray, - hit_state.volume_stack); - - /* Move volume ray forward. */ - volume_ray.P = P; - } -#endif - -#if defined(__EMISSION__) && defined(__BRANCHED_PATH__) - /* direct light */ - if(kernel_data.integrator.use_direct_light) { - bool all = kernel_data.integrator.sample_all_lights_direct; - kernel_branched_path_surface_connect_light(kg, rng, - &bssrdf_sd[hit], &hit_state, throughput, num_samples_inv, L, all); - } -#endif - - /* indirect light */ - kernel_branched_path_surface_indirect_light(kg, rng, - &bssrdf_sd[hit], throughput, num_samples_inv, - &hit_state, L); - } - } - - state->flag &= ~PATH_RAY_BSSRDF_ANCESTOR; - } -} -#endif - -ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer) -{ - /* initialize */ - PathRadiance L; - float3 throughput = make_float3(1.0f, 1.0f, 1.0f); - float L_transparent = 0.0f; - - path_radiance_init(&L, kernel_data.film.use_light_pass); - - PathState state; - path_state_init(kg, &state, rng, sample, &ray); - - for(;;) { - /* intersect scene */ - Intersection isect; - uint visibility = path_state_ray_visibility(kg, &state); - -#ifdef __HAIR__ - float difl = 0.0f, extmax = 0.0f; - uint lcg_state = 0; - - if(kernel_data.bvh.have_curves) { - if((kernel_data.cam.resolution == 1) && (state.flag & PATH_RAY_CAMERA)) { - float3 pixdiff = ray.dD.dx + ray.dD.dy; - /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/ - difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f; - } - - extmax = kernel_data.curve.maximum_width; - lcg_state = lcg_state_init(rng, &state, 0x51633e2d); - } - - bool hit = scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax); -#else - bool hit = scene_intersect(kg, &ray, visibility, &isect, NULL, 0.0f, 0.0f); -#endif - -#ifdef __VOLUME__ - /* volume attenuation, emission, scatter */ - if(state.volume_stack[0].shader != SHADER_NONE) { - Ray volume_ray = ray; - volume_ray.t = (hit)? isect.t: FLT_MAX; - - bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack); - -#ifdef __VOLUME_DECOUPLED__ - /* decoupled ray marching only supported on CPU */ - - /* cache steps along volume for repeated sampling */ - VolumeSegment volume_segment; - ShaderData volume_sd; - - shader_setup_from_volume(kg, &volume_sd, &volume_ray, state.bounce, state.transparent_bounce); - kernel_volume_decoupled_record(kg, &state, - &volume_ray, &volume_sd, &volume_segment, heterogeneous); - - /* direct light sampling */ - if(volume_segment.closure_flag & SD_SCATTER) { - volume_segment.sampling_method = volume_stack_sampling_method(kg, state.volume_stack); - - bool all = kernel_data.integrator.sample_all_lights_direct; - - kernel_branched_path_volume_connect_light(kg, rng, &volume_sd, - throughput, &state, &L, 1.0f, all, &volume_ray, &volume_segment); - - /* indirect light sampling */ - int num_samples = kernel_data.integrator.volume_samples; - float num_samples_inv = 1.0f/num_samples; - - for(int j = 0; j < num_samples; j++) { - /* workaround to fix correlation bug in T38710, can find better solution - * in random number generator later, for now this is done here to not impact - * performance of rendering without volumes */ - RNG tmp_rng = cmj_hash(*rng, state.rng_offset); - - PathState ps = state; - Ray pray = ray; - float3 tp = throughput; - - /* branch RNG state */ - path_state_branch(&ps, j, num_samples); - - /* scatter sample. if we use distance sampling and take just one - * sample for direct and indirect light, we could share this - * computation, but makes code a bit complex */ - float rphase = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_PHASE); - float rscatter = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_SCATTER_DISTANCE); - - VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, - &ps, &pray, &volume_sd, &tp, rphase, rscatter, &volume_segment, NULL, false); - - (void)result; - kernel_assert(result == VOLUME_PATH_SCATTERED); - - if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray)) { - kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L); - - /* for render passes, sum and reset indirect light pass variables - * for the next samples */ - path_radiance_sum_indirect(&L); - path_radiance_reset_indirect(&L); - } - } - } - - /* emission and transmittance */ - if(volume_segment.closure_flag & SD_EMISSION) - path_radiance_accum_emission(&L, throughput, volume_segment.accum_emission, state.bounce); - throughput *= volume_segment.accum_transmittance; - - /* free cached steps */ - kernel_volume_decoupled_free(kg, &volume_segment); -#else - /* GPU: no decoupled ray marching, scatter probalistically */ - int num_samples = kernel_data.integrator.volume_samples; - float num_samples_inv = 1.0f/num_samples; - - /* todo: we should cache the shader evaluations from stepping - * through the volume, for now we redo them multiple times */ - - for(int j = 0; j < num_samples; j++) { - PathState ps = state; - Ray pray = ray; - ShaderData volume_sd; - float3 tp = throughput * num_samples_inv; - - /* branch RNG state */ - path_state_branch(&ps, j, num_samples); - - VolumeIntegrateResult result = kernel_volume_integrate( - kg, &ps, &volume_sd, &volume_ray, &L, &tp, rng, heterogeneous); - -#ifdef __VOLUME_SCATTER__ - if(result == VOLUME_PATH_SCATTERED) { - /* todo: support equiangular, MIS and all light sampling. - * alternatively get decoupled ray marching working on the GPU */ - kernel_path_volume_connect_light(kg, rng, &volume_sd, tp, &state, &L); - - if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray)) { - kernel_path_indirect(kg, rng, pray, tp, num_samples, ps, &L); - - /* for render passes, sum and reset indirect light pass variables - * for the next samples */ - path_radiance_sum_indirect(&L); - path_radiance_reset_indirect(&L); - } - } -#endif - } - - /* todo: avoid this calculation using decoupled ray marching */ - kernel_volume_shadow(kg, &state, &volume_ray, &throughput); -#endif - } -#endif - - if(!hit) { - /* eval background shader if nothing hit */ - if(kernel_data.background.transparent) { - L_transparent += average(throughput); - -#ifdef __PASSES__ - if(!(kernel_data.film.pass_flag & PASS_BACKGROUND)) -#endif - break; - } - -#ifdef __BACKGROUND__ - /* sample background shader */ - float3 L_background = indirect_background(kg, &state, &ray); - path_radiance_accum_background(&L, throughput, L_background, state.bounce); -#endif - - break; - } - - /* setup shading */ - ShaderData sd; - shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce, state.transparent_bounce); - shader_eval_surface(kg, &sd, 0.0f, state.flag, SHADER_CONTEXT_MAIN); - shader_merge_closures(&sd); - - /* holdout */ -#ifdef __HOLDOUT__ - if(sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK)) { - if(kernel_data.background.transparent) { - float3 holdout_weight; - - if(sd.flag & SD_HOLDOUT_MASK) - holdout_weight = make_float3(1.0f, 1.0f, 1.0f); - else - holdout_weight = shader_holdout_eval(kg, &sd); - - /* any throughput is ok, should all be identical here */ - L_transparent += average(holdout_weight*throughput); - } - - if(sd.flag & SD_HOLDOUT_MASK) - break; - } -#endif - - /* holdout mask objects do not write data passes */ - kernel_write_data_passes(kg, buffer, &L, &sd, sample, &state, throughput); - -#ifdef __EMISSION__ - /* emission */ - if(sd.flag & SD_EMISSION) { - float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, state.ray_pdf); - path_radiance_accum_emission(&L, throughput, emission, state.bounce); - } -#endif - - /* transparency termination */ - if(state.flag & PATH_RAY_TRANSPARENT) { - /* path termination. this is a strange place to put the termination, it's - * mainly due to the mixed in MIS that we use. gives too many unneeded - * shader evaluations, only need emission if we are going to terminate */ - float probability = path_state_terminate_probability(kg, &state, throughput); - - if(probability == 0.0f) { - break; - } - else if(probability != 1.0f) { - float terminate = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_TERMINATE); - - if(terminate >= probability) - break; - - throughput /= probability; - } - } - -#ifdef __AO__ - /* ambient occlusion */ - if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) { - kernel_branched_path_ao(kg, &sd, &L, &state, rng, throughput); - } -#endif - -#ifdef __SUBSURFACE__ - /* bssrdf scatter to a different location on the same object */ - if(sd.flag & SD_BSSRDF) { - kernel_branched_path_subsurface_scatter(kg, &sd, &L, &state, - rng, &ray, throughput); - } -#endif - - if(!(sd.flag & SD_HAS_ONLY_VOLUME)) { - PathState hit_state = state; - -#ifdef __EMISSION__ - /* direct light */ - if(kernel_data.integrator.use_direct_light) { - bool all = kernel_data.integrator.sample_all_lights_direct; - kernel_branched_path_surface_connect_light(kg, rng, - &sd, &hit_state, throughput, 1.0f, &L, all); - } -#endif - - /* indirect light */ - kernel_branched_path_surface_indirect_light(kg, rng, - &sd, throughput, 1.0f, &hit_state, &L); - - /* continue in case of transparency */ - throughput *= shader_bsdf_transparency(kg, &sd); - - if(is_zero(throughput)) - break; - } - - path_state_next(kg, &state, LABEL_TRANSPARENT); - ray.P = ray_offset(sd.P, -sd.Ng); - ray.t -= sd.ray_length; /* clipping works through transparent */ - - -#ifdef __RAY_DIFFERENTIALS__ - ray.dP = sd.dP; - ray.dD.dx = -sd.dI.dx; - ray.dD.dy = -sd.dI.dy; -#endif - -#ifdef __VOLUME__ - /* enter/exit volume */ - kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack); +#ifdef __KERNEL_DEBUG__ + kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample); #endif - } - - float3 L_sum = path_radiance_clamp_and_sum(kg, &L); - - kernel_write_light_passes(kg, buffer, &L, sample); return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent); } -#endif - -ccl_device_inline void kernel_path_trace_setup(KernelGlobals *kg, ccl_global uint *rng_state, int sample, int x, int y, RNG *rng, Ray *ray) -{ - float filter_u; - float filter_v; - - int num_samples = kernel_data.integrator.aa_samples; - - path_rng_init(kg, rng_state, sample, num_samples, rng, x, y, &filter_u, &filter_v); - - /* sample camera ray */ - - float lens_u = 0.0f, lens_v = 0.0f; - - if(kernel_data.cam.aperturesize > 0.0f) - path_rng_2D(kg, rng, sample, num_samples, PRNG_LENS_U, &lens_u, &lens_v); - - float time = 0.0f; - -#ifdef __CAMERA_MOTION__ - if(kernel_data.cam.shuttertime != -1.0f) - time = path_rng_1D(kg, rng, sample, num_samples, PRNG_TIME); -#endif - - camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray); -} - ccl_device void kernel_path_trace(KernelGlobals *kg, ccl_global float *buffer, ccl_global uint *rng_state, int sample, int x, int y, int offset, int stride) @@ -1202,38 +702,5 @@ ccl_device void kernel_path_trace(KernelGlobals *kg, path_rng_end(kg, rng_state, rng); } -#ifdef __BRANCHED_PATH__ -ccl_device void kernel_branched_path_trace(KernelGlobals *kg, - ccl_global float *buffer, ccl_global uint *rng_state, - int sample, int x, int y, int offset, int stride) -{ - /* buffer offset */ - int index = offset + x + y*stride; - int pass_stride = kernel_data.film.pass_stride; - - rng_state += index; - buffer += index*pass_stride; - - /* initialize random numbers and ray */ - RNG rng; - Ray ray; - - kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray); - - /* integrate */ - float4 L; - - if(ray.t != 0.0f) - L = kernel_branched_path_integrate(kg, &rng, sample, ray, buffer); - else - L = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - /* accumulate result in output buffer */ - kernel_write_pass_float4(buffer, sample, L); - - path_rng_end(kg, rng_state, rng); -} -#endif - CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h new file mode 100644 index 00000000000..b6d64985f6a --- /dev/null +++ b/intern/cycles/kernel/kernel_path_branched.h @@ -0,0 +1,534 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + +#ifdef __BRANCHED_PATH__ + +ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, PathState *state, RNG *rng, float3 throughput) +{ + int num_samples = kernel_data.integrator.ao_samples; + float num_samples_inv = 1.0f/num_samples; + float ao_factor = kernel_data.background.ao_factor; + float3 ao_N; + float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N); + float3 ao_alpha = shader_bsdf_alpha(kg, sd); + + for(int j = 0; j < num_samples; j++) { + float bsdf_u, bsdf_v; + path_branched_rng_2D(kg, rng, state, j, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v); + + float3 ao_D; + float ao_pdf; + + sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); + + if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) { + Ray light_ray; + float3 ao_shadow; + + light_ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng)); + light_ray.D = ao_D; + light_ray.t = kernel_data.background.ao_distance; +#ifdef __OBJECT_MOTION__ + light_ray.time = ccl_fetch(sd, time); +#endif + light_ray.dP = ccl_fetch(sd, dP); + light_ray.dD = differential3_zero(); + + if(!shadow_blocked(kg, state, &light_ray, &ao_shadow)) + path_radiance_accum_ao(L, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow, state->bounce); + } + } +} + + +/* bounce off surface and integrate indirect light */ +ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg, + RNG *rng, ShaderData *sd, float3 throughput, float num_samples_adjust, + PathState *state, PathRadiance *L) +{ + for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { + const ShaderClosure *sc = &ccl_fetch(sd, closure)[i]; + + if(!CLOSURE_IS_BSDF(sc->type)) + continue; + /* transparency is not handled here, but in outer loop */ + if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) + continue; + + int num_samples; + + if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) + num_samples = kernel_data.integrator.diffuse_samples; + else if(CLOSURE_IS_BSDF_BSSRDF(sc->type)) + num_samples = 1; + else if(CLOSURE_IS_BSDF_GLOSSY(sc->type)) + num_samples = kernel_data.integrator.glossy_samples; + else + num_samples = kernel_data.integrator.transmission_samples; + + num_samples = ceil_to_int(num_samples_adjust*num_samples); + + float num_samples_inv = num_samples_adjust/num_samples; + RNG bsdf_rng = cmj_hash(*rng, i); + + for(int j = 0; j < num_samples; j++) { + PathState ps = *state; + float3 tp = throughput; + Ray bsdf_ray; + + if(!kernel_branched_path_surface_bounce(kg, &bsdf_rng, sd, sc, j, num_samples, &tp, &ps, L, &bsdf_ray)) + continue; + + kernel_path_indirect(kg, rng, bsdf_ray, tp*num_samples_inv, num_samples, ps, L); + + /* for render passes, sum and reset indirect light pass variables + * for the next samples */ + path_radiance_sum_indirect(L); + path_radiance_reset_indirect(L); + } + } +} + +#ifdef __SUBSURFACE__ +ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg, + ShaderData *sd, + PathRadiance *L, + PathState *state, + RNG *rng, + Ray *ray, + float3 throughput) +{ + for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { + ShaderClosure *sc = &ccl_fetch(sd, closure)[i]; + + if(!CLOSURE_IS_BSSRDF(sc->type)) + continue; + + /* set up random number generator */ + uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb); + int num_samples = kernel_data.integrator.subsurface_samples; + float num_samples_inv = 1.0f/num_samples; + RNG bssrdf_rng = cmj_hash(*rng, i); + + /* do subsurface scatter step with copy of shader data, this will + * replace the BSSRDF with a diffuse BSDF closure */ + for(int j = 0; j < num_samples; j++) { + ShaderData bssrdf_sd[BSSRDF_MAX_HITS]; + float bssrdf_u, bssrdf_v; + path_branched_rng_2D(kg, &bssrdf_rng, state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); + int num_hits = subsurface_scatter_multi_step(kg, sd, bssrdf_sd, state->flag, sc, &lcg_state, bssrdf_u, bssrdf_v, true); +#ifdef __VOLUME__ + Ray volume_ray = *ray; + bool need_update_volume_stack = kernel_data.integrator.use_volumes && + ccl_fetch(sd, flag) & SD_OBJECT_INTERSECTS_VOLUME; +#endif + + /* compute lighting with the BSDF closure */ + for(int hit = 0; hit < num_hits; hit++) { + PathState hit_state = *state; + + path_state_branch(&hit_state, j, num_samples); + +#ifdef __VOLUME__ + if(need_update_volume_stack) { + /* Setup ray from previous surface point to the new one. */ + float3 P = ray_offset(bssrdf_sd[hit].P, -bssrdf_sd[hit].Ng); + volume_ray.D = normalize_len(P - volume_ray.P, + &volume_ray.t); + + kernel_volume_stack_update_for_subsurface( + kg, + &volume_ray, + hit_state.volume_stack); + + /* Move volume ray forward. */ + volume_ray.P = P; + } +#endif + +#ifdef __EMISSION__ + /* direct light */ + if(kernel_data.integrator.use_direct_light) { + bool all = kernel_data.integrator.sample_all_lights_direct; + kernel_branched_path_surface_connect_light(kg, rng, + &bssrdf_sd[hit], &hit_state, throughput, num_samples_inv, L, all); + } +#endif + + /* indirect light */ + kernel_branched_path_surface_indirect_light(kg, rng, + &bssrdf_sd[hit], throughput, num_samples_inv, + &hit_state, L); + } + } + } +} +#endif + +ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer) +{ + /* initialize */ + PathRadiance L; + float3 throughput = make_float3(1.0f, 1.0f, 1.0f); + float L_transparent = 0.0f; + + path_radiance_init(&L, kernel_data.film.use_light_pass); + + PathState state; + path_state_init(kg, &state, rng, sample, &ray); + +#ifdef __KERNEL_DEBUG__ + DebugData debug_data; + debug_data_init(&debug_data); +#endif + + /* Main Loop + * Here we only handle transparency intersections from the camera ray. + * Indirect bounces are handled in kernel_branched_path_surface_indirect_light(). + */ + for(;;) { + /* intersect scene */ + Intersection isect; + uint visibility = path_state_ray_visibility(kg, &state); + +#ifdef __HAIR__ + float difl = 0.0f, extmax = 0.0f; + uint lcg_state = 0; + + if(kernel_data.bvh.have_curves) { + if(kernel_data.cam.resolution == 1) { + float3 pixdiff = ray.dD.dx + ray.dD.dy; + /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/ + difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f; + } + + extmax = kernel_data.curve.maximum_width; + lcg_state = lcg_state_init(rng, &state, 0x51633e2d); + } + + bool hit = scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax); +#else + bool hit = scene_intersect(kg, &ray, visibility, &isect, NULL, 0.0f, 0.0f); +#endif + +#ifdef __KERNEL_DEBUG__ + debug_data.num_bvh_traversal_steps += isect.num_traversal_steps; + debug_data.num_bvh_traversed_instances += isect.num_traversed_instances; + debug_data.num_ray_bounces++; +#endif + +#ifdef __VOLUME__ + /* volume attenuation, emission, scatter */ + if(state.volume_stack[0].shader != SHADER_NONE) { + Ray volume_ray = ray; + volume_ray.t = (hit)? isect.t: FLT_MAX; + + bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack); + +#ifdef __VOLUME_DECOUPLED__ + /* decoupled ray marching only supported on CPU */ + + /* cache steps along volume for repeated sampling */ + VolumeSegment volume_segment; + ShaderData volume_sd; + + shader_setup_from_volume(kg, &volume_sd, &volume_ray, state.bounce, state.transparent_bounce); + kernel_volume_decoupled_record(kg, &state, + &volume_ray, &volume_sd, &volume_segment, heterogeneous); + + /* direct light sampling */ + if(volume_segment.closure_flag & SD_SCATTER) { + volume_segment.sampling_method = volume_stack_sampling_method(kg, state.volume_stack); + + bool all = kernel_data.integrator.sample_all_lights_direct; + + kernel_branched_path_volume_connect_light(kg, rng, &volume_sd, + throughput, &state, &L, all, &volume_ray, &volume_segment); + + /* indirect light sampling */ + int num_samples = kernel_data.integrator.volume_samples; + float num_samples_inv = 1.0f/num_samples; + + for(int j = 0; j < num_samples; j++) { + /* workaround to fix correlation bug in T38710, can find better solution + * in random number generator later, for now this is done here to not impact + * performance of rendering without volumes */ + RNG tmp_rng = cmj_hash(*rng, state.rng_offset); + + PathState ps = state; + Ray pray = ray; + float3 tp = throughput; + + /* branch RNG state */ + path_state_branch(&ps, j, num_samples); + + /* scatter sample. if we use distance sampling and take just one + * sample for direct and indirect light, we could share this + * computation, but makes code a bit complex */ + float rphase = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_PHASE); + float rscatter = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_SCATTER_DISTANCE); + + VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, + &ps, &pray, &volume_sd, &tp, rphase, rscatter, &volume_segment, NULL, false); + + (void)result; + kernel_assert(result == VOLUME_PATH_SCATTERED); + + if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray)) { + kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L); + + /* for render passes, sum and reset indirect light pass variables + * for the next samples */ + path_radiance_sum_indirect(&L); + path_radiance_reset_indirect(&L); + } + } + } + + /* emission and transmittance */ + if(volume_segment.closure_flag & SD_EMISSION) + path_radiance_accum_emission(&L, throughput, volume_segment.accum_emission, state.bounce); + throughput *= volume_segment.accum_transmittance; + + /* free cached steps */ + kernel_volume_decoupled_free(kg, &volume_segment); +#else + /* GPU: no decoupled ray marching, scatter probalistically */ + int num_samples = kernel_data.integrator.volume_samples; + float num_samples_inv = 1.0f/num_samples; + + /* todo: we should cache the shader evaluations from stepping + * through the volume, for now we redo them multiple times */ + + for(int j = 0; j < num_samples; j++) { + PathState ps = state; + Ray pray = ray; + ShaderData volume_sd; + float3 tp = throughput * num_samples_inv; + + /* branch RNG state */ + path_state_branch(&ps, j, num_samples); + + VolumeIntegrateResult result = kernel_volume_integrate( + kg, &ps, &volume_sd, &volume_ray, &L, &tp, rng, heterogeneous); + +#ifdef __VOLUME_SCATTER__ + if(result == VOLUME_PATH_SCATTERED) { + /* todo: support equiangular, MIS and all light sampling. + * alternatively get decoupled ray marching working on the GPU */ + kernel_path_volume_connect_light(kg, rng, &volume_sd, tp, &state, &L); + + if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray)) { + kernel_path_indirect(kg, rng, pray, tp, num_samples, ps, &L); + + /* for render passes, sum and reset indirect light pass variables + * for the next samples */ + path_radiance_sum_indirect(&L); + path_radiance_reset_indirect(&L); + } + } +#endif + } + + /* todo: avoid this calculation using decoupled ray marching */ + kernel_volume_shadow(kg, &state, &volume_ray, &throughput); +#endif + } +#endif + + if(!hit) { + /* eval background shader if nothing hit */ + if(kernel_data.background.transparent) { + L_transparent += average(throughput); + +#ifdef __PASSES__ + if(!(kernel_data.film.pass_flag & PASS_BACKGROUND)) +#endif + break; + } + +#ifdef __BACKGROUND__ + /* sample background shader */ + float3 L_background = indirect_background(kg, &state, &ray); + path_radiance_accum_background(&L, throughput, L_background, state.bounce); +#endif + + break; + } + + /* setup shading */ + ShaderData sd; + shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce, state.transparent_bounce); + shader_eval_surface(kg, &sd, 0.0f, state.flag, SHADER_CONTEXT_MAIN); + shader_merge_closures(&sd); + + /* holdout */ +#ifdef __HOLDOUT__ + if(sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK)) { + if(kernel_data.background.transparent) { + float3 holdout_weight; + + if(sd.flag & SD_HOLDOUT_MASK) + holdout_weight = make_float3(1.0f, 1.0f, 1.0f); + else + holdout_weight = shader_holdout_eval(kg, &sd); + + /* any throughput is ok, should all be identical here */ + L_transparent += average(holdout_weight*throughput); + } + + if(sd.flag & SD_HOLDOUT_MASK) + break; + } +#endif + + /* holdout mask objects do not write data passes */ + kernel_write_data_passes(kg, buffer, &L, &sd, sample, &state, throughput); + +#ifdef __EMISSION__ + /* emission */ + if(sd.flag & SD_EMISSION) { + float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, state.ray_pdf); + path_radiance_accum_emission(&L, throughput, emission, state.bounce); + } +#endif + + /* transparency termination */ + if(state.flag & PATH_RAY_TRANSPARENT) { + /* path termination. this is a strange place to put the termination, it's + * mainly due to the mixed in MIS that we use. gives too many unneeded + * shader evaluations, only need emission if we are going to terminate */ + float probability = path_state_terminate_probability(kg, &state, throughput); + + if(probability == 0.0f) { + break; + } + else if(probability != 1.0f) { + float terminate = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_TERMINATE); + + if(terminate >= probability) + break; + + throughput /= probability; + } + } + +#ifdef __AO__ + /* ambient occlusion */ + if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) { + kernel_branched_path_ao(kg, &sd, &L, &state, rng, throughput); + } +#endif + +#ifdef __SUBSURFACE__ + /* bssrdf scatter to a different location on the same object */ + if(sd.flag & SD_BSSRDF) { + kernel_branched_path_subsurface_scatter(kg, &sd, &L, &state, + rng, &ray, throughput); + } +#endif + + if(!(sd.flag & SD_HAS_ONLY_VOLUME)) { + PathState hit_state = state; + +#ifdef __EMISSION__ + /* direct light */ + if(kernel_data.integrator.use_direct_light) { + bool all = kernel_data.integrator.sample_all_lights_direct; + kernel_branched_path_surface_connect_light(kg, rng, + &sd, &hit_state, throughput, 1.0f, &L, all); + } +#endif + + /* indirect light */ + kernel_branched_path_surface_indirect_light(kg, rng, + &sd, throughput, 1.0f, &hit_state, &L); + + /* continue in case of transparency */ + throughput *= shader_bsdf_transparency(kg, &sd); + + if(is_zero(throughput)) + break; + } + + /* Update Path State */ + state.flag |= PATH_RAY_TRANSPARENT; + state.transparent_bounce++; + + ray.P = ray_offset(sd.P, -sd.Ng); + ray.t -= sd.ray_length; /* clipping works through transparent */ + + +#ifdef __RAY_DIFFERENTIALS__ + ray.dP = sd.dP; + ray.dD.dx = -sd.dI.dx; + ray.dD.dy = -sd.dI.dy; +#endif + +#ifdef __VOLUME__ + /* enter/exit volume */ + kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack); +#endif + } + + float3 L_sum = path_radiance_clamp_and_sum(kg, &L); + + kernel_write_light_passes(kg, buffer, &L, sample); + +#ifdef __KERNEL_DEBUG__ + kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample); +#endif + + return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent); +} + +ccl_device void kernel_branched_path_trace(KernelGlobals *kg, + ccl_global float *buffer, ccl_global uint *rng_state, + int sample, int x, int y, int offset, int stride) +{ + /* buffer offset */ + int index = offset + x + y*stride; + int pass_stride = kernel_data.film.pass_stride; + + rng_state += index; + buffer += index*pass_stride; + + /* initialize random numbers and ray */ + RNG rng; + Ray ray; + + kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray); + + /* integrate */ + float4 L; + + if(ray.t != 0.0f) + L = kernel_branched_path_integrate(kg, &rng, sample, ray, buffer); + else + L = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + /* accumulate result in output buffer */ + kernel_write_pass_float4(buffer, sample, L); + + path_rng_end(kg, rng_state, rng); +} + +#endif /* __BRANCHED_PATH__ */ + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/kernel_path_common.h b/intern/cycles/kernel/kernel_path_common.h new file mode 100644 index 00000000000..1912dfa16ed --- /dev/null +++ b/intern/cycles/kernel/kernel_path_common.h @@ -0,0 +1,50 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + +ccl_device_inline void kernel_path_trace_setup(KernelGlobals *kg, + ccl_global uint *rng_state, + int sample, + int x, int y, + ccl_addr_space RNG *rng, + ccl_addr_space Ray *ray) +{ + float filter_u; + float filter_v; + + int num_samples = kernel_data.integrator.aa_samples; + + path_rng_init(kg, rng_state, sample, num_samples, rng, x, y, &filter_u, &filter_v); + + /* sample camera ray */ + + float lens_u = 0.0f, lens_v = 0.0f; + + if(kernel_data.cam.aperturesize > 0.0f) + path_rng_2D(kg, rng, sample, num_samples, PRNG_LENS_U, &lens_u, &lens_v); + + float time = 0.0f; + +#ifdef __CAMERA_MOTION__ + if(kernel_data.cam.shuttertime != -1.0f) + time = path_rng_1D(kg, rng, sample, num_samples, PRNG_TIME); +#endif + + camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h index f29168642a4..15efb2371de 100644 --- a/intern/cycles/kernel/kernel_path_state.h +++ b/intern/cycles/kernel/kernel_path_state.h @@ -11,12 +11,12 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN -ccl_device_inline void path_state_init(KernelGlobals *kg, PathState *state, RNG *rng, int sample, Ray *ray) +ccl_device_inline void path_state_init(KernelGlobals *kg, ccl_addr_space PathState *state, ccl_addr_space RNG *rng, int sample, ccl_addr_space Ray *ray) { state->flag = PATH_RAY_CAMERA|PATH_RAY_MIS_SKIP; @@ -51,7 +51,7 @@ ccl_device_inline void path_state_init(KernelGlobals *kg, PathState *state, RNG #endif } -ccl_device_inline void path_state_next(KernelGlobals *kg, PathState *state, int label) +ccl_device_inline void path_state_next(KernelGlobals *kg, ccl_addr_space PathState *state, int label) { /* ray through transparent keeps same flags from previous ray and is * not counted as a regular bounce, transparent has separate max */ @@ -106,7 +106,7 @@ ccl_device_inline void path_state_next(KernelGlobals *kg, PathState *state, int state->flag &= ~(PATH_RAY_GLOSSY|PATH_RAY_SINGULAR|PATH_RAY_MIS_SKIP); } else if(label & LABEL_GLOSSY) { - state->flag |= PATH_RAY_GLOSSY|PATH_RAY_GLOSSY_ANCESTOR; + state->flag |= PATH_RAY_GLOSSY; state->flag &= ~(PATH_RAY_DIFFUSE|PATH_RAY_SINGULAR|PATH_RAY_MIS_SKIP); } else { @@ -138,7 +138,7 @@ ccl_device_inline uint path_state_ray_visibility(KernelGlobals *kg, PathState *s return flag; } -ccl_device_inline float path_state_terminate_probability(KernelGlobals *kg, PathState *state, const float3 throughput) +ccl_device_inline float path_state_terminate_probability(KernelGlobals *kg, ccl_addr_space PathState *state, const float3 throughput) { if(state->flag & PATH_RAY_TRANSPARENT) { /* transparent rays treated separately */ diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h index 9553c2da0df..fe85a6b6e4b 100644 --- a/intern/cycles/kernel/kernel_path_surface.h +++ b/intern/cycles/kernel/kernel_path_surface.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -24,7 +24,7 @@ ccl_device void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RN { #ifdef __EMISSION__ /* sample illumination from lights to find path contribution */ - if(!(sd->flag & SD_BSDF_HAS_EVAL)) + if(!(ccl_fetch(sd, flag) & SD_BSDF_HAS_EVAL)) return; Ray light_ray; @@ -32,12 +32,15 @@ ccl_device void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RN bool is_lamp; #ifdef __OBJECT_MOTION__ - light_ray.time = sd->time; + light_ray.time = ccl_fetch(sd, time); #endif if(sample_all_lights) { /* lamp sampling */ for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) { + if(UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce))) + continue; + int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i)); float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights); RNG lamp_rng = cmj_hash(*rng, i); @@ -50,7 +53,7 @@ ccl_device void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RN path_branched_rng_2D(kg, &lamp_rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); LightSample ls; - lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls); + lamp_light_sample(kg, i, light_u, light_v, ccl_fetch(sd, P), &ls); if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) { /* trace shadow ray */ @@ -82,7 +85,7 @@ ccl_device void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RN light_t = 0.5f*light_t; LightSample ls; - light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls); + light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls); if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) { /* trace shadow ray */ @@ -103,7 +106,7 @@ ccl_device void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RN path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v); LightSample ls; - light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls); + light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls); /* sample random light */ if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) { @@ -146,15 +149,15 @@ ccl_device bool kernel_branched_path_surface_bounce(KernelGlobals *kg, RNG *rng, path_state_next(kg, state, label); /* setup ray */ - ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT)? -sd->Ng: sd->Ng); + ray->P = ray_offset(ccl_fetch(sd, P), (label & LABEL_TRANSMIT)? -ccl_fetch(sd, Ng): ccl_fetch(sd, Ng)); ray->D = bsdf_omega_in; ray->t = FLT_MAX; #ifdef __RAY_DIFFERENTIALS__ - ray->dP = sd->dP; + ray->dP = ccl_fetch(sd, dP); ray->dD = bsdf_domega_in; #endif #ifdef __OBJECT_MOTION__ - ray->time = sd->time; + ray->time = ccl_fetch(sd, time); #endif #ifdef __VOLUME__ @@ -178,12 +181,13 @@ ccl_device bool kernel_branched_path_surface_bounce(KernelGlobals *kg, RNG *rng, #endif +#ifndef __SPLIT_KERNEL__ /* path tracing: connect path directly to position on a light and add it to L */ -ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, RNG *rng, - ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L) +ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_addr_space RNG *rng, + ShaderData *sd, float3 throughput, ccl_addr_space PathState *state, PathRadiance *L) { #ifdef __EMISSION__ - if(!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL))) + if(!(kernel_data.integrator.use_direct_light && (ccl_fetch(sd, flag) & SD_BSDF_HAS_EVAL))) return; /* sample illumination from lights to find path contribution */ @@ -196,11 +200,11 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, RNG bool is_lamp; #ifdef __OBJECT_MOTION__ - light_ray.time = sd->time; + light_ray.time = ccl_fetch(sd, time); #endif LightSample ls; - light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls); + light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls); if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) { /* trace shadow ray */ @@ -213,13 +217,14 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, RNG } #endif } +#endif /* path tracing: bounce off or through surface to with new direction stored in ray */ -ccl_device_inline bool kernel_path_surface_bounce(KernelGlobals *kg, RNG *rng, - ShaderData *sd, float3 *throughput, PathState *state, PathRadiance *L, Ray *ray) +ccl_device_inline bool kernel_path_surface_bounce(KernelGlobals *kg, ccl_addr_space RNG *rng, + ShaderData *sd, ccl_addr_space float3 *throughput, ccl_addr_space PathState *state, PathRadiance *L, ccl_addr_space Ray *ray) { /* no BSDF? we can stop here */ - if(sd->flag & SD_BSDF) { + if(ccl_fetch(sd, flag) & SD_BSDF) { /* sample BSDF */ float bsdf_pdf; BsdfEval bsdf_eval; @@ -251,16 +256,16 @@ ccl_device_inline bool kernel_path_surface_bounce(KernelGlobals *kg, RNG *rng, path_state_next(kg, state, label); /* setup ray */ - ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT)? -sd->Ng: sd->Ng); + ray->P = ray_offset(ccl_fetch(sd, P), (label & LABEL_TRANSMIT)? -ccl_fetch(sd, Ng): ccl_fetch(sd, Ng)); ray->D = bsdf_omega_in; if(state->bounce == 0) - ray->t -= sd->ray_length; /* clipping works through transparent */ + ray->t -= ccl_fetch(sd, ray_length); /* clipping works through transparent */ else ray->t = FLT_MAX; #ifdef __RAY_DIFFERENTIALS__ - ray->dP = sd->dP; + ray->dP = ccl_fetch(sd, dP); ray->dD = bsdf_domega_in; #endif @@ -272,16 +277,21 @@ ccl_device_inline bool kernel_path_surface_bounce(KernelGlobals *kg, RNG *rng, return true; } #ifdef __VOLUME__ - else if(sd->flag & SD_HAS_ONLY_VOLUME) { + else if(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME) { /* no surface shader but have a volume shader? act transparent */ /* update path state, count as transparent */ path_state_next(kg, state, LABEL_TRANSPARENT); + if(state->bounce == 0) + ray->t -= ccl_fetch(sd, ray_length); /* clipping works through transparent */ + else + ray->t = FLT_MAX; + /* setup ray position, direction stays unchanged */ - ray->P = ray_offset(sd->P, -sd->Ng); + ray->P = ray_offset(ccl_fetch(sd, P), -ccl_fetch(sd, Ng)); #ifdef __RAY_DIFFERENTIALS__ - ray->dP = sd->dP; + ray->dP = ccl_fetch(sd, dP); #endif /* enter/exit volume */ diff --git a/intern/cycles/kernel/kernel_path_volume.h b/intern/cycles/kernel/kernel_path_volume.h index da2d5e6eca8..82dc0f97622 100644 --- a/intern/cycles/kernel/kernel_path_volume.h +++ b/intern/cycles/kernel/kernel_path_volume.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -40,7 +40,7 @@ ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng, light_ray.time = sd->time; #endif - light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls); + light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, state->bounce, &ls); if(ls.pdf == 0.0f) return; @@ -56,7 +56,12 @@ ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng, #endif } -ccl_device bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng, +#ifdef __KERNEL_GPU__ +ccl_device_noinline +#else +ccl_device +#endif +bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng, ShaderData *sd, float3 *throughput, PathState *state, PathRadiance *L, Ray *ray) { /* sample phase function */ @@ -102,7 +107,7 @@ ccl_device bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng, ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG *rng, ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L, - float num_samples_adjust, bool sample_all_lights, Ray *ray, const VolumeSegment *segment) + bool sample_all_lights, Ray *ray, const VolumeSegment *segment) { #ifdef __EMISSION__ if(!kernel_data.integrator.use_direct_light) @@ -119,8 +124,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG if(sample_all_lights) { /* lamp sampling */ for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) { - int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i)); - float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights); + if(UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce))) + continue; + + int num_samples = light_select_num_samples(kg, i); + float num_samples_inv = 1.0f/(num_samples*kernel_data.integrator.num_all_lights); RNG lamp_rng = cmj_hash(*rng, i); if(kernel_data.integrator.pdf_triangles != 0.0f) @@ -166,8 +174,8 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG /* mesh light sampling */ if(kernel_data.integrator.pdf_triangles != 0.0f) { - int num_samples = ceil_to_int(num_samples_adjust*kernel_data.integrator.mesh_light_samples); - float num_samples_inv = num_samples_adjust/num_samples; + int num_samples = kernel_data.integrator.mesh_light_samples; + float num_samples_inv = 1.0f/num_samples; if(kernel_data.integrator.num_all_lights) num_samples_inv *= 0.5f; @@ -183,7 +191,7 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG light_t = 0.5f*light_t; LightSample ls; - light_sample(kg, light_t, light_u, light_v, sd->time, ray->P, &ls); + light_sample(kg, light_t, light_u, light_v, sd->time, ray->P, state->bounce, &ls); float3 tp = throughput; @@ -198,7 +206,7 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG kernel_assert(result == VOLUME_PATH_SCATTERED); /* todo: split up light_sample so we don't have to call it again with new position */ - light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls); + light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, state->bounce, &ls); if(ls.pdf == 0.0f) continue; @@ -222,7 +230,7 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v); LightSample ls; - light_sample(kg, light_t, light_u, light_v, sd->time, ray->P, &ls); + light_sample(kg, light_t, light_u, light_v, sd->time, ray->P, state->bounce, &ls); float3 tp = throughput; @@ -237,7 +245,7 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG kernel_assert(result == VOLUME_PATH_SCATTERED); /* todo: split up light_sample so we don't have to call it again with new position */ - light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls); + light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, state->bounce, &ls); if(ls.pdf == 0.0f) return; diff --git a/intern/cycles/kernel/kernel_projection.h b/intern/cycles/kernel/kernel_projection.h index 6744471d659..62922df3286 100644 --- a/intern/cycles/kernel/kernel_projection.h +++ b/intern/cycles/kernel/kernel_projection.h @@ -55,18 +55,18 @@ ccl_device float3 spherical_to_direction(float theta, float phi) /* Equirectangular coordinates <-> Cartesian direction */ -ccl_device float2 direction_to_equirectangular(float3 dir) +ccl_device float2 direction_to_equirectangular_range(float3 dir, float4 range) { - float u = -atan2f(dir.y, dir.x)/(M_2PI_F) + 0.5f; - float v = atan2f(dir.z, hypotf(dir.x, dir.y))/M_PI_F + 0.5f; + float u = (atan2f(dir.y, dir.x) - range.y) / range.x; + float v = (acosf(dir.z / len(dir)) - range.w) / range.z; return make_float2(u, v); } -ccl_device float3 equirectangular_to_direction(float u, float v) +ccl_device float3 equirectangular_range_to_direction(float u, float v, float4 range) { - float phi = M_PI_F*(1.0f - 2.0f*u); - float theta = M_PI_F*(1.0f - v); + float phi = range.x*u + range.y; + float theta = range.z*v + range.w; return make_float3( sinf(theta)*cosf(phi), @@ -74,6 +74,16 @@ ccl_device float3 equirectangular_to_direction(float u, float v) cosf(theta)); } +ccl_device float2 direction_to_equirectangular(float3 dir) +{ + return direction_to_equirectangular_range(dir, make_float4(-M_2PI_F, M_PI_F, -M_PI_F, M_PI_F)); +} + +ccl_device float3 equirectangular_to_direction(float u, float v) +{ + return equirectangular_range_to_direction(u, v, make_float4(-M_2PI_F, M_PI_F, -M_PI_F, M_PI_F)); +} + /* Fisheye <-> Cartesian direction */ ccl_device float2 direction_to_fisheye(float3 dir, float fov) @@ -153,6 +163,10 @@ ccl_device float3 mirrorball_to_direction(float u, float v) dir.x = 2.0f*u - 1.0f; dir.z = 2.0f*v - 1.0f; + + if(dir.x*dir.x + dir.z*dir.z > 1.0f) + return make_float3(0.0f, 0.0f, 0.0f); + dir.y = -sqrtf(max(1.0f - dir.x*dir.x - dir.z*dir.z, 0.0f)); /* reflection */ @@ -180,7 +194,9 @@ ccl_device float3 panorama_to_direction(KernelGlobals *kg, float u, float v) { switch(kernel_data.cam.panorama_type) { case PANORAMA_EQUIRECTANGULAR: - return equirectangular_to_direction(u, v); + return equirectangular_range_to_direction(u, v, kernel_data.cam.equirectangular_range); + case PANORAMA_MIRRORBALL: + return mirrorball_to_direction(u, v); case PANORAMA_FISHEYE_EQUIDISTANT: return fisheye_to_direction(u, v, kernel_data.cam.fisheye_fov); case PANORAMA_FISHEYE_EQUISOLID: @@ -194,7 +210,9 @@ ccl_device float2 direction_to_panorama(KernelGlobals *kg, float3 dir) { switch(kernel_data.cam.panorama_type) { case PANORAMA_EQUIRECTANGULAR: - return direction_to_equirectangular(dir); + return direction_to_equirectangular_range(dir, kernel_data.cam.equirectangular_range); + case PANORAMA_MIRRORBALL: + return direction_to_mirrorball(dir); case PANORAMA_FISHEYE_EQUIDISTANT: return direction_to_fisheye(dir, kernel_data.cam.fisheye_fov); case PANORAMA_FISHEYE_EQUISOLID: diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h new file mode 100644 index 00000000000..cf5614b8a86 --- /dev/null +++ b/intern/cycles/kernel/kernel_queues.h @@ -0,0 +1,125 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __KERNEL_QUEUE_H__ +#define __KERNEL_QUEUE_H__ + +/* + * Queue utility functions for split kernel + */ + +#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable +#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable + +/* + * Enqueue ray index into the queue + */ +ccl_device void enqueue_ray_index( + int ray_index, /* Ray index to be enqueued. */ + int queue_number, /* Queue in which the ray index should be enqueued. */ + ccl_global int *queues, /* Buffer of all queues. */ + int queue_size, /* Size of each queue. */ + ccl_global int *queue_index) /* Array of size num_queues; Used for atomic increment. */ +{ + /* This thread's queue index. */ + int my_queue_index = atomic_inc(&queue_index[queue_number]) + (queue_number * queue_size); + queues[my_queue_index] = ray_index; +} + +/* + * Get the ray index for this thread + * Returns a positive ray_index for threads that have to do some work; + * Returns 'QUEUE_EMPTY_SLOT' for threads that don't have any work + * i.e All ray's in the queue has been successfully allocated and there + * is no more ray to allocate to other threads. + */ +ccl_device int get_ray_index( + int thread_index, /* Global thread index. */ + int queue_number, /* Queue to operate on. */ + ccl_global int *queues, /* Buffer of all queues. */ + int queuesize, /* Size of a queue. */ + int empty_queue) /* Empty the queue slot as soon as we fetch the ray index. */ +{ + int ray_index = queues[queue_number * queuesize + thread_index]; + if(empty_queue && ray_index != QUEUE_EMPTY_SLOT) { + queues[queue_number * queuesize + thread_index] = QUEUE_EMPTY_SLOT; + } + return ray_index; +} + +/* The following functions are to realize Local memory variant of enqueue ray index function. */ + +/* All threads should call this function. */ +ccl_device void enqueue_ray_index_local( + int ray_index, /* Ray index to enqueue. */ + int queue_number, /* Queue in which to enqueue ray index. */ + char enqueue_flag, /* True for threads whose ray index has to be enqueued. */ + int queuesize, /* queue size. */ + ccl_local unsigned int *local_queue_atomics, /* To to local queue atomics. */ + ccl_global int *Queue_data, /* Queues. */ + ccl_global int *Queue_index) /* To do global queue atomics. */ +{ + int lidx = get_local_id(1) * get_local_size(0) + get_local_id(0); + + /* Get local queue id .*/ + unsigned int lqidx; + if(enqueue_flag) { + lqidx = atomic_inc(local_queue_atomics); + } + barrier(CLK_LOCAL_MEM_FENCE); + + /* Get global queue offset. */ + if(lidx == 0) { + *local_queue_atomics = atomic_add(&Queue_index[queue_number], *local_queue_atomics); + } + barrier(CLK_LOCAL_MEM_FENCE); + + /* Get global queue index and enqueue ray. */ + if(enqueue_flag) { + unsigned int my_gqidx = queue_number * queuesize + (*local_queue_atomics) + lqidx; + Queue_data[my_gqidx] = ray_index; + } +} + +ccl_device unsigned int get_local_queue_index( + int queue_number, /* Queue in which to enqueue the ray; -1 if no queue */ + ccl_local unsigned int *local_queue_atomics) +{ + int my_lqidx = atomic_inc(&local_queue_atomics[queue_number]); + return my_lqidx; +} + +ccl_device unsigned int get_global_per_queue_offset( + int queue_number, + ccl_local unsigned int *local_queue_atomics, + ccl_global int* global_queue_atomics) +{ + unsigned int queue_offset = atomic_add(&global_queue_atomics[queue_number], + local_queue_atomics[queue_number]); + return queue_offset; +} + +ccl_device unsigned int get_global_queue_index( + int queue_number, + int queuesize, + unsigned int lqidx, + ccl_local unsigned int * global_per_queue_offset) +{ + int my_gqidx = queuesize * queue_number + lqidx + global_per_queue_offset[queue_number]; + return my_gqidx; +} + +#endif // __KERNEL_QUEUE_H__ diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h index 236f74c0a82..631a2cb75de 100644 --- a/intern/cycles/kernel/kernel_random.h +++ b/intern/cycles/kernel/kernel_random.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "kernel_jitter.h" @@ -98,7 +98,7 @@ ccl_device uint sobol_lookup(const uint m, const uint frame, const uint ex, cons return index; } -ccl_device_inline float path_rng_1D(KernelGlobals *kg, RNG *rng, int sample, int num_samples, int dimension) +ccl_device_inline float path_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *rng, int sample, int num_samples, int dimension) { #ifdef __CMJ__ if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) { @@ -132,7 +132,7 @@ ccl_device_inline float path_rng_1D(KernelGlobals *kg, RNG *rng, int sample, int #endif } -ccl_device_inline void path_rng_2D(KernelGlobals *kg, RNG *rng, int sample, int num_samples, int dimension, float *fx, float *fy) +ccl_device_inline void path_rng_2D(KernelGlobals *kg, ccl_addr_space RNG *rng, int sample, int num_samples, int dimension, float *fx, float *fy) { #ifdef __CMJ__ if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) { @@ -149,7 +149,7 @@ ccl_device_inline void path_rng_2D(KernelGlobals *kg, RNG *rng, int sample, int } } -ccl_device_inline void path_rng_init(KernelGlobals *kg, ccl_global uint *rng_state, int sample, int num_samples, RNG *rng, int x, int y, float *fx, float *fy) +ccl_device_inline void path_rng_init(KernelGlobals *kg, ccl_global uint *rng_state, int sample, int num_samples, ccl_addr_space RNG *rng, int x, int y, float *fx, float *fy) { #ifdef __SOBOL_FULL_SCREEN__ uint px, py; @@ -261,12 +261,12 @@ ccl_device uint lcg_init(uint seed) * For branches in the path we must be careful not to reuse the same number * in a sequence and offset accordingly. */ -ccl_device_inline float path_state_rng_1D(KernelGlobals *kg, RNG *rng, const PathState *state, int dimension) +ccl_device_inline float path_state_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *rng, const ccl_addr_space PathState *state, int dimension) { return path_rng_1D(kg, rng, state->sample, state->num_samples, state->rng_offset + dimension); } -ccl_device_inline float path_state_rng_1D_for_decision(KernelGlobals *kg, RNG *rng, const PathState *state, int dimension) +ccl_device_inline float path_state_rng_1D_for_decision(KernelGlobals *kg, ccl_addr_space RNG *rng, const ccl_addr_space PathState *state, int dimension) { /* the rng_offset is not increased for transparent bounces. if we do then * fully transparent objects can become subtly visible by the different @@ -279,23 +279,23 @@ ccl_device_inline float path_state_rng_1D_for_decision(KernelGlobals *kg, RNG *r return path_rng_1D(kg, rng, state->sample, state->num_samples, rng_offset + dimension); } -ccl_device_inline void path_state_rng_2D(KernelGlobals *kg, RNG *rng, const PathState *state, int dimension, float *fx, float *fy) +ccl_device_inline void path_state_rng_2D(KernelGlobals *kg, ccl_addr_space RNG *rng, const ccl_addr_space PathState *state, int dimension, float *fx, float *fy) { path_rng_2D(kg, rng, state->sample, state->num_samples, state->rng_offset + dimension, fx, fy); } -ccl_device_inline float path_branched_rng_1D(KernelGlobals *kg, RNG *rng, const PathState *state, int branch, int num_branches, int dimension) +ccl_device_inline float path_branched_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *rng, const PathState *state, int branch, int num_branches, int dimension) { return path_rng_1D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, state->rng_offset + dimension); } -ccl_device_inline float path_branched_rng_1D_for_decision(KernelGlobals *kg, RNG *rng, const PathState *state, int branch, int num_branches, int dimension) +ccl_device_inline float path_branched_rng_1D_for_decision(KernelGlobals *kg, ccl_addr_space RNG *rng, const PathState *state, int branch, int num_branches, int dimension) { int rng_offset = state->rng_offset + state->transparent_bounce*PRNG_BOUNCE_NUM; return path_rng_1D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, rng_offset + dimension); } -ccl_device_inline void path_branched_rng_2D(KernelGlobals *kg, RNG *rng, const PathState *state, int branch, int num_branches, int dimension, float *fx, float *fy) +ccl_device_inline void path_branched_rng_2D(KernelGlobals *kg, ccl_addr_space RNG *rng, const PathState *state, int branch, int num_branches, int dimension, float *fx, float *fy) { path_rng_2D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, state->rng_offset + dimension, fx, fy); } diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index db08c328d7e..6b560f5fdb2 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ /* @@ -37,13 +37,13 @@ CCL_NAMESPACE_BEGIN #ifdef __OBJECT_MOTION__ ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time) { - if(sd->flag & SD_OBJECT_MOTION) { - sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time); - sd->ob_itfm = transform_quick_inverse(sd->ob_tfm); + if(ccl_fetch(sd, flag) & SD_OBJECT_MOTION) { + ccl_fetch(sd, ob_tfm) = object_fetch_transform_motion(kg, ccl_fetch(sd, object), time); + ccl_fetch(sd, ob_itfm) = transform_quick_inverse(ccl_fetch(sd, ob_tfm)); } else { - sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); - sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + ccl_fetch(sd, ob_tfm) = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM); + ccl_fetch(sd, ob_itfm) = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM); } } #endif @@ -52,55 +52,55 @@ ccl_device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, int bounce, int transparent_bounce) { #ifdef __INSTANCING__ - sd->object = (isect->object == PRIM_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object; + ccl_fetch(sd, object) = (isect->object == PRIM_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object; #endif - sd->type = isect->type; - sd->flag = kernel_tex_fetch(__object_flag, sd->object); + ccl_fetch(sd, type) = isect->type; + ccl_fetch(sd, flag) = kernel_tex_fetch(__object_flag, ccl_fetch(sd, object)); /* matrices and time */ #ifdef __OBJECT_MOTION__ shader_setup_object_transforms(kg, sd, ray->time); - sd->time = ray->time; + ccl_fetch(sd, time) = ray->time; #endif - sd->prim = kernel_tex_fetch(__prim_index, isect->prim); - sd->ray_length = isect->t; - sd->ray_depth = bounce; - sd->transparent_depth = transparent_bounce; + ccl_fetch(sd, prim) = kernel_tex_fetch(__prim_index, isect->prim); + ccl_fetch(sd, ray_length) = isect->t; + ccl_fetch(sd, ray_depth) = bounce; + ccl_fetch(sd, transparent_depth) = transparent_bounce; #ifdef __UV__ - sd->u = isect->u; - sd->v = isect->v; + ccl_fetch(sd, u) = isect->u; + ccl_fetch(sd, v) = isect->v; #endif #ifdef __HAIR__ - if(sd->type & PRIMITIVE_ALL_CURVE) { + if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { /* curve */ - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); - sd->shader = __float_as_int(curvedata.z); - sd->P = bvh_curve_refine(kg, sd, isect, ray); + ccl_fetch(sd, shader) = __float_as_int(curvedata.z); + ccl_fetch(sd, P) = bvh_curve_refine(kg, sd, isect, ray); } else #endif - if(sd->type & PRIMITIVE_TRIANGLE) { + if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) { /* static triangle */ float3 Ng = triangle_normal(kg, sd); - sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); + ccl_fetch(sd, shader) = kernel_tex_fetch(__tri_shader, ccl_fetch(sd, prim)); /* vectors */ - sd->P = triangle_refine(kg, sd, isect, ray); - sd->Ng = Ng; - sd->N = Ng; + ccl_fetch(sd, P) = triangle_refine(kg, sd, isect, ray); + ccl_fetch(sd, Ng) = Ng; + ccl_fetch(sd, N) = Ng; /* smooth normal */ - if(sd->shader & SHADER_SMOOTH_NORMAL) - sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v); + if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) + ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v)); #ifdef __DPDU__ /* dPdu/dPdv */ - triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); + triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv)); #endif } else { @@ -108,40 +108,40 @@ ccl_device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd, motion_triangle_shader_setup(kg, sd, isect, ray, false); } - sd->I = -ray->D; + ccl_fetch(sd, I) = -ray->D; - sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2); + ccl_fetch(sd, flag) |= kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2); #ifdef __INSTANCING__ if(isect->object != OBJECT_NONE) { /* instance transform */ - object_normal_transform(kg, sd, &sd->N); - object_normal_transform(kg, sd, &sd->Ng); + object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N)); + object_normal_transform_auto(kg, sd, &ccl_fetch(sd, Ng)); #ifdef __DPDU__ - object_dir_transform(kg, sd, &sd->dPdu); - object_dir_transform(kg, sd, &sd->dPdv); + object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu)); + object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv)); #endif } #endif /* backfacing test */ - bool backfacing = (dot(sd->Ng, sd->I) < 0.0f); + bool backfacing = (dot(ccl_fetch(sd, Ng), ccl_fetch(sd, I)) < 0.0f); if(backfacing) { - sd->flag |= SD_BACKFACING; - sd->Ng = -sd->Ng; - sd->N = -sd->N; + ccl_fetch(sd, flag) |= SD_BACKFACING; + ccl_fetch(sd, Ng) = -ccl_fetch(sd, Ng); + ccl_fetch(sd, N) = -ccl_fetch(sd, N); #ifdef __DPDU__ - sd->dPdu = -sd->dPdu; - sd->dPdv = -sd->dPdv; + ccl_fetch(sd, dPdu) = -ccl_fetch(sd, dPdu); + ccl_fetch(sd, dPdv) = -ccl_fetch(sd, dPdv); #endif } #ifdef __RAY_DIFFERENTIALS__ /* differentials */ - differential_transfer(&sd->dP, ray->dP, ray->D, ray->dD, sd->Ng, isect->t); - differential_incoming(&sd->dI, ray->dD); - differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng); + differential_transfer(&ccl_fetch(sd, dP), ray->dP, ray->D, ray->dD, ccl_fetch(sd, Ng), isect->t); + differential_incoming(&ccl_fetch(sd, dI), ray->dD); + differential_dudv(&ccl_fetch(sd, du), &ccl_fetch(sd, dv), ccl_fetch(sd, dPdu), ccl_fetch(sd, dPdv), ccl_fetch(sd, dP), ccl_fetch(sd, Ng)); #endif } @@ -166,7 +166,7 @@ ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderDat /* fetch triangle data */ if(sd->type == PRIMITIVE_TRIANGLE) { float3 Ng = triangle_normal(kg, sd); - sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); + sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); /* static triangle */ sd->P = triangle_refine_subsurface(kg, sd, isect, ray); @@ -230,105 +230,105 @@ ccl_device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd, int shader, int object, int prim, float u, float v, float t, float time, int bounce, int transparent_bounce) { /* vectors */ - sd->P = P; - sd->N = Ng; - sd->Ng = Ng; - sd->I = I; - sd->shader = shader; - sd->type = (prim == PRIM_NONE)? PRIMITIVE_NONE: PRIMITIVE_TRIANGLE; + ccl_fetch(sd, P) = P; + ccl_fetch(sd, N) = Ng; + ccl_fetch(sd, Ng) = Ng; + ccl_fetch(sd, I) = I; + ccl_fetch(sd, shader) = shader; + ccl_fetch(sd, type) = (prim == PRIM_NONE)? PRIMITIVE_NONE: PRIMITIVE_TRIANGLE; /* primitive */ #ifdef __INSTANCING__ - sd->object = object; + ccl_fetch(sd, object) = object; #endif /* currently no access to bvh prim index for strand sd->prim*/ - sd->prim = prim; + ccl_fetch(sd, prim) = prim; #ifdef __UV__ - sd->u = u; - sd->v = v; + ccl_fetch(sd, u) = u; + ccl_fetch(sd, v) = v; #endif - sd->ray_length = t; - sd->ray_depth = bounce; - sd->transparent_depth = transparent_bounce; + ccl_fetch(sd, ray_length) = t; + ccl_fetch(sd, ray_depth) = bounce; + ccl_fetch(sd, transparent_depth) = transparent_bounce; /* detect instancing, for non-instanced the object index is -object-1 */ #ifdef __INSTANCING__ bool instanced = false; - if(sd->prim != PRIM_NONE) { - if(sd->object >= 0) + if(ccl_fetch(sd, prim) != PRIM_NONE) { + if(ccl_fetch(sd, object) >= 0) instanced = true; else #endif - sd->object = ~sd->object; + ccl_fetch(sd, object) = ~ccl_fetch(sd, object); #ifdef __INSTANCING__ } #endif - sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2); - if(sd->object != OBJECT_NONE) { - sd->flag |= kernel_tex_fetch(__object_flag, sd->object); + ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2); + if(ccl_fetch(sd, object) != OBJECT_NONE) { + ccl_fetch(sd, flag) |= kernel_tex_fetch(__object_flag, ccl_fetch(sd, object)); #ifdef __OBJECT_MOTION__ shader_setup_object_transforms(kg, sd, time); } - sd->time = time; + ccl_fetch(sd, time) = time; #else } #endif - if(sd->type & PRIMITIVE_TRIANGLE) { + if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) { /* smooth normal */ - if(sd->shader & SHADER_SMOOTH_NORMAL) { - sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v); + if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) { + ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v)); #ifdef __INSTANCING__ if(instanced) - object_normal_transform(kg, sd, &sd->N); + object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N)); #endif } /* dPdu/dPdv */ #ifdef __DPDU__ - triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); + triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv)); #ifdef __INSTANCING__ if(instanced) { - object_dir_transform(kg, sd, &sd->dPdu); - object_dir_transform(kg, sd, &sd->dPdv); + object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu)); + object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv)); } #endif #endif } else { #ifdef __DPDU__ - sd->dPdu = make_float3(0.0f, 0.0f, 0.0f); - sd->dPdv = make_float3(0.0f, 0.0f, 0.0f); + ccl_fetch(sd, dPdu) = make_float3(0.0f, 0.0f, 0.0f); + ccl_fetch(sd, dPdv) = make_float3(0.0f, 0.0f, 0.0f); #endif } /* backfacing test */ - if(sd->prim != PRIM_NONE) { - bool backfacing = (dot(sd->Ng, sd->I) < 0.0f); + if(ccl_fetch(sd, prim) != PRIM_NONE) { + bool backfacing = (dot(ccl_fetch(sd, Ng), ccl_fetch(sd, I)) < 0.0f); if(backfacing) { - sd->flag |= SD_BACKFACING; - sd->Ng = -sd->Ng; - sd->N = -sd->N; + ccl_fetch(sd, flag) |= SD_BACKFACING; + ccl_fetch(sd, Ng) = -ccl_fetch(sd, Ng); + ccl_fetch(sd, N) = -ccl_fetch(sd, N); #ifdef __DPDU__ - sd->dPdu = -sd->dPdu; - sd->dPdv = -sd->dPdv; + ccl_fetch(sd, dPdu) = -ccl_fetch(sd, dPdu); + ccl_fetch(sd, dPdv) = -ccl_fetch(sd, dPdv); #endif } } #ifdef __RAY_DIFFERENTIALS__ /* no ray differentials here yet */ - sd->dP = differential3_zero(); - sd->dI = differential3_zero(); - sd->du = differential_zero(); - sd->dv = differential_zero(); + ccl_fetch(sd, dP) = differential3_zero(); + ccl_fetch(sd, dI) = differential3_zero(); + ccl_fetch(sd, du) = differential_zero(); + ccl_fetch(sd, dv) = differential_zero(); #endif } @@ -355,47 +355,46 @@ ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd, ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray, int bounce, int transparent_bounce) { /* vectors */ - sd->P = ray->D; - sd->N = -ray->D; - sd->Ng = -ray->D; - sd->I = -ray->D; - sd->shader = kernel_data.background.surface_shader; - sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2); + ccl_fetch(sd, P) = ray->D; + ccl_fetch(sd, N) = -ray->D; + ccl_fetch(sd, Ng) = -ray->D; + ccl_fetch(sd, I) = -ray->D; + ccl_fetch(sd, shader) = kernel_data.background.surface_shader; + ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2); #ifdef __OBJECT_MOTION__ - sd->time = ray->time; + ccl_fetch(sd, time) = ray->time; #endif - sd->ray_length = 0.0f; - sd->ray_depth = bounce; - sd->transparent_depth = transparent_bounce; + ccl_fetch(sd, ray_length) = 0.0f; + ccl_fetch(sd, ray_depth) = bounce; + ccl_fetch(sd, transparent_depth) = transparent_bounce; #ifdef __INSTANCING__ - sd->object = PRIM_NONE; + ccl_fetch(sd, object) = PRIM_NONE; #endif - sd->prim = PRIM_NONE; + ccl_fetch(sd, prim) = PRIM_NONE; #ifdef __UV__ - sd->u = 0.0f; - sd->v = 0.0f; + ccl_fetch(sd, u) = 0.0f; + ccl_fetch(sd, v) = 0.0f; #endif #ifdef __DPDU__ /* dPdu/dPdv */ - sd->dPdu = make_float3(0.0f, 0.0f, 0.0f); - sd->dPdv = make_float3(0.0f, 0.0f, 0.0f); + ccl_fetch(sd, dPdu) = make_float3(0.0f, 0.0f, 0.0f); + ccl_fetch(sd, dPdv) = make_float3(0.0f, 0.0f, 0.0f); #endif #ifdef __RAY_DIFFERENTIALS__ /* differentials */ - sd->dP = ray->dD; - differential_incoming(&sd->dI, sd->dP); - sd->du.dx = 0.0f; - sd->du.dy = 0.0f; - sd->dv.dx = 0.0f; - sd->dv.dy = 0.0f; + ccl_fetch(sd, dP) = ray->dD; + differential_incoming(&ccl_fetch(sd, dI), ccl_fetch(sd, dP)); + ccl_fetch(sd, du) = differential_zero(); + ccl_fetch(sd, dv) = differential_zero(); #endif } /* ShaderData setup from point inside volume */ +#ifdef __VOLUME__ ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray, int bounce, int transparent_bounce) { /* vectors */ @@ -441,6 +440,7 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *s sd->ray_P = ray->P; sd->ray_dP = ray->dP; } +#endif /* Merging */ @@ -459,7 +459,7 @@ ccl_device void shader_merge_closures(ShaderData *sd) continue; #endif - if(!(sci->type == scj->type && sci->data0 == scj->data0 && sci->data1 == scj->data1)) + if(!(sci->type == scj->type && sci->data0 == scj->data0 && sci->data1 == scj->data1 && sci->data2 == scj->data2)) continue; if(CLOSURE_IS_BSDF_OR_BSSRDF(sci->type)) { @@ -480,6 +480,7 @@ ccl_device void shader_merge_closures(ShaderData *sd) } sd->num_closure--; + kernel_assert(sd->num_closure >= 0); j--; } } @@ -493,11 +494,11 @@ ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, const ShaderDa { /* this is the veach one-sample model with balance heuristic, some pdf * factors drop out when using balance heuristic weighting */ - for(int i = 0; i< sd->num_closure; i++) { + for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { if(i == skip_bsdf) continue; - const ShaderClosure *sc = &sd->closure[i]; + const ShaderClosure *sc = ccl_fetch_array(sd, closure, i); if(CLOSURE_IS_BSDF(sc->type)) { float bsdf_pdf = 0.0f; @@ -515,7 +516,7 @@ ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, const ShaderDa *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f; } -ccl_device void shader_bsdf_eval(KernelGlobals *kg, const ShaderData *sd, +ccl_device void shader_bsdf_eval(KernelGlobals *kg, ShaderData *sd, const float3 omega_in, BsdfEval *eval, float *pdf) { bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass); @@ -529,22 +530,22 @@ ccl_device int shader_bsdf_sample(KernelGlobals *kg, const ShaderData *sd, { int sampled = 0; - if(sd->num_closure > 1) { + if(ccl_fetch(sd, num_closure) > 1) { /* pick a BSDF closure based on sample weights */ float sum = 0.0f; - for(sampled = 0; sampled < sd->num_closure; sampled++) { - const ShaderClosure *sc = &sd->closure[sampled]; + for(sampled = 0; sampled < ccl_fetch(sd, num_closure); sampled++) { + const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled); if(CLOSURE_IS_BSDF(sc->type)) sum += sc->sample_weight; } - float r = sd->randb_closure*sum; + float r = ccl_fetch(sd, randb_closure)*sum; sum = 0.0f; - for(sampled = 0; sampled < sd->num_closure; sampled++) { - const ShaderClosure *sc = &sd->closure[sampled]; + for(sampled = 0; sampled < ccl_fetch(sd, num_closure); sampled++) { + const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled); if(CLOSURE_IS_BSDF(sc->type)) { sum += sc->sample_weight; @@ -554,13 +555,14 @@ ccl_device int shader_bsdf_sample(KernelGlobals *kg, const ShaderData *sd, } } - if(sampled == sd->num_closure) { + if(sampled == ccl_fetch(sd, num_closure)) { *pdf = 0.0f; return LABEL_NONE; } } - const ShaderClosure *sc = &sd->closure[sampled]; + const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled); + int label; float3 eval; @@ -570,7 +572,7 @@ ccl_device int shader_bsdf_sample(KernelGlobals *kg, const ShaderData *sd, if(*pdf != 0.0f) { bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass); - if(sd->num_closure > 1) { + if(ccl_fetch(sd, num_closure) > 1) { float sweight = sc->sample_weight; _shader_bsdf_multi_eval(kg, sd, *omega_in, pdf, sampled, bsdf_eval, *pdf*sweight, sweight); } @@ -597,8 +599,8 @@ ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, const ShaderData *s ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughness) { - for(int i = 0; i< sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; + for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { + ShaderClosure *sc = ccl_fetch_array(sd, closure, i); if(CLOSURE_IS_BSDF(sc->type)) bsdf_blur(kg, sc, roughness); @@ -607,13 +609,13 @@ ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughn ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, ShaderData *sd) { - if(sd->flag & SD_HAS_ONLY_VOLUME) + if(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME) return make_float3(1.0f, 1.0f, 1.0f); float3 eval = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i< sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; + for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { + ShaderClosure *sc = ccl_fetch_array(sd, closure, i); if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) // todo: make this work for osl eval += sc->weight; @@ -636,8 +638,8 @@ ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd) { float3 eval = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i< sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; + for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { + ShaderClosure *sc = ccl_fetch_array(sd, closure, i); if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) eval += sc->weight; @@ -650,8 +652,8 @@ ccl_device float3 shader_bsdf_glossy(KernelGlobals *kg, ShaderData *sd) { float3 eval = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i< sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; + for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { + ShaderClosure *sc = ccl_fetch_array(sd, closure, i); if(CLOSURE_IS_BSDF_GLOSSY(sc->type)) eval += sc->weight; @@ -664,8 +666,8 @@ ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd) { float3 eval = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i< sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; + for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { + ShaderClosure *sc = ccl_fetch_array(sd, closure, i); if(CLOSURE_IS_BSDF_TRANSMISSION(sc->type)) eval += sc->weight; @@ -678,10 +680,10 @@ ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd) { float3 eval = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i< sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; + for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { + ShaderClosure *sc = ccl_fetch_array(sd, closure, i); - if(CLOSURE_IS_BSSRDF(sc->type)) + if(CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type)) eval += sc->weight; } @@ -693,8 +695,8 @@ ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_fac float3 eval = make_float3(0.0f, 0.0f, 0.0f); float3 N = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i< sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; + for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { + ShaderClosure *sc = ccl_fetch_array(sd, closure, i); if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) { eval += sc->weight*ao_factor; @@ -702,12 +704,12 @@ ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_fac } else if(CLOSURE_IS_AMBIENT_OCCLUSION(sc->type)) { eval += sc->weight; - N += sd->N*average(sc->weight); + N += ccl_fetch(sd, N)*average(sc->weight); } } if(is_zero(N)) - N = sd->N; + N = ccl_fetch(sd, N); else N = normalize(N); @@ -721,8 +723,8 @@ ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_b float3 N = make_float3(0.0f, 0.0f, 0.0f); float texture_blur = 0.0f, weight_sum = 0.0f; - for(int i = 0; i< sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; + for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { + ShaderClosure *sc = ccl_fetch_array(sd, closure, i); if(CLOSURE_IS_BSSRDF(sc->type)) { float avg_weight = fabsf(average(sc->weight)); @@ -735,7 +737,7 @@ ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_b } if(N_) - *N_ = (is_zero(N))? sd->N: normalize(N); + *N_ = (is_zero(N))? ccl_fetch(sd, N): normalize(N); if(texture_blur_) *texture_blur_ = texture_blur/weight_sum; @@ -747,7 +749,7 @@ ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_b ccl_device float3 emissive_eval(KernelGlobals *kg, ShaderData *sd, ShaderClosure *sc) { - return emissive_simple_eval(sd->Ng, sd->I); + return emissive_simple_eval(ccl_fetch(sd, Ng), ccl_fetch(sd, I)); } ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd) @@ -755,8 +757,8 @@ ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd) float3 eval; eval = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; + for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { + ShaderClosure *sc = ccl_fetch_array(sd, closure, i); if(CLOSURE_IS_EMISSION(sc->type)) eval += emissive_eval(kg, sd, sc)*sc->weight; @@ -771,8 +773,8 @@ ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd) { float3 weight = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; + for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { + ShaderClosure *sc = ccl_fetch_array(sd, closure, i); if(CLOSURE_IS_HOLDOUT(sc->type)) weight += sc->weight; @@ -786,8 +788,8 @@ ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd) ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, float randb, int path_flag, ShaderContext ctx) { - sd->num_closure = 0; - sd->randb_closure = randb; + ccl_fetch(sd, num_closure) = 0; + ccl_fetch(sd, randb_closure) = randb; #ifdef __OSL__ if(kg->osl) @@ -798,9 +800,11 @@ ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, #ifdef __SVM__ svm_eval_nodes(kg, sd, SHADER_TYPE_SURFACE, path_flag); #else - sd->closure->weight = make_float3(0.8f, 0.8f, 0.8f); - sd->closure->N = sd->N; - sd->flag |= bsdf_diffuse_setup(&sd->closure); + ccl_fetch_array(sd, closure, 0)->weight = make_float3(0.8f, 0.8f, 0.8f); + ccl_fetch_array(sd, closure, 0)->N = ccl_fetch(sd, N); + ccl_fetch_array(sd, closure, 0)->data0 = 0.0f; + ccl_fetch_array(sd, closure, 0)->data1 = 0.0f; + ccl_fetch(sd, flag) |= bsdf_diffuse_setup(ccl_fetch_array(sd, closure, 0)); #endif } } @@ -809,8 +813,8 @@ ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd, int path_flag, ShaderContext ctx) { - sd->num_closure = 0; - sd->randb_closure = 0.0f; + ccl_fetch(sd, num_closure) = 0; + ccl_fetch(sd, randb_closure) = 0.0f; #ifdef __OSL__ if(kg->osl) { @@ -825,8 +829,8 @@ ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd, int float3 eval = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i< sd->num_closure; i++) { - const ShaderClosure *sc = &sd->closure[i]; + for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { + const ShaderClosure *sc = ccl_fetch_array(sd, closure, i); if(CLOSURE_IS_BACKGROUND(sc->type)) eval += sc->weight; @@ -846,7 +850,7 @@ ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd, int ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd, const float3 omega_in, float *pdf, int skip_phase, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight) { - for(int i = 0; i< sd->num_closure; i++) { + for(int i = 0; i < sd->num_closure; i++) { if(i == skip_phase) continue; @@ -999,8 +1003,8 @@ ccl_device void shader_eval_volume(KernelGlobals *kg, ShaderData *sd, ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ShaderContext ctx) { - sd->num_closure = 0; - sd->randb_closure = 0.0f; + ccl_fetch(sd, num_closure) = 0; + ccl_fetch(sd, randb_closure) = 0.0f; /* this will modify sd->P */ #ifdef __SVM__ diff --git a/intern/cycles/kernel/kernel_shaderdata_vars.h b/intern/cycles/kernel/kernel_shaderdata_vars.h new file mode 100644 index 00000000000..b157b82e023 --- /dev/null +++ b/intern/cycles/kernel/kernel_shaderdata_vars.h @@ -0,0 +1,99 @@ +/* +* Copyright 2011-2015 Blender Foundation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +#ifndef SD_VAR +#define SD_VAR(type, what) +#endif +#ifndef SD_CLOSURE_VAR +#define SD_CLOSURE_VAR(type, what, max_closure) +#endif + +/* position */ +SD_VAR(float3, P) +/* smooth normal for shading */ +SD_VAR(float3, N) +/* true geometric normal */ +SD_VAR(float3, Ng) +/* view/incoming direction */ +SD_VAR(float3, I) +/* shader id */ +SD_VAR(int, shader) +/* booleans describing shader, see ShaderDataFlag */ +SD_VAR(int, flag) + +/* primitive id if there is one, ~0 otherwise */ +SD_VAR(int, prim) + +/* combined type and curve segment for hair */ +SD_VAR(int, type) + +/* parametric coordinates +* - barycentric weights for triangles */ +SD_VAR(float, u) +SD_VAR(float, v) +/* object id if there is one, ~0 otherwise */ +SD_VAR(int, object) + +/* motion blur sample time */ +SD_VAR(float, time) + +/* length of the ray being shaded */ +SD_VAR(float, ray_length) + +/* ray bounce depth */ +SD_VAR(int, ray_depth) + +/* ray transparent depth */ +SD_VAR(int, transparent_depth) + +#ifdef __RAY_DIFFERENTIALS__ +/* differential of P. these are orthogonal to Ng, not N */ +SD_VAR(differential3, dP) +/* differential of I */ +SD_VAR(differential3, dI) +/* differential of u, v */ +SD_VAR(differential, du) +SD_VAR(differential, dv) +#endif +#ifdef __DPDU__ +/* differential of P w.r.t. parametric coordinates. note that dPdu is +* not readily suitable as a tangent for shading on triangles. */ +SD_VAR(float3, dPdu) +SD_VAR(float3, dPdv) +#endif + +#ifdef __OBJECT_MOTION__ +/* object <-> world space transformations, cached to avoid +* re-interpolating them constantly for shading */ +SD_VAR(Transform, ob_tfm) +SD_VAR(Transform, ob_itfm) +#endif + +/* Closure data, we store a fixed array of closures */ +SD_CLOSURE_VAR(ShaderClosure, closure, MAX_CLOSURE) +SD_VAR(int, num_closure) +SD_VAR(float, randb_closure) + +/* ray start position, only set for backgrounds */ +SD_VAR(float3, ray_P) +SD_VAR(differential3, ray_dP) + +#ifdef __OSL__ +SD_VAR(struct KernelGlobals *, osl_globals) +#endif + +#undef SD_VAR +#undef SD_CLOSURE_VAR diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h index 61954282c28..2811a8348ca 100644 --- a/intern/cycles/kernel/kernel_shadow.h +++ b/intern/cycles/kernel/kernel_shadow.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -39,19 +39,6 @@ CCL_NAMESPACE_BEGIN * This is CPU only because of qsort, and malloc or high stack space usage to * record all these intersections. */ -ccl_device_noinline int shadow_intersections_compare(const void *a, const void *b) -{ - const Intersection *isect_a = (const Intersection*)a; - const Intersection *isect_b = (const Intersection*)b; - - if(isect_a->t < isect_b->t) - return -1; - else if(isect_a->t > isect_b->t) - return 1; - else - return 0; -} - #define STACK_MAX_HITS 64 ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ray, float3 *shadow) @@ -95,7 +82,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray * PathState ps = *state; #endif - qsort(hits, num_hits, sizeof(Intersection), shadow_intersections_compare); + qsort(hits, num_hits, sizeof(Intersection), intersections_compare); for(int hit = 0; hit < num_hits; hit++, isect++) { /* adjust intersection distance for moving ray forward */ @@ -193,19 +180,36 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray * * potentially transparent, and only in that case start marching. this gives * one extra ray cast for the cases were we do want transparency. */ -ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ray, float3 *shadow) +ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ccl_addr_space PathState *state, ccl_addr_space Ray *ray_input, float3 *shadow +#ifdef __SPLIT_KERNEL__ + , ShaderData *sd_mem, Intersection *isect_mem +#endif + ) { *shadow = make_float3(1.0f, 1.0f, 1.0f); - if(ray->t == 0.0f) + if(ray_input->t == 0.0f) return false; - Intersection isect; - bool blocked = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f); +#ifdef __SPLIT_KERNEL__ + Ray private_ray = *ray_input; + Ray *ray = &private_ray; +#else + Ray *ray = ray_input; +#endif + +#ifdef __SPLIT_KERNEL__ + Intersection *isect = isect_mem; +#else + Intersection isect_object; + Intersection *isect = &isect_object; +#endif + + bool blocked = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, isect, NULL, 0.0f, 0.0f); #ifdef __TRANSPARENT_SHADOWS__ if(blocked && kernel_data.integrator.transparent_shadows) { - if(shader_transparent_shadow(kg, &isect)) { + if(shader_transparent_shadow(kg, isect)) { float3 throughput = make_float3(1.0f, 1.0f, 1.0f); float3 Pend = ray->P + ray->D*ray->t; int bounce = state->transparent_bounce; @@ -217,9 +221,8 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray * if(bounce >= kernel_data.integrator.transparent_max_bounce) return true; - if(!scene_intersect(kg, ray, PATH_RAY_SHADOW_TRANSPARENT, &isect, NULL, 0.0f, 0.0f)) + if(!scene_intersect(kg, ray, PATH_RAY_SHADOW_TRANSPARENT, isect, NULL, 0.0f, 0.0f)) { - #ifdef __VOLUME__ /* attenuation for last line segment towards light */ if(ps.volume_stack[0].shader != SHADER_NONE) @@ -231,39 +234,44 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray * return false; } - if(!shader_transparent_shadow(kg, &isect)) + if(!shader_transparent_shadow(kg, isect)) return true; #ifdef __VOLUME__ /* attenuation between last surface and next surface */ if(ps.volume_stack[0].shader != SHADER_NONE) { Ray segment_ray = *ray; - segment_ray.t = isect.t; + segment_ray.t = isect->t; kernel_volume_shadow(kg, &ps, &segment_ray, &throughput); } #endif /* setup shader data at surface */ - ShaderData sd; - shader_setup_from_ray(kg, &sd, &isect, ray, state->bounce+1, bounce); +#ifdef __SPLIT_KERNEL__ + ShaderData *sd = sd_mem; +#else + ShaderData sd_object; + ShaderData *sd = &sd_object; +#endif + shader_setup_from_ray(kg, sd, isect, ray, state->bounce+1, bounce); /* attenuation from transparent surface */ - if(!(sd.flag & SD_HAS_ONLY_VOLUME)) { - shader_eval_surface(kg, &sd, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW); - throughput *= shader_bsdf_transparency(kg, &sd); + if(!(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME)) { + shader_eval_surface(kg, sd, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW); + throughput *= shader_bsdf_transparency(kg, sd); } if(is_zero(throughput)) return true; /* move ray forward */ - ray->P = ray_offset(sd.P, -sd.Ng); + ray->P = ray_offset(ccl_fetch(sd, P), -ccl_fetch(sd, Ng)); if(ray->t != FLT_MAX) ray->D = normalize_len(Pend - ray->P, &ray->t); #ifdef __VOLUME__ /* exit/enter volume */ - kernel_volume_stack_enter_exit(kg, &sd, ps.volume_stack); + kernel_volume_stack_enter_exit(kg, sd, ps.volume_stack); #endif bounce++; diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h index fb927e81f22..2da060c32a2 100644 --- a/intern/cycles/kernel/kernel_subsurface.h +++ b/intern/cycles/kernel/kernel_subsurface.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h index ef46b2f707f..f545a056cc8 100644 --- a/intern/cycles/kernel/kernel_textures.h +++ b/intern/cycles/kernel/kernel_textures.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef KERNEL_TEX @@ -24,6 +24,7 @@ /* bvh */ KERNEL_TEX(float4, texture_float4, __bvh_nodes) +KERNEL_TEX(float4, texture_float4, __bvh_leaf_nodes) KERNEL_TEX(float4, texture_float4, __tri_woop) KERNEL_TEX(uint, texture_uint, __prim_type) KERNEL_TEX(uint, texture_uint, __prim_visibility) diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 0ec34dae87a..60973a71d20 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __KERNEL_TYPES_H__ @@ -24,12 +24,19 @@ #define __KERNEL_CPU__ #endif +/* TODO(sergey): This is only to make it possible to include this header + * from outside of the kernel. but this could be done somewhat cleaner? + */ +#ifndef ccl_addr_space +#define ccl_addr_space +#endif + CCL_NAMESPACE_BEGIN /* constants */ #define OBJECT_SIZE 11 #define OBJECT_VECTOR_SIZE 6 -#define LIGHT_SIZE 4 +#define LIGHT_SIZE 5 #define FILTER_TABLE_SIZE 256 #define RAMP_TABLE_SIZE 256 #define PARTICLE_SIZE 5 @@ -38,12 +45,6 @@ CCL_NAMESPACE_BEGIN #define BSSRDF_MIN_RADIUS 1e-8f #define BSSRDF_MAX_HITS 4 -#define BB_DRAPPER 800.0f -#define BB_MAX_TABLE_RANGE 12000.0f -#define BB_TABLE_XPOWER 1.5f -#define BB_TABLE_YPOWER 5.0f -#define BB_TABLE_SPACING 2.0f - #define BECKMANN_TABLE_SIZE 256 #define TEX_NUM_FLOAT_IMAGES 5 @@ -57,6 +58,9 @@ CCL_NAMESPACE_BEGIN /* device capabilities */ #ifdef __KERNEL_CPU__ +#ifdef __KERNEL_SSE2__ +# define __QBVH__ +#endif #define __KERNEL_SHADING__ #define __KERNEL_ADV_SHADING__ #define __BRANCHED_PATH__ @@ -69,6 +73,7 @@ CCL_NAMESPACE_BEGIN #define __VOLUME_DECOUPLED__ #define __VOLUME_SCATTER__ #define __SHADOW_RECORD_ALL__ +#define __VOLUME_RECORD_ALL__ #define __CAMERA_RAY_NODES__ #endif @@ -80,7 +85,7 @@ CCL_NAMESPACE_BEGIN #define __VOLUME_SCATTER__ /* Experimental on GPU */ -#ifdef __KERNEL_CUDA_EXPERIMENTAL__ +#ifdef __KERNEL_EXPERIMENTAL__ #define __SUBSURFACE__ #define __CMJ__ #endif @@ -92,38 +97,51 @@ CCL_NAMESPACE_BEGIN /* keep __KERNEL_ADV_SHADING__ in sync with opencl_kernel_use_advanced_shading! */ #ifdef __KERNEL_OPENCL_NVIDIA__ -#define __KERNEL_SHADING__ -#define __KERNEL_ADV_SHADING__ +# define __KERNEL_SHADING__ +# define __KERNEL_ADV_SHADING__ +# ifdef __KERNEL_EXPERIMENTAL__ +# define __CMJ__ +# endif #endif #ifdef __KERNEL_OPENCL_APPLE__ -#define __KERNEL_SHADING__ -//#define __KERNEL_ADV_SHADING__ +# define __KERNEL_SHADING__ +# define __KERNEL_ADV_SHADING__ +/* TODO(sergey): Currently experimental section is ignored here, + * this is because megakernel in device_opencl does not support + * custom cflags depending on the scene features. + */ +# ifdef __KERNEL_EXPERIMENTAL__ +# define __CMJ__ +# endif #endif #ifdef __KERNEL_OPENCL_AMD__ -#define __CL_USE_NATIVE__ -#define __KERNEL_SHADING__ -//__KERNEL_ADV_SHADING__ -#define __MULTI_CLOSURE__ -#define __TRANSPARENT_SHADOWS__ -#define __PASSES__ -#define __BACKGROUND_MIS__ -#define __LAMP_MIS__ -#define __AO__ -//#define __CAMERA_MOTION__ -//#define __OBJECT_MOTION__ -//#define __HAIR__ -//end __KERNEL_ADV_SHADING__ +# define __CL_USE_NATIVE__ +# define __KERNEL_SHADING__ +# define __MULTI_CLOSURE__ +# define __PASSES__ +# define __BACKGROUND_MIS__ +# define __LAMP_MIS__ +# define __AO__ +# define __CAMERA_MOTION__ +# define __OBJECT_MOTION__ +# define __HAIR__ +# ifdef __KERNEL_EXPERIMENTAL__ +# define __TRANSPARENT_SHADOWS__ +# endif #endif #ifdef __KERNEL_OPENCL_INTEL_CPU__ -#define __CL_USE_NATIVE__ -#define __KERNEL_SHADING__ -#define __KERNEL_ADV_SHADING__ +# define __CL_USE_NATIVE__ +# define __KERNEL_SHADING__ +# define __KERNEL_ADV_SHADING__ +# ifdef __KERNEL_EXPERIMENTAL__ +# define __CMJ__ +# endif #endif -#endif +#endif // __KERNEL_OPENCL__ /* kernel features */ #define __SOBOL__ @@ -158,6 +176,21 @@ CCL_NAMESPACE_BEGIN #define __HAIR__ #endif +#ifdef WITH_CYCLES_DEBUG +# define __KERNEL_DEBUG__ +#endif + +/* Scene-based selective featrues compilation/ */ +#ifdef __NO_CAMERA_MOTION__ +# undef __CAMERA_MOTION__ +#endif +#ifdef __NO_OBJECT_MOTION__ +# undef __OBJECT_MOTION__ +#endif +#ifdef __NO_HAIR__ +# undef __HAIR__ +#endif + /* Random Numbers */ typedef uint RNG; @@ -263,9 +296,7 @@ enum PathRayFlag { PATH_RAY_MIS_SKIP = 2048, PATH_RAY_DIFFUSE_ANCESTOR = 4096, - PATH_RAY_GLOSSY_ANCESTOR = 8192, - PATH_RAY_BSSRDF_ANCESTOR = 16384, - PATH_RAY_SINGLE_PASS_DONE = 32768, + PATH_RAY_SINGLE_PASS_DONE = 8192, /* we need layer member flags to be the 20 upper bits */ PATH_RAY_LAYER_SHIFT = (32-20) @@ -288,39 +319,44 @@ typedef enum ClosureLabel { typedef enum PassType { PASS_NONE = 0, - PASS_COMBINED = 1, - PASS_DEPTH = 2, - PASS_NORMAL = 4, - PASS_UV = 8, - PASS_OBJECT_ID = 16, - PASS_MATERIAL_ID = 32, - PASS_DIFFUSE_COLOR = 64, - PASS_GLOSSY_COLOR = 128, - PASS_TRANSMISSION_COLOR = 256, - PASS_DIFFUSE_INDIRECT = 512, - PASS_GLOSSY_INDIRECT = 1024, - PASS_TRANSMISSION_INDIRECT = 2048, - PASS_DIFFUSE_DIRECT = 4096, - PASS_GLOSSY_DIRECT = 8192, - PASS_TRANSMISSION_DIRECT = 16384, - PASS_EMISSION = 32768, - PASS_BACKGROUND = 65536, - PASS_AO = 131072, - PASS_SHADOW = 262144, - PASS_MOTION = 524288, - PASS_MOTION_WEIGHT = 1048576, - PASS_MIST = 2097152, - PASS_SUBSURFACE_DIRECT = 4194304, - PASS_SUBSURFACE_INDIRECT = 8388608, - PASS_SUBSURFACE_COLOR = 16777216, - PASS_LIGHT = 33554432, /* no real pass, used to force use_light_pass */ + PASS_COMBINED = (1 << 0), + PASS_DEPTH = (1 << 1), + PASS_NORMAL = (1 << 2), + PASS_UV = (1 << 3), + PASS_OBJECT_ID = (1 << 4), + PASS_MATERIAL_ID = (1 << 5), + PASS_DIFFUSE_COLOR = (1 << 6), + PASS_GLOSSY_COLOR = (1 << 7), + PASS_TRANSMISSION_COLOR = (1 << 8), + PASS_DIFFUSE_INDIRECT = (1 << 9), + PASS_GLOSSY_INDIRECT = (1 << 10), + PASS_TRANSMISSION_INDIRECT = (1 << 11), + PASS_DIFFUSE_DIRECT = (1 << 12), + PASS_GLOSSY_DIRECT = (1 << 13), + PASS_TRANSMISSION_DIRECT = (1 << 14), + PASS_EMISSION = (1 << 15), + PASS_BACKGROUND = (1 << 16), + PASS_AO = (1 << 17), + PASS_SHADOW = (1 << 18), + PASS_MOTION = (1 << 19), + PASS_MOTION_WEIGHT = (1 << 20), + PASS_MIST = (1 << 21), + PASS_SUBSURFACE_DIRECT = (1 << 22), + PASS_SUBSURFACE_INDIRECT = (1 << 23), + PASS_SUBSURFACE_COLOR = (1 << 24), + PASS_LIGHT = (1 << 25), /* no real pass, used to force use_light_pass */ +#ifdef __KERNEL_DEBUG__ + PASS_BVH_TRAVERSAL_STEPS = (1 << 26), + PASS_BVH_TRAVERSED_INSTANCES = (1 << 27), + PASS_RAY_BOUNCES = (1 << 28), +#endif } PassType; #define PASS_ALL (~0) #ifdef __PASSES__ -typedef struct PathRadiance { +typedef ccl_addr_space struct PathRadiance { int use_light_pass; float3 emission; @@ -372,7 +408,7 @@ typedef struct BsdfEval { #else -typedef float3 PathRadiance; +typedef ccl_addr_space float3 PathRadiance; typedef float3 BsdfEval; #endif @@ -417,6 +453,7 @@ enum CameraType { enum PanoramaType { PANORAMA_EQUIRECTANGULAR, + PANORAMA_MIRRORBALL, PANORAMA_FISHEYE_EQUIDISTANT, PANORAMA_FISHEYE_EQUISOLID }; @@ -436,10 +473,26 @@ typedef struct differential { /* Ray */ typedef struct Ray { +/* TODO(sergey): This is only needed because current AMD + * compiler has hard time building the kernel with this + * reshuffle. And at the same time reshuffle will cause + * less optimal CPU code in certain places. + * + * We'll get rid of this nasty exception once AMD compiler + * is fixed. + */ +#ifndef __KERNEL_OPENCL_AMD__ float3 P; /* origin */ float3 D; /* direction */ + + float t; /* length of the ray */ + float time; /* time (for motion blur) */ +#else float t; /* length of the ray */ float time; /* time (for motion blur) */ + float3 P; /* origin */ + float3 D; /* direction */ +#endif #ifdef __RAY_DIFFERENTIALS__ differential3 dP; @@ -449,11 +502,16 @@ typedef struct Ray { /* Intersection */ -typedef struct Intersection { +typedef ccl_addr_space struct Intersection { float t, u, v; int prim; int object; int type; + +#ifdef __KERNEL_DEBUG__ + int num_traversal_steps; + int num_traversed_instances; +#endif } Intersection; /* Primitives */ @@ -468,7 +526,12 @@ typedef enum PrimitiveType { PRIMITIVE_ALL_TRIANGLE = (PRIMITIVE_TRIANGLE|PRIMITIVE_MOTION_TRIANGLE), PRIMITIVE_ALL_CURVE = (PRIMITIVE_CURVE|PRIMITIVE_MOTION_CURVE), PRIMITIVE_ALL_MOTION = (PRIMITIVE_MOTION_TRIANGLE|PRIMITIVE_MOTION_CURVE), - PRIMITIVE_ALL = (PRIMITIVE_ALL_TRIANGLE|PRIMITIVE_ALL_CURVE) + PRIMITIVE_ALL = (PRIMITIVE_ALL_TRIANGLE|PRIMITIVE_ALL_CURVE), + + /* Total number of different primitives. + * NOTE: This is an actual value, not a bitflag. + */ + PRIMITIVE_NUM_TOTAL = 4, } PrimitiveType; #define PRIMITIVE_PACK_SEGMENT(type, segment) ((segment << 16) | type) @@ -516,6 +579,7 @@ typedef enum AttributeStandard { ATTR_STD_VOLUME_FLAME, ATTR_STD_VOLUME_HEAT, ATTR_STD_VOLUME_VELOCITY, + ATTR_STD_POINTINESS, ATTR_STD_NUM, ATTR_STD_NOT_FOUND = ~0 @@ -524,39 +588,34 @@ typedef enum AttributeStandard { /* Closure data */ #ifdef __MULTI_CLOSURE__ -#define MAX_CLOSURE 64 +# ifndef __MAX_CLOSURE__ +# define MAX_CLOSURE 64 +# else +# define MAX_CLOSURE __MAX_CLOSURE__ +# endif #else #define MAX_CLOSURE 1 #endif -/* TODO(sergey): This is rather nasty bug happening in here, which - * could be simply a compilers bug for which we can't find a generic - * platform independent workaround. Also even if it's a compiler - * issue, it's not so simple to upgrade the compiler in the release - * environment for linux and doing it so closer to the release is - * rather a risky business. - * - * For this release it's probably safer to stick with such a rather - * dirty solution, and look for a cleaner fix during the next release - * cycle. +/* This struct is to be 16 bytes aligned, we also keep some extra precautions: + * - All the float3 members are in the beginning of the struct, so compiler + * does not put own padding trying to align this members. + * - We make sure OSL pointer is also 16 bytes aligned. */ -typedef struct ShaderClosure { - ClosureType type; +typedef ccl_addr_space struct ShaderClosure { float3 weight; -#ifndef __APPLE__ + float3 N; + float3 T; + + ClosureType type; float sample_weight; -#endif float data0; float data1; float data2; + int pad1, pad2, pad3; - float3 N; - float3 T; -#ifdef __APPLE__ - float sample_weight; -#endif #ifdef __OSL__ - void *prim; + void *prim, *pad4; #endif } ShaderClosure; @@ -581,119 +640,70 @@ typedef enum ShaderContext { enum ShaderDataFlag { /* runtime flags */ - SD_BACKFACING = 1, /* backside of surface? */ - SD_EMISSION = 2, /* have emissive closure? */ - SD_BSDF = 4, /* have bsdf closure? */ - SD_BSDF_HAS_EVAL = 8, /* have non-singular bsdf closure? */ - SD_PHASE_HAS_EVAL = 8, /* have non-singular phase closure? */ - SD_BSDF_GLOSSY = 16, /* have glossy bsdf */ - SD_BSSRDF = 32, /* have bssrdf */ - SD_HOLDOUT = 64, /* have holdout closure? */ - SD_ABSORPTION = 128, /* have volume absorption closure? */ - SD_SCATTER = 256, /* have volume phase closure? */ - SD_AO = 512, /* have ao closure? */ - SD_TRANSPARENT = 1024, /* have transparent closure? */ - - SD_CLOSURE_FLAGS = (SD_EMISSION|SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY| - SD_BSSRDF|SD_HOLDOUT|SD_ABSORPTION|SD_SCATTER|SD_AO), + SD_BACKFACING = (1 << 0), /* backside of surface? */ + SD_EMISSION = (1 << 1), /* have emissive closure? */ + SD_BSDF = (1 << 2), /* have bsdf closure? */ + SD_BSDF_HAS_EVAL = (1 << 3), /* have non-singular bsdf closure? */ + SD_BSSRDF = (1 << 4), /* have bssrdf */ + SD_HOLDOUT = (1 << 5), /* have holdout closure? */ + SD_ABSORPTION = (1 << 6), /* have volume absorption closure? */ + SD_SCATTER = (1 << 7), /* have volume phase closure? */ + SD_AO = (1 << 8), /* have ao closure? */ + SD_TRANSPARENT = (1 << 9), /* have transparent closure? */ + + SD_CLOSURE_FLAGS = (SD_EMISSION|SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSSRDF| + SD_HOLDOUT|SD_ABSORPTION|SD_SCATTER|SD_AO), /* shader flags */ - SD_USE_MIS = 2048, /* direct light sample */ - SD_HAS_TRANSPARENT_SHADOW = 4096, /* has transparent shadow */ - SD_HAS_VOLUME = 8192, /* has volume shader */ - SD_HAS_ONLY_VOLUME = 16384, /* has only volume shader, no surface */ - SD_HETEROGENEOUS_VOLUME = 32768, /* has heterogeneous volume */ - SD_HAS_BSSRDF_BUMP = 65536, /* bssrdf normal uses bump */ - SD_VOLUME_EQUIANGULAR = 131072, /* use equiangular sampling */ - SD_VOLUME_MIS = 262144, /* use multiple importance sampling */ + SD_USE_MIS = (1 << 10), /* direct light sample */ + SD_HAS_TRANSPARENT_SHADOW = (1 << 11), /* has transparent shadow */ + SD_HAS_VOLUME = (1 << 12), /* has volume shader */ + SD_HAS_ONLY_VOLUME = (1 << 13), /* has only volume shader, no surface */ + SD_HETEROGENEOUS_VOLUME = (1 << 14), /* has heterogeneous volume */ + SD_HAS_BSSRDF_BUMP = (1 << 15), /* bssrdf normal uses bump */ + SD_VOLUME_EQUIANGULAR = (1 << 16), /* use equiangular sampling */ + SD_VOLUME_MIS = (1 << 17), /* use multiple importance sampling */ + SD_VOLUME_CUBIC = (1 << 18), /* use cubic interpolation for voxels */ + SD_HAS_BUMP = (1 << 19), /* has data connected to the displacement input */ SD_SHADER_FLAGS = (SD_USE_MIS|SD_HAS_TRANSPARENT_SHADOW|SD_HAS_VOLUME| SD_HAS_ONLY_VOLUME|SD_HETEROGENEOUS_VOLUME| - SD_HAS_BSSRDF_BUMP|SD_VOLUME_EQUIANGULAR|SD_VOLUME_MIS), + SD_HAS_BSSRDF_BUMP|SD_VOLUME_EQUIANGULAR|SD_VOLUME_MIS| + SD_VOLUME_CUBIC|SD_HAS_BUMP), /* object flags */ - SD_HOLDOUT_MASK = 524288, /* holdout for camera rays */ - SD_OBJECT_MOTION = 1048576, /* has object motion blur */ - SD_TRANSFORM_APPLIED = 2097152, /* vertices have transform applied */ - SD_NEGATIVE_SCALE_APPLIED = 4194304, /* vertices have negative scale applied */ - - SD_OBJECT_FLAGS = (SD_HOLDOUT_MASK|SD_OBJECT_MOTION|SD_TRANSFORM_APPLIED) + SD_HOLDOUT_MASK = (1 << 20), /* holdout for camera rays */ + SD_OBJECT_MOTION = (1 << 21), /* has object motion blur */ + SD_TRANSFORM_APPLIED = (1 << 22), /* vertices have transform applied */ + SD_NEGATIVE_SCALE_APPLIED = (1 << 23), /* vertices have negative scale applied */ + SD_OBJECT_HAS_VOLUME = (1 << 24), /* object has a volume shader */ + SD_OBJECT_INTERSECTS_VOLUME = (1 << 25), /* object intersects AABB of an object with volume shader */ + SD_OBJECT_HAS_VERTEX_MOTION = (1 << 26), /* has position for motion vertices */ + + SD_OBJECT_FLAGS = (SD_HOLDOUT_MASK|SD_OBJECT_MOTION|SD_TRANSFORM_APPLIED| + SD_NEGATIVE_SCALE_APPLIED|SD_OBJECT_HAS_VOLUME| + SD_OBJECT_INTERSECTS_VOLUME) }; struct KernelGlobals; -typedef struct ShaderData { - /* position */ - float3 P; - /* smooth normal for shading */ - float3 N; - /* true geometric normal */ - float3 Ng; - /* view/incoming direction */ - float3 I; - /* shader id */ - int shader; - /* booleans describing shader, see ShaderDataFlag */ - int flag; - - /* primitive id if there is one, ~0 otherwise */ - int prim; - - /* combined type and curve segment for hair */ - int type; - - /* parametric coordinates - * - barycentric weights for triangles */ - float u, v; - /* object id if there is one, ~0 otherwise */ - int object; - - /* motion blur sample time */ - float time; - - /* length of the ray being shaded */ - float ray_length; - - /* ray bounce depth */ - int ray_depth; - - /* ray transparent depth */ - int transparent_depth; - -#ifdef __RAY_DIFFERENTIALS__ - /* differential of P. these are orthogonal to Ng, not N */ - differential3 dP; - /* differential of I */ - differential3 dI; - /* differential of u, v */ - differential du; - differential dv; -#endif -#ifdef __DPDU__ - /* differential of P w.r.t. parametric coordinates. note that dPdu is - * not readily suitable as a tangent for shading on triangles. */ - float3 dPdu, dPdv; -#endif - -#ifdef __OBJECT_MOTION__ - /* object <-> world space transformations, cached to avoid - * re-interpolating them constantly for shading */ - Transform ob_tfm; - Transform ob_itfm; +#ifdef __SPLIT_KERNEL__ +#define SD_VAR(type, what) ccl_global type *what; +#define SD_CLOSURE_VAR(type, what, max_closure) type *what; +#define TIDX (get_global_id(1) * get_global_size(0) + get_global_id(0)) +#define ccl_fetch(s, t) (s->t[TIDX]) +#define ccl_fetch_array(s, t, index) (&s->t[TIDX * MAX_CLOSURE + index]) +#else +#define SD_VAR(type, what) type what; +#define SD_CLOSURE_VAR(type, what, max_closure) type what[max_closure]; +#define ccl_fetch(s, t) (s->t) +#define ccl_fetch_array(s, t, index) (&s->t[index]) #endif - /* Closure data, we store a fixed array of closures */ - ShaderClosure closure[MAX_CLOSURE]; - int num_closure; - float randb_closure; +typedef ccl_addr_space struct ShaderData { - /* ray start position, only set for backgrounds */ - float3 ray_P; - differential3 ray_dP; +#include "kernel_shaderdata_vars.h" -#ifdef __OSL__ - struct KernelGlobals *osl_globals; -#endif } ShaderData; /* Path State */ @@ -711,7 +721,6 @@ typedef struct PathState { /* random number generator state */ int rng_offset; /* dimension offset */ - int rng_offset_bsdf; /* dimension offset for picking bsdf */ int sample; /* path sample number */ int num_samples; /* total number of times this path will be sampled */ @@ -751,6 +760,7 @@ typedef struct KernelCamera { int panorama_type; float fisheye_fov; float fisheye_lens; + float4 equirectangular_range; /* matrices */ Transform cameratoworld; @@ -768,7 +778,7 @@ typedef struct KernelCamera { /* motion blur */ float shuttertime; - int have_motion; + int have_motion, have_perspective_motion; /* clipping */ float nearclip; @@ -789,7 +799,7 @@ typedef struct KernelCamera { int shader; float focal_length; - float pad[3]; + float pad[2]; /* more matrices */ Transform screentoworld; @@ -803,6 +813,11 @@ typedef struct KernelCamera { Transform worldtocamera; MotionTransform motion; + + /* Denotes changes in the projective matrix, namely in rastertocamera. + * Used for camera zoom motion blur, + */ + PerspectiveMotionTransform perspective_motion; } KernelCamera; typedef struct KernelFilm { @@ -850,6 +865,13 @@ typedef struct KernelFilm { float mist_start; float mist_inv_depth; float mist_falloff; + +#ifdef __KERNEL_DEBUG__ + int pass_bvh_traversal_steps; + int pass_bvh_traversed_instances; + int pass_ray_bounces; + int pass_pad3; +#endif } KernelFilm; typedef struct KernelBackground { @@ -876,6 +898,11 @@ typedef struct KernelIntegrator { float inv_pdf_lights; int pdf_background_res; + /* light portals */ + float portal_pdf; + int num_portals; + int portal_offset; + /* bounces */ int min_bounce; int max_bounce; @@ -928,6 +955,8 @@ typedef struct KernelIntegrator { int volume_max_steps; float volume_step_size; int volume_samples; + + int pad; } KernelIntegrator; typedef struct KernelBVH { @@ -937,8 +966,8 @@ typedef struct KernelBVH { int have_motion; int have_curves; int have_instancing; - - int pad1, pad2, pad3; + int use_qbvh; + int pad1, pad2; } KernelBVH; typedef enum CurveFlag { @@ -961,9 +990,8 @@ typedef struct KernelCurves { } KernelCurves; typedef struct KernelTables { - int blackbody_offset; int beckmann_offset; - int pad1, pad2; + int pad1, pad2, pad3; } KernelTables; typedef struct KernelData { @@ -983,6 +1011,68 @@ typedef struct CameraData { int shader; } CameraData; +#ifdef __KERNEL_DEBUG__ +typedef ccl_addr_space struct DebugData { + // Total number of BVH node traversal steps and primitives intersections + // for the camera rays. + int num_bvh_traversal_steps; + int num_bvh_traversed_instances; + int num_ray_bounces; +} DebugData; +#endif + +/* Declarations required for split kernel */ + +/* Macro for queues */ +/* Value marking queue's empty slot */ +#define QUEUE_EMPTY_SLOT -1 + +/* +* Queue 1 - Active rays +* Queue 2 - Background queue +* Queue 3 - Shadow ray cast kernel - AO +* Queeu 4 - Shadow ray cast kernel - direct lighting +*/ +#define NUM_QUEUES 4 + +/* Queue names */ +enum QueueNumber { + QUEUE_ACTIVE_AND_REGENERATED_RAYS = 0, /* All active rays and regenerated rays are enqueued here. */ + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS = 1, /* All + * 1. Background-hit rays, + * 2. Rays that has exited path-iteration but needs to update output buffer + * 3. Rays to be regenerated + * are enqueued here. + */ + QUEUE_SHADOW_RAY_CAST_AO_RAYS = 2, /* All rays for which a shadow ray should be cast to determine radiance + * contribution for AO are enqueued here. + */ + QUEUE_SHADOW_RAY_CAST_DL_RAYS = 3, /* All rays for which a shadow ray should be cast to determine radiance + * contributing for direct lighting are enqueued here. + */ +}; + +/* We use RAY_STATE_MASK to get ray_state (enums 0 to 5) */ +#define RAY_STATE_MASK 0x007 +#define RAY_FLAG_MASK 0x0F8 +enum RayState { + RAY_ACTIVE = 0, // Denotes ray is actively involved in path-iteration + RAY_INACTIVE = 1, // Denotes ray has completed processing all samples and is inactive + RAY_UPDATE_BUFFER = 2, // Denoted ray has exited path-iteration and needs to update output buffer + RAY_HIT_BACKGROUND = 3, // Donotes ray has hit background + RAY_TO_REGENERATE = 4, // Denotes ray has to be regenerated + RAY_REGENERATED = 5, // Denotes ray has been regenerated + RAY_SKIP_DL = 6, // Denotes ray should skip direct lighting + RAY_SHADOW_RAY_CAST_AO = 16, // Flag's ray has to execute shadow blocked function in AO part + RAY_SHADOW_RAY_CAST_DL = 32 // Flag's ray has to execute shadow blocked function in direct lighting part +}; + +#define ASSIGN_RAY_STATE(ray_state, ray_index, state) (ray_state[ray_index] = ((ray_state[ray_index] & RAY_FLAG_MASK) | state)) +#define IS_STATE(ray_state, ray_index, state) ((ray_state[ray_index] & RAY_STATE_MASK) == state) +#define ADD_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] | flag)) +#define REMOVE_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] & (~flag))) +#define IS_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] & flag) + CCL_NAMESPACE_END #endif /* __KERNEL_TYPES_H__ */ diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index 1273869ca28..0a74a9deba9 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -374,7 +374,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba /* distance sampling */ sample_t = kernel_volume_distance_sample(ray->t, sigma_t, channel, xi, &transmittance, &pdf); - /* modifiy pdf for hit/miss decision */ + /* modify pdf for hit/miss decision */ if(probalistic_scatter) pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(sigma_t, t); @@ -422,7 +422,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba /* heterogeneous volume distance sampling: integrate stepping through the * volume until we reach the end, get absorbed entirely, or run out of - * iterations. this does probalistically scatter or get transmitted through + * iterations. this does probabilistically scatter or get transmitted through * for path tracing where we don't want to branch. */ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput, RNG *rng) @@ -578,10 +578,11 @@ ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals /* Decoupled Volume Sampling * * VolumeSegment is list of coefficients and transmittance stored at all steps - * through a volume. This can then latter be used for decoupled sampling as in: + * through a volume. This can then later be used for decoupled sampling as in: * "Importance Sampling Techniques for Path Tracing in Participating Media" * - * On the GPU this is only supported for homogeneous volumes (1 step), due to + * On the GPU this is only supported (but currently not enabled) + * for homogeneous volumes (1 step), due to * no support for malloc/free and too much stack usage with a fix size array. */ typedef struct VolumeStep { @@ -595,6 +596,7 @@ typedef struct VolumeStep { } VolumeStep; typedef struct VolumeSegment { + VolumeStep stack_step; /* stack storage for homogeneous step, to avoid malloc */ VolumeStep *steps; /* recorded steps */ int numsteps; /* number of steps */ int closure_flag; /* accumulated closure flags from all steps */ @@ -608,7 +610,7 @@ typedef struct VolumeSegment { /* record volume steps to the end of the volume. * * it would be nice if we could only record up to the point that we need to scatter, - * but the entire segment is needed to do always scattering, rather than probalistically + * but the entire segment is needed to do always scattering, rather than probabilistically * hitting or missing the volume. if we don't know the transmittance at the end of the * volume we can't generate stratified distance samples up to that transmittance */ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state, @@ -621,17 +623,22 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta float step_size, random_jitter_offset; if(heterogeneous) { - max_steps = kernel_data.integrator.volume_max_steps; + const int global_max_steps = kernel_data.integrator.volume_max_steps; step_size = kernel_data.integrator.volume_step_size; - random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size; - /* compute exact steps in advance for malloc */ max_steps = max((int)ceilf(ray->t/step_size), 1); + if(max_steps > global_max_steps) { + max_steps = global_max_steps; + step_size = ray->t / (float)max_steps; + } + segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps); + random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size; } else { max_steps = 1; step_size = ray->t; random_jitter_offset = 0.0f; + segment->steps = &segment->stack_step; } /* init accumulation variables */ @@ -640,10 +647,9 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f); float t = 0.0f; - segment->closure_flag = 0; segment->numsteps = 0; - - segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps); + segment->closure_flag = 0; + bool is_last_step_empty = false; VolumeStep *step = segment->steps; @@ -685,12 +691,24 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta step->closure_flag = closure_flag; segment->closure_flag |= closure_flag; + + is_last_step_empty = false; + segment->numsteps++; } else { - /* store empty step (todo: skip consecutive empty steps) */ - step->sigma_t = make_float3(0.0f, 0.0f, 0.0f); - step->sigma_s = make_float3(0.0f, 0.0f, 0.0f); - step->closure_flag = 0; + if(is_last_step_empty) { + /* consecutive empty step, merge */ + step--; + } + else { + /* store empty step */ + step->sigma_t = make_float3(0.0f, 0.0f, 0.0f); + step->sigma_s = make_float3(0.0f, 0.0f, 0.0f); + step->closure_flag = 0; + + segment->numsteps++; + is_last_step_empty = true; + } } step->accum_transmittance = accum_transmittance; @@ -698,8 +716,6 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta step->t = new_t; step->shade_t = t + random_jitter_offset; - segment->numsteps++; - /* stop if at the end of the volume */ t = new_t; if(t == ray->t) @@ -729,16 +745,13 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment) { - free(segment->steps); + if(segment->steps != &segment->stack_step) + free(segment->steps); } /* scattering for homogeneous and heterogeneous volumes, using decoupled ray - * marching. unlike the non-decoupled functions, these do not do probalistic - * scattering, they always scatter if there is any non-zero scattering - * coefficient. + * marching. this function does not do emission or modify throughput. * - * these also do not do emission or modify throughput. - * * function is expected to return VOLUME_PATH_SCATTERED when probalistic_scatter is false */ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter( KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, @@ -753,7 +766,7 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter( sd->randb_closure = rphase*3.0f - channel; float xi = rscatter; - /* probalistic scattering decision based on transmittance */ + /* probabilistic scattering decision based on transmittance */ if(probalistic_scatter) { float sample_transmittance = kernel_volume_channel_get(segment->accum_transmittance, channel); @@ -833,7 +846,7 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter( float3 distance_pdf; sample_t = prev_t + kernel_volume_distance_sample(step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf); - /* modifiy pdf for hit/miss decision */ + /* modify pdf for hit/miss decision */ if(probalistic_scatter) distance_pdf *= make_float3(1.0f, 1.0f, 1.0f) - segment->accum_transmittance; @@ -929,7 +942,7 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter( /* decide if we need to use decoupled or not */ ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneous, bool direct, int sampling_method) { - /* decoupled ray marching for heterogenous volumes not supported on the GPU, + /* decoupled ray marching for heterogeneous volumes not supported on the GPU, * which also means equiangular and multiple importance sampling is not * support for that case */ #ifdef __KERNEL_GPU__ @@ -958,7 +971,7 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg, Ray *ray, VolumeStack *stack) { - /* NULL ray happens in the baker, does it need proper initializetion of + /* NULL ray happens in the baker, does it need proper initialization of * camera in volume? */ if(!kernel_data.cam.is_inside_volume || ray == NULL) { @@ -976,25 +989,26 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg, return; } - const float3 Pend = ray->P + ray->D*ray->t; Ray volume_ray = *ray; - int stack_index = 0, enclosed_index = 0; - int enclosed_volumes[VOLUME_STACK_SIZE]; + volume_ray.t = FLT_MAX; - while(stack_index < VOLUME_STACK_SIZE - 1 && - enclosed_index < VOLUME_STACK_SIZE - 1) - { - Intersection isect; - bool hit = scene_intersect(kg, &volume_ray, PATH_RAY_ALL_VISIBILITY, - &isect, - NULL, 0.0f, 0.0f); - if(!hit) { - break; - } + int stack_index = 0, enclosed_index = 0; - ShaderData sd; - shader_setup_from_ray(kg, &sd, &isect, &volume_ray, 0, 0); - if(sd.flag & SD_HAS_VOLUME) { +#ifdef __VOLUME_RECORD_ALL__ + Intersection hits[2*VOLUME_STACK_SIZE]; + uint num_hits = scene_intersect_volume_all(kg, + &volume_ray, + hits, + 2*VOLUME_STACK_SIZE); + if(num_hits > 0) { + int enclosed_volumes[VOLUME_STACK_SIZE]; + Intersection *isect = hits; + + qsort(hits, num_hits, sizeof(Intersection), intersections_compare); + + for(uint hit = 0; hit < num_hits; ++hit, ++isect) { + ShaderData sd; + shader_setup_from_ray(kg, &sd, isect, &volume_ray, 0, 0); if(sd.flag & SD_BACKFACING) { /* If ray exited the volume and never entered to that volume * it means that camera is inside such a volume. @@ -1014,24 +1028,56 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg, } else { /* If ray from camera enters the volume, this volume shouldn't - * be added to the stak on exit. + * be added to the stack on exit. */ enclosed_volumes[enclosed_index++] = sd.object; } } + } +#else + int enclosed_volumes[VOLUME_STACK_SIZE]; + int step = 0; - /* Move ray forward. */ - volume_ray.P = ray_offset(sd.P, -sd.Ng); - if(volume_ray.t != FLT_MAX) { - volume_ray.D = normalize_len(Pend - volume_ray.P, &volume_ray.t); - /* TODO(sergey): Find a faster way detecting that ray_offset moved - * us pass through the end point. + while(stack_index < VOLUME_STACK_SIZE - 1 && + enclosed_index < VOLUME_STACK_SIZE - 1 && + step < 2 * VOLUME_STACK_SIZE) + { + Intersection isect; + if(!scene_intersect_volume(kg, &volume_ray, &isect)) { + break; + } + + ShaderData sd; + shader_setup_from_ray(kg, &sd, &isect, &volume_ray, 0, 0); + if(sd.flag & SD_BACKFACING) { + /* If ray exited the volume and never entered to that volume + * it means that camera is inside such a volume. */ - if(dot(ray->D, volume_ray.D) < 0.0f) { - break; + bool is_enclosed = false; + for(int i = 0; i < enclosed_index; ++i) { + if(enclosed_volumes[i] == sd.object) { + is_enclosed = true; + break; + } } + if(is_enclosed == false) { + stack[stack_index].object = sd.object; + stack[stack_index].shader = sd.shader; + ++stack_index; + } + } + else { + /* If ray from camera enters the volume, this volume shouldn't + * be added to the stack on exit. + */ + enclosed_volumes[enclosed_index++] = sd.object; } + + /* Move ray forward. */ + volume_ray.P = ray_offset(sd.P, -sd.Ng); + ++step; } +#endif /* stack_index of 0 means quick checks outside of the kernel gave false * positive, nothing to worry about, just we've wasted quite a few of * ticks just to come into conclusion that camera is in the air. @@ -1094,4 +1140,49 @@ ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd } } +#ifdef __SUBSURFACE__ +ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg, + Ray *ray, + VolumeStack *stack) +{ + kernel_assert(kernel_data.integrator.use_volumes); + + Ray volume_ray = *ray; + +#ifdef __VOLUME_RECORD_ALL__ + Intersection hits[2*VOLUME_STACK_SIZE]; + uint num_hits = scene_intersect_volume_all(kg, + &volume_ray, + hits, + 2*VOLUME_STACK_SIZE); + if(num_hits > 0) { + Intersection *isect = hits; + + qsort(hits, num_hits, sizeof(Intersection), intersections_compare); + + for(uint hit = 0; hit < num_hits; ++hit, ++isect) { + ShaderData sd; + shader_setup_from_ray(kg, &sd, isect, &volume_ray, 0, 0); + kernel_volume_stack_enter_exit(kg, &sd, stack); + } + } +#else + Intersection isect; + int step = 0; + while(step < 2 * VOLUME_STACK_SIZE && + scene_intersect_volume(kg, &volume_ray, &isect)) + { + ShaderData sd; + shader_setup_from_ray(kg, &sd, &isect, &volume_ray, 0, 0); + kernel_volume_stack_enter_exit(kg, &sd, stack); + + /* Move ray forward. */ + volume_ray.P = ray_offset(sd.P, -sd.Ng); + volume_ray.t -= sd.ray_length; + ++step; + } +#endif +} +#endif + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h new file mode 100644 index 00000000000..9b83d972e97 --- /dev/null +++ b/intern/cycles/kernel/kernel_work_stealing.h @@ -0,0 +1,193 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __KERNEL_WORK_STEALING_H__ +#define __KERNEL_WORK_STEALING_H__ + +/* + * Utility functions for work stealing + */ + +#ifdef __WORK_STEALING__ + +#ifdef __KERNEL_OPENCL__ +#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable +#endif + +uint get_group_id_with_ray_index(uint ray_index, + uint tile_dim_x, + uint tile_dim_y, + uint parallel_samples, + int dim) +{ + if(dim == 0) { + uint x_span = ray_index % (tile_dim_x * parallel_samples); + return x_span / get_local_size(0); + } + else /*if(dim == 1)*/ { + kernel_assert(dim == 1); + uint y_span = ray_index / (tile_dim_x * parallel_samples); + return y_span / get_local_size(1); + } +} + +uint get_total_work(uint tile_dim_x, + uint tile_dim_y, + uint grp_idx, + uint grp_idy, + uint num_samples) +{ + uint threads_within_tile_border_x = + (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0) + : get_local_size(0); + uint threads_within_tile_border_y = + (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1) + : get_local_size(1); + + threads_within_tile_border_x = + (threads_within_tile_border_x == 0) ? get_local_size(0) + : threads_within_tile_border_x; + threads_within_tile_border_y = + (threads_within_tile_border_y == 0) ? get_local_size(1) + : threads_within_tile_border_y; + + return threads_within_tile_border_x * + threads_within_tile_border_y * + num_samples; +} + +/* Returns 0 in case there is no next work available */ +/* Returns 1 in case work assigned is valid */ +int get_next_work(ccl_global uint *work_pool, + ccl_private uint *my_work, + uint tile_dim_x, + uint tile_dim_y, + uint num_samples, + uint parallel_samples, + uint ray_index) +{ + uint grp_idx = get_group_id_with_ray_index(ray_index, + tile_dim_x, + tile_dim_y, + parallel_samples, + 0); + uint grp_idy = get_group_id_with_ray_index(ray_index, + tile_dim_x, + tile_dim_y, + parallel_samples, + 1); + uint total_work = get_total_work(tile_dim_x, + tile_dim_y, + grp_idx, + grp_idy, + num_samples); + uint group_index = grp_idy * get_num_groups(0) + grp_idx; + *my_work = atomic_inc(&work_pool[group_index]); + return (*my_work < total_work) ? 1 : 0; +} + +/* This function assumes that the passed my_work is valid. */ +/* Decode sample number w.r.t. assigned my_work. */ +uint get_my_sample(uint my_work, + uint tile_dim_x, + uint tile_dim_y, + uint parallel_samples, + uint ray_index) +{ + uint grp_idx = get_group_id_with_ray_index(ray_index, + tile_dim_x, + tile_dim_y, + parallel_samples, + 0); + uint grp_idy = get_group_id_with_ray_index(ray_index, + tile_dim_x, + tile_dim_y, + parallel_samples, + 1); + uint threads_within_tile_border_x = + (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0) + : get_local_size(0); + uint threads_within_tile_border_y = + (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1) + : get_local_size(1); + + threads_within_tile_border_x = + (threads_within_tile_border_x == 0) ? get_local_size(0) + : threads_within_tile_border_x; + threads_within_tile_border_y = + (threads_within_tile_border_y == 0) ? get_local_size(1) + : threads_within_tile_border_y; + + return my_work / + (threads_within_tile_border_x * threads_within_tile_border_y); +} + +/* Decode pixel and tile position w.r.t. assigned my_work. */ +void get_pixel_tile_position(ccl_private uint *pixel_x, + ccl_private uint *pixel_y, + ccl_private uint *tile_x, + ccl_private uint *tile_y, + uint my_work, + uint tile_dim_x, + uint tile_dim_y, + uint tile_offset_x, + uint tile_offset_y, + uint parallel_samples, + uint ray_index) +{ + uint grp_idx = get_group_id_with_ray_index(ray_index, + tile_dim_x, + tile_dim_y, + parallel_samples, + 0); + uint grp_idy = get_group_id_with_ray_index(ray_index, + tile_dim_x, + tile_dim_y, + parallel_samples, + 1); + uint threads_within_tile_border_x = + (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0) + : get_local_size(0); + uint threads_within_tile_border_y = + (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1) + : get_local_size(1); + + threads_within_tile_border_x = + (threads_within_tile_border_x == 0) ? get_local_size(0) + : threads_within_tile_border_x; + threads_within_tile_border_y = + (threads_within_tile_border_y == 0) ? get_local_size(1) + : threads_within_tile_border_y; + + uint total_associated_pixels = + threads_within_tile_border_x * threads_within_tile_border_y; + uint work_group_pixel_index = my_work % total_associated_pixels; + uint work_group_pixel_x = + work_group_pixel_index % threads_within_tile_border_x; + uint work_group_pixel_y = + work_group_pixel_index / threads_within_tile_border_x; + + *pixel_x = + tile_offset_x + (grp_idx * get_local_size(0)) + work_group_pixel_x; + *pixel_y = + tile_offset_y + (grp_idy * get_local_size(1)) + work_group_pixel_y; + *tile_x = *pixel_x - tile_offset_x; + *tile_y = *pixel_y - tile_offset_y; +} + +#endif /* __WORK_STEALING__ */ + +#endif /* __KERNEL_WORK_STEALING_H__ */ diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp index fa2113fbb46..2c8d3503c1a 100644 --- a/intern/cycles/kernel/kernel.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp @@ -11,18 +11,19 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ /* CPU kernel entry points */ -#include "kernel.h" #include "kernel_compat_cpu.h" +#include "kernel.h" #include "kernel_math.h" #include "kernel_types.h" #include "kernel_globals.h" #include "kernel_film.h" #include "kernel_path.h" +#include "kernel_path_branched.h" #include "kernel_bake.h" CCL_NAMESPACE_BEGIN @@ -37,7 +38,14 @@ void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t s assert(0); } -void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t width, size_t height, size_t depth, InterpolationType interpolation) +void kernel_tex_copy(KernelGlobals *kg, + const char *name, + device_ptr mem, + size_t width, + size_t height, + size_t depth, + InterpolationType interpolation, + ExtensionType extension) { if(0) { } @@ -55,7 +63,7 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t int id = atoi(name + strlen("__tex_image_float_")); int array_index = id; - if (array_index >= 0 && array_index < MAX_FLOAT_IMAGES) { + if(array_index >= 0 && array_index < MAX_FLOAT_IMAGES) { tex = &kg->texture_float_images[array_index]; } @@ -63,6 +71,7 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t tex->data = (float4*)mem; tex->dimensions_set(width, height, depth); tex->interpolation = interpolation; + tex->extension = extension; } } else if(strstr(name, "__tex_image")) { @@ -70,7 +79,7 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t int id = atoi(name + strlen("__tex_image_")); int array_index = id - MAX_FLOAT_IMAGES; - if (array_index >= 0 && array_index < MAX_BYTE_IMAGES) { + if(array_index >= 0 && array_index < MAX_BYTE_IMAGES) { tex = &kg->texture_byte_images[array_index]; } @@ -78,6 +87,7 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t tex->data = (uchar4*)mem; tex->dimensions_set(width, height, depth); tex->interpolation = interpolation; + tex->extension = extension; } } else diff --git a/intern/cycles/kernel/kernel_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp index e7ff21a6f09..df77bedc729 100644 --- a/intern/cycles/kernel/kernel_avx.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ /* Optimized CPU kernel entry points. This file is compiled with AVX @@ -31,13 +31,14 @@ #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX -#include "kernel.h" #include "kernel_compat_cpu.h" +#include "kernel.h" #include "kernel_math.h" #include "kernel_types.h" #include "kernel_globals.h" #include "kernel_film.h" #include "kernel_path.h" +#include "kernel_path_branched.h" #include "kernel_bake.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/kernel_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp index cb1662bbfbe..b3192369794 100644 --- a/intern/cycles/kernel/kernel_avx2.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ /* Optimized CPU kernel entry points. This file is compiled with AVX2 @@ -32,13 +32,14 @@ #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 -#include "kernel.h" #include "kernel_compat_cpu.h" +#include "kernel.h" #include "kernel_math.h" #include "kernel_types.h" #include "kernel_globals.h" #include "kernel_film.h" #include "kernel_path.h" +#include "kernel_path_branched.h" #include "kernel_bake.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp index 740998e8c92..f9c5134e442 100644 --- a/intern/cycles/kernel/kernel_sse2.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ /* Optimized CPU kernel entry points. This file is compiled with SSE2 @@ -27,13 +27,14 @@ #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 -#include "kernel.h" #include "kernel_compat_cpu.h" +#include "kernel.h" #include "kernel_math.h" #include "kernel_types.h" #include "kernel_globals.h" #include "kernel_film.h" #include "kernel_path.h" +#include "kernel_path_branched.h" #include "kernel_bake.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp index da73a3a1c97..2dbe4b81821 100644 --- a/intern/cycles/kernel/kernel_sse3.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ /* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3 @@ -29,13 +29,14 @@ #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 -#include "kernel.h" #include "kernel_compat_cpu.h" +#include "kernel.h" #include "kernel_math.h" #include "kernel_types.h" #include "kernel_globals.h" #include "kernel_film.h" #include "kernel_path.h" +#include "kernel_path_branched.h" #include "kernel_bake.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/kernel_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp index 5704f60e138..5c57ad01181 100644 --- a/intern/cycles/kernel/kernel_sse41.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ /* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3 @@ -30,13 +30,14 @@ #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 -#include "kernel.h" #include "kernel_compat_cpu.h" +#include "kernel.h" #include "kernel_math.h" #include "kernel_types.h" #include "kernel_globals.h" #include "kernel_film.h" #include "kernel_path.h" +#include "kernel_path_branched.h" #include "kernel_bake.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu index 9ed4592f604..bcd55b8c676 100644 --- a/intern/cycles/kernel/kernel.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel.cu @@ -11,18 +11,19 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ /* CUDA kernel entry points */ -#include "kernel_compat_cuda.h" -#include "kernel_math.h" -#include "kernel_types.h" -#include "kernel_globals.h" -#include "kernel_film.h" -#include "kernel_path.h" -#include "kernel_bake.h" +#include "../../kernel_compat_cuda.h" +#include "../../kernel_math.h" +#include "../../kernel_types.h" +#include "../../kernel_globals.h" +#include "../../kernel_film.h" +#include "../../kernel_path.h" +#include "../../kernel_path_branched.h" +#include "../../kernel_bake.h" /* device data taken from CUDA occupancy calculator */ @@ -52,6 +53,18 @@ #define CUDA_KERNEL_MAX_REGISTERS 63 #define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 +/* 3.2 */ +#elif __CUDA_ARCH__ == 320 +#define CUDA_MULTIPRESSOR_MAX_REGISTERS 32768 +#define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16 +#define CUDA_BLOCK_MAX_THREADS 1024 +#define CUDA_THREAD_MAX_REGISTERS 63 + +/* tunable parameters */ +#define CUDA_THREADS_BLOCK_WIDTH 16 +#define CUDA_KERNEL_MAX_REGISTERS 63 +#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 + /* 5.0 and 5.2 */ #elif __CUDA_ARCH__ == 500 || __CUDA_ARCH__ == 520 #define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536 diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl index 4f20ef9ca15..57db6fd9098 100644 --- a/intern/cycles/kernel/kernel.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel.cl @@ -11,19 +11,22 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ /* OpenCL kernel entry points - unfinished */ -#include "kernel_compat_opencl.h" -#include "kernel_math.h" -#include "kernel_types.h" -#include "kernel_globals.h" +#include "../../kernel_compat_opencl.h" +#include "../../kernel_math.h" +#include "../../kernel_types.h" +#include "../../kernel_globals.h" -#include "kernel_film.h" -#include "kernel_path.h" -#include "kernel_bake.h" +#include "../../kernel_film.h" +#include "../../kernel_path.h" +#include "../../kernel_path_branched.h" +#include "../../kernel_bake.h" + +#ifdef __COMPILE_ONLY_MEGAKERNEL__ __kernel void kernel_ocl_path_trace( ccl_constant KernelData *data, @@ -32,7 +35,7 @@ __kernel void kernel_ocl_path_trace( #define KERNEL_TEX(type, ttype, name) \ ccl_global type *name, -#include "kernel_textures.h" +#include "../../kernel_textures.h" int sample, int sx, int sy, int sw, int sh, int offset, int stride) @@ -43,7 +46,7 @@ __kernel void kernel_ocl_path_trace( #define KERNEL_TEX(type, ttype, name) \ kg->name = name; -#include "kernel_textures.h" +#include "../../kernel_textures.h" int x = sx + get_global_id(0); int y = sy + get_global_id(1); @@ -52,17 +55,18 @@ __kernel void kernel_ocl_path_trace( kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); } -__kernel void kernel_ocl_convert_to_byte( +#else // __COMPILE_ONLY_MEGAKERNEL__ + +__kernel void kernel_ocl_shader( ccl_constant KernelData *data, - ccl_global uchar4 *rgba, - ccl_global float *buffer, + ccl_global uint4 *input, + ccl_global float4 *output, #define KERNEL_TEX(type, ttype, name) \ ccl_global type *name, -#include "kernel_textures.h" +#include "../../kernel_textures.h" - float sample_scale, - int sx, int sy, int sw, int sh, int offset, int stride) + int type, int sx, int sw, int offset, int sample) { KernelGlobals kglobals, *kg = &kglobals; @@ -70,26 +74,24 @@ __kernel void kernel_ocl_convert_to_byte( #define KERNEL_TEX(type, ttype, name) \ kg->name = name; -#include "kernel_textures.h" +#include "../../kernel_textures.h" int x = sx + get_global_id(0); - int y = sy + get_global_id(1); - if(x < sx + sw && y < sy + sh) - kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); + if(x < sx + sw) + kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, x, sample); } -__kernel void kernel_ocl_convert_to_half_float( +__kernel void kernel_ocl_bake( ccl_constant KernelData *data, - ccl_global uchar4 *rgba, - ccl_global float *buffer, + ccl_global uint4 *input, + ccl_global float4 *output, #define KERNEL_TEX(type, ttype, name) \ ccl_global type *name, -#include "kernel_textures.h" +#include "../../kernel_textures.h" - float sample_scale, - int sx, int sy, int sw, int sh, int offset, int stride) + int type, int sx, int sw, int offset, int sample) { KernelGlobals kglobals, *kg = &kglobals; @@ -97,25 +99,30 @@ __kernel void kernel_ocl_convert_to_half_float( #define KERNEL_TEX(type, ttype, name) \ kg->name = name; -#include "kernel_textures.h" +#include "../../kernel_textures.h" int x = sx + get_global_id(0); - int y = sy + get_global_id(1); - if(x < sx + sw && y < sy + sh) - kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); + if(x < sx + sw) { +#ifdef __NO_BAKING__ + output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); +#else + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, offset, sample); +#endif + } } -__kernel void kernel_ocl_shader( +__kernel void kernel_ocl_convert_to_byte( ccl_constant KernelData *data, - ccl_global uint4 *input, - ccl_global float4 *output, + ccl_global uchar4 *rgba, + ccl_global float *buffer, #define KERNEL_TEX(type, ttype, name) \ ccl_global type *name, -#include "kernel_textures.h" +#include "../../kernel_textures.h" - int type, int sx, int sw, int offset, int sample) + float sample_scale, + int sx, int sy, int sw, int sh, int offset, int stride) { KernelGlobals kglobals, *kg = &kglobals; @@ -123,24 +130,26 @@ __kernel void kernel_ocl_shader( #define KERNEL_TEX(type, ttype, name) \ kg->name = name; -#include "kernel_textures.h" +#include "../../kernel_textures.h" int x = sx + get_global_id(0); + int y = sy + get_global_id(1); - if(x < sx + sw) - kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, x, sample); + if(x < sx + sw && y < sy + sh) + kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); } -__kernel void kernel_ocl_bake( +__kernel void kernel_ocl_convert_to_half_float( ccl_constant KernelData *data, - ccl_global uint4 *input, - ccl_global float4 *output, + ccl_global uchar4 *rgba, + ccl_global float *buffer, #define KERNEL_TEX(type, ttype, name) \ ccl_global type *name, -#include "kernel_textures.h" +#include "../../kernel_textures.h" - int type, int sx, int sw, int offset, int sample) + float sample_scale, + int sx, int sy, int sw, int sh, int offset, int stride) { KernelGlobals kglobals, *kg = &kglobals; @@ -148,11 +157,13 @@ __kernel void kernel_ocl_bake( #define KERNEL_TEX(type, ttype, name) \ kg->name = name; -#include "kernel_textures.h" +#include "../../kernel_textures.h" int x = sx + get_global_id(0); + int y = sy + get_global_id(1); - if(x < sx + sw) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, offset, sample); + if(x < sx + sw && y < sy + sh) + kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); } +#endif // __COMPILE_ONLY_MEGAKERNEL__
\ No newline at end of file diff --git a/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl new file mode 100644 index 00000000000..eff77b89a0a --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl @@ -0,0 +1,128 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_background_buffer_update.h" + +__kernel void kernel_ocl_path_trace_background_buffer_update( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, + ccl_global float *per_sample_output_buffers, + ccl_global uint *rng_state, + ccl_global uint *rng_coop, /* Required for buffer Update */ + ccl_global float3 *throughput_coop, /* Required for background hit processing */ + PathRadiance *PathRadiance_coop, /* Required for background hit processing and buffer Update */ + ccl_global Ray *Ray_coop, /* Required for background hit processing */ + ccl_global PathState *PathState_coop, /* Required for background hit processing */ + ccl_global float *L_transparent_coop, /* Required for background hit processing and buffer Update */ + ccl_global char *ray_state, /* Stores information on the current state of a ray */ + int sw, int sh, int sx, int sy, int stride, + int rng_state_offset_x, + int rng_state_offset_y, + int rng_state_stride, + ccl_global unsigned int *work_array, /* Denotes work of each ray */ + ccl_global int *Queue_data, /* Queues memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize, /* Size (capacity) of each queue */ + int end_sample, + int start_sample, +#ifdef __WORK_STEALING__ + ccl_global unsigned int *work_pool_wgs, + unsigned int num_samples, +#endif +#ifdef __KERNEL_DEBUG__ + DebugData *debugdata_coop, +#endif + int parallel_samples) /* Number of samples to be processed in parallel */ +{ + ccl_local unsigned int local_queue_atomics; + if(get_local_id(0) == 0 && get_local_id(1) == 0) { + local_queue_atomics = 0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); + if(ray_index == 0) { + /* We will empty this queue in this kernel. */ + Queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0; + } + char enqueue_flag = 0; + ray_index = get_ray_index(ray_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + Queue_data, + queuesize, + 1); + +#ifdef __COMPUTE_DEVICE_GPU__ + /* If we are executing on a GPU device, we exit all threads that are not + * required. + * + * If we are executing on a CPU device, then we need to keep all threads + * active since we have barrier() calls later in the kernel. CPU devices, + * expect all threads to execute barrier statement. + */ + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } +#endif + +#ifndef __COMPUTE_DEVICE_GPU__ + if(ray_index != QUEUE_EMPTY_SLOT) { +#endif + enqueue_flag = + kernel_background_buffer_update(globals, + data, + shader_data, + per_sample_output_buffers, + rng_state, + rng_coop, + throughput_coop, + PathRadiance_coop, + Ray_coop, + PathState_coop, + L_transparent_coop, + ray_state, + sw, sh, sx, sy, stride, + rng_state_offset_x, + rng_state_offset_y, + rng_state_stride, + work_array, + end_sample, + start_sample, +#ifdef __WORK_STEALING__ + work_pool_wgs, + num_samples, +#endif +#ifdef __KERNEL_DEBUG__ + debugdata_coop, +#endif + parallel_samples, + ray_index); +#ifndef __COMPUTE_DEVICE_GPU__ + } +#endif + + /* Enqueue RAY_REGENERATED rays into QUEUE_ACTIVE_AND_REGENERATED_RAYS; + * These rays will be made active during next SceneIntersectkernel. + */ + enqueue_ray_index_local(ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + enqueue_flag, + queuesize, + &local_queue_atomics, + Queue_data, + Queue_index); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl new file mode 100644 index 00000000000..c3277676029 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl @@ -0,0 +1,241 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_data_init.h" + +__kernel void kernel_ocl_path_trace_data_init( + ccl_global char *globals, + ccl_global char *shader_data_sd, /* Arguments related to ShaderData */ + ccl_global char *shader_data_sd_DL_shadow, /* Arguments related to ShaderData */ + + ccl_global float3 *P_sd, + ccl_global float3 *P_sd_DL_shadow, + + ccl_global float3 *N_sd, + ccl_global float3 *N_sd_DL_shadow, + + ccl_global float3 *Ng_sd, + ccl_global float3 *Ng_sd_DL_shadow, + + ccl_global float3 *I_sd, + ccl_global float3 *I_sd_DL_shadow, + + ccl_global int *shader_sd, + ccl_global int *shader_sd_DL_shadow, + + ccl_global int *flag_sd, + ccl_global int *flag_sd_DL_shadow, + + ccl_global int *prim_sd, + ccl_global int *prim_sd_DL_shadow, + + ccl_global int *type_sd, + ccl_global int *type_sd_DL_shadow, + + ccl_global float *u_sd, + ccl_global float *u_sd_DL_shadow, + + ccl_global float *v_sd, + ccl_global float *v_sd_DL_shadow, + + ccl_global int *object_sd, + ccl_global int *object_sd_DL_shadow, + + ccl_global float *time_sd, + ccl_global float *time_sd_DL_shadow, + + ccl_global float *ray_length_sd, + ccl_global float *ray_length_sd_DL_shadow, + + ccl_global int *ray_depth_sd, + ccl_global int *ray_depth_sd_DL_shadow, + + ccl_global int *transparent_depth_sd, + ccl_global int *transparent_depth_sd_DL_shadow, + + /* Ray differentials. */ + ccl_global differential3 *dP_sd, + ccl_global differential3 *dP_sd_DL_shadow, + + ccl_global differential3 *dI_sd, + ccl_global differential3 *dI_sd_DL_shadow, + + ccl_global differential *du_sd, + ccl_global differential *du_sd_DL_shadow, + + ccl_global differential *dv_sd, + ccl_global differential *dv_sd_DL_shadow, + + /* Dp/Du */ + ccl_global float3 *dPdu_sd, + ccl_global float3 *dPdu_sd_DL_shadow, + + ccl_global float3 *dPdv_sd, + ccl_global float3 *dPdv_sd_DL_shadow, + + /* Object motion. */ + ccl_global Transform *ob_tfm_sd, + ccl_global Transform *ob_tfm_sd_DL_shadow, + + ccl_global Transform *ob_itfm_sd, + ccl_global Transform *ob_itfm_sd_DL_shadow, + + ShaderClosure *closure_sd, + ShaderClosure *closure_sd_DL_shadow, + + ccl_global int *num_closure_sd, + ccl_global int *num_closure_sd_DL_shadow, + + ccl_global float *randb_closure_sd, + ccl_global float *randb_closure_sd_DL_shadow, + + ccl_global float3 *ray_P_sd, + ccl_global float3 *ray_P_sd_DL_shadow, + + ccl_global differential3 *ray_dP_sd, + ccl_global differential3 *ray_dP_sd_DL_shadow, + + ccl_constant KernelData *data, + ccl_global float *per_sample_output_buffers, + ccl_global uint *rng_state, + ccl_global uint *rng_coop, /* rng array to store rng values for all rays */ + ccl_global float3 *throughput_coop, /* throughput array to store throughput values for all rays */ + ccl_global float *L_transparent_coop, /* L_transparent array to store L_transparent values for all rays */ + PathRadiance *PathRadiance_coop, /* PathRadiance array to store PathRadiance values for all rays */ + ccl_global Ray *Ray_coop, /* Ray array to store Ray information for all rays */ + ccl_global PathState *PathState_coop, /* PathState array to store PathState information for all rays */ + ccl_global char *ray_state, /* Stores information on current state of a ray */ + +#define KERNEL_TEX(type, ttype, name) \ + ccl_global type *name, +#include "../../kernel_textures.h" + + int start_sample, int sx, int sy, int sw, int sh, int offset, int stride, + int rng_state_offset_x, + int rng_state_offset_y, + int rng_state_stride, + ccl_global int *Queue_data, /* Memory for queues */ + ccl_global int *Queue_index, /* Tracks the number of elements in queues */ + int queuesize, /* size (capacity) of the queue */ + ccl_global char *use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */ + ccl_global unsigned int *work_array, /* work array to store which work each ray belongs to */ +#ifdef __WORK_STEALING__ + ccl_global unsigned int *work_pool_wgs, /* Work pool for each work group */ + unsigned int num_samples, /* Total number of samples per pixel */ +#endif +#ifdef __KERNEL_DEBUG__ + DebugData *debugdata_coop, +#endif + int parallel_samples) /* Number of samples to be processed in parallel */ +{ + kernel_data_init(globals, + shader_data_sd, + shader_data_sd_DL_shadow, + P_sd, + P_sd_DL_shadow, + N_sd, + N_sd_DL_shadow, + Ng_sd, + Ng_sd_DL_shadow, + I_sd, + I_sd_DL_shadow, + shader_sd, + shader_sd_DL_shadow, + flag_sd, + flag_sd_DL_shadow, + prim_sd, + prim_sd_DL_shadow, + type_sd, + type_sd_DL_shadow, + u_sd, + u_sd_DL_shadow, + v_sd, + v_sd_DL_shadow, + object_sd, + object_sd_DL_shadow, + time_sd, + time_sd_DL_shadow, + ray_length_sd, + ray_length_sd_DL_shadow, + ray_depth_sd, + ray_depth_sd_DL_shadow, + transparent_depth_sd, + transparent_depth_sd_DL_shadow, + + /* Ray differentials. */ + dP_sd, + dP_sd_DL_shadow, + dI_sd, + dI_sd_DL_shadow, + du_sd, + du_sd_DL_shadow, + dv_sd, + dv_sd_DL_shadow, + + /* Dp/Du */ + dPdu_sd, + dPdu_sd_DL_shadow, + dPdv_sd, + dPdv_sd_DL_shadow, + + /* Object motion. */ + ob_tfm_sd, + ob_tfm_sd_DL_shadow, + ob_itfm_sd, + ob_itfm_sd_DL_shadow, + + closure_sd, + closure_sd_DL_shadow, + num_closure_sd, + num_closure_sd_DL_shadow, + randb_closure_sd, + randb_closure_sd_DL_shadow, + ray_P_sd, + ray_P_sd_DL_shadow, + ray_dP_sd, + ray_dP_sd_DL_shadow, + data, + per_sample_output_buffers, + rng_state, + rng_coop, + throughput_coop, + L_transparent_coop, + PathRadiance_coop, + Ray_coop, + PathState_coop, + ray_state, + +#define KERNEL_TEX(type, ttype, name) name, +#include "../../kernel_textures.h" + + start_sample, sx, sy, sw, sh, offset, stride, + rng_state_offset_x, + rng_state_offset_y, + rng_state_stride, + Queue_data, + Queue_index, + queuesize, + use_queues_flag, + work_array, +#ifdef __WORK_STEALING__ + work_pool_wgs, + num_samples, +#endif +#ifdef __KERNEL_DEBUG__ + debugdata_coop, +#endif + parallel_samples); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl new file mode 100644 index 00000000000..6ec75013b3a --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl @@ -0,0 +1,90 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_direct_lighting.h" + +__kernel void kernel_ocl_path_trace_direct_lighting( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Required for direct lighting */ + ccl_global char *shader_DL, /* Required for direct lighting */ + ccl_global uint *rng_coop, /* Required for direct lighting */ + ccl_global PathState *PathState_coop, /* Required for direct lighting */ + ccl_global int *ISLamp_coop, /* Required for direct lighting */ + ccl_global Ray *LightRay_coop, /* Required for direct lighting */ + ccl_global BsdfEval *BSDFEval_coop, /* Required for direct lighting */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + ccl_global int *Queue_data, /* Queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize) /* Size (capacity) of each queue */ +{ + ccl_local unsigned int local_queue_atomics; + if(get_local_id(0) == 0 && get_local_id(1) == 0) { + local_queue_atomics = 0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + char enqueue_flag = 0; + int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); + ray_index = get_ray_index(ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + Queue_data, + queuesize, + 0); + +#ifdef __COMPUTE_DEVICE_GPU__ + /* If we are executing on a GPU device, we exit all threads that are not + * required. + * + * If we are executing on a CPU device, then we need to keep all threads + * active since we have barrier() calls later in the kernel. CPU devices, + * expect all threads to execute barrier statement. + */ + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } +#endif + +#ifndef __COMPUTE_DEVICE_GPU__ + if(ray_index != QUEUE_EMPTY_SLOT) { +#endif + enqueue_flag = kernel_direct_lighting(globals, + data, + shader_data, + shader_DL, + rng_coop, + PathState_coop, + ISLamp_coop, + LightRay_coop, + BSDFEval_coop, + ray_state, + ray_index); + +#ifndef __COMPUTE_DEVICE_GPU__ + } +#endif + +#ifdef __EMISSION__ + /* Enqueue RAY_SHADOW_RAY_CAST_DL rays. */ + enqueue_ray_index_local(ray_index, + QUEUE_SHADOW_RAY_CAST_DL_RAYS, + enqueue_flag, + queuesize, + &local_queue_atomics, + Queue_data, + Queue_index); +#endif +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl new file mode 100644 index 00000000000..ae5f5cd1b3b --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl @@ -0,0 +1,124 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_holdout_emission_blurring_pathtermination_ao.h" + +__kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Required throughout the kernel except probabilistic path termination and AO */ + ccl_global float *per_sample_output_buffers, + ccl_global uint *rng_coop, /* Required for "kernel_write_data_passes" and AO */ + ccl_global float3 *throughput_coop, /* Required for handling holdout material and AO */ + ccl_global float *L_transparent_coop, /* Required for handling holdout material */ + PathRadiance *PathRadiance_coop, /* Required for "kernel_write_data_passes" and indirect primitive emission */ + ccl_global PathState *PathState_coop, /* Required throughout the kernel and AO */ + Intersection *Intersection_coop, /* Required for indirect primitive emission */ + ccl_global float3 *AOAlpha_coop, /* Required for AO */ + ccl_global float3 *AOBSDF_coop, /* Required for AO */ + ccl_global Ray *AOLightRay_coop, /* Required for AO */ + int sw, int sh, int sx, int sy, int stride, + ccl_global char *ray_state, /* Denotes the state of each ray */ + ccl_global unsigned int *work_array, /* Denotes the work that each ray belongs to */ + ccl_global int *Queue_data, /* Queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize, /* Size (capacity) of each queue */ +#ifdef __WORK_STEALING__ + unsigned int start_sample, +#endif + int parallel_samples) /* Number of samples to be processed in parallel */ +{ + ccl_local unsigned int local_queue_atomics_bg; + ccl_local unsigned int local_queue_atomics_ao; + if(get_local_id(0) == 0 && get_local_id(1) == 0) { + local_queue_atomics_bg = 0; + local_queue_atomics_ao = 0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + char enqueue_flag = 0; + char enqueue_flag_AO_SHADOW_RAY_CAST = 0; + int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); + ray_index = get_ray_index(ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + Queue_data, + queuesize, + 0); + +#ifdef __COMPUTE_DEVICE_GPU__ + /* If we are executing on a GPU device, we exit all threads that are not + * required. + * + * If we are executing on a CPU device, then we need to keep all threads + * active since we have barrier() calls later in the kernel. CPU devices, + * expect all threads to execute barrier statement. + */ + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } +#endif /* __COMPUTE_DEVICE_GPU__ */ + +#ifndef __COMPUTE_DEVICE_GPU__ + if(ray_index != QUEUE_EMPTY_SLOT) { +#endif + kernel_holdout_emission_blurring_pathtermination_ao( + globals, + data, + shader_data, + per_sample_output_buffers, + rng_coop, + throughput_coop, + L_transparent_coop, + PathRadiance_coop, + PathState_coop, + Intersection_coop, + AOAlpha_coop, + AOBSDF_coop, + AOLightRay_coop, + sw, sh, sx, sy, stride, + ray_state, + work_array, +#ifdef __WORK_STEALING__ + start_sample, +#endif + parallel_samples, + ray_index, + &enqueue_flag, + &enqueue_flag_AO_SHADOW_RAY_CAST); +#ifndef __COMPUTE_DEVICE_GPU__ + } +#endif + + /* Enqueue RAY_UPDATE_BUFFER rays. */ + enqueue_ray_index_local(ray_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + enqueue_flag, + queuesize, + &local_queue_atomics_bg, + Queue_data, + Queue_index); + +#ifdef __AO__ + /* Enqueue to-shadow-ray-cast rays. */ + enqueue_ray_index_local(ray_index, + QUEUE_SHADOW_RAY_CAST_AO_RAYS, + enqueue_flag_AO_SHADOW_RAY_CAST, + queuesize, + &local_queue_atomics_ao, + Queue_data, + Queue_index); +#endif +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl new file mode 100644 index 00000000000..1bc7808d834 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl @@ -0,0 +1,84 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_lamp_emission.h" + +__kernel void kernel_ocl_path_trace_lamp_emission( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Required for lamp emission */ + ccl_global float3 *throughput_coop, /* Required for lamp emission */ + PathRadiance *PathRadiance_coop, /* Required for lamp emission */ + ccl_global Ray *Ray_coop, /* Required for lamp emission */ + ccl_global PathState *PathState_coop, /* Required for lamp emission */ + Intersection *Intersection_coop, /* Required for lamp emission */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + int sw, int sh, + ccl_global int *Queue_data, /* Memory for queues */ + ccl_global int *Queue_index, /* Tracks the number of elements in queues */ + int queuesize, /* Size (capacity) of queues */ + ccl_global char *use_queues_flag, /* Used to decide if this kernel should use + * queues to fetch ray index + */ + int parallel_samples) /* Number of samples to be processed in parallel */ +{ + int x = get_global_id(0); + int y = get_global_id(1); + + /* We will empty this queue in this kernel. */ + if(get_global_id(0) == 0 && get_global_id(1) == 0) { + Queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; + } + /* Fetch use_queues_flag. */ + ccl_local char local_use_queues_flag; + if(get_local_id(0) == 0 && get_local_id(1) == 0) { + local_use_queues_flag = use_queues_flag[0]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int ray_index; + if(local_use_queues_flag) { + int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0); + ray_index = get_ray_index(thread_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + Queue_data, + queuesize, + 1); + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } + } else { + if(x < (sw * parallel_samples) && y < sh){ + ray_index = x + y * (sw * parallel_samples); + } else { + return; + } + } + + kernel_lamp_emission(globals, + data, + shader_data, + throughput_coop, + PathRadiance_coop, + Ray_coop, + PathState_coop, + Intersection_coop, + ray_state, + sw, sh, + use_queues_flag, + parallel_samples, + ray_index); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl new file mode 100644 index 00000000000..dcf4db40411 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl @@ -0,0 +1,115 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_next_iteration_setup.h" + +__kernel void kernel_ocl_path_trace_next_iteration_setup( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Required for setting up ray for next iteration */ + ccl_global uint *rng_coop, /* Required for setting up ray for next iteration */ + ccl_global float3 *throughput_coop, /* Required for setting up ray for next iteration */ + PathRadiance *PathRadiance_coop, /* Required for setting up ray for next iteration */ + ccl_global Ray *Ray_coop, /* Required for setting up ray for next iteration */ + ccl_global PathState *PathState_coop, /* Required for setting up ray for next iteration */ + ccl_global Ray *LightRay_dl_coop, /* Required for radiance update - direct lighting */ + ccl_global int *ISLamp_coop, /* Required for radiance update - direct lighting */ + ccl_global BsdfEval *BSDFEval_coop, /* Required for radiance update - direct lighting */ + ccl_global Ray *LightRay_ao_coop, /* Required for radiance update - AO */ + ccl_global float3 *AOBSDF_coop, /* Required for radiance update - AO */ + ccl_global float3 *AOAlpha_coop, /* Required for radiance update - AO */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + ccl_global int *Queue_data, /* Queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize, /* Size (capacity) of each queue */ + ccl_global char *use_queues_flag) /* flag to decide if scene_intersect kernel should + * use queues to fetch ray index */ +{ + ccl_local unsigned int local_queue_atomics; + if(get_local_id(0) == 0 && get_local_id(1) == 0) { + local_queue_atomics = 0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if(get_global_id(0) == 0 && get_global_id(1) == 0) { + /* If we are here, then it means that scene-intersect kernel + * has already been executed atleast once. From the next time, + * scene-intersect kernel may operate on queues to fetch ray index + */ + use_queues_flag[0] = 1; + + /* Mark queue indices of QUEUE_SHADOW_RAY_CAST_AO_RAYS and + * QUEUE_SHADOW_RAY_CAST_DL_RAYS queues that were made empty during the + * previous kernel. + */ + Queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0; + Queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0; + } + + char enqueue_flag = 0; + int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); + ray_index = get_ray_index(ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + Queue_data, + queuesize, + 0); + +#ifdef __COMPUTE_DEVICE_GPU__ + /* If we are executing on a GPU device, we exit all threads that are not + * required. + * + * If we are executing on a CPU device, then we need to keep all threads + * active since we have barrier() calls later in the kernel. CPU devices, + * expect all threads to execute barrier statement. + */ + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } +#endif + +#ifndef __COMPUTE_DEVICE_GPU__ + if(ray_index != QUEUE_EMPTY_SLOT) { +#endif + enqueue_flag = kernel_next_iteration_setup(globals, + data, + shader_data, + rng_coop, + throughput_coop, + PathRadiance_coop, + Ray_coop, + PathState_coop, + LightRay_dl_coop, + ISLamp_coop, + BSDFEval_coop, + LightRay_ao_coop, + AOBSDF_coop, + AOAlpha_coop, + ray_state, + use_queues_flag, + ray_index); +#ifndef __COMPUTE_DEVICE_GPU__ + } +#endif + + /* Enqueue RAY_UPDATE_BUFFER rays. */ + enqueue_ray_index_local(ray_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + enqueue_flag, + queuesize, + &local_queue_atomics, + Queue_data, + Queue_index); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl new file mode 100644 index 00000000000..3156dc255fb --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl @@ -0,0 +1,106 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../../kernel_compat_opencl.h" +#include "../../kernel_math.h" +#include "../../kernel_types.h" +#include "../../kernel_globals.h" +#include "../../kernel_queues.h" + +/* + * The kernel "kernel_queue_enqueue" enqueues rays of + * different ray state into their appropriate Queues; + * 1. Rays that have been determined to hit the background from the + * "kernel_scene_intersect" kernel + * are enqueued in QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS; + * 2. Rays that have been determined to be actively participating in path-iteration will be enqueued into QUEUE_ACTIVE_AND_REGENERATED_RAYS. + * + * The input and output of the kernel is as follows, + * + * ray_state -------------------------------------------|--- kernel_queue_enqueue --|--- Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS & QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS) + * Queue_index(QUEUE_ACTIVE_AND_REGENERATED_RAYS) ------| |--- Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS & QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS) + * Queue_index(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---| | + * queuesize -------------------------------------------| | + * + * Note on Queues : + * State of queues during the first time this kernel is called : + * At entry, + * Both QUEUE_ACTIVE_AND_REGENERATED_RAYS and QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty. + * At exit, + * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE rays + * QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_HIT_BACKGROUND rays. + * + * State of queue during other times this kernel is called : + * At entry, + * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be empty. + * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will contain RAY_TO_REGENERATE and RAY_UPDATE_BUFFER rays. + * At exit, + * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE rays. + * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE, RAY_UPDATE_BUFFER, RAY_HIT_BACKGROUND rays. + */ +__kernel void kernel_ocl_path_trace_queue_enqueue( + ccl_global int *Queue_data, /* Queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + int queuesize) /* Size (capacity) of each queue */ +{ + /* We have only 2 cases (Hit/Not-Hit) */ + ccl_local unsigned int local_queue_atomics[2]; + + int lidx = get_local_id(1) * get_local_size(0) + get_local_id(0); + int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); + + if(lidx < 2 ) { + local_queue_atomics[lidx] = 0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int queue_number = -1; + + if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { + queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS; + } + else if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS; + } + + unsigned int my_lqidx; + if(queue_number != -1) { + my_lqidx = get_local_queue_index(queue_number, local_queue_atomics); + } + barrier(CLK_LOCAL_MEM_FENCE); + + if(lidx == 0) { + local_queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = + get_global_per_queue_offset(QUEUE_ACTIVE_AND_REGENERATED_RAYS, + local_queue_atomics, + Queue_index); + local_queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = + get_global_per_queue_offset(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + local_queue_atomics, + Queue_index); + } + barrier(CLK_LOCAL_MEM_FENCE); + + unsigned int my_gqidx; + if(queue_number != -1) { + my_gqidx = get_global_queue_index(queue_number, + queuesize, + my_lqidx, + local_queue_atomics); + Queue_data[my_gqidx] = ray_index; + } +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl new file mode 100644 index 00000000000..e5fad7bce50 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl @@ -0,0 +1,82 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_scene_intersect.h" + +__kernel void kernel_ocl_path_trace_scene_intersect( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global uint *rng_coop, + ccl_global Ray *Ray_coop, /* Required for scene_intersect */ + ccl_global PathState *PathState_coop, /* Required for scene_intersect */ + Intersection *Intersection_coop, /* Required for scene_intersect */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + int sw, int sh, + ccl_global int *Queue_data, /* Memory for queues */ + ccl_global int *Queue_index, /* Tracks the number of elements in queues */ + int queuesize, /* Size (capacity) of queues */ + ccl_global char *use_queues_flag, /* used to decide if this kernel should use + * queues to fetch ray index */ +#ifdef __KERNEL_DEBUG__ + DebugData *debugdata_coop, +#endif + int parallel_samples) /* Number of samples to be processed in parallel */ +{ + int x = get_global_id(0); + int y = get_global_id(1); + + /* Fetch use_queues_flag */ + ccl_local char local_use_queues_flag; + if(get_local_id(0) == 0 && get_local_id(1) == 0) { + local_use_queues_flag = use_queues_flag[0]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int ray_index; + if(local_use_queues_flag) { + int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0); + ray_index = get_ray_index(thread_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + Queue_data, + queuesize, + 0); + + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } + } else { + if(x < (sw * parallel_samples) && y < sh){ + ray_index = x + y * (sw * parallel_samples); + } else { + return; + } + } + + kernel_scene_intersect(globals, + data, + rng_coop, + Ray_coop, + PathState_coop, + Intersection_coop, + ray_state, + sw, sh, + use_queues_flag, +#ifdef __KERNEL_DEBUG__ + debugdata_coop, +#endif + parallel_samples, + ray_index); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl new file mode 100644 index 00000000000..b9f616e6bdf --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl @@ -0,0 +1,69 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_shader_eval.h" + +__kernel void kernel_ocl_path_trace_shader_eval( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Output ShaderData structure to be filled */ + ccl_global uint *rng_coop, /* Required for rbsdf calculation */ + ccl_global Ray *Ray_coop, /* Required for setting up shader from ray */ + ccl_global PathState *PathState_coop, /* Required for all functions in this kernel */ + Intersection *Intersection_coop, /* Required for setting up shader from ray */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + ccl_global int *Queue_data, /* queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize) /* Size (capacity) of each queue */ +{ + /* Enqeueue RAY_TO_REGENERATE rays into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */ + ccl_local unsigned int local_queue_atomics; + if(get_local_id(0) == 0 && get_local_id(1) == 0) { + local_queue_atomics = 0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); + ray_index = get_ray_index(ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + Queue_data, + queuesize, + 0); + + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } + + char enqueue_flag = (IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) ? 1 : 0; + enqueue_ray_index_local(ray_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + enqueue_flag, + queuesize, + &local_queue_atomics, + Queue_data, + Queue_index); + + /* Continue on with shader evaluation. */ + kernel_shader_eval(globals, + data, + shader_data, + rng_coop, + Ray_coop, + PathState_coop, + Intersection_coop, + ray_state, + ray_index); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl new file mode 100644 index 00000000000..03886c0a030 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl @@ -0,0 +1,83 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_shadow_blocked.h" + +__kernel void kernel_ocl_path_trace_shadow_blocked( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_shadow, /* Required for shadow blocked */ + ccl_global PathState *PathState_coop, /* Required for shadow blocked */ + ccl_global Ray *LightRay_dl_coop, /* Required for direct lighting's shadow blocked */ + ccl_global Ray *LightRay_ao_coop, /* Required for AO's shadow blocked */ + Intersection *Intersection_coop_AO, + Intersection *Intersection_coop_DL, + ccl_global char *ray_state, + ccl_global int *Queue_data, /* Queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize, /* Size (capacity) of each queue */ + int total_num_rays) +{ +#if 0 + /* We will make the Queue_index entries '0' in the next kernel. */ + if(get_global_id(0) == 0 && get_global_id(1) == 0) { + /* We empty this queue here */ + Queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0; + Queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0; + } +#endif + + int lidx = get_local_id(1) * get_local_id(0) + get_local_id(0); + + ccl_local unsigned int ao_queue_length; + ccl_local unsigned int dl_queue_length; + if(lidx == 0) { + ao_queue_length = Queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS]; + dl_queue_length = Queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + /* flag determining if the current ray is to process shadow ray for AO or DL */ + char shadow_blocked_type = -1; + + int ray_index = QUEUE_EMPTY_SLOT; + int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0); + if(thread_index < ao_queue_length + dl_queue_length) { + if(thread_index < ao_queue_length) { + ray_index = get_ray_index(thread_index, QUEUE_SHADOW_RAY_CAST_AO_RAYS, Queue_data, queuesize, 1); + shadow_blocked_type = RAY_SHADOW_RAY_CAST_AO; + } else { + ray_index = get_ray_index(thread_index - ao_queue_length, QUEUE_SHADOW_RAY_CAST_DL_RAYS, Queue_data, queuesize, 1); + shadow_blocked_type = RAY_SHADOW_RAY_CAST_DL; + } + } + + if(ray_index == QUEUE_EMPTY_SLOT) + return; + + kernel_shadow_blocked(globals, + data, + shader_shadow, + PathState_coop, + LightRay_dl_coop, + LightRay_ao_coop, + Intersection_coop_AO, + Intersection_coop_DL, + ray_state, + total_num_rays, + shadow_blocked_type, + ray_index); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl b/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl new file mode 100644 index 00000000000..88a1ed830af --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl @@ -0,0 +1,38 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_sum_all_radiance.h" + +__kernel void kernel_ocl_path_trace_sum_all_radiance( + ccl_constant KernelData *data, /* To get pass_stride to offet into buffer */ + ccl_global float *buffer, /* Output buffer of RenderTile */ + ccl_global float *per_sample_output_buffer, /* Radiance contributed by all samples */ + int parallel_samples, int sw, int sh, int stride, + int buffer_offset_x, + int buffer_offset_y, + int buffer_stride, + int start_sample) +{ + kernel_sum_all_radiance(data, + buffer, + per_sample_output_buffer, + parallel_samples, + sw, sh, stride, + buffer_offset_x, + buffer_offset_y, + buffer_stride, + start_sample); +} diff --git a/intern/cycles/kernel/osl/SConscript b/intern/cycles/kernel/osl/SConscript index 4685bb7753e..74ba5e1020c 100644 --- a/intern/cycles/kernel/osl/SConscript +++ b/intern/cycles/kernel/osl/SConscript @@ -38,11 +38,38 @@ incs.append(env['BF_OIIO_INC']) incs.append(env['BF_BOOST_INC']) incs.append(env['BF_OSL_INC']) incs.append(env['BF_OPENEXR_INC'].split()) +incs.append('#/intern/atomic') defs.append('CCL_NAMESPACE_BEGIN=namespace ccl {') defs.append('CCL_NAMESPACE_END=}') defs.append('WITH_OSL') +if env['WITH_UNORDERED_MAP_SUPPORT']: + if env['UNORDERED_MAP_HEADER'] == 'unordered_map': + if env['UNORDERED_MAP_NAMESPACE'] == 'std': + defs.append('CYCLES_STD_UNORDERED_MAP') + elif env['UNORDERED_MAP_NAMESPACE'] == 'std::tr1': + defs.append('CYCLES_STD_UNORDERED_MAP_IN_TR1_NAMESPACE') + elif env['UNORDERED_MAP_NAMESPACE'] == 'std::tr1': + defs.append('CYCLES_TR1_UNORDERED_MAP') +else: + print("-- Replacing unordered_map/set with map/set (warning: slower!)") + defs.append('CYCLES_NO_UNORDERED_MAP') + +if env['WITH_BF_CYCLES_DEBUG']: + defs.append('WITH_CYCLES_DEBUG') + +if env['WITH_BF_CYCLES_LOGGING']: + defs.append('WITH_CYCLES_LOGGING') + defs.append('GOOGLE_GLOG_DLL_DECL=') + defs.append('CYCLES_GFLAGS_NAMESPACE=gflags') + if env['OURPLATFORM'] in ('win32-vc', 'win32-mingw', 'linuxcross', 'win64-vc', 'win64-mingw'): + incs.append('#extern/libmv/third_party/glog/src/windows') + incs.append('#extern/libmv/third_party/gflags') + else: + incs.append('#extern/libmv/third_party/glog/src') + incs.append('#extern/libmv/third_party/gflags') + if env['OURPLATFORM'] in ('win32-vc', 'win64-vc'): cxxflags.append('-DBOOST_NO_RTTI -DBOOST_NO_TYPEID /fp:fast'.split()) incs.append(env['BF_PTHREADS_INC']) diff --git a/intern/cycles/kernel/osl/background.cpp b/intern/cycles/kernel/osl/background.cpp index 2facced0914..4d70bc80006 100644 --- a/intern/cycles/kernel/osl/background.cpp +++ b/intern/cycles/kernel/osl/background.cpp @@ -77,7 +77,7 @@ public: ClosureParam *closure_background_params() { static ClosureParam params[] = { - CLOSURE_STRING_KEYPARAM("label"), + CLOSURE_STRING_KEYPARAM(GenericBackgroundClosure, label, "label"), CLOSURE_FINISH_PARAM(GenericBackgroundClosure) }; return params; @@ -98,7 +98,7 @@ CCLOSURE_PREPARE(closure_holdout_prepare, HoldoutClosure) ClosureParam *closure_ambient_occlusion_params() { static ClosureParam params[] = { - CLOSURE_STRING_KEYPARAM("label"), + CLOSURE_STRING_KEYPARAM(AmbientOcclusionClosure, label, "label"), CLOSURE_FINISH_PARAM(AmbientOcclusionClosure) }; return params; diff --git a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp index 8f9c2efd470..b3c71e4a706 100644 --- a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp +++ b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp @@ -34,6 +34,7 @@ #include <OSL/genclosure.h> +#include "kernel_compat_cpu.h" #include "osl_closures.h" #include "kernel_types.h" @@ -92,7 +93,7 @@ ClosureParam *closure_bsdf_diffuse_ramp_params() static ClosureParam params[] = { CLOSURE_FLOAT3_PARAM(DiffuseRampClosure, sc.N), CLOSURE_COLOR_ARRAY_PARAM(DiffuseRampClosure, colors, 8), - CLOSURE_STRING_KEYPARAM("label"), + CLOSURE_STRING_KEYPARAM(DiffuseRampClosure, label, "label"), CLOSURE_FINISH_PARAM(DiffuseRampClosure) }; return params; diff --git a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp index c5851747b54..99f510d31ed 100644 --- a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp +++ b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp @@ -34,6 +34,7 @@ #include <OSL/genclosure.h> +#include "kernel_compat_cpu.h" #include "osl_closures.h" #include "kernel_types.h" @@ -92,7 +93,7 @@ ClosureParam *closure_bsdf_phong_ramp_params() CLOSURE_FLOAT3_PARAM(PhongRampClosure, sc.N), CLOSURE_FLOAT_PARAM(PhongRampClosure, sc.data0), CLOSURE_COLOR_ARRAY_PARAM(PhongRampClosure, colors, 8), - CLOSURE_STRING_KEYPARAM("label"), + CLOSURE_STRING_KEYPARAM(PhongRampClosure, label, "label"), CLOSURE_FINISH_PARAM(PhongRampClosure) }; return params; diff --git a/intern/cycles/kernel/osl/emissive.cpp b/intern/cycles/kernel/osl/emissive.cpp index 02935542c56..9a95fa57a81 100644 --- a/intern/cycles/kernel/osl/emissive.cpp +++ b/intern/cycles/kernel/osl/emissive.cpp @@ -77,7 +77,7 @@ public: ClosureParam *closure_emission_params() { static ClosureParam params[] = { - CLOSURE_STRING_KEYPARAM("label"), + CLOSURE_STRING_KEYPARAM(GenericEmissiveClosure, label, "label"), CLOSURE_FINISH_PARAM(GenericEmissiveClosure) }; return params; diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp b/intern/cycles/kernel/osl/osl_bssrdf.cpp index 84ef85e089d..bc395922077 100644 --- a/intern/cycles/kernel/osl/osl_bssrdf.cpp +++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp @@ -34,6 +34,7 @@ #include <OSL/genclosure.h> +#include "kernel_compat_cpu.h" #include "osl_bssrdf.h" #include "osl_closures.h" @@ -68,7 +69,7 @@ ClosureParam *closure_bssrdf_cubic_params() CLOSURE_FLOAT3_PARAM(CubicBSSRDFClosure, radius), CLOSURE_FLOAT_PARAM(CubicBSSRDFClosure, sc.data1), CLOSURE_FLOAT_PARAM(CubicBSSRDFClosure, sc.T.x), - CLOSURE_STRING_KEYPARAM("label"), + CLOSURE_STRING_KEYPARAM(CubicBSSRDFClosure, label, "label"), CLOSURE_FINISH_PARAM(CubicBSSRDFClosure) }; return params; @@ -96,7 +97,7 @@ ClosureParam *closure_bssrdf_gaussian_params() CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, sc.N), CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, radius), CLOSURE_FLOAT_PARAM(GaussianBSSRDFClosure, sc.data1), - CLOSURE_STRING_KEYPARAM("label"), + CLOSURE_STRING_KEYPARAM(GaussianBSSRDFClosure, label, "label"), CLOSURE_FINISH_PARAM(GaussianBSSRDFClosure) }; return params; diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp index d7789edcfff..461ce8f7598 100644 --- a/intern/cycles/kernel/osl/osl_closures.cpp +++ b/intern/cycles/kernel/osl/osl_closures.cpp @@ -54,7 +54,6 @@ #include "closure/bsdf_refraction.h" #include "closure/bsdf_transparent.h" #include "closure/bsdf_ashikhmin_shirley.h" -#include "closure/bsdf_westin.h" #include "closure/bsdf_toon.h" #include "closure/bsdf_hair.h" #include "closure/volume.h" @@ -87,16 +86,6 @@ BSDF_CLOSURE_CLASS_BEGIN(Refraction, refraction, refraction, LABEL_SINGULAR) CLOSURE_FLOAT_PARAM(RefractionClosure, sc.data0), BSDF_CLOSURE_CLASS_END(Refraction, refraction) -BSDF_CLOSURE_CLASS_BEGIN(WestinBackscatter, westin_backscatter, westin_backscatter, LABEL_GLOSSY) - CLOSURE_FLOAT3_PARAM(WestinBackscatterClosure, sc.N), - CLOSURE_FLOAT_PARAM(WestinBackscatterClosure, sc.data0), -BSDF_CLOSURE_CLASS_END(WestinBackscatter, westin_backscatter) - -BSDF_CLOSURE_CLASS_BEGIN(WestinSheen, westin_sheen, westin_sheen, LABEL_DIFFUSE) - CLOSURE_FLOAT3_PARAM(WestinSheenClosure, sc.N), - CLOSURE_FLOAT_PARAM(WestinSheenClosure, sc.data0), -BSDF_CLOSURE_CLASS_END(WestinSheen, westin_sheen) - BSDF_CLOSURE_CLASS_BEGIN(Transparent, transparent, transparent, LABEL_SINGULAR) BSDF_CLOSURE_CLASS_END(Transparent, transparent) @@ -164,26 +153,16 @@ BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, hair_reflection, LABEL CLOSURE_FLOAT3_PARAM(HairReflectionClosure, sc.N), CLOSURE_FLOAT_PARAM(HairReflectionClosure, sc.data0), CLOSURE_FLOAT_PARAM(HairReflectionClosure, sc.data1), -#ifdef __HAIR__ CLOSURE_FLOAT3_PARAM(HairReflectionClosure, sc.T), CLOSURE_FLOAT_PARAM(HairReflectionClosure, sc.data2), -#else - CLOSURE_FLOAT3_PARAM(HairReflectionClosure, sc.N), - CLOSURE_FLOAT_PARAM(HairReflectionClosure, sc.data1), -#endif BSDF_CLOSURE_CLASS_END(HairReflection, hair_reflection) BSDF_CLOSURE_CLASS_BEGIN(HairTransmission, hair_transmission, hair_transmission, LABEL_GLOSSY) CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, sc.N), CLOSURE_FLOAT_PARAM(HairTransmissionClosure, sc.data0), CLOSURE_FLOAT_PARAM(HairTransmissionClosure, sc.data1), -#ifdef __HAIR__ CLOSURE_FLOAT3_PARAM(HairReflectionClosure, sc.T), CLOSURE_FLOAT_PARAM(HairReflectionClosure, sc.data2), -#else - CLOSURE_FLOAT3_PARAM(HairReflectionClosure, sc.N), - CLOSURE_FLOAT_PARAM(HairReflectionClosure, sc.data1), -#endif BSDF_CLOSURE_CLASS_END(HairTransmission, hair_transmission) VOLUME_CLOSURE_CLASS_BEGIN(VolumeHenyeyGreenstein, henyey_greenstein, LABEL_VOLUME_SCATTER) @@ -200,11 +179,7 @@ static void register_closure(OSL::ShadingSystem *ss, const char *name, int id, O /* optimization: it's possible to not use a prepare function at all and * only initialize the actual class when accessing the closure component * data, but then we need to map the id to the class somehow */ -#ifdef CLOSURE_PREPARE - ss->register_closure(name, id, params, prepare, NULL, NULL); -#else - ss->register_closure(name, id, params, prepare, NULL); -#endif + ss->register_closure(name, id, params, prepare, NULL, 16); } void OSLShader::register_closures(OSLShadingSystem *ss_) @@ -244,10 +219,6 @@ void OSLShader::register_closures(OSLShadingSystem *ss_) bsdf_diffuse_toon_params(), bsdf_diffuse_toon_prepare); register_closure(ss, "glossy_toon", id++, bsdf_glossy_toon_params(), bsdf_glossy_toon_prepare); - register_closure(ss, "westin_backscatter", id++, - bsdf_westin_backscatter_params(), bsdf_westin_backscatter_prepare); - register_closure(ss, "westin_sheen", id++, - bsdf_westin_sheen_params(), bsdf_westin_sheen_prepare); register_closure(ss, "emission", id++, closure_emission_params(), closure_emission_prepare); diff --git a/intern/cycles/kernel/osl/osl_closures.h b/intern/cycles/kernel/osl/osl_closures.h index 58d215295dc..97bd1b1ac92 100644 --- a/intern/cycles/kernel/osl/osl_closures.h +++ b/intern/cycles/kernel/osl/osl_closures.h @@ -48,8 +48,6 @@ OSL::ClosureParam *closure_holdout_params(); OSL::ClosureParam *closure_ambient_occlusion_params(); OSL::ClosureParam *closure_bsdf_diffuse_ramp_params(); OSL::ClosureParam *closure_bsdf_phong_ramp_params(); -OSL::ClosureParam *closure_westin_backscatter_params(); -OSL::ClosureParam *closure_westin_sheen_params(); OSL::ClosureParam *closure_bssrdf_cubic_params(); OSL::ClosureParam *closure_bssrdf_gaussian_params(); OSL::ClosureParam *closure_henyey_greenstein_volume_params(); @@ -60,8 +58,6 @@ void closure_holdout_prepare(OSL::RendererServices *, int id, void *data); void closure_ambient_occlusion_prepare(OSL::RendererServices *, int id, void *data); void closure_bsdf_diffuse_ramp_prepare(OSL::RendererServices *, int id, void *data); void closure_bsdf_phong_ramp_prepare(OSL::RendererServices *, int id, void *data); -void closure_westin_backscatter_prepare(OSL::RendererServices *, int id, void *data); -void closure_westin_sheen_prepare(OSL::RendererServices *, int id, void *data); void closure_bssrdf_cubic_prepare(OSL::RendererServices *, int id, void *data); void closure_bssrdf_gaussian_prepare(OSL::RendererServices *, int id, void *data); void closure_henyey_greenstein_volume_prepare(OSL::RendererServices *, int id, void *data); @@ -82,6 +78,11 @@ void name(RendererServices *, int id, void *data) \ #define TO_COLOR3(v) OSL::Color3(v.x, v.y, v.z) #define TO_FLOAT3(v) make_float3(v[0], v[1], v[2]) +#if OSL_LIBRARY_VERSION_CODE < 10700 +# undef CLOSURE_STRING_KEYPARAM +# define CLOSURE_STRING_KEYPARAM(st, fld, key) { TypeDesc::TypeString, 0, key, 0 } +#endif + /* Closure */ class CClosurePrimitive { @@ -101,6 +102,10 @@ public: virtual void setup() {} Category category; + +#if OSL_LIBRARY_VERSION_CODE >= 10700 + OSL::ustring label; +#endif }; /* BSDF */ @@ -151,14 +156,14 @@ public: \ \ float3 eval_reflect(const float3 &omega_out, const float3 &omega_in, float& pdf) const \ { \ - pdf = 0; \ - return make_float3(0, 0, 0); \ + pdf = 0.0f; \ + return make_float3(0.0f, 0.0f, 0.0f); \ } \ \ float3 eval_transmit(const float3 &omega_out, const float3 &omega_in, float& pdf) const \ { \ - pdf = 0; \ - return make_float3(0, 0, 0); \ + pdf = 0.0f; \ + return make_float3(0.0f, 0.0f, 0.0f); \ } \ \ int sample(const float3 &Ng, \ @@ -179,7 +184,7 @@ static ClosureParam *bsdf_##lower##_params() \ /* parameters */ #define BSDF_CLOSURE_CLASS_END(Upper, lower) \ - CLOSURE_STRING_KEYPARAM("label"), \ + CLOSURE_STRING_KEYPARAM(Upper##Closure, label, "label"), \ CLOSURE_FINISH_PARAM(Upper##Closure) \ }; \ return params; \ @@ -227,7 +232,7 @@ static ClosureParam *volume_##lower##_params() \ /* parameters */ #define VOLUME_CLOSURE_CLASS_END(Upper, lower) \ - CLOSURE_STRING_KEYPARAM("label"), \ + CLOSURE_STRING_KEYPARAM(Upper##Closure, label, "label"), \ CLOSURE_FINISH_PARAM(Upper##Closure) \ }; \ return params; \ diff --git a/intern/cycles/kernel/osl/osl_globals.h b/intern/cycles/kernel/osl/osl_globals.h index 5a658d8244a..e349ac676b0 100644 --- a/intern/cycles/kernel/osl/osl_globals.h +++ b/intern/cycles/kernel/osl/osl_globals.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __OSL_GLOBALS_H__ @@ -20,7 +20,6 @@ #ifdef WITH_OSL #include <OSL/oslexec.h> -#include <cmath> #include "util_map.h" #include "util_param.h" diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index 1475e5a0a62..3c1955a1e1e 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -11,9 +11,18 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ +/* TODO(sergey): There is a bit of headers dependency hell going on + * here, so for now we just put here. In the future it might be better + * to have dedicated file for such tweaks. + */ +#if defined(__GNUC__) && defined(NDEBUG) +# pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +# pragma GCC diagnostic ignored "-Wuninitialized" +#endif + #include <string.h> #include "mesh.h" @@ -130,12 +139,12 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result { /* this is only used for shader and object space, we don't really have * a concept of shader space, so we just use object space for both. */ - if (xform) { + if(xform) { const ShaderData *sd = (const ShaderData *)xform; KernelGlobals *kg = sd->osl_globals; int object = sd->object; - if (object != OBJECT_NONE) { + if(object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ Transform tfm; @@ -160,12 +169,12 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 { /* this is only used for shader and object space, we don't really have * a concept of shader space, so we just use object space for both. */ - if (xform) { + if(xform) { const ShaderData *sd = (const ShaderData *)xform; KernelGlobals *kg = sd->osl_globals; int object = sd->object; - if (object != OBJECT_NONE) { + if(object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ Transform itfm; @@ -190,27 +199,27 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result { KernelGlobals *kg = kernel_globals; - if (from == u_ndc) { + if(from == u_ndc) { Transform tfm = transform_transpose(transform_quick_inverse(kernel_data.cam.worldtondc)); COPY_MATRIX44(&result, &tfm); return true; } - else if (from == u_raster) { + else if(from == u_raster) { Transform tfm = transform_transpose(kernel_data.cam.rastertoworld); COPY_MATRIX44(&result, &tfm); return true; } - else if (from == u_screen) { + else if(from == u_screen) { Transform tfm = transform_transpose(kernel_data.cam.screentoworld); COPY_MATRIX44(&result, &tfm); return true; } - else if (from == u_camera) { + else if(from == u_camera) { Transform tfm = transform_transpose(kernel_data.cam.cameratoworld); COPY_MATRIX44(&result, &tfm); return true; } - else if (from == u_world) { + else if(from == u_world) { result.makeIdentity(); return true; } @@ -222,27 +231,27 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 { KernelGlobals *kg = kernel_globals; - if (to == u_ndc) { + if(to == u_ndc) { Transform tfm = transform_transpose(kernel_data.cam.worldtondc); COPY_MATRIX44(&result, &tfm); return true; } - else if (to == u_raster) { + else if(to == u_raster) { Transform tfm = transform_transpose(kernel_data.cam.worldtoraster); COPY_MATRIX44(&result, &tfm); return true; } - else if (to == u_screen) { + else if(to == u_screen) { Transform tfm = transform_transpose(kernel_data.cam.worldtoscreen); COPY_MATRIX44(&result, &tfm); return true; } - else if (to == u_camera) { + else if(to == u_camera) { Transform tfm = transform_transpose(kernel_data.cam.worldtocamera); COPY_MATRIX44(&result, &tfm); return true; } - else if (to == u_world) { + else if(to == u_world) { result.makeIdentity(); return true; } @@ -254,11 +263,11 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result { /* this is only used for shader and object space, we don't really have * a concept of shader space, so we just use object space for both. */ - if (xform) { + if(xform) { const ShaderData *sd = (const ShaderData *)xform; int object = sd->object; - if (object != OBJECT_NONE) { + if(object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ Transform tfm = sd->ob_tfm; #else @@ -279,11 +288,11 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 { /* this is only used for shader and object space, we don't really have * a concept of shader space, so we just use object space for both. */ - if (xform) { + if(xform) { const ShaderData *sd = (const ShaderData *)xform; int object = sd->object; - if (object != OBJECT_NONE) { + if(object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ Transform tfm = sd->ob_itfm; #else @@ -304,22 +313,22 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result { KernelGlobals *kg = kernel_globals; - if (from == u_ndc) { + if(from == u_ndc) { Transform tfm = transform_transpose(transform_quick_inverse(kernel_data.cam.worldtondc)); COPY_MATRIX44(&result, &tfm); return true; } - else if (from == u_raster) { + else if(from == u_raster) { Transform tfm = transform_transpose(kernel_data.cam.rastertoworld); COPY_MATRIX44(&result, &tfm); return true; } - else if (from == u_screen) { + else if(from == u_screen) { Transform tfm = transform_transpose(kernel_data.cam.screentoworld); COPY_MATRIX44(&result, &tfm); return true; } - else if (from == u_camera) { + else if(from == u_camera) { Transform tfm = transform_transpose(kernel_data.cam.cameratoworld); COPY_MATRIX44(&result, &tfm); return true; @@ -332,22 +341,22 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 { KernelGlobals *kg = kernel_globals; - if (to == u_ndc) { + if(to == u_ndc) { Transform tfm = transform_transpose(kernel_data.cam.worldtondc); COPY_MATRIX44(&result, &tfm); return true; } - else if (to == u_raster) { + else if(to == u_raster) { Transform tfm = transform_transpose(kernel_data.cam.worldtoraster); COPY_MATRIX44(&result, &tfm); return true; } - else if (to == u_screen) { + else if(to == u_screen) { Transform tfm = transform_transpose(kernel_data.cam.worldtoscreen); COPY_MATRIX44(&result, &tfm); return true; } - else if (to == u_camera) { + else if(to == u_camera) { Transform tfm = transform_transpose(kernel_data.cam.worldtocamera); COPY_MATRIX44(&result, &tfm); return true; @@ -365,8 +374,8 @@ bool OSLRenderServices::get_array_attribute(OSL::ShaderGlobals *sg, bool derivat static bool set_attribute_float3(float3 f[3], TypeDesc type, bool derivatives, void *val) { - if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector || - type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) + if(type == TypeDesc::TypePoint || type == TypeDesc::TypeVector || + type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) { float *fval = (float *)val; @@ -374,7 +383,7 @@ static bool set_attribute_float3(float3 f[3], TypeDesc type, bool derivatives, v fval[1] = f[0].y; fval[2] = f[0].z; - if (derivatives) { + if(derivatives) { fval[3] = f[1].x; fval[4] = f[1].y; fval[5] = f[1].z; @@ -390,7 +399,7 @@ static bool set_attribute_float3(float3 f[3], TypeDesc type, bool derivatives, v float *fval = (float *)val; fval[0] = average(f[0]); - if (derivatives) { + if(derivatives) { fval[1] = average(f[1]); fval[2] = average(f[2]); } @@ -414,15 +423,15 @@ static bool set_attribute_float3(float3 f, TypeDesc type, bool derivatives, void static bool set_attribute_float(float f[3], TypeDesc type, bool derivatives, void *val) { - if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector || - type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) + if(type == TypeDesc::TypePoint || type == TypeDesc::TypeVector || + type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) { float *fval = (float *)val; fval[0] = f[0]; fval[1] = f[1]; fval[2] = f[2]; - if (derivatives) { + if(derivatives) { fval[3] = f[1]; fval[4] = f[1]; fval[5] = f[1]; @@ -438,7 +447,7 @@ static bool set_attribute_float(float f[3], TypeDesc type, bool derivatives, voi float *fval = (float *)val; fval[0] = f[0]; - if (derivatives) { + if(derivatives) { fval[1] = f[1]; fval[2] = f[2]; } @@ -466,7 +475,7 @@ static bool set_attribute_int(int i, TypeDesc type, bool derivatives, void *val) int *ival = (int *)val; ival[0] = i; - if (derivatives) { + if(derivatives) { ival[1] = 0; ival[2] = 0; } @@ -483,7 +492,7 @@ static bool set_attribute_string(ustring str, TypeDesc type, bool derivatives, v ustring *sval = (ustring *)val; sval[0] = str; - if (derivatives) { + if(derivatives) { sval[1] = OSLRenderServices::u_empty; sval[2] = OSLRenderServices::u_empty; } @@ -513,7 +522,7 @@ static bool set_attribute_float3_3(float3 P[3], TypeDesc type, bool derivatives, if(type.arraylen > 3) memset(fval + 3*3, 0, sizeof(float)*3*(type.arraylen - 3)); - if (derivatives) + if(derivatives) memset(fval + type.arraylen*3, 0, sizeof(float)*2*3*type.arraylen); return true; @@ -536,15 +545,15 @@ static bool set_attribute_matrix(const Transform& tfm, TypeDesc type, void *val) static bool get_mesh_element_attribute(KernelGlobals *kg, const ShaderData *sd, const OSLGlobals::Attribute& attr, const TypeDesc& type, bool derivatives, void *val) { - if (attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector || - attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor) + if(attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector || + attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor) { float3 fval[3]; fval[0] = primitive_attribute_float3(kg, sd, attr.elem, attr.offset, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); return set_attribute_float3(fval, type, derivatives, val); } - else if (attr.type == TypeDesc::TypeFloat) { + else if(attr.type == TypeDesc::TypeFloat) { float fval[3]; fval[0] = primitive_attribute_float(kg, sd, attr.elem, attr.offset, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); @@ -558,7 +567,7 @@ static bool get_mesh_element_attribute(KernelGlobals *kg, const ShaderData *sd, static bool get_mesh_attribute(KernelGlobals *kg, const ShaderData *sd, const OSLGlobals::Attribute& attr, const TypeDesc& type, bool derivatives, void *val) { - if (attr.type == TypeDesc::TypeMatrix) { + if(attr.type == TypeDesc::TypeMatrix) { Transform tfm = primitive_attribute_matrix(kg, sd, attr.offset); return set_attribute_matrix(tfm, type, val); } @@ -572,7 +581,7 @@ static void get_object_attribute(const OSLGlobals::Attribute& attr, bool derivat size_t datasize = attr.value.datasize(); memcpy(val, attr.value.data(), datasize); - if (derivatives) + if(derivatives) memset((char *)val + datasize, 0, datasize * 2); } @@ -582,80 +591,80 @@ bool OSLRenderServices::get_object_standard_attribute(KernelGlobals *kg, ShaderD /* todo: turn this into hash table? */ /* Object Attributes */ - if (name == u_object_location) { + if(name == u_object_location) { float3 f = object_location(kg, sd); return set_attribute_float3(f, type, derivatives, val); } - else if (name == u_object_index) { + else if(name == u_object_index) { float f = object_pass_id(kg, sd->object); return set_attribute_float(f, type, derivatives, val); } - else if (name == u_geom_dupli_generated) { + else if(name == u_geom_dupli_generated) { float3 f = object_dupli_generated(kg, sd->object); return set_attribute_float3(f, type, derivatives, val); } - else if (name == u_geom_dupli_uv) { + else if(name == u_geom_dupli_uv) { float3 f = object_dupli_uv(kg, sd->object); return set_attribute_float3(f, type, derivatives, val); } - else if (name == u_material_index) { + else if(name == u_material_index) { float f = shader_pass_id(kg, sd); return set_attribute_float(f, type, derivatives, val); } - else if (name == u_object_random) { + else if(name == u_object_random) { float f = object_random_number(kg, sd->object); return set_attribute_float(f, type, derivatives, val); } /* Particle Attributes */ - else if (name == u_particle_index) { + else if(name == u_particle_index) { int particle_id = object_particle_id(kg, sd->object); float f = particle_index(kg, particle_id); return set_attribute_float(f, type, derivatives, val); } - else if (name == u_particle_age) { + else if(name == u_particle_age) { int particle_id = object_particle_id(kg, sd->object); float f = particle_age(kg, particle_id); return set_attribute_float(f, type, derivatives, val); } - else if (name == u_particle_lifetime) { + else if(name == u_particle_lifetime) { int particle_id = object_particle_id(kg, sd->object); float f = particle_lifetime(kg, particle_id); return set_attribute_float(f, type, derivatives, val); } - else if (name == u_particle_location) { + else if(name == u_particle_location) { int particle_id = object_particle_id(kg, sd->object); float3 f = particle_location(kg, particle_id); return set_attribute_float3(f, type, derivatives, val); } #if 0 /* unsupported */ - else if (name == u_particle_rotation) { + else if(name == u_particle_rotation) { int particle_id = object_particle_id(kg, sd->object); float4 f = particle_rotation(kg, particle_id); return set_attribute_float4(f, type, derivatives, val); } #endif - else if (name == u_particle_size) { + else if(name == u_particle_size) { int particle_id = object_particle_id(kg, sd->object); float f = particle_size(kg, particle_id); return set_attribute_float(f, type, derivatives, val); } - else if (name == u_particle_velocity) { + else if(name == u_particle_velocity) { int particle_id = object_particle_id(kg, sd->object); float3 f = particle_velocity(kg, particle_id); return set_attribute_float3(f, type, derivatives, val); } - else if (name == u_particle_angular_velocity) { + else if(name == u_particle_angular_velocity) { int particle_id = object_particle_id(kg, sd->object); float3 f = particle_angular_velocity(kg, particle_id); return set_attribute_float3(f, type, derivatives, val); } /* Geometry Attributes */ - else if (name == u_geom_numpolyvertices) { + else if(name == u_geom_numpolyvertices) { return set_attribute_int(3, type, derivatives, val); } - else if ((name == u_geom_trianglevertices || name == u_geom_polyvertices) + else if((name == u_geom_trianglevertices || name == u_geom_polyvertices) #ifdef __HAIR__ && sd->type & PRIMITIVE_ALL_TRIANGLE) #else @@ -681,21 +690,21 @@ bool OSLRenderServices::get_object_standard_attribute(KernelGlobals *kg, ShaderD ustring object_name = kg->osl->object_names[sd->object]; return set_attribute_string(object_name, type, derivatives, val); } - else if (name == u_is_smooth) { + else if(name == u_is_smooth) { float f = ((sd->shader & SHADER_SMOOTH_NORMAL) != 0); return set_attribute_float(f, type, derivatives, val); } #ifdef __HAIR__ /* Hair Attributes */ - else if (name == u_is_curve) { + else if(name == u_is_curve) { float f = (sd->type & PRIMITIVE_ALL_CURVE) != 0; return set_attribute_float(f, type, derivatives, val); } - else if (name == u_curve_thickness) { + else if(name == u_curve_thickness) { float f = curve_thickness(kg, sd); return set_attribute_float(f, type, derivatives, val); } - else if (name == u_curve_tangent_normal) { + else if(name == u_curve_tangent_normal) { float3 f = curve_tangent_normal(kg, sd); return set_attribute_float3(f, type, derivatives, val); } @@ -707,22 +716,22 @@ bool OSLRenderServices::get_object_standard_attribute(KernelGlobals *kg, ShaderD bool OSLRenderServices::get_background_attribute(KernelGlobals *kg, ShaderData *sd, ustring name, TypeDesc type, bool derivatives, void *val) { - if (name == u_path_ray_length) { + if(name == u_path_ray_length) { /* Ray Length */ float f = sd->ray_length; return set_attribute_float(f, type, derivatives, val); } - else if (name == u_path_ray_depth) { + else if(name == u_path_ray_depth) { /* Ray Depth */ int f = sd->ray_depth; return set_attribute_int(f, type, derivatives, val); } - else if (name == u_path_transparent_depth) { + else if(name == u_path_transparent_depth) { /* Transparent Ray Depth */ int f = sd->transparent_depth; return set_attribute_int(f, type, derivatives, val); } - else if (name == u_ndc) { + else if(name == u_ndc) { /* NDC coordinates with special exception for otho */ OSLThreadData *tdata = kg->osl_tdata; OSL::ShaderGlobals *globals = &tdata->globals; @@ -754,7 +763,7 @@ bool OSLRenderServices::get_background_attribute(KernelGlobals *kg, ShaderData * bool OSLRenderServices::get_attribute(OSL::ShaderGlobals *sg, bool derivatives, ustring object_name, TypeDesc type, ustring name, void *val) { - if (sg->renderstate == NULL) + if(sg->renderstate == NULL) return false; ShaderData *sd = (ShaderData *)(sg->renderstate); @@ -769,10 +778,10 @@ bool OSLRenderServices::get_attribute(ShaderData *sd, bool derivatives, ustring int object; /* lookup of attribute on another object */ - if (object_name != u_empty) { + if(object_name != u_empty) { OSLGlobals::ObjectNameMap::iterator it = kg->osl->object_name_map.find(object_name); - if (it == kg->osl->object_name_map.end()) + if(it == kg->osl->object_name_map.end()) return false; object = it->second; @@ -782,7 +791,7 @@ bool OSLRenderServices::get_attribute(ShaderData *sd, bool derivatives, ustring object = sd->object; is_curve = (sd->type & PRIMITIVE_ALL_CURVE) != 0; - if (object == OBJECT_NONE) + if(object == OBJECT_NONE) return get_background_attribute(kg, sd, name, type, derivatives, val); } @@ -791,10 +800,10 @@ bool OSLRenderServices::get_attribute(ShaderData *sd, bool derivatives, ustring OSLGlobals::AttributeMap& attribute_map = kg->osl->attribute_map[object]; OSLGlobals::AttributeMap::iterator it = attribute_map.find(name); - if (it != attribute_map.end()) { + if(it != attribute_map.end()) { const OSLGlobals::Attribute& attr = it->second; - if (attr.elem != ATTR_ELEMENT_OBJECT) { + if(attr.elem != ATTR_ELEMENT_OBJECT) { /* triangle and vertex attributes */ if(get_mesh_element_attribute(kg, sd, attr, type, derivatives, val)) return true; @@ -811,7 +820,7 @@ bool OSLRenderServices::get_attribute(ShaderData *sd, bool derivatives, ustring /* not found in attribute, check standard object info */ bool is_std_object_attribute = get_object_standard_attribute(kg, sd, name, type, derivatives, val); - if (is_std_object_attribute) + if(is_std_object_attribute) return true; return get_background_attribute(kg, sd, name, type, derivatives, val); @@ -834,7 +843,7 @@ bool OSLRenderServices::has_userdata(ustring name, TypeDesc type, OSL::ShaderGlo bool OSLRenderServices::texture(ustring filename, TextureOpt &options, OSL::ShaderGlobals *sg, float s, float t, float dsdx, float dtdx, - float dsdy, float dtdy, float *result) + float dsdy, float dtdy, int nchannels, float *result) { OSL::TextureSystem *ts = osl_ts; ShaderData *sd = (ShaderData *)(sg->renderstate); @@ -869,9 +878,9 @@ bool OSLRenderServices::texture(ustring filename, TextureOpt &options, PtexFilter::Options opts(PtexFilter::f_bicubic, mipmaplerp, sharpness); PtexPtr<PtexFilter> f(PtexFilter::getFilter(r, opts)); - f->eval(result, options.firstchannel, options.nchannels, faceid, u, v, dudx, dvdx, dudy, dvdy); + f->eval(result, options.firstchannel, nchannels, faceid, u, v, dudx, dvdx, dudy, dvdy); - for(int c = r->numChannels(); c < options.nchannels; c++) + for(int c = r->numChannels(); c < nchannels; c++) result[c] = result[0]; return true; @@ -879,16 +888,16 @@ bool OSLRenderServices::texture(ustring filename, TextureOpt &options, #endif bool status; - if(filename[0] == '@' && filename.find('.') == -1) { - int slot = atoi(filename.c_str() + 1); + if(filename[0] == '@') { + int slot = atoi(filename.c_str() + 1); float4 rgba = kernel_tex_image_interp(slot, s, 1.0f - t); result[0] = rgba[0]; - if(options.nchannels > 1) + if(nchannels > 1) result[1] = rgba[1]; - if(options.nchannels > 2) + if(nchannels > 2) result[2] = rgba[2]; - if(options.nchannels > 3) + if(nchannels > 3) result[3] = rgba[3]; status = true; } @@ -898,17 +907,24 @@ bool OSLRenderServices::texture(ustring filename, TextureOpt &options, OIIO::TextureSystem::TextureHandle *th = ts->get_texture_handle(filename, thread_info); +#if OIIO_VERSION < 10500 status = ts->texture(th, thread_info, - options, s, t, dsdx, dtdx, dsdy, dtdy, result); + options, s, t, dsdx, dtdx, dsdy, dtdy, + result); +#else + status = ts->texture(th, thread_info, + options, s, t, dsdx, dtdx, dsdy, dtdy, + nchannels, result); +#endif } if(!status) { - if(options.nchannels == 3 || options.nchannels == 4) { + if(nchannels == 3 || nchannels == 4) { result[0] = 1.0f; result[1] = 0.0f; result[2] = 1.0f; - if(options.nchannels == 4) + if(nchannels == 4) result[3] = 1.0f; } } @@ -919,26 +935,46 @@ bool OSLRenderServices::texture(ustring filename, TextureOpt &options, bool OSLRenderServices::texture3d(ustring filename, TextureOpt &options, OSL::ShaderGlobals *sg, const OSL::Vec3 &P, const OSL::Vec3 &dPdx, const OSL::Vec3 &dPdy, - const OSL::Vec3 &dPdz, float *result) + const OSL::Vec3 &dPdz, int nchannels, float *result) { OSL::TextureSystem *ts = osl_ts; ShaderData *sd = (ShaderData *)(sg->renderstate); KernelGlobals *kg = sd->osl_globals; - OSLThreadData *tdata = kg->osl_tdata; - OIIO::TextureSystem::Perthread *thread_info = tdata->oiio_thread_info; - - OIIO::TextureSystem::TextureHandle *th = ts->get_texture_handle(filename, thread_info); + bool status; + if(filename[0] == '@') { + int slot = atoi(filename.c_str() + 1); + float4 rgba = kernel_tex_image_interp_3d(slot, P.x, P.y, P.z); - bool status = ts->texture3d(th, thread_info, - options, P, dPdx, dPdy, dPdz, result); + result[0] = rgba[0]; + if(nchannels > 1) + result[1] = rgba[1]; + if(nchannels > 2) + result[2] = rgba[2]; + if(nchannels > 3) + result[3] = rgba[3]; + status = true; + } + else { + OSLThreadData *tdata = kg->osl_tdata; + OIIO::TextureSystem::Perthread *thread_info = tdata->oiio_thread_info; + OIIO::TextureSystem::TextureHandle *th = ts->get_texture_handle(filename, thread_info); +#if OIIO_VERSION < 10500 + status = ts->texture3d(th, thread_info, + options, P, dPdx, dPdy, dPdz, result); +#else + status = ts->texture3d(th, thread_info, + options, P, dPdx, dPdy, dPdz, + nchannels, result); +#endif + } if(!status) { - if(options.nchannels == 3 || options.nchannels == 4) { + if(nchannels == 3 || nchannels == 4) { result[0] = 1.0f; result[1] = 0.0f; result[2] = 1.0f; - if(options.nchannels == 4) + if(nchannels == 4) result[3] = 1.0f; } @@ -949,7 +985,8 @@ bool OSLRenderServices::texture3d(ustring filename, TextureOpt &options, bool OSLRenderServices::environment(ustring filename, TextureOpt &options, OSL::ShaderGlobals *sg, const OSL::Vec3 &R, - const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy, float *result) + const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy, + int nchannels, float *result) { OSL::TextureSystem *ts = osl_ts; ShaderData *sd = (ShaderData *)(sg->renderstate); @@ -957,17 +994,24 @@ bool OSLRenderServices::environment(ustring filename, TextureOpt &options, OSLThreadData *tdata = kg->osl_tdata; OIIO::TextureSystem::Perthread *thread_info = tdata->oiio_thread_info; - OIIO::TextureSystem::TextureHandle *th = ts->get_texture_handle(filename, thread_info); + OIIO::TextureSystem::TextureHandle *th = ts->get_texture_handle(filename, thread_info); + +#if OIIO_VERSION < 10500 bool status = ts->environment(th, thread_info, options, R, dRdx, dRdy, result); +#else + bool status = ts->environment(th, thread_info, + options, R, dRdx, dRdy, + nchannels, result); +#endif if(!status) { - if(options.nchannels == 3 || options.nchannels == 4) { + if(nchannels == 3 || nchannels == 4) { result[0] = 1.0f; result[1] = 0.0f; result[2] = 1.0f; - if(options.nchannels == 4) + if(nchannels == 4) result[3] = 1.0f; } } @@ -1018,7 +1062,7 @@ bool OSLRenderServices::trace(TraceOpt &options, OSL::ShaderGlobals *sg, ray.P = TO_FLOAT3(P); ray.D = TO_FLOAT3(R); - ray.t = (options.maxdist == 1.0e30)? FLT_MAX: options.maxdist - options.mindist; + ray.t = (options.maxdist == 1.0e30f)? FLT_MAX: options.maxdist - options.mindist; ray.time = sd->time; if(options.mindist == 0.0f) { diff --git a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h index 6f928a0d103..cb6f2311ad8 100644 --- a/intern/cycles/kernel/osl/osl_services.h +++ b/intern/cycles/kernel/osl/osl_services.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __OSL_SERVICES_H__ @@ -97,16 +97,64 @@ public: bool texture(ustring filename, TextureOpt &options, OSL::ShaderGlobals *sg, float s, float t, float dsdx, float dtdx, - float dsdy, float dtdy, float *result); + float dsdy, float dtdy, int nchannels, float *result); bool texture3d(ustring filename, TextureOpt &options, OSL::ShaderGlobals *sg, const OSL::Vec3 &P, const OSL::Vec3 &dPdx, const OSL::Vec3 &dPdy, - const OSL::Vec3 &dPdz, float *result); + const OSL::Vec3 &dPdz, int nchannels, float *result); + +#if OSL_LIBRARY_VERSION_CODE >= 10700 + bool texture(ustring filename, + TextureHandle * /*texture_handle*/, + TexturePerthread * /*texture_thread_info*/, + TextureOpt &options, + OSL::ShaderGlobals *sg, + float s, float t, + float dsdx, float dtdx, float dsdy, float dtdy, + int nchannels, + float *result, + float * /*dresultds*/, + float * /*dresultdt*/) + { + return texture(filename, + options, + sg, + s, t, + dsdx, dtdx, dsdy, dtdy, + nchannels, + result); + } + + bool texture3d(ustring filename, + TextureHandle * /*texture_handle*/, + TexturePerthread * /*texture_thread_info*/, + TextureOpt &options, + OSL::ShaderGlobals *sg, + const OSL::Vec3 &P, + const OSL::Vec3 &dPdx, + const OSL::Vec3 &dPdy, + const OSL::Vec3 &dPdz, + int nchannels, + float *result, + float * /*dresultds*/, + float * /*dresultdt*/, + float * /*dresultdr*/) + { + return texture3d(filename, + options, + sg, + P, + dPdx, dPdy, dPdz, + nchannels, + result); + } +#endif bool environment(ustring filename, TextureOpt &options, OSL::ShaderGlobals *sg, const OSL::Vec3 &R, - const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy, float *result); + const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy, + int nchannels, float *result); bool get_texture_info(OSL::ShaderGlobals *sg, ustring filename, int subimage, ustring dataname, TypeDesc datatype, void *data); @@ -159,70 +207,37 @@ public: static ustring u_v; static ustring u_empty; -#if OSL_LIBRARY_VERSION_CODE < 10500 - bool get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform, float time) { - return get_matrix(NULL, result, xform, time); - } - - bool get_inverse_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform, float time) { - return get_inverse_matrix(NULL, result, xform, time); - } - - bool get_matrix(OSL::Matrix44 &result, ustring from, float time) { - return get_matrix(NULL, result, from, time); - } - - bool get_inverse_matrix(OSL::Matrix44 &result, ustring to, float time) { - return get_inverse_matrix(NULL, result, to, time); - } - - bool get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform) { - return get_matrix(NULL, result, xform); - } - - bool get_inverse_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform) { - return get_inverse_matrix(NULL, result, xform); - } - - bool get_matrix(OSL::Matrix44 &result, ustring from) { - return get_matrix(NULL, result, from); - } - - bool get_inverse_matrix(OSL::Matrix44 &result, ustring to) { - return get_inverse_matrix(NULL, result, to); - } + /* Code to make OSL versions transition smooth. */ - bool get_array_attribute(void *renderstate, bool derivatives, - ustring object, TypeDesc type, ustring name, - int index, void *val) { - OSL::ShaderGlobals sg; - sg.renderstate = renderstate; - return get_array_attribute(&sg, derivatives, - object, type, name, - index, val); - } - - bool get_attribute(void *renderstate, bool derivatives, ustring object_name, - TypeDesc type, ustring name, void *val) { - OSL::ShaderGlobals sg; - sg.renderstate = renderstate; - return get_attribute(&sg, derivatives, object_name, type, name, val); +#if OSL_LIBRARY_VERSION_CODE < 10600 + inline bool texture(ustring filename, TextureOpt &options, + OSL::ShaderGlobals *sg, + float s, float t, float dsdx, float dtdx, + float dsdy, float dtdy, float *result) + { + return texture(filename, options, sg, s, t, dsdx, dtdx, dsdy, dtdy, + options.nchannels, result); } - bool has_userdata(ustring name, TypeDesc type, void *renderstate) { - return has_userdata(name, type, (OSL::ShaderGlobals *) renderstate); + inline bool texture3d(ustring filename, TextureOpt &options, + OSL::ShaderGlobals *sg, const OSL::Vec3 &P, + const OSL::Vec3 &dPdx, const OSL::Vec3 &dPdy, + const OSL::Vec3 &dPdz, float *result) + { + return texture3d(filename, options, sg, P, dPdx, dPdy, dPdz, + options.nchannels, result); } - bool get_userdata(bool derivatives, ustring name, TypeDesc type, - void *renderstate, void *val) { - return get_userdata(derivatives, name, type, (OSL::ShaderGlobals *) renderstate, val); - } - - bool get_texture_info(ustring filename, int subimage, - ustring dataname, TypeDesc datatype, void *data) { - return get_texture_info(NULL, filename, subimage, dataname, datatype, data); + inline bool environment(ustring filename, TextureOpt &options, + OSL::ShaderGlobals *sg, const OSL::Vec3 &R, + const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy, + float *result) + { + return environment(filename, options, sg, R, dRdx, dRdy, + options.nchannels, result); } #endif + private: KernelGlobals *kernel_globals; OSL::TextureSystem *osl_ts; diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp index 48498116874..2f234aa25ea 100644 --- a/intern/cycles/kernel/osl/osl_shader.cpp +++ b/intern/cycles/kernel/osl/osl_shader.cpp @@ -11,9 +11,11 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ +#include <OSL/oslexec.h> + #include "kernel_compat_cpu.h" #include "kernel_montecarlo.h" #include "kernel_types.h" @@ -34,7 +36,6 @@ #include "attribute.h" -#include <OSL/oslexec.h> CCL_NAMESPACE_BEGIN @@ -145,162 +146,175 @@ static void flatten_surface_closure_tree(ShaderData *sd, int path_flag, /* OSL gives us a closure tree, we flatten it into arrays per * closure type, for evaluation, sampling, etc later on. */ - if (closure->type == OSL::ClosureColor::COMPONENT) { - OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure; - CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); +#if OSL_LIBRARY_VERSION_CODE < 10700 + switch(closure->type) { +#else + switch(closure->id) { +#endif + case OSL::ClosureColor::MUL: { + OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; + flatten_surface_closure_tree(sd, path_flag, mul->closure, TO_FLOAT3(mul->weight) * weight); + break; + } + case OSL::ClosureColor::ADD: { + OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; + flatten_surface_closure_tree(sd, path_flag, add->closureA, weight); + flatten_surface_closure_tree(sd, path_flag, add->closureB, weight); + break; + } + default: { + OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure; + CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); - if (prim) { - ShaderClosure sc; + if(prim) { + ShaderClosure sc; #ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS - weight = weight*TO_FLOAT3(comp->w); + weight = weight*TO_FLOAT3(comp->w); #endif - sc.weight = weight; + sc.weight = weight; - prim->setup(); + prim->setup(); - switch (prim->category) { - case CClosurePrimitive::BSDF: { - CBSDFClosure *bsdf = (CBSDFClosure *)prim; - int scattering = bsdf->scattering(); + switch(prim->category) { + case CClosurePrimitive::BSDF: { + CBSDFClosure *bsdf = (CBSDFClosure *)prim; + int scattering = bsdf->scattering(); - /* caustic options */ - if((scattering & LABEL_GLOSSY) && (path_flag & PATH_RAY_DIFFUSE)) { - KernelGlobals *kg = sd->osl_globals; + /* caustic options */ + if((scattering & LABEL_GLOSSY) && (path_flag & PATH_RAY_DIFFUSE)) { + KernelGlobals *kg = sd->osl_globals; - if((!kernel_data.integrator.caustics_reflective && (scattering & LABEL_REFLECT)) || - (!kernel_data.integrator.caustics_refractive && (scattering & LABEL_TRANSMIT))) { - return; + if((!kernel_data.integrator.caustics_reflective && (scattering & LABEL_REFLECT)) || + (!kernel_data.integrator.caustics_refractive && (scattering & LABEL_TRANSMIT))) + { + return; + } } - } - /* sample weight */ - float sample_weight = fabsf(average(weight)); + /* sample weight */ + float sample_weight = fabsf(average(weight)); - sc.sample_weight = sample_weight; + sc.sample_weight = sample_weight; - sc.type = bsdf->sc.type; - sc.N = bsdf->sc.N; - sc.T = bsdf->sc.T; - sc.data0 = bsdf->sc.data0; - sc.data1 = bsdf->sc.data1; - sc.data2 = bsdf->sc.data2; - sc.prim = bsdf->sc.prim; + sc.type = bsdf->sc.type; + sc.N = bsdf->sc.N; + sc.T = bsdf->sc.T; + sc.data0 = bsdf->sc.data0; + sc.data1 = bsdf->sc.data1; + sc.data2 = bsdf->sc.data2; + sc.prim = bsdf->sc.prim; - /* add */ - if(sc.sample_weight > CLOSURE_WEIGHT_CUTOFF && sd->num_closure < MAX_CLOSURE) { - sd->closure[sd->num_closure++] = sc; - sd->flag |= bsdf->shaderdata_flag(); - } - break; - } - case CClosurePrimitive::Emissive: { - /* sample weight */ - float sample_weight = fabsf(average(weight)); - - sc.sample_weight = sample_weight; - sc.type = CLOSURE_EMISSION_ID; - sc.data0 = 0.0f; - sc.data1 = 0.0f; - sc.data2 = 0.0f; - sc.prim = NULL; - - /* flag */ - if(sd->num_closure < MAX_CLOSURE) { - sd->closure[sd->num_closure++] = sc; - sd->flag |= SD_EMISSION; - } - break; - } - case CClosurePrimitive::AmbientOcclusion: { - /* sample weight */ - float sample_weight = fabsf(average(weight)); - - sc.sample_weight = sample_weight; - sc.type = CLOSURE_AMBIENT_OCCLUSION_ID; - sc.data0 = 0.0f; - sc.data1 = 0.0f; - sc.data2 = 0.0f; - sc.prim = NULL; - - if(sd->num_closure < MAX_CLOSURE) { - sd->closure[sd->num_closure++] = sc; - sd->flag |= SD_AO; - } - break; - } - case CClosurePrimitive::Holdout: { - sc.sample_weight = 0.0f; - sc.type = CLOSURE_HOLDOUT_ID; - sc.data0 = 0.0f; - sc.data1 = 0.0f; - sc.data2 = 0.0f; - sc.prim = NULL; - - if(sd->num_closure < MAX_CLOSURE) { - sd->closure[sd->num_closure++] = sc; - sd->flag |= SD_HOLDOUT; + /* add */ + if(sc.sample_weight > CLOSURE_WEIGHT_CUTOFF && sd->num_closure < MAX_CLOSURE) { + sd->closure[sd->num_closure++] = sc; + sd->flag |= bsdf->shaderdata_flag(); + } + break; } - break; - } - case CClosurePrimitive::BSSRDF: { - CBSSRDFClosure *bssrdf = (CBSSRDFClosure *)prim; - float sample_weight = fabsf(average(weight)); + case CClosurePrimitive::Emissive: { + /* sample weight */ + float sample_weight = fabsf(average(weight)); - if(sample_weight > CLOSURE_WEIGHT_CUTOFF && sd->num_closure+2 < MAX_CLOSURE) { sc.sample_weight = sample_weight; - - sc.type = bssrdf->sc.type; - sc.N = bssrdf->sc.N; - sc.data1 = bssrdf->sc.data1; - sc.T.x = bssrdf->sc.T.x; + sc.type = CLOSURE_EMISSION_ID; + sc.data0 = 0.0f; + sc.data1 = 0.0f; + sc.data2 = 0.0f; sc.prim = NULL; - /* disable in case of diffuse ancestor, can't see it well then and - * adds considerably noise due to probabilities of continuing path - * getting lower and lower */ - if(path_flag & PATH_RAY_DIFFUSE_ANCESTOR) - bssrdf->radius = make_float3(0.0f, 0.0f, 0.0f); - - /* create one closure for each color channel */ - if(fabsf(weight.x) > 0.0f) { - sc.weight = make_float3(weight.x, 0.0f, 0.0f); - sc.data0 = bssrdf->radius.x; - sd->flag |= bssrdf_setup(&sc, sc.type); + /* flag */ + if(sd->num_closure < MAX_CLOSURE) { sd->closure[sd->num_closure++] = sc; + sd->flag |= SD_EMISSION; } + break; + } + case CClosurePrimitive::AmbientOcclusion: { + /* sample weight */ + float sample_weight = fabsf(average(weight)); + + sc.sample_weight = sample_weight; + sc.type = CLOSURE_AMBIENT_OCCLUSION_ID; + sc.data0 = 0.0f; + sc.data1 = 0.0f; + sc.data2 = 0.0f; + sc.prim = NULL; - if(fabsf(weight.y) > 0.0f) { - sc.weight = make_float3(0.0f, weight.y, 0.0f); - sc.data0 = bssrdf->radius.y; - sd->flag |= bssrdf_setup(&sc, sc.type); + if(sd->num_closure < MAX_CLOSURE) { sd->closure[sd->num_closure++] = sc; + sd->flag |= SD_AO; } + break; + } + case CClosurePrimitive::Holdout: { + sc.sample_weight = 0.0f; + sc.type = CLOSURE_HOLDOUT_ID; + sc.data0 = 0.0f; + sc.data1 = 0.0f; + sc.data2 = 0.0f; + sc.prim = NULL; - if(fabsf(weight.z) > 0.0f) { - sc.weight = make_float3(0.0f, 0.0f, weight.z); - sc.data0 = bssrdf->radius.z; - sd->flag |= bssrdf_setup(&sc, sc.type); + if(sd->num_closure < MAX_CLOSURE) { sd->closure[sd->num_closure++] = sc; + sd->flag |= SD_HOLDOUT; + } + break; + } + case CClosurePrimitive::BSSRDF: { + CBSSRDFClosure *bssrdf = (CBSSRDFClosure *)prim; + float sample_weight = fabsf(average(weight)); + + if(sample_weight > CLOSURE_WEIGHT_CUTOFF && sd->num_closure+2 < MAX_CLOSURE) { + sc.sample_weight = sample_weight; + + sc.type = bssrdf->sc.type; + sc.N = bssrdf->sc.N; + sc.data1 = bssrdf->sc.data1; + sc.T.x = bssrdf->sc.T.x; + sc.prim = NULL; + + /* disable in case of diffuse ancestor, can't see it well then and + * adds considerably noise due to probabilities of continuing path + * getting lower and lower */ + if(path_flag & PATH_RAY_DIFFUSE_ANCESTOR) + bssrdf->radius = make_float3(0.0f, 0.0f, 0.0f); + + /* create one closure for each color channel */ + if(fabsf(weight.x) > 0.0f) { + sc.weight = make_float3(weight.x, 0.0f, 0.0f); + sc.data0 = bssrdf->radius.x; + sc.data1 = 0.0f; + sd->flag |= bssrdf_setup(&sc, sc.type); + sd->closure[sd->num_closure++] = sc; + } + + if(fabsf(weight.y) > 0.0f) { + sc.weight = make_float3(0.0f, weight.y, 0.0f); + sc.data0 = bssrdf->radius.y; + sc.data1 = 0.0f; + sd->flag |= bssrdf_setup(&sc, sc.type); + sd->closure[sd->num_closure++] = sc; + } + + if(fabsf(weight.z) > 0.0f) { + sc.weight = make_float3(0.0f, 0.0f, weight.z); + sc.data0 = bssrdf->radius.z; + sc.data1 = 0.0f; + sd->flag |= bssrdf_setup(&sc, sc.type); + sd->closure[sd->num_closure++] = sc; + } } + break; } - break; + case CClosurePrimitive::Background: + case CClosurePrimitive::Volume: + break; /* not relevant */ } - case CClosurePrimitive::Background: - case CClosurePrimitive::Volume: - break; /* not relevant */ } + break; } } - else if (closure->type == OSL::ClosureColor::MUL) { - OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; - flatten_surface_closure_tree(sd, path_flag, mul->closure, TO_FLOAT3(mul->weight) * weight); - } - else if (closure->type == OSL::ClosureColor::ADD) { - OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; - flatten_surface_closure_tree(sd, path_flag, add->closureA, weight); - flatten_surface_closure_tree(sd, path_flag, add->closureB, weight); - } } void OSLShader::eval_surface(KernelGlobals *kg, ShaderData *sd, int path_flag, ShaderContext ctx) @@ -315,11 +329,11 @@ void OSLShader::eval_surface(KernelGlobals *kg, ShaderData *sd, int path_flag, S OSL::ShadingContext *octx = tdata->context[(int)ctx]; int shader = sd->shader & SHADER_MASK; - if (kg->osl->surface_state[shader]) + if(kg->osl->surface_state[shader]) ss->execute(*octx, *(kg->osl->surface_state[shader]), *globals); /* flatten closure tree */ - if (globals->Ci) + if(globals->Ci) flatten_surface_closure_tree(sd, path_flag, globals->Ci); } @@ -331,27 +345,33 @@ static float3 flatten_background_closure_tree(const OSL::ClosureColor *closure) * is only one supported closure type at the moment, which has no evaluation * functions, so we just sum the weights */ - if (closure->type == OSL::ClosureColor::COMPONENT) { - OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure; - CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); - - if (prim && prim->category == CClosurePrimitive::Background) -#ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS - return TO_FLOAT3(comp->w); +#if OSL_LIBRARY_VERSION_CODE < 10700 + switch(closure->type) { #else - return make_float3(1.0f, 1.0f, 1.0f); + switch(closure->id) { #endif - } - else if (closure->type == OSL::ClosureColor::MUL) { - OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; + case OSL::ClosureColor::MUL: { + OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; - return TO_FLOAT3(mul->weight) * flatten_background_closure_tree(mul->closure); - } - else if (closure->type == OSL::ClosureColor::ADD) { - OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; + return TO_FLOAT3(mul->weight) * flatten_background_closure_tree(mul->closure); + } + case OSL::ClosureColor::ADD: { + OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; - return flatten_background_closure_tree(add->closureA) + - flatten_background_closure_tree(add->closureB); + return flatten_background_closure_tree(add->closureA) + + flatten_background_closure_tree(add->closureB); + } + default: { + OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure; + CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); + + if(prim && prim->category == CClosurePrimitive::Background) +#ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS + return TO_FLOAT3(comp->w); +#else + return make_float3(1.0f, 1.0f, 1.0f); +#endif + } } return make_float3(0.0f, 0.0f, 0.0f); @@ -368,11 +388,11 @@ float3 OSLShader::eval_background(KernelGlobals *kg, ShaderData *sd, int path_fl OSL::ShaderGlobals *globals = &tdata->globals; OSL::ShadingContext *octx = tdata->context[(int)ctx]; - if (kg->osl->background_state) + if(kg->osl->background_state) ss->execute(*octx, *(kg->osl->background_state), *globals); /* return background color immediately */ - if (globals->Ci) + if(globals->Ci) return flatten_background_closure_tree(globals->Ci); return make_float3(0.0f, 0.0f, 0.0f); @@ -386,76 +406,84 @@ static void flatten_volume_closure_tree(ShaderData *sd, /* OSL gives us a closure tree, we flatten it into arrays per * closure type, for evaluation, sampling, etc later on. */ - if (closure->type == OSL::ClosureColor::COMPONENT) { - OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure; - CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); +#if OSL_LIBRARY_VERSION_CODE < 10700 + switch(closure->type) { +#else + switch(closure->id) { +#endif + case OSL::ClosureColor::MUL: { + OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; + flatten_volume_closure_tree(sd, mul->closure, TO_FLOAT3(mul->weight) * weight); + break; + } + case OSL::ClosureColor::ADD: { + OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; + flatten_volume_closure_tree(sd, add->closureA, weight); + flatten_volume_closure_tree(sd, add->closureB, weight); + break; + } + default: { + OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure; + CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); - if (prim) { - ShaderClosure sc; + if(prim) { + ShaderClosure sc; #ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS - weight = weight*TO_FLOAT3(comp->w); + weight = weight*TO_FLOAT3(comp->w); #endif - sc.weight = weight; - - prim->setup(); - - switch (prim->category) { - case CClosurePrimitive::Volume: { - CVolumeClosure *volume = (CVolumeClosure *)prim; - /* sample weight */ - float sample_weight = fabsf(average(weight)); - - sc.sample_weight = sample_weight; - sc.type = volume->sc.type; - sc.data0 = volume->sc.data0; - sc.data1 = volume->sc.data1; - - /* add */ - if((sc.sample_weight > CLOSURE_WEIGHT_CUTOFF) && - (sd->num_closure < MAX_CLOSURE)) - { - sd->closure[sd->num_closure++] = sc; - sd->flag |= volume->shaderdata_flag(); + sc.weight = weight; + + prim->setup(); + + switch(prim->category) { + case CClosurePrimitive::Volume: { + CVolumeClosure *volume = (CVolumeClosure *)prim; + /* sample weight */ + float sample_weight = fabsf(average(weight)); + + sc.sample_weight = sample_weight; + sc.type = volume->sc.type; + sc.data0 = volume->sc.data0; + sc.data1 = volume->sc.data1; + + /* add */ + if((sc.sample_weight > CLOSURE_WEIGHT_CUTOFF) && + (sd->num_closure < MAX_CLOSURE)) + { + sd->closure[sd->num_closure++] = sc; + sd->flag |= volume->shaderdata_flag(); + } + break; } - break; - } - case CClosurePrimitive::Emissive: { - /* sample weight */ - float sample_weight = fabsf(average(weight)); - - sc.sample_weight = sample_weight; - sc.type = CLOSURE_EMISSION_ID; - sc.data0 = 0.0f; - sc.data1 = 0.0f; - sc.prim = NULL; - - /* flag */ - if(sd->num_closure < MAX_CLOSURE) { - sd->closure[sd->num_closure++] = sc; - sd->flag |= SD_EMISSION; + case CClosurePrimitive::Emissive: { + /* sample weight */ + float sample_weight = fabsf(average(weight)); + + sc.sample_weight = sample_weight; + sc.type = CLOSURE_EMISSION_ID; + sc.data0 = 0.0f; + sc.data1 = 0.0f; + sc.prim = NULL; + + /* flag */ + if(sd->num_closure < MAX_CLOSURE) { + sd->closure[sd->num_closure++] = sc; + sd->flag |= SD_EMISSION; + } + break; } - break; + case CClosurePrimitive::Holdout: + break; /* not implemented */ + case CClosurePrimitive::Background: + case CClosurePrimitive::BSDF: + case CClosurePrimitive::BSSRDF: + case CClosurePrimitive::AmbientOcclusion: + break; /* not relevant */ } - case CClosurePrimitive::Holdout: - break; /* not implemented */ - case CClosurePrimitive::Background: - case CClosurePrimitive::BSDF: - case CClosurePrimitive::BSSRDF: - case CClosurePrimitive::AmbientOcclusion: - break; /* not relevant */ } } } - else if (closure->type == OSL::ClosureColor::MUL) { - OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; - flatten_volume_closure_tree(sd, mul->closure, TO_FLOAT3(mul->weight) * weight); - } - else if (closure->type == OSL::ClosureColor::ADD) { - OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; - flatten_volume_closure_tree(sd, add->closureA, weight); - flatten_volume_closure_tree(sd, add->closureB, weight); - } } void OSLShader::eval_volume(KernelGlobals *kg, ShaderData *sd, int path_flag, ShaderContext ctx) @@ -470,11 +498,11 @@ void OSLShader::eval_volume(KernelGlobals *kg, ShaderData *sd, int path_flag, Sh OSL::ShadingContext *octx = tdata->context[(int)ctx]; int shader = sd->shader & SHADER_MASK; - if (kg->osl->volume_state[shader]) + if(kg->osl->volume_state[shader]) ss->execute(*octx, *(kg->osl->volume_state[shader]), *globals); /* flatten closure tree */ - if (globals->Ci) + if(globals->Ci) flatten_volume_closure_tree(sd, globals->Ci); } @@ -492,7 +520,7 @@ void OSLShader::eval_displacement(KernelGlobals *kg, ShaderData *sd, ShaderConte OSL::ShadingContext *octx = tdata->context[(int)ctx]; int shader = sd->shader & SHADER_MASK; - if (kg->osl->displacement_state[shader]) + if(kg->osl->displacement_state[shader]) ss->execute(*octx, *(kg->osl->displacement_state[shader]), *globals); /* get back position */ @@ -519,7 +547,7 @@ float3 OSLShader::bsdf_eval(const ShaderData *sd, const ShaderClosure *sc, const CBSDFClosure *bsdf = (CBSDFClosure *)sc->prim; float3 bsdf_eval; - if (dot(sd->Ng, omega_in) >= 0.0f) + if(dot(sd->Ng, omega_in) >= 0.0f) bsdf_eval = bsdf->eval_reflect(sd->I, omega_in, pdf); else bsdf_eval = bsdf->eval_transmit(sd->I, omega_in, pdf); @@ -547,7 +575,7 @@ int OSLShader::find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, ustring stdname(std::string("geom:") + std::string(Attribute::standard_name((AttributeStandard)id))); OSLGlobals::AttributeMap::const_iterator it = attr_map.find(stdname); - if (it != attr_map.end()) { + if(it != attr_map.end()) { const OSLGlobals::Attribute &osl_attr = it->second; *elem = osl_attr.elem; diff --git a/intern/cycles/kernel/osl/osl_shader.h b/intern/cycles/kernel/osl/osl_shader.h index 40f50d8b0e9..15dd74f9d38 100644 --- a/intern/cycles/kernel/osl/osl_shader.h +++ b/intern/cycles/kernel/osl/osl_shader.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __OSL_SHADER_H__ diff --git a/intern/cycles/kernel/shaders/CMakeLists.txt b/intern/cycles/kernel/shaders/CMakeLists.txt index 0b735ede701..81931463cad 100644 --- a/intern/cycles/kernel/shaders/CMakeLists.txt +++ b/intern/cycles/kernel/shaders/CMakeLists.txt @@ -74,6 +74,7 @@ set(SRC_OSL node_vector_transform.osl node_velvet_bsdf.osl node_voronoi_texture.osl + node_voxel_texture.osl node_wavelength.osl node_blackbody.osl node_wave_texture.osl diff --git a/intern/cycles/kernel/shaders/node_absorption_volume.osl b/intern/cycles/kernel/shaders/node_absorption_volume.osl index 6bac83ba4f5..18f662ebbbd 100644 --- a/intern/cycles/kernel/shaders/node_absorption_volume.osl +++ b/intern/cycles/kernel/shaders/node_absorption_volume.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_add_closure.osl b/intern/cycles/kernel/shaders/node_add_closure.osl index b826fb22784..b6596e0b6bd 100644 --- a/intern/cycles/kernel/shaders/node_add_closure.osl +++ b/intern/cycles/kernel/shaders/node_add_closure.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_ambient_occlusion.osl b/intern/cycles/kernel/shaders/node_ambient_occlusion.osl index 961aed1016b..5f056122bbe 100644 --- a/intern/cycles/kernel/shaders/node_ambient_occlusion.osl +++ b/intern/cycles/kernel/shaders/node_ambient_occlusion.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl b/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl index da1e4f77107..281ed4e8726 100644 --- a/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_attribute.osl b/intern/cycles/kernel/shaders/node_attribute.osl index 43f69fab053..67183e9ffe0 100644 --- a/intern/cycles/kernel/shaders/node_attribute.osl +++ b/intern/cycles/kernel/shaders/node_attribute.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_background.osl b/intern/cycles/kernel/shaders/node_background.osl index c4379a8f71b..613d4e360fa 100644 --- a/intern/cycles/kernel/shaders/node_background.osl +++ b/intern/cycles/kernel/shaders/node_background.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_blackbody.osl b/intern/cycles/kernel/shaders/node_blackbody.osl index d26e56ab06d..1da6894d0f0 100644 --- a/intern/cycles/kernel/shaders/node_blackbody.osl +++ b/intern/cycles/kernel/shaders/node_blackbody.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_brick_texture.osl b/intern/cycles/kernel/shaders/node_brick_texture.osl index 70a6a6ea7ce..35e01178ba8 100644 --- a/intern/cycles/kernel/shaders/node_brick_texture.osl +++ b/intern/cycles/kernel/shaders/node_brick_texture.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" @@ -22,6 +22,7 @@ float brick_noise(int n) /* fast integer noise */ { int nn; + n = (n + 1013) & 2147483647; n = (n >> 13) ^ n; nn = (n * (n * n * 60493 + 19990303) + 1376312589) & 2147483647; return 0.5 * ((float)nn / 1073741824.0); @@ -87,12 +88,9 @@ shader node_brick_texture( if (Fac != 1.0) { float facm = 1.0 - tint; - - Col[0] = facm * (Color1[0]) + tint * Color2[0]; - Col[1] = facm * (Color1[1]) + tint * Color2[1]; - Col[2] = facm * (Color1[2]) + tint * Color2[2]; + Col = facm * Color1 + tint * Color2; } - Color = (Fac == 1.0) ? Mortar: Col; + Color = (Fac == 1.0) ? Mortar : Col; } diff --git a/intern/cycles/kernel/shaders/node_brightness.osl b/intern/cycles/kernel/shaders/node_brightness.osl index 468b0f052c3..00cfb167885 100644 --- a/intern/cycles/kernel/shaders/node_brightness.osl +++ b/intern/cycles/kernel/shaders/node_brightness.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_bump.osl b/intern/cycles/kernel/shaders/node_bump.osl index bbc08760cd5..9882857f2ec 100644 --- a/intern/cycles/kernel/shaders/node_bump.osl +++ b/intern/cycles/kernel/shaders/node_bump.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_camera.osl b/intern/cycles/kernel/shaders/node_camera.osl index 20ebb7dc095..5e90cb8b8ee 100644 --- a/intern/cycles/kernel/shaders/node_camera.osl +++ b/intern/cycles/kernel/shaders/node_camera.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_checker_texture.osl b/intern/cycles/kernel/shaders/node_checker_texture.osl index a6d21fd36f3..ae84c71dd42 100644 --- a/intern/cycles/kernel/shaders/node_checker_texture.osl +++ b/intern/cycles/kernel/shaders/node_checker_texture.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_color.h b/intern/cycles/kernel/shaders/node_color.h index 095e628f20c..4a17286a07f 100644 --- a/intern/cycles/kernel/shaders/node_color.h +++ b/intern/cycles/kernel/shaders/node_color.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ float color_srgb_to_scene_linear(float c) diff --git a/intern/cycles/kernel/shaders/node_combine_hsv.osl b/intern/cycles/kernel/shaders/node_combine_hsv.osl index 010773acc5c..6b922bf4e6b 100644 --- a/intern/cycles/kernel/shaders/node_combine_hsv.osl +++ b/intern/cycles/kernel/shaders/node_combine_hsv.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_combine_rgb.osl b/intern/cycles/kernel/shaders/node_combine_rgb.osl index 8466a89b536..f343fdefd84 100644 --- a/intern/cycles/kernel/shaders/node_combine_rgb.osl +++ b/intern/cycles/kernel/shaders/node_combine_rgb.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_combine_xyz.osl b/intern/cycles/kernel/shaders/node_combine_xyz.osl index 933dee5bd78..86182056b09 100644 --- a/intern/cycles/kernel/shaders/node_combine_xyz.osl +++ b/intern/cycles/kernel/shaders/node_combine_xyz.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_convert_from_color.osl b/intern/cycles/kernel/shaders/node_convert_from_color.osl index 2f4503e66e3..44074317f42 100644 --- a/intern/cycles/kernel/shaders/node_convert_from_color.osl +++ b/intern/cycles/kernel/shaders/node_convert_from_color.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_convert_from_float.osl b/intern/cycles/kernel/shaders/node_convert_from_float.osl index f5b91903078..fc5c79c4c64 100644 --- a/intern/cycles/kernel/shaders/node_convert_from_float.osl +++ b/intern/cycles/kernel/shaders/node_convert_from_float.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_convert_from_int.osl b/intern/cycles/kernel/shaders/node_convert_from_int.osl index 110922a5df1..3c3785ebc0d 100644 --- a/intern/cycles/kernel/shaders/node_convert_from_int.osl +++ b/intern/cycles/kernel/shaders/node_convert_from_int.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_convert_from_normal.osl b/intern/cycles/kernel/shaders/node_convert_from_normal.osl index 995c86d8828..8ecc56ac8ce 100644 --- a/intern/cycles/kernel/shaders/node_convert_from_normal.osl +++ b/intern/cycles/kernel/shaders/node_convert_from_normal.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_convert_from_point.osl b/intern/cycles/kernel/shaders/node_convert_from_point.osl index 2ed151273a8..e5913b7a1e4 100644 --- a/intern/cycles/kernel/shaders/node_convert_from_point.osl +++ b/intern/cycles/kernel/shaders/node_convert_from_point.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_convert_from_string.osl b/intern/cycles/kernel/shaders/node_convert_from_string.osl index 50cce252be4..0466734277b 100644 --- a/intern/cycles/kernel/shaders/node_convert_from_string.osl +++ b/intern/cycles/kernel/shaders/node_convert_from_string.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_convert_from_vector.osl b/intern/cycles/kernel/shaders/node_convert_from_vector.osl index 035c46625a0..79c5cb04550 100644 --- a/intern/cycles/kernel/shaders/node_convert_from_vector.osl +++ b/intern/cycles/kernel/shaders/node_convert_from_vector.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl b/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl index e8c94660e4f..2bef2d65baa 100644 --- a/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_emission.osl b/intern/cycles/kernel/shaders/node_emission.osl index b28d731c19f..c36e2a4c0f3 100644 --- a/intern/cycles/kernel/shaders/node_emission.osl +++ b/intern/cycles/kernel/shaders/node_emission.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_environment_texture.osl b/intern/cycles/kernel/shaders/node_environment_texture.osl index 136ccdf8b18..14f0226a0e5 100644 --- a/intern/cycles/kernel/shaders/node_environment_texture.osl +++ b/intern/cycles/kernel/shaders/node_environment_texture.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_fresnel.h b/intern/cycles/kernel/shaders/node_fresnel.h index 9f10ba8023e..de2d40a849c 100644 --- a/intern/cycles/kernel/shaders/node_fresnel.h +++ b/intern/cycles/kernel/shaders/node_fresnel.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ float fresnel_dielectric_cos(float cosi, float eta) @@ -36,14 +36,14 @@ float fresnel_dielectric_cos(float cosi, float eta) color fresnel_conductor(float cosi, color eta, color k) { - color cosi2 = color(cosi*cosi); + color cosi2 = color(cosi * cosi); color one = color(1, 1, 1); color tmp_f = eta * eta + k * k; color tmp = tmp_f * cosi2; color Rparl2 = (tmp - (2.0 * eta * cosi) + one) / - (tmp + (2.0 * eta * cosi) + one); + (tmp + (2.0 * eta * cosi) + one); color Rperp2 = (tmp_f - (2.0 * eta * cosi) + cosi2) / - (tmp_f + (2.0 * eta * cosi) + cosi2); + (tmp_f + (2.0 * eta * cosi) + cosi2); return (Rparl2 + Rperp2) * 0.5; } diff --git a/intern/cycles/kernel/shaders/node_fresnel.osl b/intern/cycles/kernel/shaders/node_fresnel.osl index 7ef553c0f39..8bec7b432f5 100644 --- a/intern/cycles/kernel/shaders/node_fresnel.osl +++ b/intern/cycles/kernel/shaders/node_fresnel.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_gamma.osl b/intern/cycles/kernel/shaders/node_gamma.osl index a2ad3f766fe..bc4c1b34266 100644 --- a/intern/cycles/kernel/shaders/node_gamma.osl +++ b/intern/cycles/kernel/shaders/node_gamma.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_geometry.osl b/intern/cycles/kernel/shaders/node_geometry.osl index 7bef2051865..b0bd7692489 100644 --- a/intern/cycles/kernel/shaders/node_geometry.osl +++ b/intern/cycles/kernel/shaders/node_geometry.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" @@ -26,7 +26,8 @@ shader node_geometry( output normal TrueNormal = normal(0.0, 0.0, 0.0), output vector Incoming = vector(0.0, 0.0, 0.0), output point Parametric = point(0.0, 0.0, 0.0), - output float Backfacing = 0.0) + output float Backfacing = 0.0, + output float Pointiness = 0.0) { Position = P; Normal = NormalIn; @@ -49,7 +50,7 @@ shader node_geometry( /* try to create spherical tangent from generated coordinates */ if (getattribute("geom:generated", generated)) { - normal data = normal(-(generated[1]-0.5), (generated[0]-0.5), 0.0); + normal data = normal(-(generated[1] - 0.5), (generated[0] - 0.5), 0.0); vector T = transform("object", "world", data); Tangent = cross(Normal, normalize(cross(T, Normal))); } @@ -57,5 +58,13 @@ shader node_geometry( /* otherwise use surface derivatives */ Tangent = normalize(dPdu); } + + getattribute("geom:pointiness", Pointiness); + if (bump_offset == "dx") { + Pointiness += Dx(Pointiness); + } + else if (bump_offset == "dy") { + Pointiness += Dy(Pointiness); + } } diff --git a/intern/cycles/kernel/shaders/node_glass_bsdf.osl b/intern/cycles/kernel/shaders/node_glass_bsdf.osl index b3d6133553b..68bc107cc5e 100644 --- a/intern/cycles/kernel/shaders/node_glass_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_glass_bsdf.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_glossy_bsdf.osl b/intern/cycles/kernel/shaders/node_glossy_bsdf.osl index 5c727ca6917..d3250b32d0b 100644 --- a/intern/cycles/kernel/shaders/node_glossy_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_glossy_bsdf.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_gradient_texture.osl b/intern/cycles/kernel/shaders/node_gradient_texture.osl index 5aa05917dc2..52b49688ab3 100644 --- a/intern/cycles/kernel/shaders/node_gradient_texture.osl +++ b/intern/cycles/kernel/shaders/node_gradient_texture.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_hair_info.osl b/intern/cycles/kernel/shaders/node_hair_info.osl index 1d1ba1983e1..965d2a3c7f7 100644 --- a/intern/cycles/kernel/shaders/node_hair_info.osl +++ b/intern/cycles/kernel/shaders/node_hair_info.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_holdout.osl b/intern/cycles/kernel/shaders/node_holdout.osl index cafad1b5757..78a9f46fd15 100644 --- a/intern/cycles/kernel/shaders/node_holdout.osl +++ b/intern/cycles/kernel/shaders/node_holdout.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_hsv.osl b/intern/cycles/kernel/shaders/node_hsv.osl index 4722bde4cd7..8d9e50fed6b 100644 --- a/intern/cycles/kernel/shaders/node_hsv.osl +++ b/intern/cycles/kernel/shaders/node_hsv.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" @@ -35,6 +35,11 @@ shader node_hsv( Color = hsv_to_rgb(Color); + // Clamp color to prevent negative values cauzed by oversaturation. + Color[0] = max(Color[0], 0.0); + Color[1] = max(Color[1], 0.0); + Color[2] = max(Color[2], 0.0); + ColorOut = mix(ColorIn, Color, Fac); } diff --git a/intern/cycles/kernel/shaders/node_image_texture.osl b/intern/cycles/kernel/shaders/node_image_texture.osl index 7238a1e8862..d3a347b70db 100644 --- a/intern/cycles/kernel/shaders/node_image_texture.osl +++ b/intern/cycles/kernel/shaders/node_image_texture.osl @@ -11,15 +11,60 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" #include "node_color.h" -color image_texture_lookup(string filename, string color_space, float u, float v, output float Alpha, int use_alpha, int is_float, string interpolation) +point texco_remap_square(point co) { - color rgb = (color)texture(filename, u, 1.0 - v, "wrap", "periodic", "interp", interpolation, "alpha", Alpha); + return (co - point(0.5, 0.5, 0.5)) * 2.0; +} + +point map_to_tube(vector dir) +{ + float u, v; + v = (dir[2] + 1.0) * 0.5; + float len = sqrt(dir[0] * dir[0] + dir[1] * dir[1]); + if (len > 0.0) { + u = (1.0 - (atan2(dir[0] / len, dir[1] / len) / M_PI)) * 0.5; + } + else { + v = u = 0.0; /* To avoid un-initialized variables. */ + } + return point(u, v, 0.0); +} + +point map_to_sphere(vector dir) +{ + float len = length(dir); + float v, u; + if (len > 0.0) { + if (dir[0] == 0.0 && dir[1] == 0.0) { + u = 0.0; /* Othwise domain error. */ + } + else { + u = (1.0 - atan2(dir[0], dir[1]) / M_PI) / 2.0; + } + v = 1.0 - acos(dir[2] / len) / M_PI; + } + else { + v = u = 0.0; /* To avoid un-initialized variables. */ + } + return point(u, v, 0.0); +} + +color image_texture_lookup(string filename, + string color_space, + float u, float v, + output float Alpha, + int use_alpha, + int is_float, + string interpolation, + string wrap) +{ + color rgb = (color)texture(filename, u, 1.0 - v, "wrap", wrap, "interp", interpolation, "alpha", Alpha); if (use_alpha) { rgb = color_unpremultiply(rgb, Alpha); @@ -43,6 +88,7 @@ shader node_image_texture( string color_space = "sRGB", string projection = "Flat", string interpolation = "smartcubic", + string wrap = "periodic", float projection_blend = 0.0, int is_float = 1, int use_alpha = 1, @@ -55,7 +101,14 @@ shader node_image_texture( p = transform(mapping, p); if (projection == "Flat") { - Color = image_texture_lookup(filename, color_space, p[0], p[1], Alpha, use_alpha, is_float, interpolation); + Color = image_texture_lookup(filename, + color_space, + p[0], p[1], + Alpha, + use_alpha, + is_float, + interpolation, + wrap); } else if (projection == "Box") { /* object space normal */ @@ -113,6 +166,10 @@ shader node_image_texture( weight[2] = ((2.0 - limit) * Nob[2] + (limit - 1.0)) / (2.0 * limit - 1.0); } } + else { + /* Desperate mode, no valid choice anyway, fallback to one side.*/ + weight[0] = 1.0; + } Color = color(0.0, 0.0, 0.0); Alpha = 0.0; @@ -120,17 +177,59 @@ shader node_image_texture( float tmp_alpha; if (weight[0] > 0.0) { - Color += weight[0] * image_texture_lookup(filename, color_space, p[1], p[2], tmp_alpha, use_alpha, is_float, interpolation); + Color += weight[0] * image_texture_lookup(filename, + color_space, + p[1], p[2], + tmp_alpha, + use_alpha, + is_float, + interpolation, + wrap); Alpha += weight[0] * tmp_alpha; } if (weight[1] > 0.0) { - Color += weight[1] * image_texture_lookup(filename, color_space, p[0], p[2], tmp_alpha, use_alpha, is_float, interpolation); + Color += weight[1] * image_texture_lookup(filename, + color_space, + p[0], p[2], + tmp_alpha, + use_alpha, + is_float, + interpolation, + wrap); Alpha += weight[1] * tmp_alpha; } if (weight[2] > 0.0) { - Color += weight[2] * image_texture_lookup(filename, color_space, p[1], p[0], tmp_alpha, use_alpha, is_float, interpolation); + Color += weight[2] * image_texture_lookup(filename, + color_space, + p[1], p[0], + tmp_alpha, + use_alpha, + is_float, + interpolation, + wrap); Alpha += weight[2] * tmp_alpha; } } + else if (projection == "Sphere") { + point projected = map_to_sphere(texco_remap_square(p)); + Color = image_texture_lookup(filename, + color_space, + projected[0], projected[1], + Alpha, + use_alpha, + is_float, + interpolation, + wrap); + } + else if (projection == "Tube") { + point projected = map_to_tube(texco_remap_square(p)); + Color = image_texture_lookup(filename, + color_space, + projected[0], projected[1], + Alpha, + use_alpha, + is_float, + interpolation, + wrap); + } } - diff --git a/intern/cycles/kernel/shaders/node_invert.osl b/intern/cycles/kernel/shaders/node_invert.osl index 81ef2d0dc3d..b33b0a43d63 100644 --- a/intern/cycles/kernel/shaders/node_invert.osl +++ b/intern/cycles/kernel/shaders/node_invert.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_layer_weight.osl b/intern/cycles/kernel/shaders/node_layer_weight.osl index d03ebe2239a..f583df25773 100644 --- a/intern/cycles/kernel/shaders/node_layer_weight.osl +++ b/intern/cycles/kernel/shaders/node_layer_weight.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_light_falloff.osl b/intern/cycles/kernel/shaders/node_light_falloff.osl index 311b87f3764..a594e33d643 100644 --- a/intern/cycles/kernel/shaders/node_light_falloff.osl +++ b/intern/cycles/kernel/shaders/node_light_falloff.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_light_path.osl b/intern/cycles/kernel/shaders/node_light_path.osl index 95fbcabf917..99a92c4f403 100644 --- a/intern/cycles/kernel/shaders/node_light_path.osl +++ b/intern/cycles/kernel/shaders/node_light_path.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_magic_texture.osl b/intern/cycles/kernel/shaders/node_magic_texture.osl index b8afc6e29ac..c09523f205b 100644 --- a/intern/cycles/kernel/shaders/node_magic_texture.osl +++ b/intern/cycles/kernel/shaders/node_magic_texture.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_mapping.osl b/intern/cycles/kernel/shaders/node_mapping.osl index 46ff9f05e07..69106957ee4 100644 --- a/intern/cycles/kernel/shaders/node_mapping.osl +++ b/intern/cycles/kernel/shaders/node_mapping.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_math.osl b/intern/cycles/kernel/shaders/node_math.osl index abb6a359e75..7eef97fd7e8 100644 --- a/intern/cycles/kernel/shaders/node_math.osl +++ b/intern/cycles/kernel/shaders/node_math.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" @@ -93,8 +93,8 @@ shader node_math( Value = Value1 > Value2; else if (type == "Modulo") Value = safe_modulo(Value1, Value2); - else if (type == "Absolute") - Value = fabs(Value1); + else if (type == "Absolute") + Value = fabs(Value1); if (Clamp) Value = clamp(Value, 0.0, 1.0); diff --git a/intern/cycles/kernel/shaders/node_mix.osl b/intern/cycles/kernel/shaders/node_mix.osl index dd54fd814de..9ef58e4cbba 100644 --- a/intern/cycles/kernel/shaders/node_mix.osl +++ b/intern/cycles/kernel/shaders/node_mix.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_mix_closure.osl b/intern/cycles/kernel/shaders/node_mix_closure.osl index 79d71c97371..5946dfdaaba 100644 --- a/intern/cycles/kernel/shaders/node_mix_closure.osl +++ b/intern/cycles/kernel/shaders/node_mix_closure.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_musgrave_texture.osl b/intern/cycles/kernel/shaders/node_musgrave_texture.osl index 60762539002..4f95dec910a 100644 --- a/intern/cycles/kernel/shaders/node_musgrave_texture.osl +++ b/intern/cycles/kernel/shaders/node_musgrave_texture.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" @@ -26,7 +26,7 @@ * from "Texturing and Modelling: A procedural approach" */ -float noise_musgrave_fBm(point p, string basis, float H, float lacunarity, float octaves) +float noise_musgrave_fBm(point p, float H, float lacunarity, float octaves) { float rmd; float value = 0.0; @@ -35,14 +35,14 @@ float noise_musgrave_fBm(point p, string basis, float H, float lacunarity, float int i; for (i = 0; i < (int)octaves; i++) { - value += safe_noise(p, 0) * pwr; + value += safe_noise(p, "signed") * pwr; pwr *= pwHL; p *= lacunarity; } rmd = octaves - floor(octaves); if (rmd != 0.0) - value += rmd * safe_noise(p, 0) * pwr; + value += rmd * safe_noise(p, "signed") * pwr; return value; } @@ -54,7 +54,7 @@ float noise_musgrave_fBm(point p, string basis, float H, float lacunarity, float * octaves: number of frequencies in the fBm */ -float noise_musgrave_multi_fractal(point p, string basis, float H, float lacunarity, float octaves) +float noise_musgrave_multi_fractal(point p, float H, float lacunarity, float octaves) { float rmd; float value = 1.0; @@ -63,14 +63,14 @@ float noise_musgrave_multi_fractal(point p, string basis, float H, float lacunar int i; for (i = 0; i < (int)octaves; i++) { - value *= (pwr * safe_noise(p, 0) + 1.0); + value *= (pwr * safe_noise(p, "signed") + 1.0); pwr *= pwHL; p *= lacunarity; } rmd = octaves - floor(octaves); if (rmd != 0.0) - value *= (rmd * pwr * safe_noise(p, 0) + 1.0); /* correct? */ + value *= (rmd * pwr * safe_noise(p, "signed") + 1.0); /* correct? */ return value; } @@ -83,7 +83,7 @@ float noise_musgrave_multi_fractal(point p, string basis, float H, float lacunar * offset: raises the terrain from `sea level' */ -float noise_musgrave_hetero_terrain(point p, string basis, float H, float lacunarity, float octaves, float offset) +float noise_musgrave_hetero_terrain(point p, float H, float lacunarity, float octaves, float offset) { float value, increment, rmd; float pwHL = pow(lacunarity, -H); @@ -91,11 +91,11 @@ float noise_musgrave_hetero_terrain(point p, string basis, float H, float lacuna int i; /* first unscaled octave of function; later octaves are scaled */ - value = offset + safe_noise(p, 0); + value = offset + safe_noise(p, "signed"); p *= lacunarity; for (i = 1; i < (int)octaves; i++) { - increment = (safe_noise(p, 0) + offset) * pwr * value; + increment = (safe_noise(p, "signed") + offset) * pwr * value; value += increment; pwr *= pwHL; p *= lacunarity; @@ -103,7 +103,7 @@ float noise_musgrave_hetero_terrain(point p, string basis, float H, float lacuna rmd = octaves - floor(octaves); if (rmd != 0.0) { - increment = (safe_noise(p, 0) + offset) * pwr * value; + increment = (safe_noise(p, "signed") + offset) * pwr * value; value += rmd * increment; } @@ -118,15 +118,15 @@ float noise_musgrave_hetero_terrain(point p, string basis, float H, float lacuna * offset: raises the terrain from `sea level' */ -float noise_musgrave_hybrid_multi_fractal(point p, string basis, float H, - float lacunarity, float octaves, float offset, float gain) +float noise_musgrave_hybrid_multi_fractal(point p, float H, float lacunarity, + float octaves, float offset, float gain) { float result, signal, weight, rmd; float pwHL = pow(lacunarity, -H); float pwr = pwHL; int i; - result = safe_noise(p, 0) + offset; + result = safe_noise(p, "signed") + offset; weight = gain * result; p *= lacunarity; @@ -134,7 +134,7 @@ float noise_musgrave_hybrid_multi_fractal(point p, string basis, float H, if (weight > 1.0) weight = 1.0; - signal = (safe_noise(p, 0) + offset) * pwr; + signal = (safe_noise(p, "signed") + offset) * pwr; pwr *= pwHL; result += weight * signal; weight *= gain * signal; @@ -143,7 +143,7 @@ float noise_musgrave_hybrid_multi_fractal(point p, string basis, float H, rmd = octaves - floor(octaves); if (rmd != 0.0) - result += rmd * ((safe_noise(p, 0) + offset) * pwr); + result += rmd * ((safe_noise(p, "signed") + offset) * pwr); return result; } @@ -156,15 +156,15 @@ float noise_musgrave_hybrid_multi_fractal(point p, string basis, float H, * offset: raises the terrain from `sea level' */ -float noise_musgrave_ridged_multi_fractal(point p, string basis, float H, - float lacunarity, float octaves, float offset, float gain) +float noise_musgrave_ridged_multi_fractal(point p, float H, float lacunarity, + float octaves, float offset, float gain) { float result, signal, weight; float pwHL = pow(lacunarity, -H); float pwr = pwHL; int i; - signal = offset - fabs(safe_noise(p, 0)); + signal = offset - fabs(safe_noise(p, "signed")); signal *= signal; result = signal; weight = 1.0; @@ -172,7 +172,7 @@ float noise_musgrave_ridged_multi_fractal(point p, string basis, float H, for (i = 1; i < (int)octaves; i++) { p *= lacunarity; weight = clamp(signal * gain, 0.0, 1.0); - signal = offset - fabs(safe_noise(p, 0)); + signal = offset - fabs(safe_noise(p, "signed")); signal *= signal; signal *= weight; result += signal * pwr; @@ -201,7 +201,6 @@ shader node_musgrave_texture( float dimension = max(Dimension, 1e-5); float octaves = clamp(Detail, 0.0, 16.0); float lacunarity = max(Lacunarity, 1e-5); - string Basis = "Perlin"; float intensity = 1.0; point p = Vector; @@ -212,15 +211,15 @@ shader node_musgrave_texture( p = p * Scale; if (Type == "Multifractal") - Fac = intensity * noise_musgrave_multi_fractal(p, Basis, dimension, lacunarity, octaves); + Fac = intensity * noise_musgrave_multi_fractal(p, dimension, lacunarity, octaves); else if (Type == "fBM") - Fac = intensity * noise_musgrave_fBm(p, Basis, dimension, lacunarity, octaves); + Fac = intensity * noise_musgrave_fBm(p, dimension, lacunarity, octaves); else if (Type == "Hybrid Multifractal") - Fac = intensity * noise_musgrave_hybrid_multi_fractal(p, Basis, dimension, lacunarity, octaves, Offset, Gain); + Fac = intensity * noise_musgrave_hybrid_multi_fractal(p, dimension, lacunarity, octaves, Offset, Gain); else if (Type == "Ridged Multifractal") - Fac = intensity * noise_musgrave_ridged_multi_fractal(p, Basis, dimension, lacunarity, octaves, Offset, Gain); + Fac = intensity * noise_musgrave_ridged_multi_fractal(p, dimension, lacunarity, octaves, Offset, Gain); else if (Type == "Hetero Terrain") - Fac = intensity * noise_musgrave_hetero_terrain(p, Basis, dimension, lacunarity, octaves, Offset); + Fac = intensity * noise_musgrave_hetero_terrain(p, dimension, lacunarity, octaves, Offset); Color = color(Fac, Fac, Fac); } diff --git a/intern/cycles/kernel/shaders/node_noise_texture.osl b/intern/cycles/kernel/shaders/node_noise_texture.osl index 912795966e0..e83e5b5b211 100644 --- a/intern/cycles/kernel/shaders/node_noise_texture.osl +++ b/intern/cycles/kernel/shaders/node_noise_texture.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" @@ -19,23 +19,23 @@ /* Noise */ -float noise(point p, string basis, float distortion, float detail, float fac, color Color) +float noise(point p, float distortion, float detail, float fac, color Color) { point r; int hard = 0; if (distortion != 0.0) { - r[0] = noise_basis(p + point(13.5), basis) * distortion; - r[1] = noise_basis(p, basis) * distortion; - r[2] = noise_basis(p - point(13.5), basis) * distortion; + r[0] = safe_noise(p + point(13.5), "unsigned") * distortion; + r[1] = safe_noise(p, "unsigned") * distortion; + r[2] = safe_noise(p - point(13.5), "unsigned") * distortion; p += r; } - fac = noise_turbulence(p, basis, detail, hard); + fac = noise_turbulence(p, detail, hard); - Color = color(fac, noise_turbulence(point(p[1], p[0], p[2]), basis, detail, hard), - noise_turbulence(point(p[1], p[2], p[0]), basis, detail, hard)); + Color = color(fac, noise_turbulence(point(p[1], p[0], p[2]), detail, hard), + noise_turbulence(point(p[1], p[2], p[0]), detail, hard)); return fac; } @@ -55,7 +55,6 @@ shader node_noise_texture( if (use_mapping) p = transform(mapping, p); - string Basis = "Perlin"; - Fac = noise(p * Scale, Basis, Distortion, Detail, Fac, Color); + Fac = noise(p * Scale, Distortion, Detail, Fac, Color); } diff --git a/intern/cycles/kernel/shaders/node_normal.osl b/intern/cycles/kernel/shaders/node_normal.osl index 14af044e0c0..2d04978fc72 100644 --- a/intern/cycles/kernel/shaders/node_normal.osl +++ b/intern/cycles/kernel/shaders/node_normal.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" @@ -23,6 +23,6 @@ shader node_normal( output float Dot = 1.0) { NormalOut = normalize(Direction); - Dot = dot(NormalOut, NormalIn); + Dot = dot(NormalOut, normalize(NormalIn)); } diff --git a/intern/cycles/kernel/shaders/node_normal_map.osl b/intern/cycles/kernel/shaders/node_normal_map.osl index c2080ecb194..01be566fb20 100644 --- a/intern/cycles/kernel/shaders/node_normal_map.osl +++ b/intern/cycles/kernel/shaders/node_normal_map.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_object_info.osl b/intern/cycles/kernel/shaders/node_object_info.osl index 1ebe767e82d..dd7c663b8d8 100644 --- a/intern/cycles/kernel/shaders/node_object_info.osl +++ b/intern/cycles/kernel/shaders/node_object_info.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_output_displacement.osl b/intern/cycles/kernel/shaders/node_output_displacement.osl index 613d6be5f3b..d0688cfda8d 100644 --- a/intern/cycles/kernel/shaders/node_output_displacement.osl +++ b/intern/cycles/kernel/shaders/node_output_displacement.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_output_surface.osl b/intern/cycles/kernel/shaders/node_output_surface.osl index fb16e85ce0d..2cc4575a8c8 100644 --- a/intern/cycles/kernel/shaders/node_output_surface.osl +++ b/intern/cycles/kernel/shaders/node_output_surface.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_output_volume.osl b/intern/cycles/kernel/shaders/node_output_volume.osl index 11a884b7d75..f220ba866e3 100644 --- a/intern/cycles/kernel/shaders/node_output_volume.osl +++ b/intern/cycles/kernel/shaders/node_output_volume.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_particle_info.osl b/intern/cycles/kernel/shaders/node_particle_info.osl index 077b0c114da..768b7753d02 100644 --- a/intern/cycles/kernel/shaders/node_particle_info.osl +++ b/intern/cycles/kernel/shaders/node_particle_info.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_refraction_bsdf.osl b/intern/cycles/kernel/shaders/node_refraction_bsdf.osl index 4a32415b482..d458ca730a4 100644 --- a/intern/cycles/kernel/shaders/node_refraction_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_refraction_bsdf.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_rgb_curves.osl b/intern/cycles/kernel/shaders/node_rgb_curves.osl index 4e0f8721144..60cb273ba98 100644 --- a/intern/cycles/kernel/shaders/node_rgb_curves.osl +++ b/intern/cycles/kernel/shaders/node_rgb_curves.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_rgb_ramp.osl b/intern/cycles/kernel/shaders/node_rgb_ramp.osl index d3c2e9573d2..0202ba0bf79 100644 --- a/intern/cycles/kernel/shaders/node_rgb_ramp.osl +++ b/intern/cycles/kernel/shaders/node_rgb_ramp.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_scatter_volume.osl b/intern/cycles/kernel/shaders/node_scatter_volume.osl index 77c157bd92b..002e2750fca 100644 --- a/intern/cycles/kernel/shaders/node_scatter_volume.osl +++ b/intern/cycles/kernel/shaders/node_scatter_volume.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_separate_hsv.osl b/intern/cycles/kernel/shaders/node_separate_hsv.osl index 94fc5de9122..2a804040294 100644 --- a/intern/cycles/kernel/shaders/node_separate_hsv.osl +++ b/intern/cycles/kernel/shaders/node_separate_hsv.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_separate_rgb.osl b/intern/cycles/kernel/shaders/node_separate_rgb.osl index aebb63a0ee4..43d9e3aa4b1 100644 --- a/intern/cycles/kernel/shaders/node_separate_rgb.osl +++ b/intern/cycles/kernel/shaders/node_separate_rgb.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_separate_xyz.osl b/intern/cycles/kernel/shaders/node_separate_xyz.osl index 63725cb9995..e1963a1902f 100644 --- a/intern/cycles/kernel/shaders/node_separate_xyz.osl +++ b/intern/cycles/kernel/shaders/node_separate_xyz.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_set_normal.osl b/intern/cycles/kernel/shaders/node_set_normal.osl index 8eef152308a..7ca7ac9350c 100644 --- a/intern/cycles/kernel/shaders/node_set_normal.osl +++ b/intern/cycles/kernel/shaders/node_set_normal.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_sky_texture.osl b/intern/cycles/kernel/shaders/node_sky_texture.osl index 85c2dbdb2c2..05eed23bea8 100644 --- a/intern/cycles/kernel/shaders/node_sky_texture.osl +++ b/intern/cycles/kernel/shaders/node_sky_texture.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl index 1c0cd74c0be..dbbf657776c 100644 --- a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl +++ b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_tangent.osl b/intern/cycles/kernel/shaders/node_tangent.osl index 41a2b2b0216..53a47396f9f 100644 --- a/intern/cycles/kernel/shaders/node_tangent.osl +++ b/intern/cycles/kernel/shaders/node_tangent.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_texture.h b/intern/cycles/kernel/shaders/node_texture.h index de51559f297..fc2cfdcd55c 100644 --- a/intern/cycles/kernel/shaders/node_texture.h +++ b/intern/cycles/kernel/shaders/node_texture.h @@ -11,35 +11,9 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ -/* Voronoi Distances */ - -float voronoi_distance(string distance_metric, vector d, float e) -{ -#if 0 - if (distance_metric == "Distance Squared") -#endif - return dot(d, d); -#if 0 - if (distance_metric == "Actual Distance") - return length(d); - if (distance_metric == "Manhattan") - return fabs(d[0]) + fabs(d[1]) + fabs(d[2]); - if (distance_metric == "Chebychev") - return max(fabs(d[0]), max(fabs(d[1]), fabs(d[2]))); - if (distance_metric == "Minkovsky 1/2") - return sqrt(fabs(d[0])) + sqrt(fabs(d[1])) + sqrt(fabs(d[1])); - if (distance_metric == "Minkovsky 4") - return sqrt(sqrt(dot(d * d, d * d))); - if (distance_metric == "Minkovsky") - return pow(pow(fabs(d[0]), e) + pow(fabs(d[1]), e) + pow(fabs(d[2]), e), 1.0 / e); - - return 0.0; -#endif -} - /* Voronoi / Worley like */ color cellnoise_color(point p) @@ -51,7 +25,7 @@ color cellnoise_color(point p) return color(r, g, b); } -void voronoi(point p, string distance_metric, float e, float da[4], point pa[4]) +void voronoi(point p, float e, float da[4], point pa[4]) { /* returns distances in da and point coords in pa */ int xx, yy, zz, xi, yi, zi; @@ -71,7 +45,7 @@ void voronoi(point p, string distance_metric, float e, float da[4], point pa[4]) point ip = point(xx, yy, zz); point vp = (point)cellnoise_color(ip); point pd = p - (vp + ip); - float d = voronoi_distance(distance_metric, pd, e); + float d = dot(pd, pd); vp += point(xx, yy, zz); @@ -111,54 +85,14 @@ void voronoi(point p, string distance_metric, float e, float da[4], point pa[4]) } } -float voronoi_Fn(point p, int n) -{ - float da[4]; - point pa[4]; - - voronoi(p, "Distance Squared", 0, da, pa); - - return da[n]; -} - -float voronoi_FnFn(point p, int n1, int n2) -{ - float da[4]; - point pa[4]; - - voronoi(p, "Distance Squared", 0, da, pa); - - return da[n2] - da[n1]; -} - -float voronoi_F1(point p) { return voronoi_Fn(p, 0); } -float voronoi_F2(point p) { return voronoi_Fn(p, 1); } -float voronoi_F3(point p) { return voronoi_Fn(p, 2); } -float voronoi_F4(point p) { return voronoi_Fn(p, 3); } -float voronoi_F1F2(point p) { return voronoi_FnFn(p, 0, 1); } - -float voronoi_Cr(point p) -{ - /* crackle type pattern, just a scale/clamp of F2-F1 */ - float t = 10.0 * voronoi_F1F2(p); - return (t > 1.0) ? 1.0 : t; -} - -float voronoi_F1S(point p) { return 2.0 * voronoi_F1(p) - 1.0; } -float voronoi_F2S(point p) { return 2.0 * voronoi_F2(p) - 1.0; } -float voronoi_F3S(point p) { return 2.0 * voronoi_F3(p) - 1.0; } -float voronoi_F4S(point p) { return 2.0 * voronoi_F4(p) - 1.0; } -float voronoi_F1F2S(point p) { return 2.0 * voronoi_F1F2(p) - 1.0; } -float voronoi_CrS(point p) { return 2.0 * voronoi_Cr(p) - 1.0; } - /* Noise Bases */ -float safe_noise(point p, int type) +float safe_noise(point p, string type) { float f = 0.0; /* Perlin noise in range -1..1 */ - if (type == 0) + if (type == "signed") f = noise("perlin", p); /* Perlin noise in range 0..1 */ @@ -172,39 +106,9 @@ float safe_noise(point p, int type) return f; } -float noise_basis(point p, string basis) -{ - if (basis == "Perlin") - return safe_noise(p, 1); - if (basis == "Voronoi F1") - return voronoi_F1S(p); - if (basis == "Voronoi F2") - return voronoi_F2S(p); - if (basis == "Voronoi F3") - return voronoi_F3S(p); - if (basis == "Voronoi F4") - return voronoi_F4S(p); - if (basis == "Voronoi F2-F1") - return voronoi_F1F2S(p); - if (basis == "Voronoi Crackle") - return voronoi_CrS(p); - if (basis == "Cell Noise") - return cellnoise(p); - - return 0.0; -} - -/* Soft/Hard Noise */ - -float noise_basis_hard(point p, string basis, int hard) -{ - float t = noise_basis(p, basis); - return (hard) ? fabs(2.0 * t - 1.0) : t; -} - /* Turbulence */ -float noise_turbulence(point p, string basis, float details, int hard) +float noise_turbulence(point p, float details, int hard) { float fscale = 1.0; float amp = 1.0; @@ -215,7 +119,7 @@ float noise_turbulence(point p, string basis, float details, int hard) n = (int)octaves; for (i = 0; i <= n; i++) { - float t = noise_basis(fscale * p, basis); + float t = safe_noise(fscale * p, "unsigned"); if (hard) t = fabs(2.0 * t - 1.0); @@ -228,7 +132,7 @@ float noise_turbulence(point p, string basis, float details, int hard) float rmd = octaves - floor(octaves); if (rmd != 0.0) { - float t = noise_basis(fscale * p, basis); + float t = safe_noise(fscale * p, "unsigned"); if (hard) t = fabs(2.0 * t - 1.0); diff --git a/intern/cycles/kernel/shaders/node_texture_coordinate.osl b/intern/cycles/kernel/shaders/node_texture_coordinate.osl index 8fdf469df21..9e2109fa082 100644 --- a/intern/cycles/kernel/shaders/node_texture_coordinate.osl +++ b/intern/cycles/kernel/shaders/node_texture_coordinate.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" @@ -21,7 +21,9 @@ shader node_texture_coordinate( int is_background = 0, int is_volume = 0, int from_dupli = 0, + int use_transform = 0, string bump_offset = "center", + matrix object_itfm = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), output point Generated = point(0.0, 0.0, 0.0), output point UV = point(0.0, 0.0, 0.0), @@ -60,7 +62,12 @@ shader node_texture_coordinate( getattribute("geom:uv", UV); } - Object = transform("object", P); + if (use_transform) { + Object = transform(object_itfm, P); + } + else { + Object = transform("object", P); + } Camera = transform("camera", P); Window = transform("NDC", P); Normal = transform("world", "object", NormalIn); diff --git a/intern/cycles/kernel/shaders/node_toon_bsdf.osl b/intern/cycles/kernel/shaders/node_toon_bsdf.osl index 1f7e1b8e6e1..75c5d06f847 100644 --- a/intern/cycles/kernel/shaders/node_toon_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_toon_bsdf.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_translucent_bsdf.osl b/intern/cycles/kernel/shaders/node_translucent_bsdf.osl index 8059f5788ec..94d23d35326 100644 --- a/intern/cycles/kernel/shaders/node_translucent_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_translucent_bsdf.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_transparent_bsdf.osl b/intern/cycles/kernel/shaders/node_transparent_bsdf.osl index 552e4106b0c..5d6798f19a6 100644 --- a/intern/cycles/kernel/shaders/node_transparent_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_transparent_bsdf.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_uv_map.osl b/intern/cycles/kernel/shaders/node_uv_map.osl index 01c984aff4c..77e2e8d12d7 100644 --- a/intern/cycles/kernel/shaders/node_uv_map.osl +++ b/intern/cycles/kernel/shaders/node_uv_map.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_value.osl b/intern/cycles/kernel/shaders/node_value.osl index aebfab35d2a..f75388d1f76 100644 --- a/intern/cycles/kernel/shaders/node_value.osl +++ b/intern/cycles/kernel/shaders/node_value.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_vector_curves.osl b/intern/cycles/kernel/shaders/node_vector_curves.osl index 137ebe112eb..7bbf97d95ea 100644 --- a/intern/cycles/kernel/shaders/node_vector_curves.osl +++ b/intern/cycles/kernel/shaders/node_vector_curves.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_vector_math.osl b/intern/cycles/kernel/shaders/node_vector_math.osl index 0c8857deae2..f83412dc0f7 100644 --- a/intern/cycles/kernel/shaders/node_vector_math.osl +++ b/intern/cycles/kernel/shaders/node_vector_math.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_vector_transform.osl b/intern/cycles/kernel/shaders/node_vector_transform.osl index 6fb0ab1d8cc..8ebaa31ab25 100644 --- a/intern/cycles/kernel/shaders/node_vector_transform.osl +++ b/intern/cycles/kernel/shaders/node_vector_transform.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_velvet_bsdf.osl b/intern/cycles/kernel/shaders/node_velvet_bsdf.osl index 37b26babc64..456c26998c8 100644 --- a/intern/cycles/kernel/shaders/node_velvet_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_velvet_bsdf.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_voronoi_texture.osl b/intern/cycles/kernel/shaders/node_voronoi_texture.osl index 7a1e0016690..29e143ae207 100644 --- a/intern/cycles/kernel/shaders/node_voronoi_texture.osl +++ b/intern/cycles/kernel/shaders/node_voronoi_texture.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" @@ -37,7 +37,7 @@ shader node_voronoi_texture( float da[4]; point pa[4]; - voronoi(p * Scale, "Distance Squared", 1.0, da, pa); + voronoi(p * Scale, 1.0, da, pa); /* Colored output */ if (Coloring == "Intensity") { diff --git a/intern/cycles/kernel/shaders/node_voxel_texture.osl b/intern/cycles/kernel/shaders/node_voxel_texture.osl new file mode 100644 index 00000000000..e45af62220f --- /dev/null +++ b/intern/cycles/kernel/shaders/node_voxel_texture.osl @@ -0,0 +1,47 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "stdosl.h" + +shader node_voxel_texture( + string filename = "", + string interpolation = "linear", + int use_mapping = 0, + matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + point Vector = P, + output float Density = 0, + output color Color = 0) +{ + point p = Vector; + if (use_mapping) { + p = transform(mapping, p); + } + else { + p = transform("object", Vector); + matrix tfm; + if (getattribute("geom:generated_transform", tfm)) + p = transform(tfm, p); + } + if(p[0] < 0.0 || p[1] < 0.0 || p[2] < 0.0 || + p[0] > 1.0 || p[1] > 1.0 || p[2] > 1.0) + { + Density = 0; + Color = color(0, 0, 0); + } + else { + Color = (color)texture3d(filename, p, "wrap", "periodic", "interp", interpolation, "alpha", Density); + } +} diff --git a/intern/cycles/kernel/shaders/node_wave_texture.osl b/intern/cycles/kernel/shaders/node_wave_texture.osl index ba40207b446..569f284cbac 100644 --- a/intern/cycles/kernel/shaders/node_wave_texture.osl +++ b/intern/cycles/kernel/shaders/node_wave_texture.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" @@ -31,7 +31,7 @@ float wave(point p, string type, float detail, float distortion, float dscale) } if (distortion != 0.0) { - n = n + (distortion * noise_turbulence(p * dscale, "Perlin", detail, 0)); + n = n + (distortion * noise_turbulence(p * dscale, detail, 0)); } return 0.5 + 0.5 * sin(n); } diff --git a/intern/cycles/kernel/shaders/node_wavelength.osl b/intern/cycles/kernel/shaders/node_wavelength.osl index 4333c1fd944..79e7043d4bf 100644 --- a/intern/cycles/kernel/shaders/node_wavelength.osl +++ b/intern/cycles/kernel/shaders/node_wavelength.osl @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" diff --git a/intern/cycles/kernel/shaders/node_wireframe.osl b/intern/cycles/kernel/shaders/node_wireframe.osl index db8925c9efc..5cc214495dd 100644 --- a/intern/cycles/kernel/shaders/node_wireframe.osl +++ b/intern/cycles/kernel/shaders/node_wireframe.osl @@ -11,17 +11,31 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "stdosl.h" #include "oslutil.h" shader node_wireframe( + string bump_offset = "center", int use_pixel_size = 0, float Size = 0.01, output float Fac = 0.0) { Fac = wireframe("triangles", Size, use_pixel_size); + /* TODO(sergey): Since we can't use autodiff here we do algebraic + * calculation of derivatives by definition. We could probably + * optimize this a bit by doing some extra calculation in wireframe(). + */ + if (bump_offset == "dx") { + point dx = Dx(P); + P -= dx; + Fac += (Fac - wireframe("triangles", Size, use_pixel_size)) / length(dx); + } + else if (bump_offset == "dy") { + point dy = Dy(P); + P -= dy; + Fac += (Fac - wireframe("triangles", Size, use_pixel_size)) / length(dy); + } } - diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h index f8e5fd510ee..697a1756119 100644 --- a/intern/cycles/kernel/shaders/stdosl.h +++ b/intern/cycles/kernel/shaders/stdosl.h @@ -249,7 +249,21 @@ point rotate (point p, float angle, point a, point b) { vector axis = normalize (b - a); float cosang, sinang; + /* Older OSX has major issues with sincos() function, + * it's likely a big in OSL or LLVM. For until we've + * updated to new versions of this libraries we'll + * use a workaround to prevent possible crashes on all + * the platforms. + * + * Shouldn't be that bad because it's mainly used for + * anisotropic shader where angle is usually constant. + */ +#if 0 sincos (angle, sinang, cosang); +#else + sinang = sin (angle); + cosang = cos (angle); +#endif float cosang1 = 1.0 - cosang; float x = axis[0], y = axis[1], z = axis[2]; matrix M = matrix (x * x + (1.0 - x * x) * cosang, @@ -476,8 +490,6 @@ closure color diffuse_ramp(normal N, color colors[8]) BUILTIN; closure color phong_ramp(normal N, float exponent, color colors[8]) BUILTIN; closure color diffuse_toon(normal N, float size, float smooth) BUILTIN; closure color glossy_toon(normal N, float size, float smooth) BUILTIN; -closure color westin_backscatter(normal N, float roughness) BUILTIN; -closure color westin_sheen(normal N, float edginess) BUILTIN; closure color translucent(normal N) BUILTIN; closure color reflection(normal N) BUILTIN; closure color refraction(normal N, float eta) BUILTIN; @@ -507,6 +519,47 @@ closure color hair_transmission(normal N, float roughnessu, float roughnessv, ve closure color henyey_greenstein(float g) BUILTIN; closure color absorption() BUILTIN; +// OSL 1.5 Microfacet functions +closure color microfacet(string distribution, normal N, vector U, float xalpha, float yalpha, float eta, int refract) { + /* GGX */ + if (distribution == "ggx" || distribution == "default") { + if (!refract) { + if (xalpha == yalpha) { + /* Isotropic */ + return microfacet_ggx(N, xalpha); + } + else { + /* Anisotropic */ + return microfacet_ggx_aniso(N, U, xalpha, yalpha); + } + } + else { + return microfacet_ggx_refraction(N, xalpha, eta); + } + } + /* Beckmann */ + else { + if (!refract) { + if (xalpha == yalpha) { + /* Isotropic */ + return microfacet_beckmann(N, xalpha); + } + else { + /* Anisotropic */ + return microfacet_beckmann_aniso(N, U, xalpha, yalpha); + } + } + else { + return microfacet_beckmann_refraction(N, xalpha, eta); + } + } +} + +closure color microfacet (string distribution, normal N, float alpha, float eta, int refract) { + return microfacet(distribution, N, vector(0), alpha, alpha, eta, refract); +} + + // Renderer state int backfacing () BUILTIN; int raytype (string typename) BUILTIN; diff --git a/intern/cycles/kernel/split/kernel_background_buffer_update.h b/intern/cycles/kernel/split/kernel_background_buffer_update.h new file mode 100644 index 00000000000..0132ef9c2f2 --- /dev/null +++ b/intern/cycles/kernel/split/kernel_background_buffer_update.h @@ -0,0 +1,254 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel_split_common.h" + +/* Note on kernel_background_buffer_update kernel. + * This is the fourth kernel in the ray tracing logic, and the third + * of the path iteration kernels. This kernel takes care of rays that hit + * the background (sceneintersect kernel), and for the rays of + * state RAY_UPDATE_BUFFER it updates the ray's accumulated radiance in + * the output buffer. This kernel also takes care of rays that have been determined + * to-be-regenerated. + * + * We will empty QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue in this kernel + * + * Typically all rays that are in state RAY_HIT_BACKGROUND, RAY_UPDATE_BUFFER + * will be eventually set to RAY_TO_REGENERATE state in this kernel. Finally all rays of ray_state + * RAY_TO_REGENERATE will be regenerated and put in queue QUEUE_ACTIVE_AND_REGENERATED_RAYS. + * + * The input and output are as follows, + * + * rng_coop ---------------------------------------------|--- kernel_background_buffer_update --|--- PathRadiance_coop + * throughput_coop --------------------------------------| |--- L_transparent_coop + * per_sample_output_buffers ----------------------------| |--- per_sample_output_buffers + * Ray_coop ---------------------------------------------| |--- ray_state + * PathState_coop ---------------------------------------| |--- Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) + * L_transparent_coop -----------------------------------| |--- Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS) + * ray_state --------------------------------------------| |--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) + * Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ----| |--- Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS) + * Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS) ------| |--- work_array + * parallel_samples -------------------------------------| |--- PathState_coop + * end_sample -------------------------------------------| |--- throughput_coop + * kg (globals + data) ----------------------------------| |--- rng_coop + * rng_state --------------------------------------------| |--- Ray + * PathRadiance_coop ------------------------------------| | + * sw ---------------------------------------------------| | + * sh ---------------------------------------------------| | + * sx ---------------------------------------------------| | + * sy ---------------------------------------------------| | + * stride -----------------------------------------------| | + * work_array -------------------------------------------| |--- work_array + * queuesize --------------------------------------------| | + * start_sample -----------------------------------------| |--- work_pool_wgs + * work_pool_wgs ----------------------------------------| | + * num_samples ------------------------------------------| | + * + * note on shader_data : shader_data argument is neither an input nor an output for this kernel. It is just filled and consumed here itself. + * Note on Queues : + * This kernel fetches rays from QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. + * + * State of queues when this kernel is called : + * At entry, + * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE rays + * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_UPDATE_BUFFER, RAY_HIT_BACKGROUND, RAY_TO_REGENERATE rays + * At exit, + * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays + * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty + */ +ccl_device char kernel_background_buffer_update( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, + ccl_global float *per_sample_output_buffers, + ccl_global uint *rng_state, + ccl_global uint *rng_coop, /* Required for buffer Update */ + ccl_global float3 *throughput_coop, /* Required for background hit processing */ + PathRadiance *PathRadiance_coop, /* Required for background hit processing and buffer Update */ + ccl_global Ray *Ray_coop, /* Required for background hit processing */ + ccl_global PathState *PathState_coop, /* Required for background hit processing */ + ccl_global float *L_transparent_coop, /* Required for background hit processing and buffer Update */ + ccl_global char *ray_state, /* Stores information on the current state of a ray */ + int sw, int sh, int sx, int sy, int stride, + int rng_state_offset_x, + int rng_state_offset_y, + int rng_state_stride, + ccl_global unsigned int *work_array, /* Denotes work of each ray */ + int end_sample, + int start_sample, +#ifdef __WORK_STEALING__ + ccl_global unsigned int *work_pool_wgs, + unsigned int num_samples, +#endif +#ifdef __KERNEL_DEBUG__ + DebugData *debugdata_coop, +#endif + int parallel_samples, /* Number of samples to be processed in parallel */ + int ray_index) +{ + char enqueue_flag = 0; + + /* Load kernel globals structure and ShaderData strucuture */ + KernelGlobals *kg = (KernelGlobals *)globals; + ShaderData *sd = (ShaderData *)shader_data; + +#ifdef __KERNEL_DEBUG__ + DebugData *debug_data = &debugdata_coop[ray_index]; +#endif + ccl_global PathState *state = &PathState_coop[ray_index]; + PathRadiance *L = L = &PathRadiance_coop[ray_index]; + ccl_global Ray *ray = &Ray_coop[ray_index]; + ccl_global float3 *throughput = &throughput_coop[ray_index]; + ccl_global float *L_transparent = &L_transparent_coop[ray_index]; + ccl_global uint *rng = &rng_coop[ray_index]; + +#ifdef __WORK_STEALING__ + unsigned int my_work; + ccl_global float *initial_per_sample_output_buffers; + ccl_global uint *initial_rng; +#endif + unsigned int sample; + unsigned int tile_x; + unsigned int tile_y; + unsigned int pixel_x; + unsigned int pixel_y; + unsigned int my_sample_tile; + +#ifdef __WORK_STEALING__ + my_work = work_array[ray_index]; + sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample; + get_pixel_tile_position(&pixel_x, &pixel_y, + &tile_x, &tile_y, + my_work, + sw, sh, sx, sy, + parallel_samples, + ray_index); + my_sample_tile = 0; + initial_per_sample_output_buffers = per_sample_output_buffers; + initial_rng = rng_state; +#else /* __WORK_STEALING__ */ + sample = work_array[ray_index]; + int tile_index = ray_index / parallel_samples; + /* buffer and rng_state's stride is "stride". Find x and y using ray_index */ + tile_x = tile_index % sw; + tile_y = tile_index / sw; + my_sample_tile = ray_index - (tile_index * parallel_samples); +#endif /* __WORK_STEALING__ */ + + rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride; + per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride; + + if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { + /* eval background shader if nothing hit */ + if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) { + *L_transparent = (*L_transparent) + average((*throughput)); +#ifdef __PASSES__ + if(!(kernel_data.film.pass_flag & PASS_BACKGROUND)) +#endif + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); + } + + if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { +#ifdef __BACKGROUND__ + /* sample background shader */ + float3 L_background = indirect_background(kg, state, ray, sd); + path_radiance_accum_background(L, (*throughput), L_background, state->bounce); +#endif + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); + } + } + + if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) { + float3 L_sum = path_radiance_clamp_and_sum(kg, L); + kernel_write_light_passes(kg, per_sample_output_buffers, L, sample); +#ifdef __KERNEL_DEBUG__ + kernel_write_debug_passes(kg, per_sample_output_buffers, state, debug_data, sample); +#endif + float4 L_rad = make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - (*L_transparent)); + + /* accumulate result in output buffer */ + kernel_write_pass_float4(per_sample_output_buffers, sample, L_rad); + path_rng_end(kg, rng_state, *rng); + + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE); + } + + if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) { +#ifdef __WORK_STEALING__ + /* We have completed current work; So get next work */ + int valid_work = get_next_work(work_pool_wgs, &my_work, sw, sh, num_samples, parallel_samples, ray_index); + if(!valid_work) { + /* If work is invalid, this means no more work is available and the thread may exit */ + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE); + } +#else /* __WORK_STEALING__ */ + if((sample + parallel_samples) >= end_sample) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE); + } +#endif /* __WORK_STEALING__ */ + + if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) { +#ifdef __WORK_STEALING__ + work_array[ray_index] = my_work; + /* Get the sample associated with the current work */ + sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample; + /* Get pixel and tile position associated with current work */ + get_pixel_tile_position(&pixel_x, &pixel_y, &tile_x, &tile_y, my_work, sw, sh, sx, sy, parallel_samples, ray_index); + my_sample_tile = 0; + + /* Remap rng_state according to the current work */ + rng_state = initial_rng + ((rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride); + /* Remap per_sample_output_buffers according to the current work */ + per_sample_output_buffers = initial_per_sample_output_buffers + + (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride; +#else /* __WORK_STEALING__ */ + work_array[ray_index] = sample + parallel_samples; + sample = work_array[ray_index]; + + /* Get ray position from ray index */ + pixel_x = sx + ((ray_index / parallel_samples) % sw); + pixel_y = sy + ((ray_index / parallel_samples) / sw); +#endif /* __WORK_STEALING__ */ + + /* Initialize random numbers and ray. */ + kernel_path_trace_setup(kg, rng_state, sample, pixel_x, pixel_y, rng, ray); + + if(ray->t != 0.0f) { + /* Initialize throughput, L_transparent, Ray, PathState; + * These rays proceed with path-iteration. + */ + *throughput = make_float3(1.0f, 1.0f, 1.0f); + *L_transparent = 0.0f; + path_radiance_init(L, kernel_data.film.use_light_pass); + path_state_init(kg, state, rng, sample, ray); +#ifdef __KERNEL_DEBUG__ + debug_data_init(debug_data); +#endif + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); + enqueue_flag = 1; + } else { + /* These rays do not participate in path-iteration. */ + float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + /* Accumulate result in output buffer. */ + kernel_write_pass_float4(per_sample_output_buffers, sample, L_rad); + path_rng_end(kg, rng_state, *rng); + + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE); + } + } + } + return enqueue_flag; +} diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h new file mode 100644 index 00000000000..4dab79a5c67 --- /dev/null +++ b/intern/cycles/kernel/split/kernel_data_init.h @@ -0,0 +1,415 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel_split_common.h" + +/* Note on kernel_data_initialization kernel + * This kernel Initializes structures needed in path-iteration kernels. + * This is the first kernel in ray-tracing logic. + * + * Ray state of rays outside the tile-boundary will be marked RAY_INACTIVE + * + * Its input and output are as follows, + * + * Un-initialized rng---------------|--- kernel_data_initialization ---|--- Initialized rng + * Un-initialized throughput -------| |--- Initialized throughput + * Un-initialized L_transparent ----| |--- Initialized L_transparent + * Un-initialized PathRadiance -----| |--- Initialized PathRadiance + * Un-initialized Ray --------------| |--- Initialized Ray + * Un-initialized PathState --------| |--- Initialized PathState + * Un-initialized QueueData --------| |--- Initialized QueueData (to QUEUE_EMPTY_SLOT) + * Un-initialized QueueIndex -------| |--- Initialized QueueIndex (to 0) + * Un-initialized use_queues_flag---| |--- Initialized use_queues_flag (to false) + * Un-initialized ray_state --------| |--- Initialized ray_state + * parallel_samples --------------- | |--- Initialized per_sample_output_buffers + * rng_state -----------------------| |--- Initialized work_array + * data ----------------------------| |--- Initialized work_pool_wgs + * start_sample --------------------| | + * sx ------------------------------| | + * sy ------------------------------| | + * sw ------------------------------| | + * sh ------------------------------| | + * stride --------------------------| | + * queuesize -----------------------| | + * num_samples ---------------------| | + * + * Note on Queues : + * All slots in queues are initialized to queue empty slot; + * The number of elements in the queues is initialized to 0; + */ +ccl_device void kernel_data_init( + ccl_global char *globals, + ccl_global char *shader_data_sd, /* Arguments related to ShaderData */ + ccl_global char *shader_data_sd_DL_shadow, /* Arguments related to ShaderData */ + + ccl_global float3 *P_sd, + ccl_global float3 *P_sd_DL_shadow, + + ccl_global float3 *N_sd, + ccl_global float3 *N_sd_DL_shadow, + + ccl_global float3 *Ng_sd, + ccl_global float3 *Ng_sd_DL_shadow, + + ccl_global float3 *I_sd, + ccl_global float3 *I_sd_DL_shadow, + + ccl_global int *shader_sd, + ccl_global int *shader_sd_DL_shadow, + + ccl_global int *flag_sd, + ccl_global int *flag_sd_DL_shadow, + + ccl_global int *prim_sd, + ccl_global int *prim_sd_DL_shadow, + + ccl_global int *type_sd, + ccl_global int *type_sd_DL_shadow, + + ccl_global float *u_sd, + ccl_global float *u_sd_DL_shadow, + + ccl_global float *v_sd, + ccl_global float *v_sd_DL_shadow, + + ccl_global int *object_sd, + ccl_global int *object_sd_DL_shadow, + + ccl_global float *time_sd, + ccl_global float *time_sd_DL_shadow, + + ccl_global float *ray_length_sd, + ccl_global float *ray_length_sd_DL_shadow, + + ccl_global int *ray_depth_sd, + ccl_global int *ray_depth_sd_DL_shadow, + + ccl_global int *transparent_depth_sd, + ccl_global int *transparent_depth_sd_DL_shadow, + + /* Ray differentials. */ + ccl_global differential3 *dP_sd, + ccl_global differential3 *dP_sd_DL_shadow, + + ccl_global differential3 *dI_sd, + ccl_global differential3 *dI_sd_DL_shadow, + + ccl_global differential *du_sd, + ccl_global differential *du_sd_DL_shadow, + + ccl_global differential *dv_sd, + ccl_global differential *dv_sd_DL_shadow, + + /* Dp/Du */ + ccl_global float3 *dPdu_sd, + ccl_global float3 *dPdu_sd_DL_shadow, + + ccl_global float3 *dPdv_sd, + ccl_global float3 *dPdv_sd_DL_shadow, + + /* Object motion. */ + ccl_global Transform *ob_tfm_sd, + ccl_global Transform *ob_tfm_sd_DL_shadow, + + ccl_global Transform *ob_itfm_sd, + ccl_global Transform *ob_itfm_sd_DL_shadow, + + ShaderClosure *closure_sd, + ShaderClosure *closure_sd_DL_shadow, + + ccl_global int *num_closure_sd, + ccl_global int *num_closure_sd_DL_shadow, + + ccl_global float *randb_closure_sd, + ccl_global float *randb_closure_sd_DL_shadow, + + ccl_global float3 *ray_P_sd, + ccl_global float3 *ray_P_sd_DL_shadow, + + ccl_global differential3 *ray_dP_sd, + ccl_global differential3 *ray_dP_sd_DL_shadow, + + ccl_constant KernelData *data, + ccl_global float *per_sample_output_buffers, + ccl_global uint *rng_state, + ccl_global uint *rng_coop, /* rng array to store rng values for all rays */ + ccl_global float3 *throughput_coop, /* throughput array to store throughput values for all rays */ + ccl_global float *L_transparent_coop, /* L_transparent array to store L_transparent values for all rays */ + PathRadiance *PathRadiance_coop, /* PathRadiance array to store PathRadiance values for all rays */ + ccl_global Ray *Ray_coop, /* Ray array to store Ray information for all rays */ + ccl_global PathState *PathState_coop, /* PathState array to store PathState information for all rays */ + ccl_global char *ray_state, /* Stores information on current state of a ray */ + +#define KERNEL_TEX(type, ttype, name) \ + ccl_global type *name, +#include "../kernel_textures.h" + + int start_sample, int sx, int sy, int sw, int sh, int offset, int stride, + int rng_state_offset_x, + int rng_state_offset_y, + int rng_state_stride, + ccl_global int *Queue_data, /* Memory for queues */ + ccl_global int *Queue_index, /* Tracks the number of elements in queues */ + int queuesize, /* size (capacity) of the queue */ + ccl_global char *use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */ + ccl_global unsigned int *work_array, /* work array to store which work each ray belongs to */ +#ifdef __WORK_STEALING__ + ccl_global unsigned int *work_pool_wgs, /* Work pool for each work group */ + unsigned int num_samples, /* Total number of samples per pixel */ +#endif +#ifdef __KERNEL_DEBUG__ + DebugData *debugdata_coop, +#endif + int parallel_samples) /* Number of samples to be processed in parallel */ +{ + + /* Load kernel globals structure */ + KernelGlobals *kg = (KernelGlobals *)globals; + + kg->data = data; +#define KERNEL_TEX(type, ttype, name) \ + kg->name = name; +#include "../kernel_textures.h" + + /* Load ShaderData structure */ + ShaderData *sd = (ShaderData *)shader_data_sd; + ShaderData *sd_DL_shadow = (ShaderData *)shader_data_sd_DL_shadow; + + sd->P = P_sd; + sd_DL_shadow->P = P_sd_DL_shadow; + + sd->N = N_sd; + sd_DL_shadow->N = N_sd_DL_shadow; + + sd->Ng = Ng_sd; + sd_DL_shadow->Ng = Ng_sd_DL_shadow; + + sd->I = I_sd; + sd_DL_shadow->I = I_sd_DL_shadow; + + sd->shader = shader_sd; + sd_DL_shadow->shader = shader_sd_DL_shadow; + + sd->flag = flag_sd; + sd_DL_shadow->flag = flag_sd_DL_shadow; + + sd->prim = prim_sd; + sd_DL_shadow->prim = prim_sd_DL_shadow; + + sd->type = type_sd; + sd_DL_shadow->type = type_sd_DL_shadow; + + sd->u = u_sd; + sd_DL_shadow->u = u_sd_DL_shadow; + + sd->v = v_sd; + sd_DL_shadow->v = v_sd_DL_shadow; + + sd->object = object_sd; + sd_DL_shadow->object = object_sd_DL_shadow; + + sd->time = time_sd; + sd_DL_shadow->time = time_sd_DL_shadow; + + sd->ray_length = ray_length_sd; + sd_DL_shadow->ray_length = ray_length_sd_DL_shadow; + + sd->ray_depth = ray_depth_sd; + sd_DL_shadow->ray_depth = ray_depth_sd_DL_shadow; + + sd->transparent_depth = transparent_depth_sd; + sd_DL_shadow->transparent_depth = transparent_depth_sd_DL_shadow; + +#ifdef __RAY_DIFFERENTIALS__ + sd->dP = dP_sd; + sd_DL_shadow->dP = dP_sd_DL_shadow; + + sd->dI = dI_sd; + sd_DL_shadow->dI = dI_sd_DL_shadow; + + sd->du = du_sd; + sd_DL_shadow->du = du_sd_DL_shadow; + + sd->dv = dv_sd; + sd_DL_shadow->dv = dv_sd_DL_shadow; +#ifdef __DPDU__ + sd->dPdu = dPdu_sd; + sd_DL_shadow->dPdu = dPdu_sd_DL_shadow; + + sd->dPdv = dPdv_sd; + sd_DL_shadow->dPdv = dPdv_sd_DL_shadow; +#endif +#endif + +#ifdef __OBJECT_MOTION__ + sd->ob_tfm = ob_tfm_sd; + sd_DL_shadow->ob_tfm = ob_tfm_sd_DL_shadow; + + sd->ob_itfm = ob_itfm_sd; + sd_DL_shadow->ob_itfm = ob_itfm_sd_DL_shadow; +#endif + + sd->closure = closure_sd; + sd_DL_shadow->closure = closure_sd_DL_shadow; + + sd->num_closure = num_closure_sd; + sd_DL_shadow->num_closure = num_closure_sd_DL_shadow; + + sd->randb_closure = randb_closure_sd; + sd_DL_shadow->randb_closure = randb_closure_sd_DL_shadow; + + sd->ray_P = ray_P_sd; + sd_DL_shadow->ray_P = ray_P_sd_DL_shadow; + + sd->ray_dP = ray_dP_sd; + sd_DL_shadow->ray_dP = ray_dP_sd_DL_shadow; + + int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0); + +#ifdef __WORK_STEALING__ + int lid = get_local_id(1) * get_local_size(0) + get_local_id(0); + /* Initialize work_pool_wgs */ + if(lid == 0) { + int group_index = get_group_id(1) * get_num_groups(0) + get_group_id(0); + work_pool_wgs[group_index] = 0; + } + barrier(CLK_LOCAL_MEM_FENCE); +#endif /* __WORK_STEALING__ */ + + /* Initialize queue data and queue index. */ + if(thread_index < queuesize) { + /* Initialize active ray queue. */ + Queue_data[QUEUE_ACTIVE_AND_REGENERATED_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT; + /* Initialize background and buffer update queue. */ + Queue_data[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT; + /* Initialize shadow ray cast of AO queue. */ + Queue_data[QUEUE_SHADOW_RAY_CAST_AO_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT; + /* Initialize shadow ray cast of direct lighting queue. */ + Queue_data[QUEUE_SHADOW_RAY_CAST_DL_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT; + } + + if(thread_index == 0) { + Queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; + Queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0; + Queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0; + Queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0; + /* The scene-intersect kernel should not use the queues very first time. + * since the queue would be empty. + */ + use_queues_flag[0] = 0; + } + + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < (sw * parallel_samples) && y < sh) { + int ray_index = x + y * (sw * parallel_samples); + + /* This is the first assignment to ray_state; + * So we dont use ASSIGN_RAY_STATE macro. + */ + ray_state[ray_index] = RAY_ACTIVE; + + unsigned int my_sample; + unsigned int pixel_x; + unsigned int pixel_y; + unsigned int tile_x; + unsigned int tile_y; + unsigned int my_sample_tile; + +#ifdef __WORK_STEALING__ + unsigned int my_work = 0; + /* Get work. */ + get_next_work(work_pool_wgs, &my_work, sw, sh, num_samples, parallel_samples, ray_index); + /* Get the sample associated with the work. */ + my_sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample; + + my_sample_tile = 0; + + /* Get pixel and tile position associated with the work. */ + get_pixel_tile_position(&pixel_x, &pixel_y, + &tile_x, &tile_y, + my_work, + sw, sh, sx, sy, + parallel_samples, + ray_index); + work_array[ray_index] = my_work; +#else /* __WORK_STEALING__ */ + unsigned int tile_index = ray_index / parallel_samples; + tile_x = tile_index % sw; + tile_y = tile_index / sw; + my_sample_tile = ray_index - (tile_index * parallel_samples); + my_sample = my_sample_tile + start_sample; + + /* Initialize work array. */ + work_array[ray_index] = my_sample ; + + /* Calculate pixel position of this ray. */ + pixel_x = sx + tile_x; + pixel_y = sy + tile_y; +#endif /* __WORK_STEALING__ */ + + rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride; + + /* Initialise per_sample_output_buffers to all zeros. */ + per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + (my_sample_tile)) * kernel_data.film.pass_stride; + int per_sample_output_buffers_iterator = 0; + for(per_sample_output_buffers_iterator = 0; + per_sample_output_buffers_iterator < kernel_data.film.pass_stride; + per_sample_output_buffers_iterator++) + { + per_sample_output_buffers[per_sample_output_buffers_iterator] = 0.0f; + } + + /* Initialize random numbers and ray. */ + kernel_path_trace_setup(kg, + rng_state, + my_sample, + pixel_x, pixel_y, + &rng_coop[ray_index], + &Ray_coop[ray_index]); + + if(Ray_coop[ray_index].t != 0.0f) { + /* Initialize throughput, L_transparent, Ray, PathState; + * These rays proceed with path-iteration. + */ + throughput_coop[ray_index] = make_float3(1.0f, 1.0f, 1.0f); + L_transparent_coop[ray_index] = 0.0f; + path_radiance_init(&PathRadiance_coop[ray_index], kernel_data.film.use_light_pass); + path_state_init(kg, + &PathState_coop[ray_index], + &rng_coop[ray_index], + my_sample, + &Ray_coop[ray_index]); +#ifdef __KERNEL_DEBUG__ + debug_data_init(&debugdata_coop[ray_index]); +#endif + } else { + /* These rays do not participate in path-iteration. */ + float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + /* Accumulate result in output buffer. */ + kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad); + path_rng_end(kg, rng_state, rng_coop[ray_index]); + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE); + } + } + + /* Mark rest of the ray-state indices as RAY_INACTIVE. */ + if(thread_index < (get_global_size(0) * get_global_size(1)) - (sh * (sw * parallel_samples))) { + /* First assignment, hence we dont use ASSIGN_RAY_STATE macro */ + ray_state[((sw * parallel_samples) * sh) + thread_index] = RAY_INACTIVE; + } +} diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h new file mode 100644 index 00000000000..50c83d06140 --- /dev/null +++ b/intern/cycles/kernel/split/kernel_direct_lighting.h @@ -0,0 +1,116 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel_split_common.h" + +/* Note on kernel_direct_lighting kernel. + * This is the eighth kernel in the ray tracing logic. This is the seventh + * of the path iteration kernels. This kernel takes care of direct lighting + * logic. However, the "shadow ray cast" part of direct lighting is handled + * in the next kernel. + * + * This kernels determines the rays for which a shadow_blocked() function associated with direct lighting should be executed. + * Those rays for which a shadow_blocked() function for direct-lighting must be executed, are marked with flag RAY_SHADOW_RAY_CAST_DL and + * enqueued into the queue QUEUE_SHADOW_RAY_CAST_DL_RAYS + * + * The input and output are as follows, + * + * rng_coop -----------------------------------------|--- kernel_direct_lighting --|--- BSDFEval_coop + * PathState_coop -----------------------------------| |--- ISLamp_coop + * shader_data --------------------------------------| |--- LightRay_coop + * ray_state ----------------------------------------| |--- ray_state + * Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS) ---| | + * kg (globals + data) ------------------------------| | + * queuesize ----------------------------------------| | + * + * note on shader_DL : shader_DL is neither input nor output to this kernel; shader_DL is filled and consumed in this kernel itself. + * Note on Queues : + * This kernel only reads from the QUEUE_ACTIVE_AND_REGENERATED_RAYS queue and processes + * only the rays of state RAY_ACTIVE; If a ray needs to execute the corresponding shadow_blocked + * part, after direct lighting, the ray is marked with RAY_SHADOW_RAY_CAST_DL flag. + * + * State of queues when this kernel is called : + * state of queues QUEUE_ACTIVE_AND_REGENERATED_RAYS and QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be same + * before and after this kernel call. + * QUEUE_SHADOW_RAY_CAST_DL_RAYS queue will be filled with rays for which a shadow_blocked function must be executed, after this + * kernel call. Before this kernel call the QUEUE_SHADOW_RAY_CAST_DL_RAYS will be empty. + */ +ccl_device char kernel_direct_lighting( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Required for direct lighting */ + ccl_global char *shader_DL, /* Required for direct lighting */ + ccl_global uint *rng_coop, /* Required for direct lighting */ + ccl_global PathState *PathState_coop, /* Required for direct lighting */ + ccl_global int *ISLamp_coop, /* Required for direct lighting */ + ccl_global Ray *LightRay_coop, /* Required for direct lighting */ + ccl_global BsdfEval *BSDFEval_coop, /* Required for direct lighting */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + int ray_index) +{ + char enqueue_flag = 0; + if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + /* Load kernel globals structure and ShaderData structure. */ + KernelGlobals *kg = (KernelGlobals *)globals; + ShaderData *sd = (ShaderData *)shader_data; + ShaderData *sd_DL = (ShaderData *)shader_DL; + + ccl_global PathState *state = &PathState_coop[ray_index]; + + /* direct lighting */ +#ifdef __EMISSION__ + if((kernel_data.integrator.use_direct_light && + (ccl_fetch(sd, flag) & SD_BSDF_HAS_EVAL))) + { + /* Sample illumination from lights to find path contribution. */ + ccl_global RNG* rng = &rng_coop[ray_index]; + float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT); + float light_u, light_v; + path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v); + + LightSample ls; + light_sample(kg, + light_t, light_u, light_v, + ccl_fetch(sd, time), + ccl_fetch(sd, P), + state->bounce, + &ls); + + Ray light_ray; +#ifdef __OBJECT_MOTION__ + light_ray.time = ccl_fetch(sd, time); +#endif + + BsdfEval L_light; + bool is_lamp; + if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, + state->bounce, state->transparent_bounce, sd_DL)) + { + /* Write intermediate data to global memory to access from + * the next kernel. + */ + LightRay_coop[ray_index] = light_ray; + BSDFEval_coop[ray_index] = L_light; + ISLamp_coop[ray_index] = is_lamp; + /* Mark ray state for next shadow kernel. */ + ADD_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL); + enqueue_flag = 1; + } + } +#endif /* __EMISSION__ */ + } + return enqueue_flag; +} diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h new file mode 100644 index 00000000000..a75523a3e53 --- /dev/null +++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h @@ -0,0 +1,264 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel_split_common.h" + +/* Note on kernel_holdout_emission_blurring_pathtermination_ao kernel. + * This is the sixth kernel in the ray tracing logic. This is the fifth + * of the path iteration kernels. This kernel takes care of the logic to process + * "material of type holdout", indirect primitive emission, bsdf blurring, + * probabilistic path termination and AO. + * + * This kernels determines the rays for which a shadow_blocked() function associated with AO should be executed. + * Those rays for which a shadow_blocked() function for AO must be executed are marked with flag RAY_SHADOW_RAY_CAST_ao and + * enqueued into the queue QUEUE_SHADOW_RAY_CAST_AO_RAYS + * + * Ray state of rays that are terminated in this kernel are changed to RAY_UPDATE_BUFFER + * + * The input and output are as follows, + * + * rng_coop ---------------------------------------------|--- kernel_holdout_emission_blurring_pathtermination_ao ---|--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) + * throughput_coop --------------------------------------| |--- PathState_coop + * PathRadiance_coop ------------------------------------| |--- throughput_coop + * Intersection_coop ------------------------------------| |--- L_transparent_coop + * PathState_coop ---------------------------------------| |--- per_sample_output_buffers + * L_transparent_coop -----------------------------------| |--- PathRadiance_coop + * shader_data ------------------------------------------| |--- ShaderData + * ray_state --------------------------------------------| |--- ray_state + * Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS) -------| |--- Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) + * Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---| |--- AOAlpha_coop + * kg (globals + data) ----------------------------------| |--- AOBSDF_coop + * parallel_samples -------------------------------------| |--- AOLightRay_coop + * per_sample_output_buffers ----------------------------| | + * sw ---------------------------------------------------| | + * sh ---------------------------------------------------| | + * sx ---------------------------------------------------| | + * sy ---------------------------------------------------| | + * stride -----------------------------------------------| | + * work_array -------------------------------------------| | + * queuesize --------------------------------------------| | + * start_sample -----------------------------------------| | + * + * Note on Queues : + * This kernel fetches rays from the queue QUEUE_ACTIVE_AND_REGENERATED_RAYS and processes only + * the rays of state RAY_ACTIVE. + * There are different points in this kernel where a ray may terminate and reach RAY_UPDATE_BUFFER + * state. These rays are enqueued into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. These rays will + * still be present in QUEUE_ACTIVE_AND_REGENERATED_RAYS queue, but since their ray-state has been + * changed to RAY_UPDATE_BUFFER, there is no problem. + * + * State of queues when this kernel is called : + * At entry, + * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays + * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE rays. + * QUEUE_SHADOW_RAY_CAST_AO_RAYS will be empty. + * At exit, + * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE, RAY_REGENERATED and RAY_UPDATE_BUFFER rays + * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE and RAY_UPDATE_BUFFER rays + * QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with flag RAY_SHADOW_RAY_CAST_AO + */ +ccl_device void kernel_holdout_emission_blurring_pathtermination_ao( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Required throughout the kernel except probabilistic path termination and AO */ + ccl_global float *per_sample_output_buffers, + ccl_global uint *rng_coop, /* Required for "kernel_write_data_passes" and AO */ + ccl_global float3 *throughput_coop, /* Required for handling holdout material and AO */ + ccl_global float *L_transparent_coop, /* Required for handling holdout material */ + PathRadiance *PathRadiance_coop, /* Required for "kernel_write_data_passes" and indirect primitive emission */ + ccl_global PathState *PathState_coop, /* Required throughout the kernel and AO */ + Intersection *Intersection_coop, /* Required for indirect primitive emission */ + ccl_global float3 *AOAlpha_coop, /* Required for AO */ + ccl_global float3 *AOBSDF_coop, /* Required for AO */ + ccl_global Ray *AOLightRay_coop, /* Required for AO */ + int sw, int sh, int sx, int sy, int stride, + ccl_global char *ray_state, /* Denotes the state of each ray */ + ccl_global unsigned int *work_array, /* Denotes the work that each ray belongs to */ +#ifdef __WORK_STEALING__ + unsigned int start_sample, +#endif + int parallel_samples, /* Number of samples to be processed in parallel */ + int ray_index, + char *enqueue_flag, + char *enqueue_flag_AO_SHADOW_RAY_CAST) +{ + /* Load kernel globals structure and ShaderData structure */ + KernelGlobals *kg = (KernelGlobals *)globals; + ShaderData *sd = (ShaderData *)shader_data; + +#ifdef __WORK_STEALING__ + unsigned int my_work; + unsigned int pixel_x; + unsigned int pixel_y; +#endif + unsigned int tile_x; + unsigned int tile_y; + int my_sample_tile; + unsigned int sample; + + ccl_global RNG *rng = 0x0; + ccl_global PathState *state = 0x0; + float3 throughput; + + if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + + throughput = throughput_coop[ray_index]; + state = &PathState_coop[ray_index]; + rng = &rng_coop[ray_index]; +#ifdef __WORK_STEALING__ + my_work = work_array[ray_index]; + sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample; + get_pixel_tile_position(&pixel_x, &pixel_y, + &tile_x, &tile_y, + my_work, + sw, sh, sx, sy, + parallel_samples, + ray_index); + my_sample_tile = 0; +#else /* __WORK_STEALING__ */ + sample = work_array[ray_index]; + /* Buffer's stride is "stride"; Find x and y using ray_index. */ + int tile_index = ray_index / parallel_samples; + tile_x = tile_index % sw; + tile_y = tile_index / sw; + my_sample_tile = ray_index - (tile_index * parallel_samples); +#endif /* __WORK_STEALING__ */ + per_sample_output_buffers += + (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * + kernel_data.film.pass_stride; + + /* holdout */ +#ifdef __HOLDOUT__ + if((ccl_fetch(sd, flag) & (SD_HOLDOUT|SD_HOLDOUT_MASK)) && + (state->flag & PATH_RAY_CAMERA)) + { + if(kernel_data.background.transparent) { + float3 holdout_weight; + + if(ccl_fetch(sd, flag) & SD_HOLDOUT_MASK) + holdout_weight = make_float3(1.0f, 1.0f, 1.0f); + else + holdout_weight = shader_holdout_eval(kg, sd); + + /* any throughput is ok, should all be identical here */ + L_transparent_coop[ray_index] += average(holdout_weight*throughput); + } + + if(ccl_fetch(sd, flag) & SD_HOLDOUT_MASK) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); + *enqueue_flag = 1; + } + } +#endif /* __HOLDOUT__ */ + } + + if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + PathRadiance *L = &PathRadiance_coop[ray_index]; + /* Holdout mask objects do not write data passes. */ + kernel_write_data_passes(kg, + per_sample_output_buffers, + L, + sd, + sample, + state, + throughput); + /* Blurring of bsdf after bounces, for rays that have a small likelihood + * of following this particular path (diffuse, rough glossy. + */ + if(kernel_data.integrator.filter_glossy != FLT_MAX) { + float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf; + if(blur_pdf < 1.0f) { + float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f; + shader_bsdf_blur(kg, sd, blur_roughness); + } + } + +#ifdef __EMISSION__ + /* emission */ + if(ccl_fetch(sd, flag) & SD_EMISSION) { + /* TODO(sergey): is isect.t wrong here for transparent surfaces? */ + float3 emission = indirect_primitive_emission( + kg, + sd, + Intersection_coop[ray_index].t, + state->flag, + state->ray_pdf); + path_radiance_accum_emission(L, throughput, emission, state->bounce); + } +#endif /* __EMISSION__ */ + + /* Path termination. this is a strange place to put the termination, it's + * mainly due to the mixed in MIS that we use. gives too many unneeded + * shader evaluations, only need emission if we are going to terminate. + */ + float probability = path_state_terminate_probability(kg, state, throughput); + + if(probability == 0.0f) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); + *enqueue_flag = 1; + } + + if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + if(probability != 1.0f) { + float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE); + if(terminate >= probability) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); + *enqueue_flag = 1; + } else { + throughput_coop[ray_index] = throughput/probability; + } + } + } + } + +#ifdef __AO__ + if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + /* ambient occlusion */ + if(kernel_data.integrator.use_ambient_occlusion || + (ccl_fetch(sd, flag) & SD_AO)) + { + /* todo: solve correlation */ + float bsdf_u, bsdf_v; + path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); + + float ao_factor = kernel_data.background.ao_factor; + float3 ao_N; + AOBSDF_coop[ray_index] = shader_bsdf_ao(kg, sd, ao_factor, &ao_N); + AOAlpha_coop[ray_index] = shader_bsdf_alpha(kg, sd); + + float3 ao_D; + float ao_pdf; + sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); + + if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) { + Ray _ray; + _ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng)); + _ray.D = ao_D; + _ray.t = kernel_data.background.ao_distance; +#ifdef __OBJECT_MOTION__ + _ray.time = ccl_fetch(sd, time); +#endif + _ray.dP = ccl_fetch(sd, dP); + _ray.dD = differential3_zero(); + AOLightRay_coop[ray_index] = _ray; + + ADD_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO); + *enqueue_flag_AO_SHADOW_RAY_CAST = 1; + } + } + } +#endif /* __AO__ */ +} diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h new file mode 100644 index 00000000000..a8e4b0a06c8 --- /dev/null +++ b/intern/cycles/kernel/split/kernel_lamp_emission.h @@ -0,0 +1,179 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel_split_common.h" + +/* Note on kernel_lamp_emission + * This is the 3rd kernel in the ray-tracing logic. This is the second of the + * path-iteration kernels. This kernel takes care of the indirect lamp emission logic. + * This kernel operates on QUEUE_ACTIVE_AND_REGENERATED_RAYS. It processes rays of state RAY_ACTIVE + * and RAY_HIT_BACKGROUND. + * We will empty QUEUE_ACTIVE_AND_REGENERATED_RAYS queue in this kernel. + * The input/output of the kernel is as follows, + * Throughput_coop ------------------------------------|--- kernel_lamp_emission --|--- PathRadiance_coop + * Ray_coop -------------------------------------------| |--- Queue_data(QUEUE_ACTIVE_AND_REGENERATED_RAYS) + * PathState_coop -------------------------------------| |--- Queue_index(QUEUE_ACTIVE_AND_REGENERATED_RAYS) + * kg (globals + data) --------------------------------| | + * Intersection_coop ----------------------------------| | + * ray_state ------------------------------------------| | + * Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS) -----| | + * Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS) ----| | + * queuesize ------------------------------------------| | + * use_queues_flag ------------------------------------| | + * sw -------------------------------------------------| | + * sh -------------------------------------------------| | + * parallel_samples -----------------------------------| | + * + * note : shader_data is neither input nor output. Its just filled and consumed in the same, kernel_lamp_emission, kernel. + */ +ccl_device void kernel_lamp_emission( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Required for lamp emission */ + ccl_global float3 *throughput_coop, /* Required for lamp emission */ + PathRadiance *PathRadiance_coop, /* Required for lamp emission */ + ccl_global Ray *Ray_coop, /* Required for lamp emission */ + ccl_global PathState *PathState_coop, /* Required for lamp emission */ + Intersection *Intersection_coop, /* Required for lamp emission */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + int sw, int sh, + ccl_global char *use_queues_flag, /* Used to decide if this kernel should use + * queues to fetch ray index + */ + int parallel_samples, /* Number of samples to be processed in parallel */ + int ray_index) +{ + if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) || + IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) + { + KernelGlobals *kg = (KernelGlobals *)globals; + ShaderData *sd = (ShaderData *)shader_data; + PathRadiance *L = &PathRadiance_coop[ray_index]; + + float3 throughput = throughput_coop[ray_index]; + Ray ray = Ray_coop[ray_index]; + PathState state = PathState_coop[ray_index]; + +#ifdef __LAMP_MIS__ + if(kernel_data.integrator.use_lamp_mis && !(state.flag & PATH_RAY_CAMERA)) { + /* ray starting from previous non-transparent bounce */ + Ray light_ray; + + light_ray.P = ray.P - state.ray_t*ray.D; + state.ray_t += Intersection_coop[ray_index].t; + light_ray.D = ray.D; + light_ray.t = state.ray_t; + light_ray.time = ray.time; + light_ray.dD = ray.dD; + light_ray.dP = ray.dP; + /* intersect with lamp */ + float3 emission; + + if(indirect_lamp_emission(kg, &state, &light_ray, &emission, sd)) { + path_radiance_accum_emission(L, throughput, emission, state.bounce); + } + } +#endif /* __LAMP_MIS__ */ + + /* __VOLUME__ feature is disabled */ +#if 0 +#ifdef __VOLUME__ + /* volume attenuation, emission, scatter */ + if(state.volume_stack[0].shader != SHADER_NONE) { + Ray volume_ray = ray; + volume_ray.t = (hit)? isect.t: FLT_MAX; + + bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack); + +#ifdef __VOLUME_DECOUPLED__ + int sampling_method = volume_stack_sampling_method(kg, state.volume_stack); + bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, true, sampling_method); + + if(decoupled) { + /* cache steps along volume for repeated sampling */ + VolumeSegment volume_segment; + ShaderData volume_sd; + + shader_setup_from_volume(kg, &volume_sd, &volume_ray, state.bounce, state.transparent_bounce); + kernel_volume_decoupled_record(kg, &state, + &volume_ray, &volume_sd, &volume_segment, heterogeneous); + + volume_segment.sampling_method = sampling_method; + + /* emission */ + if(volume_segment.closure_flag & SD_EMISSION) + path_radiance_accum_emission(&L, throughput, volume_segment.accum_emission, state.bounce); + + /* scattering */ + VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED; + + if(volume_segment.closure_flag & SD_SCATTER) { + bool all = false; + + /* direct light sampling */ + kernel_branched_path_volume_connect_light(kg, rng, &volume_sd, + throughput, &state, &L, 1.0f, all, &volume_ray, &volume_segment); + + /* indirect sample. if we use distance sampling and take just + * one sample for direct and indirect light, we could share + * this computation, but makes code a bit complex */ + float rphase = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_PHASE); + float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE); + + result = kernel_volume_decoupled_scatter(kg, + &state, &volume_ray, &volume_sd, &throughput, + rphase, rscatter, &volume_segment, NULL, true); + } + + if(result != VOLUME_PATH_SCATTERED) + throughput *= volume_segment.accum_transmittance; + + /* free cached steps */ + kernel_volume_decoupled_free(kg, &volume_segment); + + if(result == VOLUME_PATH_SCATTERED) { + if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray)) + continue; + else + break; + } + } + else +#endif /* __VOLUME_DECOUPLED__ */ + { + /* integrate along volume segment with distance sampling */ + ShaderData volume_sd; + VolumeIntegrateResult result = kernel_volume_integrate( + kg, &state, &volume_sd, &volume_ray, &L, &throughput, rng, heterogeneous); + +#ifdef __VOLUME_SCATTER__ + if(result == VOLUME_PATH_SCATTERED) { + /* direct lighting */ + kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, &L); + + /* indirect light bounce */ + if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray)) + continue; + else + break; + } +#endif /* __VOLUME_SCATTER__ */ + } + } +#endif /* __VOLUME__ */ +#endif + } +} diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h new file mode 100644 index 00000000000..e1a1577d7ae --- /dev/null +++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h @@ -0,0 +1,144 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel_split_common.h" + +/* Note on kernel_setup_next_iteration kernel. + * This is the tenth kernel in the ray tracing logic. This is the ninth + * of the path iteration kernels. This kernel takes care of setting up + * Ray for the next iteration of path-iteration and accumulating radiance + * corresponding to AO and direct-lighting + * + * Ray state of rays that are terminated in this kernel are changed to RAY_UPDATE_BUFFER + * + * The input and output are as follows, + * + * rng_coop ---------------------------------------------|--- kernel_next_iteration_setup -|--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) + * throughput_coop --------------------------------------| |--- Queue_data (QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS) + * PathRadiance_coop ------------------------------------| |--- throughput_coop + * PathState_coop ---------------------------------------| |--- PathRadiance_coop + * shader_data ------------------------------------------| |--- PathState_coop + * ray_state --------------------------------------------| |--- ray_state + * Queue_data (QUEUE_ACTIVE_AND_REGENERATD_RAYS) --------| |--- Ray_coop + * Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---| |--- use_queues_flag + * Ray_coop ---------------------------------------------| | + * kg (globals + data) ----------------------------------| | + * LightRay_dl_coop -------------------------------------| + * ISLamp_coop ------------------------------------------| + * BSDFEval_coop ----------------------------------------| + * LightRay_ao_coop -------------------------------------| + * AOBSDF_coop ------------------------------------------| + * AOAlpha_coop -----------------------------------------| + * + * Note on queues, + * This kernel fetches rays from the queue QUEUE_ACTIVE_AND_REGENERATED_RAYS and processes only + * the rays of state RAY_ACTIVE. + * There are different points in this kernel where a ray may terminate and reach RAY_UPDATE_BUFF + * state. These rays are enqueued into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. These rays will + * still be present in QUEUE_ACTIVE_AND_REGENERATED_RAYS queue, but since their ray-state has been + * changed to RAY_UPDATE_BUFF, there is no problem. + * + * State of queues when this kernel is called : + * At entry, + * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE, RAY_REGENERATED, RAY_UPDATE_BUFFER rays. + * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE and RAY_UPDATE_BUFFER rays + * At exit, + * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE, RAY_REGENERATED and more RAY_UPDATE_BUFFER rays. + * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE and more RAY_UPDATE_BUFFER rays + */ +ccl_device char kernel_next_iteration_setup( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Required for setting up ray for next iteration */ + ccl_global uint *rng_coop, /* Required for setting up ray for next iteration */ + ccl_global float3 *throughput_coop, /* Required for setting up ray for next iteration */ + PathRadiance *PathRadiance_coop, /* Required for setting up ray for next iteration */ + ccl_global Ray *Ray_coop, /* Required for setting up ray for next iteration */ + ccl_global PathState *PathState_coop, /* Required for setting up ray for next iteration */ + ccl_global Ray *LightRay_dl_coop, /* Required for radiance update - direct lighting */ + ccl_global int *ISLamp_coop, /* Required for radiance update - direct lighting */ + ccl_global BsdfEval *BSDFEval_coop, /* Required for radiance update - direct lighting */ + ccl_global Ray *LightRay_ao_coop, /* Required for radiance update - AO */ + ccl_global float3 *AOBSDF_coop, /* Required for radiance update - AO */ + ccl_global float3 *AOAlpha_coop, /* Required for radiance update - AO */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + ccl_global char *use_queues_flag, /* flag to decide if scene_intersect kernel should + * use queues to fetch ray index */ + int ray_index) +{ + char enqueue_flag = 0; + + /* Load kernel globals structure and ShaderData structure. */ + KernelGlobals *kg = (KernelGlobals *)globals; + ShaderData *sd = (ShaderData *)shader_data; + PathRadiance *L = 0x0; + ccl_global PathState *state = 0x0; + + /* Path radiance update for AO/Direct_lighting's shadow blocked. */ + if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) || + IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO)) + { + state = &PathState_coop[ray_index]; + L = &PathRadiance_coop[ray_index]; + float3 _throughput = throughput_coop[ray_index]; + + if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO)) { + float3 shadow = LightRay_ao_coop[ray_index].P; + char update_path_radiance = LightRay_ao_coop[ray_index].t; + if(update_path_radiance) { + path_radiance_accum_ao(L, + _throughput, + AOAlpha_coop[ray_index], + AOBSDF_coop[ray_index], + shadow, + state->bounce); + } + REMOVE_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO); + } + + if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL)) { + float3 shadow = LightRay_dl_coop[ray_index].P; + char update_path_radiance = LightRay_dl_coop[ray_index].t; + if(update_path_radiance) { + BsdfEval L_light = BSDFEval_coop[ray_index]; + path_radiance_accum_light(L, + _throughput, + &L_light, + shadow, + 1.0f, + state->bounce, + ISLamp_coop[ray_index]); + } + REMOVE_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL); + } + } + + if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + ccl_global float3 *throughput = &throughput_coop[ray_index]; + ccl_global Ray *ray = &Ray_coop[ray_index]; + ccl_global RNG* rng = &rng_coop[ray_index]; + state = &PathState_coop[ray_index]; + L = &PathRadiance_coop[ray_index]; + + /* Compute direct lighting and next bounce. */ + if(!kernel_path_surface_bounce(kg, rng, sd, throughput, state, L, ray)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); + enqueue_flag = 1; + } + } + + return enqueue_flag; +} diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h new file mode 100644 index 00000000000..7eb201ecf32 --- /dev/null +++ b/intern/cycles/kernel/split/kernel_scene_intersect.h @@ -0,0 +1,138 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel_split_common.h" + +/* Note on kernel_scene_intersect kernel. + * This is the second kernel in the ray tracing logic. This is the first + * of the path iteration kernels. This kernel takes care of scene_intersect function. + * + * This kernel changes the ray_state of RAY_REGENERATED rays to RAY_ACTIVE. + * This kernel processes rays of ray state RAY_ACTIVE + * This kernel determines the rays that have hit the background and changes their ray state to RAY_HIT_BACKGROUND. + * + * The input and output are as follows, + * + * Ray_coop ---------------------------------------|--------- kernel_scene_intersect----------|--- PathState + * PathState_coop ---------------------------------| |--- Intersection + * ray_state --------------------------------------| |--- ray_state + * use_queues_flag --------------------------------| | + * parallel_samples -------------------------------| | + * QueueData(QUEUE_ACTIVE_AND_REGENERATED_RAYS) ---| | + * kg (data + globals) ----------------------------| | + * rng_coop ---------------------------------------| | + * sw ---------------------------------------------| | + * sh ---------------------------------------------| | + * queuesize --------------------------------------| | + * + * Note on Queues : + * Ideally we would want kernel_scene_intersect to work on queues. + * But during the very first time, the queues will be empty and hence we perform a direct mapping + * between ray-index and thread-index; From the next time onward, the queue will be filled and + * we may start operating on queues. + * + * State of queue during the first time this kernel is called : + * QUEUE_ACTIVE_AND_REGENERATED_RAYS and QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty.before and after this kernel + * + * State of queues during other times this kernel is called : + * At entry, + * QUEUE_ACTIVE_AND_REGENERATED_RAYS will have a mix of RAY_ACTIVE, RAY_UPDATE_BUFFER and RAY_REGENERATED rays; + * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE and RAY_UPDATE_BUFFER rays ; + * (The rays that are in the state RAY_UPDATE_BUFFER in both the queues are actually the same rays; These + * are the rays that were in RAY_ACTIVE state during the initial enqueue but on further processing + * , by different kernels, have turned into RAY_UPDATE_BUFFER rays. Since all kernel, even after fetching from + * QUEUE_ACTIVE_AND_REGENERATED_RAYS, proceed further based on ray state information, RAY_UPDATE_BUFFER rays + * being present in QUEUE_ACTIVE_AND_REGENERATED_RAYS does not cause any logical issues) + * At exit, + * QUEUE_ACTIVE_AND_REGENERATED_RAYS - All RAY_REGENERATED rays will have been converted to RAY_ACTIVE and + * Some rays in QUEUE_ACTIVE_AND_REGENERATED_RAYS queue will move to state RAY_HIT_BACKGROUND + * QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS - no change + */ + +ccl_device void kernel_scene_intersect( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global uint *rng_coop, + ccl_global Ray *Ray_coop, /* Required for scene_intersect */ + ccl_global PathState *PathState_coop, /* Required for scene_intersect */ + Intersection *Intersection_coop, /* Required for scene_intersect */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + int sw, int sh, + ccl_global char *use_queues_flag, /* used to decide if this kernel should use + * queues to fetch ray index */ +#ifdef __KERNEL_DEBUG__ + DebugData *debugdata_coop, +#endif + int parallel_samples, /* Number of samples to be processed in parallel */ + int ray_index) +{ + /* All regenerated rays become active here */ + if(IS_STATE(ray_state, ray_index, RAY_REGENERATED)) + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_ACTIVE); + + if(!IS_STATE(ray_state, ray_index, RAY_ACTIVE)) + return; + + /* Load kernel globals structure */ + KernelGlobals *kg = (KernelGlobals *)globals; + +#ifdef __KERNEL_DEBUG__ + DebugData *debug_data = &debugdata_coop[ray_index]; +#endif + Intersection *isect = &Intersection_coop[ray_index]; + PathState state = PathState_coop[ray_index]; + Ray ray = Ray_coop[ray_index]; + + /* intersect scene */ + uint visibility = path_state_ray_visibility(kg, &state); + +#ifdef __HAIR__ + float difl = 0.0f, extmax = 0.0f; + uint lcg_state = 0; + RNG rng = rng_coop[ray_index]; + + if(kernel_data.bvh.have_curves) { + if((kernel_data.cam.resolution == 1) && (state.flag & PATH_RAY_CAMERA)) { + float3 pixdiff = ray.dD.dx + ray.dD.dy; + /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/ + difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f; + } + + extmax = kernel_data.curve.maximum_width; + lcg_state = lcg_state_init(&rng, &state, 0x51633e2d); + } + + bool hit = scene_intersect(kg, &ray, visibility, isect, &lcg_state, difl, extmax); +#else + bool hit = scene_intersect(kg, &ray, visibility, isect, NULL, 0.0f, 0.0f); +#endif + +#ifdef __KERNEL_DEBUG__ + if(state.flag & PATH_RAY_CAMERA) { + debug_data->num_bvh_traversal_steps += isect->num_traversal_steps; + debug_data->num_bvh_traversed_instances += isect->num_traversed_instances; + } + debug_data->num_ray_bounces++; +#endif + + if(!hit) { + /* Change the state of rays that hit the background; + * These rays undergo special processing in the + * background_bufferUpdate kernel. + */ + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND); + } +} diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h new file mode 100644 index 00000000000..e6fdc592586 --- /dev/null +++ b/intern/cycles/kernel/split/kernel_shader_eval.h @@ -0,0 +1,75 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel_split_common.h" + +/* Note on kernel_shader_eval kernel + * This kernel is the 5th kernel in the ray tracing logic. This is + * the 4rd kernel in path iteration. This kernel sets up the ShaderData + * structure from the values computed by the previous kernels. It also identifies + * the rays of state RAY_TO_REGENERATE and enqueues them in QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. + * + * The input and output of the kernel is as follows, + * rng_coop -------------------------------------------|--- kernel_shader_eval --|--- shader_data + * Ray_coop -------------------------------------------| |--- Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) + * PathState_coop -------------------------------------| |--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) + * Intersection_coop ----------------------------------| | + * Queue_data (QUEUE_ACTIVE_AND_REGENERATD_RAYS)-------| | + * Queue_index(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)---| | + * ray_state ------------------------------------------| | + * kg (globals + data) --------------------------------| | + * queuesize ------------------------------------------| | + * + * Note on Queues : + * This kernel reads from the QUEUE_ACTIVE_AND_REGENERATED_RAYS queue and processes + * only the rays of state RAY_ACTIVE; + * State of queues when this kernel is called, + * at entry, + * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays + * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty. + * at exit, + * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays + * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE rays + */ +ccl_device void kernel_shader_eval( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Output ShaderData structure to be filled */ + ccl_global uint *rng_coop, /* Required for rbsdf calculation */ + ccl_global Ray *Ray_coop, /* Required for setting up shader from ray */ + ccl_global PathState *PathState_coop, /* Required for all functions in this kernel */ + Intersection *Intersection_coop, /* Required for setting up shader from ray */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + int ray_index) +{ + if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + KernelGlobals *kg = (KernelGlobals *)globals; + ShaderData *sd = (ShaderData *)shader_data; + Intersection *isect = &Intersection_coop[ray_index]; + ccl_global uint *rng = &rng_coop[ray_index]; + ccl_global PathState *state = &PathState_coop[ray_index]; + Ray ray = Ray_coop[ray_index]; + + shader_setup_from_ray(kg, + sd, + isect, + &ray, + state->bounce, + state->transparent_bounce); + float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF); + shader_eval_surface(kg, sd, rbsdf, state->flag, SHADER_CONTEXT_MAIN); + } +} diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked.h b/intern/cycles/kernel/split/kernel_shadow_blocked.h new file mode 100644 index 00000000000..28351c2b1ae --- /dev/null +++ b/intern/cycles/kernel/split/kernel_shadow_blocked.h @@ -0,0 +1,99 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel_split_common.h" + +/* Note on kernel_shadow_blocked kernel. + * This is the ninth kernel in the ray tracing logic. This is the eighth + * of the path iteration kernels. This kernel takes care of "shadow ray cast" + * logic of the direct lighting and AO part of ray tracing. + * + * The input and output are as follows, + * + * PathState_coop ----------------------------------|--- kernel_shadow_blocked --| + * LightRay_dl_coop --------------------------------| |--- LightRay_dl_coop + * LightRay_ao_coop --------------------------------| |--- LightRay_ao_coop + * ray_state ---------------------------------------| |--- ray_state + * Queue_data(QUEUE_SHADOW_RAY_CAST_AO_RAYS & | |--- Queue_data (QUEUE_SHADOW_RAY_CAST_AO_RAYS & QUEUE_SHADOW_RAY_CAST_AO_RAYS) + QUEUE_SHADOW_RAY_CAST_DL_RAYS) -------| | + * Queue_index(QUEUE_SHADOW_RAY_CAST_AO_RAYS& + QUEUE_SHADOW_RAY_CAST_DL_RAYS) -------| | + * kg (globals + data) -----------------------------| | + * queuesize ---------------------------------------| | + * + * Note on shader_shadow : shader_shadow is neither input nor output to this kernel. shader_shadow is filled and consumed in this kernel itself. + * Note on queues : + * The kernel fetches from QUEUE_SHADOW_RAY_CAST_AO_RAYS and QUEUE_SHADOW_RAY_CAST_DL_RAYS queues. We will empty + * these queues this kernel. + * State of queues when this kernel is called : + * state of queues QUEUE_ACTIVE_AND_REGENERATED_RAYS and QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be same + * before and after this kernel call. + * QUEUE_SHADOW_RAY_CAST_AO_RAYS & QUEUE_SHADOW_RAY_CAST_DL_RAYS will be filled with rays marked with flags RAY_SHADOW_RAY_CAST_AO + * and RAY_SHADOW_RAY_CAST_DL respectively, during kernel entry. + * QUEUE_SHADOW_RAY_CAST_AO_RAYS and QUEUE_SHADOW_RAY_CAST_DL_RAYS will be empty at kernel exit. + */ +ccl_device void kernel_shadow_blocked( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_shadow, /* Required for shadow blocked */ + ccl_global PathState *PathState_coop, /* Required for shadow blocked */ + ccl_global Ray *LightRay_dl_coop, /* Required for direct lighting's shadow blocked */ + ccl_global Ray *LightRay_ao_coop, /* Required for AO's shadow blocked */ + Intersection *Intersection_coop_AO, + Intersection *Intersection_coop_DL, + ccl_global char *ray_state, + int total_num_rays, + char shadow_blocked_type, + int ray_index) +{ + /* Flag determining if we need to update L. */ + char update_path_radiance = 0; + + if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) || + IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO)) + { + /* Load kernel global structure. */ + KernelGlobals *kg = (KernelGlobals *)globals; + ShaderData *sd_shadow = (ShaderData *)shader_shadow; + + ccl_global PathState *state = &PathState_coop[ray_index]; + ccl_global Ray *light_ray_dl_global = &LightRay_dl_coop[ray_index]; + ccl_global Ray *light_ray_ao_global = &LightRay_ao_coop[ray_index]; + Intersection *isect_ao_global = &Intersection_coop_AO[ray_index]; + Intersection *isect_dl_global = &Intersection_coop_DL[ray_index]; + + ccl_global Ray *light_ray_global = + shadow_blocked_type == RAY_SHADOW_RAY_CAST_AO + ? light_ray_ao_global + : light_ray_dl_global; + Intersection *isect_global = + RAY_SHADOW_RAY_CAST_AO ? isect_ao_global : isect_dl_global; + + float3 shadow; + update_path_radiance = !(shadow_blocked(kg, + state, + light_ray_global, + &shadow, + sd_shadow, + isect_global)); + + /* We use light_ray_global's P and t to store shadow and + * update_path_radiance. + */ + light_ray_global->P = shadow; + light_ray_global->t = update_path_radiance; + } +} diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h new file mode 100644 index 00000000000..e1c7e2cea99 --- /dev/null +++ b/intern/cycles/kernel/split/kernel_split_common.h @@ -0,0 +1,62 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __KERNEL_SPLIT_H__ +#define __KERNEL_SPLIT_H__ + +#include "kernel_compat_opencl.h" +#include "kernel_math.h" +#include "kernel_types.h" +#include "kernel_globals.h" + +#include "util_atomic.h" + +#include "kernel_random.h" +#include "kernel_projection.h" +#include "kernel_montecarlo.h" +#include "kernel_differential.h" +#include "kernel_camera.h" + +#include "geom/geom.h" + +#include "kernel_accumulate.h" +#include "kernel_shader.h" +#include "kernel_light.h" +#include "kernel_passes.h" + +#ifdef __SUBSURFACE__ +#include "kernel_subsurface.h" +#endif + +#ifdef __VOLUME__ +#include "kernel_volume.h" +#endif + +#include "kernel_path_state.h" +#include "kernel_shadow.h" +#include "kernel_emission.h" +#include "kernel_path_common.h" +#include "kernel_path_surface.h" +#include "kernel_path_volume.h" + +#ifdef __KERNEL_DEBUG__ +#include "kernel_debug.h" +#endif + +#include "kernel_queues.h" +#include "kernel_work_stealing.h" + +#endif /* __KERNEL_SPLIT_H__ */ diff --git a/intern/cycles/kernel/split/kernel_sum_all_radiance.h b/intern/cycles/kernel/split/kernel_sum_all_radiance.h new file mode 100644 index 00000000000..a21e9b6a0b1 --- /dev/null +++ b/intern/cycles/kernel/split/kernel_sum_all_radiance.h @@ -0,0 +1,59 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../kernel_compat_opencl.h" +#include "../kernel_math.h" +#include "../kernel_types.h" +#include "../kernel_globals.h" + +/* Since we process various samples in parallel; The output radiance of different samples + * are stored in different locations; This kernel combines the output radiance contributed + * by all different samples and stores them in the RenderTile's output buffer. + */ +ccl_device void kernel_sum_all_radiance( + ccl_constant KernelData *data, /* To get pass_stride to offet into buffer */ + ccl_global float *buffer, /* Output buffer of RenderTile */ + ccl_global float *per_sample_output_buffer, /* Radiance contributed by all samples */ + int parallel_samples, int sw, int sh, int stride, + int buffer_offset_x, + int buffer_offset_y, + int buffer_stride, + int start_sample) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < sw && y < sh) { + buffer += ((buffer_offset_x + x) + (buffer_offset_y + y) * buffer_stride) * (data->film.pass_stride); + per_sample_output_buffer += ((x + y * stride) * parallel_samples) * (data->film.pass_stride); + + int sample_stride = (data->film.pass_stride); + + int sample_iterator = 0; + int pass_stride_iterator = 0; + int num_floats = data->film.pass_stride; + + for(sample_iterator = 0; sample_iterator < parallel_samples; sample_iterator++) { + for(pass_stride_iterator = 0; pass_stride_iterator < num_floats; pass_stride_iterator++) { + *(buffer + pass_stride_iterator) = + (start_sample == 0 && sample_iterator == 0) + ? *(per_sample_output_buffer + pass_stride_iterator) + : *(buffer + pass_stride_iterator) + *(per_sample_output_buffer + pass_stride_iterator); + } + per_sample_output_buffer += sample_stride; + } + } +} diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index efbffacf375..84fc0fcf587 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __SVM_H__ @@ -102,7 +102,7 @@ ccl_device_inline int stack_load_int(float *stack, uint a) return __float_as_int(stack[a]); } -ccl_device_inline float stack_load_int_default(float *stack, uint a, uint value) +ccl_device_inline int stack_load_int_default(float *stack, uint a, uint value) { return (a == (uint)SVM_STACK_INVALID)? (int)value: stack_load_int(stack, a); } @@ -157,6 +157,8 @@ CCL_NAMESPACE_END #include "svm_noise.h" #include "svm_texture.h" +#include "svm_math_util.h" + #include "svm_attribute.h" #include "svm_gradient.h" #include "svm_blackbody.h" @@ -192,20 +194,24 @@ CCL_NAMESPACE_END #include "svm_checker.h" #include "svm_brick.h" #include "svm_vector_transform.h" +#include "svm_voxel.h" CCL_NAMESPACE_BEGIN -/* Main Interpreter Loop */ +#define NODES_GROUP(group) ((group) <= __NODES_MAX_GROUP__) +#define NODES_FEATURE(feature) ((__NODES_FEATURES__ & (feature)) != 0) +/* Main Interpreter Loop */ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderType type, int path_flag) { float stack[SVM_STACK_SIZE]; - int offset = sd->shader & SHADER_MASK; + int offset = ccl_fetch(sd, shader) & SHADER_MASK; while(1) { uint4 node = read_node(kg, &offset); switch(node.x) { +#if NODES_GROUP(NODE_GROUP_LEVEL_0) case NODE_SHADER_JUMP: { if(type == SHADER_TYPE_SURFACE) offset = node.y; else if(type == SHADER_TYPE_VOLUME) offset = node.z; @@ -222,15 +228,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade case NODE_CLOSURE_BACKGROUND: svm_node_closure_background(sd, stack, node); break; - case NODE_CLOSURE_HOLDOUT: - svm_node_closure_holdout(sd, stack, node); - break; - case NODE_CLOSURE_AMBIENT_OCCLUSION: - svm_node_closure_ambient_occlusion(sd, stack, node); - break; - case NODE_CLOSURE_VOLUME: - svm_node_closure_volume(kg, sd, stack, node, path_flag); - break; case NODE_CLOSURE_SET_WEIGHT: svm_node_closure_set_weight(sd, node.y, node.z, node.w); break; @@ -251,13 +248,137 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade if(stack_load_float(stack, node.z) == 1.0f) offset += node.y; break; -#ifdef __TEXTURES__ + case NODE_GEOMETRY: + svm_node_geometry(kg, sd, stack, node.y, node.z); + break; + case NODE_CONVERT: + svm_node_convert(sd, stack, node.y, node.z, node.w); + break; + case NODE_TEX_COORD: + svm_node_tex_coord(kg, sd, path_flag, stack, node, &offset); + break; + case NODE_VALUE_F: + svm_node_value_f(kg, sd, stack, node.y, node.z); + break; + case NODE_VALUE_V: + svm_node_value_v(kg, sd, stack, node.y, &offset); + break; + case NODE_ATTR: + svm_node_attr(kg, sd, stack, node); + break; +# if NODES_FEATURE(NODE_FEATURE_BUMP) + case NODE_GEOMETRY_BUMP_DX: + svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z); + break; + case NODE_GEOMETRY_BUMP_DY: + svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z); + break; + case NODE_SET_DISPLACEMENT: + svm_node_set_displacement(sd, stack, node.y); + break; +# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */ +# ifdef __TEXTURES__ case NODE_TEX_IMAGE: svm_node_tex_image(kg, sd, stack, node); break; case NODE_TEX_IMAGE_BOX: svm_node_tex_image_box(kg, sd, stack, node); break; + case NODE_TEX_NOISE: + svm_node_tex_noise(kg, sd, stack, node, &offset); + break; +# endif /* __TEXTURES__ */ +# ifdef __EXTRA_NODES__ +# if NODES_FEATURE(NODE_FEATURE_BUMP) + case NODE_SET_BUMP: + svm_node_set_bump(kg, sd, stack, node); + break; + case NODE_ATTR_BUMP_DX: + svm_node_attr_bump_dx(kg, sd, stack, node); + break; + case NODE_ATTR_BUMP_DY: + svm_node_attr_bump_dy(kg, sd, stack, node); + break; + case NODE_TEX_COORD_BUMP_DX: + svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, &offset); + break; + case NODE_TEX_COORD_BUMP_DY: + svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, &offset); + break; + case NODE_CLOSURE_SET_NORMAL: + svm_node_set_normal(kg, sd, stack, node.y, node.z); + break; +# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */ + case NODE_HSV: + svm_node_hsv(kg, sd, stack, node.y, node.z, node.w, &offset); + break; +# endif /* __EXTRA_NODES__ */ +#endif /* NODES_GROUP(NODE_GROUP_LEVEL_0) */ + +#if NODES_GROUP(NODE_GROUP_LEVEL_1) + case NODE_CLOSURE_HOLDOUT: + svm_node_closure_holdout(sd, stack, node); + break; + case NODE_CLOSURE_AMBIENT_OCCLUSION: + svm_node_closure_ambient_occlusion(sd, stack, node); + break; + case NODE_FRESNEL: + svm_node_fresnel(sd, stack, node.y, node.z, node.w); + break; + case NODE_LAYER_WEIGHT: + svm_node_layer_weight(sd, stack, node); + break; +# if NODES_FEATURE(NODE_FEATURE_VOLUME) + case NODE_CLOSURE_VOLUME: + svm_node_closure_volume(kg, sd, stack, node, path_flag); + break; +# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */ +# ifdef __EXTRA_NODES__ + case NODE_MATH: + svm_node_math(kg, sd, stack, node.y, node.z, node.w, &offset); + break; + case NODE_VECTOR_MATH: + svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, &offset); + break; + case NODE_RGB_RAMP: + svm_node_rgb_ramp(kg, sd, stack, node, &offset); + break; + case NODE_GAMMA: + svm_node_gamma(sd, stack, node.y, node.z, node.w); + break; + case NODE_BRIGHTCONTRAST: + svm_node_brightness(sd, stack, node.y, node.z, node.w); + break; + case NODE_LIGHT_PATH: + svm_node_light_path(sd, stack, node.y, node.z, path_flag); + break; + case NODE_OBJECT_INFO: + svm_node_object_info(kg, sd, stack, node.y, node.z); + break; + case NODE_PARTICLE_INFO: + svm_node_particle_info(kg, sd, stack, node.y, node.z); + break; +# ifdef __HAIR__ +# if NODES_FEATURE(NODE_FEATURE_HAIR) + case NODE_HAIR_INFO: + svm_node_hair_info(kg, sd, stack, node.y, node.z); + break; +# endif /* NODES_FEATURE(NODE_FEATURE_HAIR) */ +# endif /* __HAIR__ */ +# endif /* __EXTRA_NODES__ */ +#endif /* NODES_GROUP(NODE_GROUP_LEVEL_1) */ + +#if NODES_GROUP(NODE_GROUP_LEVEL_2) + case NODE_MAPPING: + svm_node_mapping(kg, sd, stack, node.y, node.z, &offset); + break; + case NODE_MIN_MAX: + svm_node_min_max(kg, sd, stack, node.y, node.z, &offset); + break; + case NODE_CAMERA: + svm_node_camera(kg, sd, stack, node.y, node.z, node.w); + break; +# ifdef __TEXTURES__ case NODE_TEX_ENVIRONMENT: svm_node_tex_environment(kg, sd, stack, node); break; @@ -267,9 +388,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade case NODE_TEX_GRADIENT: svm_node_tex_gradient(sd, stack, node); break; - case NODE_TEX_NOISE: - svm_node_tex_noise(kg, sd, stack, node, &offset); - break; case NODE_TEX_VORONOI: svm_node_tex_voronoi(kg, sd, stack, node, &offset); break; @@ -288,55 +406,34 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade case NODE_TEX_BRICK: svm_node_tex_brick(kg, sd, stack, node, &offset); break; -#endif - case NODE_CAMERA: - svm_node_camera(kg, sd, stack, node.y, node.z, node.w); - break; - case NODE_GEOMETRY: - svm_node_geometry(kg, sd, stack, node.y, node.z); - break; -#ifdef __EXTRA_NODES__ - case NODE_GEOMETRY_BUMP_DX: - svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z); - break; - case NODE_GEOMETRY_BUMP_DY: - svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z); - break; - case NODE_LIGHT_PATH: - svm_node_light_path(sd, stack, node.y, node.z, path_flag); - break; - case NODE_OBJECT_INFO: - svm_node_object_info(kg, sd, stack, node.y, node.z); - break; - case NODE_PARTICLE_INFO: - svm_node_particle_info(kg, sd, stack, node.y, node.z); +# endif /* __TEXTURES__ */ +# ifdef __EXTRA_NODES__ + case NODE_NORMAL: + svm_node_normal(kg, sd, stack, node.y, node.z, node.w, &offset); break; -#ifdef __HAIR__ - case NODE_HAIR_INFO: - svm_node_hair_info(kg, sd, stack, node.y, node.z); + case NODE_LIGHT_FALLOFF: + svm_node_light_falloff(sd, stack, node); break; -#endif +# endif /* __EXTRA_NODES__ */ +#endif /* NODES_GROUP(NODE_GROUP_LEVEL_2) */ -#endif - case NODE_CONVERT: - svm_node_convert(sd, stack, node.y, node.z, node.w); +#if NODES_GROUP(NODE_GROUP_LEVEL_3) + case NODE_RGB_CURVES: + svm_node_rgb_curves(kg, sd, stack, node, &offset); break; - case NODE_VALUE_F: - svm_node_value_f(kg, sd, stack, node.y, node.z); + case NODE_VECTOR_CURVES: + svm_node_vector_curves(kg, sd, stack, node, &offset); break; - case NODE_VALUE_V: - svm_node_value_v(kg, sd, stack, node.y, &offset); + case NODE_TANGENT: + svm_node_tangent(kg, sd, stack, node); break; -#ifdef __EXTRA_NODES__ + case NODE_NORMAL_MAP: + svm_node_normal_map(kg, sd, stack, node); + break; +# ifdef __EXTRA_NODES__ case NODE_INVERT: svm_node_invert(sd, stack, node.y, node.z, node.w); break; - case NODE_GAMMA: - svm_node_gamma(sd, stack, node.y, node.z, node.w); - break; - case NODE_BRIGHTCONTRAST: - svm_node_brightness(sd, stack, node.y, node.z, node.w); - break; case NODE_MIX: svm_node_mix(kg, sd, stack, node.y, node.z, node.w, &offset); break; @@ -352,30 +449,11 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade case NODE_COMBINE_HSV: svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, &offset); break; - case NODE_HSV: - svm_node_hsv(kg, sd, stack, node.y, node.z, node.w, &offset); - break; -#endif - case NODE_ATTR: - svm_node_attr(kg, sd, stack, node); - break; -#ifdef __EXTRA_NODES__ - case NODE_ATTR_BUMP_DX: - svm_node_attr_bump_dx(kg, sd, stack, node); - break; - case NODE_ATTR_BUMP_DY: - svm_node_attr_bump_dy(kg, sd, stack, node); - break; -#endif - case NODE_FRESNEL: - svm_node_fresnel(sd, stack, node.y, node.z, node.w); - break; - case NODE_LAYER_WEIGHT: - svm_node_layer_weight(sd, stack, node); + case NODE_VECTOR_TRANSFORM: + svm_node_vector_transform(kg, sd, stack, node); break; -#ifdef __EXTRA_NODES__ case NODE_WIREFRAME: - svm_node_wireframe(kg, sd, stack, node.y, node.z, node.w); + svm_node_wireframe(kg, sd, stack, node); break; case NODE_WAVELENGTH: svm_node_wavelength(sd, stack, node.y, node.z); @@ -383,70 +461,25 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade case NODE_BLACKBODY: svm_node_blackbody(kg, sd, stack, node.y, node.z); break; - case NODE_SET_DISPLACEMENT: - svm_node_set_displacement(sd, stack, node.y); - break; - case NODE_SET_BUMP: - svm_node_set_bump(kg, sd, stack, node); - break; - case NODE_MATH: - svm_node_math(kg, sd, stack, node.y, node.z, node.w, &offset); - break; - case NODE_VECTOR_MATH: - svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, &offset); - break; - case NODE_VECTOR_TRANSFORM: - svm_node_vector_transform(kg, sd, stack, node); - break; - case NODE_NORMAL: - svm_node_normal(kg, sd, stack, node.y, node.z, node.w, &offset); - break; -#endif - case NODE_MAPPING: - svm_node_mapping(kg, sd, stack, node.y, node.z, &offset); - break; - case NODE_MIN_MAX: - svm_node_min_max(kg, sd, stack, node.y, node.z, &offset); - break; - case NODE_TEX_COORD: - svm_node_tex_coord(kg, sd, path_flag, stack, node.y, node.z); - break; -#ifdef __EXTRA_NODES__ - case NODE_TEX_COORD_BUMP_DX: - svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node.y, node.z); - break; - case NODE_TEX_COORD_BUMP_DY: - svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node.y, node.z); - break; - case NODE_CLOSURE_SET_NORMAL: - svm_node_set_normal(kg, sd, stack, node.y, node.z ); - break; - case NODE_RGB_RAMP: - svm_node_rgb_ramp(kg, sd, stack, node, &offset); - break; - case NODE_RGB_CURVES: - svm_node_rgb_curves(kg, sd, stack, node, &offset); - break; - case NODE_VECTOR_CURVES: - svm_node_vector_curves(kg, sd, stack, node, &offset); - break; - case NODE_LIGHT_FALLOFF: - svm_node_light_falloff(sd, stack, node); - break; -#endif - case NODE_TANGENT: - svm_node_tangent(kg, sd, stack, node); - break; - case NODE_NORMAL_MAP: - svm_node_normal_map(kg, sd, stack, node); +# endif /* __EXTRA_NODES__ */ +# if NODES_FEATURE(NODE_FEATURE_VOLUME) && !defined(__KERNEL_GPU__) + case NODE_TEX_VOXEL: + svm_node_tex_voxel(kg, sd, stack, node, &offset); break; +# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) && !defined(__KERNEL_GPU__) */ +#endif /* NODES_GROUP(NODE_GROUP_LEVEL_3) */ case NODE_END: + return; default: + kernel_assert(!"Unknown node type was passed to the SVM machine"); return; } } } +#undef NODES_GROUP +#undef NODES_FEATURE + CCL_NAMESPACE_END #ifdef __CAMERA_RAY_NODES__ diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h index fd0ea7fef31..025ae96f59d 100644 --- a/intern/cycles/kernel/svm/svm_attribute.h +++ b/intern/cycles/kernel/svm/svm_attribute.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -22,12 +22,12 @@ ccl_device void svm_node_attr_init(KernelGlobals *kg, ShaderData *sd, uint4 node, NodeAttributeType *type, NodeAttributeType *mesh_type, AttributeElement *elem, int *offset, uint *out_offset) { - if(sd->object != OBJECT_NONE) { + if(ccl_fetch(sd, object) != OBJECT_NONE) { /* find attribute by unique id */ uint id = node.y; - uint attr_offset = sd->object*kernel_data.bvh.attributes_map_stride; + uint attr_offset = ccl_fetch(sd, object)*kernel_data.bvh.attributes_map_stride; #ifdef __HAIR__ - attr_offset = (sd->type & PRIMITIVE_ALL_CURVE)? attr_offset + ATTR_PRIM_CURVE: attr_offset; + attr_offset = (ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE)? attr_offset + ATTR_PRIM_CURVE: attr_offset; #endif uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); diff --git a/intern/cycles/kernel/svm/svm_blackbody.h b/intern/cycles/kernel/svm/svm_blackbody.h index 15257aed92e..b750ad87b7f 100644 --- a/intern/cycles/kernel/svm/svm_blackbody.h +++ b/intern/cycles/kernel/svm/svm_blackbody.h @@ -36,48 +36,12 @@ CCL_NAMESPACE_BEGIN ccl_device void svm_node_blackbody(KernelGlobals *kg, ShaderData *sd, float *stack, uint temperature_offset, uint col_offset) { - /* Output */ - float3 color_rgb = make_float3(0.0f, 0.0f, 0.0f); - /* Input */ float temperature = stack_load_float(stack, temperature_offset); - if (temperature < BB_DRAPPER) { - /* just return very very dim red */ - color_rgb = make_float3(1.0e-6f,0.0f,0.0f); - } - else if (temperature <= BB_MAX_TABLE_RANGE) { - /* This is the overall size of the table */ - const int lookuptablesize = 956; - const float lookuptablenormalize = 1.0f/956.0f; - - /* reconstruct a proper index for the table lookup, compared to OSL we don't look up two colors - just one (the OSL-lerp is also automatically done for us by "lookup_table_read") */ - float t = powf((temperature - BB_DRAPPER) * (1.0f / BB_TABLE_SPACING), (1.0f / BB_TABLE_XPOWER)); - - int blackbody_table_offset = kernel_data.tables.blackbody_offset; - - /* Retrieve colors from the lookup table */ - float lutval = t*lookuptablenormalize; - float R = lookup_table_read(kg, lutval, blackbody_table_offset, lookuptablesize); - lutval = (t + 319.0f*1.0f)*lookuptablenormalize; - float G = lookup_table_read(kg, lutval, blackbody_table_offset, lookuptablesize); - lutval = (t + 319.0f*2.0f)*lookuptablenormalize; - float B = lookup_table_read(kg, lutval, blackbody_table_offset, lookuptablesize); - - R = powf(R, BB_TABLE_YPOWER); - G = powf(G, BB_TABLE_YPOWER); - B = powf(B, BB_TABLE_YPOWER); - - color_rgb = make_float3(R, G, B); - } - - /* Luminance */ - float l = linear_rgb_to_gray(color_rgb); - if (l != 0.0f) - color_rgb /= l; + float3 color_rgb = svm_math_blackbody_color(temperature); - if (stack_valid(col_offset)) + if(stack_valid(col_offset)) stack_store_float3(stack, col_offset, color_rgb); } diff --git a/intern/cycles/kernel/svm/svm_brick.h b/intern/cycles/kernel/svm/svm_brick.h index 97c2b545c5f..9b0cf5ab8c4 100644 --- a/intern/cycles/kernel/svm/svm_brick.h +++ b/intern/cycles/kernel/svm/svm_brick.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -21,6 +21,7 @@ CCL_NAMESPACE_BEGIN ccl_device_noinline float brick_noise(int n) /* fast integer noise */ { int nn; + n = (n + 1013) & 0x7fffffff; n = (n >> 13) ^ n; nn = (n * (n * n * 60493 + 19990303) + 1376312589) & 0x7fffffff; return 0.5f * ((float)nn / 1073741824.0f); @@ -47,7 +48,7 @@ ccl_device_noinline float2 svm_brick(float3 p, float mortar_size, float bias, y = p.y - row_height*rownum; return make_float2( - clamp((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias), 0.0f, 1.0f), + saturate((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias)), (x < mortar_size || y < mortar_size || x > (brick_width - mortar_size) || @@ -95,10 +96,7 @@ ccl_device void svm_node_tex_brick(KernelGlobals *kg, ShaderData *sd, float *sta if(f != 1.0f) { float facm = 1.0f - tint; - - color1.x = facm * (color1.x) + tint * color2.x; - color1.y = facm * (color1.y) + tint * color2.y; - color1.z = facm * (color1.z) + tint * color2.z; + color1 = facm * color1 + tint * color2; } if(stack_valid(color_offset)) diff --git a/intern/cycles/kernel/svm/svm_brightness.h b/intern/cycles/kernel/svm/svm_brightness.h index 9b330b3213f..e4d545a00ae 100644 --- a/intern/cycles/kernel/svm/svm_brightness.h +++ b/intern/cycles/kernel/svm/svm_brightness.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -32,7 +32,7 @@ ccl_device void svm_node_brightness(ShaderData *sd, float *stack, uint in_color, color.y = max(a*color.y + b, 0.0f); color.z = max(a*color.z + b, 0.0f); - if (stack_valid(out_color)) + if(stack_valid(out_color)) stack_store_float3(stack, out_color, color); } diff --git a/intern/cycles/kernel/svm/svm_camera.h b/intern/cycles/kernel/svm/svm_camera.h index bfe9289fa02..00678a49d70 100644 --- a/intern/cycles/kernel/svm/svm_camera.h +++ b/intern/cycles/kernel/svm/svm_camera.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -23,17 +23,17 @@ ccl_device void svm_node_camera(KernelGlobals *kg, ShaderData *sd, float *stack, float3 vector; Transform tfm = kernel_data.cam.worldtocamera; - vector = transform_point(&tfm, sd->P); + vector = transform_point(&tfm, ccl_fetch(sd, P)); zdepth = vector.z; distance = len(vector); - if (stack_valid(out_vector)) + if(stack_valid(out_vector)) stack_store_float3(stack, out_vector, normalize(vector)); - if (stack_valid(out_zdepth)) + if(stack_valid(out_zdepth)) stack_store_float(stack, out_zdepth, zdepth); - if (stack_valid(out_distance)) + if(stack_valid(out_distance)) stack_store_float(stack, out_distance, distance); } diff --git a/intern/cycles/kernel/svm/svm_checker.h b/intern/cycles/kernel/svm/svm_checker.h index e0408ad334a..186bf7df55f 100644 --- a/intern/cycles/kernel/svm/svm_checker.h +++ b/intern/cycles/kernel/svm/svm_checker.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index 30110db3ef9..c495ebb35bd 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++ b/intern/cycles/kernel/svm/svm_closure.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -25,10 +25,14 @@ ccl_device void svm_node_glass_setup(ShaderData *sd, ShaderClosure *sc, int type sc->data0 = eta; sc->data1 = 0.0f; sc->data2 = 0.0f; - sd->flag |= bsdf_refraction_setup(sc); + ccl_fetch(sd, flag) |= bsdf_refraction_setup(sc); + } + else { + sc->data0 = 0.0f; + sc->data1 = 0.0f; + sc->data2 = 0.0f; + ccl_fetch(sd, flag) |= bsdf_reflection_setup(sc); } - else - sd->flag |= bsdf_reflection_setup(sc); } else if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID) { sc->data0 = roughness; @@ -36,9 +40,9 @@ ccl_device void svm_node_glass_setup(ShaderData *sd, ShaderClosure *sc, int type sc->data2 = eta; if(refract) - sd->flag |= bsdf_microfacet_beckmann_refraction_setup(sc); + ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_refraction_setup(sc); else - sd->flag |= bsdf_microfacet_beckmann_setup(sc); + ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_setup(sc); } else { sc->data0 = roughness; @@ -46,23 +50,26 @@ ccl_device void svm_node_glass_setup(ShaderData *sd, ShaderClosure *sc, int type sc->data2 = eta; if(refract) - sd->flag |= bsdf_microfacet_ggx_refraction_setup(sc); + ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_refraction_setup(sc); else - sd->flag |= bsdf_microfacet_ggx_setup(sc); + ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_setup(sc); } } ccl_device_inline ShaderClosure *svm_node_closure_get_non_bsdf(ShaderData *sd, ClosureType type, float mix_weight) { - ShaderClosure *sc = &sd->closure[sd->num_closure]; + ShaderClosure *sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure)); - if(sd->num_closure < MAX_CLOSURE) { + if(ccl_fetch(sd, num_closure) < MAX_CLOSURE) { sc->weight *= mix_weight; sc->type = type; + sc->data0 = 0.0f; + sc->data1 = 0.0f; + sc->data2 = 0.0f; #ifdef __OSL__ sc->prim = NULL; #endif - sd->num_closure++; + ccl_fetch(sd, num_closure)++; return sc; } @@ -71,14 +78,15 @@ ccl_device_inline ShaderClosure *svm_node_closure_get_non_bsdf(ShaderData *sd, C ccl_device_inline ShaderClosure *svm_node_closure_get_bsdf(ShaderData *sd, float mix_weight) { - ShaderClosure *sc = &sd->closure[sd->num_closure]; + ShaderClosure *sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure)); + float3 weight = sc->weight * mix_weight; float sample_weight = fabsf(average(weight)); - if(sample_weight > CLOSURE_WEIGHT_CUTOFF && sd->num_closure < MAX_CLOSURE) { + if(sample_weight > CLOSURE_WEIGHT_CUTOFF && ccl_fetch(sd, num_closure) < MAX_CLOSURE) { sc->weight = weight; sc->sample_weight = sample_weight; - sd->num_closure++; + ccl_fetch(sd, num_closure)++; #ifdef __OSL__ sc->prim = NULL; #endif @@ -90,14 +98,15 @@ ccl_device_inline ShaderClosure *svm_node_closure_get_bsdf(ShaderData *sd, float ccl_device_inline ShaderClosure *svm_node_closure_get_absorption(ShaderData *sd, float mix_weight) { - ShaderClosure *sc = &sd->closure[sd->num_closure]; + ShaderClosure *sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure)); + float3 weight = (make_float3(1.0f, 1.0f, 1.0f) - sc->weight) * mix_weight; float sample_weight = fabsf(average(weight)); - if(sample_weight > CLOSURE_WEIGHT_CUTOFF && sd->num_closure < MAX_CLOSURE) { + if(sample_weight > CLOSURE_WEIGHT_CUTOFF && ccl_fetch(sd, num_closure) < MAX_CLOSURE) { sc->weight = weight; sc->sample_weight = sample_weight; - sd->num_closure++; + ccl_fetch(sd, num_closure)++; #ifdef __OSL__ sc->prim = NULL; #endif @@ -121,7 +130,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * if(mix_weight == 0.0f) return; - float3 N = stack_valid(data_node.x)? stack_load_float3(stack, data_node.x): sd->N; + float3 N = stack_valid(data_node.x)? stack_load_float3(stack, data_node.x): ccl_fetch(sd, N); float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __uint_as_float(node.z); float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __uint_as_float(node.w); @@ -139,13 +148,13 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * sc->data0 = 0.0f; sc->data1 = 0.0f; sc->data2 = 0.0f; - sd->flag |= bsdf_diffuse_setup(sc); + ccl_fetch(sd, flag) |= bsdf_diffuse_setup(sc); } else { sc->data0 = roughness; sc->data1 = 0.0f; sc->data2 = 0.0f; - sd->flag |= bsdf_oren_nayar_setup(sc); + ccl_fetch(sd, flag) |= bsdf_oren_nayar_setup(sc); } } break; @@ -158,7 +167,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * sc->data1 = 0.0f; sc->data2 = 0.0f; sc->N = N; - sd->flag |= bsdf_translucent_setup(sc); + ccl_fetch(sd, flag) |= bsdf_translucent_setup(sc); } break; } @@ -170,7 +179,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * sc->data1 = 0.0f; sc->data2 = 0.0f; sc->N = N; - sd->flag |= bsdf_transparent_setup(sc); + ccl_fetch(sd, flag) |= bsdf_transparent_setup(sc); } break; } @@ -192,13 +201,13 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * /* setup bsdf */ if(type == CLOSURE_BSDF_REFLECTION_ID) - sd->flag |= bsdf_reflection_setup(sc); + ccl_fetch(sd, flag) |= bsdf_reflection_setup(sc); else if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_ID) - sd->flag |= bsdf_microfacet_beckmann_setup(sc); + ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_setup(sc); else if(type == CLOSURE_BSDF_MICROFACET_GGX_ID) - sd->flag |= bsdf_microfacet_ggx_setup(sc); + ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_setup(sc); else - sd->flag |= bsdf_ashikhmin_shirley_setup(sc); + ccl_fetch(sd, flag) |= bsdf_ashikhmin_shirley_setup(sc); } break; @@ -216,7 +225,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * sc->N = N; float eta = fmaxf(param2, 1e-5f); - eta = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta; + eta = (ccl_fetch(sd, flag) & SD_BACKFACING)? 1.0f/eta: eta; /* setup bsdf */ if(type == CLOSURE_BSDF_REFRACTION_ID) { @@ -224,7 +233,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * sc->data1 = 0.0f; sc->data2 = 0.0f; - sd->flag |= bsdf_refraction_setup(sc); + ccl_fetch(sd, flag) |= bsdf_refraction_setup(sc); } else { sc->data0 = param1; @@ -232,9 +241,9 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * sc->data2 = eta; if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID) - sd->flag |= bsdf_microfacet_beckmann_refraction_setup(sc); + ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_refraction_setup(sc); else - sd->flag |= bsdf_microfacet_ggx_refraction_setup(sc); + ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_refraction_setup(sc); } } @@ -251,15 +260,15 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * #endif /* index of refraction */ float eta = fmaxf(param2, 1e-5f); - eta = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta; + eta = (ccl_fetch(sd, flag) & SD_BACKFACING)? 1.0f/eta: eta; /* fresnel */ - float cosNO = dot(N, sd->I); + float cosNO = dot(N, ccl_fetch(sd, I)); float fresnel = fresnel_dielectric_cos(cosNO, eta); float roughness = param1; /* reflection */ - ShaderClosure *sc = &sd->closure[sd->num_closure]; + ShaderClosure *sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure)); float3 weight = sc->weight; float sample_weight = sc->sample_weight; @@ -280,15 +289,17 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * #endif /* refraction */ - sc = &sd->closure[sd->num_closure]; - sc->weight = weight; - sc->sample_weight = sample_weight; + if(ccl_fetch(sd, num_closure) < MAX_CLOSURE) { + sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure)); + sc->weight = weight; + sc->sample_weight = sample_weight; - sc = svm_node_closure_get_bsdf(sd, mix_weight*(1.0f - fresnel)); + sc = svm_node_closure_get_bsdf(sd, mix_weight*(1.0f - fresnel)); - if(sc) { - sc->N = N; - svm_node_glass_setup(sd, sc, type, eta, roughness, true); + if(sc) { + sc->N = N; + svm_node_glass_setup(sd, sc, type, eta, roughness, true); + } } break; @@ -328,12 +339,12 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * sc->data2 = 0.0f; - if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID) - sd->flag |= bsdf_microfacet_beckmann_aniso_setup(sc); - else if (type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID) - sd->flag |= bsdf_microfacet_ggx_aniso_setup(sc); + if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID) + ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_aniso_setup(sc); + else if(type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID) + ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_aniso_setup(sc); else - sd->flag |= bsdf_ashikhmin_shirley_aniso_setup(sc); + ccl_fetch(sd, flag) |= bsdf_ashikhmin_shirley_aniso_setup(sc); } break; } @@ -344,10 +355,10 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * sc->N = N; /* sigma */ - sc->data0 = clamp(param1, 0.0f, 1.0f); + sc->data0 = saturate(param1); sc->data1 = 0.0f; sc->data2 = 0.0f; - sd->flag |= bsdf_ashikhmin_velvet_setup(sc); + ccl_fetch(sd, flag) |= bsdf_ashikhmin_velvet_setup(sc); } break; } @@ -362,10 +373,10 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * sc->data1 = param2; sc->data2 = 0.0f; - if (type == CLOSURE_BSDF_DIFFUSE_TOON_ID) - sd->flag |= bsdf_diffuse_toon_setup(sc); + if(type == CLOSURE_BSDF_DIFFUSE_TOON_ID) + ccl_fetch(sd, flag) |= bsdf_diffuse_toon_setup(sc); else - sd->flag |= bsdf_glossy_toon_setup(sc); + ccl_fetch(sd, flag) |= bsdf_glossy_toon_setup(sc); } break; } @@ -373,7 +384,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * case CLOSURE_BSDF_HAIR_REFLECTION_ID: case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: { - if(sd->flag & SD_BACKFACING && sd->type & PRIMITIVE_ALL_CURVE) { + if(ccl_fetch(sd, flag) & SD_BACKFACING && ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight); if(sc) { @@ -384,11 +395,14 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * * spawned by transmission from the front */ sc->weight = make_float3(1.0f, 1.0f, 1.0f); sc->N = N; - sd->flag |= bsdf_transparent_setup(sc); + sc->data0 = 0.0f; + sc->data1 = 0.0f; + sc->data2 = 0.0f; + ccl_fetch(sd, flag) |= bsdf_transparent_setup(sc); } } else { - ShaderClosure *sc = &sd->closure[sd->num_closure]; + ShaderClosure *sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure)); sc = svm_node_closure_get_bsdf(sd, mix_weight); if(sc) { @@ -397,18 +411,18 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * sc->data1 = param2; sc->data2 = -stack_load_float(stack, data_node.z); - if(!(sd->type & PRIMITIVE_ALL_CURVE)) { - sc->T = normalize(sd->dPdv); + if(!(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE)) { + sc->T = normalize(ccl_fetch(sd, dPdv)); sc->data2 = 0.0f; } else - sc->T = sd->dPdu; + sc->T = normalize(ccl_fetch(sd, dPdu)); if(type == CLOSURE_BSDF_HAIR_REFLECTION_ID) { - sd->flag |= bsdf_hair_reflection_setup(sc); + ccl_fetch(sd, flag) |= bsdf_hair_reflection_setup(sc); } else { - sd->flag |= bsdf_hair_transmission_setup(sc); + ccl_fetch(sd, flag) |= bsdf_hair_transmission_setup(sc); } } } @@ -418,9 +432,14 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * #endif #ifdef __SUBSURFACE__ +#ifndef __SPLIT_KERNEL__ +# define sc_next(sc) sc++ +# else +# define sc_next(sc) sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure)) +# endif case CLOSURE_BSSRDF_CUBIC_ID: case CLOSURE_BSSRDF_GAUSSIAN_ID: { - ShaderClosure *sc = &sd->closure[sd->num_closure]; + ShaderClosure *sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure)); float3 weight = sc->weight * mix_weight; float sample_weight = fabsf(average(weight)); @@ -430,7 +449,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * if(path_flag & PATH_RAY_DIFFUSE_ANCESTOR) param1 = 0.0f; - if(sample_weight > CLOSURE_WEIGHT_CUTOFF && sd->num_closure+2 < MAX_CLOSURE) { + if(sample_weight > CLOSURE_WEIGHT_CUTOFF && ccl_fetch(sd, num_closure)+2 < MAX_CLOSURE) { /* radius * scale */ float3 radius = stack_load_float3(stack, data_node.z)*param1; /* sharpness */ @@ -450,10 +469,10 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * sc->prim = NULL; #endif sc->N = N; - sd->flag |= bssrdf_setup(sc, (ClosureType)type); + ccl_fetch(sd, flag) |= bssrdf_setup(sc, (ClosureType)type); - sd->num_closure++; - sc++; + ccl_fetch(sd, num_closure)++; + sc_next(sc); } if(fabsf(weight.y) > 0.0f) { @@ -467,10 +486,10 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * sc->prim = NULL; #endif sc->N = N; - sd->flag |= bssrdf_setup(sc, (ClosureType)type); + ccl_fetch(sd, flag) |= bssrdf_setup(sc, (ClosureType)type); - sd->num_closure++; - sc++; + ccl_fetch(sd, num_closure)++; + sc_next(sc); } if(fabsf(weight.z) > 0.0f) { @@ -484,15 +503,16 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * sc->prim = NULL; #endif sc->N = N; - sd->flag |= bssrdf_setup(sc, (ClosureType)type); + ccl_fetch(sd, flag) |= bssrdf_setup(sc, (ClosureType)type); - sd->num_closure++; - sc++; + ccl_fetch(sd, num_closure)++; + sc_next(sc); } } break; } +# undef sc_next #endif default: break; @@ -520,7 +540,7 @@ ccl_device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float ShaderClosure *sc = svm_node_closure_get_absorption(sd, mix_weight * density); if(sc) { - sd->flag |= volume_absorption_setup(sc); + ccl_fetch(sd, flag) |= volume_absorption_setup(sc); } break; } @@ -528,9 +548,10 @@ ccl_device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight * density); if(sc) { - float g = param2; - sc->data0 = g; - sd->flag |= volume_henyey_greenstein_setup(sc); + sc->data0 = param2; /* g */ + sc->data1 = 0.0f; + sc->data2 = 0.0f; + ccl_fetch(sd, flag) |= volume_henyey_greenstein_setup(sc); } break; } @@ -555,7 +576,7 @@ ccl_device void svm_node_closure_emission(ShaderData *sd, float *stack, uint4 no else svm_node_closure_get_non_bsdf(sd, CLOSURE_EMISSION_ID, 1.0f); - sd->flag |= SD_EMISSION; + ccl_fetch(sd, flag) |= SD_EMISSION; } ccl_device void svm_node_closure_background(ShaderData *sd, float *stack, uint4 node) @@ -589,7 +610,7 @@ ccl_device void svm_node_closure_holdout(ShaderData *sd, float *stack, uint4 nod else svm_node_closure_get_non_bsdf(sd, CLOSURE_HOLDOUT_ID, 1.0f); - sd->flag |= SD_HOLDOUT; + ccl_fetch(sd, flag) |= SD_HOLDOUT; } ccl_device void svm_node_closure_ambient_occlusion(ShaderData *sd, float *stack, uint4 node) @@ -607,15 +628,17 @@ ccl_device void svm_node_closure_ambient_occlusion(ShaderData *sd, float *stack, else svm_node_closure_get_non_bsdf(sd, CLOSURE_AMBIENT_OCCLUSION_ID, 1.0f); - sd->flag |= SD_AO; + ccl_fetch(sd, flag) |= SD_AO; } /* Closure Nodes */ ccl_device_inline void svm_node_closure_store_weight(ShaderData *sd, float3 weight) { - if(sd->num_closure < MAX_CLOSURE) - sd->closure[sd->num_closure].weight = weight; + if(ccl_fetch(sd, num_closure) < MAX_CLOSURE) { + ShaderClosure *sc = ccl_fetch_array(sd, closure, ccl_fetch(sd, num_closure)); + sc->weight = weight; + } } ccl_device void svm_node_closure_set_weight(ShaderData *sd, uint r, uint g, uint b) @@ -650,7 +673,7 @@ ccl_device void svm_node_mix_closure(ShaderData *sd, float *stack, uint4 node) decode_node_uchar4(node.y, &weight_offset, &in_weight_offset, &weight1_offset, &weight2_offset); float weight = stack_load_float(stack, weight_offset); - weight = clamp(weight, 0.0f, 1.0f); + weight = saturate(weight); float in_weight = (stack_valid(in_weight_offset))? stack_load_float(stack, in_weight_offset): 1.0f; @@ -665,7 +688,7 @@ ccl_device void svm_node_mix_closure(ShaderData *sd, float *stack, uint4 node) ccl_device void svm_node_set_normal(KernelGlobals *kg, ShaderData *sd, float *stack, uint in_direction, uint out_normal) { float3 normal = stack_load_float3(stack, in_direction); - sd->N = normal; + ccl_fetch(sd, N) = normal; stack_store_float3(stack, out_normal, normal); } diff --git a/intern/cycles/kernel/svm/svm_convert.h b/intern/cycles/kernel/svm/svm_convert.h index b221e0728ec..34080377083 100644 --- a/intern/cycles/kernel/svm/svm_convert.h +++ b/intern/cycles/kernel/svm/svm_convert.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/svm/svm_displace.h b/intern/cycles/kernel/svm/svm_displace.h index 6cd5ee4b375..8d4b07c9973 100644 --- a/intern/cycles/kernel/svm/svm_displace.h +++ b/intern/cycles/kernel/svm/svm_displace.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -25,11 +25,11 @@ ccl_device void svm_node_set_bump(KernelGlobals *kg, ShaderData *sd, float *stac uint normal_offset, distance_offset, invert; decode_node_uchar4(node.y, &normal_offset, &distance_offset, &invert, NULL); - float3 normal_in = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): sd->N; + float3 normal_in = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): ccl_fetch(sd, N); /* get surface tangents from normal */ - float3 Rx = cross(sd->dP.dy, normal_in); - float3 Ry = cross(normal_in, sd->dP.dx); + float3 Rx = cross(ccl_fetch(sd, dP).dy, normal_in); + float3 Ry = cross(normal_in, ccl_fetch(sd, dP).dx); /* get bump values */ uint c_offset, x_offset, y_offset, strength_offset; @@ -40,7 +40,7 @@ ccl_device void svm_node_set_bump(KernelGlobals *kg, ShaderData *sd, float *stac float h_y = stack_load_float(stack, y_offset); /* compute surface gradient and determinant */ - float det = dot(sd->dP.dx, Rx); + float det = dot(ccl_fetch(sd, dP).dx, Rx); float3 surfgrad = (h_x - h_c)*Rx + (h_y - h_c)*Ry; float absdet = fabsf(det); @@ -65,7 +65,7 @@ ccl_device void svm_node_set_bump(KernelGlobals *kg, ShaderData *sd, float *stac ccl_device void svm_node_set_displacement(ShaderData *sd, float *stack, uint fac_offset) { float d = stack_load_float(stack, fac_offset); - sd->P += sd->N*d*0.1f; /* todo: get rid of this factor */ + ccl_fetch(sd, P) += ccl_fetch(sd, N)*d*0.1f; /* todo: get rid of this factor */ } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_fresnel.h b/intern/cycles/kernel/svm/svm_fresnel.h index 5def52205eb..23c97d80cb0 100644 --- a/intern/cycles/kernel/svm/svm_fresnel.h +++ b/intern/cycles/kernel/svm/svm_fresnel.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -23,12 +23,12 @@ ccl_device void svm_node_fresnel(ShaderData *sd, float *stack, uint ior_offset, uint normal_offset, out_offset; decode_node_uchar4(node, &normal_offset, &out_offset, NULL, NULL); float eta = (stack_valid(ior_offset))? stack_load_float(stack, ior_offset): __uint_as_float(ior_value); - float3 normal_in = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): sd->N; + float3 normal_in = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): ccl_fetch(sd, N); eta = fmaxf(eta, 1e-5f); - eta = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta; + eta = (ccl_fetch(sd, flag) & SD_BACKFACING)? 1.0f/eta: eta; - float f = fresnel_dielectric_cos(dot(sd->I, normal_in), eta); + float f = fresnel_dielectric_cos(dot(ccl_fetch(sd, I), normal_in), eta); stack_store_float(stack, out_offset, f); } @@ -44,18 +44,18 @@ ccl_device void svm_node_layer_weight(ShaderData *sd, float *stack, uint4 node) decode_node_uchar4(node.w, &type, &normal_offset, &out_offset, NULL); float blend = (stack_valid(blend_offset))? stack_load_float(stack, blend_offset): __uint_as_float(blend_value); - float3 normal_in = (stack_valid(normal_offset))? stack_load_float3(stack, normal_offset): sd->N; + float3 normal_in = (stack_valid(normal_offset))? stack_load_float3(stack, normal_offset): ccl_fetch(sd, N); float f; if(type == NODE_LAYER_WEIGHT_FRESNEL) { float eta = fmaxf(1.0f - blend, 1e-5f); - eta = (sd->flag & SD_BACKFACING)? eta: 1.0f/eta; + eta = (ccl_fetch(sd, flag) & SD_BACKFACING)? eta: 1.0f/eta; - f = fresnel_dielectric_cos(dot(sd->I, normal_in), eta); + f = fresnel_dielectric_cos(dot(ccl_fetch(sd, I), normal_in), eta); } else { - f = fabsf(dot(sd->I, normal_in)); + f = fabsf(dot(ccl_fetch(sd, I), normal_in)); if(blend != 0.5f) { blend = clamp(blend, 0.0f, 1.0f-1e-5f); diff --git a/intern/cycles/kernel/svm/svm_gamma.h b/intern/cycles/kernel/svm/svm_gamma.h index c4749e7b936..b645ff3f0f9 100644 --- a/intern/cycles/kernel/svm/svm_gamma.h +++ b/intern/cycles/kernel/svm/svm_gamma.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -21,14 +21,14 @@ ccl_device void svm_node_gamma(ShaderData *sd, float *stack, uint in_gamma, uint float3 color = stack_load_float3(stack, in_color); float gamma = stack_load_float(stack, in_gamma); - if (color.x > 0.0f) + if(color.x > 0.0f) color.x = powf(color.x, gamma); - if (color.y > 0.0f) + if(color.y > 0.0f) color.y = powf(color.y, gamma); - if (color.z > 0.0f) + if(color.z > 0.0f) color.z = powf(color.z, gamma); - if (stack_valid(out_color)) + if(stack_valid(out_color)) stack_store_float3(stack, out_color, color); } diff --git a/intern/cycles/kernel/svm/svm_geometry.h b/intern/cycles/kernel/svm/svm_geometry.h index fe681ec92af..bb06254c3a9 100644 --- a/intern/cycles/kernel/svm/svm_geometry.h +++ b/intern/cycles/kernel/svm/svm_geometry.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -23,15 +23,15 @@ ccl_device void svm_node_geometry(KernelGlobals *kg, ShaderData *sd, float *stac float3 data; switch(type) { - case NODE_GEOM_P: data = sd->P; break; - case NODE_GEOM_N: data = sd->N; break; + case NODE_GEOM_P: data = ccl_fetch(sd, P); break; + case NODE_GEOM_N: data = ccl_fetch(sd, N); break; #ifdef __DPDU__ case NODE_GEOM_T: data = primitive_tangent(kg, sd); break; #endif - case NODE_GEOM_I: data = sd->I; break; - case NODE_GEOM_Ng: data = sd->Ng; break; + case NODE_GEOM_I: data = ccl_fetch(sd, I); break; + case NODE_GEOM_Ng: data = ccl_fetch(sd, Ng); break; #ifdef __UV__ - case NODE_GEOM_uv: data = make_float3(sd->u, sd->v, 0.0f); break; + case NODE_GEOM_uv: data = make_float3(ccl_fetch(sd, u), ccl_fetch(sd, v), 0.0f); break; #endif } @@ -44,8 +44,8 @@ ccl_device void svm_node_geometry_bump_dx(KernelGlobals *kg, ShaderData *sd, flo float3 data; switch(type) { - case NODE_GEOM_P: data = sd->P + sd->dP.dx; break; - case NODE_GEOM_uv: data = make_float3(sd->u + sd->du.dx, sd->v + sd->dv.dx, 0.0f); break; + case NODE_GEOM_P: data = ccl_fetch(sd, P) + ccl_fetch(sd, dP).dx; break; + case NODE_GEOM_uv: data = make_float3(ccl_fetch(sd, u) + ccl_fetch(sd, du).dx, ccl_fetch(sd, v) + ccl_fetch(sd, dv).dx, 0.0f); break; default: svm_node_geometry(kg, sd, stack, type, out_offset); return; } @@ -61,8 +61,8 @@ ccl_device void svm_node_geometry_bump_dy(KernelGlobals *kg, ShaderData *sd, flo float3 data; switch(type) { - case NODE_GEOM_P: data = sd->P + sd->dP.dy; break; - case NODE_GEOM_uv: data = make_float3(sd->u + sd->du.dy, sd->v + sd->dv.dy, 0.0f); break; + case NODE_GEOM_P: data = ccl_fetch(sd, P) + ccl_fetch(sd, dP).dy; break; + case NODE_GEOM_uv: data = make_float3(ccl_fetch(sd, u) + ccl_fetch(sd, du).dy, ccl_fetch(sd, v) + ccl_fetch(sd, dv).dy, 0.0f); break; default: svm_node_geometry(kg, sd, stack, type, out_offset); return; } @@ -83,9 +83,9 @@ ccl_device void svm_node_object_info(KernelGlobals *kg, ShaderData *sd, float *s stack_store_float3(stack, out_offset, object_location(kg, sd)); return; } - case NODE_INFO_OB_INDEX: data = object_pass_id(kg, sd->object); break; + case NODE_INFO_OB_INDEX: data = object_pass_id(kg, ccl_fetch(sd, object)); break; case NODE_INFO_MAT_INDEX: data = shader_pass_id(kg, sd); break; - case NODE_INFO_OB_RANDOM: data = object_random_number(kg, sd->object); break; + case NODE_INFO_OB_RANDOM: data = object_random_number(kg, ccl_fetch(sd, object)); break; default: data = 0.0f; break; } @@ -98,44 +98,44 @@ ccl_device void svm_node_particle_info(KernelGlobals *kg, ShaderData *sd, float { switch(type) { case NODE_INFO_PAR_INDEX: { - int particle_id = object_particle_id(kg, sd->object); + int particle_id = object_particle_id(kg, ccl_fetch(sd, object)); stack_store_float(stack, out_offset, particle_index(kg, particle_id)); break; } case NODE_INFO_PAR_AGE: { - int particle_id = object_particle_id(kg, sd->object); + int particle_id = object_particle_id(kg, ccl_fetch(sd, object)); stack_store_float(stack, out_offset, particle_age(kg, particle_id)); break; } case NODE_INFO_PAR_LIFETIME: { - int particle_id = object_particle_id(kg, sd->object); + int particle_id = object_particle_id(kg, ccl_fetch(sd, object)); stack_store_float(stack, out_offset, particle_lifetime(kg, particle_id)); break; } case NODE_INFO_PAR_LOCATION: { - int particle_id = object_particle_id(kg, sd->object); + int particle_id = object_particle_id(kg, ccl_fetch(sd, object)); stack_store_float3(stack, out_offset, particle_location(kg, particle_id)); break; } #if 0 /* XXX float4 currently not supported in SVM stack */ case NODE_INFO_PAR_ROTATION: { - int particle_id = object_particle_id(kg, sd->object); + int particle_id = object_particle_id(kg, ccl_fetch(sd, object)); stack_store_float4(stack, out_offset, particle_rotation(kg, particle_id)); break; } #endif case NODE_INFO_PAR_SIZE: { - int particle_id = object_particle_id(kg, sd->object); + int particle_id = object_particle_id(kg, ccl_fetch(sd, object)); stack_store_float(stack, out_offset, particle_size(kg, particle_id)); break; } case NODE_INFO_PAR_VELOCITY: { - int particle_id = object_particle_id(kg, sd->object); + int particle_id = object_particle_id(kg, ccl_fetch(sd, object)); stack_store_float3(stack, out_offset, particle_velocity(kg, particle_id)); break; } case NODE_INFO_PAR_ANGULAR_VELOCITY: { - int particle_id = object_particle_id(kg, sd->object); + int particle_id = object_particle_id(kg, ccl_fetch(sd, object)); stack_store_float3(stack, out_offset, particle_angular_velocity(kg, particle_id)); break; } @@ -153,7 +153,7 @@ ccl_device void svm_node_hair_info(KernelGlobals *kg, ShaderData *sd, float *sta switch(type) { case NODE_INFO_CURVE_IS_STRAND: { - data = (sd->type & PRIMITIVE_ALL_CURVE) != 0; + data = (ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) != 0; stack_store_float(stack, out_offset, data); break; } @@ -165,7 +165,7 @@ ccl_device void svm_node_hair_info(KernelGlobals *kg, ShaderData *sd, float *sta break; } /*case NODE_INFO_CURVE_FADE: { - data = sd->curve_transparency; + data = ccl_fetch(sd, curve_transparency); stack_store_float(stack, out_offset, data); break; }*/ diff --git a/intern/cycles/kernel/svm/svm_gradient.h b/intern/cycles/kernel/svm/svm_gradient.h index a4b3c0583f7..53d7b4f812c 100644 --- a/intern/cycles/kernel/svm/svm_gradient.h +++ b/intern/cycles/kernel/svm/svm_gradient.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -66,7 +66,7 @@ ccl_device void svm_node_tex_gradient(ShaderData *sd, float *stack, uint4 node) float3 co = stack_load_float3(stack, co_offset); float f = svm_gradient(co, (NodeGradientType)type); - f = clamp(f, 0.0f, 1.0f); + f = saturate(f); if(stack_valid(fac_offset)) stack_store_float(stack, fac_offset, f); diff --git a/intern/cycles/kernel/svm/svm_hsv.h b/intern/cycles/kernel/svm/svm_hsv.h index 11dfc4f096b..1f2cad60df7 100644 --- a/intern/cycles/kernel/svm/svm_hsv.h +++ b/intern/cycles/kernel/svm/svm_hsv.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __SVM_HSV_H__ @@ -46,7 +46,12 @@ ccl_device void svm_node_hsv(KernelGlobals *kg, ShaderData *sd, float *stack, ui color.y = fac*color.y + (1.0f - fac)*in_color.y; color.z = fac*color.z + (1.0f - fac)*in_color.z; - if (stack_valid(out_color_offset)) + /* Clamp color to prevent negative values caused by oversaturation. */ + color.x = max(color.x, 0.0f); + color.y = max(color.y, 0.0f); + color.z = max(color.z, 0.0f); + + if(stack_valid(out_color_offset)) stack_store_float3(stack, out_color_offset, color); } diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index a7abeda18e5..caf0b37ba35 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -65,7 +65,7 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, float4 r; int ix, iy, nix, niy; - if (interpolation == INTERPOLATION_CLOSEST) { + if(interpolation == INTERPOLATION_CLOSEST) { svm_image_texture_frac(x*width, &ix); svm_image_texture_frac(y*height, &iy); @@ -251,9 +251,9 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, case 95: r = kernel_tex_image_interp(__tex_image_095, x, y); break; case 96: r = kernel_tex_image_interp(__tex_image_096, x, y); break; case 97: r = kernel_tex_image_interp(__tex_image_097, x, y); break; - case 98: r = kernel_tex_image_interp(__tex_image_098, x, y); break; #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300) + case 98: r = kernel_tex_image_interp(__tex_image_098, x, y); break; case 99: r = kernel_tex_image_interp(__tex_image_099, x, y); break; case 100: r = kernel_tex_image_interp(__tex_image_100, x, y); break; case 101: r = kernel_tex_image_interp(__tex_image_101, x, y); break; @@ -354,6 +354,12 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, #endif +/* Remap coordnate from 0..1 box to -1..-1 */ +ccl_device_inline float3 texco_remap_square(float3 co) +{ + return (co - make_float3(0.5f, 0.5f, 0.5f)) * 2.0f; +} + ccl_device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { uint id = node.y; @@ -362,8 +368,20 @@ ccl_device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *sta decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb); float3 co = stack_load_float3(stack, co_offset); + float2 tex_co; uint use_alpha = stack_valid(alpha_offset); - float4 f = svm_image_texture(kg, id, co.x, co.y, srgb, use_alpha); + if(node.w == NODE_IMAGE_PROJ_SPHERE) { + co = texco_remap_square(co); + tex_co = map_to_sphere(co); + } + else if(node.w == NODE_IMAGE_PROJ_TUBE) { + co = texco_remap_square(co); + tex_co = map_to_tube(co); + } + else { + tex_co = make_float2(co.x, co.y); + } + float4 f = svm_image_texture(kg, id, tex_co.x, tex_co.y, srgb, use_alpha); if(stack_valid(out_offset)) stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z)); @@ -374,10 +392,10 @@ ccl_device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *sta ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { /* get object space normal */ - float3 N = sd->N; + float3 N = ccl_fetch(sd, N); - N = sd->N; - if(sd->object != OBJECT_NONE) + N = ccl_fetch(sd, N); + if(ccl_fetch(sd, object) != OBJECT_NONE) object_inverse_normal_transform(kg, sd, &N); /* project from direction vector to barycentric coordinates in triangles */ @@ -415,17 +433,17 @@ ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float /* in case of blending, test for mixes between two textures */ if(N.z < (1.0f - limit)*(N.y + N.x)) { weight.x = N.x/(N.x + N.y); - weight.x = clamp((weight.x - 0.5f*(1.0f - blend))/blend, 0.0f, 1.0f); + weight.x = saturate((weight.x - 0.5f*(1.0f - blend))/blend); weight.y = 1.0f - weight.x; } else if(N.x < (1.0f - limit)*(N.y + N.z)) { weight.y = N.y/(N.y + N.z); - weight.y = clamp((weight.y - 0.5f*(1.0f - blend))/blend, 0.0f, 1.0f); + weight.y = saturate((weight.y - 0.5f*(1.0f - blend))/blend); weight.z = 1.0f - weight.y; } else if(N.y < (1.0f - limit)*(N.x + N.z)) { weight.x = N.x/(N.x + N.z); - weight.x = clamp((weight.x - 0.5f*(1.0f - blend))/blend, 0.0f, 1.0f); + weight.x = saturate((weight.x - 0.5f*(1.0f - blend))/blend); weight.z = 1.0f - weight.x; } else { @@ -435,6 +453,10 @@ ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float weight.z = ((2.0f - limit)*N.z + (limit - 1.0f))/(2.0f*limit - 1.0f); } } + else { + /* Desperate mode, no valid choice anyway, fallback to one side.*/ + weight.x = 1.0f; + } /* now fetch textures */ uint co_offset, out_offset, alpha_offset, srgb; @@ -459,7 +481,6 @@ ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float stack_store_float(stack, alpha_offset, f.w); } - ccl_device void svm_node_tex_environment(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { uint id = node.y; diff --git a/intern/cycles/kernel/svm/svm_invert.h b/intern/cycles/kernel/svm/svm_invert.h index eb47e9ad4ab..5ce858e2e5d 100644 --- a/intern/cycles/kernel/svm/svm_invert.h +++ b/intern/cycles/kernel/svm/svm_invert.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -30,7 +30,7 @@ ccl_device void svm_node_invert(ShaderData *sd, float *stack, uint in_fac, uint color.y = invert(color.y, factor); color.z = invert(color.z, factor); - if (stack_valid(out_color)) + if(stack_valid(out_color)) stack_store_float3(stack, out_color, color); } diff --git a/intern/cycles/kernel/svm/svm_light_path.h b/intern/cycles/kernel/svm/svm_light_path.h index da544c63ae0..a235dd35224 100644 --- a/intern/cycles/kernel/svm/svm_light_path.h +++ b/intern/cycles/kernel/svm/svm_light_path.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -31,10 +31,10 @@ ccl_device void svm_node_light_path(ShaderData *sd, float *stack, uint type, uin case NODE_LP_reflection: info = (path_flag & PATH_RAY_REFLECT)? 1.0f: 0.0f; break; case NODE_LP_transmission: info = (path_flag & PATH_RAY_TRANSMIT)? 1.0f: 0.0f; break; case NODE_LP_volume_scatter: info = (path_flag & PATH_RAY_VOLUME_SCATTER)? 1.0f: 0.0f; break; - case NODE_LP_backfacing: info = (sd->flag & SD_BACKFACING)? 1.0f: 0.0f; break; - case NODE_LP_ray_length: info = sd->ray_length; break; - case NODE_LP_ray_depth: info = (float)sd->ray_depth; break; - case NODE_LP_ray_transparent: info = sd->transparent_depth; break; + case NODE_LP_backfacing: info = (ccl_fetch(sd, flag) & SD_BACKFACING)? 1.0f: 0.0f; break; + case NODE_LP_ray_length: info = ccl_fetch(sd, ray_length); break; + case NODE_LP_ray_depth: info = (float)ccl_fetch(sd, ray_depth); break; + case NODE_LP_ray_transparent: info = (float)ccl_fetch(sd, transparent_depth); break; } stack_store_float(stack, out_offset, info); @@ -53,14 +53,14 @@ ccl_device void svm_node_light_falloff(ShaderData *sd, float *stack, uint4 node) switch(type) { case NODE_LIGHT_FALLOFF_QUADRATIC: break; - case NODE_LIGHT_FALLOFF_LINEAR: strength *= sd->ray_length; break; - case NODE_LIGHT_FALLOFF_CONSTANT: strength *= sd->ray_length*sd->ray_length; break; + case NODE_LIGHT_FALLOFF_LINEAR: strength *= ccl_fetch(sd, ray_length); break; + case NODE_LIGHT_FALLOFF_CONSTANT: strength *= ccl_fetch(sd, ray_length)*ccl_fetch(sd, ray_length); break; } float smooth = stack_load_float(stack, smooth_offset); if(smooth > 0.0f) { - float squared = sd->ray_length*sd->ray_length; + float squared = ccl_fetch(sd, ray_length)*ccl_fetch(sd, ray_length); strength *= squared/(smooth + squared); } diff --git a/intern/cycles/kernel/svm/svm_magic.h b/intern/cycles/kernel/svm/svm_magic.h index b661f5cacf8..ac87c77d719 100644 --- a/intern/cycles/kernel/svm/svm_magic.h +++ b/intern/cycles/kernel/svm/svm_magic.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/svm/svm_mapping.h b/intern/cycles/kernel/svm/svm_mapping.h index c9fa8502dd1..0a890545af4 100644 --- a/intern/cycles/kernel/svm/svm_mapping.h +++ b/intern/cycles/kernel/svm/svm_mapping.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/svm/svm_math.h b/intern/cycles/kernel/svm/svm_math.h index 1ce9386e40e..d633e54ed8d 100644 --- a/intern/cycles/kernel/svm/svm_math.h +++ b/intern/cycles/kernel/svm/svm_math.h @@ -11,99 +11,11 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN -ccl_device float svm_math(NodeMath type, float Fac1, float Fac2) -{ - float Fac; - - if(type == NODE_MATH_ADD) - Fac = Fac1 + Fac2; - else if(type == NODE_MATH_SUBTRACT) - Fac = Fac1 - Fac2; - else if(type == NODE_MATH_MULTIPLY) - Fac = Fac1*Fac2; - else if(type == NODE_MATH_DIVIDE) - Fac = safe_divide(Fac1, Fac2); - else if(type == NODE_MATH_SINE) - Fac = sinf(Fac1); - else if(type == NODE_MATH_COSINE) - Fac = cosf(Fac1); - else if(type == NODE_MATH_TANGENT) - Fac = tanf(Fac1); - else if(type == NODE_MATH_ARCSINE) - Fac = safe_asinf(Fac1); - else if(type == NODE_MATH_ARCCOSINE) - Fac = safe_acosf(Fac1); - else if(type == NODE_MATH_ARCTANGENT) - Fac = atanf(Fac1); - else if(type == NODE_MATH_POWER) - Fac = safe_powf(Fac1, Fac2); - else if(type == NODE_MATH_LOGARITHM) - Fac = safe_logf(Fac1, Fac2); - else if(type == NODE_MATH_MINIMUM) - Fac = fminf(Fac1, Fac2); - else if(type == NODE_MATH_MAXIMUM) - Fac = fmaxf(Fac1, Fac2); - else if(type == NODE_MATH_ROUND) - Fac = floorf(Fac1 + 0.5f); - else if(type == NODE_MATH_LESS_THAN) - Fac = Fac1 < Fac2; - else if(type == NODE_MATH_GREATER_THAN) - Fac = Fac1 > Fac2; - else if(type == NODE_MATH_MODULO) - Fac = safe_modulo(Fac1, Fac2); - else if(type == NODE_MATH_ABSOLUTE) - Fac = fabsf(Fac1); - else if(type == NODE_MATH_CLAMP) - Fac = clamp(Fac1, 0.0f, 1.0f); - else - Fac = 0.0f; - - return Fac; -} - -ccl_device float average_fac(float3 v) -{ - return (fabsf(v.x) + fabsf(v.y) + fabsf(v.z))/3.0f; -} - -ccl_device void svm_vector_math(float *Fac, float3 *Vector, NodeVectorMath type, float3 Vector1, float3 Vector2) -{ - if(type == NODE_VECTOR_MATH_ADD) { - *Vector = Vector1 + Vector2; - *Fac = average_fac(*Vector); - } - else if(type == NODE_VECTOR_MATH_SUBTRACT) { - *Vector = Vector1 - Vector2; - *Fac = average_fac(*Vector); - } - else if(type == NODE_VECTOR_MATH_AVERAGE) { - *Fac = len(Vector1 + Vector2); - *Vector = normalize(Vector1 + Vector2); - } - else if(type == NODE_VECTOR_MATH_DOT_PRODUCT) { - *Fac = dot(Vector1, Vector2); - *Vector = make_float3(0.0f, 0.0f, 0.0f); - } - else if(type == NODE_VECTOR_MATH_CROSS_PRODUCT) { - float3 c = cross(Vector1, Vector2); - *Fac = len(c); - *Vector = normalize(c); - } - else if(type == NODE_VECTOR_MATH_NORMALIZE) { - *Fac = len(Vector1); - *Vector = normalize(Vector1); - } - else { - *Fac = 0.0f; - *Vector = make_float3(0.0f, 0.0f, 0.0f); - } -} - /* Nodes */ ccl_device void svm_node_math(KernelGlobals *kg, ShaderData *sd, float *stack, uint itype, uint f1_offset, uint f2_offset, int *offset) diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h new file mode 100644 index 00000000000..645cbd3fc73 --- /dev/null +++ b/intern/cycles/kernel/svm/svm_math_util.h @@ -0,0 +1,170 @@ +/* + * Copyright 2011-2014 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + +ccl_device float average_fac(float3 v) +{ + return (fabsf(v.x) + fabsf(v.y) + fabsf(v.z))/3.0f; +} + +ccl_device void svm_vector_math(float *Fac, float3 *Vector, NodeVectorMath type, float3 Vector1, float3 Vector2) +{ + if(type == NODE_VECTOR_MATH_ADD) { + *Vector = Vector1 + Vector2; + *Fac = average_fac(*Vector); + } + else if(type == NODE_VECTOR_MATH_SUBTRACT) { + *Vector = Vector1 - Vector2; + *Fac = average_fac(*Vector); + } + else if(type == NODE_VECTOR_MATH_AVERAGE) { + *Fac = len(Vector1 + Vector2); + *Vector = normalize(Vector1 + Vector2); + } + else if(type == NODE_VECTOR_MATH_DOT_PRODUCT) { + *Fac = dot(Vector1, Vector2); + *Vector = make_float3(0.0f, 0.0f, 0.0f); + } + else if(type == NODE_VECTOR_MATH_CROSS_PRODUCT) { + float3 c = cross(Vector1, Vector2); + *Fac = len(c); + *Vector = normalize(c); + } + else if(type == NODE_VECTOR_MATH_NORMALIZE) { + *Fac = len(Vector1); + *Vector = normalize(Vector1); + } + else { + *Fac = 0.0f; + *Vector = make_float3(0.0f, 0.0f, 0.0f); + } +} + +ccl_device float svm_math(NodeMath type, float Fac1, float Fac2) +{ + float Fac; + + if(type == NODE_MATH_ADD) + Fac = Fac1 + Fac2; + else if(type == NODE_MATH_SUBTRACT) + Fac = Fac1 - Fac2; + else if(type == NODE_MATH_MULTIPLY) + Fac = Fac1*Fac2; + else if(type == NODE_MATH_DIVIDE) + Fac = safe_divide(Fac1, Fac2); + else if(type == NODE_MATH_SINE) + Fac = sinf(Fac1); + else if(type == NODE_MATH_COSINE) + Fac = cosf(Fac1); + else if(type == NODE_MATH_TANGENT) + Fac = tanf(Fac1); + else if(type == NODE_MATH_ARCSINE) + Fac = safe_asinf(Fac1); + else if(type == NODE_MATH_ARCCOSINE) + Fac = safe_acosf(Fac1); + else if(type == NODE_MATH_ARCTANGENT) + Fac = atanf(Fac1); + else if(type == NODE_MATH_POWER) + Fac = safe_powf(Fac1, Fac2); + else if(type == NODE_MATH_LOGARITHM) + Fac = safe_logf(Fac1, Fac2); + else if(type == NODE_MATH_MINIMUM) + Fac = fminf(Fac1, Fac2); + else if(type == NODE_MATH_MAXIMUM) + Fac = fmaxf(Fac1, Fac2); + else if(type == NODE_MATH_ROUND) + Fac = floorf(Fac1 + 0.5f); + else if(type == NODE_MATH_LESS_THAN) + Fac = Fac1 < Fac2; + else if(type == NODE_MATH_GREATER_THAN) + Fac = Fac1 > Fac2; + else if(type == NODE_MATH_MODULO) + Fac = safe_modulo(Fac1, Fac2); + else if(type == NODE_MATH_ABSOLUTE) + Fac = fabsf(Fac1); + else if(type == NODE_MATH_CLAMP) + Fac = saturate(Fac1); + else + Fac = 0.0f; + + return Fac; +} + +ccl_device float3 svm_math_blackbody_color(float t) { + /* Calculate color in range 800..12000 using an approximation + * a/x+bx+c for R and G and ((at + b)t + c)t + d) for B + * Max absolute error for RGB is (0.00095, 0.00077, 0.00057), + * which is enough to get the same 8 bit/channel color. + */ + + const float rc[6][3] = { + { 2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f }, + { 3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f }, + { 4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f }, + { 4.66849800e+03f, 2.85655028e-05f, 1.29075375e-01f }, + { 4.60124770e+03f, 2.89727618e-05f, 1.48001316e-01f }, + { 3.78765709e+03f, 9.36026367e-06f, 3.98995841e-01f }, + }; + + const float gc[6][3] = { + { -7.50343014e+02f, 3.15679613e-04f, 4.73464526e-01f }, + { -1.00402363e+03f, 1.29189794e-04f, 9.08181524e-01f }, + { -1.22075471e+03f, 2.56245413e-05f, 1.20753416e+00f }, + { -1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f }, + { -1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f }, + { -5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f }, + }; + + const float bc[6][4] = { + { 0.0f, 0.0f, 0.0f, 0.0f }, /* zeros should be optimized by compiler */ + { 0.0f, 0.0f, 0.0f, 0.0f }, + { 0.0f, 0.0f, 0.0f, 0.0f }, + { -2.02524603e-11f, 1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f }, + { -2.22463426e-13f, -1.55078698e-08f, 3.81675160e-04f, -7.30646033e-01f }, + { 6.72595954e-13f, -2.73059993e-08f, 4.24068546e-04f, -7.52204323e-01f }, + }; + + if(t >= 12000.0f) + return make_float3(0.826270103f, 0.994478524f, 1.56626022f); + + /* Define a macro to reduce stack usage for nvcc */ +#define MAKE_BB_RGB(i) make_float3(\ + rc[i][0] / t + rc[i][1] * t + rc[i][2],\ + gc[i][0] / t + gc[i][1] * t + gc[i][2],\ + ((bc[i][0] * t + bc[i][1]) * t + bc[i][2]) * t + bc[i][3]) + + if(t >= 6365.0f) + return MAKE_BB_RGB(5); + if(t >= 3315.0f) + return MAKE_BB_RGB(4); + if(t >= 1902.0f) + return MAKE_BB_RGB(3); + if(t >= 1449.0f) + return MAKE_BB_RGB(2); + if(t >= 1167.0f) + return MAKE_BB_RGB(1); + if(t >= 965.0f) + return MAKE_BB_RGB(0); + +#undef MAKE_BB_RGB + + /* For 800 <= t < 965 color does not change in OSL implementation, so keep color the same */ + return make_float3(4.70366907f, 0.0f, 0.0f); +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/svm/svm_mix.h b/intern/cycles/kernel/svm/svm_mix.h index edc3903865e..6111214acba 100644 --- a/intern/cycles/kernel/svm/svm_mix.h +++ b/intern/cycles/kernel/svm/svm_mix.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -254,16 +254,16 @@ ccl_device float3 svm_mix_clamp(float3 col) { float3 outcol = col; - outcol.x = clamp(col.x, 0.0f, 1.0f); - outcol.y = clamp(col.y, 0.0f, 1.0f); - outcol.z = clamp(col.z, 0.0f, 1.0f); + outcol.x = saturate(col.x); + outcol.y = saturate(col.y); + outcol.z = saturate(col.z); return outcol; } ccl_device float3 svm_mix(NodeMix type, float fac, float3 c1, float3 c2) { - float t = clamp(fac, 0.0f, 1.0f); + float t = saturate(fac); switch(type) { case NODE_MIX_BLEND: return svm_mix_blend(t, c1, c2); diff --git a/intern/cycles/kernel/svm/svm_musgrave.h b/intern/cycles/kernel/svm/svm_musgrave.h index 61171d6849c..09eba31945e 100644 --- a/intern/cycles/kernel/svm/svm_musgrave.h +++ b/intern/cycles/kernel/svm/svm_musgrave.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -25,7 +25,7 @@ CCL_NAMESPACE_BEGIN * from "Texturing and Modelling: A procedural approach" */ -ccl_device_noinline float noise_musgrave_fBm(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves) +ccl_device_noinline float noise_musgrave_fBm(float3 p, float H, float lacunarity, float octaves) { float rmd; float value = 0.0f; @@ -53,7 +53,7 @@ ccl_device_noinline float noise_musgrave_fBm(float3 p, NodeNoiseBasis basis, flo * octaves: number of frequencies in the fBm */ -ccl_device_noinline float noise_musgrave_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves) +ccl_device_noinline float noise_musgrave_multi_fractal(float3 p, float H, float lacunarity, float octaves) { float rmd; float value = 1.0f; @@ -82,7 +82,7 @@ ccl_device_noinline float noise_musgrave_multi_fractal(float3 p, NodeNoiseBasis * offset: raises the terrain from `sea level' */ -ccl_device_noinline float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset) +ccl_device_noinline float noise_musgrave_hetero_terrain(float3 p, float H, float lacunarity, float octaves, float offset) { float value, increment, rmd; float pwHL = powf(lacunarity, -H); @@ -117,7 +117,7 @@ ccl_device_noinline float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis * offset: raises the terrain from `sea level' */ -ccl_device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset, float gain) +ccl_device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, float H, float lacunarity, float octaves, float offset, float gain) { float result, signal, weight, rmd; float pwHL = powf(lacunarity, -H); @@ -154,7 +154,7 @@ ccl_device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNois * offset: raises the terrain from `sea level' */ -ccl_device_noinline float noise_musgrave_ridged_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset, float gain) +ccl_device_noinline float noise_musgrave_ridged_multi_fractal(float3 p, float H, float lacunarity, float octaves, float offset, float gain) { float result, signal, weight; float pwHL = powf(lacunarity, -H); @@ -168,7 +168,7 @@ ccl_device_noinline float noise_musgrave_ridged_multi_fractal(float3 p, NodeNois for(i = 1; i < float_to_int(octaves); i++) { p *= lacunarity; - weight = clamp(signal * gain, 0.0f, 1.0f); + weight = saturate(signal * gain); signal = offset - fabsf(snoise(p)); signal *= signal; signal *= weight; @@ -183,18 +183,16 @@ ccl_device_noinline float noise_musgrave_ridged_multi_fractal(float3 p, NodeNois ccl_device float svm_musgrave(NodeMusgraveType type, float dimension, float lacunarity, float octaves, float offset, float intensity, float gain, float3 p) { - NodeNoiseBasis basis = NODE_NOISE_PERLIN; - if(type == NODE_MUSGRAVE_MULTIFRACTAL) - return intensity*noise_musgrave_multi_fractal(p, basis, dimension, lacunarity, octaves); + return intensity*noise_musgrave_multi_fractal(p, dimension, lacunarity, octaves); else if(type == NODE_MUSGRAVE_FBM) - return intensity*noise_musgrave_fBm(p, basis, dimension, lacunarity, octaves); + return intensity*noise_musgrave_fBm(p, dimension, lacunarity, octaves); else if(type == NODE_MUSGRAVE_HYBRID_MULTIFRACTAL) - return intensity*noise_musgrave_hybrid_multi_fractal(p, basis, dimension, lacunarity, octaves, offset, gain); + return intensity*noise_musgrave_hybrid_multi_fractal(p, dimension, lacunarity, octaves, offset, gain); else if(type == NODE_MUSGRAVE_RIDGED_MULTIFRACTAL) - return intensity*noise_musgrave_ridged_multi_fractal(p, basis, dimension, lacunarity, octaves, offset, gain); + return intensity*noise_musgrave_ridged_multi_fractal(p, dimension, lacunarity, octaves, offset, gain); else if(type == NODE_MUSGRAVE_HETERO_TERRAIN) - return intensity*noise_musgrave_hetero_terrain(p, basis, dimension, lacunarity, octaves, offset); + return intensity*noise_musgrave_hetero_terrain(p, dimension, lacunarity, octaves, offset); return 0.0f; } diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h index 869341c81f4..c77c2a1c482 100644 --- a/intern/cycles/kernel/svm/svm_noise.h +++ b/intern/cycles/kernel/svm/svm_noise.h @@ -290,40 +290,6 @@ ccl_device_noinline float perlin(float x, float y, float z) } #endif -#if 0 // unused -ccl_device_noinline float perlin_periodic(float x, float y, float z, float3 pperiod) -{ - int X; float fx = floorfrac(x, &X); - int Y; float fy = floorfrac(y, &Y); - int Z; float fz = floorfrac(z, &Z); - - int3 p; - - p.x = max(quick_floor(pperiod.x), 1); - p.y = max(quick_floor(pperiod.y), 1); - p.z = max(quick_floor(pperiod.z), 1); - - float u = fade(fx); - float v = fade(fy); - float w = fade(fz); - - float result; - - result = nerp (w, nerp (v, nerp (u, grad (phash (X , Y , Z , p), fx , fy , fz ), - grad (phash (X+1, Y , Z , p), fx-1.0f, fy , fz )), - nerp (u, grad (phash (X , Y+1, Z , p), fx , fy-1.0f, fz ), - grad (phash (X+1, Y+1, Z , p), fx-1.0f, fy-1.0f, fz ))), - nerp (v, nerp (u, grad (phash (X , Y , Z+1, p), fx , fy , fz-1.0f ), - grad (phash (X+1, Y , Z+1, p), fx-1.0f, fy , fz-1.0f )), - nerp (u, grad (phash (X , Y+1, Z+1, p), fx , fy-1.0f, fz-1.0f ), - grad (phash (X+1, Y+1, Z+1, p), fx-1.0f, fy-1.0f, fz-1.0f )))); - float r = scale3(result); - - /* can happen for big coordinates, things even out to 0.0 then anyway */ - return (isfinite(r))? r: 0.0f; -} -#endif - /* perlin noise in range 0..1 */ ccl_device float noise(float3 p) { @@ -367,20 +333,5 @@ ccl_device ssef cellnoise_color(const ssef& p) } #endif -#if 0 // unused -/* periodic perlin noise in range 0..1 */ -ccl_device float pnoise(float3 p, float3 pperiod) -{ - float r = perlin_periodic(p.x, p.y, p.z, pperiod); - return 0.5f*r + 0.5f; -} - -/* periodic perlin noise in range -1..1 */ -ccl_device float psnoise(float3 p, float3 pperiod) -{ - return perlin_periodic(p.x, p.y, p.z, pperiod); -} -#endif - CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_noisetex.h b/intern/cycles/kernel/svm/svm_noisetex.h index 5d5cfe6ffcc..62ff38cf1c5 100644 --- a/intern/cycles/kernel/svm/svm_noisetex.h +++ b/intern/cycles/kernel/svm/svm_noisetex.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -20,23 +20,22 @@ CCL_NAMESPACE_BEGIN ccl_device_inline void svm_noise(float3 p, float detail, float distortion, float *fac, float3 *color) { - NodeNoiseBasis basis = NODE_NOISE_PERLIN; int hard = 0; if(distortion != 0.0f) { float3 r, offset = make_float3(13.5f, 13.5f, 13.5f); - r.x = noise_basis(p + offset, basis) * distortion; - r.y = noise_basis(p, basis) * distortion; - r.z = noise_basis(p - offset, basis) * distortion; + r.x = noise(p + offset) * distortion; + r.y = noise(p) * distortion; + r.z = noise(p - offset) * distortion; p += r; } - *fac = noise_turbulence(p, basis, detail, hard); + *fac = noise_turbulence(p, detail, hard); *color = make_float3(*fac, - noise_turbulence(make_float3(p.y, p.x, p.z), basis, detail, hard), - noise_turbulence(make_float3(p.y, p.z, p.x), basis, detail, hard)); + noise_turbulence(make_float3(p.y, p.x, p.z), detail, hard), + noise_turbulence(make_float3(p.y, p.z, p.x), detail, hard)); } ccl_device void svm_node_tex_noise(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) diff --git a/intern/cycles/kernel/svm/svm_normal.h b/intern/cycles/kernel/svm/svm_normal.h index 8695031b8b9..53abef71012 100644 --- a/intern/cycles/kernel/svm/svm_normal.h +++ b/intern/cycles/kernel/svm/svm_normal.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -28,10 +28,10 @@ ccl_device void svm_node_normal(KernelGlobals *kg, ShaderData *sd, float *stack, direction.z = __int_as_float(node1.z); direction = normalize(direction); - if (stack_valid(out_normal_offset)) + if(stack_valid(out_normal_offset)) stack_store_float3(stack, out_normal_offset, direction); - if (stack_valid(out_dot_offset)) + if(stack_valid(out_dot_offset)) stack_store_float(stack, out_dot_offset, dot(direction, normalize(normal))); } diff --git a/intern/cycles/kernel/svm/svm_ramp.h b/intern/cycles/kernel/svm/svm_ramp.h index 55eee3d24c3..062ab013b1f 100644 --- a/intern/cycles/kernel/svm/svm_ramp.h +++ b/intern/cycles/kernel/svm/svm_ramp.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __SVM_RAMP_H__ @@ -21,7 +21,7 @@ CCL_NAMESPACE_BEGIN ccl_device float4 rgb_ramp_lookup(KernelGlobals *kg, int offset, float f, bool interpolate) { - f = clamp(f, 0.0f, 1.0f)*(RAMP_TABLE_SIZE-1); + f = saturate(f)*(RAMP_TABLE_SIZE-1); /* clamp int as well in case of NaN */ int i = clamp(float_to_int(f), 0, RAMP_TABLE_SIZE-1); diff --git a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h index 111d5d47988..6f51b163756 100644 --- a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h +++ b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -28,7 +28,7 @@ ccl_device void svm_node_combine_hsv(KernelGlobals *kg, ShaderData *sd, float *s /* Combine, and convert back to RGB */ float3 color = hsv_to_rgb(make_float3(hue, saturation, value)); - if (stack_valid(color_out)) + if(stack_valid(color_out)) stack_store_float3(stack, color_out, color); } @@ -42,11 +42,11 @@ ccl_device void svm_node_separate_hsv(KernelGlobals *kg, ShaderData *sd, float * /* Convert to HSV */ color = rgb_to_hsv(color); - if (stack_valid(hue_out)) + if(stack_valid(hue_out)) stack_store_float(stack, hue_out, color.x); - if (stack_valid(saturation_out)) + if(stack_valid(saturation_out)) stack_store_float(stack, saturation_out, color.y); - if (stack_valid(value_out)) + if(stack_valid(value_out)) stack_store_float(stack, value_out, color.z); } diff --git a/intern/cycles/kernel/svm/svm_sepcomb_vector.h b/intern/cycles/kernel/svm/svm_sepcomb_vector.h index c8e7e34f87d..63570dd6942 100644 --- a/intern/cycles/kernel/svm/svm_sepcomb_vector.h +++ b/intern/cycles/kernel/svm/svm_sepcomb_vector.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -22,7 +22,7 @@ ccl_device void svm_node_combine_vector(ShaderData *sd, float *stack, uint in_of { float vector = stack_load_float(stack, in_offset); - if (stack_valid(out_offset)) + if(stack_valid(out_offset)) stack_store_float(stack, out_offset+vector_index, vector); } @@ -30,10 +30,10 @@ ccl_device void svm_node_separate_vector(ShaderData *sd, float *stack, uint ivec { float3 vector = stack_load_float3(stack, ivector_offset); - if (stack_valid(out_offset)) { - if (vector_index == 0) + if(stack_valid(out_offset)) { + if(vector_index == 0) stack_store_float(stack, out_offset, vector.x); - else if (vector_index == 1) + else if(vector_index == 1) stack_store_float(stack, out_offset, vector.y); else stack_store_float(stack, out_offset, vector.z); diff --git a/intern/cycles/kernel/svm/svm_sky.h b/intern/cycles/kernel/svm/svm_sky.h index 500b5146931..4c8e3a32271 100644 --- a/intern/cycles/kernel/svm/svm_sky.h +++ b/intern/cycles/kernel/svm/svm_sky.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h index a17e4a25efe..eebd9bee420 100644 --- a/intern/cycles/kernel/svm/svm_tex_coord.h +++ b/intern/cycles/kernel/svm/svm_tex_coord.h @@ -11,67 +11,85 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN /* Texture Coordinate Node */ -ccl_device void svm_node_tex_coord(KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint type, uint out_offset) +ccl_device void svm_node_tex_coord(KernelGlobals *kg, + ShaderData *sd, + int path_flag, + float *stack, + uint4 node, + int *offset) { float3 data; + uint type = node.y; + uint out_offset = node.z; switch(type) { case NODE_TEXCO_OBJECT: { - data = sd->P; - if(sd->object != OBJECT_NONE) - object_inverse_position_transform(kg, sd, &data); + data = ccl_fetch(sd, P); + if(node.w == 0) { + if(ccl_fetch(sd, object) != OBJECT_NONE) { + object_inverse_position_transform(kg, sd, &data); + } + } + else { + Transform tfm; + tfm.x = read_node_float(kg, offset); + tfm.y = read_node_float(kg, offset); + tfm.z = read_node_float(kg, offset); + tfm.w = read_node_float(kg, offset); + data = transform_point(&tfm, data); + } break; } case NODE_TEXCO_NORMAL: { - data = sd->N; - if(sd->object != OBJECT_NONE) + data = ccl_fetch(sd, N); + if(ccl_fetch(sd, object) != OBJECT_NONE) object_inverse_normal_transform(kg, sd, &data); break; } case NODE_TEXCO_CAMERA: { Transform tfm = kernel_data.cam.worldtocamera; - if(sd->object != OBJECT_NONE) - data = transform_point(&tfm, sd->P); + if(ccl_fetch(sd, object) != OBJECT_NONE) + data = transform_point(&tfm, ccl_fetch(sd, P)); else - data = transform_point(&tfm, sd->P + camera_position(kg)); + data = transform_point(&tfm, ccl_fetch(sd, P) + camera_position(kg)); break; } case NODE_TEXCO_WINDOW: { - if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) - data = camera_world_to_ndc(kg, sd, sd->ray_P); + if((path_flag & PATH_RAY_CAMERA) && ccl_fetch(sd, object) == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) + data = camera_world_to_ndc(kg, sd, ccl_fetch(sd, ray_P)); else - data = camera_world_to_ndc(kg, sd, sd->P); + data = camera_world_to_ndc(kg, sd, ccl_fetch(sd, P)); data.z = 0.0f; break; } case NODE_TEXCO_REFLECTION: { - if(sd->object != OBJECT_NONE) - data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I; + if(ccl_fetch(sd, object) != OBJECT_NONE) + data = 2.0f*dot(ccl_fetch(sd, N), ccl_fetch(sd, I))*ccl_fetch(sd, N) - ccl_fetch(sd, I); else - data = sd->I; + data = ccl_fetch(sd, I); break; } case NODE_TEXCO_DUPLI_GENERATED: { - data = object_dupli_generated(kg, sd->object); + data = object_dupli_generated(kg, ccl_fetch(sd, object)); break; } case NODE_TEXCO_DUPLI_UV: { - data = object_dupli_uv(kg, sd->object); + data = object_dupli_uv(kg, ccl_fetch(sd, object)); break; } case NODE_TEXCO_VOLUME_GENERATED: { - data = sd->P; + data = ccl_fetch(sd, P); #ifdef __VOLUME__ - if(sd->object != OBJECT_NONE) + if(ccl_fetch(sd, object) != OBJECT_NONE) data = volume_normalized_position(kg, sd, data); #endif break; @@ -81,61 +99,79 @@ ccl_device void svm_node_tex_coord(KernelGlobals *kg, ShaderData *sd, int path_f stack_store_float3(stack, out_offset, data); } -ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint type, uint out_offset) +ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg, + ShaderData *sd, + int path_flag, + float *stack, + uint4 node, + int *offset) { #ifdef __RAY_DIFFERENTIALS__ float3 data; + uint type = node.y; + uint out_offset = node.z; switch(type) { case NODE_TEXCO_OBJECT: { - data = sd->P + sd->dP.dx; - if(sd->object != OBJECT_NONE) - object_inverse_position_transform(kg, sd, &data); + data = ccl_fetch(sd, P) + ccl_fetch(sd, dP).dx; + if(node.w == 0) { + if(ccl_fetch(sd, object) != OBJECT_NONE) { + object_inverse_position_transform(kg, sd, &data); + } + } + else { + Transform tfm; + tfm.x = read_node_float(kg, offset); + tfm.y = read_node_float(kg, offset); + tfm.z = read_node_float(kg, offset); + tfm.w = read_node_float(kg, offset); + data = transform_point(&tfm, data); + } break; } case NODE_TEXCO_NORMAL: { - data = sd->N; - if(sd->object != OBJECT_NONE) + data = ccl_fetch(sd, N); + if(ccl_fetch(sd, object) != OBJECT_NONE) object_inverse_normal_transform(kg, sd, &data); break; } case NODE_TEXCO_CAMERA: { Transform tfm = kernel_data.cam.worldtocamera; - if(sd->object != OBJECT_NONE) - data = transform_point(&tfm, sd->P + sd->dP.dx); + if(ccl_fetch(sd, object) != OBJECT_NONE) + data = transform_point(&tfm, ccl_fetch(sd, P) + ccl_fetch(sd, dP).dx); else - data = transform_point(&tfm, sd->P + sd->dP.dx + camera_position(kg)); + data = transform_point(&tfm, ccl_fetch(sd, P) + ccl_fetch(sd, dP).dx + camera_position(kg)); break; } case NODE_TEXCO_WINDOW: { - if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) - data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dx); + if((path_flag & PATH_RAY_CAMERA) && ccl_fetch(sd, object) == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) + data = camera_world_to_ndc(kg, sd, ccl_fetch(sd, ray_P) + ccl_fetch(sd, ray_dP).dx); else - data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx); + data = camera_world_to_ndc(kg, sd, ccl_fetch(sd, P) + ccl_fetch(sd, dP).dx); data.z = 0.0f; break; } case NODE_TEXCO_REFLECTION: { - if(sd->object != OBJECT_NONE) - data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I; + if(ccl_fetch(sd, object) != OBJECT_NONE) + data = 2.0f*dot(ccl_fetch(sd, N), ccl_fetch(sd, I))*ccl_fetch(sd, N) - ccl_fetch(sd, I); else - data = sd->I; + data = ccl_fetch(sd, I); break; } case NODE_TEXCO_DUPLI_GENERATED: { - data = object_dupli_generated(kg, sd->object); + data = object_dupli_generated(kg, ccl_fetch(sd, object)); break; } case NODE_TEXCO_DUPLI_UV: { - data = object_dupli_uv(kg, sd->object); + data = object_dupli_uv(kg, ccl_fetch(sd, object)); break; } case NODE_TEXCO_VOLUME_GENERATED: { - data = sd->P + sd->dP.dx; + data = ccl_fetch(sd, P) + ccl_fetch(sd, dP).dx; #ifdef __VOLUME__ - if(sd->object != OBJECT_NONE) + if(ccl_fetch(sd, object) != OBJECT_NONE) data = volume_normalized_position(kg, sd, data); #endif break; @@ -144,65 +180,83 @@ ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg, ShaderData *sd, in stack_store_float3(stack, out_offset, data); #else - svm_node_tex_coord(kg, sd, stack, type, out_offset); + svm_node_tex_coord(kg, sd, path_flag, stack, node, offset); #endif } -ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint type, uint out_offset) +ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg, + ShaderData *sd, + int path_flag, + float *stack, + uint4 node, + int *offset) { #ifdef __RAY_DIFFERENTIALS__ float3 data; + uint type = node.y; + uint out_offset = node.z; switch(type) { case NODE_TEXCO_OBJECT: { - data = sd->P + sd->dP.dy; - if(sd->object != OBJECT_NONE) - object_inverse_position_transform(kg, sd, &data); + data = ccl_fetch(sd, P) + ccl_fetch(sd, dP).dy; + if(node.w == 0) { + if(ccl_fetch(sd, object) != OBJECT_NONE) { + object_inverse_position_transform(kg, sd, &data); + } + } + else { + Transform tfm; + tfm.x = read_node_float(kg, offset); + tfm.y = read_node_float(kg, offset); + tfm.z = read_node_float(kg, offset); + tfm.w = read_node_float(kg, offset); + data = transform_point(&tfm, data); + } break; } case NODE_TEXCO_NORMAL: { - data = sd->N; - if(sd->object != OBJECT_NONE) + data = ccl_fetch(sd, N); + if(ccl_fetch(sd, object) != OBJECT_NONE) object_inverse_normal_transform(kg, sd, &data); break; } case NODE_TEXCO_CAMERA: { Transform tfm = kernel_data.cam.worldtocamera; - if(sd->object != OBJECT_NONE) - data = transform_point(&tfm, sd->P + sd->dP.dy); + if(ccl_fetch(sd, object) != OBJECT_NONE) + data = transform_point(&tfm, ccl_fetch(sd, P) + ccl_fetch(sd, dP).dy); else - data = transform_point(&tfm, sd->P + sd->dP.dy + camera_position(kg)); + data = transform_point(&tfm, ccl_fetch(sd, P) + ccl_fetch(sd, dP).dy + camera_position(kg)); break; } case NODE_TEXCO_WINDOW: { - if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) - data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dy); + if((path_flag & PATH_RAY_CAMERA) && ccl_fetch(sd, object) == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) + data = camera_world_to_ndc(kg, sd, ccl_fetch(sd, ray_P) + ccl_fetch(sd, ray_dP).dy); else - data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy); + data = camera_world_to_ndc(kg, sd, ccl_fetch(sd, P) + ccl_fetch(sd, dP).dy); data.z = 0.0f; break; } case NODE_TEXCO_REFLECTION: { - if(sd->object != OBJECT_NONE) - data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I; + if(ccl_fetch(sd, object) != OBJECT_NONE) + data = 2.0f*dot(ccl_fetch(sd, N), ccl_fetch(sd, I))*ccl_fetch(sd, N) - ccl_fetch(sd, I); else - data = sd->I; + data = ccl_fetch(sd, I); break; } case NODE_TEXCO_DUPLI_GENERATED: { - data = object_dupli_generated(kg, sd->object); + data = object_dupli_generated(kg, ccl_fetch(sd, object)); break; } case NODE_TEXCO_DUPLI_UV: { - data = object_dupli_uv(kg, sd->object); + data = object_dupli_uv(kg, ccl_fetch(sd, object)); break; } case NODE_TEXCO_VOLUME_GENERATED: { - data = sd->P + sd->dP.dy; + data = ccl_fetch(sd, P) + ccl_fetch(sd, dP).dy; #ifdef __VOLUME__ - if(sd->object != OBJECT_NONE) + if(ccl_fetch(sd, object) != OBJECT_NONE) data = volume_normalized_position(kg, sd, data); #endif break; @@ -211,7 +265,7 @@ ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg, ShaderData *sd, in stack_store_float3(stack, out_offset, data); #else - svm_node_tex_coord(kg, sd, stack, type, out_offset); + svm_node_tex_coord(kg, sd, path_flag, stack, node, offset); #endif } @@ -227,7 +281,7 @@ ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *st if(space == NODE_NORMAL_MAP_TANGENT) { /* tangent space */ - if(sd->object == OBJECT_NONE) { + if(ccl_fetch(sd, object) == OBJECT_NONE) { stack_store_float3(stack, normal_offset, make_float3(0.0f, 0.0f, 0.0f)); return; } @@ -248,11 +302,11 @@ ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *st float sign = primitive_attribute_float(kg, sd, attr_sign_elem, attr_sign_offset, NULL, NULL); float3 normal; - if(sd->shader & SHADER_SMOOTH_NORMAL) { + if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) { normal = primitive_attribute_float3(kg, sd, attr_normal_elem, attr_normal_offset, NULL, NULL); } else { - normal = sd->Ng; + normal = ccl_fetch(sd, Ng); object_inverse_normal_transform(kg, sd, &normal); } @@ -283,7 +337,7 @@ ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *st if(strength != 1.0f) { strength = max(strength, 0.0f); - N = normalize(sd->N + (N - sd->N)*strength); + N = normalize(ccl_fetch(sd, N) + (N - ccl_fetch(sd, N))*strength); } stack_store_float3(stack, normal_offset, N); @@ -313,7 +367,7 @@ ccl_device void svm_node_tangent(KernelGlobals *kg, ShaderData *sd, float *stack float3 generated; if(attr_offset == ATTR_STD_NOT_FOUND) - generated = sd->P; + generated = ccl_fetch(sd, P); else generated = primitive_attribute_float3(kg, sd, attr_elem, attr_offset, NULL, NULL); @@ -326,7 +380,7 @@ ccl_device void svm_node_tangent(KernelGlobals *kg, ShaderData *sd, float *stack } object_normal_transform(kg, sd, &tangent); - tangent = cross(sd->N, normalize(cross(tangent, sd->N))); + tangent = cross(ccl_fetch(sd, N), normalize(cross(tangent, ccl_fetch(sd, N)))); stack_store_float3(stack, tangent_offset, tangent); } diff --git a/intern/cycles/kernel/svm/svm_texture.h b/intern/cycles/kernel/svm/svm_texture.h index d97c85db36a..dcb00f7dd55 100644 --- a/intern/cycles/kernel/svm/svm_texture.h +++ b/intern/cycles/kernel/svm/svm_texture.h @@ -11,266 +11,14 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN -/* Voronoi Distances */ - -#if 0 -ccl_device float voronoi_distance(NodeDistanceMetric distance_metric, float3 d, float e) -{ -#if 0 - if(distance_metric == NODE_VORONOI_DISTANCE_SQUARED) -#endif - return dot(d, d); -#if 0 - if(distance_metric == NODE_VORONOI_ACTUAL_DISTANCE) - return len(d); - if(distance_metric == NODE_VORONOI_MANHATTAN) - return fabsf(d.x) + fabsf(d.y) + fabsf(d.z); - if(distance_metric == NODE_VORONOI_CHEBYCHEV) - return fmaxf(fabsf(d.x), fmaxf(fabsf(d.y), fabsf(d.z))); - if(distance_metric == NODE_VORONOI_MINKOVSKY_H) - return sqrtf(fabsf(d.x)) + sqrtf(fabsf(d.y)) + sqrtf(fabsf(d.y)); - if(distance_metric == NODE_VORONOI_MINKOVSKY_4) - return sqrtf(sqrtf(dot(d*d, d*d))); - if(distance_metric == NODE_VORONOI_MINKOVSKY) - return powf(powf(fabsf(d.x), e) + powf(fabsf(d.y), e) + powf(fabsf(d.z), e), 1.0f/e); - - return 0.0f; -#endif -} - -/* Voronoi / Worley like */ -ccl_device_inline float4 voronoi_Fn(float3 p, float e, int n1, int n2) -{ - float da[4]; - float3 pa[4]; - NodeDistanceMetric distance_metric = NODE_VORONOI_DISTANCE_SQUARED; - - /* returns distances in da and point coords in pa */ - int xx, yy, zz, xi, yi, zi; - - xi = floor_to_int(p.x); - yi = floor_to_int(p.y); - zi = floor_to_int(p.z); - - da[0] = 1e10f; - da[1] = 1e10f; - da[2] = 1e10f; - da[3] = 1e10f; - - pa[0] = make_float3(0.0f, 0.0f, 0.0f); - pa[1] = make_float3(0.0f, 0.0f, 0.0f); - pa[2] = make_float3(0.0f, 0.0f, 0.0f); - pa[3] = make_float3(0.0f, 0.0f, 0.0f); - - for(xx = xi-1; xx <= xi+1; xx++) { - for(yy = yi-1; yy <= yi+1; yy++) { - for(zz = zi-1; zz <= zi+1; zz++) { - float3 ip = make_float3((float)xx, (float)yy, (float)zz); - float3 vp = cellnoise_color(ip); - float3 pd = p - (vp + ip); - float d = voronoi_distance(distance_metric, pd, e); - - vp += ip; - - if(d < da[0]) { - da[3] = da[2]; - da[2] = da[1]; - da[1] = da[0]; - da[0] = d; - - pa[3] = pa[2]; - pa[2] = pa[1]; - pa[1] = pa[0]; - pa[0] = vp; - } - else if(d < da[1]) { - da[3] = da[2]; - da[2] = da[1]; - da[1] = d; - - pa[3] = pa[2]; - pa[2] = pa[1]; - pa[1] = vp; - } - else if(d < da[2]) { - da[3] = da[2]; - da[2] = d; - - pa[3] = pa[2]; - pa[2] = vp; - } - else if(d < da[3]) { - da[3] = d; - pa[3] = vp; - } - } - } - } - - float4 result = make_float4(pa[n1].x, pa[n1].y, pa[n1].z, da[n1]); - - if(n2 != -1) - result = make_float4(pa[n2].x, pa[n2].y, pa[n2].z, da[n2]) - result; - - return result; -} -#endif - -ccl_device float voronoi_F1_distance(float3 p) -{ - /* returns squared distance in da */ - float da = 1e10f; - -#ifndef __KERNEL_SSE2__ - int ix = floor_to_int(p.x), iy = floor_to_int(p.y), iz = floor_to_int(p.z); - - for (int xx = -1; xx <= 1; xx++) { - for (int yy = -1; yy <= 1; yy++) { - for (int zz = -1; zz <= 1; zz++) { - float3 ip = make_float3(ix + xx, iy + yy, iz + zz); - float3 vp = ip + cellnoise_color(ip); - float d = len_squared(p - vp); - da = min(d, da); - } - } - } -#else - ssef vec_p = load4f(p); - ssei xyzi = quick_floor_sse(vec_p); - - for (int xx = -1; xx <= 1; xx++) { - for (int yy = -1; yy <= 1; yy++) { - for (int zz = -1; zz <= 1; zz++) { - ssef ip = ssef(xyzi + ssei(xx, yy, zz, 0)); - ssef vp = ip + cellnoise_color(ip); - float d = len_squared<1, 1, 1, 0>(vec_p - vp); - da = min(d, da); - } - } - } -#endif - - return da; -} - -ccl_device float3 voronoi_F1_color(float3 p) -{ - /* returns color of the nearest point */ - float da = 1e10f; - -#ifndef __KERNEL_SSE2__ - float3 pa; - int ix = floor_to_int(p.x), iy = floor_to_int(p.y), iz = floor_to_int(p.z); - - for (int xx = -1; xx <= 1; xx++) { - for (int yy = -1; yy <= 1; yy++) { - for (int zz = -1; zz <= 1; zz++) { - float3 ip = make_float3(ix + xx, iy + yy, iz + zz); - float3 vp = ip + cellnoise_color(ip); - float d = len_squared(p - vp); - - if(d < da) { - da = d; - pa = vp; - } - } - } - } - - return cellnoise_color(pa); -#else - ssef pa, vec_p = load4f(p); - ssei xyzi = quick_floor_sse(vec_p); - - for (int xx = -1; xx <= 1; xx++) { - for (int yy = -1; yy <= 1; yy++) { - for (int zz = -1; zz <= 1; zz++) { - ssef ip = ssef(xyzi + ssei(xx, yy, zz, 0)); - ssef vp = ip + cellnoise_color(ip); - float d = len_squared<1, 1, 1, 0>(vec_p - vp); - - if(d < da) { - da = d; - pa = vp; - } - } - } - } - - ssef color = cellnoise_color(pa); - return (float3 &)color; -#endif -} - -#if 0 -ccl_device float voronoi_F1(float3 p) { return voronoi_Fn(p, 0.0f, 0, -1).w; } -ccl_device float voronoi_F2(float3 p) { return voronoi_Fn(p, 0.0f, 1, -1).w; } -ccl_device float voronoi_F3(float3 p) { return voronoi_Fn(p, 0.0f, 2, -1).w; } -ccl_device float voronoi_F4(float3 p) { return voronoi_Fn(p, 0.0f, 3, -1).w; } -ccl_device float voronoi_F1F2(float3 p) { return voronoi_Fn(p, 0.0f, 0, 1).w; } - -ccl_device float voronoi_Cr(float3 p) -{ - /* crackle type pattern, just a scale/clamp of F2-F1 */ - float t = 10.0f*voronoi_F1F2(p); - return (t > 1.0f)? 1.0f: t; -} - -ccl_device float voronoi_F1S(float3 p) { return 2.0f*voronoi_F1(p) - 1.0f; } -ccl_device float voronoi_F2S(float3 p) { return 2.0f*voronoi_F2(p) - 1.0f; } -ccl_device float voronoi_F3S(float3 p) { return 2.0f*voronoi_F3(p) - 1.0f; } -ccl_device float voronoi_F4S(float3 p) { return 2.0f*voronoi_F4(p) - 1.0f; } -ccl_device float voronoi_F1F2S(float3 p) { return 2.0f*voronoi_F1F2(p) - 1.0f; } -ccl_device float voronoi_CrS(float3 p) { return 2.0f*voronoi_Cr(p) - 1.0f; } -#endif - -/* Noise Bases */ - -ccl_device float noise_basis(float3 p, NodeNoiseBasis basis) -{ - /* Only Perlin enabled for now, others break CUDA compile by making kernel - * too big, with compile using > 4GB, due to everything being inlined. */ - -#if 0 - if(basis == NODE_NOISE_PERLIN) -#endif - return noise(p); -#if 0 - if(basis == NODE_NOISE_VORONOI_F1) - return voronoi_F1S(p); - if(basis == NODE_NOISE_VORONOI_F2) - return voronoi_F2S(p); - if(basis == NODE_NOISE_VORONOI_F3) - return voronoi_F3S(p); - if(basis == NODE_NOISE_VORONOI_F4) - return voronoi_F4S(p); - if(basis == NODE_NOISE_VORONOI_F2_F1) - return voronoi_F1F2S(p); - if(basis == NODE_NOISE_VORONOI_CRACKLE) - return voronoi_CrS(p); - if(basis == NODE_NOISE_CELL_NOISE) - return cellnoise(p); - - return 0.0f; -#endif -} - -/* Soft/Hard Noise */ - -ccl_device float noise_basis_hard(float3 p, NodeNoiseBasis basis, int hard) -{ - float t = noise_basis(p, basis); - return (hard)? fabsf(2.0f*t - 1.0f): t; -} - /* Turbulence */ -ccl_device_noinline float noise_turbulence(float3 p, NodeNoiseBasis basis, float octaves, int hard) +ccl_device_noinline float noise_turbulence(float3 p, float octaves, int hard) { float fscale = 1.0f; float amp = 1.0f; @@ -281,7 +29,7 @@ ccl_device_noinline float noise_turbulence(float3 p, NodeNoiseBasis basis, float n = float_to_int(octaves); for(i = 0; i <= n; i++) { - float t = noise_basis(fscale*p, basis); + float t = noise(fscale*p); if(hard) t = fabsf(2.0f*t - 1.0f); @@ -294,7 +42,7 @@ ccl_device_noinline float noise_turbulence(float3 p, NodeNoiseBasis basis, float float rmd = octaves - floorf(octaves); if(rmd != 0.0f) { - float t = noise_basis(fscale*p, basis); + float t = noise(fscale*p); if(hard) t = fabsf(2.0f*t - 1.0f); diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h index cd38ce4ba9b..33aa5e7c51c 100644 --- a/intern/cycles/kernel/svm/svm_types.h +++ b/intern/cycles/kernel/svm/svm_types.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __SVM_TYPES_H__ @@ -28,6 +28,29 @@ CCL_NAMESPACE_BEGIN /* Nodes */ +/* Known frequencies of used nodes, used for selective nodes compilation + * in the kernel. Currently only affects split OpenCL kernel. + * + * Keep as defines so it's easy to check which nodes are to be compiled + * from preprocessor. + * + * Lower the number of group more often the node is used. + */ +#define NODE_GROUP_LEVEL_0 0 +#define NODE_GROUP_LEVEL_1 1 +#define NODE_GROUP_LEVEL_2 2 +#define NODE_GROUP_LEVEL_3 3 +#define NODE_GROUP_LEVEL_MAX NODE_GROUP_LEVEL_3 + +#define NODE_FEATURE_VOLUME (1 << 0) +#define NODE_FEATURE_HAIR (1 << 1) +#define NODE_FEATURE_BUMP (1 << 2) +/* TODO(sergey): Consider using something like ((uint)(-1)). + * Need to check carefully operand types around usage of this + * define first. + */ +#define NODE_FEATURE_ALL (NODE_FEATURE_VOLUME|NODE_FEATURE_HAIR|NODE_FEATURE_BUMP) + typedef enum NodeType { NODE_END = 0, NODE_CLOSURE_BSDF, @@ -103,8 +126,8 @@ typedef enum NodeType { NODE_NORMAL_MAP, NODE_HAIR_INFO, NODE_UVMAP, + NODE_TEX_VOXEL, - /* Camera ray nodes. */ NODE_CAMERA_PATH_ATTRIBUTE, NODE_CAMERA_SAMPLE_PERSPECTIVE, NODE_CAMERA_RAY_OUTPUT, @@ -262,27 +285,6 @@ typedef enum NodeConvert { NODE_CONVERT_IV } NodeConvert; -typedef enum NodeDistanceMetric { - NODE_VORONOI_DISTANCE_SQUARED, - NODE_VORONOI_ACTUAL_DISTANCE, - NODE_VORONOI_MANHATTAN, - NODE_VORONOI_CHEBYCHEV, - NODE_VORONOI_MINKOVSKY_H, - NODE_VORONOI_MINKOVSKY_4, - NODE_VORONOI_MINKOVSKY -} NodeDistanceMetric; - -typedef enum NodeNoiseBasis { - NODE_NOISE_PERLIN, - NODE_NOISE_VORONOI_F1, - NODE_NOISE_VORONOI_F2, - NODE_NOISE_VORONOI_F3, - NODE_NOISE_VORONOI_F4, - NODE_NOISE_VORONOI_F2_F1, - NODE_NOISE_VORONOI_CRACKLE, - NODE_NOISE_CELL_NOISE -} NodeNoiseBasis; - typedef enum NodeMusgraveType { NODE_MUSGRAVE_MULTIFRACTAL, NODE_MUSGRAVE_FBM, @@ -340,6 +342,24 @@ typedef enum NodeNormalMapSpace { NODE_NORMAL_MAP_BLENDER_WORLD, } NodeNormalMapSpace; +typedef enum NodeImageProjection { + NODE_IMAGE_PROJ_FLAT = 0, + NODE_IMAGE_PROJ_BOX = 1, + NODE_IMAGE_PROJ_SPHERE = 2, + NODE_IMAGE_PROJ_TUBE = 3, +} NodeImageProjection; + +typedef enum NodeBumpOffset { + NODE_BUMP_OFFSET_CENTER, + NODE_BUMP_OFFSET_DX, + NODE_BUMP_OFFSET_DY, +} NodeBumpOffset; + +typedef enum NodeTexVoxelSpace { + NODE_TEX_VOXEL_SPACE_OBJECT = 0, + NODE_TEX_VOXEL_SPACE_WORLD = 1, +} NodeTexVoxelSpace; + typedef enum ShaderType { SHADER_TYPE_SURFACE, SHADER_TYPE_VOLUME, @@ -355,7 +375,6 @@ typedef enum ClosureType { /* Diffuse */ CLOSURE_BSDF_DIFFUSE_ID, CLOSURE_BSDF_OREN_NAYAR_ID, - CLOSURE_BSDF_WESTIN_SHEEN_ID, CLOSURE_BSDF_DIFFUSE_RAMP_ID, CLOSURE_BSDF_DIFFUSE_TOON_ID, @@ -369,7 +388,6 @@ typedef enum ClosureType { CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID, CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID, CLOSURE_BSDF_ASHIKHMIN_VELVET_ID, - CLOSURE_BSDF_WESTIN_BACKSCATTER_ID, CLOSURE_BSDF_PHONG_RAMP_ID, CLOSURE_BSDF_GLOSSY_TOON_ID, CLOSURE_BSDF_HAIR_REFLECTION_ID, @@ -428,6 +446,7 @@ typedef enum NodePathAttribute { #define CLOSURE_IS_BACKGROUND(type) (type == CLOSURE_BACKGROUND_ID) #define CLOSURE_IS_AMBIENT_OCCLUSION(type) (type == CLOSURE_AMBIENT_OCCLUSION_ID) #define CLOSURE_IS_PHASE(type) (type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) +#define CLOSURE_IS_GLASS(type) (type >= CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID && type <= CLOSURE_BSDF_SHARP_GLASS_ID) #define CLOSURE_WEIGHT_CUTOFF 1e-5f diff --git a/intern/cycles/kernel/svm/svm_value.h b/intern/cycles/kernel/svm/svm_value.h index 7beed065288..c1c2b539df3 100644 --- a/intern/cycles/kernel/svm/svm_value.h +++ b/intern/cycles/kernel/svm/svm_value.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/svm/svm_vector_transform.h b/intern/cycles/kernel/svm/svm_vector_transform.h index 61d33aeb8cf..4c32130d06d 100644 --- a/intern/cycles/kernel/svm/svm_vector_transform.h +++ b/intern/cycles/kernel/svm/svm_vector_transform.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -33,7 +33,7 @@ ccl_device void svm_node_vector_transform(KernelGlobals *kg, ShaderData *sd, flo NodeVectorTransformConvertSpace to = (NodeVectorTransformConvertSpace)ito; Transform tfm; - bool is_object = (sd->object != OBJECT_NONE); + bool is_object = (ccl_fetch(sd, object) != OBJECT_NONE); bool is_direction = (type == NODE_VECTOR_TRANSFORM_TYPE_VECTOR || type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL); /* From world */ @@ -45,7 +45,7 @@ ccl_device void svm_node_vector_transform(KernelGlobals *kg, ShaderData *sd, flo else in = transform_point(&tfm, in); } - else if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT && is_object) { + else if(to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT && is_object) { if(is_direction) object_inverse_dir_transform(kg, sd, &in); else @@ -54,7 +54,7 @@ ccl_device void svm_node_vector_transform(KernelGlobals *kg, ShaderData *sd, flo } /* From camera */ - else if (from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) { + else if(from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) { if(to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD || to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT) { tfm = kernel_data.cam.cameratoworld; if(is_direction) diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h index 083a2f30e06..d612d7e973f 100644 --- a/intern/cycles/kernel/svm/svm_voronoi.h +++ b/intern/cycles/kernel/svm/svm_voronoi.h @@ -11,13 +11,99 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN /* Voronoi */ +ccl_device float voronoi_F1_distance(float3 p) +{ + /* returns squared distance in da */ + float da = 1e10f; + +#ifndef __KERNEL_SSE2__ + int ix = floor_to_int(p.x), iy = floor_to_int(p.y), iz = floor_to_int(p.z); + + for(int xx = -1; xx <= 1; xx++) { + for(int yy = -1; yy <= 1; yy++) { + for(int zz = -1; zz <= 1; zz++) { + float3 ip = make_float3(ix + xx, iy + yy, iz + zz); + float3 vp = ip + cellnoise_color(ip); + float d = len_squared(p - vp); + da = min(d, da); + } + } + } +#else + ssef vec_p = load4f(p); + ssei xyzi = quick_floor_sse(vec_p); + + for(int xx = -1; xx <= 1; xx++) { + for(int yy = -1; yy <= 1; yy++) { + for(int zz = -1; zz <= 1; zz++) { + ssef ip = ssef(xyzi + ssei(xx, yy, zz, 0)); + ssef vp = ip + cellnoise_color(ip); + float d = len_squared<1, 1, 1, 0>(vec_p - vp); + da = min(d, da); + } + } + } +#endif + + return da; +} + +ccl_device float3 voronoi_F1_color(float3 p) +{ + /* returns color of the nearest point */ + float da = 1e10f; + +#ifndef __KERNEL_SSE2__ + float3 pa; + int ix = floor_to_int(p.x), iy = floor_to_int(p.y), iz = floor_to_int(p.z); + + for(int xx = -1; xx <= 1; xx++) { + for(int yy = -1; yy <= 1; yy++) { + for(int zz = -1; zz <= 1; zz++) { + float3 ip = make_float3(ix + xx, iy + yy, iz + zz); + float3 vp = ip + cellnoise_color(ip); + float d = len_squared(p - vp); + + if(d < da) { + da = d; + pa = vp; + } + } + } + } + + return cellnoise_color(pa); +#else + ssef pa, vec_p = load4f(p); + ssei xyzi = quick_floor_sse(vec_p); + + for(int xx = -1; xx <= 1; xx++) { + for(int yy = -1; yy <= 1; yy++) { + for(int zz = -1; zz <= 1; zz++) { + ssef ip = ssef(xyzi + ssei(xx, yy, zz, 0)); + ssef vp = ip + cellnoise_color(ip); + float d = len_squared<1, 1, 1, 0>(vec_p - vp); + + if(d < da) { + da = d; + pa = vp; + } + } + } + } + + ssef color = cellnoise_color(pa); + return (float3 &)color; +#endif +} + ccl_device_noinline float4 svm_voronoi(NodeVoronoiColoring coloring, float3 p) { if(coloring == NODE_VORONOI_INTENSITY) { diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h new file mode 100644 index 00000000000..bbb687dfce5 --- /dev/null +++ b/intern/cycles/kernel/svm/svm_voxel.h @@ -0,0 +1,64 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + +#if !defined(__KERNEL_GPU__) + +/* TODO(sergey): Think of making it more generic volume-type attribute + * sampler. + */ +ccl_device void svm_node_tex_voxel(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node, + int *offset) +{ + uint co_offset, density_out_offset, color_out_offset, space; + decode_node_uchar4(node.z, &co_offset, &density_out_offset, &color_out_offset, &space); + int id = node.y; + float3 co = stack_load_float3(stack, co_offset); + if(space == NODE_TEX_VOXEL_SPACE_OBJECT) { + co = volume_normalized_position(kg, sd, co); + } + else { + kernel_assert(space == NODE_TEX_VOXEL_SPACE_WORLD); + Transform tfm; + tfm.x = read_node_float(kg, offset); + tfm.y = read_node_float(kg, offset); + tfm.z = read_node_float(kg, offset); + tfm.w = read_node_float(kg, offset); + co = transform_point(&tfm, co); + } + if(co.x < 0.0f || co.y < 0.0f || co.z < 0.0f || + co.x > 1.0f || co.y > 1.0f || co.z > 1.0f) + { + if (stack_valid(density_out_offset)) + stack_store_float(stack, density_out_offset, 0.0f); + if (stack_valid(color_out_offset)) + stack_store_float3(stack, color_out_offset, make_float3(0.0f, 0.0f, 0.0f)); + return; + } + float4 r = kernel_tex_image_interp_3d(id, co.x, co.y, co.z); + if (stack_valid(density_out_offset)) + stack_store_float(stack, density_out_offset, r.w); + if (stack_valid(color_out_offset)) + stack_store_float3(stack, color_out_offset, make_float3(r.x, r.y, r.z)); +} + +#endif /* !defined(__KERNEL_GPU__) */ + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_wave.h b/intern/cycles/kernel/svm/svm_wave.h index 7f9081539a4..6eaddaf301c 100644 --- a/intern/cycles/kernel/svm/svm_wave.h +++ b/intern/cycles/kernel/svm/svm_wave.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -28,7 +28,7 @@ ccl_device_noinline float svm_wave(NodeWaveType type, float3 p, float detail, fl n = len(p) * 20.0f; if(distortion != 0.0f) - n += distortion * noise_turbulence(p*dscale, NODE_NOISE_PERLIN, detail, 0); + n += distortion * noise_turbulence(p*dscale, detail, 0); return 0.5f + 0.5f * sinf(n); } diff --git a/intern/cycles/kernel/svm/svm_wavelength.h b/intern/cycles/kernel/svm/svm_wavelength.h index 9e57c470c0f..57030f3979d 100644 --- a/intern/cycles/kernel/svm/svm_wavelength.h +++ b/intern/cycles/kernel/svm/svm_wavelength.h @@ -77,7 +77,7 @@ ccl_device void svm_node_wavelength(ShaderData *sd, float *stack, uint wavelengt int i = float_to_int(ii); float3 color; - if (i < 0 || i >= 80) { + if(i < 0 || i >= 80) { color = make_float3(0.0f, 0.0f, 0.0f); } else { diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h index 660e6e2ca47..30ccd523add 100644 --- a/intern/cycles/kernel/svm/svm_wireframe.h +++ b/intern/cycles/kernel/svm/svm_wireframe.h @@ -34,20 +34,16 @@ CCL_NAMESPACE_BEGIN /* Wireframe Node */ -ccl_device void svm_node_wireframe(KernelGlobals *kg, ShaderData *sd, float *stack, uint in_size, uint out_fac, uint use_pixel_size) +ccl_device float wireframe(KernelGlobals *kg, + ShaderData *sd, + float size, + int pixel_size, + float3 *P) { - /* Input Data */ - float size = stack_load_float(stack, in_size); - int pixel_size = (int)use_pixel_size; - - /* Output */ - float f = 0.0f; - - /* Calculate wireframe */ #ifdef __HAIR__ - if (sd->prim != PRIM_NONE && sd->type & PRIMITIVE_ALL_TRIANGLE) + if(ccl_fetch(sd, prim) != PRIM_NONE && ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) #else - if (sd->prim != PRIM_NONE) + if(ccl_fetch(sd, prim) != PRIM_NONE) #endif { float3 Co[3]; @@ -55,45 +51,85 @@ ccl_device void svm_node_wireframe(KernelGlobals *kg, ShaderData *sd, float *sta /* Triangles */ int np = 3; - - if(sd->type & PRIMITIVE_TRIANGLE) - triangle_vertices(kg, sd->prim, Co); + + if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) + triangle_vertices(kg, ccl_fetch(sd, prim), Co); else - motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, Co); + motion_triangle_vertices(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), Co); - if(!(sd->flag & SD_TRANSFORM_APPLIED)) { + if(!(ccl_fetch(sd, flag) & SD_TRANSFORM_APPLIED)) { object_position_transform(kg, sd, &Co[0]); object_position_transform(kg, sd, &Co[1]); object_position_transform(kg, sd, &Co[2]); } - + if(pixel_size) { // Project the derivatives of P to the viewing plane defined // by I so we have a measure of how big is a pixel at this point - float pixelwidth_x = len(sd->dP.dx - dot(sd->dP.dx, sd->I) * sd->I); - float pixelwidth_y = len(sd->dP.dy - dot(sd->dP.dy, sd->I) * sd->I); + float pixelwidth_x = len(ccl_fetch(sd, dP).dx - dot(ccl_fetch(sd, dP).dx, ccl_fetch(sd, I)) * ccl_fetch(sd, I)); + float pixelwidth_y = len(ccl_fetch(sd, dP).dy - dot(ccl_fetch(sd, dP).dy, ccl_fetch(sd, I)) * ccl_fetch(sd, I)); // Take the average of both axis' length pixelwidth = (pixelwidth_x + pixelwidth_y) * 0.5f; } - + // Use half the width as the neighbor face will render the // other half. And take the square for fast comparison pixelwidth *= 0.5f * size; pixelwidth *= pixelwidth; - for (int i = 0; i < np; i++) { + for(int i = 0; i < np; i++) { int i2 = i ? i - 1 : np - 1; - float3 dir = sd->P - Co[i]; + float3 dir = *P - Co[i]; float3 edge = Co[i] - Co[i2]; float3 crs = cross(edge, dir); // At this point dot(crs, crs) / dot(edge, edge) is // the square of area / length(edge) == square of the // distance to the edge. - if (dot(crs, crs) < (dot(edge, edge) * pixelwidth)) - f = 1.0f; + if(dot(crs, crs) < (dot(edge, edge) * pixelwidth)) + return 1.0f; } } - - if (stack_valid(out_fac)) + return 0.0f; +} + +ccl_device void svm_node_wireframe(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) +{ + uint in_size = node.y; + uint out_fac = node.z; + uint use_pixel_size, bump_offset; + decode_node_uchar4(node.w, &use_pixel_size, &bump_offset, NULL, NULL); + + /* Input Data */ + float size = stack_load_float(stack, in_size); + int pixel_size = (int)use_pixel_size; + + /* Calculate wireframe */ +#ifdef __SPLIT_KERNEL__ + /* TODO(sergey): This is because sd is actually a global space, + * which makes it difficult to re-use same wireframe() function. + * + * With OpenCL 2.0 it's possible to avoid this change, but for until + * then we'll be living with such an exception. + */ + float3 P = ccl_fetch(sd, P); + float f = wireframe(kg, sd, size, pixel_size, &P); +#else + float f = wireframe(kg, sd, size, pixel_size, &ccl_fetch(sd, P)); +#endif + + /* TODO(sergey): Think of faster way to calculate derivatives. */ + if(bump_offset == NODE_BUMP_OFFSET_DX) { + float3 Px = ccl_fetch(sd, P) - ccl_fetch(sd, dP).dx; + f += (f - wireframe(kg, sd, size, pixel_size, &Px)) / len(ccl_fetch(sd, dP).dx); + } + else if(bump_offset == NODE_BUMP_OFFSET_DY) { + float3 Py = ccl_fetch(sd, P) - ccl_fetch(sd, dP).dy; + f += (f - wireframe(kg, sd, size, pixel_size, &Py)) / len(ccl_fetch(sd, dP).dy); + } + + if(stack_valid(out_fac)) stack_store_float(stack, out_fac, f); } |