Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/intern
diff options
context:
space:
mode:
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/CMakeLists.txt25
-rw-r--r--intern/cycles/app/CMakeLists.txt6
-rw-r--r--intern/cycles/app/cycles_xml.h2
-rw-r--r--intern/cycles/blender/CCL_api.h2
-rw-r--r--intern/cycles/blender/addon/engine.py47
-rw-r--r--intern/cycles/blender/addon/properties.py36
-rw-r--r--intern/cycles/blender/addon/ui.py40
-rw-r--r--intern/cycles/blender/blender_curves.cpp3
-rw-r--r--intern/cycles/blender/blender_logging.cpp2
-rw-r--r--intern/cycles/blender/blender_object.cpp19
-rw-r--r--intern/cycles/blender/blender_object_cull.h2
-rw-r--r--intern/cycles/blender/blender_python.cpp12
-rw-r--r--intern/cycles/blender/blender_session.cpp39
-rw-r--r--intern/cycles/blender/blender_session.h2
-rw-r--r--intern/cycles/blender/blender_shader.cpp4
-rw-r--r--intern/cycles/blender/blender_sync.cpp87
-rw-r--r--intern/cycles/blender/blender_sync.h8
-rw-r--r--intern/cycles/blender/blender_util.h9
-rw-r--r--intern/cycles/bvh/CMakeLists.txt2
-rw-r--r--intern/cycles/bvh/bvh.cpp11
-rw-r--r--intern/cycles/bvh/bvh.h8
-rw-r--r--intern/cycles/bvh/bvh2.h2
-rw-r--r--intern/cycles/bvh/bvh4.h2
-rw-r--r--intern/cycles/bvh/bvh8.cpp31
-rw-r--r--intern/cycles/bvh/bvh8.h2
-rw-r--r--intern/cycles/bvh/bvh_build.h3
-rw-r--r--intern/cycles/bvh/bvh_embree.cpp884
-rw-r--r--intern/cycles/bvh/bvh_embree.h79
-rw-r--r--intern/cycles/bvh/bvh_node.h2
-rw-r--r--intern/cycles/bvh/bvh_params.h14
-rw-r--r--intern/cycles/bvh/bvh_sort.h2
-rw-r--r--intern/cycles/bvh/bvh_split.h2
-rw-r--r--intern/cycles/bvh/bvh_unaligned.h2
-rw-r--r--intern/cycles/cmake/external_libs.cmake6
-rw-r--r--intern/cycles/device/device.cpp2
-rw-r--r--intern/cycles/device/device.h11
-rw-r--r--intern/cycles/device/device_cpu.cpp62
-rw-r--r--intern/cycles/device/device_cuda.cpp46
-rw-r--r--intern/cycles/device/device_denoising.cpp10
-rw-r--r--intern/cycles/device/device_denoising.h2
-rw-r--r--intern/cycles/device/device_intern.h12
-rw-r--r--intern/cycles/device/device_memory.h3
-rw-r--r--intern/cycles/device/device_multi.cpp8
-rw-r--r--intern/cycles/device/device_network.cpp5
-rw-r--r--intern/cycles/device/device_network.h2
-rw-r--r--intern/cycles/device/device_opencl.cpp7
-rw-r--r--intern/cycles/device/device_split_kernel.h2
-rw-r--r--intern/cycles/device/device_task.h4
-rw-r--r--intern/cycles/device/opencl/opencl.h4
-rw-r--r--intern/cycles/device/opencl/opencl_base.cpp50
-rw-r--r--intern/cycles/device/opencl/opencl_mega.cpp4
-rw-r--r--intern/cycles/device/opencl/opencl_split.cpp6
-rw-r--r--intern/cycles/device/opencl/opencl_util.cpp8
-rw-r--r--intern/cycles/graph/node.h2
-rw-r--r--intern/cycles/graph/node_type.h2
-rw-r--r--intern/cycles/kernel/CMakeLists.txt6
-rw-r--r--intern/cycles/kernel/bvh/bvh.h171
-rw-r--r--intern/cycles/kernel/bvh/bvh_embree.h126
-rw-r--r--intern/cycles/kernel/bvh/bvh_local.h4
-rw-r--r--intern/cycles/kernel/bvh/bvh_shadow_all.h4
-rw-r--r--intern/cycles/kernel/bvh/bvh_traversal.h4
-rw-r--r--intern/cycles/kernel/bvh/bvh_volume.h4
-rw-r--r--intern/cycles/kernel/bvh/bvh_volume_all.h4
-rw-r--r--intern/cycles/kernel/bvh/obvh_local.h6
-rw-r--r--intern/cycles/kernel/bvh/obvh_shadow_all.h8
-rw-r--r--intern/cycles/kernel/bvh/obvh_traversal.h10
-rw-r--r--intern/cycles/kernel/bvh/obvh_volume.h6
-rw-r--r--intern/cycles/kernel/bvh/obvh_volume_all.h6
-rw-r--r--intern/cycles/kernel/bvh/qbvh_local.h6
-rw-r--r--intern/cycles/kernel/bvh/qbvh_shadow_all.h7
-rw-r--r--intern/cycles/kernel/bvh/qbvh_traversal.h10
-rw-r--r--intern/cycles/kernel/bvh/qbvh_volume.h6
-rw-r--r--intern/cycles/kernel/bvh/qbvh_volume_all.h6
-rw-r--r--intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h2
-rw-r--r--intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h2
-rw-r--r--intern/cycles/kernel/closure/bsdf_diffuse.h2
-rw-r--r--intern/cycles/kernel/closure/bsdf_diffuse_ramp.h4
-rw-r--r--intern/cycles/kernel/closure/bsdf_hair.h2
-rw-r--r--intern/cycles/kernel/closure/bsdf_hair_principled.h8
-rw-r--r--intern/cycles/kernel/closure/bsdf_microfacet.h2
-rw-r--r--intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h8
-rw-r--r--intern/cycles/kernel/closure/bsdf_oren_nayar.h2
-rw-r--r--intern/cycles/kernel/closure/bsdf_phong_ramp.h4
-rw-r--r--intern/cycles/kernel/closure/bsdf_principled_diffuse.h2
-rw-r--r--intern/cycles/kernel/closure/bsdf_principled_sheen.h2
-rw-r--r--intern/cycles/kernel/closure/bsdf_reflection.h2
-rw-r--r--intern/cycles/kernel/closure/bsdf_refraction.h2
-rw-r--r--intern/cycles/kernel/closure/bsdf_toon.h2
-rw-r--r--intern/cycles/kernel/closure/bsdf_transparent.h2
-rw-r--r--intern/cycles/kernel/closure/bsdf_util.h2
-rw-r--r--intern/cycles/kernel/closure/bssrdf.h2
-rw-r--r--intern/cycles/kernel/filter/filter.h2
-rw-r--r--intern/cycles/kernel/filter/filter_defines.h2
-rw-r--r--intern/cycles/kernel/filter/filter_nlm_cpu.h139
-rw-r--r--intern/cycles/kernel/geom/geom_curve_intersect.h26
-rw-r--r--intern/cycles/kernel/geom/geom_object.h24
-rw-r--r--intern/cycles/kernel/geom/geom_subd_triangle.h4
-rw-r--r--intern/cycles/kernel/geom/geom_triangle_intersect.h414
-rw-r--r--intern/cycles/kernel/kernel.h2
-rw-r--r--intern/cycles/kernel/kernel_color.h2
-rw-r--r--intern/cycles/kernel/kernel_compat_cpu.h2
-rw-r--r--intern/cycles/kernel/kernel_compat_cuda.h2
-rw-r--r--intern/cycles/kernel/kernel_compat_opencl.h2
-rw-r--r--intern/cycles/kernel/kernel_globals.h8
-rw-r--r--intern/cycles/kernel/kernel_id_passes.h94
-rw-r--r--intern/cycles/kernel/kernel_math.h2
-rw-r--r--intern/cycles/kernel/kernel_montecarlo.h99
-rw-r--r--intern/cycles/kernel/kernel_passes.h47
-rw-r--r--intern/cycles/kernel/kernel_path.h8
-rw-r--r--intern/cycles/kernel/kernel_path_volume.h8
-rw-r--r--intern/cycles/kernel/kernel_queues.h2
-rw-r--r--intern/cycles/kernel/kernel_random.h2
-rw-r--r--intern/cycles/kernel/kernel_shader.h5
-rw-r--r--intern/cycles/kernel/kernel_shadow.h2
-rw-r--r--intern/cycles/kernel/kernel_types.h49
-rw-r--r--intern/cycles/kernel/kernel_volume.h8
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu.h1
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h3
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h4
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h10
-rw-r--r--intern/cycles/kernel/kernels/cuda/filter.cu23
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel.cu30
-rw-r--r--intern/cycles/kernel/kernels/opencl/filter.cl23
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel.cl12
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h2
-rw-r--r--intern/cycles/kernel/osl/osl_closures.h2
-rw-r--r--intern/cycles/kernel/osl/osl_globals.h2
-rw-r--r--intern/cycles/kernel/osl/osl_services.cpp32
-rw-r--r--intern/cycles/kernel/osl/osl_services.h9
-rw-r--r--intern/cycles/kernel/osl/osl_shader.cpp2
-rw-r--r--intern/cycles/kernel/osl/osl_shader.h2
-rw-r--r--intern/cycles/kernel/shaders/oslutil.h2
-rw-r--r--intern/cycles/kernel/shaders/stdosl.h68
-rw-r--r--intern/cycles/kernel/split/kernel_buffer_update.h13
-rw-r--r--intern/cycles/kernel/split/kernel_shader_sort.h4
-rw-r--r--intern/cycles/kernel/split/kernel_split_data.h4
-rw-r--r--intern/cycles/kernel/split/kernel_split_data_types.h4
-rw-r--r--intern/cycles/kernel/svm/svm.h4
-rw-r--r--intern/cycles/kernel/svm/svm_closure.h6
-rw-r--r--intern/cycles/kernel/svm/svm_hsv.h2
-rw-r--r--intern/cycles/kernel/svm/svm_ramp.h2
-rw-r--r--intern/cycles/kernel/svm/svm_ramp_util.h2
-rw-r--r--intern/cycles/kernel/svm/svm_types.h2
-rw-r--r--intern/cycles/kernel/svm/svm_wave.h2
-rw-r--r--intern/cycles/render/CMakeLists.txt2
-rw-r--r--intern/cycles/render/attribute.h2
-rw-r--r--intern/cycles/render/background.h2
-rw-r--r--intern/cycles/render/bake.h2
-rw-r--r--intern/cycles/render/buffers.cpp74
-rw-r--r--intern/cycles/render/buffers.h6
-rw-r--r--intern/cycles/render/camera.cpp2
-rw-r--r--intern/cycles/render/camera.h3
-rw-r--r--intern/cycles/render/constant_fold.h2
-rw-r--r--intern/cycles/render/coverage.cpp143
-rw-r--r--intern/cycles/render/coverage.h49
-rw-r--r--intern/cycles/render/curves.h4
-rw-r--r--intern/cycles/render/film.cpp37
-rw-r--r--intern/cycles/render/film.h15
-rw-r--r--intern/cycles/render/graph.h2
-rw-r--r--intern/cycles/render/image.h2
-rw-r--r--intern/cycles/render/integrator.h2
-rw-r--r--intern/cycles/render/light.h2
-rw-r--r--intern/cycles/render/mesh.cpp49
-rw-r--r--intern/cycles/render/mesh.h3
-rw-r--r--intern/cycles/render/nodes.h3
-rw-r--r--intern/cycles/render/object.cpp39
-rw-r--r--intern/cycles/render/object.h8
-rw-r--r--intern/cycles/render/osl.cpp2
-rw-r--r--intern/cycles/render/osl.h3
-rw-r--r--intern/cycles/render/particles.h4
-rw-r--r--intern/cycles/render/scene.cpp12
-rw-r--r--intern/cycles/render/scene.h3
-rw-r--r--intern/cycles/render/session.cpp5
-rw-r--r--intern/cycles/render/session.h4
-rw-r--r--intern/cycles/render/shader.cpp22
-rw-r--r--intern/cycles/render/shader.h4
-rw-r--r--intern/cycles/render/sobol.h2
-rw-r--r--intern/cycles/render/stats.h2
-rw-r--r--intern/cycles/render/svm.cpp2
-rw-r--r--intern/cycles/render/svm.h3
-rw-r--r--intern/cycles/render/tables.h2
-rw-r--r--intern/cycles/render/tile.h2
-rw-r--r--intern/cycles/subd/subd_dice.h2
-rw-r--r--intern/cycles/subd/subd_patch.h2
-rw-r--r--intern/cycles/subd/subd_patch_table.cpp4
-rw-r--r--intern/cycles/subd/subd_patch_table.h4
-rw-r--r--intern/cycles/subd/subd_split.h2
-rw-r--r--intern/cycles/test/render_graph_finalize_test.cpp1
-rw-r--r--intern/cycles/test/util_path_test.cpp4
-rw-r--r--intern/cycles/util/CMakeLists.txt3
-rw-r--r--intern/cycles/util/util_algorithm.h2
-rw-r--r--intern/cycles/util/util_args.h2
-rw-r--r--intern/cycles/util/util_array.h289
-rw-r--r--intern/cycles/util/util_atomic.h32
-rw-r--r--intern/cycles/util/util_avxb.h72
-rw-r--r--intern/cycles/util/util_avxf.h10
-rw-r--r--intern/cycles/util/util_boundbox.h2
-rw-r--r--intern/cycles/util/util_color.h2
-rw-r--r--intern/cycles/util/util_debug.h2
-rw-r--r--intern/cycles/util/util_defines.h12
-rw-r--r--intern/cycles/util/util_foreach.h2
-rw-r--r--intern/cycles/util/util_function.h2
-rw-r--r--intern/cycles/util/util_guarded_allocator.cpp4
-rw-r--r--intern/cycles/util/util_guarded_allocator.h6
-rw-r--r--intern/cycles/util/util_half.h2
-rw-r--r--intern/cycles/util/util_hash.h2
-rw-r--r--intern/cycles/util/util_ies.cpp7
-rw-r--r--intern/cycles/util/util_ies.h2
-rw-r--r--intern/cycles/util/util_image.h2
-rw-r--r--intern/cycles/util/util_list.h2
-rw-r--r--intern/cycles/util/util_logging.cpp2
-rw-r--r--intern/cycles/util/util_logging.h6
-rw-r--r--intern/cycles/util/util_map.h2
-rw-r--r--intern/cycles/util/util_math.h30
-rw-r--r--intern/cycles/util/util_math_cdf.h2
-rw-r--r--intern/cycles/util/util_math_fast.h28
-rw-r--r--intern/cycles/util/util_math_float2.h2
-rw-r--r--intern/cycles/util/util_math_float3.h2
-rw-r--r--intern/cycles/util/util_math_float4.h14
-rw-r--r--intern/cycles/util/util_math_int2.h2
-rw-r--r--intern/cycles/util/util_math_int3.h2
-rw-r--r--intern/cycles/util/util_math_int4.h42
-rw-r--r--intern/cycles/util/util_math_intersect.h2
-rw-r--r--intern/cycles/util/util_md5.h2
-rw-r--r--intern/cycles/util/util_murmurhash.cpp127
-rw-r--r--intern/cycles/util/util_murmurhash.h30
-rw-r--r--intern/cycles/util/util_opengl.h2
-rw-r--r--intern/cycles/util/util_optimization.h2
-rw-r--r--intern/cycles/util/util_param.h2
-rw-r--r--intern/cycles/util/util_path.cpp2
-rw-r--r--intern/cycles/util/util_progress.h10
-rw-r--r--intern/cycles/util/util_projection.h4
-rw-r--r--intern/cycles/util/util_queue.h2
-rw-r--r--intern/cycles/util/util_rect.h2
-rw-r--r--intern/cycles/util/util_set.h2
-rw-r--r--intern/cycles/util/util_simd.h14
-rw-r--r--intern/cycles/util/util_sky_model.h2
-rw-r--r--intern/cycles/util/util_sseb.h2
-rw-r--r--intern/cycles/util/util_ssef.h12
-rw-r--r--intern/cycles/util/util_stack_allocator.h4
-rw-r--r--intern/cycles/util/util_static_assert.h2
-rw-r--r--intern/cycles/util/util_stats.h2
-rw-r--r--intern/cycles/util/util_string.h2
-rw-r--r--intern/cycles/util/util_system.cpp4
-rw-r--r--intern/cycles/util/util_system.h2
-rw-r--r--intern/cycles/util/util_texture.h2
-rw-r--r--intern/cycles/util/util_thread.cpp2
-rw-r--r--intern/cycles/util/util_thread.h6
-rw-r--r--intern/cycles/util/util_transform.h27
-rw-r--r--intern/cycles/util/util_types.h2
-rw-r--r--intern/cycles/util/util_types_float3.h4
-rw-r--r--intern/cycles/util/util_types_float3_impl.h4
-rw-r--r--intern/cycles/util/util_types_float4.h4
-rw-r--r--intern/cycles/util/util_types_float4_impl.h4
-rw-r--r--intern/cycles/util/util_types_float8.h6
-rw-r--r--intern/cycles/util/util_types_float8_impl.h4
-rw-r--r--intern/cycles/util/util_types_int3.h4
-rw-r--r--intern/cycles/util/util_types_int3_impl.h4
-rw-r--r--intern/cycles/util/util_types_int4.h6
-rw-r--r--intern/cycles/util/util_types_int4_impl.h14
-rw-r--r--intern/cycles/util/util_vector.h296
-rw-r--r--intern/cycles/util/util_version.h2
-rw-r--r--intern/cycles/util/util_view.cpp2
-rw-r--r--intern/cycles/util/util_view.h8
-rw-r--r--intern/cycles/util/util_windows.h2
-rw-r--r--intern/cycles/util/util_xml.h2
-rw-r--r--intern/elbeem/extern/LBM_fluidsim.h5
-rw-r--r--intern/ghost/GHOST_C-api.h5
-rw-r--r--intern/ghost/GHOST_ISystem.h5
-rw-r--r--intern/ghost/intern/GHOST_C-api.cpp6
-rw-r--r--intern/ghost/intern/GHOST_DropTargetWin32.h2
-rw-r--r--intern/ghost/intern/GHOST_EventTrackpad.h4
-rw-r--r--intern/ghost/intern/GHOST_System.cpp6
-rw-r--r--intern/ghost/intern/GHOST_System.h6
-rw-r--r--intern/ghost/intern/GHOST_SystemCocoa.h4
-rw-r--r--intern/ghost/intern/GHOST_SystemCocoa.mm12
-rw-r--r--intern/ghost/intern/GHOST_SystemX11.h20
-rw-r--r--intern/ghost/intern/GHOST_Window.h16
-rw-r--r--intern/ghost/intern/GHOST_WindowCocoa.mm19
-rw-r--r--intern/ghost/intern/GHOST_WindowWin32.cpp13
-rw-r--r--intern/ghost/intern/GHOST_WindowWin32.h12
-rw-r--r--intern/ghost/intern/GHOST_WindowX11.cpp10
-rw-r--r--intern/guardedalloc/CMakeLists.txt5
-rw-r--r--intern/guardedalloc/intern/mallocn.c7
-rw-r--r--intern/locale/boost_locale_wrapper.cpp12
-rw-r--r--intern/smoke/intern/FLUID_3D.cpp56
286 files changed, 3906 insertions, 1418 deletions
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt
index 0147a4306f4..873bbfa36fa 100644
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -3,7 +3,14 @@ if(NOT WITH_BLENDER AND WITH_CYCLES_STANDALONE)
set(CYCLES_INSTALL_PATH "")
else()
set(WITH_CYCLES_BLENDER ON)
- set(CYCLES_INSTALL_PATH "scripts/addons/cycles")
+ # WINDOWS_PYTHON_DEBUG needs to write into the user addons folder since it will
+ # be started with --env-system-scripts pointing to the release folder, which will
+ # lack the cycles addon, and we don't want to write into it.
+ if(NOT WINDOWS_PYTHON_DEBUG)
+ set(CYCLES_INSTALL_PATH "scripts/addons/cycles")
+ else()
+ set(CYCLES_INSTALL_PATH "$ENV{appdata}/blender foundation/blender/${BLENDER_VERSION}/scripts/addons/cycles")
+ endif()
endif()
# External Libraries
@@ -210,6 +217,15 @@ if(WITH_CYCLES_OSL)
)
endif()
+if(WITH_CYCLES_EMBREE)
+ add_definitions(-DWITH_EMBREE)
+ add_definitions(-DEMBREE_STATIC_LIB)
+ include_directories(
+ SYSTEM
+ ${EMBREE_INCLUDE_DIRS}
+ )
+endif()
+
if(WITH_CYCLES_OPENSUBDIV)
add_definitions(-DWITH_OPENSUBDIV)
include_directories(
@@ -283,12 +299,19 @@ if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER))
set(MAX_MSVC 1910)
elseif(${CUDA_VERSION} EQUAL "9.1")
set(MAX_MSVC 1911)
+ elseif(${CUDA_VERSION} EQUAL "10.0")
+ set(MAX_MSVC 1999)
endif()
if(NOT MSVC_VERSION LESS ${MAX_MSVC} OR CMAKE_C_COMPILER_ID MATCHES "Clang")
message(STATUS "nvcc not supported for this compiler version, using cycles_cubin_cc instead.")
set(WITH_CYCLES_CUBIN_COMPILER ON)
endif()
unset(MAX_MSVC)
+ elseif(APPLE)
+ if(${XCODE_VERSION} VERSION_GREATER_EQUAL 10.0)
+ message(STATUS "nvcc not supported for this compiler version, using cycles_cubin_cc instead.")
+ set(WITH_CYCLES_CUBIN_COMPILER ON)
+ endif()
endif()
endif()
diff --git a/intern/cycles/app/CMakeLists.txt b/intern/cycles/app/CMakeLists.txt
index 4fd551b33c2..2c1367a86dc 100644
--- a/intern/cycles/app/CMakeLists.txt
+++ b/intern/cycles/app/CMakeLists.txt
@@ -77,6 +77,9 @@ macro(cycles_target_link_libraries target)
if(WITH_CYCLES_OSL)
target_link_libraries(${target} ${OSL_LIBRARIES} ${LLVM_LIBRARIES})
endif()
+ if(WITH_CYCLES_EMBREE)
+ target_link_libraries(${target} ${EMBREE_LIBRARIES})
+ endif()
if(WITH_CYCLES_OPENSUBDIV)
target_link_libraries(${target} ${OPENSUBDIV_LIBRARIES})
endif()
@@ -144,6 +147,9 @@ if(WITH_CYCLES_CUBIN_COMPILER)
target_link_libraries(cycles_cubin_cc
extern_cuew
${OPENIMAGEIO_LIBRARIES}
+ ${OPENEXR_LIBRARIES}
+ ${PUGIXML_LIBRARIES}
+ ${BOOST_LIBRARIES}
${PLATFORM_LINKLIBS}
)
if(NOT CYCLES_STANDALONE_REPOSITORY)
diff --git a/intern/cycles/app/cycles_xml.h b/intern/cycles/app/cycles_xml.h
index 6a48980d8ea..a7bc1895d4e 100644
--- a/intern/cycles/app/cycles_xml.h
+++ b/intern/cycles/app/cycles_xml.h
@@ -29,4 +29,4 @@ void xml_read_file(Scene *scene, const char *filepath);
CCL_NAMESPACE_END
-#endif /* __CYCLES_XML_H__ */
+#endif /* __CYCLES_XML_H__ */
diff --git a/intern/cycles/blender/CCL_api.h b/intern/cycles/blender/CCL_api.h
index 233ffc8802c..b9750ad0c53 100644
--- a/intern/cycles/blender/CCL_api.h
+++ b/intern/cycles/blender/CCL_api.h
@@ -33,4 +33,4 @@ void CCL_logging_verbosity_set(int verbosity);
}
#endif
-#endif /* __CCL_API_H__ */
+#endif /* __CCL_API_H__ */
diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py
index 87dcbe486c7..23239ee4352 100644
--- a/intern/cycles/blender/addon/engine.py
+++ b/intern/cycles/blender/addon/engine.py
@@ -254,21 +254,32 @@ def register_passes(engine, scene, srl):
if crl.use_pass_volume_indirect: engine.register_pass(scene, srl, "VolumeInd", 3, "RGB", 'COLOR')
cscene = scene.cycles
- if crl.use_denoising and crl.denoising_store_passes and not cscene.use_progressive_refine:
- engine.register_pass(scene, srl, "Denoising Normal", 3, "XYZ", 'VECTOR')
- engine.register_pass(scene, srl, "Denoising Normal Variance", 3, "XYZ", 'VECTOR')
- engine.register_pass(scene, srl, "Denoising Albedo", 3, "RGB", 'COLOR')
- engine.register_pass(scene, srl, "Denoising Albedo Variance", 3, "RGB", 'COLOR')
- engine.register_pass(scene, srl, "Denoising Depth", 1, "Z", 'VALUE')
- engine.register_pass(scene, srl, "Denoising Depth Variance", 1, "Z", 'VALUE')
- engine.register_pass(scene, srl, "Denoising Shadow A", 3, "XYV", 'VECTOR')
- engine.register_pass(scene, srl, "Denoising Shadow B", 3, "XYV", 'VECTOR')
- engine.register_pass(scene, srl, "Denoising Image", 3, "RGB", 'COLOR')
- engine.register_pass(scene, srl, "Denoising Image Variance", 3, "RGB", 'COLOR')
-
- clean_options = ("denoising_diffuse_direct", "denoising_diffuse_indirect",
- "denoising_glossy_direct", "denoising_glossy_indirect",
- "denoising_transmission_direct", "denoising_transmission_indirect",
- "denoising_subsurface_direct", "denoising_subsurface_indirect")
- if any(getattr(crl, option) for option in clean_options):
- engine.register_pass(scene, srl, "Denoising Clean", 3, "RGB", 'COLOR')
+
+ if crl.use_pass_crypto_object:
+ for i in range(0, crl.pass_crypto_depth, 2):
+ engine.register_pass(scene, srl, "CryptoObject" + '{:02d}'.format(i), 4, "RGBA", 'COLOR')
+ if crl.use_pass_crypto_material:
+ for i in range(0, crl.pass_crypto_depth, 2):
+ engine.register_pass(scene, srl, "CryptoMaterial" + '{:02d}'.format(i), 4, "RGBA", 'COLOR')
+ if srl.cycles.use_pass_crypto_asset:
+ for i in range(0, srl.cycles.pass_crypto_depth, 2):
+ engine.register_pass(scene, srl, "CryptoAsset" + '{:02d}'.format(i), 4, "RGBA", 'COLOR')
+
+ if crl.use_denoising or crl.denoising_store_passes:
+ engine.register_pass(scene, srl, "Noisy Image", 4, "RGBA", 'COLOR')
+ if crl.denoising_store_passes:
+ engine.register_pass(scene, srl, "Denoising Normal", 3, "XYZ", 'VECTOR')
+ engine.register_pass(scene, srl, "Denoising Normal Variance", 3, "XYZ", 'VECTOR')
+ engine.register_pass(scene, srl, "Denoising Albedo", 3, "RGB", 'COLOR')
+ engine.register_pass(scene, srl, "Denoising Albedo Variance", 3, "RGB", 'COLOR')
+ engine.register_pass(scene, srl, "Denoising Depth", 1, "Z", 'VALUE')
+ engine.register_pass(scene, srl, "Denoising Depth Variance", 1, "Z", 'VALUE')
+ engine.register_pass(scene, srl, "Denoising Shadow A", 3, "XYV", 'VECTOR')
+ engine.register_pass(scene, srl, "Denoising Shadow B", 3, "XYV", 'VECTOR')
+ engine.register_pass(scene, srl, "Denoising Image Variance", 3, "RGB", 'COLOR')
+ clean_options = ("denoising_diffuse_direct", "denoising_diffuse_indirect",
+ "denoising_glossy_direct", "denoising_glossy_indirect",
+ "denoising_transmission_direct", "denoising_transmission_indirect",
+ "denoising_subsurface_direct", "denoising_subsurface_indirect")
+ if any(getattr(crl, option) for option in clean_options):
+ engine.register_pass(scene, srl, "Denoising Clean", 3, "RGB", 'COLOR')
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 80b83c94012..d986ba8c7a8 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -547,6 +547,11 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
description="Use special type BVH optimized for hair (uses more ram but renders faster)",
default=True,
)
+ cls.use_bvh_embree = BoolProperty(
+ name="Use Embree",
+ description="Use Embree as ray accelerator",
+ default=False,
+ )
cls.debug_bvh_time_steps = IntProperty(
name="BVH Time Steps",
description="Split BVH primitives by this number of time steps to speed up render time in cost of memory",
@@ -1339,7 +1344,36 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
default=False,
update=update_render_passes,
)
-
+ cls.use_pass_crypto_object = BoolProperty(
+ name="Cryptomatte Object",
+ description="Render cryptomatte object pass, for isolating objects in compositing",
+ default=False,
+ update=update_render_passes,
+ )
+ cls.use_pass_crypto_material = BoolProperty(
+ name="Cryptomatte Material",
+ description="Render cryptomatte material pass, for isolating materials in compositing",
+ default=False,
+ update=update_render_passes,
+ )
+ cls.use_pass_crypto_asset = BoolProperty(
+ name="Cryptomatte Asset",
+ description="Render cryptomatte asset pass, for isolating groups of objects with the same parent",
+ default=False,
+ update=update_render_passes,
+ )
+ cls.pass_crypto_depth = IntProperty(
+ name="Cryptomatte Levels",
+ description="Sets how many unique objects can be distinguished per pixel",
+ default=6, min=2, max=16, step=2,
+ update=update_render_passes,
+ )
+ cls.pass_crypto_accurate = BoolProperty(
+ name="Cryptomatte Accurate",
+ description="Gerenate a more accurate Cryptomatte pass. CPU only, may render slower and use more memory",
+ default=True,
+ update=update_render_passes,
+ )
@classmethod
def unregister(cls):
del bpy.types.SceneRenderLayer.cycles
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 5edbcb19672..2f1adfe4178 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -17,6 +17,7 @@
# <pep8 compliant>
import bpy
+import _cycles
from bpy.types import (
Panel,
@@ -430,11 +431,18 @@ class CYCLES_RENDER_PT_performance(CyclesButtonsPanel, Panel):
col.separator()
col.label(text="Acceleration structure:")
+ if _cycles.with_embree:
+ row = col.row()
+ row.active = use_cpu(context)
+ row.prop(cscene, "use_bvh_embree")
+ row = col.row()
col.prop(cscene, "debug_use_spatial_splits")
- col.prop(cscene, "debug_use_hair_bvh")
+ row = col.row()
+ row.active = not cscene.use_bvh_embree or not _cycles.with_embree
+ row.prop(cscene, "debug_use_hair_bvh")
row = col.row()
- row.active = not cscene.debug_use_spatial_splits
+ row.active = not cscene.debug_use_spatial_splits and not cscene.use_bvh_embree
row.prop(cscene, "debug_bvh_time_steps")
col = layout.column()
@@ -491,8 +499,6 @@ class CYCLES_RENDER_PT_layer_passes(CyclesButtonsPanel, Panel):
bl_options = {'DEFAULT_CLOSED'}
def draw(self, context):
- import _cycles
-
layout = self.layout
scene = context.scene
@@ -517,6 +523,8 @@ class CYCLES_RENDER_PT_layer_passes(CyclesButtonsPanel, Panel):
col.prop(rl, "use_pass_shadow")
col.prop(rl, "use_pass_ambient_occlusion")
col.separator()
+ col.prop(crl, "denoising_store_passes", text="Denoising Data")
+ col.separator()
col.prop(rl, "pass_alpha_threshold")
col = split.column()
@@ -549,12 +557,6 @@ class CYCLES_RENDER_PT_layer_passes(CyclesButtonsPanel, Panel):
col.prop(rl, "use_pass_emit", text="Emission")
col.prop(rl, "use_pass_environment")
- if context.scene.cycles.feature_set == 'EXPERIMENTAL':
- col.separator()
- sub = col.column()
- sub.active = crl.use_denoising
- sub.prop(crl, "denoising_store_passes", text="Denoising")
-
col = layout.column()
col.prop(crl, "pass_debug_render_time")
if _cycles.with_cycles_debug:
@@ -563,6 +565,17 @@ class CYCLES_RENDER_PT_layer_passes(CyclesButtonsPanel, Panel):
col.prop(crl, "pass_debug_bvh_intersections")
col.prop(crl, "pass_debug_ray_bounces")
+ crl = rl.cycles
+ layout.label("Cryptomatte:")
+ row = layout.row(align=True)
+ row.prop(crl, "use_pass_crypto_object", text="Object", toggle=True)
+ row.prop(crl, "use_pass_crypto_material", text="Material", toggle=True)
+ row.prop(crl, "use_pass_crypto_asset", text="Asset", toggle=True)
+ row = layout.row(align=True)
+ row.prop(crl, "pass_crypto_depth")
+ row = layout.row(align=True)
+ row.active = use_cpu(context)
+ row.prop(crl, "pass_crypto_accurate", text="Accurate Mode")
class CYCLES_RENDER_PT_views(CyclesButtonsPanel, Panel):
bl_label = "Views"
@@ -630,9 +643,8 @@ class CYCLES_RENDER_PT_denoising(CyclesButtonsPanel, Panel):
rl = rd.layers.active
crl = rl.cycles
- layout.active = crl.use_denoising
-
split = layout.split()
+ split.active = crl.use_denoising
col = split.column()
sub = col.column(align=True)
@@ -647,24 +659,28 @@ class CYCLES_RENDER_PT_denoising(CyclesButtonsPanel, Panel):
layout.separator()
row = layout.row()
+ row.active = crl.use_denoising or crl.denoising_store_passes
row.label(text="Diffuse:")
sub = row.row(align=True)
sub.prop(crl, "denoising_diffuse_direct", text="Direct", toggle=True)
sub.prop(crl, "denoising_diffuse_indirect", text="Indirect", toggle=True)
row = layout.row()
+ row.active = crl.use_denoising or crl.denoising_store_passes
row.label(text="Glossy:")
sub = row.row(align=True)
sub.prop(crl, "denoising_glossy_direct", text="Direct", toggle=True)
sub.prop(crl, "denoising_glossy_indirect", text="Indirect", toggle=True)
row = layout.row()
+ row.active = crl.use_denoising or crl.denoising_store_passes
row.label(text="Transmission:")
sub = row.row(align=True)
sub.prop(crl, "denoising_transmission_direct", text="Direct", toggle=True)
sub.prop(crl, "denoising_transmission_indirect", text="Indirect", toggle=True)
row = layout.row()
+ row.active = crl.use_denoising or crl.denoising_store_passes
row.label(text="Subsurface:")
sub = row.row(align=True)
sub.prop(crl, "denoising_subsurface_direct", text="Direct", toggle=True)
diff --git a/intern/cycles/blender/blender_curves.cpp b/intern/cycles/blender/blender_curves.cpp
index 99313866e9e..94d5dc5ea3d 100644
--- a/intern/cycles/blender/blender_curves.cpp
+++ b/intern/cycles/blender/blender_curves.cpp
@@ -707,7 +707,7 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int
if(diff == 0) {
for(int curvekey = CData->curve_firstkey[curve]; curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve]; curvekey++) {
if(i < mesh->curve_keys.size()) {
- mP[i] =CurveSegmentMotionCV(CData, sys, curve, curvekey);
+ mP[i] = CurveSegmentMotionCV(CData, sys, curve, curvekey);
if(!have_motion) {
/* unlike mesh coordinates, these tend to be slightly different
* between frames due to particle transforms into/out of object
@@ -718,7 +718,6 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int
have_motion = true;
}
}
-
i++;
}
}
diff --git a/intern/cycles/blender/blender_logging.cpp b/intern/cycles/blender/blender_logging.cpp
index d0f82e37662..3fca4efd097 100644
--- a/intern/cycles/blender/blender_logging.cpp
+++ b/intern/cycles/blender/blender_logging.cpp
@@ -22,7 +22,7 @@ void CCL_init_logging(const char *argv0)
ccl::util_logging_init(argv0);
}
-void CCL_start_debug_logging(void)
+void CCL_start_debug_logging()
{
ccl::util_logging_start();
}
diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp
index 35bf7beda41..a05c982b367 100644
--- a/intern/cycles/blender/blender_object.cpp
+++ b/intern/cycles/blender/blender_object.cpp
@@ -384,6 +384,23 @@ Object *BlenderSync::sync_object(BL::Object& b_parent,
object_updated = true;
}
+ /* sync the asset name for Cryptomatte */
+ BL::Object parent = b_ob.parent();
+ ustring parent_name;
+ if(parent) {
+ while(parent.parent()) {
+ parent = parent.parent();
+ }
+ parent_name = parent.name();
+ }
+ else {
+ parent_name = b_ob.name();
+ }
+ if(object->asset_name != parent_name) {
+ object->asset_name = parent_name;
+ object_updated = true;
+ }
+
/* object sync
* transform comparison should not be needed, but duplis don't work perfect
* in the depsgraph and may not signal changes, so this is a workaround */
@@ -404,8 +421,8 @@ Object *BlenderSync::sync_object(BL::Object& b_parent,
if(scene->need_motion() == Scene::MOTION_BLUR) {
motion_steps = object_motion_steps(b_parent, b_ob);
+ mesh->motion_steps = motion_steps;
if(motion_steps && object_use_deform_motion(b_parent, b_ob)) {
- mesh->motion_steps = motion_steps;
mesh->use_motion_blur = true;
}
}
diff --git a/intern/cycles/blender/blender_object_cull.h b/intern/cycles/blender/blender_object_cull.h
index 2147877a860..6e2a22438ec 100644
--- a/intern/cycles/blender/blender_object_cull.h
+++ b/intern/cycles/blender/blender_object_cull.h
@@ -46,4 +46,4 @@ private:
CCL_NAMESPACE_END
-#endif /* __BLENDER_OBJECT_CULL_H__ */
+#endif /* __BLENDER_OBJECT_CULL_H__ */
diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp
index 4b01eb5f2d4..8b3bec56d1f 100644
--- a/intern/cycles/blender/blender_python.cpp
+++ b/intern/cycles/blender/blender_python.cpp
@@ -839,10 +839,18 @@ void *CCL_python_module_init()
#ifdef WITH_NETWORK
PyModule_AddObject(mod, "with_network", Py_True);
Py_INCREF(Py_True);
-#else /* WITH_NETWORK */
+#else /* WITH_NETWORK */
PyModule_AddObject(mod, "with_network", Py_False);
Py_INCREF(Py_False);
-#endif /* WITH_NETWORK */
+#endif /* WITH_NETWORK */
+
+#ifdef WITH_EMBREE
+ PyModule_AddObject(mod, "with_embree", Py_True);
+ Py_INCREF(Py_True);
+#else /* WITH_EMBREE */
+ PyModule_AddObject(mod, "with_embree", Py_False);
+ Py_INCREF(Py_False);
+#endif /* WITH_EMBREE */
return (void*)mod;
}
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index a07131d04ae..75c7dcee05e 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -35,6 +35,7 @@
#include "util/util_function.h"
#include "util/util_hash.h"
#include "util/util_logging.h"
+#include "util/util_murmurhash.h"
#include "util/util_progress.h"
#include "util/util_time.h"
@@ -370,6 +371,17 @@ void BlenderSession::update_render_tile(RenderTile& rtile, bool highlight)
do_write_update_render_tile(rtile, false, false);
}
+static void add_cryptomatte_layer(BL::RenderResult& b_rr, string name, string manifest)
+{
+ string identifier = string_printf("%08x", util_murmur_hash3(name.c_str(), name.length(), 0));
+ string prefix = "cryptomatte/" + identifier.substr(0, 7) + "/";
+
+ render_add_metadata(b_rr, prefix+"name", name);
+ render_add_metadata(b_rr, prefix+"hash", "MurmurHash3_32");
+ render_add_metadata(b_rr, prefix+"conversion", "uint32_to_float32");
+ render_add_metadata(b_rr, prefix+"manifest", manifest);
+}
+
void BlenderSession::render()
{
/* set callback to write out render results */
@@ -405,17 +417,19 @@ void BlenderSession::render()
BL::RenderLayer b_rlay = *b_single_rlay;
/* add passes */
- array<Pass> passes = sync->sync_render_passes(b_rlay, *b_layer_iter, session_params);
+ vector<Pass> passes = sync->sync_render_passes(b_rlay, *b_layer_iter, session_params);
buffer_params.passes = passes;
PointerRNA crl = RNA_pointer_get(&b_layer_iter->ptr, "cycles");
bool use_denoising = get_boolean(crl, "use_denoising");
+ bool denoising_passes = use_denoising || get_boolean(crl, "denoising_store_passes");
session->tile_manager.schedule_denoising = use_denoising;
- buffer_params.denoising_data_pass = use_denoising;
+ buffer_params.denoising_data_pass = denoising_passes;
buffer_params.denoising_clean_pass = (scene->film->denoising_flags & DENOISING_CLEAN_ALL_PASSES);
session->params.use_denoising = use_denoising;
+ session->params.denoising_passes = denoising_passes;
session->params.denoising_radius = get_int(crl, "denoising_radius");
session->params.denoising_strength = get_float(crl, "denoising_strength");
session->params.denoising_feature_strength = get_float(crl, "denoising_feature_strength");
@@ -475,15 +489,28 @@ void BlenderSession::render()
break;
}
+ BL::RenderResult b_full_rr = b_engine.get_result();
if(is_single_layer) {
- BL::RenderResult b_rr = b_engine.get_result();
string num_aa_samples = string_printf("%d", session->params.samples);
- b_rr.stamp_data_add_field("Cycles Samples", num_aa_samples.c_str());
+ render_add_metadata(b_full_rr, "Cycles Samples", num_aa_samples);
/* TODO(sergey): Report whether we're doing resumable render
* and also start/end sample if so.
*/
}
+ if(scene->film->cryptomatte_passes & CRYPT_OBJECT) {
+ add_cryptomatte_layer(b_full_rr, b_rlay_name+".CryptoObject",
+ scene->object_manager->get_cryptomatte_objects(scene));
+ }
+ if(scene->film->cryptomatte_passes & CRYPT_MATERIAL) {
+ add_cryptomatte_layer(b_full_rr, b_rlay_name+".CryptoMaterial",
+ scene->shader_manager->get_cryptomatte_materials(scene));
+ }
+ if(scene->film->cryptomatte_passes & CRYPT_ASSET) {
+ add_cryptomatte_layer(b_full_rr, b_rlay_name+".CryptoAsset",
+ scene->object_manager->get_cryptomatte_assets(scene));
+ }
+
/* free result without merging */
end_render_result(b_engine, b_rr, true, true, false);
@@ -700,7 +727,7 @@ void BlenderSession::do_write_update_render_result(BL::RenderResult& b_rr,
bool read = false;
if(pass_type != PASS_NONE) {
/* copy pixels */
- read = buffers->get_pass_rect(pass_type, exposure, sample, components, &pixels[0]);
+ read = buffers->get_pass_rect(pass_type, exposure, sample, components, &pixels[0], b_pass.name());
}
else {
int denoising_offset = BlenderSync::get_denoising_pass(b_pass);
@@ -719,7 +746,7 @@ void BlenderSession::do_write_update_render_result(BL::RenderResult& b_rr,
else {
/* copy combined pass */
BL::RenderPass b_combined_pass(b_rlay.passes.find_by_name("Combined", b_rview_name.c_str()));
- if(buffers->get_pass_rect(PASS_COMBINED, exposure, sample, 4, &pixels[0]))
+ if(buffers->get_pass_rect(PASS_COMBINED, exposure, sample, 4, &pixels[0], "Combined"))
b_combined_pass.rect(&pixels[0]);
}
diff --git a/intern/cycles/blender/blender_session.h b/intern/cycles/blender/blender_session.h
index 08f5c873bef..b8a9096b354 100644
--- a/intern/cycles/blender/blender_session.h
+++ b/intern/cycles/blender/blender_session.h
@@ -173,4 +173,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __BLENDER_SESSION_H__ */
+#endif /* __BLENDER_SESSION_H__ */
diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp
index 3eefb92f6af..e33a6c20a52 100644
--- a/intern/cycles/blender/blender_shader.cpp
+++ b/intern/cycles/blender/blender_shader.cpp
@@ -635,8 +635,8 @@ static ShaderNode *add_node(Scene *scene,
}
}
#else
- (void)b_data;
- (void)b_ntree;
+ (void) b_data;
+ (void) b_ntree;
#endif
}
else if(b_node.is_a(&RNA_ShaderNodeTexImage)) {
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index 5e47252e336..832847c179f 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -40,6 +40,8 @@
CCL_NAMESPACE_BEGIN
+static const char *cryptomatte_prefix = "Crypto";
+
/* Constructor */
BlenderSync::BlenderSync(BL::RenderEngine& b_engine,
@@ -517,6 +519,9 @@ PassType BlenderSync::get_pass_type(BL::RenderPass& b_pass)
MAP_PASS("Debug Ray Bounces", PASS_RAY_BOUNCES);
#endif
MAP_PASS("Debug Render Time", PASS_RENDER_TIME);
+ if(string_startswith(name, cryptomatte_prefix)) {
+ return PASS_CRYPTOMATTE;
+ }
#undef MAP_PASS
return PASS_NONE;
@@ -525,6 +530,9 @@ PassType BlenderSync::get_pass_type(BL::RenderPass& b_pass)
int BlenderSync::get_denoising_pass(BL::RenderPass& b_pass)
{
string name = b_pass.name();
+
+ if(name == "Noisy Image") return DENOISING_PASS_COLOR;
+
if(name.substr(0, 10) != "Denoising ") {
return -1;
}
@@ -539,7 +547,6 @@ int BlenderSync::get_denoising_pass(BL::RenderPass& b_pass)
MAP_PASS("Depth Variance", DENOISING_PASS_DEPTH_VAR);
MAP_PASS("Shadow A", DENOISING_PASS_SHADOW_A);
MAP_PASS("Shadow B", DENOISING_PASS_SHADOW_B);
- MAP_PASS("Image", DENOISING_PASS_COLOR);
MAP_PASS("Image Variance", DENOISING_PASS_COLOR_VAR);
MAP_PASS("Clean", DENOISING_PASS_CLEAN);
#undef MAP_PASS
@@ -547,11 +554,11 @@ int BlenderSync::get_denoising_pass(BL::RenderPass& b_pass)
return -1;
}
-array<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay,
- BL::SceneRenderLayer& b_srlay,
- const SessionParams &session_params)
+vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay,
+ BL::SceneRenderLayer& b_srlay,
+ const SessionParams &session_params)
{
- array<Pass> passes;
+ vector<Pass> passes;
Pass::add(PASS_COMBINED, passes);
if(!session_params.device.advanced_shading) {
@@ -571,22 +578,11 @@ array<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay,
Pass::add(pass_type, passes);
}
- scene->film->denoising_flags = 0;
PointerRNA crp = RNA_pointer_get(&b_srlay.ptr, "cycles");
- if(get_boolean(crp, "denoising_store_passes") &&
- get_boolean(crp, "use_denoising"))
- {
- b_engine.add_pass("Denoising Normal", 3, "XYZ", b_srlay.name().c_str());
- b_engine.add_pass("Denoising Normal Variance", 3, "XYZ", b_srlay.name().c_str());
- b_engine.add_pass("Denoising Albedo", 3, "RGB", b_srlay.name().c_str());
- b_engine.add_pass("Denoising Albedo Variance", 3, "RGB", b_srlay.name().c_str());
- b_engine.add_pass("Denoising Depth", 1, "Z", b_srlay.name().c_str());
- b_engine.add_pass("Denoising Depth Variance", 1, "Z", b_srlay.name().c_str());
- b_engine.add_pass("Denoising Shadow A", 3, "XYV", b_srlay.name().c_str());
- b_engine.add_pass("Denoising Shadow B", 3, "XYV", b_srlay.name().c_str());
- b_engine.add_pass("Denoising Image", 3, "RGB", b_srlay.name().c_str());
- b_engine.add_pass("Denoising Image Variance", 3, "RGB", b_srlay.name().c_str());
-
+ bool use_denoising = get_boolean(crp, "use_denoising");
+ bool store_denoising_passes = get_boolean(crp, "denoising_store_passes");
+ scene->film->denoising_flags = 0;
+ if(use_denoising || store_denoising_passes) {
#define MAP_OPTION(name, flag) if(!get_boolean(crp, name)) scene->film->denoising_flags |= flag;
MAP_OPTION("denoising_diffuse_direct", DENOISING_CLEAN_DIFFUSE_DIR);
MAP_OPTION("denoising_diffuse_indirect", DENOISING_CLEAN_DIFFUSE_IND);
@@ -597,9 +593,22 @@ array<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay,
MAP_OPTION("denoising_subsurface_direct", DENOISING_CLEAN_SUBSURFACE_DIR);
MAP_OPTION("denoising_subsurface_indirect", DENOISING_CLEAN_SUBSURFACE_IND);
#undef MAP_OPTION
+ b_engine.add_pass("Noisy Image", 4, "RGBA", b_srlay.name().c_str());
+ }
+
+ if(store_denoising_passes) {
+ b_engine.add_pass("Denoising Normal", 3, "XYZ", b_srlay.name().c_str());
+ b_engine.add_pass("Denoising Normal Variance", 3, "XYZ", b_srlay.name().c_str());
+ b_engine.add_pass("Denoising Albedo", 3, "RGB", b_srlay.name().c_str());
+ b_engine.add_pass("Denoising Albedo Variance", 3, "RGB", b_srlay.name().c_str());
+ b_engine.add_pass("Denoising Depth", 1, "Z", b_srlay.name().c_str());
+ b_engine.add_pass("Denoising Depth Variance", 1, "Z", b_srlay.name().c_str());
+ b_engine.add_pass("Denoising Shadow A", 3, "XYV", b_srlay.name().c_str());
+ b_engine.add_pass("Denoising Shadow B", 3, "XYV", b_srlay.name().c_str());
+ b_engine.add_pass("Denoising Image Variance", 3, "RGB", b_srlay.name().c_str());
if(scene->film->denoising_flags & DENOISING_CLEAN_ALL_PASSES) {
- b_engine.add_pass("Denoising Clean", 3, "RGB", b_srlay.name().c_str());
+ b_engine.add_pass("Denoising Clean", 3, "RGB", b_srlay.name().c_str());
}
}
#ifdef __KERNEL_DEBUG__
@@ -633,6 +642,39 @@ array<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay,
Pass::add(PASS_VOLUME_INDIRECT, passes);
}
+ /* Cryptomatte stores two ID/weight pairs per RGBA layer.
+ * User facing parameter is the number of pairs. */
+ int crypto_depth = min(16, get_int(crp, "pass_crypto_depth")) / 2;
+ scene->film->cryptomatte_depth = crypto_depth;
+ scene->film->cryptomatte_passes = CRYPT_NONE;
+ if(get_boolean(crp, "use_pass_crypto_object")) {
+ for(int i = 0; i < crypto_depth; ++i) {
+ string passname = cryptomatte_prefix + string_printf("Object%02d", i);
+ b_engine.add_pass(passname.c_str(), 4, "RGBA", b_srlay.name().c_str());
+ Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str());
+ }
+ scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | CRYPT_OBJECT);
+ }
+ if(get_boolean(crp, "use_pass_crypto_material")) {
+ for(int i = 0; i < crypto_depth; ++i) {
+ string passname = cryptomatte_prefix + string_printf("Material%02d", i);
+ b_engine.add_pass(passname.c_str(), 4, "RGBA", b_srlay.name().c_str());
+ Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str());
+ }
+ scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | CRYPT_MATERIAL);
+ }
+ if(get_boolean(crp, "use_pass_crypto_asset")) {
+ for(int i = 0; i < crypto_depth; ++i) {
+ string passname = cryptomatte_prefix + string_printf("Asset%02d", i);
+ b_engine.add_pass(passname.c_str(), 4, "RGBA", b_srlay.name().c_str());
+ Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str());
+ }
+ scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | CRYPT_ASSET);
+ }
+ if(get_boolean(crp, "pass_crypto_accurate") && scene->film->cryptomatte_passes != CRYPT_NONE) {
+ scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | CRYPT_ACCURATE);
+ }
+
return passes;
}
@@ -689,6 +731,9 @@ SceneParams BlenderSync::get_scene_params(BL::Scene& b_scene,
params.bvh_layout = DebugFlags().cpu.bvh_layout;
}
+#ifdef WITH_EMBREE
+ params.bvh_layout = RNA_boolean_get(&cscene, "use_bvh_embree") ? BVH_LAYOUT_EMBREE : params.bvh_layout;
+#endif
return params;
}
diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h
index 5e63f76033d..6d78f62c7d0 100644
--- a/intern/cycles/blender/blender_sync.h
+++ b/intern/cycles/blender/blender_sync.h
@@ -66,9 +66,9 @@ public:
void **python_thread_state,
const char *layer = 0);
void sync_render_layers(BL::SpaceView3D& b_v3d, const char *layer);
- array<Pass> sync_render_passes(BL::RenderLayer& b_rlay,
- BL::SceneRenderLayer& b_srlay,
- const SessionParams &session_params);
+ vector<Pass> sync_render_passes(BL::RenderLayer& b_rlay,
+ BL::SceneRenderLayer& b_srlay,
+ const SessionParams &session_params);
void sync_integrator();
void sync_camera(BL::RenderSettings& b_render,
BL::Object& b_override,
@@ -213,4 +213,4 @@ private:
CCL_NAMESPACE_END
-#endif /* __BLENDER_SYNC_H__ */
+#endif /* __BLENDER_SYNC_H__ */
diff --git a/intern/cycles/blender/blender_util.h b/intern/cycles/blender/blender_util.h
index 7e61888348b..eb7019f45bc 100644
--- a/intern/cycles/blender/blender_util.h
+++ b/intern/cycles/blender/blender_util.h
@@ -20,6 +20,7 @@
#include "render/mesh.h"
#include "util/util_algorithm.h"
+#include "util/util_array.h"
#include "util/util_map.h"
#include "util/util_path.h"
#include "util/util_set.h"
@@ -243,6 +244,12 @@ static inline float *image_get_float_pixels_for_frame(BL::Image& image,
return BKE_image_get_float_pixels_for_frame(image.ptr.data, frame);
}
+/* Add a name/value metadata field to a render result.
+ *
+ * Thin wrapper around BL::RenderResult::stamp_data_add_field() that accepts
+ * strings instead of C strings. The strings are only read, so they are taken
+ * by const reference; string literals and temporaries still bind to them. */
+static inline void render_add_metadata(BL::RenderResult& b_rr, const string& name, const string& value)
+{
+	b_rr.stamp_data_add_field(name.c_str(), value.c_str());
+}
+
+
/* Utilities */
static inline Transform get_transform(const BL::Array<float, 16>& array)
@@ -832,4 +839,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __BLENDER_UTIL_H__ */
+#endif /* __BLENDER_UTIL_H__ */
diff --git a/intern/cycles/bvh/CMakeLists.txt b/intern/cycles/bvh/CMakeLists.txt
index fcd28572fdf..6014624f395 100644
--- a/intern/cycles/bvh/CMakeLists.txt
+++ b/intern/cycles/bvh/CMakeLists.txt
@@ -13,6 +13,7 @@ set(SRC
bvh8.cpp
bvh_binning.cpp
bvh_build.cpp
+ bvh_embree.cpp
bvh_node.cpp
bvh_sort.cpp
bvh_split.cpp
@@ -26,6 +27,7 @@ set(SRC_HEADERS
bvh8.h
bvh_binning.h
bvh_build.h
+ bvh_embree.h
bvh_node.h
bvh_params.h
bvh_sort.h
diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp
index bc73a3ad264..ac0614e3659 100644
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -26,6 +26,10 @@
#include "bvh/bvh_build.h"
#include "bvh/bvh_node.h"
+#ifdef WITH_EMBREE
+#include "bvh/bvh_embree.h"
+#endif
+
#include "util/util_foreach.h"
#include "util/util_logging.h"
#include "util/util_progress.h"
@@ -41,6 +45,7 @@ const char *bvh_layout_name(BVHLayout layout)
case BVH_LAYOUT_BVH4: return "BVH4";
case BVH_LAYOUT_BVH8: return "BVH8";
case BVH_LAYOUT_NONE: return "NONE";
+ case BVH_LAYOUT_EMBREE: return "EMBREE";
case BVH_LAYOUT_ALL: return "ALL";
}
LOG(DFATAL) << "Unsupported BVH layout was passed.";
@@ -96,6 +101,10 @@ BVH *BVH::create(const BVHParams& params, const vector<Object*>& objects)
return new BVH4(params, objects);
case BVH_LAYOUT_BVH8:
return new BVH8(params, objects);
+ case BVH_LAYOUT_EMBREE:
+#ifdef WITH_EMBREE
+ return new BVHEmbree(params, objects);
+#endif
case BVH_LAYOUT_NONE:
case BVH_LAYOUT_ALL:
break;
@@ -106,7 +115,7 @@ BVH *BVH::create(const BVHParams& params, const vector<Object*>& objects)
/* Building */
-void BVH::build(Progress& progress)
+void BVH::build(Progress& progress, Stats*)
{
progress.set_substatus("Building BVH");
diff --git a/intern/cycles/bvh/bvh.h b/intern/cycles/bvh/bvh.h
index 86be0bae4be..c8ad29004d7 100644
--- a/intern/cycles/bvh/bvh.h
+++ b/intern/cycles/bvh/bvh.h
@@ -19,12 +19,13 @@
#define __BVH_H__
#include "bvh/bvh_params.h"
-
+#include "util/util_array.h"
#include "util/util_types.h"
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
+class Stats;
class BVHNode;
struct BVHStackEntry;
class BVHParams;
@@ -35,7 +36,6 @@ class Progress;
#define BVH_ALIGN 4096
#define TRI_NODE_SIZE 3
-
/* Packed BVH
*
* BVH stored as it will be used for traversal on the rendering device. */
@@ -91,7 +91,7 @@ public:
static BVH *create(const BVHParams& params, const vector<Object*>& objects);
virtual ~BVH() {}
- void build(Progress& progress);
+ virtual void build(Progress& progress, Stats *stats=NULL);
void refit(Progress& progress);
protected:
@@ -126,4 +126,4 @@ struct BVHStackEntry
CCL_NAMESPACE_END
-#endif /* __BVH_H__ */
+#endif /* __BVH_H__ */
diff --git a/intern/cycles/bvh/bvh2.h b/intern/cycles/bvh/bvh2.h
index df65ddca5b7..ecc697567bb 100644
--- a/intern/cycles/bvh/bvh2.h
+++ b/intern/cycles/bvh/bvh2.h
@@ -84,4 +84,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __BVH2_H__ */
+#endif /* __BVH2_H__ */
diff --git a/intern/cycles/bvh/bvh4.h b/intern/cycles/bvh/bvh4.h
index 310909a37e1..28bab2fe327 100644
--- a/intern/cycles/bvh/bvh4.h
+++ b/intern/cycles/bvh/bvh4.h
@@ -84,4 +84,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __BVH4_H__ */
+#endif /* __BVH4_H__ */
diff --git a/intern/cycles/bvh/bvh8.cpp b/intern/cycles/bvh/bvh8.cpp
index 70d003d938a..b95fe572e27 100644
--- a/intern/cycles/bvh/bvh8.cpp
+++ b/intern/cycles/bvh/bvh8.cpp
@@ -124,6 +124,7 @@ void BVH8::pack_aligned_node(int idx,
data[0].a = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
data[0].b = time_from;
data[0].c = time_to;
+
for(int i = 0; i < num; i++) {
float3 bb_min = bounds[i].min;
float3 bb_max = bounds[i].max;
@@ -140,8 +141,8 @@ void BVH8::pack_aligned_node(int idx,
for(int i = num; i < 8; i++) {
/* We store BB which would never be recorded as intersection
- * so kernel might safely assume there are always 4 child nodes.
- */
+ * so kernel might safely assume there are always 4 child nodes.
+ */
data[1][i] = FLT_MAX;
data[2][i] = -FLT_MAX;
@@ -153,6 +154,7 @@ void BVH8::pack_aligned_node(int idx,
data[7][i] = __int_as_float(0);
}
+
memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_ONODE_SIZE);
}
@@ -189,6 +191,7 @@ void BVH8::pack_unaligned_node(int idx,
{
float8 data[BVH_UNALIGNED_ONODE_SIZE];
memset(data, 0, sizeof(data));
+
data[0].a = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
data[0].b = time_from;
data[0].c = time_to;
@@ -222,21 +225,21 @@ void BVH8::pack_unaligned_node(int idx,
* so kernel might safely assume there are always 4 child nodes.
*/
- data[1][i] = 1.0f;
- data[2][i] = 0.0f;
- data[3][i] = 0.0f;
+ data[1][i] = NAN;
+ data[2][i] = NAN;
+ data[3][i] = NAN;
- data[4][i] = 0.0f;
- data[5][i] = 0.0f;
- data[6][i] = 0.0f;
+ data[4][i] = NAN;
+ data[5][i] = NAN;
+ data[6][i] = NAN;
- data[7][i] = 0.0f;
- data[8][i] = 0.0f;
- data[9][i] = 0.0f;
+ data[7][i] = NAN;
+ data[8][i] = NAN;
+ data[9][i] = NAN;
- data[10][i] = -FLT_MAX;
- data[11][i] = -FLT_MAX;
- data[12][i] = -FLT_MAX;
+ data[10][i] = NAN;
+ data[11][i] = NAN;
+ data[12][i] = NAN;
data[13][i] = __int_as_float(0);
}
diff --git a/intern/cycles/bvh/bvh8.h b/intern/cycles/bvh/bvh8.h
index 274a2442c7e..834daf3abce 100644
--- a/intern/cycles/bvh/bvh8.h
+++ b/intern/cycles/bvh/bvh8.h
@@ -95,4 +95,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __BVH8_H__ */
+#endif /* __BVH8_H__ */
diff --git a/intern/cycles/bvh/bvh_build.h b/intern/cycles/bvh/bvh_build.h
index 7b245139819..dd95a5cc0e8 100644
--- a/intern/cycles/bvh/bvh_build.h
+++ b/intern/cycles/bvh/bvh_build.h
@@ -23,6 +23,7 @@
#include "bvh/bvh_params.h"
#include "bvh/bvh_unaligned.h"
+#include "util/util_array.h"
#include "util/util_task.h"
#include "util/util_vector.h"
@@ -142,4 +143,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __BVH_BUILD_H__ */
+#endif /* __BVH_BUILD_H__ */
diff --git a/intern/cycles/bvh/bvh_embree.cpp b/intern/cycles/bvh/bvh_embree.cpp
new file mode 100644
index 00000000000..7489fe8ea42
--- /dev/null
+++ b/intern/cycles/bvh/bvh_embree.cpp
@@ -0,0 +1,884 @@
+/*
+ * Copyright 2018, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This class implements a ray accelerator for Cycles using Intel's Embree library.
+ * It supports triangles, curves, object and deformation blur and instancing.
+ * Not supported are thick line segments, those have no native equivalent in Embree.
+ * They could be implemented using Embree's thick curves, at the expense of wasted memory.
+ * User defined intersections for Embree could also be an option, but since Embree only uses aligned BVHs
+ * for user geometry, this would come with reduced performance and/or higher memory usage.
+ *
+ * Since Embree allows an object to be either curves or triangles but not both, Cycles object IDs are mapped
+ * to Embree IDs by multiplying by two and adding one for curves.
+ *
+ * This implementation shares RTCDevices between Cycles instances. Eventually each instance should get
+ * a separate RTCDevice to correctly keep track of memory usage.
+ *
+ * Vertex and index buffers are duplicated between Cycles device arrays and Embree. These could be merged,
+ * which would require changes to intersection refinement, shader setup, mesh light sampling and a few
+ * other places in Cycles where direct access to vertex data is required.
+ */
+
+#ifdef WITH_EMBREE
+
+#include <pmmintrin.h>
+#include <xmmintrin.h>
+#include <embree3/rtcore_geometry.h>
+
+#include "bvh/bvh_embree.h"
+
+/* Kernel includes are necessary so that the filter function for Embree can access the packed BVH. */
+#include "kernel/bvh/bvh_embree.h"
+#include "kernel/kernel_compat_cpu.h"
+#include "kernel/split/kernel_split_data_types.h"
+#include "kernel/kernel_globals.h"
+#include "kernel/kernel_random.h"
+
+#include "render/mesh.h"
+#include "render/object.h"
+#include "util/util_foreach.h"
+#include "util/util_logging.h"
+#include "util/util_progress.h"
+
+CCL_NAMESPACE_BEGIN
+
+#define IS_HAIR(x) (x & 1)
+
+/* Intersection filter registered with Embree for regular ray queries.
+ *
+ * Embree invokes it for every valid ray/primitive intersection. The only
+ * filtering done here is rejecting hits on back-facing hair: when curves are
+ * interpolated and neither back-facing curves nor ribbons are enabled, a hit
+ * whose geometric normal points along the ray direction is marked invalid so
+ * that Embree keeps tracing.
+ * Cycles' own BVH does this kind of filtering directly inside the traversal calls.
+ */
+static void rtc_filter_func(const RTCFilterFunctionNArguments *args)
+{
+	/* Current implementation in Cycles assumes only single-ray intersection queries. */
+	assert(args->N == 1);
+
+	const RTCRay *ray = (RTCRay*)args->ray;
+	const RTCHit *hit = (RTCHit*)args->hit;
+	CCLIntersectContext *ctx = ((IntersectContext*)args->context)->userRayExt;
+	/* Referenced by the kernel_data accessor below. */
+	KernelGlobals *kg = ctx->kg;
+
+	const bool cull_backfacing_hair = IS_HAIR(hit->geomID)
+	        && (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
+	        && !(kernel_data.curve.curveflags & CURVE_KN_BACKFACING)
+	        && !(kernel_data.curve.curveflags & CURVE_KN_RIBBONS);
+	if(!cull_backfacing_hair) {
+		return;
+	}
+
+	const float3 ray_dir = make_float3(ray->dir_x, ray->dir_y, ray->dir_z);
+	const float3 hit_Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
+	if(dot(ray_dir, hit_Ng) > 0.0f) {
+		/* Reject the hit, Embree continues tracing. */
+		*args->valid = 0;
+	}
+}
+
+/* Return true when an intersection with the same object, primitive and
+ * distance is already among the hits recorded in ctx->isect_s. */
+static bool rtc_hit_already_recorded(const CCLIntersectContext *ctx, const Intersection& isect)
+{
+	/* Only the first num_hits entries have been written so far. */
+	for(size_t i = 0; i < ctx->num_hits; ++i) {
+		const Intersection& recorded = ctx->isect_s[i];
+		if(isect.object == recorded.object &&
+		   isect.prim == recorded.prim &&
+		   isect.t == recorded.t) {
+			return true;
+		}
+	}
+	return false;
+}
+
+/* Intersection filter registered with Embree for occlusion queries.
+ *
+ * Besides rejecting back-facing hair for all ray types, the behavior depends
+ * on ctx->type:
+ * - RAY_SHADOW_ALL: records hits in ctx->isect_s and keeps tracing through
+ *   surfaces whose shader has transparent shadows.
+ * - RAY_SSS: records hits in ctx->ss_isect, using reservoir sampling once
+ *   max_hits is exceeded, and always keeps tracing.
+ * - RAY_VOLUME_ALL: records hits on objects that have a volume and keeps tracing.
+ * - RAY_REGULAR / default: accepts the hit unchanged.
+ *
+ * Setting *args->valid to 0 rejects the hit, which makes Embree continue tracing.
+ * A hit that has already been recorded is rejected without being recorded again.
+ */
+static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments* args)
+{
+	/* Current implementation in Cycles assumes only single-ray intersection queries. */
+	assert(args->N == 1);
+
+	const RTCRay *ray = (RTCRay*)args->ray;
+	RTCHit *hit = (RTCHit*)args->hit;
+	CCLIntersectContext *ctx = ((IntersectContext*)args->context)->userRayExt;
+	KernelGlobals *kg = ctx->kg;
+
+	/* For all ray types: Check if there is backfacing hair to ignore */
+	if(IS_HAIR(hit->geomID) && (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
+	   && !(kernel_data.curve.curveflags & CURVE_KN_BACKFACING)
+	   && !(kernel_data.curve.curveflags & CURVE_KN_RIBBONS)) {
+		if(dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z), make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
+			*args->valid = 0;
+			return;
+		}
+	}
+
+	switch(ctx->type) {
+		case CCLIntersectContext::RAY_SHADOW_ALL: {
+			/* Append the intersection to the end of the array. */
+			if(ctx->num_hits < ctx->max_hits) {
+				Intersection current_isect;
+				kernel_embree_convert_hit(kg, ray, hit, &current_isect);
+				if(rtc_hit_already_recorded(ctx, current_isect)) {
+					/* This intersection was already recorded, skip it
+					 * and let Embree continue tracing. */
+					*args->valid = 0;
+					return;
+				}
+				Intersection *isect = &ctx->isect_s[ctx->num_hits];
+				++ctx->num_hits;
+				*isect = current_isect;
+				int prim = kernel_tex_fetch(__prim_index, isect->prim);
+				int shader = 0;
+				if(kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
+					shader = kernel_tex_fetch(__tri_shader, prim);
+				}
+				else {
+					float4 str = kernel_tex_fetch(__curves, prim);
+					shader = __float_as_int(str.z);
+				}
+				int flag = kernel_tex_fetch(__shaders, shader & SHADER_MASK).flags;
+				/* If no transparent shadows, all light is blocked. */
+				if(flag & (SD_HAS_TRANSPARENT_SHADOW)) {
+					/* This tells Embree to continue tracing. */
+					*args->valid = 0;
+				}
+			}
+			else {
+				/* Increase the number of hits beyond ray.max_hits
+				 * so that the caller can detect this as opaque. */
+				++ctx->num_hits;
+			}
+			break;
+		}
+		case CCLIntersectContext::RAY_SSS: {
+			/* No intersection information requested, just return a hit. */
+			if(ctx->max_hits == 0) {
+				break;
+			}
+
+			/* See triangle_intersect_subsurface() for the native equivalent. */
+			for(int i = min(ctx->max_hits, ctx->ss_isect->num_hits) - 1; i >= 0; --i) {
+				if(ctx->ss_isect->hits[i].t == ray->tfar) {
+					/* A hit at this distance was already recorded, skip it.
+					 * This tells Embree to continue tracing. */
+					*args->valid = 0;
+					return;
+				}
+			}
+
+			++ctx->ss_isect->num_hits;
+			int hit_idx;
+
+			if(ctx->ss_isect->num_hits <= ctx->max_hits) {
+				hit_idx = ctx->ss_isect->num_hits - 1;
+			}
+			else {
+				/* reservoir sampling: if we are at the maximum number of
+				 * hits, randomly replace element or skip it */
+				hit_idx = lcg_step_uint(ctx->lcg_state) % ctx->ss_isect->num_hits;
+
+				if(hit_idx >= ctx->max_hits) {
+					/* This tells Embree to continue tracing. */
+					*args->valid = 0;
+					break;
+				}
+			}
+			/* record intersection */
+			kernel_embree_convert_local_hit(kg, ray, hit, &ctx->ss_isect->hits[hit_idx], ctx->sss_object_id);
+			ctx->ss_isect->Ng[hit_idx].x = hit->Ng_x;
+			ctx->ss_isect->Ng[hit_idx].y = hit->Ng_y;
+			ctx->ss_isect->Ng[hit_idx].z = hit->Ng_z;
+			ctx->ss_isect->Ng[hit_idx] = normalize(ctx->ss_isect->Ng[hit_idx]);
+			/* This tells Embree to continue tracing. */
+			*args->valid = 0;
+			break;
+		}
+		case CCLIntersectContext::RAY_VOLUME_ALL: {
+			/* Append the intersection to the end of the array. */
+			if(ctx->num_hits < ctx->max_hits) {
+				Intersection current_isect;
+				kernel_embree_convert_hit(kg, ray, hit, &current_isect);
+				if(rtc_hit_already_recorded(ctx, current_isect)) {
+					/* This intersection was already recorded, skip it
+					 * and let Embree continue tracing. */
+					*args->valid = 0;
+					return;
+				}
+				Intersection *isect = &ctx->isect_s[ctx->num_hits];
+				++ctx->num_hits;
+				*isect = current_isect;
+				/* Only primitives from volume object. */
+				uint tri_object = (isect->object == OBJECT_NONE) ?
+				        kernel_tex_fetch(__prim_object, isect->prim) : isect->object;
+				int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+				if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+					--ctx->num_hits;
+				}
+				/* This tells Embree to continue tracing. */
+				*args->valid = 0;
+			}
+			/* When the array is full the hit is accepted unchanged. */
+			break;
+		}
+		case CCLIntersectContext::RAY_REGULAR:
+		default:
+			/* Nothing to do here. */
+			break;
+	}
+}
+
+static size_t unaccounted_mem = 0;
+
+/* Memory monitor callback for Embree.
+ *
+ * userPtr: optional Stats object; when set, allocations and frees are forwarded to it.
+ * bytes:   positive for an allocation, negative for a release.
+ *
+ * Without a Stats object the amount is accumulated in unaccounted_mem instead.
+ * Always returns true.
+ */
+static bool rtc_memory_monitor_func(void* userPtr, const ssize_t bytes, const bool)
+{
+	Stats *stats = (Stats*)userPtr;
+
+	if(!stats) {
+		/* A stats pointer may not yet be available. Keep track of the memory usage for later. */
+		if(bytes < 0) {
+			atomic_sub_and_fetch_z(&unaccounted_mem, -bytes);
+		}
+		else {
+			atomic_add_and_fetch_z(&unaccounted_mem, bytes);
+		}
+		return true;
+	}
+
+	if(bytes > 0) {
+		stats->mem_alloc(bytes);
+	}
+	else {
+		stats->mem_free(-bytes);
+	}
+	return true;
+}
+
+/* Error callback for Embree: forwards the error message to the verbose log.
+ * The user pointer and the error code are not used. */
+static void rtc_error_func(void* /*user_ptr*/, enum RTCError /*code*/, const char* message)
+{
+	VLOG(1) << message;
+}
+
+static double progress_start_time = 0.0f;
+
+/* Build progress callback for Embree.
+ *
+ * user_ptr: the Progress object to report to.
+ * n:        build progress, multiplied by 100 for the percentage display.
+ *
+ * Updates are throttled: when less than 0.25 seconds passed since the last
+ * update nothing is reported and true is returned. Otherwise the substatus is
+ * refreshed and the return value is false when the user requested a cancel.
+ */
+static bool rtc_progress_func(void* user_ptr, const double n)
+{
+	const bool throttled = (time_dt() - progress_start_time) < 0.25;
+	if(throttled) {
+		return true;
+	}
+
+	Progress *progress = (Progress*)user_ptr;
+	progress->set_substatus(string_printf("Building BVH %.0f%%", n * 100.0));
+	progress_start_time = time_dt();
+
+	return !progress->get_cancel();
+}
+
+/* This is to have a shared device between all BVH instances.
+ It would be useful to actually use a separate RTCDevice per Cycles instance. */
+RTCDevice BVHEmbree::rtc_shared_device = NULL;
+int BVHEmbree::rtc_shared_users = 0;
+thread_mutex BVHEmbree::rtc_shared_mutex;
+
+/* Verify that an Embree device property has the expected value.
+ * On mismatch the message is logged first, so that the reason is emitted
+ * before a debug build stops on the assert. */
+static void rtc_expect_device_property(RTCDevice device,
+                                       enum RTCDeviceProperty property,
+                                       ssize_t expected,
+                                       const char *message)
+{
+	if(rtcGetDeviceProperty(device, property) != expected) {
+		VLOG(1) << message;
+		assert(0);
+	}
+}
+
+/* Construct an Embree backed BVH.
+ *
+ * Enables flush-to-zero and denormals-are-zero for the calling thread, creates
+ * the shared RTCDevice when this is its first user (checking that Embree was
+ * built with the features Cycles relies on) and registers the error callback.
+ * The shared device is reference counted through rtc_shared_users, guarded by
+ * rtc_shared_mutex.
+ */
+BVHEmbree::BVHEmbree(const BVHParams& params_, const vector<Object*>& objects_)
+: BVH(params_, objects_), scene(NULL), mem_used(0), top_level(NULL), stats(NULL),
+  curve_subdivisions(params.curve_subdivisions), build_quality(RTC_BUILD_QUALITY_REFIT),
+  use_curves(params_.curve_flags & CURVE_KN_INTERPOLATE),
+  use_ribbons(params.curve_flags & CURVE_KN_RIBBONS), dynamic_scene(true)
+{
+	_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
+	_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
+	thread_scoped_lock lock(rtc_shared_mutex);
+	if(rtc_shared_users == 0) {
+		rtc_shared_device = rtcNewDevice("verbose=0");
+		/* Check here if Embree was built with the correct flags. */
+		rtc_expect_device_property(rtc_shared_device, RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED, 1,
+		                           "Embree is compiled without the RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED flag. "
+		                           "Ray visibility will not work.");
+		rtc_expect_device_property(rtc_shared_device, RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED, 1,
+		                           "Embree is compiled without the RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED flag. "
+		                           "Renders may not look as expected.");
+		rtc_expect_device_property(rtc_shared_device, RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED, 1,
+		                           "Embree is compiled without the RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED flag. "
+		                           "Line primitives will not be rendered.");
+		rtc_expect_device_property(rtc_shared_device, RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED, 1,
+		                           "Embree is compiled without the RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED flag. "
+		                           "Triangle primitives will not be rendered.");
+		/* Backface culling must be disabled, hence the expected value of 0. */
+		rtc_expect_device_property(rtc_shared_device, RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED, 0,
+		                           "Embree is compiled with the RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED flag. "
+		                           "Renders may not look as expected.");
+	}
+	++rtc_shared_users;
+
+	rtcSetDeviceErrorFunction(rtc_shared_device, rtc_error_func, NULL);
+
+	pack.root_index = -1;
+}
+
+/* Destructor: for a BVH that is not the top level one, release its Embree
+ * scene and drop its reference on the shared device through destroy().
+ * NOTE(review): a top level BVH does not call destroy() here; presumably its
+ * scene is released by whoever owns it after the BVH is gone - confirm. */
+BVHEmbree::~BVHEmbree()
+{
+ if(!params.top_level) {
+ destroy(scene);
+ }
+}
+
+/* Release an Embree scene and drop one reference on the shared device.
+ *
+ * scene: scene to release, may be NULL. The handle is passed by value, so the
+ *        caller's copy is not cleared and must not be used afterwards.
+ *
+ * The shared RTCDevice is released once the last user is gone. */
+void BVHEmbree::destroy(RTCScene scene)
+{
+	if(scene) {
+		rtcReleaseScene(scene);
+	}
+
+	thread_scoped_lock lock(rtc_shared_mutex);
+	--rtc_shared_users;
+	if(rtc_shared_users == 0) {
+		rtcReleaseDevice(rtc_shared_device);
+		rtc_shared_device = NULL;
+	}
+}
+
+/* Release this BVH's Embree scene, if it has one.
+ *
+ * When this BVH is used as an instance inside a top level BVH the scene is
+ * not released right away; it is handed to the top level BVH for delayed
+ * deletion. Otherwise the scene is released together with all scenes that
+ * were queued on this BVH for delayed deletion. */
+void BVHEmbree::delete_rtcScene()
+{
+	if(!scene) {
+		return;
+	}
+
+	if(top_level) {
+		/* Let the top level BVH know that it should delete the scene later. */
+		top_level->add_delayed_delete_scene(scene);
+	}
+	else {
+		rtcReleaseScene(scene);
+		foreach(RTCScene delayed_scene, delayed_delete_scenes) {
+			rtcReleaseScene(delayed_scene);
+		}
+		delayed_delete_scenes.clear();
+	}
+	scene = NULL;
+}
+
+/* Build the Embree scene for the objects of this BVH.
+ *
+ * progress: receives status updates and is polled for cancel requests.
+ * stats_:   optional Stats object; it is handed to Embree's memory monitor for
+ *           the duration of the build and the member pointer is reset on exit.
+ *
+ * Any previously built scene is released first. For a top level BVH,
+ * non-traceable objects are skipped and instanced meshes are added as Embree
+ * instances; all other objects are added as geometry. When the build is
+ * cancelled the scene is deleted through delete_rtcScene().
+ */
+void BVHEmbree::build(Progress& progress, Stats *stats_)
+{
+	assert(rtc_shared_device);
+	stats = stats_;
+	rtcSetDeviceMemoryMonitorFunction(rtc_shared_device, rtc_memory_monitor_func, stats);
+
+	progress.set_substatus("Building BVH");
+
+	if(scene) {
+		rtcReleaseScene(scene);
+		scene = NULL;
+	}
+
+	const bool dynamic = params.bvh_type == SceneParams::BVH_DYNAMIC;
+
+	scene = rtcNewScene(rtc_shared_device);
+	const RTCSceneFlags scene_flags = (dynamic ? RTC_SCENE_FLAG_DYNAMIC : RTC_SCENE_FLAG_NONE) |
+	        RTC_SCENE_FLAG_COMPACT | RTC_SCENE_FLAG_ROBUST;
+	rtcSetSceneFlags(scene, scene_flags);
+	build_quality = dynamic ? RTC_BUILD_QUALITY_LOW :
+	        (params.use_spatial_split ? RTC_BUILD_QUALITY_HIGH : RTC_BUILD_QUALITY_MEDIUM);
+	rtcSetSceneBuildQuality(scene, build_quality);
+
+	/* Index of the current object, also passed on when the object is skipped. */
+	int i = 0;
+
+	pack.object_node.clear();
+
+	foreach(Object *ob, objects) {
+		if(params.top_level) {
+			if(!ob->is_traceable()) {
+				++i;
+				continue;
+			}
+			if(!ob->mesh->is_instanced()) {
+				add_object(ob, i);
+			}
+			else {
+				add_instance(ob, i);
+			}
+		}
+		else {
+			add_object(ob, i);
+		}
+		++i;
+		if(progress.get_cancel()) {
+			/* Leave the loop instead of returning, so that the cancel handling
+			 * below deletes the partially built scene and resets stats. */
+			break;
+		}
+	}
+
+	if(progress.get_cancel()) {
+		delete_rtcScene();
+		stats = NULL;
+		return;
+	}
+
+	rtcSetSceneProgressMonitorFunction(scene, rtc_progress_func, &progress);
+	rtcCommitScene(scene);
+
+	pack_primitives();
+
+	if(progress.get_cancel()) {
+		delete_rtcScene();
+		stats = NULL;
+		return;
+	}
+
+	progress.set_substatus("Packing geometry");
+	pack_nodes(NULL);
+
+	stats = NULL;
+}
+
+/* Add the geometry of an object's mesh to the scene: triangles and curves are
+ * each added when their primitive type is enabled in params.primitive_mask
+ * and the mesh has at least one such primitive.
+ *
+ * ob: object whose mesh is added.
+ * i:  object index, forwarded to add_triangles() / add_curves(). */
+void BVHEmbree::add_object(Object *ob, int i)
+{
+	Mesh *mesh = ob->mesh;
+	const bool triangles_enabled = (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) != 0;
+	const bool curves_enabled = (params.primitive_mask & PRIMITIVE_ALL_CURVE) != 0;
+
+	if(triangles_enabled && mesh->num_triangles() > 0) {
+		add_triangles(ob, i);
+	}
+	if(curves_enabled && mesh->num_curves() > 0) {
+		add_curves(ob, i);
+	}
+}
+
+/* Add an object to the scene as an Embree instance of its mesh's own BVH.
+ *
+ * ob: instanced object; its mesh's BVH is treated as a BVHEmbree.
+ * i:  object index; the instance geometry is attached under ID i*2. */
+void BVHEmbree::add_instance(Object *ob, int i)
+{
+	if(!ob || !ob->mesh) {
+		assert(0);
+		return;
+	}
+	BVHEmbree *mesh_bvh = (BVHEmbree*)(ob->mesh->bvh);
+
+	/* Register this BVH as the top level of the instanced BVH. */
+	if(mesh_bvh->top_level != this) {
+		mesh_bvh->top_level = this;
+	}
+
+	const bool has_motion = ob->use_motion();
+	const size_t num_motion_steps = has_motion ? ob->motion.size() : 1;
+
+	RTCGeometry geom_id = rtcNewGeometry(rtc_shared_device, RTC_GEOMETRY_TYPE_INSTANCE);
+	rtcSetGeometryInstancedScene(geom_id, mesh_bvh->scene);
+	rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
+
+	if(!has_motion) {
+		/* Static object: a single transform at time step 0. */
+		rtcSetGeometryTransform(geom_id, 0, RTC_FORMAT_FLOAT3X4_ROW_MAJOR, (const float*)&ob->tfm);
+	}
+	else {
+		/* One transform per motion step. */
+		for(size_t step = 0; step < num_motion_steps; ++step) {
+			rtcSetGeometryTransform(geom_id, step, RTC_FORMAT_FLOAT3X4_ROW_MAJOR, (const float*)&ob->motion[step]);
+		}
+	}
+
+	/* One placeholder primitive entry per instance. */
+	pack.prim_index.push_back_slow(-1);
+	pack.prim_object.push_back_slow(i);
+	pack.prim_type.push_back_slow(PRIMITIVE_NONE);
+	pack.prim_tri_index.push_back_slow(-1);
+
+	rtcSetGeometryUserData(geom_id, (void*) mesh_bvh->scene);
+	rtcSetGeometryMask(geom_id, ob->visibility);
+
+	rtcCommitGeometry(geom_id);
+	rtcAttachGeometryByID(scene, geom_id, i*2);
+	rtcReleaseGeometry(geom_id);
+}
+
+/* Add the triangles of an object's mesh to the scene as one Embree triangle
+ * geometry and append the matching entries to the pack arrays.
+ *
+ * ob: object whose mesh triangles are added.
+ * i:  object index; the geometry is attached under ID i*2.
+ *
+ * If Embree cannot provide an index buffer, the geometry is released and
+ * nothing is added. */
+void BVHEmbree::add_triangles(Object *ob, int i)
+{
+	/* Offset of this geometry's first primitive, stored as geometry user data. */
+	size_t prim_offset = pack.prim_index.size();
+	Mesh *mesh = ob->mesh;
+	const Attribute *attr_mP = NULL;
+	size_t num_motion_steps = 1;
+	if(mesh->has_motion_blur()) {
+		attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+		if(attr_mP) {
+			num_motion_steps = mesh->motion_steps;
+			/* Clamp to the number of time steps Embree supports. */
+			if(num_motion_steps > RTC_MAX_TIME_STEP_COUNT) {
+				assert(0);
+				num_motion_steps = RTC_MAX_TIME_STEP_COUNT;
+			}
+		}
+	}
+
+	const size_t num_triangles = mesh->num_triangles();
+	RTCGeometry geom_id = rtcNewGeometry(rtc_shared_device, RTC_GEOMETRY_TYPE_TRIANGLE);
+	rtcSetGeometryBuildQuality(geom_id, build_quality);
+	rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
+
+	unsigned *rtc_indices = (unsigned*)rtcSetNewGeometryBuffer(geom_id, RTC_BUFFER_TYPE_INDEX, 0,
+	        RTC_FORMAT_UINT3, sizeof (int) * 3, num_triangles);
+	assert(rtc_indices);
+	if(!rtc_indices) {
+		VLOG(1) << "Embree could not create new geometry buffer for mesh " << mesh->name.c_str() << ".\n";
+		/* The geometry was never attached to the scene; drop our reference
+		 * so it does not leak. */
+		rtcReleaseGeometry(geom_id);
+		return;
+	}
+	for(size_t j = 0; j < num_triangles; ++j) {
+		Mesh::Triangle t = mesh->get_triangle(j);
+		rtc_indices[j*3] = t.v[0];
+		rtc_indices[j*3+1] = t.v[1];
+		rtc_indices[j*3+2] = t.v[2];
+	}
+
+	update_tri_vertex_buffer(geom_id, mesh);
+
+	/* Each array is grown relative to its own current size. */
+	pack.prim_object.reserve(pack.prim_object.size() + num_triangles);
+	pack.prim_type.reserve(pack.prim_type.size() + num_triangles);
+	pack.prim_index.reserve(pack.prim_index.size() + num_triangles);
+	pack.prim_tri_index.reserve(pack.prim_tri_index.size() + num_triangles);
+	for(size_t j = 0; j < num_triangles; ++j) {
+		pack.prim_object.push_back_reserved(i);
+		pack.prim_type.push_back_reserved(num_motion_steps > 1 ? PRIMITIVE_MOTION_TRIANGLE : PRIMITIVE_TRIANGLE);
+		pack.prim_index.push_back_reserved(j);
+		pack.prim_tri_index.push_back_reserved(j);
+	}
+
+	rtcSetGeometryUserData(geom_id, (void*) prim_offset);
+	rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func);
+	rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func);
+	rtcSetGeometryMask(geom_id, ob->visibility);
+
+	rtcCommitGeometry(geom_id);
+	rtcAttachGeometryByID(scene, geom_id, i*2);
+	rtcReleaseGeometry(geom_id);
+}
+
+/* Fill the Embree vertex buffers of a triangle geometry, one buffer per
+ * motion step.
+ *
+ * geom_id: geometry whose vertex buffers are (re)created and written.
+ * mesh:    source of the vertex positions; the middle step is read from
+ *          mesh->verts, every other step from the motion position attribute. */
+void BVHEmbree::update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh)
+{
+	const Attribute *attr_mP = NULL;
+	size_t num_motion_steps = 1;
+	int t_mid = 0;
+	if(mesh->has_motion_blur()) {
+		attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+		if(attr_mP) {
+			num_motion_steps = mesh->motion_steps;
+			t_mid = (num_motion_steps - 1) / 2;
+			if(num_motion_steps > RTC_MAX_TIME_STEP_COUNT) {
+				assert(0);
+				num_motion_steps = RTC_MAX_TIME_STEP_COUNT;
+			}
+		}
+	}
+	const size_t num_verts = mesh->verts.size();
+
+	for(int t = 0; t < num_motion_steps; ++t) {
+		/* Pick the source positions for this time step. */
+		const float3 *src;
+		if(t != t_mid) {
+			/* The attribute has no entry for the middle step, so steps
+			 * after it are shifted down by one. */
+			int attr_step = (t > t_mid) ? (t - 1) : t;
+			src = &attr_mP->data_float3()[attr_step * num_verts];
+		}
+		else {
+			src = &mesh->verts[0];
+		}
+
+		/* The buffer is allocated with one element more than there are vertices. */
+		float *dst = (float*) rtcSetNewGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t,
+		        RTC_FORMAT_FLOAT3, sizeof(float) * 3, num_verts + 1);
+		assert(dst);
+		if(!dst) {
+			continue;
+		}
+		/* Copy x/y/z tightly packed, three floats per vertex. */
+		for(size_t j = 0; j < num_verts; ++j) {
+			dst[j*3] = src[j].x;
+			dst[j*3+1] = src[j].y;
+			dst[j*3+2] = src[j].z;
+		}
+	}
+}
+
+/* Fill the Embree vertex buffers (and, when use_curves is set, the tangent
+ * buffers) of a curve geometry, one set per motion step.
+ *
+ * geom_id: geometry whose buffers are (re)created and written.
+ * mesh:    source of curve keys and radii; the middle step is read from
+ *          mesh->curve_keys, the other steps from the motion position
+ *          attribute. The radius is stored in the w component. */
+void BVHEmbree::update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh)
+{
+	const Attribute *attr_mP = NULL;
+	size_t num_motion_steps = 1;
+	if(mesh->has_motion_blur()) {
+		attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+		if(attr_mP) {
+			num_motion_steps = mesh->motion_steps;
+		}
+	}
+
+	/* Total number of keys over all curves. */
+	const size_t num_curves = mesh->num_curves();
+	size_t num_keys = 0;
+	for(size_t j = 0; j < num_curves; ++j) {
+		const Mesh::Curve c = mesh->get_curve(j);
+		num_keys += c.num_keys;
+	}
+
+	/* Copy the CV data to Embree */
+	const int t_mid = (num_motion_steps - 1) / 2;
+	const float *curve_radius = &mesh->curve_radius[0];
+	for(int t = 0; t < num_motion_steps; ++t) {
+		const float3 *verts;
+		if(t != t_mid && attr_mP != NULL) {
+			/* Steps after the middle one are shifted down by one in the attribute. */
+			int t_ = (t > t_mid) ? (t - 1) : t;
+			verts = &attr_mP->data_float3()[t_ * num_keys];
+		}
+		else {
+			verts = &mesh->curve_keys[0];
+		}
+
+		float4 *rtc_verts = (float4*)rtcSetNewGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t,
+		        RTC_FORMAT_FLOAT4, sizeof (float) * 4, num_keys);
+		float4 *rtc_tangents = NULL;
+		if(use_curves) {
+			rtc_tangents = (float4*)rtcSetNewGeometryBuffer(geom_id, RTC_BUFFER_TYPE_TANGENT, t,
+			        RTC_FORMAT_FLOAT4, sizeof (float) * 4, num_keys);
+			assert(rtc_tangents);
+		}
+		assert(rtc_verts);
+		if(!rtc_verts) {
+			continue;
+		}
+
+		if(!(use_curves && rtc_tangents)) {
+			/* No tangents: plain copy of position and radius per key. */
+			for(size_t j = 0; j < num_keys; ++j) {
+				rtc_verts[j] = float3_to_float4(verts[j]);
+				rtc_verts[j].w = curve_radius[j];
+			}
+			continue;
+		}
+
+		/* Position, radius and a finite-difference tangent per key. The
+		 * output pointers advance by one curve at a time. */
+		for(size_t j = 0; j < num_curves; ++j) {
+			Mesh::Curve c = mesh->get_curve(j);
+			const int first = c.first_key;
+
+			/* First key: forward difference. */
+			rtc_verts[0] = float3_to_float4(verts[first]);
+			rtc_verts[0].w = curve_radius[first];
+			rtc_tangents[0] = float3_to_float4(verts[first + 1] - verts[first]);
+			rtc_tangents[0].w = curve_radius[first + 1] - curve_radius[first];
+
+			/* Interior keys: central difference. */
+			int k = 1;
+			for(; k < c.num_segments(); ++k) {
+				const int key = first + k;
+				rtc_verts[k] = float3_to_float4(verts[key]);
+				rtc_verts[k].w = curve_radius[key];
+				rtc_tangents[k] = float3_to_float4((verts[key + 1] - verts[key - 1]) * 0.5f);
+				rtc_tangents[k].w = (curve_radius[key + 1] - curve_radius[key - 1]) * 0.5f;
+			}
+
+			/* Last key: backward difference. */
+			const int last = first + k;
+			rtc_verts[k] = float3_to_float4(verts[last]);
+			rtc_verts[k].w = curve_radius[last];
+			rtc_tangents[k] = float3_to_float4(verts[last] - verts[last - 1]);
+			rtc_tangents[k].w = curve_radius[last] - curve_radius[last - 1];
+
+			rtc_verts += c.num_keys;
+			rtc_tangents += c.num_keys;
+		}
+	}
+}
+
+/* Add the curves of an object's mesh to the scene as one Embree curve
+ * geometry with one primitive per curve segment, and append the matching
+ * entries to the pack arrays.
+ *
+ * ob: object whose mesh curves are added.
+ * i:  object index; the geometry is attached under ID i * 2 + 1.
+ *
+ * If Embree cannot provide an index buffer, the geometry is released and
+ * nothing is added. */
+void BVHEmbree::add_curves(Object *ob, int i)
+{
+	/* Offset of this geometry's first primitive, stored as geometry user data. */
+	size_t prim_offset = pack.prim_index.size();
+	const Mesh *mesh = ob->mesh;
+	const Attribute *attr_mP = NULL;
+	size_t num_motion_steps = 1;
+	if(mesh->has_motion_blur()) {
+		attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+		if(attr_mP) {
+			num_motion_steps = mesh->motion_steps;
+		}
+	}
+
+	const size_t num_curves = mesh->num_curves();
+	size_t num_segments = 0;
+	for(size_t j = 0; j < num_curves; ++j) {
+		Mesh::Curve c = mesh->get_curve(j);
+		assert(c.num_segments() > 0);
+		num_segments += c.num_segments();
+	}
+
+	enum RTCGeometryType type = (!use_curves) ? RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE :
+	        (use_ribbons ? RTC_GEOMETRY_TYPE_FLAT_HERMITE_CURVE :
+	                       RTC_GEOMETRY_TYPE_ROUND_HERMITE_CURVE);
+
+	RTCGeometry geom_id = rtcNewGeometry(rtc_shared_device, type);
+	rtcSetGeometryTessellationRate(geom_id, curve_subdivisions);
+	unsigned *rtc_indices = (unsigned*) rtcSetNewGeometryBuffer(geom_id, RTC_BUFFER_TYPE_INDEX, 0,
+	        RTC_FORMAT_UINT, sizeof (int), num_segments);
+	assert(rtc_indices);
+	if(!rtc_indices) {
+		/* Same handling as for triangles: report, drop the never attached
+		 * geometry and bail out instead of writing through a NULL pointer. */
+		VLOG(1) << "Embree could not create new geometry buffer for mesh " << mesh->name.c_str() << ".\n";
+		rtcReleaseGeometry(geom_id);
+		return;
+	}
+
+	/* Make room for Cycles specific data. Each array is grown relative to
+	 * its own current size. */
+	pack.prim_object.reserve(pack.prim_object.size() + num_segments);
+	pack.prim_type.reserve(pack.prim_type.size() + num_segments);
+	pack.prim_index.reserve(pack.prim_index.size() + num_segments);
+	pack.prim_tri_index.reserve(pack.prim_tri_index.size() + num_segments);
+
+	size_t rtc_index = 0;
+	for(size_t j = 0; j < num_curves; ++j) {
+		Mesh::Curve c = mesh->get_curve(j);
+		for(size_t k = 0; k < c.num_segments(); ++k) {
+			/* Each segment is indexed by its first key. */
+			rtc_indices[rtc_index] = c.first_key + k;
+			/* Cycles specific data. */
+			pack.prim_object.push_back_reserved(i);
+			pack.prim_type.push_back_reserved(PRIMITIVE_PACK_SEGMENT(num_motion_steps > 1 ?
+			                                  PRIMITIVE_MOTION_CURVE : PRIMITIVE_CURVE, k));
+			pack.prim_index.push_back_reserved(j);
+			pack.prim_tri_index.push_back_reserved(rtc_index);
+
+			++rtc_index;
+		}
+	}
+
+	rtcSetGeometryBuildQuality(geom_id, build_quality);
+	rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
+
+	update_curve_vertex_buffer(geom_id, mesh);
+
+	rtcSetGeometryUserData(geom_id, (void*) prim_offset);
+	rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func);
+	rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func);
+	rtcSetGeometryMask(geom_id, ob->visibility);
+
+	rtcCommitGeometry(geom_id);
+	rtcAttachGeometryByID(scene, geom_id, i * 2 + 1);
+	rtcReleaseGeometry(geom_id);
+}
+
+/* Finish the pack arrays of a top level BVH; does nothing for other BVHs.
+ *
+ * The primitive indices packed so far are offset by the curve/triangle offset
+ * of their mesh, then the packed primitive data of every mesh that needs its
+ * own BVH is appended once per mesh, and pack.object_node receives one entry
+ * per object.
+ *
+ * The BVHNode argument is unused. */
+void BVHEmbree::pack_nodes(const BVHNode *)
+{
+	/* Quite a bit of this code is for compatibility with Cycles' native BVH. */
+	if(!params.top_level) {
+		return;
+	}
+
+	/* Entries with index -1 are left untouched. */
+	for(size_t i = 0; i < pack.prim_index.size(); ++i) {
+		if(pack.prim_index[i] != -1) {
+			if(pack.prim_type[i] & PRIMITIVE_ALL_CURVE)
+				pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->curve_offset;
+			else
+				pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->tri_offset;
+		}
+	}
+
+	size_t prim_offset = pack.prim_index.size();
+
+	/* reserve */
+	size_t prim_index_size = pack.prim_index.size();
+	size_t prim_tri_verts_size = pack.prim_tri_verts.size();
+
+	size_t pack_prim_index_offset = prim_index_size;
+	size_t pack_prim_tri_verts_offset = prim_tri_verts_size;
+	size_t object_offset = 0;
+
+	map<Mesh*, int> mesh_map;
+
+	/* Sum up the sizes of the mesh BVHs, counting every mesh only once. */
+	foreach(Object *ob, objects) {
+		Mesh *mesh = ob->mesh;
+		BVH *bvh = mesh->bvh;
+
+		if(mesh->need_build_bvh()) {
+			if(mesh_map.find(mesh) == mesh_map.end()) {
+				prim_index_size += bvh->pack.prim_index.size();
+				prim_tri_verts_size += bvh->pack.prim_tri_verts.size();
+				mesh_map[mesh] = 1;
+			}
+		}
+	}
+
+	mesh_map.clear();
+
+	pack.prim_index.resize(prim_index_size);
+	pack.prim_type.resize(prim_index_size);
+	pack.prim_object.resize(prim_index_size);
+	pack.prim_visibility.clear();
+	pack.prim_tri_verts.resize(prim_tri_verts_size);
+	pack.prim_tri_index.resize(prim_index_size);
+	pack.object_node.resize(objects.size());
+
+	int *pack_prim_index = (pack.prim_index.size())? &pack.prim_index[0]: NULL;
+	int *pack_prim_type = (pack.prim_type.size())? &pack.prim_type[0]: NULL;
+	int *pack_prim_object = (pack.prim_object.size())? &pack.prim_object[0]: NULL;
+	float4 *pack_prim_tri_verts = (pack.prim_tri_verts.size())? &pack.prim_tri_verts[0]: NULL;
+	uint *pack_prim_tri_index = (pack.prim_tri_index.size())? &pack.prim_tri_index[0]: NULL;
+
+	/* merge */
+	foreach(Object *ob, objects) {
+		Mesh *mesh = ob->mesh;
+
+		/* We assume that if mesh doesn't need own BVH it was already included
+		 * into a top-level BVH and no packing here is needed.
+		 */
+		if(!mesh->need_build_bvh()) {
+			pack.object_node[object_offset++] = prim_offset;
+			continue;
+		}
+
+		/* If the mesh was already added once, don't add it again, but reuse
+		 * its node offset for this object. */
+		map<Mesh*, int>::iterator it = mesh_map.find(mesh);
+
+		if(it != mesh_map.end()) {
+			int noffset = it->second;
+			pack.object_node[object_offset++] = noffset;
+			continue;
+		}
+
+		BVHEmbree *bvh = (BVHEmbree*)mesh->bvh;
+
+		rtc_memory_monitor_func(stats, unaccounted_mem, true);
+		unaccounted_mem = 0;
+
+		int mesh_tri_offset = mesh->tri_offset;
+		int mesh_curve_offset = mesh->curve_offset;
+
+		/* fill in node indexes for instances */
+		pack.object_node[object_offset++] = prim_offset;
+
+		mesh_map[mesh] = pack.object_node[object_offset-1];
+
+		/* merge primitive, object and triangle indexes */
+		if(bvh->pack.prim_index.size()) {
+			size_t bvh_prim_index_size = bvh->pack.prim_index.size();
+			int *bvh_prim_index = &bvh->pack.prim_index[0];
+			int *bvh_prim_type = &bvh->pack.prim_type[0];
+			uint *bvh_prim_tri_index = &bvh->pack.prim_tri_index[0];
+
+			for(size_t i = 0; i < bvh_prim_index_size; ++i) {
+				if(bvh_prim_type[i] & PRIMITIVE_ALL_CURVE) {
+					pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_curve_offset;
+					pack_prim_tri_index[pack_prim_index_offset] = -1;
+				}
+				else {
+					pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_tri_offset;
+					pack_prim_tri_index[pack_prim_index_offset] =
+					        bvh_prim_tri_index[i] + pack_prim_tri_verts_offset;
+				}
+
+				pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i];
+				pack_prim_object[pack_prim_index_offset] = 0;
+
+				++pack_prim_index_offset;
+			}
+		}
+
+		/* Merge triangle vertices data. */
+		if(bvh->pack.prim_tri_verts.size()) {
+			const size_t prim_tri_size = bvh->pack.prim_tri_verts.size();
+			memcpy(pack_prim_tri_verts + pack_prim_tri_verts_offset,
+			       &bvh->pack.prim_tri_verts[0],
+			       prim_tri_size*sizeof(float4));
+			pack_prim_tri_verts_offset += prim_tri_size;
+		}
+
+		prim_offset += bvh->pack.prim_index.size();
+	}
+}
+
+/* Rewrite the vertex buffers of every geometry owned by this BVH, commit the
+ * geometries and then commit the scene so Embree rebuilds or refits.
+ *
+ * Every object takes up two consecutive geometry IDs: the even one is used
+ * for its triangles, the odd one for its curves. In a top level BVH only
+ * traceable, non-instanced objects are updated. */
+void BVHEmbree::refit_nodes()
+{
+	unsigned geom_id = 0;
+	foreach(Object *ob, objects) {
+		const bool owns_geometry = !params.top_level ||
+		                           (ob->is_traceable() && !ob->mesh->is_instanced());
+		if(owns_geometry) {
+			const bool refit_triangles = (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) &&
+			                             ob->mesh->num_triangles() > 0;
+			if(refit_triangles) {
+				update_tri_vertex_buffer(rtcGetGeometry(scene, geom_id), ob->mesh);
+				rtcCommitGeometry(rtcGetGeometry(scene, geom_id));
+			}
+
+			const bool refit_curves = (params.primitive_mask & PRIMITIVE_ALL_CURVE) &&
+			                          ob->mesh->num_curves() > 0;
+			if(refit_curves) {
+				update_curve_vertex_buffer(rtcGetGeometry(scene, geom_id + 1), ob->mesh);
+				rtcCommitGeometry(rtcGetGeometry(scene, geom_id + 1));
+			}
+		}
+		/* Skipped objects still consume their pair of IDs. */
+		geom_id += 2;
+	}
+	rtcCommitScene(scene);
+}
+CCL_NAMESPACE_END
+
+#endif /* WITH_EMBREE */
diff --git a/intern/cycles/bvh/bvh_embree.h b/intern/cycles/bvh/bvh_embree.h
new file mode 100644
index 00000000000..9990826ba98
--- /dev/null
+++ b/intern/cycles/bvh/bvh_embree.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2018, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BVH_EMBREE_H__
+#define __BVH_EMBREE_H__
+
+#ifdef WITH_EMBREE
+
+#include <embree3/rtcore.h>
+#include <embree3/rtcore_scene.h>
+
+#include "bvh/bvh.h"
+#include "bvh/bvh_params.h"
+
+#include "util/util_thread.h"
+#include "util/util_types.h"
+#include "util/util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+class Mesh;
+
+class BVHEmbree : public BVH /* BVH implementation that keeps its geometry in an Embree RTCScene. */
+{
+public:
+ virtual void build(Progress& progress, Stats *stats) override; /* Creates the scene, adds all objects and packs the primitive data. */
+ virtual ~BVHEmbree();
+ RTCScene scene; /* Embree scene of this BVH; NULL after delete_rtcScene(). */
+ static void destroy(RTCScene); /* NOTE(review): definition not visible here - presumably releases the given scene; confirm. */
+protected:
+ friend class BVH; /* The constructor is protected; BVH is granted access to it. */
+ BVHEmbree(const BVHParams& params, const vector<Object*>& objects);
+
+ virtual void pack_nodes(const BVHNode*) override; /* Only does work for a top level BVH; the node argument is unused. */
+ virtual void refit_nodes() override; /* Rewrites the vertex buffers and commits the scene again. */
+
+ void add_object(Object *ob, int i); /* Adds the triangles and/or curves of the object's mesh. */
+ void add_instance(Object *ob, int i); /* Adds the object as an instance of its mesh's BVH scene. */
+ void add_curves(Object *ob, int i); /* Attaches curve geometry under ID i * 2 + 1. */
+ void add_triangles(Object *ob, int i); /* Attaches triangle geometry under ID i * 2. */
+
+ ssize_t mem_used; /* NOTE(review): not referenced in the code visible here - presumably memory accounted to this BVH; confirm. */
+
+ void add_delayed_delete_scene(RTCScene scene) { delayed_delete_scenes.push_back(scene); } /* Queues a scene that delete_rtcScene() of this BVH releases later. */
+ BVHEmbree *top_level; /* Top level BVH instancing this one; assigned in add_instance() of that BVH. */
+private:
+ void delete_rtcScene(); /* Releases the scene, or queues it on top_level when that is set. */
+ void update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh); /* Writes one vertex buffer per motion step. */
+ void update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh); /* Writes vertex (and, with use_curves, tangent) buffers per motion step. */
+
+ static RTCDevice rtc_shared_device; /* Embree device shared by all BVHEmbree instances. */
+ static int rtc_shared_users; /* Number of users of the shared device; the device is released when it drops to 0. */
+ static thread_mutex rtc_shared_mutex; /* Locked while the user count and shared device are updated. */
+
+ Stats *stats; /* Set for the duration of build(), NULL otherwise. */
+ vector<RTCScene> delayed_delete_scenes; /* Scenes queued through add_delayed_delete_scene(). */
+ int curve_subdivisions; /* Passed to rtcSetGeometryTessellationRate() for curve geometry. */
+ enum RTCBuildQuality build_quality; /* Chosen in build(), applied to the scene and to each geometry. */
+ bool use_curves, use_ribbons, dynamic_scene; /* use_curves/use_ribbons select the Embree curve geometry type; dynamic_scene is not used in the code visible here. */
+};
+
+CCL_NAMESPACE_END
+
+#endif /* WITH_EMBREE */
+
+#endif /* __BVH_EMBREE_H__ */
diff --git a/intern/cycles/bvh/bvh_node.h b/intern/cycles/bvh/bvh_node.h
index ed89d52a50a..65d5df01158 100644
--- a/intern/cycles/bvh/bvh_node.h
+++ b/intern/cycles/bvh/bvh_node.h
@@ -169,4 +169,4 @@ public:
CCL_NAMESPACE_END
-#endif /* __BVH_NODE_H__ */
+#endif /* __BVH_NODE_H__ */
diff --git a/intern/cycles/bvh/bvh_params.h b/intern/cycles/bvh/bvh_params.h
index d8dd7df6ba1..6408d56da80 100644
--- a/intern/cycles/bvh/bvh_params.h
+++ b/intern/cycles/bvh/bvh_params.h
@@ -90,6 +90,13 @@ public:
/* Same as above, but for triangle primitives. */
int num_motion_triangle_steps;
+ /* Same as in SceneParams. */
+ int bvh_type;
+
+ /* These are needed for Embree. */
+ int curve_flags;
+ int curve_subdivisions;
+
/* fixed parameters */
enum {
MAX_DEPTH = 64,
@@ -123,6 +130,11 @@ public:
num_motion_curve_steps = 0;
num_motion_triangle_steps = 0;
+
+ bvh_type = 0;
+
+ curve_flags = 0;
+ curve_subdivisions = 4;
}
/* SAH costs */
@@ -274,4 +286,4 @@ struct BVHSpatialStorage {
CCL_NAMESPACE_END
-#endif /* __BVH_PARAMS_H__ */
+#endif /* __BVH_PARAMS_H__ */
diff --git a/intern/cycles/bvh/bvh_sort.h b/intern/cycles/bvh/bvh_sort.h
index 936401d8607..6910cc1e9b4 100644
--- a/intern/cycles/bvh/bvh_sort.h
+++ b/intern/cycles/bvh/bvh_sort.h
@@ -35,4 +35,4 @@ void bvh_reference_sort(int start,
CCL_NAMESPACE_END
-#endif /* __BVH_SORT_H__ */
+#endif /* __BVH_SORT_H__ */
diff --git a/intern/cycles/bvh/bvh_split.h b/intern/cycles/bvh/bvh_split.h
index a874a118b99..cb47deab211 100644
--- a/intern/cycles/bvh/bvh_split.h
+++ b/intern/cycles/bvh/bvh_split.h
@@ -259,4 +259,4 @@ public:
CCL_NAMESPACE_END
-#endif /* __BVH_SPLIT_H__ */
+#endif /* __BVH_SPLIT_H__ */
diff --git a/intern/cycles/bvh/bvh_unaligned.h b/intern/cycles/bvh/bvh_unaligned.h
index c3ece051cd5..bcfb6ed68da 100644
--- a/intern/cycles/bvh/bvh_unaligned.h
+++ b/intern/cycles/bvh/bvh_unaligned.h
@@ -77,4 +77,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __BVH_UNALIGNED_H__ */
+#endif /* __BVH_UNALIGNED_H__ */
diff --git a/intern/cycles/cmake/external_libs.cmake b/intern/cycles/cmake/external_libs.cmake
index 2e386a6bfc5..d0f473a2939 100644
--- a/intern/cycles/cmake/external_libs.cmake
+++ b/intern/cycles/cmake/external_libs.cmake
@@ -133,6 +133,12 @@ if(CYCLES_STANDALONE_REPOSITORY)
set(BOOST_DEFINITIONS "-DBOOST_ALL_NO_LIB")
####
+ # embree
+ if(WITH_CYCLES_EMBREE)
+ find_package(embree 3.2.4 REQUIRED)
+ endif()
+
+ ####
# Logging
if(WITH_CYCLES_LOGGING)
find_package(Glog REQUIRED)
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index 6959dd73c32..7e20bb449c3 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -361,7 +361,6 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int th
info.has_half_images = true;
info.has_volume_decoupled = true;
- info.bvh_layout_mask = BVH_LAYOUT_ALL;
info.has_osl = true;
foreach(const DeviceInfo &device, subdevices) {
@@ -396,7 +395,6 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int th
/* Accumulate device info. */
info.has_half_images &= device.has_half_images;
info.has_volume_decoupled &= device.has_volume_decoupled;
- info.bvh_layout_mask = device.bvh_layout_mask & info.bvh_layout_mask;
info.has_osl &= device.has_osl;
}
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 2400788c833..f3fb338e638 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -58,7 +58,6 @@ public:
bool advanced_shading; /* Supports full shading system. */
bool has_half_images; /* Support half-float textures. */
bool has_volume_decoupled; /* Decoupled volume shading. */
- BVHLayoutMask bvh_layout_mask; /* Bitmask of supported BVH layouts. */
bool has_osl; /* Support Open Shading Language. */
bool use_split_kernel; /* Use split or mega kernel. */
int cpu_threads;
@@ -74,7 +73,6 @@ public:
advanced_shading = true;
has_half_images = false;
has_volume_decoupled = false;
- bvh_layout_mask = BVH_LAYOUT_NONE;
has_osl = false;
use_split_kernel = false;
}
@@ -183,7 +181,7 @@ public:
/* Convert the requested features structure to a build options,
* which could then be passed to compilers.
*/
- string get_build_options(void) const
+ string get_build_options() const
{
string build_options = "";
if(experimental) {
@@ -242,8 +240,8 @@ std::ostream& operator <<(std::ostream &os,
/* Device */
struct DeviceDrawParams {
- function<void(void)> bind_display_space_shader_cb;
- function<void(void)> unbind_display_space_shader_cb;
+ function<void()> bind_display_space_shader_cb;
+ function<void()> unbind_display_space_shader_cb;
};
class Device {
@@ -281,6 +279,7 @@ public:
fflush(stderr);
}
virtual bool show_samples() const { return false; }
+ virtual BVHLayoutMask get_bvh_layout_mask() const = 0;
/* statistics */
Stats &stats;
@@ -361,4 +360,4 @@ private:
CCL_NAMESPACE_END
-#endif /* __DEVICE_H__ */
+#endif /* __DEVICE_H__ */
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 7c72ab1a009..76f6466bbde 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -41,6 +41,7 @@
#include "kernel/osl/osl_globals.h"
#include "render/buffers.h"
+#include "render/coverage.h"
#include "util/util_debug.h"
#include "util/util_foreach.h"
@@ -80,11 +81,11 @@ public:
/* Silence potential warnings about unused variables
* when compiling without some architectures. */
- (void)kernel_sse2;
- (void)kernel_sse3;
- (void)kernel_sse41;
- (void)kernel_avx;
- (void)kernel_avx2;
+ (void) kernel_sse2;
+ (void) kernel_sse3;
+ (void) kernel_sse41;
+ (void) kernel_avx;
+ (void) kernel_avx2;
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
if(DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) {
architecture_name = "AVX2";
@@ -184,11 +185,11 @@ public:
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_detect_outliers_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_combine_halves_kernel;
- KernelFunctions<void(*)(int, int, float*, float*, float*, int*, int, int, float, float)> filter_nlm_calc_difference_kernel;
- KernelFunctions<void(*)(float*, float*, int*, int, int)> filter_nlm_blur_kernel;
- KernelFunctions<void(*)(float*, float*, int*, int, int)> filter_nlm_calc_weight_kernel;
- KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int, int)> filter_nlm_update_output_kernel;
- KernelFunctions<void(*)(float*, float*, int*, int)> filter_nlm_normalize_kernel;
+ KernelFunctions<void(*)(int, int, float*, float*, float*, int*, int, int, float, float)> filter_nlm_calc_difference_kernel;
+ KernelFunctions<void(*)(float*, float*, int*, int, int)> filter_nlm_blur_kernel;
+ KernelFunctions<void(*)(float*, float*, int*, int, int)> filter_nlm_calc_weight_kernel;
+ KernelFunctions<void(*)(int, int, float*, float*, float*, float*, float*, int*, int, int)> filter_nlm_update_output_kernel;
+ KernelFunctions<void(*)(float*, float*, int*, int)> filter_nlm_normalize_kernel;
KernelFunctions<void(*)(float*, int, int, int, float*, int*, int*, int, int, float)> filter_construct_transform_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, int*, float*, float3*, int*, int*, int, int, int)> filter_nlm_construct_gramian_kernel;
@@ -277,6 +278,20 @@ public:
return (info.cpu_threads == 1);
}
+	/* Bitmask of the BVH layouts usable on this CPU device: BVH2 always,
+	 * BVH4 with SSE2, BVH8 with AVX2 (each only when both the debug flags
+	 * and the CPU allow it), and Embree when compiled in. */
+	virtual BVHLayoutMask get_bvh_layout_mask() const {
+		const bool sse2_available = DebugFlags().cpu.has_sse2() && system_cpu_support_sse2();
+		const bool avx2_available = DebugFlags().cpu.has_avx2() && system_cpu_support_avx2();
+
+		BVHLayoutMask supported_layouts = BVH_LAYOUT_BVH2;
+		if(sse2_available) {
+			supported_layouts |= BVH_LAYOUT_BVH4;
+		}
+		if(avx2_available) {
+			supported_layouts |= BVH_LAYOUT_BVH8;
+		}
+#ifdef WITH_EMBREE
+		supported_layouts |= BVH_LAYOUT_EMBREE;
+#endif  /* WITH_EMBREE */
+		return supported_layouts;
+	}
+
void load_texture_info()
{
if(need_texture_info) {
@@ -499,6 +514,7 @@ public:
filter_nlm_update_output_kernel()(dx, dy,
blurDifference,
(float*) image_ptr,
+ difference,
(float*) out_ptr,
weightAccum,
local_rect,
@@ -676,12 +692,22 @@ public:
void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
{
+ const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE;
+
scoped_timer timer(&tile.buffers->render_time);
+ Coverage coverage(kg, tile);
+ if(use_coverage) {
+ coverage.init_path_trace();
+ }
+
float *render_buffer = (float*)tile.buffer;
int start_sample = tile.start_sample;
int end_sample = tile.start_sample + tile.num_samples;
+ _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
+ _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
+
for(int sample = start_sample; sample < end_sample; sample++) {
if(task.get_cancel() || task_pool.canceled()) {
if(task.need_finish_queue == false)
@@ -690,6 +716,9 @@ public:
for(int y = tile.y; y < tile.y + tile.h; y++) {
for(int x = tile.x; x < tile.x + tile.w; x++) {
+ if(use_coverage) {
+ coverage.init_pixel(x, y);
+ }
path_trace_kernel()(kg, render_buffer,
sample, x, y, tile.offset, tile.stride);
}
@@ -699,6 +728,9 @@ public:
task.update_progress(&tile, tile.w*tile.h);
}
+ if(use_coverage) {
+ coverage.finalize();
+ }
}
void denoise(DenoisingTask& denoising, RenderTile &tile)
@@ -759,7 +791,6 @@ public:
}
else if(tile.task == RenderTile::DENOISE) {
denoise(denoising, tile);
-
task.update_progress(&tile, tile.w*tile.h);
}
@@ -1027,13 +1058,6 @@ void device_cpu_info(vector<DeviceInfo>& devices)
info.id = "CPU";
info.num = 0;
info.advanced_shading = true;
- info.bvh_layout_mask = BVH_LAYOUT_BVH2;
- if(system_cpu_support_sse2()) {
- info.bvh_layout_mask |= BVH_LAYOUT_BVH4;
- }
- if(system_cpu_support_avx2()) {
- info.bvh_layout_mask |= BVH_LAYOUT_BVH8;
- }
info.has_volume_decoupled = true;
info.has_osl = true;
info.has_half_images = true;
@@ -1041,7 +1065,7 @@ void device_cpu_info(vector<DeviceInfo>& devices)
devices.insert(devices.begin(), info);
}
-string device_cpu_capabilities(void)
+string device_cpu_capabilities()
{
string capabilities = "";
capabilities += system_cpu_support_sse2() ? "SSE2 " : "";
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index da8e49f129f..46e7b043603 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -73,12 +73,12 @@ const char *cuewErrorString(CUresult result)
return error.c_str();
}
-const char *cuewCompilerPath(void)
+const char *cuewCompilerPath()
{
return CYCLES_CUDA_NVCC_EXECUTABLE;
}
-int cuewCompilerVersion(void)
+int cuewCompilerVersion()
{
return (CUDA_VERSION / 100) + (CUDA_VERSION % 100 / 10);
}
@@ -181,6 +181,10 @@ public:
return true;
}
+ virtual BVHLayoutMask get_bvh_layout_mask() const { /* This device reports BVH2 as its only supported BVH layout. */
+ return BVH_LAYOUT_BVH2;
+ }
+
/*#ifdef NDEBUG
#define cuda_abort()
#else
@@ -207,7 +211,7 @@ public:
/*cuda_abort();*/ \
cuda_error_documentation(); \
} \
- } (void)0
+ } (void) 0
bool cuda_error_(CUresult result, const string& stmt)
{
@@ -1397,18 +1401,14 @@ public:
int h = task->reconstruction_state.source_h;
int stride = task->buffer.stride;
- int shift_stride = stride*h;
+ int pass_stride = task->buffer.pass_stride;
int num_shifts = (2*r+1)*(2*r+1);
- int mem_size = sizeof(float)*shift_stride*num_shifts;
-
- device_only_memory<uchar> temporary_mem(this, "Denoising temporary_mem");
- temporary_mem.alloc_to_device(2*mem_size);
if(have_error())
return false;
- CUdeviceptr difference = cuda_device_ptr(temporary_mem.device_pointer);
- CUdeviceptr blurDifference = difference + mem_size;
+ CUdeviceptr difference = cuda_device_ptr(task->buffer.temporary_mem.device_pointer);
+ CUdeviceptr blurDifference = difference + sizeof(float)*pass_stride*num_shifts;
{
CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian;
@@ -1426,9 +1426,9 @@ public:
task->reconstruction_state.source_w * task->reconstruction_state.source_h,
num_shifts);
- void *calc_difference_args[] = {&color_ptr, &color_variance_ptr, &difference, &w, &h, &stride, &shift_stride, &r, &task->buffer.pass_stride, &a, &k_2};
- void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &shift_stride, &r, &f};
- void *calc_weight_args[] = {&blurDifference, &difference, &w, &h, &stride, &shift_stride, &r, &f};
+ void *calc_difference_args[] = {&color_ptr, &color_variance_ptr, &difference, &w, &h, &stride, &pass_stride, &r, &pass_stride, &a, &k_2};
+ void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
+ void *calc_weight_args[] = {&blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
void *construct_gramian_args[] = {&blurDifference,
&task->buffer.mem.device_pointer,
&task->storage.transform.device_pointer,
@@ -1437,9 +1437,8 @@ public:
&task->storage.XtWY.device_pointer,
&task->reconstruction_state.filter_window,
&w, &h, &stride,
- &shift_stride, &r,
- &f,
- &task->buffer.pass_stride};
+ &pass_stride, &r,
+ &f};
CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args);
CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
@@ -1448,8 +1447,6 @@ public:
CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args);
}
- temporary_mem.free();
-
{
CUfunction cuFinalize;
cuda_assert(cuModuleGetFunction(&cuFinalize, cuFilterModule, "kernel_cuda_filter_finalize"));
@@ -1667,7 +1664,7 @@ public:
for(int sample = start_sample; sample < end_sample; sample += step_samples) {
/* Setup and copy work tile to device. */
wtile->start_sample = sample;
- wtile->num_samples = min(step_samples, end_sample - sample);;
+ wtile->num_samples = min(step_samples, end_sample - sample);
work_tiles.copy_to_device();
CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);
@@ -2149,7 +2146,7 @@ public:
/*cuda_abort();*/ \
device->cuda_error_documentation(); \
} \
- } (void)0
+ } (void) 0
/* CUDA context scope. */
@@ -2358,7 +2355,7 @@ int2 CUDASplitKernel::split_kernel_global_size(device_memory& kg, device_memory&
return global_size;
}
-bool device_cuda_init(void)
+bool device_cuda_init()
{
#ifdef WITH_CUDA_DYNLOAD
static bool initialized = false;
@@ -2396,7 +2393,7 @@ bool device_cuda_init(void)
return result;
#else /* WITH_CUDA_DYNLOAD */
return true;
-#endif /* WITH_CUDA_DYNLOAD */
+#endif /* WITH_CUDA_DYNLOAD */
}
Device *device_cuda_create(DeviceInfo& info, Stats &stats, bool background)
@@ -2466,7 +2463,6 @@ void device_cuda_info(vector<DeviceInfo>& devices)
info.advanced_shading = (major >= 3);
info.has_half_images = (major >= 3);
info.has_volume_decoupled = false;
- info.bvh_layout_mask = BVH_LAYOUT_BVH2;
int pci_location[3] = {0, 0, 0};
cuDeviceGetAttribute(&pci_location[0], CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, num);
@@ -2501,7 +2497,7 @@ void device_cuda_info(vector<DeviceInfo>& devices)
devices.insert(devices.end(), display_devices.begin(), display_devices.end());
}
-string device_cuda_capabilities(void)
+string device_cuda_capabilities()
{
CUresult result = device_cuda_safe_init();
if(result != CUDA_SUCCESS) {
@@ -2534,7 +2530,7 @@ string device_cuda_capabilities(void)
capabilities += string_printf("\t\tCU_DEVICE_ATTRIBUTE_" #attr "\t\t\t%d\n", \
value); \
} \
- } (void)0
+ } (void) 0
/* TODO(sergey): Strip all attributes which are not useful for us
* or does not depend on the driver.
*/
diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp
index 23c18fa15b2..78c65a3d22d 100644
--- a/intern/cycles/device/device_denoising.cpp
+++ b/intern/cycles/device/device_denoising.cpp
@@ -99,14 +99,18 @@ void DenoisingTask::setup_denoising_buffer()
buffer.mem.alloc_to_device(mem_size, false);
/* CPUs process shifts sequentially while GPUs process them in parallel. */
- int num_shifts = 1;
+ int num_layers;
if(buffer.gpu_temporary_mem) {
/* Shadowing prefiltering uses a radius of 6, so allocate at least that much. */
int max_radius = max(radius, 6);
- num_shifts = (2*max_radius + 1) * (2*max_radius + 1);
+ int num_shifts = (2*max_radius + 1) * (2*max_radius + 1);
+ num_layers = 2*num_shifts + 1;
+ }
+ else {
+ num_layers = 3;
}
/* Allocate two layers per shift as well as one for the weight accumulation. */
- buffer.temporary_mem.alloc_to_device((2*num_shifts + 1) * buffer.pass_stride);
+ buffer.temporary_mem.alloc_to_device(num_layers * buffer.pass_stride);
}
void DenoisingTask::prefilter_shadowing()
diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h
index 7474f71ff78..8e0666d0e59 100644
--- a/intern/cycles/device/device_denoising.h
+++ b/intern/cycles/device/device_denoising.h
@@ -166,4 +166,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __DEVICE_DENOISING_H__ */
+#endif /* __DEVICE_DENOISING_H__ */
diff --git a/intern/cycles/device/device_intern.h b/intern/cycles/device/device_intern.h
index 941be448101..e6495c2bff3 100644
--- a/intern/cycles/device/device_intern.h
+++ b/intern/cycles/device/device_intern.h
@@ -22,9 +22,9 @@ CCL_NAMESPACE_BEGIN
class Device;
Device *device_cpu_create(DeviceInfo& info, Stats &stats, bool background);
-bool device_opencl_init(void);
+bool device_opencl_init();
Device *device_opencl_create(DeviceInfo& info, Stats &stats, bool background);
-bool device_cuda_init(void);
+bool device_cuda_init();
Device *device_cuda_create(DeviceInfo& info, Stats &stats, bool background);
Device *device_network_create(DeviceInfo& info, Stats &stats, const char *address);
Device *device_multi_create(DeviceInfo& info, Stats &stats, bool background);
@@ -34,10 +34,10 @@ void device_opencl_info(vector<DeviceInfo>& devices);
void device_cuda_info(vector<DeviceInfo>& devices);
void device_network_info(vector<DeviceInfo>& devices);
-string device_cpu_capabilities(void);
-string device_opencl_capabilities(void);
-string device_cuda_capabilities(void);
+string device_cpu_capabilities();
+string device_opencl_capabilities();
+string device_cuda_capabilities();
CCL_NAMESPACE_END
-#endif /* __DEVICE_INTERN_H__ */
+#endif /* __DEVICE_INTERN_H__ */
diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h
index 2b4835c9c65..e43834bdc8d 100644
--- a/intern/cycles/device/device_memory.h
+++ b/intern/cycles/device/device_memory.h
@@ -21,6 +21,7 @@
*
* Data types for allocating, copying and freeing device memory. */
+#include "util/util_array.h"
#include "util/util_half.h"
#include "util/util_texture.h"
#include "util/util_types.h"
@@ -496,4 +497,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __DEVICE_MEMORY_H__ */
+#endif /* __DEVICE_MEMORY_H__ */
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index f1bd3fd13e1..490ee3951c9 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -103,6 +103,14 @@ public:
return devices.front().device->show_samples();
}
+ virtual BVHLayoutMask get_bvh_layout_mask() const {
+ BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
+ foreach(const SubDevice& sub_device, devices) {
+ bvh_layout_mask &= sub_device.device->get_bvh_layout_mask();
+ }
+ return bvh_layout_mask;
+ }
+
bool load_kernels(const DeviceRequestedFeatures& requested_features)
{
foreach(SubDevice& sub, devices)
diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp
index 204e405421d..b6e18621f12 100644
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -87,6 +87,10 @@ public:
snd.write();
}
+ virtual BVHLayoutMask get_bvh_layout_mask() const {
+ return BVH_LAYOUT_BVH2;
+ }
+
void mem_alloc(device_memory& mem)
{
if(mem.name) {
@@ -306,7 +310,6 @@ void device_network_info(vector<DeviceInfo>& devices)
/* todo: get this info from device */
info.advanced_shading = true;
info.has_volume_decoupled = false;
- info.bvh_layout_mask = BVH_LAYOUT_BVH2;
info.has_osl = false;
devices.push_back(info);
diff --git a/intern/cycles/device/device_network.h b/intern/cycles/device/device_network.h
index 96e0de742db..67626ae177f 100644
--- a/intern/cycles/device/device_network.h
+++ b/intern/cycles/device/device_network.h
@@ -488,4 +488,4 @@ CCL_NAMESPACE_END
#endif
-#endif /* __DEVICE_NETWORK_H__ */
+#endif /* __DEVICE_NETWORK_H__ */
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index be0f8f45399..71410f80d57 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -44,7 +44,7 @@ Device *device_opencl_create(DeviceInfo& info, Stats &stats, bool background)
}
}
-bool device_opencl_init(void)
+bool device_opencl_init()
{
static bool initialized = false;
static bool result = false;
@@ -136,7 +136,6 @@ void device_opencl_info(vector<DeviceInfo>& devices)
info.use_split_kernel = OpenCLInfo::kernel_use_split(platform_name,
device_type);
info.has_volume_decoupled = false;
- info.bvh_layout_mask = BVH_LAYOUT_BVH2;
info.id = id;
/* Check OpenCL extensions */
@@ -147,7 +146,7 @@ void device_opencl_info(vector<DeviceInfo>& devices)
}
}
-string device_opencl_capabilities(void)
+string device_opencl_capabilities()
{
if(OpenCLInfo::device_type() == 0) {
return "All OpenCL devices are forced to be OFF";
@@ -246,4 +245,4 @@ string device_opencl_capabilities(void)
CCL_NAMESPACE_END
-#endif /* WITH_OPENCL */
+#endif /* WITH_OPENCL */
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index 26ddce5bb22..5af4367d1b6 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -130,4 +130,4 @@ public:
CCL_NAMESPACE_END
-#endif /* __DEVICE_SPLIT_KERNEL_H__ */
+#endif /* __DEVICE_SPLIT_KERNEL_H__ */
diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h
index ec87aa8c560..861014373b3 100644
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@@ -64,7 +64,7 @@ public:
function<void(long, int)> update_progress_sample;
function<void(RenderTile&)> update_tile_sample;
function<void(RenderTile&)> release_tile;
- function<bool(void)> get_cancel;
+ function<bool()> get_cancel;
function<void(RenderTile*, Device*)> map_neighbor_tiles;
function<void(RenderTile*, Device*)> unmap_neighbor_tiles;
@@ -85,4 +85,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __DEVICE_TASK_H__ */
+#endif /* __DEVICE_TASK_H__ */
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index 6c73d10a376..8cb7f6d0b82 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -245,7 +245,7 @@ public:
(device)->set_error(message); \
fprintf(stderr, "%s\n", message.c_str()); \
} \
- } (void)0
+ } (void) 0
#define opencl_assert(stmt) \
{ \
@@ -257,7 +257,7 @@ public:
error_msg = message; \
fprintf(stderr, "%s\n", message.c_str()); \
} \
- } (void)0
+ } (void) 0
class OpenCLDeviceBase : public Device
{
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index cc887134bb0..1e73d37d7a4 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -761,7 +761,7 @@ bool OpenCLDeviceBase::denoising_non_local_means(device_ptr image_ptr,
cl_mem variance_mem = CL_MEM_PTR(variance_ptr);
cl_mem out_mem = CL_MEM_PTR(out_ptr);
- mem_zero_kernel(*difference, sizeof(float)*pass_stride);
+ mem_zero_kernel(*weightAccum, sizeof(float)*pass_stride);
mem_zero_kernel(out_ptr, sizeof(float)*pass_stride);
cl_kernel ckNLMCalcDifference = denoising_program(ustring("filter_nlm_calc_difference"));
@@ -865,38 +865,38 @@ bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr,
int h = task->reconstruction_state.source_h;
int stride = task->buffer.stride;
- int shift_stride = stride*h;
- int num_shifts = (2*task->radius + 1)*(2*task->radius + 1);
- int mem_size = sizeof(float)*shift_stride*num_shifts;
+ int r = task->radius;
+ int pass_stride = task->buffer.pass_stride;
+ int num_shifts = (2*r+1)*(2*r+1);
- cl_mem difference = clCreateBuffer(cxContext, CL_MEM_READ_WRITE, mem_size, NULL, &ciErr);
- opencl_assert_err(ciErr, "clCreateBuffer denoising_reconstruct");
- cl_mem blurDifference = clCreateBuffer(cxContext, CL_MEM_READ_WRITE, mem_size, NULL, &ciErr);
- opencl_assert_err(ciErr, "clCreateBuffer denoising_reconstruct");
+ device_sub_ptr difference(task->buffer.temporary_mem, 0, pass_stride*num_shifts);
+ device_sub_ptr blurDifference(task->buffer.temporary_mem, pass_stride*num_shifts, pass_stride*num_shifts);
+ cl_mem difference_mem = CL_MEM_PTR(*difference);
+ cl_mem blurDifference_mem = CL_MEM_PTR(*blurDifference);
kernel_set_args(ckNLMCalcDifference, 0,
color_mem,
color_variance_mem,
- difference,
+ difference_mem,
w, h, stride,
- shift_stride,
- task->radius,
- task->buffer.pass_stride,
+ pass_stride,
+ r,
+ pass_stride,
1.0f, task->nlm_k_2);
kernel_set_args(ckNLMBlur, 0,
- difference,
- blurDifference,
+ difference_mem,
+ blurDifference_mem,
w, h, stride,
- shift_stride,
- task->radius, 4);
+ pass_stride,
+ r, 4);
kernel_set_args(ckNLMCalcWeight, 0,
- blurDifference,
- difference,
+ blurDifference_mem,
+ difference_mem,
w, h, stride,
- shift_stride,
- task->radius, 4);
+ pass_stride,
+ r, 4);
kernel_set_args(ckNLMConstructGramian, 0,
- blurDifference,
+ blurDifference_mem,
buffer_mem,
transform_mem,
rank_mem,
@@ -904,9 +904,8 @@ bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr,
XtWY_mem,
task->reconstruction_state.filter_window,
w, h, stride,
- shift_stride,
- task->radius, 4,
- task->buffer.pass_stride);
+ pass_stride,
+ r, 4);
enqueue_kernel(ckNLMCalcDifference, w*h, num_shifts, true);
enqueue_kernel(ckNLMBlur, w*h, num_shifts, true);
@@ -914,9 +913,6 @@ bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr,
enqueue_kernel(ckNLMBlur, w*h, num_shifts, true);
enqueue_kernel(ckNLMConstructGramian, w*h, num_shifts, true, 256);
- opencl_assert(clReleaseMemObject(difference));
- opencl_assert(clReleaseMemObject(blurDifference));
-
kernel_set_args(ckFinalize, 0,
output_mem,
rank_mem,
diff --git a/intern/cycles/device/opencl/opencl_mega.cpp b/intern/cycles/device/opencl/opencl_mega.cpp
index e004c0b44f4..89001366d9d 100644
--- a/intern/cycles/device/opencl/opencl_mega.cpp
+++ b/intern/cycles/device/opencl/opencl_mega.cpp
@@ -43,6 +43,10 @@ public:
return true;
}
+ virtual BVHLayoutMask get_bvh_layout_mask() const {
+ return BVH_LAYOUT_BVH2;
+ }
+
virtual bool load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
vector<OpenCLProgram*> &programs)
{
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 66a4aa7e891..adb73bc6e2c 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -95,6 +95,10 @@ public:
return true;
}
+ virtual BVHLayoutMask get_bvh_layout_mask() const {
+ return BVH_LAYOUT_BVH2;
+ }
+
virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
vector<OpenCLDeviceBase::OpenCLProgram*> &programs)
{
@@ -459,4 +463,4 @@ Device *opencl_create_split_device(DeviceInfo& info, Stats& stats, bool backgrou
CCL_NAMESPACE_END
-#endif /* WITH_OPENCL */
+#endif /* WITH_OPENCL */
diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp
index 895e4149a3a..4c9f3cd6ef7 100644
--- a/intern/cycles/device/opencl/opencl_util.cpp
+++ b/intern/cycles/device/opencl/opencl_util.cpp
@@ -106,7 +106,7 @@ cl_context OpenCLCache::get_context(cl_platform_id platform,
cl_int ciErr = clRetainContext(slot.context);
assert(ciErr == CL_SUCCESS);
- (void)ciErr;
+ (void) ciErr;
return slot.context;
}
@@ -153,7 +153,7 @@ cl_program OpenCLCache::get_program(cl_platform_id platform,
cl_int ciErr = clRetainProgram(entry.program);
assert(ciErr == CL_SUCCESS);
- (void)ciErr;
+ (void) ciErr;
return entry.program;
}
@@ -188,7 +188,7 @@ void OpenCLCache::store_context(cl_platform_id platform,
* The caller is going to release the object when done with it. */
cl_int ciErr = clRetainContext(context);
assert(ciErr == CL_SUCCESS);
- (void)ciErr;
+ (void) ciErr;
}
void OpenCLCache::store_program(cl_platform_id platform,
@@ -227,7 +227,7 @@ void OpenCLCache::store_program(cl_platform_id platform,
*/
cl_int ciErr = clRetainProgram(program);
assert(ciErr == CL_SUCCESS);
- (void)ciErr;
+ (void) ciErr;
}
string OpenCLCache::get_kernel_md5()
diff --git a/intern/cycles/graph/node.h b/intern/cycles/graph/node.h
index 11695a8631d..d50a3786139 100644
--- a/intern/cycles/graph/node.h
+++ b/intern/cycles/graph/node.h
@@ -18,9 +18,9 @@
#include "graph/node_type.h"
+#include "util/util_array.h"
#include "util/util_map.h"
#include "util/util_param.h"
-#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/graph/node_type.h b/intern/cycles/graph/node_type.h
index 1d565794b27..7d6abae2314 100644
--- a/intern/cycles/graph/node_type.h
+++ b/intern/cycles/graph/node_type.h
@@ -17,7 +17,7 @@
#pragma once
#include "graph/node_enum.h"
-
+#include "util/util_array.h"
#include "util/util_map.h"
#include "util/util_param.h"
#include "util/util_string.h"
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index c6e92c6d89d..92cb66bdec9 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -82,6 +82,7 @@ set(SRC_BVH_HEADERS
bvh/obvh_traversal.h
bvh/obvh_volume.h
bvh/obvh_volume_all.h
+ bvh/bvh_embree.h
)
set(SRC_HEADERS
@@ -96,6 +97,7 @@ set(SRC_HEADERS
kernel_emission.h
kernel_film.h
kernel_globals.h
+ kernel_id_passes.h
kernel_jitter.h
kernel_light.h
kernel_math.h
@@ -340,11 +342,11 @@ if(WITH_CYCLES_CUDA_BINARIES)
set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}")
# warn for other versions
- if(CUDA_VERSION MATCHES "80" OR CUDA_VERSION MATCHES "90")
+ if(CUDA_VERSION MATCHES "90" OR CUDA_VERSION MATCHES "91")
else()
message(WARNING
"CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, "
- "build may succeed but only CUDA 8.0 is officially supported")
+ "build may succeed but only CUDA 9.0 and 9.1 are officially supported")
endif()
# build for each arch
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index 2ad55d041bf..6708a3efac1 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -25,6 +25,10 @@
* the code has been extended and modified to support more primitives and work
* with CPU/CUDA/OpenCL. */
+#ifdef __EMBREE__
+# include "kernel/bvh/bvh_embree.h"
+#endif
+
CCL_NAMESPACE_BEGIN
#include "kernel/bvh/bvh_types.h"
@@ -32,9 +36,9 @@ CCL_NAMESPACE_BEGIN
/* Common QBVH functions. */
#ifdef __QBVH__
# include "kernel/bvh/qbvh_nodes.h"
-#ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_nodes.h"
-#endif
+# ifdef __KERNEL_AVX2__
+# include "kernel/bvh/obvh_nodes.h"
+# endif
#endif
/* Regular BVH traversal */
@@ -160,6 +164,19 @@ CCL_NAMESPACE_BEGIN
#undef BVH_NAME_EVAL
#undef BVH_FUNCTION_FULL_NAME
+ccl_device_inline bool scene_intersect_valid(const Ray *ray)
+{
+ /* NOTE: Due to some vectorization code non-finite origin point might
+ * cause lots of false-positive intersections which will overflow traversal
+ * stack.
+ * This code is a quick way to perform early output, to avoid crashes in
+ * such cases.
+ * From production scenes so far it seems it's enough to test first element
+ * only.
+ */
+ return isfinite(ray->P.x);
+}
+
/* Note: ray is passed by value to work around a possible CUDA compiler bug. */
ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
const Ray ray,
@@ -169,39 +186,57 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
float difl,
float extmax)
{
+ if(!scene_intersect_valid(&ray)) {
+ return false;
+ }
+#ifdef __EMBREE__
+ if(kernel_data.bvh.scene) {
+ isect->t = ray.t;
+ CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
+ IntersectContext rtc_ctx(&ctx);
+ RTCRayHit ray_hit;
+ kernel_embree_setup_rayhit(ray, ray_hit, visibility);
+ rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit);
+ if(ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID && ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) {
+ kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect);
+ return true;
+ }
+ return false;
+ }
+#endif /* __EMBREE__ */
#ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
# ifdef __HAIR__
if(kernel_data.bvh.have_curves)
return bvh_intersect_hair_motion(kg, &ray, isect, visibility, lcg_state, difl, extmax);
-# endif /* __HAIR__ */
+# endif /* __HAIR__ */
return bvh_intersect_motion(kg, &ray, isect, visibility);
}
-#endif /* __OBJECT_MOTION__ */
+#endif /* __OBJECT_MOTION__ */
#ifdef __HAIR__
if(kernel_data.bvh.have_curves)
return bvh_intersect_hair(kg, &ray, isect, visibility, lcg_state, difl, extmax);
-#endif /* __HAIR__ */
+#endif /* __HAIR__ */
#ifdef __KERNEL_CPU__
# ifdef __INSTANCING__
if(kernel_data.bvh.have_instancing)
return bvh_intersect_instancing(kg, &ray, isect, visibility);
-# endif /* __INSTANCING__ */
+# endif /* __INSTANCING__ */
return bvh_intersect(kg, &ray, isect, visibility);
-#else /* __KERNEL_CPU__ */
+#else /* __KERNEL_CPU__ */
# ifdef __INSTANCING__
return bvh_intersect_instancing(kg, &ray, isect, visibility);
# else
return bvh_intersect(kg, &ray, isect, visibility);
-# endif /* __INSTANCING__ */
+# endif /* __INSTANCING__ */
-#endif /* __KERNEL_CPU__ */
+#endif /* __KERNEL_CPU__ */
}
#ifdef __BVH_LOCAL__
@@ -213,6 +248,58 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
uint *lcg_state,
int max_hits)
{
+ if(!scene_intersect_valid(&ray)) {
+ return false;
+ }
+#ifdef __EMBREE__
+ if(kernel_data.bvh.scene) {
+ CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SSS);
+ ctx.lcg_state = lcg_state;
+ ctx.max_hits = max_hits;
+ ctx.ss_isect = local_isect;
+ local_isect->num_hits = 0;
+ ctx.sss_object_id = local_object;
+ IntersectContext rtc_ctx(&ctx);
+ RTCRay rtc_ray;
+ kernel_embree_setup_ray(ray, rtc_ray, PATH_RAY_ALL_VISIBILITY);
+
+ /* Get the Embree scene for this intersection. */
+ RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2);
+ if(geom) {
+ float3 P = ray.P;
+ float3 dir = ray.D;
+ float3 idir = ray.D;
+ const int object_flag = kernel_tex_fetch(__object_flag, local_object);
+ if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+ Transform ob_itfm;
+ rtc_ray.tfar = bvh_instance_motion_push(kg,
+ local_object,
+ &ray,
+ &P,
+ &dir,
+ &idir,
+ ray.t,
+ &ob_itfm);
+ /* bvh_instance_motion_push() returns the inverse transform but
+ * it's not needed here. */
+ (void) ob_itfm;
+
+ rtc_ray.org_x = P.x;
+ rtc_ray.org_y = P.y;
+ rtc_ray.org_z = P.z;
+ rtc_ray.dir_x = dir.x;
+ rtc_ray.dir_y = dir.y;
+ rtc_ray.dir_z = dir.z;
+ }
+ RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom);
+ if(scene) {
+ rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray);
+ }
+ }
+
+ return local_isect->num_hits > 0;
+ }
+#endif /* __EMBREE__ */
#ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
return bvh_intersect_local_motion(kg,
@@ -222,7 +309,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
lcg_state,
max_hits);
}
-#endif /* __OBJECT_MOTION__ */
+#endif /* __OBJECT_MOTION__ */
return bvh_intersect_local(kg,
&ray,
local_isect,
@@ -240,6 +327,27 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
uint max_hits,
uint *num_hits)
{
+ if(!scene_intersect_valid(ray)) {
+ return false;
+ }
+# ifdef __EMBREE__
+ if(kernel_data.bvh.scene) {
+ CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
+ ctx.isect_s = isect;
+ ctx.max_hits = max_hits;
+ ctx.num_hits = 0;
+ IntersectContext rtc_ctx(&ctx);
+ RTCRay rtc_ray;
+ kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_SHADOW);
+ rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
+
+ if(ctx.num_hits > max_hits) {
+ return true;
+ }
+ *num_hits = ctx.num_hits;
+ return rtc_ray.tfar == -INFINITY;
+ }
+# endif
# ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
# ifdef __HAIR__
@@ -251,7 +359,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
max_hits,
num_hits);
}
-# endif /* __HAIR__ */
+# endif /* __HAIR__ */
return bvh_intersect_shadow_all_motion(kg,
ray,
@@ -260,7 +368,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
max_hits,
num_hits);
}
-# endif /* __OBJECT_MOTION__ */
+# endif /* __OBJECT_MOTION__ */
# ifdef __HAIR__
if(kernel_data.bvh.have_curves) {
@@ -271,7 +379,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
max_hits,
num_hits);
}
-# endif /* __HAIR__ */
+# endif /* __HAIR__ */
# ifdef __INSTANCING__
if(kernel_data.bvh.have_instancing) {
@@ -282,7 +390,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
max_hits,
num_hits);
}
-# endif /* __INSTANCING__ */
+# endif /* __INSTANCING__ */
return bvh_intersect_shadow_all(kg,
ray,
@@ -299,24 +407,27 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
Intersection *isect,
const uint visibility)
{
+ if(!scene_intersect_valid(ray)) {
+ return false;
+ }
# ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
return bvh_intersect_volume_motion(kg, ray, isect, visibility);
}
-# endif /* __OBJECT_MOTION__ */
+# endif /* __OBJECT_MOTION__ */
# ifdef __KERNEL_CPU__
# ifdef __INSTANCING__
if(kernel_data.bvh.have_instancing)
return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
-# endif /* __INSTANCING__ */
+# endif /* __INSTANCING__ */
return bvh_intersect_volume(kg, ray, isect, visibility);
-# else /* __KERNEL_CPU__ */
+# else /* __KERNEL_CPU__ */
# ifdef __INSTANCING__
return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
# else
return bvh_intersect_volume(kg, ray, isect, visibility);
-# endif /* __INSTANCING__ */
-# endif /* __KERNEL_CPU__ */
+# endif /* __INSTANCING__ */
+# endif /* __KERNEL_CPU__ */
}
#endif /* __VOLUME__ */
@@ -327,15 +438,31 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
const uint max_hits,
const uint visibility)
{
+ if(!scene_intersect_valid(ray)) {
+ return false;
+ }
+# ifdef __EMBREE__
+ if(kernel_data.bvh.scene) {
+ CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
+ ctx.isect_s = isect;
+ ctx.max_hits = max_hits;
+ ctx.num_hits = 0;
+ IntersectContext rtc_ctx(&ctx);
+ RTCRay rtc_ray;
+ kernel_embree_setup_ray(*ray, rtc_ray, visibility);
+ rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
+ return rtc_ray.tfar == -INFINITY;
+ }
+# endif
# ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
}
-# endif /* __OBJECT_MOTION__ */
+# endif /* __OBJECT_MOTION__ */
# ifdef __INSTANCING__
if(kernel_data.bvh.have_instancing)
return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits, visibility);
-# endif /* __INSTANCING__ */
+# endif /* __INSTANCING__ */
return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
}
#endif /* __VOLUME_RECORD_ALL__ */
diff --git a/intern/cycles/kernel/bvh/bvh_embree.h b/intern/cycles/kernel/bvh/bvh_embree.h
new file mode 100644
index 00000000000..34a099ebb4d
--- /dev/null
+++ b/intern/cycles/kernel/bvh/bvh_embree.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2018, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <embree3/rtcore_ray.h>
+#include <embree3/rtcore_scene.h>
+
+#include "kernel/kernel_compat_cpu.h"
+#include "kernel/split/kernel_split_data_types.h"
+#include "kernel/kernel_globals.h"
+#include "util/util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+struct CCLIntersectContext {
+ typedef enum {
+ RAY_REGULAR = 0,
+ RAY_SHADOW_ALL = 1,
+ RAY_SSS = 2,
+ RAY_VOLUME_ALL = 3,
+
+ } RayType;
+
+ KernelGlobals *kg;
+ RayType type;
+
+ /* for shadow rays */
+ Intersection *isect_s;
+ int max_hits;
+ int num_hits;
+
+ /* for SSS Rays: */
+ LocalIntersection *ss_isect;
+ int sss_object_id;
+ uint *lcg_state;
+
+ CCLIntersectContext(KernelGlobals *kg_, RayType type_)
+ {
+ kg = kg_;
+ type = type_;
+ max_hits = 1;
+ num_hits = 0;
+ isect_s = NULL;
+ ss_isect = NULL;
+ sss_object_id = -1;
+ lcg_state = NULL;
+ }
+};
+
+class IntersectContext
+{
+public:
+ IntersectContext(CCLIntersectContext* ctx)
+ {
+ rtcInitIntersectContext(&context);
+ userRayExt = ctx;
+ }
+ RTCIntersectContext context;
+ CCLIntersectContext* userRayExt;
+};
+
+ccl_device_inline void kernel_embree_setup_ray(const Ray& ray, RTCRay& rtc_ray, const uint visibility)
+{
+ rtc_ray.org_x = ray.P.x;
+ rtc_ray.org_y = ray.P.y;
+ rtc_ray.org_z = ray.P.z;
+ rtc_ray.dir_x = ray.D.x;
+ rtc_ray.dir_y = ray.D.y;
+ rtc_ray.dir_z = ray.D.z;
+ rtc_ray.tnear = 0.0f;
+ rtc_ray.tfar = ray.t;
+ rtc_ray.time = ray.time;
+ rtc_ray.mask = visibility;
+}
+
+ccl_device_inline void kernel_embree_setup_rayhit(const Ray& ray, RTCRayHit& rayhit, const uint visibility)
+{
+ kernel_embree_setup_ray(ray, rayhit.ray, visibility);
+ rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID;
+ rayhit.hit.primID = RTC_INVALID_GEOMETRY_ID;
+}
+
+ccl_device_inline void kernel_embree_convert_hit(KernelGlobals *kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect)
+{
+ bool is_hair = hit->geomID & 1;
+ isect->u = is_hair ? hit->u : 1.0f - hit->v - hit->u;
+ isect->v = is_hair ? hit->v : hit->u;
+ isect->t = ray->tfar;
+ isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
+ if(hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
+ RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
+ isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) + kernel_tex_fetch(__object_node, hit->instID[0]/2);
+ isect->object = hit->instID[0]/2;
+ }
+ else {
+ isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
+ isect->object = OBJECT_NONE;
+ }
+ isect->type = kernel_tex_fetch(__prim_type, isect->prim);
+}
+
+ccl_device_inline void kernel_embree_convert_local_hit(KernelGlobals *kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int local_object_id)
+{
+ isect->u = 1.0f - hit->v - hit->u;
+ isect->v = hit->u;
+ isect->t = ray->tfar;
+ isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
+ RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.bvh.scene, local_object_id * 2));
+ isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) + kernel_tex_fetch(__object_node, local_object_id);
+ isect->object = local_object_id;
+ isect->type = kernel_tex_fetch(__prim_type, isect->prim);
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/bvh/bvh_local.h b/intern/cycles/kernel/bvh/bvh_local.h
index 2b02f4527bb..8364bc3aa9a 100644
--- a/intern/cycles/kernel/bvh/bvh_local.h
+++ b/intern/cycles/kernel/bvh/bvh_local.h
@@ -136,7 +136,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
node_addr,
PATH_RAY_ALL_VISIBILITY,
dist);
-#else // __KERNEL_SSE2__
+#else // __KERNEL_SSE2__
traverse_mask = NODE_INTERSECT(kg,
P,
dir,
@@ -151,7 +151,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
node_addr,
PATH_RAY_ALL_VISIBILITY,
dist);
-#endif // __KERNEL_SSE2__
+#endif // __KERNEL_SSE2__
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h
index d525b29fd94..64eb2f3f659 100644
--- a/intern/cycles/kernel/bvh/bvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -124,7 +124,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
node_addr,
visibility,
dist);
-#else // __KERNEL_SSE2__
+#else // __KERNEL_SSE2__
traverse_mask = NODE_INTERSECT(kg,
P,
dir,
@@ -139,7 +139,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
node_addr,
visibility,
dist);
-#endif // __KERNEL_SSE2__
+#endif // __KERNEL_SSE2__
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h
index e95d2408201..af9f04db0ba 100644
--- a/intern/cycles/kernel/bvh/bvh_traversal.h
+++ b/intern/cycles/kernel/bvh/bvh_traversal.h
@@ -146,7 +146,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
visibility,
dist);
}
-#else // __KERNEL_SSE2__
+#else // __KERNEL_SSE2__
# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
if(difl != 0.0f) {
traverse_mask = NODE_INTERSECT_ROBUST(kg,
@@ -184,7 +184,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
visibility,
dist);
}
-#endif // __KERNEL_SSE2__
+#endif // __KERNEL_SSE2__
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h
index 7d03855cb8f..12d4c5eb94a 100644
--- a/intern/cycles/kernel/bvh/bvh_volume.h
+++ b/intern/cycles/kernel/bvh/bvh_volume.h
@@ -120,7 +120,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
node_addr,
visibility,
dist);
-#else // __KERNEL_SSE2__
+#else // __KERNEL_SSE2__
traverse_mask = NODE_INTERSECT(kg,
P,
dir,
@@ -135,7 +135,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
node_addr,
visibility,
dist);
-#endif // __KERNEL_SSE2__
+#endif // __KERNEL_SSE2__
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h
index 3d9b598914f..6205b9bcf7a 100644
--- a/intern/cycles/kernel/bvh/bvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/bvh_volume_all.h
@@ -124,7 +124,7 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
node_addr,
visibility,
dist);
-#else // __KERNEL_SSE2__
+#else // __KERNEL_SSE2__
traverse_mask = NODE_INTERSECT(kg,
P,
dir,
@@ -139,7 +139,7 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
node_addr,
visibility,
dist);
-#endif // __KERNEL_SSE2__
+#endif // __KERNEL_SSE2__
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
diff --git a/intern/cycles/kernel/bvh/obvh_local.h b/intern/cycles/kernel/bvh/obvh_local.h
index 92143193a6a..eb24a607caa 100644
--- a/intern/cycles/kernel/bvh/obvh_local.h
+++ b/intern/cycles/kernel/bvh/obvh_local.h
@@ -73,12 +73,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
object = local_object;
}
-#ifndef __KERNEL_SSE41__
- if(!isfinite(P.x)) {
- return false;
- }
-#endif
-
avxf tnear(0.0f), tfar(isect_t);
#if BVH_FEATURE(BVH_HAIR)
avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
diff --git a/intern/cycles/kernel/bvh/obvh_shadow_all.h b/intern/cycles/kernel/bvh/obvh_shadow_all.h
index 3e877065127..8b739b3438a 100644
--- a/intern/cycles/kernel/bvh/obvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/obvh_shadow_all.h
@@ -66,12 +66,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*num_hits = 0;
isect_array->t = tmax;
-#ifndef __KERNEL_SSE41__
- if(!isfinite(P.x)) {
- return false;
- }
-#endif
-
#if BVH_FEATURE(BVH_INSTANCING)
int num_hits_in_instance = 0;
#endif
@@ -103,7 +97,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
/* Traverse internal nodes. */
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- (void)inodes;
+ (void) inodes;
if(false
#ifdef __VISIBILITY_FLAG__
diff --git a/intern/cycles/kernel/bvh/obvh_traversal.h b/intern/cycles/kernel/bvh/obvh_traversal.h
index 2021d8e1143..6bb19eb1ed9 100644
--- a/intern/cycles/kernel/bvh/obvh_traversal.h
+++ b/intern/cycles/kernel/bvh/obvh_traversal.h
@@ -64,12 +64,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
Transform ob_itfm;
#endif
-#ifndef __KERNEL_SSE41__
- if(!isfinite(P.x)) {
- return false;
- }
-#endif
-
isect->t = ray->t;
isect->u = 0.0f;
isect->v = 0.0f;
@@ -103,7 +97,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
/* Traverse internal nodes. */
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- (void)inodes;
+ (void) inodes;
if(UNLIKELY(node_dist > isect->t)
#if BVH_FEATURE(BVH_MOTION)
@@ -179,7 +173,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
avxf cnodes;
/* TODO(sergey): Investigate whether moving cnodes upwards
* gives a speedup (will be different cache pattern but will
- * avoid extra check here),
+ * avoid extra check here).
*/
#if BVH_FEATURE(BVH_HAIR)
if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
diff --git a/intern/cycles/kernel/bvh/obvh_volume.h b/intern/cycles/kernel/bvh/obvh_volume.h
index da9ddbd4f24..80d09c59039 100644
--- a/intern/cycles/kernel/bvh/obvh_volume.h
+++ b/intern/cycles/kernel/bvh/obvh_volume.h
@@ -52,12 +52,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
Transform ob_itfm;
#endif
-#ifndef __KERNEL_SSE41__
- if(!isfinite(P.x)) {
- return false;
- }
-#endif
-
isect->t = ray->t;
isect->u = 0.0f;
isect->v = 0.0f;
diff --git a/intern/cycles/kernel/bvh/obvh_volume_all.h b/intern/cycles/kernel/bvh/obvh_volume_all.h
index a88573e6f86..87216127ddb 100644
--- a/intern/cycles/kernel/bvh/obvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/obvh_volume_all.h
@@ -58,12 +58,6 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
uint num_hits = 0;
isect_array->t = tmax;
-#ifndef __KERNEL_SSE41__
- if(!isfinite(P.x)) {
- return 0;
- }
-#endif
-
#if BVH_FEATURE(BVH_INSTANCING)
int num_hits_in_instance = 0;
#endif
diff --git a/intern/cycles/kernel/bvh/qbvh_local.h b/intern/cycles/kernel/bvh/qbvh_local.h
index ee3827de309..22d434a8737 100644
--- a/intern/cycles/kernel/bvh/qbvh_local.h
+++ b/intern/cycles/kernel/bvh/qbvh_local.h
@@ -82,12 +82,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
object = local_object;
}
-#ifndef __KERNEL_SSE41__
- if(!isfinite(P.x)) {
- return false;
- }
-#endif
-
ssef tnear(0.0f), tfar(isect_t);
#if BVH_FEATURE(BVH_HAIR)
sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
diff --git a/intern/cycles/kernel/bvh/qbvh_shadow_all.h b/intern/cycles/kernel/bvh/qbvh_shadow_all.h
index 46fd178aed6..37606e10b92 100644
--- a/intern/cycles/kernel/bvh/qbvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/qbvh_shadow_all.h
@@ -66,11 +66,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*num_hits = 0;
isect_array->t = tmax;
-#ifndef __KERNEL_SSE41__
- if(!isfinite(P.x)) {
- return false;
- }
-#endif
#if BVH_FEATURE(BVH_INSTANCING)
int num_hits_in_instance = 0;
@@ -103,7 +98,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Traverse internal nodes. */
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- (void)inodes;
+ (void) inodes;
if(false
#ifdef __VISIBILITY_FLAG__
diff --git a/intern/cycles/kernel/bvh/qbvh_traversal.h b/intern/cycles/kernel/bvh/qbvh_traversal.h
index 335a4afd47a..35c6e3aeec9 100644
--- a/intern/cycles/kernel/bvh/qbvh_traversal.h
+++ b/intern/cycles/kernel/bvh/qbvh_traversal.h
@@ -71,12 +71,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
Transform ob_itfm;
#endif
-#ifndef __KERNEL_SSE41__
- if(!isfinite(P.x)) {
- return false;
- }
-#endif
-
isect->t = ray->t;
isect->u = 0.0f;
isect->v = 0.0f;
@@ -112,7 +106,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Traverse internal nodes. */
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
- (void)inodes;
+ (void) inodes;
if(UNLIKELY(node_dist > isect->t)
#if BVH_FEATURE(BVH_MOTION)
@@ -188,7 +182,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
float4 cnodes;
/* TODO(sergey): Investigate whether moving cnodes upwards
* gives a speedup (will be different cache pattern but will
- * avoid extra check here),
+ * avoid extra check here).
*/
#if BVH_FEATURE(BVH_HAIR)
if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
diff --git a/intern/cycles/kernel/bvh/qbvh_volume.h b/intern/cycles/kernel/bvh/qbvh_volume.h
index 192ce009524..7ec264e5f78 100644
--- a/intern/cycles/kernel/bvh/qbvh_volume.h
+++ b/intern/cycles/kernel/bvh/qbvh_volume.h
@@ -58,12 +58,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
Transform ob_itfm;
#endif
-#ifndef __KERNEL_SSE41__
- if(!isfinite(P.x)) {
- return false;
- }
-#endif
-
isect->t = ray->t;
isect->u = 0.0f;
isect->v = 0.0f;
diff --git a/intern/cycles/kernel/bvh/qbvh_volume_all.h b/intern/cycles/kernel/bvh/qbvh_volume_all.h
index 1e454e4d36b..dd603d79334 100644
--- a/intern/cycles/kernel/bvh/qbvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/qbvh_volume_all.h
@@ -64,12 +64,6 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
uint num_hits = 0;
isect_array->t = tmax;
-#ifndef __KERNEL_SSE41__
- if(!isfinite(P.x)) {
- return 0;
- }
-#endif
-
#if BVH_FEATURE(BVH_INSTANCING)
int num_hits_in_instance = 0;
#endif
diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
index ff238b7a834..4e7425bd800 100644
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
@@ -232,4 +232,4 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng,
CCL_NAMESPACE_END
-#endif /* __BSDF_ASHIKHMIN_SHIRLEY_H__ */
+#endif /* __BSDF_ASHIKHMIN_SHIRLEY_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
index b0bdea723b9..80fd9ba2b37 100644
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
@@ -158,4 +158,4 @@ ccl_device int bsdf_ashikhmin_velvet_sample(const ShaderClosure *sc, float3 Ng,
CCL_NAMESPACE_END
-#endif /* __BSDF_ASHIKHMIN_VELVET_H__ */
+#endif /* __BSDF_ASHIKHMIN_VELVET_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_diffuse.h b/intern/cycles/kernel/closure/bsdf_diffuse.h
index ee6d4cdf2df..946c460a70e 100644
--- a/intern/cycles/kernel/closure/bsdf_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_diffuse.h
@@ -139,4 +139,4 @@ ccl_device int bsdf_translucent_sample(const ShaderClosure *sc, float3 Ng, float
CCL_NAMESPACE_END
-#endif /* __BSDF_DIFFUSE_H__ */
+#endif /* __BSDF_DIFFUSE_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
index 35bb2fdf0e8..ca33a5b275c 100644
--- a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
+++ b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
@@ -103,8 +103,8 @@ ccl_device int bsdf_diffuse_ramp_sample(const ShaderClosure *sc, float3 Ng, floa
return LABEL_REFLECT|LABEL_DIFFUSE;
}
-#endif /* __OSL__ */
+#endif /* __OSL__ */
CCL_NAMESPACE_END
-#endif /* __BSDF_DIFFUSE_RAMP_H__ */
+#endif /* __BSDF_DIFFUSE_RAMP_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h
index 7b44a23f05b..e1a0cfaa3f5 100644
--- a/intern/cycles/kernel/closure/bsdf_hair.h
+++ b/intern/cycles/kernel/closure/bsdf_hair.h
@@ -277,4 +277,4 @@ ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, float3 Ng,
CCL_NAMESPACE_END
-#endif /* __BSDF_HAIR_H__ */
+#endif /* __BSDF_HAIR_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_hair_principled.h b/intern/cycles/kernel/closure/bsdf_hair_principled.h
index b3b56be39ff..68335ee887a 100644
--- a/intern/cycles/kernel/closure/bsdf_hair_principled.h
+++ b/intern/cycles/kernel/closure/bsdf_hair_principled.h
@@ -229,7 +229,7 @@ ccl_device int bsdf_principled_hair_setup(ShaderData *sd, PrincipledHairBSDF *bs
return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG;
}
-#endif /* __HAIR__ */
+#endif /* __HAIR__ */
/* Given the Fresnel term and transmittance, generate the attenuation terms for each bounce. */
ccl_device_inline void hair_attenuation(KernelGlobals *kg,
@@ -296,7 +296,7 @@ ccl_device float3 bsdf_principled_hair_eval(KernelGlobals *kg,
float3 Y = float4_to_float3(bsdf->extra->geom);
float3 X = safe_normalize(sd->dPdu);
- kernel_assert(fabsf(dot(X, Y)) < 1e-4f);
+ kernel_assert(fabsf(dot(X, Y)) < 1e-3f);
float3 Z = safe_normalize(cross(X, Y));
float3 wo = make_float3(dot(sd->I, X), dot(sd->I, Y), dot(sd->I, Z));
@@ -378,7 +378,7 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals *kg,
float3 Y = float4_to_float3(bsdf->extra->geom);
float3 X = safe_normalize(sd->dPdu);
- kernel_assert(fabsf(dot(X, Y)) < 1e-4f);
+ kernel_assert(fabsf(dot(X, Y)) < 1e-3f);
float3 Z = safe_normalize(cross(X, Y));
float3 wo = make_float3(dot(sd->I, X), dot(sd->I, Y), dot(sd->I, Z));
@@ -499,4 +499,4 @@ ccl_device void bsdf_principled_hair_blur(ShaderClosure *sc, float roughness)
CCL_NAMESPACE_END
-#endif /* __BSDF_HAIR_PRINCIPLED_H__ */
+#endif /* __BSDF_HAIR_PRINCIPLED_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index e74d5ebaa42..32b6e50b09a 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -1124,4 +1124,4 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderCl
CCL_NAMESPACE_END
-#endif /* __BSDF_MICROFACET_H__ */
+#endif /* __BSDF_MICROFACET_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
index e73915dbda7..5d300ef6db5 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
@@ -76,7 +76,7 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(
eval *= -lambda_r / (shadowing_lambda - lambda_r);
else
eval *= -lambda_r * beta(-lambda_r, shadowing_lambda+1.0f);
-#else /* MF_MULTI_GLOSSY */
+#else /* MF_MULTI_GLOSSY */
const float G2 = 1.0f / (1.0f - (lambda_r + 1.0f) + shadowing_lambda);
float val = G2 * 0.25f / wi.z;
if(alpha.x == alpha.y)
@@ -129,7 +129,7 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(
phase = mf_eval_phase_glass(wr, lambda_r, wo, wo_outside, alpha, eta);
else
phase = mf_eval_phase_glass(wr, lambda_r, -wo, !wo_outside, alpha, 1.0f/eta);
-#else /* MF_MULTI_GLOSSY */
+#else /* MF_MULTI_GLOSSY */
phase = mf_eval_phase_glossy(wr, lambda_r, wo, alpha) * throughput;
#endif
eval += throughput * phase * mf_G1(wo_outside? wo: -wo, mf_C1((outside == wo_outside)? hr: -hr), shadowing_lambda);
@@ -153,7 +153,7 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(
else if(use_fresnel && order > 0) {
throughput *= interpolate_fresnel_color(wi_prev, wm, eta, F0, cspec0);
}
-#else /* MF_MULTI_GLOSSY */
+#else /* MF_MULTI_GLOSSY */
if(use_fresnel && order > 0) {
throughput *= interpolate_fresnel_color(-wr, wm, eta, F0, cspec0);
}
@@ -248,7 +248,7 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(
throughput *= t_color;
}
}
-#else /* MF_MULTI_GLOSSY */
+#else /* MF_MULTI_GLOSSY */
if(use_fresnel) {
float3 t_color = interpolate_fresnel_color(-wr, wm, eta, F0, cspec0);
diff --git a/intern/cycles/kernel/closure/bsdf_oren_nayar.h b/intern/cycles/kernel/closure/bsdf_oren_nayar.h
index 6b770fc0c16..3446d1609d9 100644
--- a/intern/cycles/kernel/closure/bsdf_oren_nayar.h
+++ b/intern/cycles/kernel/closure/bsdf_oren_nayar.h
@@ -108,4 +108,4 @@ ccl_device int bsdf_oren_nayar_sample(const ShaderClosure *sc, float3 Ng, float3
CCL_NAMESPACE_END
-#endif /* __BSDF_OREN_NAYAR_H__ */
+#endif /* __BSDF_OREN_NAYAR_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_phong_ramp.h b/intern/cycles/kernel/closure/bsdf_phong_ramp.h
index 91c7803346d..83da05ac435 100644
--- a/intern/cycles/kernel/closure/bsdf_phong_ramp.h
+++ b/intern/cycles/kernel/closure/bsdf_phong_ramp.h
@@ -135,8 +135,8 @@ ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc, float3 Ng, float3
return LABEL_REFLECT|LABEL_GLOSSY;
}
-#endif /* __OSL__ */
+#endif /* __OSL__ */
CCL_NAMESPACE_END
-#endif /* __BSDF_PHONG_RAMP_H__ */
+#endif /* __BSDF_PHONG_RAMP_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
index 83be2b35a00..2f65fd54be2 100644
--- a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
@@ -122,4 +122,4 @@ ccl_device int bsdf_principled_diffuse_sample(const ShaderClosure *sc,
CCL_NAMESPACE_END
-#endif /* __BSDF_PRINCIPLED_DIFFUSE_H__ */
+#endif /* __BSDF_PRINCIPLED_DIFFUSE_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_principled_sheen.h b/intern/cycles/kernel/closure/bsdf_principled_sheen.h
index 8b7c4399516..ccdcb1babd2 100644
--- a/intern/cycles/kernel/closure/bsdf_principled_sheen.h
+++ b/intern/cycles/kernel/closure/bsdf_principled_sheen.h
@@ -108,4 +108,4 @@ ccl_device int bsdf_principled_sheen_sample(const ShaderClosure *sc,
CCL_NAMESPACE_END
-#endif /* __BSDF_PRINCIPLED_SHEEN_H__ */
+#endif /* __BSDF_PRINCIPLED_SHEEN_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_reflection.h b/intern/cycles/kernel/closure/bsdf_reflection.h
index b33b6e3597b..94f1c283af7 100644
--- a/intern/cycles/kernel/closure/bsdf_reflection.h
+++ b/intern/cycles/kernel/closure/bsdf_reflection.h
@@ -77,4 +77,4 @@ ccl_device int bsdf_reflection_sample(const ShaderClosure *sc, float3 Ng, float3
CCL_NAMESPACE_END
-#endif /* __BSDF_REFLECTION_H__ */
+#endif /* __BSDF_REFLECTION_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_refraction.h b/intern/cycles/kernel/closure/bsdf_refraction.h
index b181650e928..abdd01c7a1d 100644
--- a/intern/cycles/kernel/closure/bsdf_refraction.h
+++ b/intern/cycles/kernel/closure/bsdf_refraction.h
@@ -86,4 +86,4 @@ ccl_device int bsdf_refraction_sample(const ShaderClosure *sc, float3 Ng, float3
CCL_NAMESPACE_END
-#endif /* __BSDF_REFRACTION_H__ */
+#endif /* __BSDF_REFRACTION_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_toon.h b/intern/cycles/kernel/closure/bsdf_toon.h
index 6d8074b7130..097a56f22eb 100644
--- a/intern/cycles/kernel/closure/bsdf_toon.h
+++ b/intern/cycles/kernel/closure/bsdf_toon.h
@@ -215,4 +215,4 @@ ccl_device int bsdf_glossy_toon_sample(const ShaderClosure *sc, float3 Ng, float
CCL_NAMESPACE_END
-#endif /* __BSDF_TOON_H__ */
+#endif /* __BSDF_TOON_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_transparent.h b/intern/cycles/kernel/closure/bsdf_transparent.h
index f788dbcd0ff..060dff69f52 100644
--- a/intern/cycles/kernel/closure/bsdf_transparent.h
+++ b/intern/cycles/kernel/closure/bsdf_transparent.h
@@ -106,4 +106,4 @@ ccl_device int bsdf_transparent_sample(const ShaderClosure *sc, float3 Ng, float
CCL_NAMESPACE_END
-#endif /* __BSDF_TRANSPARENT_H__ */
+#endif /* __BSDF_TRANSPARENT_H__ */
diff --git a/intern/cycles/kernel/closure/bsdf_util.h b/intern/cycles/kernel/closure/bsdf_util.h
index b080e025d16..4f3453675c7 100644
--- a/intern/cycles/kernel/closure/bsdf_util.h
+++ b/intern/cycles/kernel/closure/bsdf_util.h
@@ -158,4 +158,4 @@ ccl_device_forceinline float3 interpolate_fresnel_color(float3 L, float3 H, floa
CCL_NAMESPACE_END
-#endif /* __BSDF_UTIL_H__ */
+#endif /* __BSDF_UTIL_H__ */
diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h
index ba0c6ae8c61..98c7f23c288 100644
--- a/intern/cycles/kernel/closure/bssrdf.h
+++ b/intern/cycles/kernel/closure/bssrdf.h
@@ -499,4 +499,4 @@ ccl_device_forceinline float bssrdf_pdf(const ShaderClosure *sc, float r)
CCL_NAMESPACE_END
-#endif /* __KERNEL_BSSRDF_H__ */
+#endif /* __KERNEL_BSSRDF_H__ */
diff --git a/intern/cycles/kernel/filter/filter.h b/intern/cycles/kernel/filter/filter.h
index f6e474d6702..4209d69ee73 100644
--- a/intern/cycles/kernel/filter/filter.h
+++ b/intern/cycles/kernel/filter/filter.h
@@ -49,4 +49,4 @@ CCL_NAMESPACE_BEGIN
CCL_NAMESPACE_END
-#endif /* __FILTER_H__ */
+#endif /* __FILTER_H__ */
diff --git a/intern/cycles/kernel/filter/filter_defines.h b/intern/cycles/kernel/filter/filter_defines.h
index 1a2f22a6987..67f4e62ac0f 100644
--- a/intern/cycles/kernel/filter/filter_defines.h
+++ b/intern/cycles/kernel/filter/filter_defines.h
@@ -68,4 +68,4 @@ typedef struct TileInfo {
# define ccl_get_tile_buffer(id) (tile_info->buffers[id])
#endif
-#endif /* __FILTER_DEFINES_H__*/
+#endif /* __FILTER_DEFINES_H__*/
diff --git a/intern/cycles/kernel/filter/filter_nlm_cpu.h b/intern/cycles/kernel/filter/filter_nlm_cpu.h
index e2da0fd872b..af73c0dadf2 100644
--- a/intern/cycles/kernel/filter/filter_nlm_cpu.h
+++ b/intern/cycles/kernel/filter/filter_nlm_cpu.h
@@ -16,6 +16,9 @@
CCL_NAMESPACE_BEGIN
+#define load4_a(buf, ofs) (*((float4*) ((buf) + (ofs))))
+#define load4_u(buf, ofs) load_float4((buf)+(ofs))
+
ccl_device_inline void kernel_filter_nlm_calc_difference(int dx, int dy,
const float *ccl_restrict weight_image,
const float *ccl_restrict variance_image,
@@ -26,20 +29,28 @@ ccl_device_inline void kernel_filter_nlm_calc_difference(int dx, int dy,
float a,
float k_2)
{
+ /* Strides need to be aligned to 16 bytes. */
+ kernel_assert((stride % 4) == 0 && (channel_offset % 4) == 0);
+
+ int aligned_lowx = rect.x & (~3);
+ const int numChannels = (channel_offset > 0)? 3 : 1;
+ const float4 channel_fac = make_float4(1.0f / numChannels);
+
for(int y = rect.y; y < rect.w; y++) {
- for(int x = rect.x; x < rect.z; x++) {
- float diff = 0.0f;
- int numChannels = channel_offset? 3 : 1;
- for(int c = 0; c < numChannels; c++) {
- float cdiff = weight_image[c*channel_offset + y*stride + x] - weight_image[c*channel_offset + (y+dy)*stride + (x+dx)];
- float pvar = variance_image[c*channel_offset + y*stride + x];
- float qvar = variance_image[c*channel_offset + (y+dy)*stride + (x+dx)];
- diff += (cdiff*cdiff - a*(pvar + min(pvar, qvar))) / (1e-8f + k_2*(pvar+qvar));
- }
- if(numChannels > 1) {
- diff *= 1.0f/numChannels;
+ int idx_p = y*stride + aligned_lowx;
+ int idx_q = (y+dy)*stride + aligned_lowx + dx;
+ for(int x = aligned_lowx; x < rect.z; x += 4, idx_p += 4, idx_q += 4) {
+ float4 diff = make_float4(0.0f);
+ for(int c = 0, chan_ofs = 0; c < numChannels; c++, chan_ofs += channel_offset) {
+ /* idx_p is guaranteed to be aligned, but idx_q isn't. */
+ float4 color_p = load4_a(weight_image, idx_p + chan_ofs);
+ float4 color_q = load4_u(weight_image, idx_q + chan_ofs);
+ float4 cdiff = color_p - color_q;
+ float4 var_p = load4_a(variance_image, idx_p + chan_ofs);
+ float4 var_q = load4_u(variance_image, idx_q + chan_ofs);
+ diff += (cdiff*cdiff - a*(var_p + min(var_p, var_q))) / (make_float4(1e-8f) + k_2*(var_p+var_q));
}
- difference_image[y*stride + x] = diff;
+ load4_a(difference_image, idx_p) = diff*channel_fac;
}
}
}
@@ -50,52 +61,77 @@ ccl_device_inline void kernel_filter_nlm_blur(const float *ccl_restrict differen
int stride,
int f)
{
- int aligned_lowx = rect.x / 4;
- int aligned_highx = (rect.z + 3) / 4;
+ int aligned_lowx = round_down(rect.x, 4);
for(int y = rect.y; y < rect.w; y++) {
const int low = max(rect.y, y-f);
const int high = min(rect.w, y+f+1);
- for(int x = rect.x; x < rect.z; x++) {
- out_image[y*stride + x] = 0.0f;
+ for(int x = aligned_lowx; x < rect.z; x += 4) {
+ load4_a(out_image, y*stride + x) = make_float4(0.0f);
}
for(int y1 = low; y1 < high; y1++) {
- float4* out_image4 = (float4*)(out_image + y*stride);
- float4* difference_image4 = (float4*)(difference_image + y1*stride);
- for(int x = aligned_lowx; x < aligned_highx; x++) {
- out_image4[x] += difference_image4[x];
+ for(int x = aligned_lowx; x < rect.z; x += 4) {
+ load4_a(out_image, y*stride + x) += load4_a(difference_image, y1*stride + x);
}
}
- for(int x = rect.x; x < rect.z; x++) {
- out_image[y*stride + x] *= 1.0f/(high - low);
+ float fac = 1.0f/(high - low);
+ for(int x = aligned_lowx; x < rect.z; x += 4) {
+ load4_a(out_image, y*stride + x) *= fac;
}
}
}
-ccl_device_inline void kernel_filter_nlm_calc_weight(const float *ccl_restrict difference_image,
- float *out_image,
- int4 rect,
- int stride,
- int f)
+ccl_device_inline void nlm_blur_horizontal(const float *ccl_restrict difference_image,
+ float *out_image,
+ int4 rect,
+ int stride,
+ int f)
{
+ int aligned_lowx = round_down(rect.x, 4);
for(int y = rect.y; y < rect.w; y++) {
- for(int x = rect.x; x < rect.z; x++) {
- out_image[y*stride + x] = 0.0f;
+ for(int x = aligned_lowx; x < rect.z; x += 4) {
+ load4_a(out_image, y*stride + x) = make_float4(0.0f);
}
}
+
for(int dx = -f; dx <= f; dx++) {
- int pos_dx = max(0, dx);
- int neg_dx = min(0, dx);
+ aligned_lowx = round_down(rect.x - min(0, dx), 4);
+ int highx = rect.z - max(0, dx);
+ int4 lowx4 = make_int4(rect.x - min(0, dx));
+ int4 highx4 = make_int4(rect.z - max(0, dx));
for(int y = rect.y; y < rect.w; y++) {
- for(int x = rect.x-neg_dx; x < rect.z-pos_dx; x++) {
- out_image[y*stride + x] += difference_image[y*stride + x+dx];
+ for(int x = aligned_lowx; x < highx; x += 4) {
+ int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3);
+ int4 active = (x4 >= lowx4) & (x4 < highx4);
+
+ float4 diff = load4_u(difference_image, y*stride + x + dx);
+ load4_a(out_image, y*stride + x) += mask(active, diff);
}
}
}
+
+ aligned_lowx = round_down(rect.x, 4);
for(int y = rect.y; y < rect.w; y++) {
- for(int x = rect.x; x < rect.z; x++) {
- const int low = max(rect.x, x-f);
- const int high = min(rect.z, x+f+1);
- out_image[y*stride + x] = fast_expf(-max(out_image[y*stride + x] * (1.0f/(high - low)), 0.0f));
+ for(int x = aligned_lowx; x < rect.z; x += 4) {
+ float4 x4 = make_float4(x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f);
+ float4 low = max(make_float4(rect.x), x4 - make_float4(f));
+ float4 high = min(make_float4(rect.z), x4 + make_float4(f+1));
+ load4_a(out_image, y*stride + x) *= rcp(high - low);
+ }
+ }
+}
+
+ccl_device_inline void kernel_filter_nlm_calc_weight(const float *ccl_restrict difference_image,
+ float *out_image,
+ int4 rect,
+ int stride,
+ int f)
+{
+ nlm_blur_horizontal(difference_image, out_image, rect, stride, f);
+
+ int aligned_lowx = round_down(rect.x, 4);
+ for(int y = rect.y; y < rect.w; y++) {
+ for(int x = aligned_lowx; x < rect.z; x += 4) {
+ load4_a(out_image, y*stride + x) = fast_expf4(-max(load4_a(out_image, y*stride + x), make_float4(0.0f)));
}
}
}
@@ -103,23 +139,29 @@ ccl_device_inline void kernel_filter_nlm_calc_weight(const float *ccl_restrict d
ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy,
const float *ccl_restrict difference_image,
const float *ccl_restrict image,
+ float *temp_image,
float *out_image,
float *accum_image,
int4 rect,
int stride,
int f)
{
+ nlm_blur_horizontal(difference_image, temp_image, rect, stride, f);
+
+ int aligned_lowx = round_down(rect.x, 4);
for(int y = rect.y; y < rect.w; y++) {
- for(int x = rect.x; x < rect.z; x++) {
- const int low = max(rect.x, x-f);
- const int high = min(rect.z, x+f+1);
- float sum = 0.0f;
- for(int x1 = low; x1 < high; x1++) {
- sum += difference_image[y*stride + x1];
- }
- float weight = sum * (1.0f/(high - low));
- accum_image[y*stride + x] += weight;
- out_image[y*stride + x] += weight*image[(y+dy)*stride + (x+dx)];
+ for(int x = aligned_lowx; x < rect.z; x += 4) {
+ int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3);
+ int4 active = (x4 >= make_int4(rect.x)) & (x4 < make_int4(rect.z));
+
+ int idx_p = y*stride + x, idx_q = (y+dy)*stride + (x+dx);
+
+ float4 weight = load4_a(temp_image, idx_p);
+ load4_a(accum_image, idx_p) += mask(active, weight);
+
+ float4 val = load4_u(image, idx_q);
+
+ load4_a(out_image, idx_p) += mask(active, weight*val);
}
}
}
@@ -177,4 +219,7 @@ ccl_device_inline void kernel_filter_nlm_normalize(float *out_image,
}
}
+#undef load4_a
+#undef load4_u
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h
index 4cfbe21685c..b6b58b52a29 100644
--- a/intern/cycles/kernel/geom/geom_curve_intersect.h
+++ b/intern/cycles/kernel/geom/geom_curve_intersect.h
@@ -379,7 +379,7 @@ ccl_device_forceinline bool cardinal_curve_intersect(
float inv_mw_extension = 1.0f/mw_extension;
if(d0 >= 0)
coverage = (min(d1 * inv_mw_extension, 1.0f) - min(d0 * inv_mw_extension, 1.0f)) * 0.5f;
- else // inside
+ else // inside
coverage = (min(d1 * inv_mw_extension, 1.0f) + min(-d0 * inv_mw_extension, 1.0f)) * 0.5f;
}
@@ -817,16 +817,24 @@ ccl_device_inline float3 curve_refine(KernelGlobals *kg,
sd->Ng = normalize(-(D - tg * (dot(tg, D))));
}
else {
- /* direction from inside to surface of curve */
- float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]);
- sd->Ng = normalize(P - p_curr);
+#ifdef __EMBREE__
+ if(kernel_data.bvh.scene) {
+ sd->Ng = normalize(isect->Ng);
+ }
+ else
+#endif
+ {
+ /* direction from inside to surface of curve */
+ float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]);
+ sd->Ng = normalize(P - p_curr);
- /* adjustment for changing radius */
- float gd = isect->v;
+ /* adjustment for changing radius */
+ float gd = isect->v;
- if(gd != 0.0f) {
- sd->Ng = sd->Ng - gd * tg;
- sd->Ng = normalize(sd->Ng);
+ if(gd != 0.0f) {
+ sd->Ng = sd->Ng - gd * tg;
+ sd->Ng = normalize(sd->Ng);
+ }
}
}
diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h
index cfe17e63627..669c932d720 100644
--- a/intern/cycles/kernel/geom/geom_object.h
+++ b/intern/cycles/kernel/geom/geom_object.h
@@ -78,6 +78,12 @@ ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, int
const uint num_steps = kernel_tex_fetch(__objects, object).numsteps * 2 + 1;
Transform tfm;
+#ifdef __EMBREE__
+ if(kernel_data.bvh.scene) {
+ transform_motion_array_interpolate_straight(&tfm, motion, num_steps, time);
+ }
+ else
+#endif
transform_motion_array_interpolate(&tfm, motion, num_steps, time);
return tfm;
@@ -304,6 +310,24 @@ ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd)
return kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).pass_id;
}
+/* Cryptomatte ID */
+
+ccl_device_inline float object_cryptomatte_id(KernelGlobals *kg, int object)
+{
+ if(object == OBJECT_NONE)
+ return 0.0f;
+
+ return kernel_tex_fetch(__objects, object).cryptomatte_object;
+}
+
+ccl_device_inline float object_cryptomatte_asset_id(KernelGlobals *kg, int object)
+{
+ if(object == OBJECT_NONE)
+ return 0;
+
+ return kernel_tex_fetch(__objects, object).cryptomatte_asset;
+}
+
/* Particle data from which object was instanced */
ccl_device_inline uint particle_index(KernelGlobals *kg, int particle)
diff --git a/intern/cycles/kernel/geom/geom_subd_triangle.h b/intern/cycles/kernel/geom/geom_subd_triangle.h
index 00ce89ae567..8c0d0a9770e 100644
--- a/intern/cycles/kernel/geom/geom_subd_triangle.h
+++ b/intern/cycles/kernel/geom/geom_subd_triangle.h
@@ -146,7 +146,7 @@ ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals *kg, const
return a;
}
else
-#endif /* __PATCH_EVAL__ */
+#endif /* __PATCH_EVAL__ */
if(desc.element == ATTR_ELEMENT_FACE) {
if(dx) *dx = 0.0f;
if(dy) *dy = 0.0f;
@@ -271,7 +271,7 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg, con
return a;
}
else
-#endif /* __PATCH_EVAL__ */
+#endif /* __PATCH_EVAL__ */
if(desc.element == ATTR_ELEMENT_FACE) {
if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index aa6b102a0f3..57f4c86d403 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -71,28 +71,23 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
}
#ifdef __KERNEL_AVX2__
-
#define cross256(A,B, C,D) _mm256_fmsub_ps(A,B, _mm256_mul_ps(C,D))
-#if defined(__KERNEL_CUDA__) && __CUDA_ARCH__ < 300
-ccl_device_inline
-#else
-ccl_device_forceinline
-#endif
-int ray_triangle_intersect8(KernelGlobals *kg,
- float3 ray_P,
- float3 ray_dir,
- Intersection **isect,
- uint visibility,
- int object,
- __m256 *triA,
- __m256 *triB,
- __m256 *triC,
- int prim_addr,
- int prim_num,
- uint *num_hits,
- uint max_hits,
- int *num_hits_in_instance,
- float isec_t)
+ccl_device_inline int ray_triangle_intersect8(
+ KernelGlobals *kg,
+ float3 ray_P,
+ float3 ray_dir,
+ Intersection **isect,
+ uint visibility,
+ int object,
+ __m256 *triA,
+ __m256 *triB,
+ __m256 *triC,
+ int prim_addr,
+ int prim_num,
+ uint *num_hits,
+ uint max_hits,
+ int *num_hits_in_instance,
+ float isect_t)
{
const unsigned char prim_num_mask = (1 << prim_num) - 1;
@@ -108,10 +103,6 @@ int ray_triangle_intersect8(KernelGlobals *kg,
const __m256 dirz256 = _mm256_set1_ps(ray_dir.z);
/* Calculate vertices relative to ray origin. */
- /* const float3 v0 = tri_c - P;
- const float3 v1 = tri_a - P;
- const float3 v2 = tri_b - P; */
-
__m256 v0_x_256 = _mm256_sub_ps(triC[0], Px256);
__m256 v0_y_256 = _mm256_sub_ps(triC[1], Py256);
__m256 v0_z_256 = _mm256_sub_ps(triC[2], Pz256);
@@ -136,11 +127,7 @@ int ray_triangle_intersect8(KernelGlobals *kg,
__m256 v1_v2_y_256 = _mm256_add_ps(v1_y_256, v2_y_256);
__m256 v1_v2_z_256 = _mm256_add_ps(v1_z_256, v2_z_256);
- /* Calculate triangle edges.
- const float3 e0 = v2 - v0;
- const float3 e1 = v0 - v1;
- const float3 e2 = v1 - v2;*/
-
+ /* Calculate triangle edges. */
__m256 e0_x_256 = _mm256_sub_ps(v2_x_256, v0_x_256);
__m256 e0_y_256 = _mm256_sub_ps(v2_y_256, v0_y_256);
__m256 e0_z_256 = _mm256_sub_ps(v2_z_256, v0_z_256);
@@ -153,48 +140,32 @@ int ray_triangle_intersect8(KernelGlobals *kg,
__m256 e2_y_256 = _mm256_sub_ps(v1_y_256, v2_y_256);
__m256 e2_z_256 = _mm256_sub_ps(v1_z_256, v2_z_256);
- /* Perform edge tests.
- const float U = dot(cross(v2 + v0, e0), ray_dir);
- const float V = dot(cross(v0 + v1, e1), ray_dir);
- const float W = dot(cross(v1 + v2, e2), ray_dir);*/
-
- //cross (AyBz - AzBy, AzBx -AxBz, AxBy - AyBx)
+ /* Perform edge tests. */
+ /* cross (AyBz - AzBy, AzBx -AxBz, AxBy - AyBx) */
__m256 U_x_256 = cross256(v0_v2_y_256, e0_z_256, v0_v2_z_256, e0_y_256);
__m256 U_y_256 = cross256(v0_v2_z_256, e0_x_256, v0_v2_x_256, e0_z_256);
__m256 U_z_256 = cross256(v0_v2_x_256, e0_y_256, v0_v2_y_256, e0_x_256);
- //vertical dot
+ /* vertical dot */
__m256 U_256 = _mm256_mul_ps(U_x_256, dirx256);
- U_256 = _mm256_fmadd_ps(U_y_256, diry256, U_256); //_mm256_add_ps(U_256, _mm256_mul_ps(U_y_256, diry256));
- U_256 = _mm256_fmadd_ps(U_z_256, dirz256, U_256); //_mm256_add_ps(U_256, _mm256_mul_ps(U_z_256, dirz256));
+ U_256 = _mm256_fmadd_ps(U_y_256, diry256, U_256);
+ U_256 = _mm256_fmadd_ps(U_z_256, dirz256, U_256);
__m256 V_x_256 = cross256(v0_v1_y_256, e1_z_256, v0_v1_z_256, e1_y_256);
__m256 V_y_256 = cross256(v0_v1_z_256, e1_x_256, v0_v1_x_256, e1_z_256);
__m256 V_z_256 = cross256(v0_v1_x_256, e1_y_256, v0_v1_y_256, e1_x_256);
- //vertical dot
+ /* vertical dot */
__m256 V_256 = _mm256_mul_ps(V_x_256, dirx256);
- V_256 = _mm256_fmadd_ps(V_y_256, diry256, V_256);// _mm256_add_ps(V_256, _mm256_mul_ps(V_y_256, diry256));
- V_256 = _mm256_fmadd_ps(V_z_256, dirz256, V_256);// _mm256_add_ps(V_256, _mm256_mul_ps(V_z_256, dirz256));
+ V_256 = _mm256_fmadd_ps(V_y_256, diry256, V_256);
+ V_256 = _mm256_fmadd_ps(V_z_256, dirz256, V_256);
__m256 W_x_256 = cross256(v1_v2_y_256, e2_z_256, v1_v2_z_256, e2_y_256);
__m256 W_y_256 = cross256(v1_v2_z_256, e2_x_256, v1_v2_x_256, e2_z_256);
__m256 W_z_256 = cross256(v1_v2_x_256, e2_y_256, v1_v2_y_256, e2_x_256);
- //vertical dot
+ /* vertical dot */
__m256 W_256 = _mm256_mul_ps(W_x_256, dirx256);
- W_256 = _mm256_fmadd_ps(W_y_256, diry256,W_256);//_mm256_add_ps(W_256, _mm256_mul_ps(W_y_256, diry256));
- W_256 = _mm256_fmadd_ps(W_z_256, dirz256,W_256);//_mm256_add_ps(W_256, _mm256_mul_ps(W_z_256, dirz256));
-
- //const float minUVW = min(U, min(V, W));
- //const float maxUVW = max(U, max(V, W));
-#if 0
- __m256 minUVW_256 = _mm256_min_ps(U_256, _mm256_min_ps(V_256, W_256));
- __m256 maxUVW_256 = _mm256_max_ps(U_256, _mm256_max_ps(V_256, W_256));
-
- //if(minUVW < 0.0f && maxUVW > 0.0f)
- __m256i mask_minmaxUVW_256 = _mm256_and_si256(
- _mm256_cmpgt_epi32(zero256, _mm256_castps_si256(minUVW_256)),
- //_mm256_castps_si256(minUVW_256),
- _mm256_cmpgt_epi32(_mm256_castps_si256(maxUVW_256), zero256));
-#else
+ W_256 = _mm256_fmadd_ps(W_y_256, diry256,W_256);
+ W_256 = _mm256_fmadd_ps(W_z_256, dirz256,W_256);
+
__m256i U_256_1 = _mm256_srli_epi32(_mm256_castps_si256(U_256), 31);
__m256i V_256_1 = _mm256_srli_epi32(_mm256_castps_si256(V_256), 31);
__m256i W_256_1 = _mm256_srli_epi32(_mm256_castps_si256(W_256), 31);
@@ -204,9 +175,8 @@ int ray_triangle_intersect8(KernelGlobals *kg,
const __m256i two256 = _mm256_set1_epi32(2);
__m256i mask_minmaxUVW_256 = _mm256_or_si256(
- _mm256_cmpeq_epi32(one256, UVW_256_1),
- _mm256_cmpeq_epi32(two256, UVW_256_1) );
-#endif
+ _mm256_cmpeq_epi32(one256, UVW_256_1),
+ _mm256_cmpeq_epi32(two256, UVW_256_1));
unsigned char mask_minmaxUVW_pos = _mm256_movemask_ps(_mm256_castsi256_ps(mask_minmaxUVW_256));
if((mask_minmaxUVW_pos & prim_num_mask) == prim_num_mask) { //all bits set
@@ -214,231 +184,187 @@ int ray_triangle_intersect8(KernelGlobals *kg,
}
/* Calculate geometry normal and denominator. */
- // const float3 Ng1 = cross(e1, e0);
- //const Vec3vfM Ng1 = stable_triangle_normal(e2,e1,e0);
-
__m256 Ng1_x_256 = cross256(e1_y_256, e0_z_256, e1_z_256, e0_y_256);
__m256 Ng1_y_256 = cross256(e1_z_256, e0_x_256, e1_x_256, e0_z_256);
__m256 Ng1_z_256 = cross256(e1_x_256, e0_y_256, e1_y_256, e0_x_256);
- //const float3 Ng = Ng1 + Ng1;
Ng1_x_256 = _mm256_add_ps(Ng1_x_256, Ng1_x_256);
Ng1_y_256 = _mm256_add_ps(Ng1_y_256, Ng1_y_256);
Ng1_z_256 = _mm256_add_ps(Ng1_z_256, Ng1_z_256);
- //const float den = dot3(Ng, dir);
- //vertical dot
+ /* vertical dot */
__m256 den_256 = _mm256_mul_ps(Ng1_x_256, dirx256);
- den_256 = _mm256_fmadd_ps(Ng1_y_256, diry256,den_256);//_mm256_add_ps(den_256, _mm256_mul_ps(Ng1_y_256, diry256));
- den_256 = _mm256_fmadd_ps(Ng1_z_256, dirz256,den_256);//_mm256_add_ps(den_256, _mm256_mul_ps(Ng1_z_256, dirz256));
-
- // __m256i maskden256 = _mm256_cmpeq_epi32(_mm256_castps_si256(den_256), zero256);
+ den_256 = _mm256_fmadd_ps(Ng1_y_256, diry256,den_256);
+ den_256 = _mm256_fmadd_ps(Ng1_z_256, dirz256,den_256);
/* Perform depth test. */
- //const float T = dot3(v0, Ng);
__m256 T_256 = _mm256_mul_ps(Ng1_x_256, v0_x_256);
- T_256 = _mm256_fmadd_ps(Ng1_y_256, v0_y_256,T_256);//_mm256_add_ps(T_256, _mm256_mul_ps(Ng1_y_256, v0_y_256));
- T_256 = _mm256_fmadd_ps(Ng1_z_256, v0_z_256,T_256);//_mm256_add_ps(T_256, _mm256_mul_ps(Ng1_z_256, v0_z_256));
+ T_256 = _mm256_fmadd_ps(Ng1_y_256, v0_y_256,T_256);
+ T_256 = _mm256_fmadd_ps(Ng1_z_256, v0_z_256,T_256);
- //const int sign_den = (__float_as_int(den) & 0x80000000);
const __m256i c0x80000000 = _mm256_set1_epi32(0x80000000);
__m256i sign_den_256 = _mm256_and_si256(_mm256_castps_si256(den_256), c0x80000000);
- //const float sign_T = xor_signmask(T, sign_den);
__m256 sign_T_256 = _mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(T_256), sign_den_256));
- /*if((sign_T < 0.0f) || mask_minmaxUVW_pos { return false;} */
unsigned char mask_sign_T = _mm256_movemask_ps(sign_T_256);
if(((mask_minmaxUVW_pos | mask_sign_T) & prim_num_mask) == prim_num_mask) {
return false;
- } /**/
+ }
__m256 xor_signmask_256 = _mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256));
-
ccl_align(32) float den8[8], U8[8], V8[8], T8[8], sign_T8[8], xor_signmask8[8];
ccl_align(32) unsigned int mask_minmaxUVW8[8];
- if(visibility == PATH_RAY_SHADOW_OPAQUE){
- __m256i mask_final_256 = _mm256_cmpeq_epi32(mask_minmaxUVW_256, zero256);//~mask_minmaxUVW_256
-
- __m256i maskden256 = _mm256_cmpeq_epi32(_mm256_castps_si256(den_256), zero256);
-
- __m256i mask0 = _mm256_cmpgt_epi32(zero256, _mm256_castps_si256(sign_T_256));
- __m256 rayt_256 = _mm256_set1_ps((*isect)->t);
-
- __m256i mask1 = _mm256_cmpgt_epi32(_mm256_castps_si256(sign_T_256),
- _mm256_castps_si256(
- _mm256_mul_ps(_mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256)), rayt_256)
- )
- );
- /* __m256i mask1 = _mm256_castps_si256(_mm256_cmp_ps(sign_T_256,
- _mm256_mul_ps(_mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256)), rayt_256),
- _CMP_GT_OS
- ) );*/
-
- mask0 = _mm256_or_si256(mask1, mask0);
- //unsigned char mask = _mm256_movemask_ps(_mm256_castsi256_ps(mask0));
- //unsigned char maskden = _mm256_movemask_ps(_mm256_castsi256_ps(maskden256));
- //unsigned char mask_final = ((~mask) & (~maskden) & (~mask_minmaxUVW_pos));
- mask_final_256 = _mm256_andnot_si256(mask0, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask)
- mask_final_256 = _mm256_andnot_si256(maskden256, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask) & (~maskden)
-
- unsigned char mask_final = _mm256_movemask_ps(_mm256_castsi256_ps(mask_final_256));
- if((mask_final & prim_num_mask) == 0) { //all bits NOT set
- return false;
- } /**/
-
- unsigned long i = 0;
-#if defined(_MSC_VER)
- unsigned char res = _BitScanForward(&i, (unsigned long)mask_final);
-#else
- i = __builtin_ffs(mask_final)-1;
-#endif
-
- den_256 = _mm256_rcp_ps(den_256); //inv_den
- U_256 = _mm256_mul_ps(U_256, den_256); //*inv_den
- V_256 = _mm256_mul_ps(V_256, den_256); //*inv_den
- T_256 = _mm256_mul_ps(T_256, den_256); //*inv_den
-
- _mm256_store_ps(U8, U_256);
- _mm256_store_ps(V8, V_256);
- _mm256_store_ps(T8, T_256);
-
-
- //here we assume (kernel_tex_fetch(__prim_visibility, (prim_addr +i)) & visibility) is always true
-
- (*isect)->u = U8[i];
- (*isect)->v = V8[i];
- (*isect)->t = T8[i];
-
- (*isect)->prim = (prim_addr + i);
- (*isect)->object = object;
- (*isect)->type = PRIMITIVE_TRIANGLE;
-
- return true;
+ if(visibility == PATH_RAY_SHADOW_OPAQUE) {
+ __m256i mask_final_256 = _mm256_cmpeq_epi32(mask_minmaxUVW_256, zero256);
+ __m256i maskden256 = _mm256_cmpeq_epi32(_mm256_castps_si256(den_256), zero256);
+ __m256i mask0 = _mm256_cmpgt_epi32(zero256, _mm256_castps_si256(sign_T_256));
+ __m256 rayt_256 = _mm256_set1_ps((*isect)->t);
+ __m256i mask1 = _mm256_cmpgt_epi32(_mm256_castps_si256(sign_T_256),
+ _mm256_castps_si256(
+ _mm256_mul_ps(_mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256)), rayt_256)
+ )
+ );
+ mask0 = _mm256_or_si256(mask1, mask0);
+ mask_final_256 = _mm256_andnot_si256(mask0, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask)
+ mask_final_256 = _mm256_andnot_si256(maskden256, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask) & (~maskden)
+ unsigned char mask_final = _mm256_movemask_ps(_mm256_castsi256_ps(mask_final_256));
+ if((mask_final & prim_num_mask) == 0) {
+ return false;
}
+ const int i = __bsf(mask_final);
+ __m256 inv_den_256 = _mm256_rcp_ps(den_256);
+ U_256 = _mm256_mul_ps(U_256, inv_den_256);
+ V_256 = _mm256_mul_ps(V_256, inv_den_256);
+ T_256 = _mm256_mul_ps(T_256, inv_den_256);
+ _mm256_store_ps(U8, U_256);
+ _mm256_store_ps(V8, V_256);
+ _mm256_store_ps(T8, T_256);
+ /* NOTE: Here we assume visibility for all triangles in the node is
+ * the same. */
+ (*isect)->u = U8[i];
+ (*isect)->v = V8[i];
+ (*isect)->t = T8[i];
+ (*isect)->prim = (prim_addr + i);
+ (*isect)->object = object;
+ (*isect)->type = PRIMITIVE_TRIANGLE;
+ return true;
+ }
else {
- _mm256_store_ps(den8, den_256);
- _mm256_store_ps(U8, U_256);
- _mm256_store_ps(V8, V_256);
- _mm256_store_ps(T8, T_256);
+ _mm256_store_ps(den8, den_256);
+ _mm256_store_ps(U8, U_256);
+ _mm256_store_ps(V8, V_256);
+ _mm256_store_ps(T8, T_256);
- _mm256_store_ps(sign_T8, sign_T_256);
- _mm256_store_ps(xor_signmask8, xor_signmask_256);
- _mm256_store_si256((__m256i*)mask_minmaxUVW8, mask_minmaxUVW_256);
+ _mm256_store_ps(sign_T8, sign_T_256);
+ _mm256_store_ps(xor_signmask8, xor_signmask_256);
+ _mm256_store_si256((__m256i*)mask_minmaxUVW8, mask_minmaxUVW_256);
- int ret = false;
+ int ret = false;
- if(visibility == PATH_RAY_SHADOW) {
- for(int i = 0; i < prim_num; i++) {
- if(!mask_minmaxUVW8[i]) {
+ if(visibility == PATH_RAY_SHADOW) {
+ for(int i = 0; i < prim_num; i++) {
+ if(mask_minmaxUVW8[i]) {
+ continue;
+ }
#ifdef __VISIBILITY_FLAG__
- if(kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility)
+ if((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) {
+ continue;
+ }
#endif
- {
- if((sign_T8[i] >= 0.0f) &&
- (sign_T8[i] <= (*isect)->t * xor_signmask8[i]))
- {
- if(den8[i]) {
- const float inv_den = 1.0f / den8[i];
-
- (*isect)->u = U8[i] * inv_den;
- (*isect)->v = V8[i] * inv_den;
- (*isect)->t = T8[i] * inv_den;
-
- (*isect)->prim = (prim_addr + i);
- (*isect)->object = object;
- (*isect)->type = PRIMITIVE_TRIANGLE;
-
- int prim = kernel_tex_fetch(__prim_index, (*isect)->prim);
- int shader = 0;
-
+ if((sign_T8[i] < 0.0f) ||
+ (sign_T8[i] > (*isect)->t * xor_signmask8[i]))
+ {
+ continue;
+ }
+ if(!den8[i]) {
+ continue;
+ }
+ const float inv_den = 1.0f / den8[i];
+ (*isect)->u = U8[i] * inv_den;
+ (*isect)->v = V8[i] * inv_den;
+ (*isect)->t = T8[i] * inv_den;
+ (*isect)->prim = (prim_addr + i);
+ (*isect)->object = object;
+ (*isect)->type = PRIMITIVE_TRIANGLE;
+ const int prim = kernel_tex_fetch(__prim_index, (*isect)->prim);
+ int shader = 0;
#ifdef __HAIR__
- if(kernel_tex_fetch(__prim_type, (*isect)->prim) & PRIMITIVE_ALL_TRIANGLE)
+ if(kernel_tex_fetch(__prim_type, (*isect)->prim) & PRIMITIVE_ALL_TRIANGLE)
#endif
- {
- shader = kernel_tex_fetch(__tri_shader, prim);
- }
+ {
+ shader = kernel_tex_fetch(__tri_shader, prim);
+ }
#ifdef __HAIR__
- else {
- float4 str = kernel_tex_fetch(__curves, prim);
- shader = __float_as_int(str.z);
- }
+ else {
+ float4 str = kernel_tex_fetch(__curves, prim);
+ shader = __float_as_int(str.z);
+ }
#endif
- int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-
- /* if no transparent shadows, all light is blocked */
- if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
- return 2;
- }
- /* if maximum number of hits reached, block all light */
- else if(*num_hits == max_hits) {
- return 2;
- }
- /* move on to next entry in intersections array */
- ret = true;
-
- (*isect)++;
- (*num_hits)++;
-
- (*num_hits_in_instance)++;
-
- (*isect)->t = isec_t;
-
- } //den
- } //if sign
- } //vis
- }//if mask
- } //for
+ const int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
+ /* If no transparent shadows, all light is blocked. */
+ if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
+ return 2;
+ }
+ /* If maximum number of hits reached, block all light. */
+ else if(num_hits == NULL || *num_hits == max_hits) {
+ return 2;
+ }
+ /* Move on to next entry in intersections array. */
+ ret = true;
+ (*isect)++;
+ (*num_hits)++;
+ (*num_hits_in_instance)++;
+ (*isect)->t = isect_t;
+ }
}
- else { //default case
+ else {
for(int i = 0; i < prim_num; i++) {
- if(!mask_minmaxUVW8[i]) {
+ if(mask_minmaxUVW8[i]) {
+ continue;
+ }
#ifdef __VISIBILITY_FLAG__
- if(kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility)
+ if((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) {
+ continue;
+ }
#endif
- {
- if((sign_T8[i] >= 0.0f) &&
- (sign_T8[i] <= (*isect)->t * xor_signmask8[i]))
- {
- if(den8[i]) {
- const float inv_den = 1.0f / den8[i];
-
- (*isect)->u = U8[i] * inv_den;
- (*isect)->v = V8[i] * inv_den;
- (*isect)->t = T8[i] * inv_den;
-
- (*isect)->prim = (prim_addr + i);
- (*isect)->object = object;
- (*isect)->type = PRIMITIVE_TRIANGLE;
-
- ret = true;
- } //den
- } //if sign
- } //vis
- }//if mask
- } //for
- } //default
- return ret;
-}// else PATH_RAY_SHADOW_OPAQUE
-
+ if((sign_T8[i] < 0.0f) ||
+ (sign_T8[i] > (*isect)->t * xor_signmask8[i]))
+ {
+ continue;
+ }
+ if(!den8[i]) {
+ continue;
+ }
+ const float inv_den = 1.0f / den8[i];
+ (*isect)->u = U8[i] * inv_den;
+ (*isect)->v = V8[i] * inv_den;
+ (*isect)->t = T8[i] * inv_den;
+ (*isect)->prim = (prim_addr + i);
+ (*isect)->object = object;
+ (*isect)->type = PRIMITIVE_TRIANGLE;
+ ret = true;
+ }
+ }
+ return ret;
+ }
}
-//vz static
-ccl_device_inline
-int triangle_intersect8(KernelGlobals *kg,
- Intersection **isect,
- float3 P,
- float3 dir,
- uint visibility,
- int object,
- int prim_addr,
- int prim_num,
- uint *num_hits,
- uint max_hits,
- int *num_hits_in_instance,
- float isec_t)
+ccl_device_inline int triangle_intersect8(
+ KernelGlobals *kg,
+ Intersection **isect,
+ float3 P,
+ float3 dir,
+ uint visibility,
+ int object,
+ int prim_addr,
+ int prim_num,
+ uint *num_hits,
+ uint max_hits,
+ int *num_hits_in_instance,
+ float isect_t)
{
__m128 tri_a[8], tri_b[8], tri_c[8];
__m256 tritmp[12], tri[12];
@@ -540,11 +466,11 @@ int triangle_intersect8(KernelGlobals *kg,
num_hits,
max_hits,
num_hits_in_instance,
- isec_t);
+ isect_t);
return result;
}
-#endif /* __KERNEL_AVX2__ */
+#endif /* __KERNEL_AVX2__ */
/* Special ray intersection routines for subsurface scattering. In that case we
* only want to intersect with primitives in the same object, and if case of
diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h
index 373324afb01..1c8c91d15e6 100644
--- a/intern/cycles/kernel/kernel.h
+++ b/intern/cycles/kernel/kernel.h
@@ -63,4 +63,4 @@ void kernel_tex_copy(KernelGlobals *kg,
CCL_NAMESPACE_END
-#endif /* __KERNEL_H__ */
+#endif /* __KERNEL_H__ */
diff --git a/intern/cycles/kernel/kernel_color.h b/intern/cycles/kernel/kernel_color.h
index 990e798543a..ea478a8a5d3 100644
--- a/intern/cycles/kernel/kernel_color.h
+++ b/intern/cycles/kernel/kernel_color.h
@@ -35,4 +35,4 @@ ccl_device float linear_rgb_to_gray(KernelGlobals *kg, float3 c)
CCL_NAMESPACE_END
-#endif /* __KERNEL_COLOR_H__ */
+#endif /* __KERNEL_COLOR_H__ */
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index aa7a16afa1d..4ee80850402 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -153,4 +153,4 @@ typedef vector3<avxf> avx3f;
CCL_NAMESPACE_END
-#endif /* __KERNEL_COMPAT_CPU_H__ */
+#endif /* __KERNEL_COMPAT_CPU_H__ */
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index ac63bcf7ac9..8ed96bbae64 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -150,4 +150,4 @@ ccl_device_inline uint ccl_num_groups(uint d)
#define logf(x) __logf(((float)(x)))
#define expf(x) __expf(((float)(x)))
-#endif /* __KERNEL_COMPAT_CUDA_H__ */
+#endif /* __KERNEL_COMPAT_CUDA_H__ */
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index 3f7e264fbee..21a95098894 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -159,4 +159,4 @@
#include "util/util_half.h"
#include "util/util_types.h"
-#endif /* __KERNEL_COMPAT_OPENCL_H__ */
+#endif /* __KERNEL_COMPAT_OPENCL_H__ */
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index 74cfacb5bc1..37402f42863 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -21,6 +21,7 @@
#ifdef __KERNEL_CPU__
# include "util/util_vector.h"
+# include "util/util_map.h"
#endif
#ifdef __KERNEL_OPENCL__
@@ -42,6 +43,8 @@ struct OSLThreadData;
struct OSLShadingSystem;
# endif
+typedef unordered_map<float, float> CoverageMap;
+
struct Intersection;
struct VolumeStep;
@@ -68,6 +71,11 @@ typedef struct KernelGlobals {
VolumeStep *decoupled_volume_steps[2];
int decoupled_volume_steps_index;
+ /* A buffer for storing per-pixel coverage for Cryptomatte. */
+ CoverageMap *coverage_object;
+ CoverageMap *coverage_material;
+ CoverageMap *coverage_asset;
+
/* split kernel */
SplitData split_data;
SplitParams split_param_data;
diff --git a/intern/cycles/kernel/kernel_id_passes.h b/intern/cycles/kernel/kernel_id_passes.h
new file mode 100644
index 00000000000..ee3b8b8abfb
--- /dev/null
+++ b/intern/cycles/kernel/kernel_id_passes.h
@@ -0,0 +1,94 @@
+/*
+* Copyright 2018 Blender Foundation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer, int num_slots, float id, float weight)
+{
+ kernel_assert(id != ID_NONE);
+ if(weight == 0.0f) {
+ return;
+ }
+
+ for(int slot = 0; slot < num_slots; slot++) {
+ ccl_global float2 *id_buffer = (ccl_global float2*)buffer;
+#ifdef __ATOMIC_PASS_WRITE__
+ /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
+ if(id_buffer[slot].x == ID_NONE) {
+ /* Use an atomic to claim this slot.
+ * If a different thread claimed it first with another ID, continue with the next slot. */
+ float old_id = atomic_compare_and_swap_float(buffer+slot*2, ID_NONE, id);
+ if(old_id != ID_NONE && old_id != id) {
+ continue;
+ }
+ atomic_add_and_fetch_float(buffer+slot*2+1, weight);
+ break;
+ }
+ /* If there already is a slot for that ID, add the weight.
+ * If no slot was found, add it to the last. */
+ else if(id_buffer[slot].x == id || slot == num_slots - 1) {
+ atomic_add_and_fetch_float(buffer+slot*2+1, weight);
+ break;
+ }
+#else /* __ATOMIC_PASS_WRITE__ */
+ /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
+ if(id_buffer[slot].x == ID_NONE) {
+ id_buffer[slot].x = id;
+ id_buffer[slot].y = weight;
+ break;
+ }
+ /* If there already is a slot for that ID, add the weight.
+ * If no slot was found, add it to the last. */
+ else if(id_buffer[slot].x == id || slot == num_slots - 1) {
+ id_buffer[slot].y += weight;
+ break;
+ }
+#endif /* __ATOMIC_PASS_WRITE__ */
+ }
+}
+
+ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_slots)
+{
+ ccl_global float2 *id_buffer = (ccl_global float2*)buffer;
+ for(int slot = 1; slot < num_slots; ++slot) {
+ if(id_buffer[slot].x == ID_NONE) {
+ return;
+ }
+ /* Since we're dealing with a tiny number of elements, insertion sort should be fine. */
+ int i = slot;
+ while(i > 0 && id_buffer[i].y > id_buffer[i - 1].y) {
+ float2 swap = id_buffer[i];
+ id_buffer[i] = id_buffer[i - 1];
+ id_buffer[i - 1] = swap;
+ --i;
+ }
+ }
+}
+
+#ifdef __KERNEL_GPU__
+/* post-sorting for Cryptomatte */
+ccl_device void kernel_cryptomatte_post(KernelGlobals *kg, ccl_global float *buffer, uint sample, int x, int y, int offset, int stride)
+{
+ if(sample - 1 == kernel_data.integrator.aa_samples) {
+ int index = offset + x + y * stride;
+ int pass_stride = kernel_data.film.pass_stride;
+ ccl_global float *cryptomatte_buffer = buffer + index * pass_stride + kernel_data.film.pass_cryptomatte;
+ kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth);
+ }
+}
+#endif
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_math.h b/intern/cycles/kernel/kernel_math.h
index 96391db7649..a8a43f3ea4a 100644
--- a/intern/cycles/kernel/kernel_math.h
+++ b/intern/cycles/kernel/kernel_math.h
@@ -25,4 +25,4 @@
#include "util/util_texture.h"
#include "util/util_transform.h"
-#endif /* __KERNEL_MATH_H__ */
+#endif /* __KERNEL_MATH_H__ */
diff --git a/intern/cycles/kernel/kernel_montecarlo.h b/intern/cycles/kernel/kernel_montecarlo.h
index 9b96bb80c32..dde93844dd3 100644
--- a/intern/cycles/kernel/kernel_montecarlo.h
+++ b/intern/cycles/kernel/kernel_montecarlo.h
@@ -187,7 +187,10 @@ ccl_device float2 regular_polygon_sample(float corners, float rotation, float u,
ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N)
{
float3 R = 2*dot(N, I)*N - I;
- if(dot(Ng, R) >= 0.05f) {
+
+ /* Reflection rays may always be at least as shallow as the incoming ray. */
+ float threshold = min(0.9f*dot(Ng, I), 0.01f);
+ if(dot(Ng, R) >= threshold) {
return N;
}
@@ -195,24 +198,88 @@ ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N)
* The X axis is found by normalizing the component of N that's orthogonal to Ng.
* The Y axis isn't actually needed.
*/
- float3 X = normalize(N - dot(N, Ng)*Ng);
-
- /* Calculate N.z and N.x in the local coordinate system. */
- float Iz = dot(I, Ng);
- float Ix2 = sqr(dot(I, X)), Iz2 = sqr(Iz);
- float Ix2Iz2 = Ix2 + Iz2;
-
- float a = safe_sqrtf(Ix2*(Ix2Iz2 - sqr(0.05f)));
- float b = Iz*0.05f + Ix2Iz2;
- float c = (a + b > 0.0f)? (a + b) : (-a + b);
+ float NdotNg = dot(N, Ng);
+ float3 X = normalize(N - NdotNg*Ng);
+
+ /* Calculate N.z and N.x in the local coordinate system.
+ *
+ * The goal of this computation is to find a N' that is rotated towards Ng just enough
+ * to lift R' above the threshold (here called t), therefore dot(R', Ng) = t.
+ *
+ * According to the standard reflection equation, this means that we want dot(2*dot(N', I)*N' - I, Ng) = t.
+ *
+ * Since the Z axis of our local coordinate system is Ng, dot(x, Ng) is just x.z, so we get 2*dot(N', I)*N'.z - I.z = t.
+ *
+ * The rotation is simple to express in the coordinate system we formed - since N lies in the X-Z-plane, we know that
+ * N' will also lie in the X-Z-plane, so N'.y = 0 and therefore dot(N', I) = N'.x*I.x + N'.z*I.z .
+ *
+ * Furthermore, we want N' to be normalized, so N'.x = sqrt(1 - N'.z^2).
+ *
+ * With these simplifications, we get the final equation 2*(sqrt(1 - N'.z^2)*I.x + N'.z*I.z)*N'.z - I.z = t.
+ *
+ * The only unknown here is N'.z, so we can solve for that.
+ *
+ * The equation has four solutions in general:
+ *
+ * N'.z = +-sqrt(0.5*(+-sqrt(I.x^2*(I.x^2 + I.z^2 - t^2)) + t*I.z + I.x^2 + I.z^2)/(I.x^2 + I.z^2))
+ * We can simplify this expression a bit by grouping terms:
+ *
+ * a = I.x^2 + I.z^2
+ * b = sqrt(I.x^2 * (a - t^2))
+ * c = I.z*t + a
+ * N'.z = +-sqrt(0.5*(+-b + c)/a)
+ *
+ * Two solutions can immediately be discarded because they're negative so N' would lie in the lower hemisphere.
+ */
+ float Ix = dot(I, X), Iz = dot(I, Ng);
+ float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
+ float a = Ix2 + Iz2;
+
+ float b = safe_sqrtf(Ix2*(a - sqr(threshold)));
+ float c = Iz*threshold + a;
+
+ /* Evaluate both solutions.
+ * In many cases one can be immediately discarded (if N'.z would be imaginary or larger than one), so check for that first.
+ * If no option is viable (might happen in extreme cases like N being in the wrong hemisphere), give up and return Ng. */
+ float fac = 0.5f/a;
+ float N1_z2 = fac*(b+c), N2_z2 = fac*(-b+c);
+ bool valid1 = (N1_z2 > 1e-5f) && (N1_z2 <= (1.0f + 1e-5f));
+ bool valid2 = (N2_z2 > 1e-5f) && (N2_z2 <= (1.0f + 1e-5f));
+
+ float2 N_new;
+ if(valid1 && valid2) {
+ /* If both are possible, do the expensive reflection-based check. */
+ float2 N1 = make_float2(safe_sqrtf(1.0f - N1_z2), safe_sqrtf(N1_z2));
+ float2 N2 = make_float2(safe_sqrtf(1.0f - N2_z2), safe_sqrtf(N2_z2));
+
+ float R1 = 2*(N1.x*Ix + N1.y*Iz)*N1.y - Iz;
+ float R2 = 2*(N2.x*Ix + N2.y*Iz)*N2.y - Iz;
+
+ valid1 = (R1 >= 1e-5f);
+ valid2 = (R2 >= 1e-5f);
+ if(valid1 && valid2) {
+ /* If both solutions are valid, return the one with the shallower reflection since it will be closer to the input
+ * (if the original reflection wasn't shallow, we would not be in this part of the function). */
+ N_new = (R1 < R2)? N1 : N2;
+ }
+ else {
+ /* If only one reflection is valid (= positive), pick that one. */
+ N_new = (R1 > R2)? N1 : N2;
+ }
- float Nz = safe_sqrtf(0.5f * c * (1.0f / Ix2Iz2));
- float Nx = safe_sqrtf(1.0f - sqr(Nz));
+ }
+ else if(valid1 || valid2) {
+ /* Only one solution passes the N'.z criterion, so pick that one. */
+ float Nz2 = valid1? N1_z2 : N2_z2;
+ N_new = make_float2(safe_sqrtf(1.0f - Nz2), safe_sqrtf(Nz2));
+ }
+ else {
+ return Ng;
+ }
- /* Transform back into global coordinates. */
- return Nx*X + Nz*Ng;
+ return N_new.x*X + N_new.y*Ng;
}
CCL_NAMESPACE_END
-#endif /* __KERNEL_MONTECARLO_CL__ */
+#endif /* __KERNEL_MONTECARLO_CL__ */
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index 458aa6c2a97..80477f921ea 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -14,12 +14,14 @@
* limitations under the License.
*/
-CCL_NAMESPACE_BEGIN
-
#if defined(__SPLIT_KERNEL__) || defined(__KERNEL_CUDA__)
#define __ATOMIC_PASS_WRITE__
#endif
+#include "kernel/kernel_id_passes.h"
+
+CCL_NAMESPACE_BEGIN
+
ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, float value)
{
ccl_global float *buf = buffer;
@@ -108,7 +110,7 @@ ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, ccl_glob
float value = path_total_shaded / max(path_total, 1e-7f);
kernel_write_pass_float(buffer+2, value*value);
}
-#endif /* __DENOISING_FEATURES__ */
+#endif /* __DENOISING_FEATURES__ */
ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg,
ShaderData *sd,
@@ -187,7 +189,24 @@ ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg,
L->debug_data.num_ray_bounces);
}
}
-#endif /* __KERNEL_DEBUG__ */
+#endif /* __KERNEL_DEBUG__ */
+
+#ifdef __KERNEL_CPU__
+#define WRITE_ID_SLOT(buffer, depth, id, matte_weight, name) kernel_write_id_pass_cpu(buffer, depth * 2, id, matte_weight, kg->coverage_##name)
+ccl_device_inline size_t kernel_write_id_pass_cpu(float *buffer, size_t depth, float id, float matte_weight, CoverageMap *map)
+{
+ if(map) {
+ (*map)[id] += matte_weight;
+ return 0;
+ }
+#else /* __KERNEL_CPU__ */
+#define WRITE_ID_SLOT(buffer, depth, id, matte_weight, name) kernel_write_id_slots_gpu(buffer, depth * 2, id, matte_weight)
+ccl_device_inline size_t kernel_write_id_slots_gpu(ccl_global float *buffer, size_t depth, float id, float matte_weight)
+{
+#endif /* __KERNEL_CPU__ */
+ kernel_write_id_slots(buffer, depth, id, matte_weight);
+ return depth * 2;
+}
ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L,
ShaderData *sd, ccl_addr_space PathState *state, float3 throughput)
@@ -242,6 +261,26 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl
}
}
+ if(kernel_data.film.cryptomatte_passes) {
+ const float matte_weight = average(throughput) * (1.0f - average(shader_bsdf_transparency(kg, sd)));
+ if(matte_weight > 0.0f) {
+ ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte;
+ if(kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) {
+ float id = object_cryptomatte_id(kg, sd->object);
+ cryptomatte_buffer += WRITE_ID_SLOT(cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, object);
+ }
+ if(kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) {
+ float id = shader_cryptomatte_id(kg, sd->shader);
+ cryptomatte_buffer += WRITE_ID_SLOT(cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, material);
+ }
+ if(kernel_data.film.cryptomatte_passes & CRYPT_ASSET) {
+ float id = object_cryptomatte_asset_id(kg, sd->object);
+ cryptomatte_buffer += WRITE_ID_SLOT(cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, asset);
+ }
+ }
+ }
+
+
if(light_flag & PASSMASK_COMPONENT(DIFFUSE))
L->color_diffuse += shader_bsdf_diffuse(kg, sd)*throughput;
if(light_flag & PASSMASK_COMPONENT(GLOSSY))
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 5745762e183..cb1f410b09f 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -266,7 +266,7 @@ ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(
}
#endif /* __VOLUME__ */
-#endif /* __SPLIT_KERNEL__ */
+#endif /* __SPLIT_KERNEL__ */
ccl_device_forceinline bool kernel_path_shader_apply(
KernelGlobals *kg,
@@ -434,7 +434,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
else if(result == VOLUME_PATH_MISSED) {
break;
}
-#endif /* __VOLUME__*/
+#endif /* __VOLUME__*/
/* Shade background. */
if(!hit) {
@@ -557,7 +557,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
#endif /* __SUBSURFACE__ */
}
-#endif /* defined(__BRANCHED_PATH__) || defined(__BAKING__) */
+#endif /* defined(__BRANCHED_PATH__) || defined(__BAKING__) */
ccl_device_forceinline void kernel_path_integrate(
KernelGlobals *kg,
@@ -605,7 +605,7 @@ ccl_device_forceinline void kernel_path_integrate(
else if(result == VOLUME_PATH_MISSED) {
break;
}
-#endif /* __VOLUME__*/
+#endif /* __VOLUME__*/
/* Shade background. */
if(!hit) {
diff --git a/intern/cycles/kernel/kernel_path_volume.h b/intern/cycles/kernel/kernel_path_volume.h
index feaea15d3c4..d2506fc1e7e 100644
--- a/intern/cycles/kernel/kernel_path_volume.h
+++ b/intern/cycles/kernel/kernel_path_volume.h
@@ -55,7 +55,7 @@ ccl_device_inline void kernel_path_volume_connect_light(
}
}
}
-#endif /* __EMISSION__ */
+#endif /* __EMISSION__ */
}
#ifdef __KERNEL_GPU__
@@ -277,10 +277,10 @@ ccl_device void kernel_branched_path_volume_connect_light(
}
}
}
-#endif /* __EMISSION__ */
+#endif /* __EMISSION__ */
}
-#endif /* __SPLIT_KERNEL__ */
+#endif /* __SPLIT_KERNEL__ */
-#endif /* __VOLUME_SCATTER__ */
+#endif /* __VOLUME_SCATTER__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h
index e32d4bbbc1b..de8cc4a0cef 100644
--- a/intern/cycles/kernel/kernel_queues.h
+++ b/intern/cycles/kernel/kernel_queues.h
@@ -145,4 +145,4 @@ ccl_device int dequeue_ray_index(
CCL_NAMESPACE_END
-#endif // __KERNEL_QUEUE_H__
+#endif // __KERNEL_QUEUE_H__
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index b33e4eba8a4..61ddf4a4f81 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -50,7 +50,7 @@ ccl_device uint sobol_dimension(KernelGlobals *kg, int index, int dimension)
return result;
}
-#endif /* __SOBOL__ */
+#endif /* __SOBOL__ */
ccl_device_forceinline float path_rng_1D(KernelGlobals *kg,
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index e834b701f96..af883aa715b 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -1276,4 +1276,9 @@ ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect
}
#endif /* __TRANSPARENT_SHADOWS__ */
+ccl_device float shader_cryptomatte_id(KernelGlobals *kg, int shader)
+{
+ return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).cryptomatte_id;
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h
index 8a0da6c3b13..fafa3ad4bfa 100644
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@ -446,7 +446,7 @@ ccl_device bool shadow_blocked_transparent_stepped(
}
# endif /* __KERNEL_GPU__ || !__SHADOW_RECORD_ALL__ */
-#endif /* __TRANSPARENT_SHADOWS__ */
+#endif /* __TRANSPARENT_SHADOWS__ */
ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
ShaderData *sd,
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index e93100a6442..864aa7c470a 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -17,6 +17,12 @@
#ifndef __KERNEL_TYPES_H__
#define __KERNEL_TYPES_H__
+#if !defined(__KERNEL_GPU__) && defined(WITH_EMBREE)
+# include <embree3/rtcore.h>
+# include <embree3/rtcore_scene.h>
+# define __EMBREE__
+#endif
+
#include "kernel/kernel_math.h"
#include "kernel/svm/svm_types.h"
#include "util/util_static_assert.h"
@@ -53,6 +59,7 @@ CCL_NAMESPACE_BEGIN
#define OBJECT_NONE (~0)
#define PRIM_NONE (~0)
#define LAMP_NONE (~0)
+#define ID_NONE (0.0f)
#define VOLUME_STACK_SIZE 32
@@ -415,6 +422,7 @@ typedef enum PassType {
PASS_RAY_BOUNCES,
#endif
PASS_RENDER_TIME,
+ PASS_CRYPTOMATTE,
PASS_CATEGORY_MAIN_END = 31,
PASS_MIST = 32,
@@ -443,6 +451,14 @@ typedef enum PassType {
#define PASS_ANY (~0)
+typedef enum CryptomatteType {
+ CRYPT_NONE = 0,
+ CRYPT_OBJECT = (1 << 0),
+ CRYPT_MATERIAL = (1 << 1),
+ CRYPT_ASSET = (1 << 2),
+ CRYPT_ACCURATE = (1 << 3),
+} CryptomatteType;
+
typedef enum DenoisingPassOffsets {
DENOISING_PASS_NORMAL = 0,
DENOISING_PASS_NORMAL_VAR = 3,
@@ -599,7 +615,7 @@ typedef ccl_addr_space struct PathRadiance {
#ifdef __KERNEL_DEBUG__
DebugData debug_data;
-#endif /* __KERNEL_DEBUG__ */
+#endif /* __KERNEL_DEBUG__ */
} PathRadiance;
typedef struct BsdfEval {
@@ -712,6 +728,9 @@ typedef struct Ray {
/* Intersection */
typedef struct Intersection {
+#ifdef __EMBREE__
+ float3 Ng;
+#endif
float t, u, v;
int prim;
int object;
@@ -1260,6 +1279,9 @@ typedef struct KernelFilm {
int pass_shadow;
float pass_shadow_scale;
int filter_table_offset;
+ int cryptomatte_passes;
+ int cryptomatte_depth;
+ int pass_cryptomatte;
int pass_mist;
float mist_start;
@@ -1270,8 +1292,6 @@ typedef struct KernelFilm {
int pass_denoising_clean;
int denoising_flags;
- int pad1, pad2, pad3;
-
/* XYZ to rendering color space transform. float4 instead of float3 to
* ensure consistent padding/alignment across devices. */
float4 xyz_to_r;
@@ -1385,20 +1405,29 @@ typedef enum KernelBVHLayout {
BVH_LAYOUT_BVH2 = (1 << 0),
BVH_LAYOUT_BVH4 = (1 << 1),
BVH_LAYOUT_BVH8 = (1 << 2),
-
+ BVH_LAYOUT_EMBREE = (1 << 3),
BVH_LAYOUT_DEFAULT = BVH_LAYOUT_BVH8,
BVH_LAYOUT_ALL = (unsigned int)(-1),
} KernelBVHLayout;
typedef struct KernelBVH {
- /* root node */
+ /* Own BVH */
int root;
int have_motion;
int have_curves;
int have_instancing;
int bvh_layout;
int use_bvh_steps;
+
+ /* Embree */
+#ifdef __EMBREE__
+ RTCScene scene;
+# ifndef __KERNEL_64_BIT__
+ int pad1;
+# endif
+#else
int pad1, pad2;
+#endif
} KernelBVH;
static_assert_align(KernelBVH, 16);
@@ -1460,7 +1489,11 @@ typedef struct KernelObject {
uint patch_map_offset;
uint attribute_map_offset;
uint motion_offset;
- uint pad;
+ uint pad1;
+
+ float cryptomatte_object;
+ float cryptomatte_asset;
+ float pad2, pad3;
} KernelObject;
static_assert_align(KernelObject, 16);
@@ -1540,7 +1573,7 @@ static_assert_align(KernelParticle, 16);
typedef struct KernelShader {
float constant_emission[3];
- float pad1;
+ float cryptomatte_id;
int flags;
int pass_id;
int pad2, pad3;
@@ -1672,4 +1705,4 @@ typedef struct WorkTile {
CCL_NAMESPACE_END
-#endif /* __KERNEL_TYPES_H__ */
+#endif /* __KERNEL_TYPES_H__ */
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index d71761a97bc..d6d283c42c5 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -87,7 +87,7 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals *kg,
return true;
}
-#endif /* __VOLUME__ */
+#endif /* __VOLUME__ */
ccl_device float3 volume_color_transmittance(float3 sigma, float t)
{
@@ -270,7 +270,7 @@ ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg,
kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput);
}
-#endif /* __VOLUME__ */
+#endif /* __VOLUME__ */
/* Equi-angular sampling as in:
* "Importance Sampling Techniques for Path Tracing in Participating Media" */
@@ -1075,7 +1075,7 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
return VOLUME_PATH_SCATTERED;
}
-#endif /* __SPLIT_KERNEL */
+#endif /* __SPLIT_KERNEL */
/* decide if we need to use decoupled or not */
ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneous, bool direct, int sampling_method)
@@ -1377,6 +1377,6 @@ ccl_device_inline void kernel_volume_clean_stack(KernelGlobals *kg,
}
}
-#endif /* __VOLUME__ */
+#endif /* __VOLUME__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
index b62aa9663ec..e036b53b810 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
@@ -95,6 +95,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
int dy,
float *difference_image,
float *image,
+ float *temp_image,
float *out_image,
float *accum_image,
int* rect,
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
index 26777fdabb2..4c758711481 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
@@ -191,6 +191,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
int dy,
float *difference_image,
float *image,
+ float *temp_image,
float *out_image,
float *accum_image,
int *rect,
@@ -200,7 +201,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, filter_nlm_update_output);
#else
- kernel_filter_nlm_update_output(dx, dy, difference_image, image, out_image, accum_image, load_int4(rect), stride, f);
+ kernel_filter_nlm_update_output(dx, dy, difference_image, image, temp_image, out_image, accum_image, load_int4(rect), stride, f);
#endif
}
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
index b77b7350d86..ae4fd85780d 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
@@ -26,7 +26,7 @@ template<typename T> struct TextureInterpolator {
u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \
u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \
u[3] = (1.0f / 6.0f) * t * t * t; \
- } (void)0
+ } (void) 0
static ccl_always_inline float4 read(float4 r)
{
@@ -540,4 +540,4 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x,
CCL_NAMESPACE_END
-#endif // __KERNEL_CPU_IMAGE_H__
+#endif // __KERNEL_CPU_IMAGE_H__
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 5ec1655ab05..759b7e4c20d 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -97,7 +97,7 @@ void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
{
kernel_path_trace(kg, buffer, sample, x, y, offset, stride);
}
-#endif /* KERNEL_STUB */
+#endif /* KERNEL_STUB */
}
/* Film */
@@ -120,7 +120,7 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg,
x, y,
offset,
stride);
-#endif /* KERNEL_STUB */
+#endif /* KERNEL_STUB */
}
void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
@@ -141,7 +141,7 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
x, y,
offset,
stride);
-#endif /* KERNEL_STUB */
+#endif /* KERNEL_STUB */
}
/* Shader Evaluate */
@@ -176,7 +176,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
else {
kernel_background_evaluate(kg, input, output, i);
}
-#endif /* KERNEL_STUB */
+#endif /* KERNEL_STUB */
}
#else /* __SPLIT_KERNEL__ */
@@ -208,7 +208,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
ccl_local type locals; \
kernel_##name(kg, &locals); \
}
-#endif /* KERNEL_STUB */
+#endif /* KERNEL_STUB */
DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu
index 0561c40e6b1..b856cbde45c 100644
--- a/intern/cycles/kernel/kernels/cuda/filter.cu
+++ b/intern/cycles/kernel/kernels/cuda/filter.cu
@@ -140,7 +140,7 @@ kernel_cuda_filter_nlm_calc_difference(const float *ccl_restrict weight_image,
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int channel_offset,
float a,
@@ -148,7 +148,7 @@ kernel_cuda_filter_nlm_calc_difference(const float *ccl_restrict weight_image,
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_calc_difference(co.x, co.y, co.z, co.w,
weight_image,
variance_image,
@@ -165,13 +165,13 @@ kernel_cuda_filter_nlm_blur(const float *ccl_restrict difference_image,
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_blur(co.x, co.y,
difference_image + ofs,
out_image + ofs,
@@ -186,13 +186,13 @@ kernel_cuda_filter_nlm_calc_weight(const float *ccl_restrict difference_image,
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_calc_weight(co.x, co.y,
difference_image + ofs,
out_image + ofs,
@@ -209,13 +209,13 @@ kernel_cuda_filter_nlm_update_output(const float *ccl_restrict difference_image,
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_update_output(co.x, co.y, co.z, co.w,
difference_image + ofs,
image,
@@ -252,14 +252,13 @@ kernel_cuda_filter_nlm_construct_gramian(const float *ccl_restrict difference_im
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
- int f,
- int pass_stride)
+ int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords_window(w, h, r, shift_stride, &rect, &co, &ofs, filter_window)) {
+ if(get_nlm_coords_window(w, h, r, pass_stride, &rect, &co, &ofs, filter_window)) {
kernel_filter_nlm_construct_gramian(co.x, co.y,
co.z, co.w,
difference_image + ofs,
diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu
index 8a180a509e8..af311027f78 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel.cu
@@ -40,14 +40,21 @@ CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_path_trace(WorkTile *tile, uint total_work_size)
{
int work_index = ccl_global_id(0);
-
- if(work_index < total_work_size) {
- uint x, y, sample;
+ bool thread_is_active = work_index < total_work_size;
+ uint x, y, sample;
+ KernelGlobals kg;
+ if(thread_is_active) {
get_work_pixel(tile, work_index, &x, &y, &sample);
- KernelGlobals kg;
kernel_path_trace(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
}
+
+ if(kernel_data.film.cryptomatte_passes) {
+ __syncthreads();
+ if(thread_is_active) {
+ kernel_cryptomatte_post(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
+ }
+ }
}
#ifdef __BRANCHED_PATH__
@@ -56,14 +63,21 @@ CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_BRANCHED_MAX_REGISTERS)
kernel_cuda_branched_path_trace(WorkTile *tile, uint total_work_size)
{
int work_index = ccl_global_id(0);
-
- if(work_index < total_work_size) {
- uint x, y, sample;
+ bool thread_is_active = work_index < total_work_size;
+ uint x, y, sample;
+ KernelGlobals kg;
+ if(thread_is_active) {
get_work_pixel(tile, work_index, &x, &y, &sample);
- KernelGlobals kg;
kernel_branched_path_trace(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
}
+
+ if(kernel_data.film.cryptomatte_passes) {
+ __syncthreads();
+ if(thread_is_active) {
+ kernel_cryptomatte_post(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
+ }
+ }
}
#endif
diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl
index 3c75754fb39..a550f97f4eb 100644
--- a/intern/cycles/kernel/kernels/opencl/filter.cl
+++ b/intern/cycles/kernel/kernels/opencl/filter.cl
@@ -132,7 +132,7 @@ __kernel void kernel_ocl_filter_nlm_calc_difference(const ccl_global float *ccl_
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int channel_offset,
float a,
@@ -140,7 +140,7 @@ __kernel void kernel_ocl_filter_nlm_calc_difference(const ccl_global float *ccl_
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_calc_difference(co.x, co.y, co.z, co.w,
weight_image,
variance_image,
@@ -155,13 +155,13 @@ __kernel void kernel_ocl_filter_nlm_blur(const ccl_global float *ccl_restrict di
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_blur(co.x, co.y,
difference_image + ofs,
out_image + ofs,
@@ -174,13 +174,13 @@ __kernel void kernel_ocl_filter_nlm_calc_weight(const ccl_global float *ccl_rest
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_calc_weight(co.x, co.y,
difference_image + ofs,
out_image + ofs,
@@ -195,13 +195,13 @@ __kernel void kernel_ocl_filter_nlm_update_output(const ccl_global float *ccl_re
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords(w, h, r, shift_stride, &rect, &co, &ofs)) {
+ if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
kernel_filter_nlm_update_output(co.x, co.y, co.z, co.w,
difference_image + ofs,
image,
@@ -234,14 +234,13 @@ __kernel void kernel_ocl_filter_nlm_construct_gramian(const ccl_global float *cc
int w,
int h,
int stride,
- int shift_stride,
+ int pass_stride,
int r,
- int f,
- int pass_stride)
+ int f)
{
int4 co, rect;
int ofs;
- if(get_nlm_coords_window(w, h, r, shift_stride, &rect, &co, &ofs, filter_window)) {
+ if(get_nlm_coords_window(w, h, r, pass_stride, &rect, &co, &ofs, filter_window)) {
kernel_filter_nlm_construct_gramian(co.x, co.y,
co.z, co.w,
difference_image + ofs,
diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl
index 63128d0aecf..de1f5088629 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel.cl
@@ -66,9 +66,17 @@ __kernel void kernel_ocl_path_trace(
int x = sx + ccl_global_id(0);
int y = sy + ccl_global_id(1);
-
- if(x < sx + sw && y < sy + sh)
+ bool thread_is_active = x < sx + sw && y < sy + sh;
+ if(thread_is_active) {
kernel_path_trace(kg, buffer, sample, x, y, offset, stride);
+ }
+ if(kernel_data.film.cryptomatte_passes) {
+ /* Make sure no thread is writing to the buffers. */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if(thread_is_active) {
+ kernel_cryptomatte_post(kg, buffer, sample, x, y, offset, stride);
+ }
+ }
}
#else /* __COMPILE_ONLY_MEGAKERNEL__ */
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
index dd9d683e030..79af831c2fb 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
+++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
@@ -142,7 +142,7 @@ ccl_device_inline float svm_image_texture_frac(float x, int *ix)
u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \
u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \
u[3] = (1.0f / 6.0f) * t * t * t; \
- } (void)0
+ } (void) 0
ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
{
diff --git a/intern/cycles/kernel/osl/osl_closures.h b/intern/cycles/kernel/osl/osl_closures.h
index d9aeb9ab9fb..2a50704b569 100644
--- a/intern/cycles/kernel/osl/osl_closures.h
+++ b/intern/cycles/kernel/osl/osl_closures.h
@@ -146,4 +146,4 @@ CCLOSURE_PREPARE_STATIC(bsdf_##lower##_prepare, Upper##Closure)
CCL_NAMESPACE_END
-#endif /* __OSL_CLOSURES_H__ */
+#endif /* __OSL_CLOSURES_H__ */
diff --git a/intern/cycles/kernel/osl/osl_globals.h b/intern/cycles/kernel/osl/osl_globals.h
index 30b29793e2d..88192fbcccb 100644
--- a/intern/cycles/kernel/osl/osl_globals.h
+++ b/intern/cycles/kernel/osl/osl_globals.h
@@ -94,4 +94,4 @@ CCL_NAMESPACE_END
#endif
-#endif /* __OSL_GLOBALS_H__ */
+#endif /* __OSL_GLOBALS_H__ */
diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp
index 7902381440b..97f97a4887e 100644
--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@@ -884,6 +884,23 @@ bool OSLRenderServices::has_userdata(ustring name, TypeDesc type, OSL::ShaderGlo
return false; /* never called by OSL */
}
+TextureSystem::TextureHandle *OSLRenderServices::get_texture_handle(ustring filename)
+{
+ if(filename.length() && filename[0] == '@') {
+ /* Dummy, we don't use texture handles for builtin textures but need
+ * to tell the OSL runtime optimizer that this is a valid texture. */
+ return NULL;
+ }
+ else {
+ return texturesys()->get_texture_handle(filename);
+ }
+}
+
+bool OSLRenderServices::good(TextureSystem::TextureHandle *texture_handle)
+{
+ return texturesys()->good(texture_handle);
+}
+
bool OSLRenderServices::texture(ustring filename,
TextureHandle *texture_handle,
TexturePerthread *texture_thread_info,
@@ -894,7 +911,8 @@ bool OSLRenderServices::texture(ustring filename,
int nchannels,
float *result,
float *dresultds,
- float *dresultdt)
+ float *dresultdt,
+ ustring *errormessage)
{
OSL::TextureSystem *ts = osl_ts;
ShaderData *sd = (ShaderData *)(sg->renderstate);
@@ -1035,7 +1053,7 @@ bool OSLRenderServices::texture(ustring filename,
* other nasty stuff happening.
*/
string err = ts->geterror();
- (void)err;
+ (void) err;
}
return status;
@@ -1114,7 +1132,7 @@ bool OSLRenderServices::texture3d(ustring filename,
* other nasty stuff happening.
*/
string err = ts->geterror();
- (void)err;
+ (void) err;
}
return status;
@@ -1156,7 +1174,13 @@ bool OSLRenderServices::get_texture_info(OSL::ShaderGlobals *sg, ustring filenam
TypeDesc datatype, void *data)
{
OSL::TextureSystem *ts = osl_ts;
- return ts->get_texture_info(filename, subimage, dataname, datatype, data);
+ if(filename.length() && filename[0] == '@') {
+ /* Special builtin textures. */
+ return false;
+ }
+ else {
+ return ts->get_texture_info(filename, subimage, dataname, datatype, data);
+ }
}
int OSLRenderServices::pointcloud_search(OSL::ShaderGlobals *sg, ustring filename, const OSL::Vec3 &center,
diff --git a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h
index 50044746fd1..712b06b41b8 100644
--- a/intern/cycles/kernel/osl/osl_services.h
+++ b/intern/cycles/kernel/osl/osl_services.h
@@ -93,6 +93,10 @@ public:
bool getmessage(OSL::ShaderGlobals *sg, ustring source, ustring name,
TypeDesc type, void *val, bool derivatives);
+ TextureSystem::TextureHandle *get_texture_handle(ustring filename);
+
+ bool good(TextureSystem::TextureHandle *texture_handle);
+
bool texture(ustring filename,
TextureSystem::TextureHandle *texture_handle,
TexturePerthread *texture_thread_info,
@@ -103,7 +107,8 @@ public:
int nchannels,
float *result,
float *dresultds,
- float *dresultdt);
+ float *dresultdt,
+ ustring *errormessage);
bool texture3d(ustring filename,
TextureHandle *texture_handle,
@@ -194,4 +199,4 @@ private:
CCL_NAMESPACE_END
-#endif /* __OSL_SERVICES_H__ */
+#endif /* __OSL_SERVICES_H__ */
diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp
index 6a690e880ad..a89bb3fd1a3 100644
--- a/intern/cycles/kernel/osl/osl_shader.cpp
+++ b/intern/cycles/kernel/osl/osl_shader.cpp
@@ -193,7 +193,7 @@ void OSLShader::eval_surface(KernelGlobals *kg, ShaderData *sd, PathState *state
float data[9];
bool found = kg->osl->services->get_attribute(sd, true, OSLRenderServices::u_empty, TypeDesc::TypeVector,
OSLRenderServices::u_geom_undisplaced, data);
- (void)found;
+ (void) found;
assert(found);
memcpy(&sd->P, data, sizeof(float)*3);
diff --git a/intern/cycles/kernel/osl/osl_shader.h b/intern/cycles/kernel/osl/osl_shader.h
index 571a3f502be..9824f966a44 100644
--- a/intern/cycles/kernel/osl/osl_shader.h
+++ b/intern/cycles/kernel/osl/osl_shader.h
@@ -66,4 +66,4 @@ CCL_NAMESPACE_END
#endif
-#endif /* __OSL_SHADER_H__ */
+#endif /* __OSL_SHADER_H__ */
diff --git a/intern/cycles/kernel/shaders/oslutil.h b/intern/cycles/kernel/shaders/oslutil.h
index 141e5d27e3a..592a8ad12d9 100644
--- a/intern/cycles/kernel/shaders/oslutil.h
+++ b/intern/cycles/kernel/shaders/oslutil.h
@@ -92,4 +92,4 @@ float wireframe(string edge_type, float line_width) { return wireframe(edge_type
float wireframe(string edge_type) { return wireframe(edge_type, 1.0, 1); }
float wireframe() { return wireframe("polygons", 1.0, 1); }
-#endif /* CCL_OSLUTIL_H */
+#endif /* CCL_OSLUTIL_H */
diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h
index 4a8378796ba..7136c746321 100644
--- a/intern/cycles/kernel/shaders/stdosl.h
+++ b/intern/cycles/kernel/shaders/stdosl.h
@@ -284,33 +284,63 @@ point rotate (point p, float angle, point a, point b)
normal ensure_valid_reflection(normal Ng, vector I, normal N)
{
+ /* The implementation here mirrors the one in kernel_montecarlo.h,
+ * check there for an explanation of the algorithm. */
+
float sqr(float x) { return x*x; }
vector R = 2*dot(N, I)*N - I;
- if (dot(Ng, R) >= 0.05) {
+
+ float threshold = min(0.9*dot(Ng, I), 0.01);
+ if(dot(Ng, R) >= threshold) {
return N;
}
- /* Form coordinate system with Ng as the Z axis and N inside the X-Z-plane.
- * The X axis is found by normalizing the component of N that's orthogonal to Ng.
- * The Y axis isn't actually needed.
- */
- vector X = normalize(N - dot(N, Ng)*Ng);
+ float NdotNg = dot(N, Ng);
+ vector X = normalize(N - NdotNg*Ng);
- /* Calculate N.z and N.x in the local coordinate system. */
float Ix = dot(I, X), Iz = dot(I, Ng);
- float Ix2 = sqr(dot(I, X)), Iz2 = sqr(dot(I, Ng));
- float Ix2Iz2 = Ix2 + Iz2;
-
- float a = sqrt(Ix2*(Ix2Iz2 - sqr(0.05)));
- float b = Iz*0.05 + Ix2Iz2;
- float c = (a + b > 0.0)? (a + b) : (-a + b);
+ float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
+ float a = Ix2 + Iz2;
+
+ float b = sqrt(Ix2*(a - sqr(threshold)));
+ float c = Iz*threshold + a;
+
+ float fac = 0.5/a;
+ float N1_z2 = fac*(b+c), N2_z2 = fac*(-b+c);
+ int valid1 = (N1_z2 > 1e-5) && (N1_z2 <= (1.0 + 1e-5));
+ int valid2 = (N2_z2 > 1e-5) && (N2_z2 <= (1.0 + 1e-5));
+
+ float N_new_x, N_new_z;
+ if(valid1 && valid2) {
+ float N1_x = sqrt(1.0 - N1_z2), N1_z = sqrt(N1_z2);
+ float N2_x = sqrt(1.0 - N2_z2), N2_z = sqrt(N2_z2);
+
+ float R1 = 2*(N1_x*Ix + N1_z*Iz)*N1_z - Iz;
+ float R2 = 2*(N2_x*Ix + N2_z*Iz)*N2_z - Iz;
+
+ valid1 = (R1 >= 1e-5);
+ valid2 = (R2 >= 1e-5);
+ if(valid1 && valid2) {
+ N_new_x = (R1 < R2)? N1_x : N2_x;
+ N_new_z = (R1 < R2)? N1_z : N2_z;
+ }
+ else {
+ N_new_x = (R1 > R2)? N1_x : N2_x;
+ N_new_z = (R1 > R2)? N1_z : N2_z;
+ }
- float Nz = sqrt(0.5 * c * (1.0 / Ix2Iz2));
- float Nx = sqrt(1.0 - sqr(Nz));
+ }
+ else if(valid1 || valid2) {
+ float Nz2 = valid1? N1_z2 : N2_z2;
+ N_new_x = sqrt(1.0 - Nz2);
+ N_new_z = sqrt(Nz2);
+ }
+ else {
+ return Ng;
+ }
- /* Transform back into global coordinates. */
- return Nx*X + Nz*Ng;
+ return N_new_x*X + N_new_z*Ng;
}
@@ -485,7 +515,7 @@ float smooth_linearstep (float edge0, float edge1, float x_, float eps_) {
else if (x >= eps && x <= 1.0-eps) result = x;
else if (x >= 1.0+eps) result = 1;
else if (x < eps) result = rampup (x+eps, 2.0*eps);
- else /* if (x < 1.0+eps) */ result = 1.0 - rampup (1.0+eps - x, 2.0*eps);
+ else /* if (x < 1.0+eps) */ result = 1.0 - rampup (1.0+eps - x, 2.0*eps);
} else {
result = step (edge0, x_);
}
@@ -656,4 +686,4 @@ int getmatrix (string fromspace, output matrix M) {
#undef PERCOMP2
#undef PERCOMP2F
-#endif /* CCL_STDOSL_H */
+#endif /* CCL_STDOSL_H */
diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h
index 180c0b57077..18eec6372f1 100644
--- a/intern/cycles/kernel/split/kernel_buffer_update.h
+++ b/intern/cycles/kernel/split/kernel_buffer_update.h
@@ -80,8 +80,10 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ bool ray_was_updated = false;
if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
+ ray_was_updated = true;
uint sample = state->sample;
uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
@@ -92,6 +94,17 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
}
+ if(kernel_data.film.cryptomatte_passes) {
+ /* Make sure no thread is writing to the buffers. */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if(ray_was_updated && state->sample - 1 == kernel_data.integrator.aa_samples) {
+ uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+ ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte;
+ kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth);
+ }
+ }
+
if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
/* We have completed current work; So get next work */
ccl_global uint *work_pools = kernel_split_params.work_pools;
diff --git a/intern/cycles/kernel/split/kernel_shader_sort.h b/intern/cycles/kernel/split/kernel_shader_sort.h
index 2132c42220f..666355de334 100644
--- a/intern/cycles/kernel/split/kernel_shader_sort.h
+++ b/intern/cycles/kernel/split/kernel_shader_sort.h
@@ -78,7 +78,7 @@ ccl_device void kernel_shader_sort(KernelGlobals *kg,
}
}
}
-# endif /* __KERNEL_OPENCL__ */
+# endif /* __KERNEL_OPENCL__ */
/* copy to destination */
for(uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
@@ -91,7 +91,7 @@ ccl_device void kernel_shader_sort(KernelGlobals *kg,
kernel_split_state.queue_data[outi] = (value == (~0)) ? QUEUE_EMPTY_SLOT : kernel_split_state.queue_data[ini];
}
}
-#endif /* __KERNEL_CUDA__ */
+#endif /* __KERNEL_CUDA__ */
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h
index 9297e1e0ad5..3f6b3977d79 100644
--- a/intern/cycles/kernel/split/kernel_split_data.h
+++ b/intern/cycles/kernel/split/kernel_split_data.h
@@ -24,7 +24,7 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline uint64_t split_data_buffer_size(KernelGlobals *kg, size_t num_elements)
{
- (void)kg; /* Unused on CPU. */
+ (void) kg; /* Unused on CPU. */
uint64_t size = 0;
#define SPLIT_DATA_ENTRY(type, name, num) + align_up(num_elements * num * sizeof(type), 16)
@@ -48,7 +48,7 @@ ccl_device_inline void split_data_init(KernelGlobals *kg,
ccl_global void *data,
ccl_global char *ray_state)
{
- (void)kg; /* Unused on CPU. */
+ (void) kg; /* Unused on CPU. */
ccl_global char *p = (ccl_global char*)data;
diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h
index 56194d9f857..83df1e2a0a6 100644
--- a/intern/cycles/kernel/split/kernel_split_data_types.h
+++ b/intern/cycles/kernel/split/kernel_split_data_types.h
@@ -86,14 +86,14 @@ typedef ccl_global struct SplitBranchedState {
SPLIT_DATA_ENTRY(ccl_global SubsurfaceIndirectRays, ss_rays, 1)
#else
# define SPLIT_DATA_SUBSURFACE_ENTRIES
-#endif /* __SUBSURFACE__ */
+#endif /* __SUBSURFACE__ */
#ifdef __VOLUME__
# define SPLIT_DATA_VOLUME_ENTRIES \
SPLIT_DATA_ENTRY(ccl_global PathState, state_shadow, 1)
#else
# define SPLIT_DATA_VOLUME_ENTRIES
-#endif /* __VOLUME__ */
+#endif /* __VOLUME__ */
#define SPLIT_DATA_ENTRIES \
SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index ab69afa051e..ccb9aef7a5b 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -313,7 +313,7 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ccl_a
case NODE_LEAVE_BUMP_EVAL:
svm_node_leave_bump_eval(kg, sd, stack, node.y);
break;
-# endif /* NODES_FEATURE(NODE_FEATURE_BUMP_STATE) */
+# endif /* NODES_FEATURE(NODE_FEATURE_BUMP_STATE) */
# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */
case NODE_HSV:
svm_node_hsv(kg, sd, stack, node, &offset);
@@ -497,4 +497,4 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ccl_a
CCL_NAMESPACE_END
-#endif /* __SVM_H__ */
+#endif /* __SVM_H__ */
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index 64bf8244999..3cf33f4d431 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -262,7 +262,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
? (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra))
: NULL;
- if (bsdf && extra) {
+ if(bsdf && extra) {
bsdf->N = N;
bsdf->ior = (2.0f / (1.0f - safe_sqrtf(0.08f * specular))) - 1.0f;
bsdf->T = T;
@@ -285,7 +285,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
/* setup bsdf */
if(distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID || roughness <= 0.075f) /* use single-scatter GGX */
sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd);
- else /* use multi-scatter GGX */
+ else /* use multi-scatter GGX */
sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd);
}
}
@@ -314,7 +314,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
? (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra))
: NULL;
- if (bsdf && extra) {
+ if(bsdf && extra) {
bsdf->N = N;
bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
bsdf->extra = extra;
diff --git a/intern/cycles/kernel/svm/svm_hsv.h b/intern/cycles/kernel/svm/svm_hsv.h
index 27127b85323..41538d1138d 100644
--- a/intern/cycles/kernel/svm/svm_hsv.h
+++ b/intern/cycles/kernel/svm/svm_hsv.h
@@ -59,4 +59,4 @@ ccl_device void svm_node_hsv(KernelGlobals *kg, ShaderData *sd, float *stack, ui
CCL_NAMESPACE_END
-#endif /* __SVM_HSV_H__ */
+#endif /* __SVM_HSV_H__ */
diff --git a/intern/cycles/kernel/svm/svm_ramp.h b/intern/cycles/kernel/svm/svm_ramp.h
index a3e4b6e87cd..6f39391057e 100644
--- a/intern/cycles/kernel/svm/svm_ramp.h
+++ b/intern/cycles/kernel/svm/svm_ramp.h
@@ -108,4 +108,4 @@ ccl_device void svm_node_curves(KernelGlobals *kg, ShaderData *sd, float *stack,
CCL_NAMESPACE_END
-#endif /* __SVM_RAMP_H__ */
+#endif /* __SVM_RAMP_H__ */
diff --git a/intern/cycles/kernel/svm/svm_ramp_util.h b/intern/cycles/kernel/svm/svm_ramp_util.h
index a67689ff9d1..847108ff1c2 100644
--- a/intern/cycles/kernel/svm/svm_ramp_util.h
+++ b/intern/cycles/kernel/svm/svm_ramp_util.h
@@ -95,4 +95,4 @@ ccl_device float float_ramp_lookup(const float *ramp,
CCL_NAMESPACE_END
-#endif /* __SVM_RAMP_UTIL_H__ */
+#endif /* __SVM_RAMP_UTIL_H__ */
diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h
index 910537a2539..0f1dfa4936b 100644
--- a/intern/cycles/kernel/svm/svm_types.h
+++ b/intern/cycles/kernel/svm/svm_types.h
@@ -531,4 +531,4 @@ typedef enum ClosureType {
CCL_NAMESPACE_END
-#endif /* __SVM_TYPES_H__ */
+#endif /* __SVM_TYPES_H__ */
diff --git a/intern/cycles/kernel/svm/svm_wave.h b/intern/cycles/kernel/svm/svm_wave.h
index 7b60ab6e6ae..80b63dc80cd 100644
--- a/intern/cycles/kernel/svm/svm_wave.h
+++ b/intern/cycles/kernel/svm/svm_wave.h
@@ -24,7 +24,7 @@ ccl_device_noinline float svm_wave(NodeWaveType type, NodeWaveProfile profile, f
if(type == NODE_WAVE_BANDS)
n = (p.x + p.y + p.z) * 10.0f;
- else /* NODE_WAVE_RINGS */
+ else /* NODE_WAVE_RINGS */
n = len(p) * 20.0f;
if(distortion != 0.0f)
diff --git a/intern/cycles/render/CMakeLists.txt b/intern/cycles/render/CMakeLists.txt
index 7d2220f37f9..c0ce7368771 100644
--- a/intern/cycles/render/CMakeLists.txt
+++ b/intern/cycles/render/CMakeLists.txt
@@ -15,6 +15,7 @@ set(SRC
buffers.cpp
camera.cpp
constant_fold.cpp
+ coverage.cpp
film.cpp
graph.cpp
image.cpp
@@ -46,6 +47,7 @@ set(SRC_HEADERS
buffers.h
camera.h
constant_fold.h
+ coverage.h
film.h
graph.h
image.h
diff --git a/intern/cycles/render/attribute.h b/intern/cycles/render/attribute.h
index 40e5be2e1b2..e7438f4513d 100644
--- a/intern/cycles/render/attribute.h
+++ b/intern/cycles/render/attribute.h
@@ -172,4 +172,4 @@ public:
CCL_NAMESPACE_END
-#endif /* __ATTRIBUTE_H__ */
+#endif /* __ATTRIBUTE_H__ */
diff --git a/intern/cycles/render/background.h b/intern/cycles/render/background.h
index 3f56dedb2c8..17c3eaaaaf5 100644
--- a/intern/cycles/render/background.h
+++ b/intern/cycles/render/background.h
@@ -59,4 +59,4 @@ public:
CCL_NAMESPACE_END
-#endif /* __BACKGROUND_H__ */
+#endif /* __BACKGROUND_H__ */
diff --git a/intern/cycles/render/bake.h b/intern/cycles/render/bake.h
index a811eac3327..fce8f2fa606 100644
--- a/intern/cycles/render/bake.h
+++ b/intern/cycles/render/bake.h
@@ -83,4 +83,4 @@ private:
CCL_NAMESPACE_END
-#endif /* __BAKE_H__ */
+#endif /* __BAKE_H__ */
diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp
index 4cd8b3726d3..f901885e679 100644
--- a/intern/cycles/render/buffers.cpp
+++ b/intern/cycles/render/buffers.cpp
@@ -147,7 +147,7 @@ bool RenderBuffers::copy_from_device()
return true;
}
-bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int sample, int components, float *pixels)
+bool RenderBuffers::get_denoising_pass_rect(int type, float exposure, int sample, int components, float *pixels)
{
if(buffer.data() == NULL) {
return false;
@@ -155,19 +155,20 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp
float invsample = 1.0f/sample;
float scale = invsample;
- bool variance = (offset == DENOISING_PASS_NORMAL_VAR) ||
- (offset == DENOISING_PASS_ALBEDO_VAR) ||
- (offset == DENOISING_PASS_DEPTH_VAR) ||
- (offset == DENOISING_PASS_COLOR_VAR);
-
- if(offset == DENOISING_PASS_COLOR || offset == DENOISING_PASS_CLEAN) {
- scale *= exposure;
+ bool variance = (type == DENOISING_PASS_NORMAL_VAR) ||
+ (type == DENOISING_PASS_ALBEDO_VAR) ||
+ (type == DENOISING_PASS_DEPTH_VAR) ||
+ (type == DENOISING_PASS_COLOR_VAR);
+
+ float scale_exposure = scale;
+ if(type == DENOISING_PASS_COLOR || type == DENOISING_PASS_CLEAN) {
+ scale_exposure *= exposure;
}
- else if(offset == DENOISING_PASS_COLOR_VAR) {
- scale *= exposure*exposure;
+ else if(type == DENOISING_PASS_COLOR_VAR) {
+ scale_exposure *= exposure*exposure;
}
- offset += params.get_denoising_offset();
+ int offset = type + params.get_denoising_offset();
int pass_stride = params.get_passes_size();
int size = params.width*params.height;
@@ -181,14 +182,14 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp
if(components == 1) {
for(int i = 0; i < size; i++, mean += pass_stride, var += pass_stride, pixels++) {
- pixels[0] = max(0.0f, var[0] - mean[0]*mean[0]*invsample)*scale;
+ pixels[0] = max(0.0f, var[0] - mean[0]*mean[0]*invsample)*scale_exposure;
}
}
else if(components == 3) {
for(int i = 0; i < size; i++, mean += pass_stride, var += pass_stride, pixels += 3) {
- pixels[0] = max(0.0f, var[0] - mean[0]*mean[0]*invsample)*scale;
- pixels[1] = max(0.0f, var[1] - mean[1]*mean[1]*invsample)*scale;
- pixels[2] = max(0.0f, var[2] - mean[2]*mean[2]*invsample)*scale;
+ pixels[0] = max(0.0f, var[0] - mean[0]*mean[0]*invsample)*scale_exposure;
+ pixels[1] = max(0.0f, var[1] - mean[1]*mean[1]*invsample)*scale_exposure;
+ pixels[2] = max(0.0f, var[2] - mean[2]*mean[2]*invsample)*scale_exposure;
}
}
else {
@@ -200,14 +201,28 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp
if(components == 1) {
for(int i = 0; i < size; i++, in += pass_stride, pixels++) {
- pixels[0] = in[0]*scale;
+ pixels[0] = in[0]*scale_exposure;
}
}
else if(components == 3) {
for(int i = 0; i < size; i++, in += pass_stride, pixels += 3) {
- pixels[0] = in[0]*scale;
- pixels[1] = in[1]*scale;
- pixels[2] = in[2]*scale;
+ pixels[0] = in[0]*scale_exposure;
+ pixels[1] = in[1]*scale_exposure;
+ pixels[2] = in[2]*scale_exposure;
+ }
+ }
+ else if(components == 4) {
+ assert(type == DENOISING_PASS_COLOR);
+
+ /* Since the alpha channel is not involved in denoising, output the Combined alpha channel. */
+ assert(params.passes[0].type == PASS_COMBINED);
+ float *in_combined = buffer.data();
+
+ for(int i = 0; i < size; i++, in += pass_stride, in_combined += pass_stride, pixels += 4) {
+ pixels[0] = in[0]*scale_exposure;
+ pixels[1] = in[1]*scale_exposure;
+ pixels[2] = in[2]*scale_exposure;
+ pixels[3] = saturate(in_combined[3]*scale);
}
}
else {
@@ -218,7 +233,7 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp
return true;
}
-bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels)
+bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels, const string &name)
{
if(buffer.data() == NULL) {
return false;
@@ -234,6 +249,14 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int
continue;
}
+ /* Tell Cryptomatte passes apart by their name. */
+ if(pass.type == PASS_CRYPTOMATTE) {
+ if(pass.name != name) {
+ pass_offset += pass.components;
+ continue;
+ }
+ }
+
float *in = buffer.data() + pass_offset;
int pass_stride = params.get_passes_size();
@@ -370,6 +393,17 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int
pixels[3] = f.w*invw;
}
}
+ else if(type == PASS_CRYPTOMATTE) {
+ for(int i = 0; i < size; i++, in += pass_stride, pixels += 4) {
+ float4 f = make_float4(in[0], in[1], in[2], in[3]);
+ /* x and z contain integer IDs, don't rescale them.
+ y and w contain matte weights, they get scaled. */
+ pixels[0] = f.x;
+ pixels[1] = f.y * scale;
+ pixels[2] = f.z;
+ pixels[3] = f.w * scale;
+ }
+ }
else {
for(int i = 0; i < size; i++, in += pass_stride, pixels += 4) {
float4 f = make_float4(in[0], in[1], in[2], in[3]);
diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h
index 1b06ffe33a6..46c3b89bd84 100644
--- a/intern/cycles/render/buffers.h
+++ b/intern/cycles/render/buffers.h
@@ -50,7 +50,7 @@ public:
int full_height;
/* passes */
- array<Pass> passes;
+ vector<Pass> passes;
bool denoising_data_pass;
/* If only some light path types should be denoised, an additional pass is needed. */
bool denoising_clean_pass;
@@ -84,7 +84,7 @@ public:
void zero();
bool copy_from_device();
- bool get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels);
+ bool get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels, const string &name);
bool get_denoising_pass_rect(int offset, float exposure, int sample, int components, float *pixels);
};
@@ -146,4 +146,4 @@ public:
CCL_NAMESPACE_END
-#endif /* __BUFFERS_H__ */
+#endif /* __BUFFERS_H__ */
diff --git a/intern/cycles/render/camera.cpp b/intern/cycles/render/camera.cpp
index ec3c56e820a..34066e1b024 100644
--- a/intern/cycles/render/camera.cpp
+++ b/intern/cycles/render/camera.cpp
@@ -716,7 +716,7 @@ float Camera::world_to_raster_size(float3 P)
float3 D = transform_point(&worldtocamera, P);
float dist = len(D);
- Ray ray = {0};
+ Ray ray = {{0}};
/* Distortion can become so great that the results become meaningless, there
* may be a better way to do this, but calculating differentials from the
diff --git a/intern/cycles/render/camera.h b/intern/cycles/render/camera.h
index 323f2c61ca4..37f5dea624f 100644
--- a/intern/cycles/render/camera.h
+++ b/intern/cycles/render/camera.h
@@ -21,6 +21,7 @@
#include "graph/node.h"
+#include "util/util_array.h"
#include "util/util_boundbox.h"
#include "util/util_projection.h"
#include "util/util_transform.h"
@@ -212,4 +213,4 @@ private:
CCL_NAMESPACE_END
-#endif /* __CAMERA_H__ */
+#endif /* __CAMERA_H__ */
diff --git a/intern/cycles/render/constant_fold.h b/intern/cycles/render/constant_fold.h
index 26fa4e8b1c8..6ec94b055e3 100644
--- a/intern/cycles/render/constant_fold.h
+++ b/intern/cycles/render/constant_fold.h
@@ -70,4 +70,4 @@ public:
CCL_NAMESPACE_END
-#endif /* __CONSTANT_FOLD_H__ */
+#endif /* __CONSTANT_FOLD_H__ */
diff --git a/intern/cycles/render/coverage.cpp b/intern/cycles/render/coverage.cpp
new file mode 100644
index 00000000000..72ef4cda3ff
--- /dev/null
+++ b/intern/cycles/render/coverage.cpp
@@ -0,0 +1,143 @@
+/*
+ * Copyright 2018 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "render/coverage.h"
+#include "kernel/kernel_compat_cpu.h"
+#include "kernel/split/kernel_split_data.h"
+#include "kernel/kernel_globals.h"
+#include "kernel/kernel_id_passes.h"
+#include "kernel/kernel_types.h"
+#include "util/util_map.h"
+#include "util/util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+static bool crypomatte_comp(const pair<float, float>& i, const pair<float, float> j) { return i.first > j.first; }
+
+void Coverage::finalize()
+{
+ int pass_offset = 0;
+ if(kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) {
+ finalize_buffer(coverage_object, pass_offset);
+ pass_offset += kernel_data.film.cryptomatte_depth * 4;
+ }
+ if(kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) {
+ finalize_buffer(coverage_material, pass_offset);
+ pass_offset += kernel_data.film.cryptomatte_depth * 4;
+ }
+ if(kernel_data.film.cryptomatte_passes & CRYPT_ASSET) {
+ finalize_buffer(coverage_asset, pass_offset);
+ }
+}
+
+void Coverage::init_path_trace()
+{
+ kg->coverage_object = kg->coverage_material = kg->coverage_asset = NULL;
+
+ if(kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE) {
+ if(kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) {
+ coverage_object.clear();
+ coverage_object.resize(tile.w * tile.h);
+ }
+ if(kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) {
+ coverage_material.clear();
+ coverage_material.resize(tile.w * tile.h);
+ }
+ if(kernel_data.film.cryptomatte_passes & CRYPT_ASSET) {
+ coverage_asset.clear();
+ coverage_asset.resize(tile.w * tile.h);
+ }
+ }
+}
+
+void Coverage::init_pixel(int x, int y)
+{
+ if(kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE) {
+ const int pixel_index = tile.w * (y - tile.y) + x - tile.x;
+ if(kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) {
+ kg->coverage_object = &coverage_object[pixel_index];
+ }
+ if(kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) {
+ kg->coverage_material = &coverage_material[pixel_index];
+ }
+ if(kernel_data.film.cryptomatte_passes & CRYPT_ASSET) {
+ kg->coverage_asset = &coverage_asset[pixel_index];
+ }
+ }
+}
+
+void Coverage::finalize_buffer(vector<CoverageMap> & coverage, const int pass_offset)
+{
+ if(kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE) {
+ flatten_buffer(coverage, pass_offset);
+ }
+ else {
+ sort_buffer(pass_offset);
+ }
+}
+
+void Coverage::flatten_buffer(vector<CoverageMap> &coverage, const int pass_offset)
+{
+ /* Sort the coverage map and write it to the output */
+ int pixel_index = 0;
+ int pass_stride = tile.buffers->params.get_passes_size();
+ for(int y = 0; y < tile.h; ++y) {
+ for(int x = 0; x < tile.w; ++x) {
+ const CoverageMap& pixel = coverage[pixel_index];
+ if(!pixel.empty()) {
+ /* buffer offset */
+ int index = x + y * tile.stride;
+ float *buffer = (float*)tile.buffer + index*pass_stride;
+
+ /* sort the cryptomatte pixel */
+ vector<pair<float, float> > sorted_pixel;
+ for(CoverageMap::const_iterator it = pixel.begin(); it != pixel.end(); ++it) {
+ sorted_pixel.push_back(std::make_pair(it->second, it->first));
+ }
+ sort(sorted_pixel.begin(), sorted_pixel.end(), crypomatte_comp);
+ int num_slots = 2 * (kernel_data.film.cryptomatte_depth);
+ if(sorted_pixel.size() > num_slots) {
+ float leftover = 0.0f;
+ for(vector<pair<float, float> >::iterator it = sorted_pixel.begin()+num_slots; it != sorted_pixel.end(); ++it) {
+ leftover += it->first;
+ }
+ sorted_pixel[num_slots-1].first += leftover;
+ }
+ int limit = min(num_slots, sorted_pixel.size());
+ for(int i = 0; i < limit; ++i) {
+ kernel_write_id_slots(buffer + kernel_data.film.pass_cryptomatte + pass_offset, 2 * (kernel_data.film.cryptomatte_depth), sorted_pixel[i].second, sorted_pixel[i].first);
+ }
+ }
+ ++pixel_index;
+ }
+ }
+}
+
+void Coverage::sort_buffer(const int pass_offset)
+{
+ /* Sort the coverage map and write it to the output */
+ int pass_stride = tile.buffers->params.get_passes_size();
+ for(int y = 0; y < tile.h; ++y) {
+ for(int x = 0; x < tile.w; ++x) {
+ /* buffer offset */
+ int index = x + y*tile.stride;
+ float *buffer = (float*)tile.buffer + index*pass_stride;
+ kernel_sort_id_slots(buffer + kernel_data.film.pass_cryptomatte + pass_offset, 2 * (kernel_data.film.cryptomatte_depth));
+ }
+ }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/render/coverage.h b/intern/cycles/render/coverage.h
new file mode 100644
index 00000000000..9ee0bce7517
--- /dev/null
+++ b/intern/cycles/render/coverage.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2018 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "render/buffers.h"
+#include "kernel/kernel_compat_cpu.h"
+#include "kernel/split/kernel_split_data.h"
+#include "kernel/kernel_globals.h"
+#include "util/util_map.h"
+#include "util/util_vector.h"
+
+#ifndef __COVERAGE_H__
+#define __COVERAGE_H__
+
+CCL_NAMESPACE_BEGIN
+
+class Coverage {
+public:
+ Coverage(KernelGlobals *kg_, RenderTile &tile_) : kg(kg_), tile(tile_) { }
+ void init_path_trace();
+ void init_pixel(int x, int y);
+ void finalize();
+private:
+ vector<CoverageMap>coverage_object;
+ vector<CoverageMap>coverage_material;
+ vector<CoverageMap>coverage_asset;
+ KernelGlobals *kg;
+ RenderTile &tile;
+ void finalize_buffer(vector<CoverageMap>&coverage, const int pass_offset);
+ void flatten_buffer(vector<CoverageMap>&coverage, const int pass_offset);
+ void sort_buffer(const int pass_offset);
+};
+
+
+CCL_NAMESPACE_END
+
+#endif /* __COVERAGE_H__ */
diff --git a/intern/cycles/render/curves.h b/intern/cycles/render/curves.h
index 62066d8a809..cf75751c58f 100644
--- a/intern/cycles/render/curves.h
+++ b/intern/cycles/render/curves.h
@@ -17,8 +17,8 @@
#ifndef __CURVES_H__
#define __CURVES_H__
+#include "util/util_array.h"
#include "util/util_types.h"
-#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
@@ -119,4 +119,4 @@ public:
CCL_NAMESPACE_END
-#endif /* __CURVES_H__ */
+#endif /* __CURVES_H__ */
diff --git a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp
index 8f3596ade58..d0f15496e50 100644
--- a/intern/cycles/render/film.cpp
+++ b/intern/cycles/render/film.cpp
@@ -38,11 +38,14 @@ static bool compare_pass_order(const Pass& a, const Pass& b)
return (a.components > b.components);
}
-void Pass::add(PassType type, array<Pass>& passes)
+void Pass::add(PassType type, vector<Pass>& passes, const char *name)
{
- for(size_t i = 0; i < passes.size(); i++)
- if(passes[i].type == type)
+ for(size_t i = 0; i < passes.size(); i++) {
+ if(passes[i].type == type &&
+ (name ? (passes[i].name == name) : passes[i].name.empty())) {
return;
+ }
+ }
Pass pass;
@@ -50,6 +53,9 @@ void Pass::add(PassType type, array<Pass>& passes)
pass.filter = true;
pass.exposure = false;
pass.divide_type = PASS_NONE;
+ if(name) {
+ pass.name = name;
+ }
switch(type) {
case PASS_NONE:
@@ -155,13 +161,15 @@ void Pass::add(PassType type, array<Pass>& passes)
pass.components = 4;
pass.exposure = true;
break;
-
+ case PASS_CRYPTOMATTE:
+ pass.components = 4;
+ break;
default:
assert(false);
break;
}
- passes.push_back_slow(pass);
+ passes.push_back(pass);
/* order from by components, to ensure alignment so passes with size 4
* come first and then passes with size 1 */
@@ -171,19 +179,19 @@ void Pass::add(PassType type, array<Pass>& passes)
Pass::add(pass.divide_type, passes);
}
-bool Pass::equals(const array<Pass>& A, const array<Pass>& B)
+bool Pass::equals(const vector<Pass>& A, const vector<Pass>& B)
{
if(A.size() != B.size())
return false;
for(int i = 0; i < A.size(); i++)
- if(A[i].type != B[i].type)
+ if(A[i].type != B[i].type || A[i].name != B[i].name)
return false;
return true;
}
-bool Pass::contains(const array<Pass>& passes, PassType type)
+bool Pass::contains(const vector<Pass>& passes, PassType type)
{
for(size_t i = 0; i < passes.size(); i++)
if(passes[i].type == type)
@@ -290,6 +298,7 @@ Film::Film()
use_light_visibility = false;
filter_table_offset = TABLE_OFFSET_INVALID;
+ cryptomatte_passes = CRYPT_NONE;
need_update = true;
}
@@ -314,6 +323,8 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
kfilm->pass_stride = 0;
kfilm->use_light_pass = use_light_visibility || use_sample_clamp;
+ bool have_cryptomatte = false;
+
for(size_t i = 0; i < passes.size(); i++) {
Pass& pass = passes[i];
@@ -434,7 +445,10 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
#endif
case PASS_RENDER_TIME:
break;
-
+ case PASS_CRYPTOMATTE:
+ kfilm->pass_cryptomatte = have_cryptomatte ? min(kfilm->pass_cryptomatte, kfilm->pass_stride) : kfilm->pass_stride;
+ have_cryptomatte = true;
+ break;
default:
assert(false);
break;
@@ -471,6 +485,9 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
kfilm->mist_inv_depth = (mist_depth > 0.0f)? 1.0f/mist_depth: 0.0f;
kfilm->mist_falloff = mist_falloff;
+ kfilm->cryptomatte_passes = cryptomatte_passes;
+ kfilm->cryptomatte_depth = cryptomatte_depth;
+
pass_stride = kfilm->pass_stride;
denoising_data_offset = kfilm->pass_denoising_data;
denoising_clean_offset = kfilm->pass_denoising_clean;
@@ -490,7 +507,7 @@ bool Film::modified(const Film& film)
return !Node::equals(film) || !Pass::equals(passes, film.passes);
}
-void Film::tag_passes_update(Scene *scene, const array<Pass>& passes_)
+void Film::tag_passes_update(Scene *scene, const vector<Pass>& passes_)
{
if(Pass::contains(passes, PASS_UV) != Pass::contains(passes_, PASS_UV)) {
scene->mesh_manager->tag_update(scene);
diff --git a/intern/cycles/render/film.h b/intern/cycles/render/film.h
index 6ab2eea79b8..c597db4e4c5 100644
--- a/intern/cycles/render/film.h
+++ b/intern/cycles/render/film.h
@@ -45,10 +45,11 @@ public:
bool filter;
bool exposure;
PassType divide_type;
+ string name;
- static void add(PassType type, array<Pass>& passes);
- static bool equals(const array<Pass>& A, const array<Pass>& B);
- static bool contains(const array<Pass>& passes, PassType);
+ static void add(PassType type, vector<Pass>& passes, const char* name = NULL);
+ static bool equals(const vector<Pass>& A, const vector<Pass>& B);
+ static bool contains(const vector<Pass>& passes, PassType);
};
class Film : public Node {
@@ -56,7 +57,7 @@ public:
NODE_DECLARE
float exposure;
- array<Pass> passes;
+ vector<Pass> passes;
bool denoising_data_pass;
bool denoising_clean_pass;
int denoising_flags;
@@ -76,6 +77,8 @@ public:
bool use_light_visibility;
bool use_sample_clamp;
+ CryptomatteType cryptomatte_passes;
+ int cryptomatte_depth;
bool need_update;
@@ -86,10 +89,10 @@ public:
void device_free(Device *device, DeviceScene *dscene, Scene *scene);
bool modified(const Film& film);
- void tag_passes_update(Scene *scene, const array<Pass>& passes_);
+ void tag_passes_update(Scene *scene, const vector<Pass>& passes_);
void tag_update(Scene *scene);
};
CCL_NAMESPACE_END
-#endif /* __FILM_H__ */
+#endif /* __FILM_H__ */
diff --git a/intern/cycles/render/graph.h b/intern/cycles/render/graph.h
index 426522066b3..d14a59b4900 100644
--- a/intern/cycles/render/graph.h
+++ b/intern/cycles/render/graph.h
@@ -293,4 +293,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __GRAPH_H__ */
+#endif /* __GRAPH_H__ */
diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h
index d94ebe564e3..8367a6811bd 100644
--- a/intern/cycles/render/image.h
+++ b/intern/cycles/render/image.h
@@ -164,4 +164,4 @@ private:
CCL_NAMESPACE_END
-#endif /* __IMAGE_H__ */
+#endif /* __IMAGE_H__ */
diff --git a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h
index f68400ac416..6a7e2056851 100644
--- a/intern/cycles/render/integrator.h
+++ b/intern/cycles/render/integrator.h
@@ -94,4 +94,4 @@ public:
CCL_NAMESPACE_END
-#endif /* __INTEGRATOR_H__ */
+#endif /* __INTEGRATOR_H__ */
diff --git a/intern/cycles/render/light.h b/intern/cycles/render/light.h
index 32a911dc256..f4dfe0cadbf 100644
--- a/intern/cycles/render/light.h
+++ b/intern/cycles/render/light.h
@@ -139,4 +139,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __LIGHT_H__ */
+#endif /* __LIGHT_H__ */
diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp
index 8a00b88af12..5f884a3f871 100644
--- a/intern/cycles/render/mesh.cpp
+++ b/intern/cycles/render/mesh.cpp
@@ -39,6 +39,10 @@
#include "util/util_progress.h"
#include "util/util_set.h"
+#ifdef WITH_EMBREE
+# include "bvh/bvh_embree.h"
+#endif
+
CCL_NAMESPACE_BEGIN
/* Triangle */
@@ -1068,11 +1072,14 @@ void Mesh::compute_bvh(Device *device,
bparams.use_spatial_split = params->use_bvh_spatial_split;
bparams.bvh_layout = BVHParams::best_bvh_layout(
params->bvh_layout,
- device->info.bvh_layout_mask);
+ device->get_bvh_layout_mask());
bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
params->use_bvh_unaligned_nodes;
bparams.num_motion_triangle_steps = params->num_bvh_time_steps;
bparams.num_motion_curve_steps = params->num_bvh_time_steps;
+ bparams.bvh_type = params->bvh_type;
+ bparams.curve_flags = dscene->data.curve.curveflags;
+ bparams.curve_subdivisions = dscene->data.curve.subdivisions;
delete bvh;
bvh = BVH::create(bparams, objects);
@@ -1284,9 +1291,9 @@ void MeshManager::update_osl_attributes(Device *device, Scene *scene, vector<Att
}
}
#else
- (void)device;
- (void)scene;
- (void)mesh_attributes;
+ (void) device;
+ (void) scene;
+ (void) mesh_attributes;
#endif
}
@@ -1855,20 +1862,38 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene *
bparams.top_level = true;
bparams.bvh_layout = BVHParams::best_bvh_layout(
scene->params.bvh_layout,
- device->info.bvh_layout_mask);
+ device->get_bvh_layout_mask());
bparams.use_spatial_split = scene->params.use_bvh_spatial_split;
bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
scene->params.use_bvh_unaligned_nodes;
bparams.num_motion_triangle_steps = scene->params.num_bvh_time_steps;
bparams.num_motion_curve_steps = scene->params.num_bvh_time_steps;
+ bparams.bvh_type = scene->params.bvh_type;
+ bparams.curve_flags = dscene->data.curve.curveflags;
+ bparams.curve_subdivisions = dscene->data.curve.subdivisions;
VLOG(1) << "Using " << bvh_layout_name(bparams.bvh_layout)
<< " layout.";
+#ifdef WITH_EMBREE
+ if(bparams.bvh_layout == BVH_LAYOUT_EMBREE) {
+ if(dscene->data.bvh.scene) {
+ BVHEmbree::destroy(dscene->data.bvh.scene);
+ }
+ }
+#endif
+
BVH *bvh = BVH::create(bparams, scene->objects);
- bvh->build(progress);
+ bvh->build(progress, &device->stats);
if(progress.get_cancel()) {
+#ifdef WITH_EMBREE
+ if(bparams.bvh_layout == BVH_LAYOUT_EMBREE) {
+ if(dscene->data.bvh.scene) {
+ BVHEmbree::destroy(dscene->data.bvh.scene);
+ }
+ }
+#endif
delete bvh;
return;
}
@@ -1923,6 +1948,16 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene *
dscene->data.bvh.bvh_layout = bparams.bvh_layout;
dscene->data.bvh.use_bvh_steps = (scene->params.num_bvh_time_steps != 0);
+
+#ifdef WITH_EMBREE
+ if(bparams.bvh_layout == BVH_LAYOUT_EMBREE) {
+ dscene->data.bvh.scene = ((BVHEmbree*)bvh)->scene;
+ }
+ else {
+ dscene->data.bvh.scene = NULL;
+ }
+#endif
+
delete bvh;
}
@@ -2266,7 +2301,7 @@ void MeshManager::device_free(Device *device, DeviceScene *dscene)
og->object_names.clear();
}
#else
- (void)device;
+ (void) device;
#endif
}
diff --git a/intern/cycles/render/mesh.h b/intern/cycles/render/mesh.h
index 444f03a3664..7d36b2cd7ca 100644
--- a/intern/cycles/render/mesh.h
+++ b/intern/cycles/render/mesh.h
@@ -22,6 +22,7 @@
#include "render/attribute.h"
#include "render/shader.h"
+#include "util/util_array.h"
#include "util/util_boundbox.h"
#include "util/util_list.h"
#include "util/util_map.h"
@@ -390,4 +391,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __MESH_H__ */
+#endif /* __MESH_H__ */
diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h
index 28bbe2de05a..048f0fcaa24 100644
--- a/intern/cycles/render/nodes.h
+++ b/intern/cycles/render/nodes.h
@@ -20,6 +20,7 @@
#include "render/graph.h"
#include "graph/node.h"
+#include "util/util_array.h"
#include "util/util_string.h"
CCL_NAMESPACE_BEGIN
@@ -1161,4 +1162,4 @@ public:
CCL_NAMESPACE_END
-#endif /* __NODES_H__ */
+#endif /* __NODES_H__ */
diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp
index e3f35c366d6..dc7a1043208 100644
--- a/intern/cycles/render/object.cpp
+++ b/intern/cycles/render/object.cpp
@@ -27,7 +27,9 @@
#include "util/util_logging.h"
#include "util/util_map.h"
#include "util/util_progress.h"
+#include "util/util_set.h"
#include "util/util_vector.h"
+#include "util/util_murmurhash.h"
#include "subd/subd_patch_table.h"
@@ -483,6 +485,10 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
kobject.numverts = mesh->verts.size();
kobject.patch_map_offset = 0;
kobject.attribute_map_offset = 0;
+ uint32_t hash_name = util_murmur_hash3(ob->name.c_str(), ob->name.length(), 0);
+ uint32_t hash_asset = util_murmur_hash3(ob->asset_name.c_str(), ob->asset_name.length(), 0);
+ kobject.cryptomatte_object = util_hash_to_float(hash_name);
+ kobject.cryptomatte_asset = util_hash_to_float(hash_asset);
/* Object flag. */
if(ob->use_holdout) {
@@ -839,4 +845,37 @@ void ObjectManager::tag_update(Scene *scene)
scene->light_manager->need_update = true;
}
+string ObjectManager::get_cryptomatte_objects(Scene *scene)
+{
+ string manifest = "{";
+
+ unordered_set<ustring, ustringHash> objects;
+ foreach(Object *object, scene->objects) {
+ if(objects.count(object->name)) {
+ continue;
+ }
+ objects.insert(object->name);
+ uint32_t hash_name = util_murmur_hash3(object->name.c_str(), object->name.length(), 0);
+ manifest += string_printf("\"%s\":\"%08x\",", object->name.c_str(), hash_name);
+ }
+ manifest[manifest.size()-1] = '}';
+ return manifest;
+}
+
+string ObjectManager::get_cryptomatte_assets(Scene *scene)
+{
+ string manifest = "{";
+ unordered_set<ustring, ustringHash> assets;
+ foreach(Object *ob, scene->objects) {
+ if(assets.count(ob->asset_name)) {
+ continue;
+ }
+ assets.insert(ob->asset_name);
+ uint32_t hash_asset = util_murmur_hash3(ob->asset_name.c_str(), ob->asset_name.length(), 0);
+ manifest += string_printf("\"%s\":\"%08x\",", ob->asset_name.c_str(), hash_asset);
+ }
+ manifest[manifest.size()-1] = '}';
+ return manifest;
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/object.h b/intern/cycles/render/object.h
index b80c4aef70b..87e6e6652ad 100644
--- a/intern/cycles/render/object.h
+++ b/intern/cycles/render/object.h
@@ -20,11 +20,13 @@
#include "graph/node.h"
#include "render/scene.h"
+#include "util/util_array.h"
#include "util/util_boundbox.h"
#include "util/util_param.h"
#include "util/util_transform.h"
#include "util/util_thread.h"
#include "util/util_types.h"
+#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
@@ -48,6 +50,7 @@ public:
BoundBox bounds;
uint random_id;
int pass_id;
+ ustring asset_name;
vector<ParamValue> attributes;
uint visibility;
array<Transform> motion;
@@ -115,6 +118,9 @@ public:
void apply_static_transforms(DeviceScene *dscene, Scene *scene, Progress& progress);
+ string get_cryptomatte_objects(Scene *scene);
+ string get_cryptomatte_assets(Scene *scene);
+
protected:
void device_update_object_transform(UpdateObjectTransformState *state,
Object *ob,
@@ -128,4 +134,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __OBJECT_H__ */
+#endif /* __OBJECT_H__ */
diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp
index 496e9d9491a..3fbc7d33a74 100644
--- a/intern/cycles/render/osl.cpp
+++ b/intern/cycles/render/osl.cpp
@@ -1255,6 +1255,6 @@ void OSLCompiler::parameter_color_array(const char * /*name*/, const array<float
{
}
-#endif /* WITH_OSL */
+#endif /* WITH_OSL */
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/osl.h b/intern/cycles/render/osl.h
index 966fc1965d7..e196e0be787 100644
--- a/intern/cycles/render/osl.h
+++ b/intern/cycles/render/osl.h
@@ -17,6 +17,7 @@
#ifndef __OSL_H__
#define __OSL_H__
+#include "util/util_array.h"
#include "util/util_set.h"
#include "util/util_string.h"
#include "util/util_thread.h"
@@ -171,4 +172,4 @@ private:
CCL_NAMESPACE_END
-#endif /* __OSL_H__ */
+#endif /* __OSL_H__ */
diff --git a/intern/cycles/render/particles.h b/intern/cycles/render/particles.h
index 7e7afd5d054..27821907af0 100644
--- a/intern/cycles/render/particles.h
+++ b/intern/cycles/render/particles.h
@@ -17,8 +17,8 @@
#ifndef __PARTICLES_H__
#define __PARTICLES_H__
+#include "util/util_array.h"
#include "util/util_types.h"
-#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
@@ -68,4 +68,4 @@ public:
CCL_NAMESPACE_END
-#endif /* __PARTICLES_H__ */
+#endif /* __PARTICLES_H__ */
diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp
index 9f93fed139c..ccaca8707c8 100644
--- a/intern/cycles/render/scene.cpp
+++ b/intern/cycles/render/scene.cpp
@@ -215,6 +215,11 @@ void Scene::device_update(Device *device_, Progress& progress)
object_manager->device_update(device, &dscene, this, progress);
if(progress.get_cancel() || device->have_error()) return;
+
+ progress.set_status("Updating Hair Systems");
+ curve_system_manager->device_update(device, &dscene, this, progress);
+
+ if(progress.get_cancel() || device->have_error()) return;
progress.set_status("Updating Particle Systems");
particle_system_manager->device_update(device, &dscene, this, progress);
@@ -240,12 +245,7 @@ void Scene::device_update(Device *device_, Progress& progress)
camera->device_update_volume(device, &dscene, this);
if(progress.get_cancel() || device->have_error()) return;
-
- progress.set_status("Updating Hair Systems");
- curve_system_manager->device_update(device, &dscene, this, progress);
-
- if(progress.get_cancel() || device->have_error()) return;
-
+
progress.set_status("Updating Lookup Tables");
lookup_tables->device_update(device, &dscene);
diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h
index dd8069537eb..57ea1d471e8 100644
--- a/intern/cycles/render/scene.h
+++ b/intern/cycles/render/scene.h
@@ -166,7 +166,6 @@ public:
bool use_bvh_spatial_split;
bool use_bvh_unaligned_nodes;
int num_bvh_time_steps;
-
bool persistent_data;
int texture_limit;
@@ -269,4 +268,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __SCENE_H__ */
+#endif /* __SCENE_H__ */
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index d0aa985b035..d6ecafa19b7 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -682,7 +682,10 @@ DeviceRequestedFeatures Session::get_requested_device_features()
BakeManager *bake_manager = scene->bake_manager;
requested_features.use_baking = bake_manager->get_baking();
requested_features.use_integrator_branched = (scene->integrator->method == Integrator::BRANCHED_PATH);
- requested_features.use_denoising = params.use_denoising;
+ if(params.denoising_passes) {
+ requested_features.use_denoising = true;
+ requested_features.use_shadow_tricks = true;
+ }
return requested_features;
}
diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h
index 61f62f8e712..56a69919a7a 100644
--- a/intern/cycles/render/session.h
+++ b/intern/cycles/render/session.h
@@ -58,6 +58,7 @@ public:
bool display_buffer_linear;
bool use_denoising;
+ bool denoising_passes;
int denoising_radius;
float denoising_strength;
float denoising_feature_strength;
@@ -89,6 +90,7 @@ public:
threads = 0;
use_denoising = false;
+ denoising_passes = false;
denoising_radius = 8;
denoising_strength = 0.0f;
denoising_feature_strength = 0.0f;
@@ -236,4 +238,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __SESSION_H__ */
+#endif /* __SESSION_H__ */
diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp
index ac605305b94..d6c2d7502f2 100644
--- a/intern/cycles/render/shader.cpp
+++ b/intern/cycles/render/shader.cpp
@@ -30,6 +30,7 @@
#include "render/tables.h"
#include "util/util_foreach.h"
+#include "util/util_murmurhash.h"
#ifdef WITH_OCIO
# include <OpenColorIO/OpenColorIO.h>
@@ -387,7 +388,7 @@ ShaderManager *ShaderManager::create(Scene *scene, int shadingsystem)
{
ShaderManager *manager;
- (void)shadingsystem; /* Ignored when built without OSL. */
+ (void) shadingsystem; /* Ignored when built without OSL. */
#ifdef WITH_OSL
if(shadingsystem == SHADINGSYSTEM_OSL) {
@@ -523,12 +524,15 @@ void ShaderManager::device_update_common(Device *device,
if(shader->is_constant_emission(&constant_emission))
flag |= SD_HAS_CONSTANT_EMISSION;
+ uint32_t cryptomatte_id = util_murmur_hash3(shader->name.c_str(), shader->name.length(), 0);
+
/* regular shader */
kshader->flags = flag;
kshader->pass_id = shader->pass_id;
kshader->constant_emission[0] = constant_emission.x;
kshader->constant_emission[1] = constant_emission.y;
kshader->constant_emission[2] = constant_emission.z;
+ kshader->cryptomatte_id = util_hash_to_float(cryptomatte_id);
kshader++;
has_transparent_shadow |= (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
@@ -695,4 +699,20 @@ float ShaderManager::linear_rgb_to_gray(float3 c)
return dot(c, rgb_to_y);
}
+string ShaderManager::get_cryptomatte_materials(Scene *scene)
+{
+ string manifest = "{";
+ unordered_set<ustring, ustringHash> materials;
+ foreach(Shader *shader, scene->shaders) {
+ if(materials.count(shader->name)) {
+ continue;
+ }
+ materials.insert(shader->name);
+ uint32_t cryptomatte_id = util_murmur_hash3(shader->name.c_str(), shader->name.length(), 0);
+ manifest += string_printf("\"%s\":\"%08x\",", shader->name.c_str(), cryptomatte_id);
+ }
+ manifest[manifest.size()-1] = '}';
+ return manifest;
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/shader.h b/intern/cycles/render/shader.h
index 58314a1e310..4c7b2fd433b 100644
--- a/intern/cycles/render/shader.h
+++ b/intern/cycles/render/shader.h
@@ -197,6 +197,8 @@ public:
float linear_rgb_to_gray(float3 c);
+ string get_cryptomatte_materials(Scene *scene);
+
protected:
ShaderManager();
@@ -222,4 +224,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __SHADER_H__ */
+#endif /* __SHADER_H__ */
diff --git a/intern/cycles/render/sobol.h b/intern/cycles/render/sobol.h
index d38857d2b35..ce7a28587f2 100644
--- a/intern/cycles/render/sobol.h
+++ b/intern/cycles/render/sobol.h
@@ -28,4 +28,4 @@ void sobol_generate_direction_vectors(uint vectors[][SOBOL_BITS], int dimensions
CCL_NAMESPACE_END
-#endif /* __SOBOL_H__ */
+#endif /* __SOBOL_H__ */
diff --git a/intern/cycles/render/stats.h b/intern/cycles/render/stats.h
index 72d5f1dd93d..2ff0ec3e0e9 100644
--- a/intern/cycles/render/stats.h
+++ b/intern/cycles/render/stats.h
@@ -101,4 +101,4 @@ public:
CCL_NAMESPACE_END
-#endif /* __RENDER_STATS_H__ */
+#endif /* __RENDER_STATS_H__ */
diff --git a/intern/cycles/render/svm.cpp b/intern/cycles/render/svm.cpp
index 44b7eeec6db..b380117e729 100644
--- a/intern/cycles/render/svm.cpp
+++ b/intern/cycles/render/svm.cpp
@@ -266,7 +266,7 @@ int SVMCompiler::stack_assign(ShaderInput *input)
add_node(NODE_VALUE_V, input->stack_offset);
add_node(NODE_VALUE_V, node->get_float3(input->socket_type));
}
- else /* should not get called for closure */
+ else /* should not get called for closure */
assert(0);
}
}
diff --git a/intern/cycles/render/svm.h b/intern/cycles/render/svm.h
index 1d0613bbfdc..af97a490a87 100644
--- a/intern/cycles/render/svm.h
+++ b/intern/cycles/render/svm.h
@@ -21,6 +21,7 @@
#include "render/graph.h"
#include "render/shader.h"
+#include "util/util_array.h"
#include "util/util_set.h"
#include "util/util_string.h"
#include "util/util_thread.h"
@@ -223,4 +224,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __SVM_H__ */
+#endif /* __SVM_H__ */
diff --git a/intern/cycles/render/tables.h b/intern/cycles/render/tables.h
index 09d961a9c3c..709333cb1b6 100644
--- a/intern/cycles/render/tables.h
+++ b/intern/cycles/render/tables.h
@@ -50,4 +50,4 @@ public:
CCL_NAMESPACE_END
-#endif /* __TABLES_H__ */
+#endif /* __TABLES_H__ */
diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h
index f72f653f4c2..2835c793073 100644
--- a/intern/cycles/render/tile.h
+++ b/intern/cycles/render/tile.h
@@ -154,4 +154,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __TILE_H__ */
+#endif /* __TILE_H__ */
diff --git a/intern/cycles/subd/subd_dice.h b/intern/cycles/subd/subd_dice.h
index 4617c782b3a..2bef8d4cf8d 100644
--- a/intern/cycles/subd/subd_dice.h
+++ b/intern/cycles/subd/subd_dice.h
@@ -134,4 +134,4 @@ public:
CCL_NAMESPACE_END
-#endif /* __SUBD_DICE_H__ */
+#endif /* __SUBD_DICE_H__ */
diff --git a/intern/cycles/subd/subd_patch.h b/intern/cycles/subd/subd_patch.h
index 64ec8f70951..84100139f2c 100644
--- a/intern/cycles/subd/subd_patch.h
+++ b/intern/cycles/subd/subd_patch.h
@@ -56,4 +56,4 @@ public:
CCL_NAMESPACE_END
-#endif /* __SUBD_PATCH_H__ */
+#endif /* __SUBD_PATCH_H__ */
diff --git a/intern/cycles/subd/subd_patch_table.cpp b/intern/cycles/subd/subd_patch_table.cpp
index 13a6f284542..0e9d3f37af4 100644
--- a/intern/cycles/subd/subd_patch_table.cpp
+++ b/intern/cycles/subd/subd_patch_table.cpp
@@ -252,8 +252,8 @@ void PackedPatchTable::pack(Far::PatchTable* patch_table, int offset)
build_patch_map(*this, patch_table, offset);
#else
- (void)patch_table;
- (void)offset;
+ (void) patch_table;
+ (void) offset;
#endif
}
diff --git a/intern/cycles/subd/subd_patch_table.h b/intern/cycles/subd/subd_patch_table.h
index 45be7051992..1765578c42e 100644
--- a/intern/cycles/subd/subd_patch_table.h
+++ b/intern/cycles/subd/subd_patch_table.h
@@ -17,8 +17,8 @@
#ifndef __SUBD_PATCH_TABLE_H__
#define __SUBD_PATCH_TABLE_H__
+#include "util/util_array.h"
#include "util/util_types.h"
-#include "util/util_vector.h"
#ifdef WITH_OPENSUBDIV
#ifdef _MSC_VER
@@ -59,4 +59,4 @@ struct PackedPatchTable {
CCL_NAMESPACE_END
-#endif /* __SUBD_PATCH_TABLE_H__ */
+#endif /* __SUBD_PATCH_TABLE_H__ */
diff --git a/intern/cycles/subd/subd_split.h b/intern/cycles/subd/subd_split.h
index 7a276b35382..3368c93944b 100644
--- a/intern/cycles/subd/subd_split.h
+++ b/intern/cycles/subd/subd_split.h
@@ -56,4 +56,4 @@ public:
CCL_NAMESPACE_END
-#endif /* __SUBD_SPLIT_H__ */
+#endif /* __SUBD_SPLIT_H__ */
diff --git a/intern/cycles/test/render_graph_finalize_test.cpp b/intern/cycles/test/render_graph_finalize_test.cpp
index b66a91adbda..cfdab7a6433 100644
--- a/intern/cycles/test/render_graph_finalize_test.cpp
+++ b/intern/cycles/test/render_graph_finalize_test.cpp
@@ -20,6 +20,7 @@
#include "render/graph.h"
#include "render/scene.h"
#include "render/nodes.h"
+#include "util/util_array.h"
#include "util/util_logging.h"
#include "util/util_string.h"
#include "util/util_vector.h"
diff --git a/intern/cycles/test/util_path_test.cpp b/intern/cycles/test/util_path_test.cpp
index c2f400c105d..1df568493d8 100644
--- a/intern/cycles/test/util_path_test.cpp
+++ b/intern/cycles/test/util_path_test.cpp
@@ -370,7 +370,7 @@ TEST(util_path_is_relative, relative_windir_on_unix)
bool is_relative = path_is_relative("tmp\\foo.txt");
EXPECT_TRUE(is_relative);
}
-#endif /* !_WIN32 */
+#endif /* !_WIN32 */
#ifdef _WIN32
TEST(util_path_is_relative, absolute_windows)
@@ -396,6 +396,6 @@ TEST(util_path_is_relative, relative_unixdir_on_windows)
bool is_relative = path_is_relative("tmp/foo.txt");
EXPECT_TRUE(is_relative);
}
-#endif /* _WIN32 */
+#endif /* _WIN32 */
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt
index 291f9a9fcae..77d47984ee7 100644
--- a/intern/cycles/util/CMakeLists.txt
+++ b/intern/cycles/util/CMakeLists.txt
@@ -15,6 +15,7 @@ set(SRC
util_logging.cpp
util_math_cdf.cpp
util_md5.cpp
+ util_murmurhash.cpp
util_path.cpp
util_string.cpp
util_simd.cpp
@@ -36,6 +37,7 @@ set(SRC_HEADERS
util_algorithm.h
util_aligned_malloc.h
util_args.h
+ util_array.h
util_atomic.h
util_boundbox.h
util_debug.h
@@ -64,6 +66,7 @@ set(SRC_HEADERS
util_math_int4.h
util_math_matrix.h
util_md5.h
+ util_murmurhash.h
util_opengl.h
util_optimization.h
util_param.h
diff --git a/intern/cycles/util/util_algorithm.h b/intern/cycles/util/util_algorithm.h
index eb874713d43..f9e6476cc52 100644
--- a/intern/cycles/util/util_algorithm.h
+++ b/intern/cycles/util/util_algorithm.h
@@ -29,4 +29,4 @@ using std::remove;
CCL_NAMESPACE_END
-#endif /* __UTIL_ALGORITHM_H__ */
+#endif /* __UTIL_ALGORITHM_H__ */
diff --git a/intern/cycles/util/util_args.h b/intern/cycles/util/util_args.h
index be6f2c2b9f1..9fe54b14d77 100644
--- a/intern/cycles/util/util_args.h
+++ b/intern/cycles/util/util_args.h
@@ -28,4 +28,4 @@ OIIO_NAMESPACE_USING
CCL_NAMESPACE_END
-#endif /* __UTIL_ARGS_H__ */
+#endif /* __UTIL_ARGS_H__ */
diff --git a/intern/cycles/util/util_array.h b/intern/cycles/util/util_array.h
new file mode 100644
index 00000000000..5f18d434c31
--- /dev/null
+++ b/intern/cycles/util/util_array.h
@@ -0,0 +1,289 @@
+/*
+ * Copyright 2011-2018 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_ARRAY_H__
+#define __UTIL_ARRAY_H__
+
+#include <cassert>
+#include <cstring>
+
+#include "util/util_aligned_malloc.h"
+#include "util/util_guarded_allocator.h"
+#include "util/util_types.h"
+#include "util/util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Simplified version of vector, serving multiple purposes:
+ * - somewhat faster in that it does not clear memory on resize/alloc;
+ * this was actually showing up in profiles quite significantly. It
+ * also does not run any constructors/destructors
+ * - if this is used, we are not tempted to use inefficient operations
+ * - aligned allocation for CPU native data types */
+
+template<typename T, size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES>
+class array
+{
+public:
+ array()
+ : data_(NULL),
+ datasize_(0),
+ capacity_(0)
+ {}
+
+ explicit array(size_t newsize)
+ {
+ if(newsize == 0) {
+ data_ = NULL;
+ datasize_ = 0;
+ capacity_ = 0;
+ }
+ else {
+ data_ = mem_allocate(newsize);
+ datasize_ = newsize;
+ capacity_ = datasize_;
+ }
+ }
+
+ array(const array& from)
+ {
+ if(from.datasize_ == 0) {
+ data_ = NULL;
+ datasize_ = 0;
+ capacity_ = 0;
+ }
+ else {
+ data_ = mem_allocate(from.datasize_);
+ memcpy(data_, from.data_, from.datasize_*sizeof(T));
+ datasize_ = from.datasize_;
+ capacity_ = datasize_;
+ }
+ }
+
+ array& operator=(const array& from)
+ {
+ if(this != &from) {
+ resize(from.size());
+ memcpy((void*)data_, from.data_, datasize_*sizeof(T));
+ }
+
+ return *this;
+ }
+
+ array& operator=(const vector<T>& from)
+ {
+ resize(from.size());
+
+ if(from.size() > 0) {
+ memcpy(data_, &from[0], datasize_*sizeof(T));
+ }
+
+ return *this;
+ }
+
+ ~array()
+ {
+ mem_free(data_, capacity_);
+ }
+
+ bool operator==(const array<T>& other) const
+ {
+ if(datasize_ != other.datasize_) {
+ return false;
+ }
+
+ return memcmp(data_, other.data_, datasize_*sizeof(T)) == 0;
+ }
+
+ bool operator!=(const array<T>& other) const
+ {
+ return !(*this == other);
+ }
+
+ void steal_data(array& from)
+ {
+ if(this != &from) {
+ clear();
+
+ data_ = from.data_;
+ datasize_ = from.datasize_;
+ capacity_ = from.capacity_;
+
+ from.data_ = NULL;
+ from.datasize_ = 0;
+ from.capacity_ = 0;
+ }
+ }
+
+ T *steal_pointer()
+ {
+ T *ptr = data_;
+ data_ = NULL;
+ clear();
+ return ptr;
+ }
+
+ T* resize(size_t newsize)
+ {
+ if(newsize == 0) {
+ clear();
+ }
+ else if(newsize != datasize_) {
+ if(newsize > capacity_) {
+ T *newdata = mem_allocate(newsize);
+ if(newdata == NULL) {
+ /* Allocation failed, likely out of memory. */
+ clear();
+ return NULL;
+ }
+ else if(data_ != NULL) {
+ memcpy((void *)newdata,
+ data_,
+ ((datasize_ < newsize)? datasize_: newsize)*sizeof(T));
+ mem_free(data_, capacity_);
+ }
+ data_ = newdata;
+ capacity_ = newsize;
+ }
+ datasize_ = newsize;
+ }
+ return data_;
+ }
+
+ T* resize(size_t newsize, const T& value)
+ {
+ size_t oldsize = size();
+ resize(newsize);
+
+ for(size_t i = oldsize; i < size(); i++) {
+ data_[i] = value;
+ }
+
+ return data_;
+ }
+
+ void clear()
+ {
+ if(data_ != NULL) {
+ mem_free(data_, capacity_);
+ data_ = NULL;
+ }
+ datasize_ = 0;
+ capacity_ = 0;
+ }
+
+ size_t empty() const
+ {
+ return datasize_ == 0;
+ }
+
+ size_t size() const
+ {
+ return datasize_;
+ }
+
+ T* data()
+ {
+ return data_;
+ }
+
+ const T* data() const
+ {
+ return data_;
+ }
+
+ T& operator[](size_t i) const
+ {
+ assert(i < datasize_);
+ return data_[i];
+ }
+
+ void reserve(size_t newcapacity)
+ {
+ if(newcapacity > capacity_) {
+ T *newdata = mem_allocate(newcapacity);
+ if(data_ != NULL) {
+ memcpy(newdata, data_, ((datasize_ < newcapacity)? datasize_: newcapacity)*sizeof(T));
+ mem_free(data_, capacity_);
+ }
+ data_ = newdata;
+ capacity_ = newcapacity;
+ }
+ }
+
+ size_t capacity() const
+ {
+ return capacity_;
+ }
+
+ // do not use this method unless you are sure the code is not performance critical
+ void push_back_slow(const T& t)
+ {
+ if(capacity_ == datasize_)
+ {
+ reserve(datasize_ == 0 ? 1 : (size_t)((datasize_ + 1) * 1.2));
+ }
+
+ data_[datasize_++] = t;
+ }
+
+ void push_back_reserved(const T& t)
+ {
+ assert(datasize_ < capacity_);
+ push_back_slow(t);
+ }
+
+ void append(const array<T>& from)
+ {
+ if(from.size()) {
+ size_t old_size = size();
+ resize(old_size + from.size());
+ memcpy(data_ + old_size, from.data(), sizeof(T) * from.size());
+ }
+ }
+
+protected:
+ inline T* mem_allocate(size_t N)
+ {
+ if(N == 0) {
+ return NULL;
+ }
+ T *mem = (T*)util_aligned_malloc(sizeof(T)*N, alignment);
+ if(mem != NULL) {
+ util_guarded_mem_alloc(sizeof(T)*N);
+ }
+ else {
+ throw std::bad_alloc();
+ }
+ return mem;
+ }
+
+ inline void mem_free(T *mem, size_t N)
+ {
+ if(mem != NULL) {
+ util_guarded_mem_free(sizeof(T)*N);
+ util_aligned_free(mem);
+ }
+ }
+
+ T *data_;
+ size_t datasize_;
+ size_t capacity_;
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_ARRAY_H__ */
diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h
index f3c7ae546a0..477b667a6fe 100644
--- a/intern/cycles/util/util_atomic.h
+++ b/intern/cycles/util/util_atomic.h
@@ -23,12 +23,13 @@
#include "atomic_ops.h"
#define atomic_add_and_fetch_float(p, x) atomic_add_and_fetch_fl((p), (x))
+#define atomic_compare_and_swap_float(p, old_val, new_val) atomic_cas_float((p), (old_val), (new_val))
#define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1)
#define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_add_uint32((p), -1)
#define CCL_LOCAL_MEM_FENCE 0
-#define ccl_barrier(flags) (void)0
+#define ccl_barrier(flags) ((void) 0)
#else /* __KERNEL_GPU__ */
@@ -57,6 +58,20 @@ ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *so
return new_value.float_value;
}
+ccl_device_inline float atomic_compare_and_swap_float(volatile ccl_global float *dest,
+ const float old_val, const float new_val)
+{
+ union {
+ unsigned int int_value;
+ float float_value;
+ } new_value, prev_value, result;
+ prev_value.float_value = old_val;
+ new_value.float_value = new_val;
+ result.int_value = atomic_cmpxchg((volatile ccl_global unsigned int *)dest,
+ prev_value.int_value, new_value.int_value);
+ return result.float_value;
+}
+
#define atomic_fetch_and_add_uint32(p, x) atomic_add((p), (x))
#define atomic_fetch_and_inc_uint32(p) atomic_inc((p))
#define atomic_fetch_and_dec_uint32(p) atomic_dec((p))
@@ -75,6 +90,19 @@ ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *so
#define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1)
#define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_sub_uint32((p), 1)
+ccl_device_inline float atomic_compare_and_swap_float(volatile float *dest,
+ const float old_val, const float new_val)
+{
+ union {
+ unsigned int int_value;
+ float float_value;
+ } new_value, prev_value, result;
+ prev_value.float_value = old_val;
+ new_value.float_value = new_val;
+ result.int_value = atomicCAS((unsigned int *)dest, prev_value.int_value,new_value.int_value);
+ return result.float_value;
+}
+
#define CCL_LOCAL_MEM_FENCE
#define ccl_barrier(flags) __syncthreads()
@@ -82,4 +110,4 @@ ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *so
#endif /* __KERNEL_GPU__ */
-#endif /* __UTIL_ATOMIC_H__ */
+#endif /* __UTIL_ATOMIC_H__ */
diff --git a/intern/cycles/util/util_avxb.h b/intern/cycles/util/util_avxb.h
index 60d9bb44256..25ef39d39ae 100644
--- a/intern/cycles/util/util_avxb.h
+++ b/intern/cycles/util/util_avxb.h
@@ -44,23 +44,12 @@ struct avxb
__forceinline operator const __m256i( void ) const { return _mm256_castps_si256(m256); }
__forceinline operator const __m256d( void ) const { return _mm256_castps_pd(m256); }
- //__forceinline avxb ( bool a )
- // : m256(_mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)]) {}
- //__forceinline avxb ( bool a, bool b)
- // : m256(_mm_lookupmask_ps[(size_t(b) << 3) | (size_t(a) << 2) | (size_t(b) << 1) | size_t(a)]) {}
- //__forceinline avxb ( bool a, bool b, bool c, bool d)
- // : m256(_mm_lookupmask_ps[(size_t(d) << 3) | (size_t(c) << 2) | (size_t(b) << 1) | size_t(a)]) {}
- //__forceinline avxb(int mask) {
- // assert(mask >= 0 && mask < 16);
- // m128 = _mm_lookupmask_ps[mask];
- //}
-
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline avxb( FalseTy ) : m256(_mm256_setzero_ps()) {}
- __forceinline avxb( TrueTy ) : m256(_mm256_castsi256_ps(_mm256_cmpeq_epi32(_mm256_setzero_si256(), _mm256_setzero_si256()))) {}
+ __forceinline avxb( TrueTy ) : m256(_mm256_castsi256_ps(_mm256_set1_epi32(-1))) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
@@ -97,7 +86,21 @@ __forceinline const avxb operator ^=( avxb& a, const avxb& b ) { return a = a ^
////////////////////////////////////////////////////////////////////////////////
__forceinline const avxb operator !=( const avxb& a, const avxb& b ) { return _mm256_xor_ps(a, b); }
-__forceinline const avxb operator ==( const avxb& a, const avxb& b ) { return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b)); }
+__forceinline const avxb operator ==( const avxb& a, const avxb& b )
+{
+#ifdef __KERNEL_AVX2__
+ return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b));
+#else
+ __m128i a_lo = _mm_castps_si128(_mm256_extractf128_ps(a, 0));
+ __m128i a_hi = _mm_castps_si128(_mm256_extractf128_ps(a, 1));
+ __m128i b_lo = _mm_castps_si128(_mm256_extractf128_ps(b, 0));
+ __m128i b_hi = _mm_castps_si128(_mm256_extractf128_ps(b, 1));
+ __m128i c_lo = _mm_cmpeq_epi32(a_lo, b_lo);
+ __m128i c_hi = _mm_cmpeq_epi32(a_hi, b_hi);
+ __m256i result = _mm256_insertf128_si256(_mm256_castsi128_si256(c_lo), c_hi, 1);
+ return _mm256_castsi256_ps(result);
+#endif
+}
__forceinline const avxb select( const avxb& m, const avxb& t, const avxb& f ) {
#if defined(__KERNEL_SSE41__)
@@ -114,47 +117,6 @@ __forceinline const avxb select( const avxb& m, const avxb& t, const avxb& f ) {
__forceinline const avxb unpacklo( const avxb& a, const avxb& b ) { return _mm256_unpacklo_ps(a, b); }
__forceinline const avxb unpackhi( const avxb& a, const avxb& b ) { return _mm256_unpackhi_ps(a, b); }
-#define _MM256_SHUFFLE(fp7,fp6,fp5,fp4,fp3,fp2,fp1,fp0) (((fp7) << 14) | ((fp6) << 12) | ((fp5) << 10) | ((fp4) << 8) | \
- ((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0)))
-
-template<size_t i0, size_t i1, size_t i2, size_t i3, size_t i4, size_t i5, size_t i6, size_t i7>
-__forceinline const avxb shuffle( const avxb& a ) {
- return _mm256_cvtepi32_ps(_mm256_shuffle_epi32(a, _MM256_SHUFFLE(i7, i6, i5, i4, i3, i2, i1, i0)));
-}
-
-/*
-template<> __forceinline const avxb shuffle<0, 1, 0, 1, 0, 1, 0, 1>( const avxb& a ) {
- return _mm_movelh_ps(a, a);
-}
-
-template<> __forceinline const sseb shuffle<2, 3, 2, 3>( const sseb& a ) {
- return _mm_movehl_ps(a, a);
-}
-
-template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const sseb shuffle( const sseb& a, const sseb& b ) {
- return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0));
-}
-
-template<> __forceinline const sseb shuffle<0, 1, 0, 1>( const sseb& a, const sseb& b ) {
- return _mm_movelh_ps(a, b);
-}
-
-template<> __forceinline const sseb shuffle<2, 3, 2, 3>( const sseb& a, const sseb& b ) {
- return _mm_movehl_ps(b, a);
-}
-
-#if defined(__KERNEL_SSE3__)
-template<> __forceinline const sseb shuffle<0, 0, 2, 2>( const sseb& a ) { return _mm_moveldup_ps(a); }
-template<> __forceinline const sseb shuffle<1, 1, 3, 3>( const sseb& a ) { return _mm_movehdup_ps(a); }
-#endif
-
-#if defined(__KERNEL_SSE41__)
-template<size_t dst, size_t src, size_t clr> __forceinline const sseb insert( const sseb& a, const sseb& b ) { return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr); }
-template<size_t dst, size_t src> __forceinline const sseb insert( const sseb& a, const sseb& b ) { return insert<dst, src, 0>(a, b); }
-template<size_t dst> __forceinline const sseb insert( const sseb& a, const bool b ) { return insert<dst,0>(a, sseb(b)); }
-#endif
-*/
-
////////////////////////////////////////////////////////////////////////////////
/// Reduction Operations
////////////////////////////////////////////////////////////////////////////////
@@ -180,7 +142,7 @@ __forceinline size_t movemask( const avxb& a ) { return _mm256_movemask_ps(a); }
ccl_device_inline void print_avxb(const char *label, const avxb &a)
{
- printf("%s: %df %df %df %df %df %df %df %d\n",
+ printf("%s: %d %d %d %d %d %d %d %d\n",
label, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]);
}
diff --git a/intern/cycles/util/util_avxf.h b/intern/cycles/util/util_avxf.h
index 5596702ca20..f00c722f25b 100644
--- a/intern/cycles/util/util_avxf.h
+++ b/intern/cycles/util/util_avxf.h
@@ -40,8 +40,8 @@ struct avxf
__forceinline avxf(const __m256 a) : m256(a) {}
__forceinline avxf(const __m256i a) : m256(_mm256_castsi256_ps (a)) {}
- __forceinline operator const __m256&(void) const { return m256; }
- __forceinline operator __m256&(void) { return m256; }
+ __forceinline operator const __m256&() const { return m256; }
+ __forceinline operator __m256&() { return m256; }
__forceinline avxf (float a) : m256(_mm256_set1_ps(a)) {}
@@ -214,17 +214,19 @@ __forceinline const avxf nmadd(const avxf& a, const avxf& b, const avxf& c) {
#endif
}
__forceinline const avxf msub(const avxf& a, const avxf& b, const avxf& c) {
+#ifdef __KERNEL_AVX2__
return _mm256_fmsub_ps(a, b, c);
+#else
+ return (a*b) - c;
+#endif
}
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
-#ifdef __KERNEL_AVX2__
__forceinline const avxb operator <=(const avxf& a, const avxf& b) {
return _mm256_cmp_ps(a.m256, b.m256, _CMP_LE_OS);
}
-#endif
#endif
diff --git a/intern/cycles/util/util_boundbox.h b/intern/cycles/util/util_boundbox.h
index b1bd5be0df3..fe89e398840 100644
--- a/intern/cycles/util/util_boundbox.h
+++ b/intern/cycles/util/util_boundbox.h
@@ -282,4 +282,4 @@ public:
CCL_NAMESPACE_END
-#endif /* __UTIL_BOUNDBOX_H__ */
+#endif /* __UTIL_BOUNDBOX_H__ */
diff --git a/intern/cycles/util/util_color.h b/intern/cycles/util/util_color.h
index 826db469d25..e6efc7d73fc 100644
--- a/intern/cycles/util/util_color.h
+++ b/intern/cycles/util/util_color.h
@@ -240,4 +240,4 @@ ccl_device float4 color_srgb_to_linear_v4(float4 c)
CCL_NAMESPACE_END
-#endif /* __UTIL_COLOR_H__ */
+#endif /* __UTIL_COLOR_H__ */
diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h
index f17f8a560ee..864089bb118 100644
--- a/intern/cycles/util/util_debug.h
+++ b/intern/cycles/util/util_debug.h
@@ -182,4 +182,4 @@ std::ostream& operator <<(std::ostream &os,
CCL_NAMESPACE_END
-#endif /* __UTIL_DEBUG_H__ */
+#endif /* __UTIL_DEBUG_H__ */
diff --git a/intern/cycles/util/util_defines.h b/intern/cycles/util/util_defines.h
index 8bce4aca699..429cfe647ef 100644
--- a/intern/cycles/util/util_defines.h
+++ b/intern/cycles/util/util_defines.h
@@ -72,7 +72,7 @@
# if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */
# define ATTR_FALLTHROUGH __attribute__((fallthrough))
# else
-# define ATTR_FALLTHROUGH ((void)0)
+# define ATTR_FALLTHROUGH ((void) 0)
# endif
#endif /* __KERNEL_GPU__ */
@@ -104,14 +104,14 @@ template<typename T> static inline T decltype_helper(T x) { return x; }
#define CHECK_TYPE(var, type) { \
TYPEOF(var) *__tmp; \
__tmp = (type *)NULL; \
- (void)__tmp; \
-} (void)0
+ (void) __tmp; \
+} (void) 0
#define CHECK_TYPE_PAIR(var_a, var_b) { \
TYPEOF(var_a) *__tmp; \
__tmp = (typeof(var_b) *)NULL; \
- (void)__tmp; \
-} (void)0
+ (void) __tmp; \
+} (void) 0
#else
# define CHECK_TYPE(var, type)
# define CHECK_TYPE_PAIR(var_a, var_b)
@@ -128,4 +128,4 @@ template<typename T> static inline T decltype_helper(T x) { return x; }
# define util_assert(statement)
#endif
-#endif /* __UTIL_DEFINES_H__ */
+#endif /* __UTIL_DEFINES_H__ */
diff --git a/intern/cycles/util/util_foreach.h b/intern/cycles/util/util_foreach.h
index 2a74ff0a55d..fd106d58b43 100644
--- a/intern/cycles/util/util_foreach.h
+++ b/intern/cycles/util/util_foreach.h
@@ -21,4 +21,4 @@
#define foreach(x, y) for(x : y)
-#endif /* __UTIL_FOREACH_H__ */
+#endif /* __UTIL_FOREACH_H__ */
diff --git a/intern/cycles/util/util_function.h b/intern/cycles/util/util_function.h
index f3cc00329ad..72c7ce43073 100644
--- a/intern/cycles/util/util_function.h
+++ b/intern/cycles/util/util_function.h
@@ -36,4 +36,4 @@ using std::placeholders::_9;
CCL_NAMESPACE_END
-#endif /* __UTIL_FUNCTION_H__ */
+#endif /* __UTIL_FUNCTION_H__ */
diff --git a/intern/cycles/util/util_guarded_allocator.cpp b/intern/cycles/util/util_guarded_allocator.cpp
index 54fa6a80df5..ae1d217c54f 100644
--- a/intern/cycles/util/util_guarded_allocator.cpp
+++ b/intern/cycles/util/util_guarded_allocator.cpp
@@ -35,12 +35,12 @@ void util_guarded_mem_free(size_t n)
/* Public API. */
-size_t util_guarded_get_mem_used(void)
+size_t util_guarded_get_mem_used()
{
return global_stats.mem_used;
}
-size_t util_guarded_get_mem_peak(void)
+size_t util_guarded_get_mem_peak()
{
return global_stats.mem_peak;
}
diff --git a/intern/cycles/util/util_guarded_allocator.h b/intern/cycles/util/util_guarded_allocator.h
index 87c1526dee4..2c6f1790fd0 100644
--- a/intern/cycles/util/util_guarded_allocator.h
+++ b/intern/cycles/util/util_guarded_allocator.h
@@ -47,7 +47,7 @@ public:
T *allocate(size_t n, const void *hint = 0)
{
- (void)hint;
+ (void) hint;
size_t size = n * sizeof(T);
util_guarded_mem_alloc(size);
if(n == 0) {
@@ -158,8 +158,8 @@ public:
};
/* Get memory usage and peak from the guarded STL allocator. */
-size_t util_guarded_get_mem_used(void);
-size_t util_guarded_get_mem_peak(void);
+size_t util_guarded_get_mem_used();
+size_t util_guarded_get_mem_peak();
/* Call given function and keep track if it runs out of memory.
*
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index 53b7f2472bd..3868509c21b 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -148,4 +148,4 @@ ccl_device_inline half float_to_half(float f)
CCL_NAMESPACE_END
-#endif /* __UTIL_HALF_H__ */
+#endif /* __UTIL_HALF_H__ */
diff --git a/intern/cycles/util/util_hash.h b/intern/cycles/util/util_hash.h
index a8a5076fbb3..f343252eaca 100644
--- a/intern/cycles/util/util_hash.h
+++ b/intern/cycles/util/util_hash.h
@@ -68,4 +68,4 @@ ccl_device_inline float hash_int_01(uint k)
CCL_NAMESPACE_END
-#endif /* __UTIL_HASH_H__ */
+#endif /* __UTIL_HASH_H__ */
diff --git a/intern/cycles/util/util_ies.cpp b/intern/cycles/util/util_ies.cpp
index e068957325b..e1de2e0c6e4 100644
--- a/intern/cycles/util/util_ies.cpp
+++ b/intern/cycles/util/util_ies.cpp
@@ -21,6 +21,13 @@
CCL_NAMESPACE_BEGIN
+// NOTE: For some reason gcc-7.2 does not instantiate this version of allocator
+// here (used in IESTextParser). Works fine for gcc-6, gcc-7.3 and gcc-8.
+//
+// TODO(sergey): Get to the root of this issue, or confirm this is a compiler
+// issue.
+template class GuardedAllocator<char>;
+
bool IESFile::load(ustring ies)
{
clear();
diff --git a/intern/cycles/util/util_ies.h b/intern/cycles/util/util_ies.h
index 5933cb3962a..663ad649a9c 100644
--- a/intern/cycles/util/util_ies.h
+++ b/intern/cycles/util/util_ies.h
@@ -58,4 +58,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __UTIL_IES_H__ */
+#endif /* __UTIL_IES_H__ */
diff --git a/intern/cycles/util/util_image.h b/intern/cycles/util/util_image.h
index 85bdb0d8050..da5f56271c8 100644
--- a/intern/cycles/util/util_image.h
+++ b/intern/cycles/util/util_image.h
@@ -102,6 +102,6 @@ inline half util_image_cast_from_float(float value)
CCL_NAMESPACE_END
-#endif /* __UTIL_IMAGE_H__ */
+#endif /* __UTIL_IMAGE_H__ */
#include "util/util_image_impl.h"
diff --git a/intern/cycles/util/util_list.h b/intern/cycles/util/util_list.h
index f555b001186..fcf8e4f5c74 100644
--- a/intern/cycles/util/util_list.h
+++ b/intern/cycles/util/util_list.h
@@ -25,4 +25,4 @@ using std::list;
CCL_NAMESPACE_END
-#endif /* __UTIL_LIST_H__ */
+#endif /* __UTIL_LIST_H__ */
diff --git a/intern/cycles/util/util_logging.cpp b/intern/cycles/util/util_logging.cpp
index f38683bf7de..b0922db32fb 100644
--- a/intern/cycles/util/util_logging.cpp
+++ b/intern/cycles/util/util_logging.cpp
@@ -45,7 +45,7 @@ void util_logging_init(const char *argv0)
#endif
}
-void util_logging_start(void)
+void util_logging_start()
{
#ifdef WITH_CYCLES_LOGGING
using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption;
diff --git a/intern/cycles/util/util_logging.h b/intern/cycles/util/util_logging.h
index 5c84b6593d3..f66d7c92dcc 100644
--- a/intern/cycles/util/util_logging.h
+++ b/intern/cycles/util/util_logging.h
@@ -41,7 +41,7 @@ public:
void operator&(StubStream&) { }
};
-# define LOG_SUPPRESS() (true) ? (void) 0 : LogMessageVoidify() & StubStream()
+# define LOG_SUPPRESS() (true) ? ((void) 0) : LogMessageVoidify() & StubStream()
# define LOG(severity) LOG_SUPPRESS()
# define VLOG(severity) LOG_SUPPRESS()
#endif
@@ -52,7 +52,7 @@ struct int2;
struct float3;
void util_logging_init(const char *argv0);
-void util_logging_start(void);
+void util_logging_start();
void util_logging_verbosity_set(int verbosity);
std::ostream& operator <<(std::ostream &os,
@@ -62,4 +62,4 @@ std::ostream& operator <<(std::ostream &os,
CCL_NAMESPACE_END
-#endif /* __UTIL_LOGGING_H__ */
+#endif /* __UTIL_LOGGING_H__ */
diff --git a/intern/cycles/util/util_map.h b/intern/cycles/util/util_map.h
index 3c9288417cf..1952d33ada8 100644
--- a/intern/cycles/util/util_map.h
+++ b/intern/cycles/util/util_map.h
@@ -28,4 +28,4 @@ using std::unordered_map;
CCL_NAMESPACE_END
-#endif /* __UTIL_MAP_H__ */
+#endif /* __UTIL_MAP_H__ */
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 52aeb8d8599..6167119f873 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -157,7 +157,7 @@ ccl_device_inline T max4(const T& a, const T& b, const T& c, const T& d)
{
return max(max(a,b),max(c,d));
}
-#endif /* __KERNEL_GPU__ */
+#endif /* __KERNEL_GPU__ */
ccl_device_inline float min4(float a, float b, float c, float d)
{
@@ -220,7 +220,31 @@ ccl_device_inline float __uint_as_float(uint i)
u.i = i;
return u.f;
}
-#endif /* __KERNEL_OPENCL__ */
+
+ccl_device_inline int4 __float4_as_int4(float4 f)
+{
+#ifdef __KERNEL_SSE__
+ return int4(_mm_castps_si128(f.m128));
+ #else
+ return make_int4(__float_as_int(f.x),
+ __float_as_int(f.y),
+ __float_as_int(f.z),
+ __float_as_int(f.w));
+#endif
+}
+
+ccl_device_inline float4 __int4_as_float4(int4 i)
+{
+#ifdef __KERNEL_SSE__
+ return float4(_mm_castsi128_ps(i.m128));
+#else
+ return make_float4(__int_as_float(i.x),
+ __int_as_float(i.y),
+ __int_as_float(i.z),
+ __int_as_float(i.w));
+#endif
+}
+#endif /* __KERNEL_OPENCL__ */
/* Versions of functions which are safe for fast math. */
ccl_device_inline bool isnan_safe(float f)
@@ -615,4 +639,4 @@ ccl_device_inline float2 map_to_sphere(const float3 co)
CCL_NAMESPACE_END
-#endif /* __UTIL_MATH_H__ */
+#endif /* __UTIL_MATH_H__ */
diff --git a/intern/cycles/util/util_math_cdf.h b/intern/cycles/util/util_math_cdf.h
index 79643fe26e3..983855e3e9b 100644
--- a/intern/cycles/util/util_math_cdf.h
+++ b/intern/cycles/util/util_math_cdf.h
@@ -75,4 +75,4 @@ void util_cdf_inverted(const int resolution,
CCL_NAMESPACE_END
-#endif /* __UTIL_MATH_H_CDF__ */
+#endif /* __UTIL_MATH_H_CDF__ */
diff --git a/intern/cycles/util/util_math_fast.h b/intern/cycles/util/util_math_fast.h
index d3960deb3b4..323d40058e5 100644
--- a/intern/cycles/util/util_math_fast.h
+++ b/intern/cycles/util/util_math_fast.h
@@ -58,6 +58,11 @@ ccl_device_inline float madd(const float a, const float b, const float c)
return a * b + c;
}
+ccl_device_inline float4 madd4(const float4 a, const float4 b, const float4 c)
+{
+ return a * b + c;
+}
+
/*
* FAST & APPROXIMATE MATH
*
@@ -438,6 +443,29 @@ ccl_device_inline float fast_expf(float x)
return fast_exp2f(x / M_LN2_F);
}
+#ifndef __KERNEL_GPU__
+ccl_device float4 fast_exp2f4(float4 x)
+{
+ const float4 one = make_float4(1.0f);
+ const float4 limit = make_float4(126.0f);
+ x = clamp(x, -limit, limit);
+ int4 m = make_int4(x);
+ x = one - (one - (x - make_float4(m)));
+ float4 r = make_float4(1.33336498402e-3f);
+ r = madd4(x, r, make_float4(9.810352697968e-3f));
+ r = madd4(x, r, make_float4(5.551834031939e-2f));
+ r = madd4(x, r, make_float4(0.2401793301105f));
+ r = madd4(x, r, make_float4(0.693144857883f));
+ r = madd4(x, r, make_float4(1.0f));
+ return __int4_as_float4(__float4_as_int4(r) + (m << 23));
+}
+
+ccl_device_inline float4 fast_expf4(float4 x)
+{
+ return fast_exp2f4(x / M_LN2_F);
+}
+#endif
+
ccl_device_inline float fast_exp10(float x)
{
/* Examined 2217701018 values of exp10 on [-37.9290009,37.9290009]:
diff --git a/intern/cycles/util/util_math_float2.h b/intern/cycles/util/util_math_float2.h
index 6f9d0855d50..e937509367f 100644
--- a/intern/cycles/util/util_math_float2.h
+++ b/intern/cycles/util/util_math_float2.h
@@ -224,4 +224,4 @@ ccl_device_inline float2 interp(const float2& a, const float2& b, float t)
CCL_NAMESPACE_END
-#endif /* __UTIL_MATH_FLOAT2_H__ */
+#endif /* __UTIL_MATH_FLOAT2_H__ */
diff --git a/intern/cycles/util/util_math_float3.h b/intern/cycles/util/util_math_float3.h
index 75265c1c9a2..a54a3f3087c 100644
--- a/intern/cycles/util/util_math_float3.h
+++ b/intern/cycles/util/util_math_float3.h
@@ -419,4 +419,4 @@ ccl_device_inline float3 ensure_finite3(float3 v)
CCL_NAMESPACE_END
-#endif /* __UTIL_MATH_FLOAT3_H__ */
+#endif /* __UTIL_MATH_FLOAT3_H__ */
diff --git a/intern/cycles/util/util_math_float4.h b/intern/cycles/util/util_math_float4.h
index aa7e56fefe9..479ccf202ba 100644
--- a/intern/cycles/util/util_math_float4.h
+++ b/intern/cycles/util/util_math_float4.h
@@ -38,6 +38,7 @@ ccl_device_inline float4 operator+(const float4& a, const float4& b);
ccl_device_inline float4 operator-(const float4& a, const float4& b);
ccl_device_inline float4 operator+=(float4& a, const float4& b);
ccl_device_inline float4 operator*=(float4& a, const float4& b);
+ccl_device_inline float4 operator*=(float4& a, float f);
ccl_device_inline float4 operator/=(float4& a, float f);
ccl_device_inline int4 operator<(const float4& a, const float4& b);
@@ -58,6 +59,7 @@ ccl_device_inline float4 normalize(const float4& a);
ccl_device_inline float4 safe_normalize(const float4& a);
ccl_device_inline float4 min(const float4& a, const float4& b);
ccl_device_inline float4 max(const float4& a, const float4& b);
+ccl_device_inline float4 clamp(const float4& a, const float4& mn, const float4& mx);
ccl_device_inline float4 fabs(const float4& a);
#endif /* !__KERNEL_OPENCL__*/
@@ -168,6 +170,11 @@ ccl_device_inline float4 operator*=(float4& a, const float4& b)
return a = a * b;
}
+ccl_device_inline float4 operator*=(float4& a, float f)
+{
+ return a = a * f;
+}
+
ccl_device_inline float4 operator/=(float4& a, float f)
{
return a = a / f;
@@ -333,6 +340,11 @@ ccl_device_inline float4 max(const float4& a, const float4& b)
#endif
}
+ccl_device_inline float4 clamp(const float4& a, const float4& mn, const float4& mx)
+{
+ return min(max(a, mn), mx);
+}
+
ccl_device_inline float4 fabs(const float4& a)
{
#ifdef __KERNEL_SSE__
@@ -445,4 +457,4 @@ ccl_device_inline float4 load_float4(const float *v)
CCL_NAMESPACE_END
-#endif /* __UTIL_MATH_FLOAT4_H__ */
+#endif /* __UTIL_MATH_FLOAT4_H__ */
diff --git a/intern/cycles/util/util_math_int2.h b/intern/cycles/util/util_math_int2.h
index 828c49a131c..dd401d9a091 100644
--- a/intern/cycles/util/util_math_int2.h
+++ b/intern/cycles/util/util_math_int2.h
@@ -74,4 +74,4 @@ ccl_device_inline int2 operator/(const int2 &a, const int2 &b)
CCL_NAMESPACE_END
-#endif /* __UTIL_MATH_INT2_H__ */
+#endif /* __UTIL_MATH_INT2_H__ */
diff --git a/intern/cycles/util/util_math_int3.h b/intern/cycles/util/util_math_int3.h
index 81b10f31f4a..2f4752f90f1 100644
--- a/intern/cycles/util/util_math_int3.h
+++ b/intern/cycles/util/util_math_int3.h
@@ -113,4 +113,4 @@ ccl_device_inline int3 operator-(const int3 &a, const int3 &b)
CCL_NAMESPACE_END
-#endif /* __UTIL_MATH_INT3_H__ */
+#endif /* __UTIL_MATH_INT3_H__ */
diff --git a/intern/cycles/util/util_math_int4.h b/intern/cycles/util/util_math_int4.h
index 79a8c0841e7..763c42318d5 100644
--- a/intern/cycles/util/util_math_int4.h
+++ b/intern/cycles/util/util_math_int4.h
@@ -31,6 +31,10 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline int4 operator+(const int4& a, const int4& b);
ccl_device_inline int4 operator+=(int4& a, const int4& b);
ccl_device_inline int4 operator>>(const int4& a, int i);
+ccl_device_inline int4 operator<<(const int4& a, int i);
+ccl_device_inline int4 operator<(const int4& a, const int4& b);
+ccl_device_inline int4 operator>=(const int4& a, const int4& b);
+ccl_device_inline int4 operator&(const int4& a, const int4& b);
ccl_device_inline int4 min(int4 a, int4 b);
ccl_device_inline int4 max(int4 a, int4 b);
ccl_device_inline int4 clamp(const int4& a, const int4& mn, const int4& mx);
@@ -65,6 +69,42 @@ ccl_device_inline int4 operator>>(const int4& a, int i)
#endif
}
+ccl_device_inline int4 operator<<(const int4& a, int i)
+{
+#ifdef __KERNEL_SSE__
+ return int4(_mm_slli_epi32(a.m128, i));
+#else
+ return make_int4(a.x << i, a.y << i, a.z << i, a.w << i);
+#endif
+}
+
+ccl_device_inline int4 operator<(const int4& a, const int4& b)
+{
+#ifdef __KERNEL_SSE__
+ return int4(_mm_cmplt_epi32(a.m128, b.m128));
+#else
+ return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w);
+#endif
+}
+
+ccl_device_inline int4 operator>=(const int4& a, const int4& b)
+{
+#ifdef __KERNEL_SSE__
+ return int4(_mm_xor_si128(_mm_set1_epi32(0xffffffff), _mm_cmplt_epi32(a.m128, b.m128)));
+#else
+ return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w);
+#endif
+}
+
+ccl_device_inline int4 operator&(const int4& a, const int4& b)
+{
+#ifdef __KERNEL_SSE__
+ return int4(_mm_and_si128(a.m128, b.m128));
+#else
+ return make_int4(a.x & b.x, a.y & b.y, a.z & b.z, a.w & b.w);
+#endif
+}
+
ccl_device_inline int4 min(int4 a, int4 b)
{
#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
@@ -116,4 +156,4 @@ ccl_device_inline int4 load_int4(const int *v)
CCL_NAMESPACE_END
-#endif /* __UTIL_MATH_INT4_H__ */
+#endif /* __UTIL_MATH_INT4_H__ */
diff --git a/intern/cycles/util/util_math_intersect.h b/intern/cycles/util/util_math_intersect.h
index b5fbb24091f..190c2f5d6b0 100644
--- a/intern/cycles/util/util_math_intersect.h
+++ b/intern/cycles/util/util_math_intersect.h
@@ -219,4 +219,4 @@ ccl_device bool ray_quad_intersect(float3 ray_P, float3 ray_D,
CCL_NAMESPACE_END
-#endif /* __UTIL_MATH_INTERSECT_H__ */
+#endif /* __UTIL_MATH_INTERSECT_H__ */
diff --git a/intern/cycles/util/util_md5.h b/intern/cycles/util/util_md5.h
index 9023ccee4c2..f8c0115d8ce 100644
--- a/intern/cycles/util/util_md5.h
+++ b/intern/cycles/util/util_md5.h
@@ -58,4 +58,4 @@ string util_md5_string(const string& str);
CCL_NAMESPACE_END
-#endif /* __UTIL_MD5_H__ */
+#endif /* __UTIL_MD5_H__ */
diff --git a/intern/cycles/util/util_murmurhash.cpp b/intern/cycles/util/util_murmurhash.cpp
new file mode 100644
index 00000000000..68b2f2031be
--- /dev/null
+++ b/intern/cycles/util/util_murmurhash.cpp
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2018 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This is taken from alShaders/Cryptomatte/MurmurHash3.h:
+ *
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain. The author hereby disclaims copyright to this source code.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "util/util_algorithm.h"
+#include "util/util_murmurhash.h"
+
+#if defined(_MSC_VER)
+# define ROTL32(x,y) _rotl(x,y)
+# define ROTL64(x,y) _rotl64(x,y)
+# define BIG_CONSTANT(x) (x)
+#else
+ccl_device_inline uint32_t rotl32(uint32_t x, int8_t r)
+{
+ return (x << r) | (x >> (32 - r));
+}
+# define ROTL32(x,y) rotl32(x,y)
+# define BIG_CONSTANT(x) (x##LLU)
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+/* Block read - if your platform needs to do endian-swapping or can only
+ * handle aligned reads, do the conversion here. */
+ccl_device_inline uint32_t mm_hash_getblock32(const uint32_t *p, int i)
+{
+ return p[i];
+}
+
+/* Finalization mix - force all bits of a hash block to avalanche */
+ccl_device_inline uint32_t mm_hash_fmix32 ( uint32_t h )
+{
+ h ^= h >> 16;
+ h *= 0x85ebca6b;
+ h ^= h >> 13;
+ h *= 0xc2b2ae35;
+ h ^= h >> 16;
+ return h;
+}
+
+uint32_t util_murmur_hash3(const void *key, int len, uint32_t seed)
+{
+ const uint8_t * data = (const uint8_t*)key;
+ const int nblocks = len / 4;
+
+ uint32_t h1 = seed;
+
+ const uint32_t c1 = 0xcc9e2d51;
+ const uint32_t c2 = 0x1b873593;
+
+ const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
+
+ for(int i = -nblocks; i; i++) {
+ uint32_t k1 = mm_hash_getblock32(blocks,i);
+
+ k1 *= c1;
+ k1 = ROTL32(k1,15);
+ k1 *= c2;
+
+ h1 ^= k1;
+ h1 = ROTL32(h1,13);
+ h1 = h1 * 5 + 0xe6546b64;
+ }
+
+ const uint8_t *tail = (const uint8_t*)(data + nblocks*4);
+
+ uint32_t k1 = 0;
+
+ switch(len & 3) {
+ case 3:
+ k1 ^= tail[2] << 16;
+ ATTR_FALLTHROUGH;
+ case 2:
+ k1 ^= tail[1] << 8;
+ ATTR_FALLTHROUGH;
+ case 1:
+ k1 ^= tail[0];
+ k1 *= c1;
+ k1 = ROTL32(k1,15);
+ k1 *= c2;
+ h1 ^= k1;
+ }
+
+ h1 ^= len;
+ h1 = mm_hash_fmix32(h1);
+ return h1;
+}
+
+/* This is taken from the cryptomatte specification 1.0 */
+float util_hash_to_float(uint32_t hash)
+{
+ uint32_t mantissa = hash & (( 1 << 23) - 1);
+ uint32_t exponent = (hash >> 23) & ((1 << 8) - 1);
+ exponent = max(exponent, (uint32_t) 1);
+ exponent = min(exponent, (uint32_t) 254);
+ exponent = exponent << 23;
+ uint32_t sign = (hash >> 31);
+ sign = sign << 31;
+ uint32_t float_bits = sign | exponent | mantissa;
+ float f;
+ memcpy(&f, &float_bits, sizeof(uint32_t));
+ return f;
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_murmurhash.h b/intern/cycles/util/util_murmurhash.h
new file mode 100644
index 00000000000..3e7897d3ae6
--- /dev/null
+++ b/intern/cycles/util/util_murmurhash.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2018 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef __UTIL_MURMURHASH_H__
+#define __UTIL_MURMURHASH_H__
+
+#include "util/util_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+uint32_t util_murmur_hash3(const void *key, int len, uint32_t seed);
+float util_hash_to_float(uint32_t hash);
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_MURMURHASH_H__ */
diff --git a/intern/cycles/util/util_opengl.h b/intern/cycles/util/util_opengl.h
index 0b5462e0a09..2ca7b7e4c87 100644
--- a/intern/cycles/util/util_opengl.h
+++ b/intern/cycles/util/util_opengl.h
@@ -28,4 +28,4 @@
# define mxMakeCurrentContext(x) (x)
#endif
-#endif /* __UTIL_OPENGL_H__ */
+#endif /* __UTIL_OPENGL_H__ */
diff --git a/intern/cycles/util/util_optimization.h b/intern/cycles/util/util_optimization.h
index 3b3627242d5..5267bd9a97a 100644
--- a/intern/cycles/util/util_optimization.h
+++ b/intern/cycles/util/util_optimization.h
@@ -68,4 +68,4 @@
#endif
-#endif /* __UTIL_OPTIMIZATION_H__ */
+#endif /* __UTIL_OPTIMIZATION_H__ */
diff --git a/intern/cycles/util/util_param.h b/intern/cycles/util/util_param.h
index 246b5cb7d63..4453c66aae2 100644
--- a/intern/cycles/util/util_param.h
+++ b/intern/cycles/util/util_param.h
@@ -30,4 +30,4 @@ OIIO_NAMESPACE_USING
CCL_NAMESPACE_END
-#endif /* __UTIL_PARAM_H__ */
+#endif /* __UTIL_PARAM_H__ */
diff --git a/intern/cycles/util/util_path.cpp b/intern/cycles/util/util_path.cpp
index 51b7944705e..93080a6c80c 100644
--- a/intern/cycles/util/util_path.cpp
+++ b/intern/cycles/util/util_path.cpp
@@ -614,7 +614,7 @@ bool path_exists(const string& path)
return 0;
}
return st.st_mode != 0;
-#endif /* _WIN32 */
+#endif /* _WIN32 */
}
bool path_is_directory(const string& path)
diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h
index 3ef15c5c09a..4ed9ebd60ff 100644
--- a/intern/cycles/util/util_progress.h
+++ b/intern/cycles/util/util_progress.h
@@ -114,7 +114,7 @@ public:
return cancel_message;
}
- void set_cancel_callback(function<void(void)> function)
+ void set_cancel_callback(function<void()> function)
{
cancel_cb = function;
}
@@ -323,7 +323,7 @@ public:
}
}
- void set_update_callback(function<void(void)> function)
+ void set_update_callback(function<void()> function)
{
update_cb = function;
}
@@ -331,8 +331,8 @@ public:
protected:
thread_mutex progress_mutex;
thread_mutex update_mutex;
- function<void(void)> update_cb;
- function<void(void)> cancel_cb;
+ function<void()> update_cb;
+ function<void()> cancel_cb;
/* pixel_samples counts how many samples have been rendered over all pixel, not just per pixel.
* This makes the progress estimate more accurate when tiles with different sizes are used.
@@ -365,4 +365,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __UTIL_PROGRESS_H__ */
+#endif /* __UTIL_PROGRESS_H__ */
diff --git a/intern/cycles/util/util_projection.h b/intern/cycles/util/util_projection.h
index 26b4843928c..d1af013ae3a 100644
--- a/intern/cycles/util/util_projection.h
+++ b/intern/cycles/util/util_projection.h
@@ -169,8 +169,8 @@ ccl_device_inline ProjectionTransform projection_orthographic(float znear, float
return ProjectionTransform(t);
}
-#endif /* __KERNEL_GPU__ */
+#endif /* __KERNEL_GPU__ */
CCL_NAMESPACE_END
-#endif /* __UTIL_PROJECTION_H__ */
+#endif /* __UTIL_PROJECTION_H__ */
diff --git a/intern/cycles/util/util_queue.h b/intern/cycles/util/util_queue.h
index 622f4fe3e47..0a2b7718f57 100644
--- a/intern/cycles/util/util_queue.h
+++ b/intern/cycles/util/util_queue.h
@@ -25,4 +25,4 @@ using std::queue;
CCL_NAMESPACE_END
-#endif /* __UTIL_LIST_H__ */
+#endif /* __UTIL_QUEUE_H__ */
diff --git a/intern/cycles/util/util_rect.h b/intern/cycles/util/util_rect.h
index d13baefe85e..389669acf2e 100644
--- a/intern/cycles/util/util_rect.h
+++ b/intern/cycles/util/util_rect.h
@@ -69,4 +69,4 @@ ccl_device_inline int rect_size(int4 rect)
CCL_NAMESPACE_END
-#endif /* __UTIL_RECT_H__ */
+#endif /* __UTIL_RECT_H__ */
diff --git a/intern/cycles/util/util_set.h b/intern/cycles/util/util_set.h
index 298e1f7729a..a9c56bb4919 100644
--- a/intern/cycles/util/util_set.h
+++ b/intern/cycles/util/util_set.h
@@ -31,4 +31,4 @@ using std::unordered_set;
CCL_NAMESPACE_END
-#endif /* __UTIL_SET_H__ */
+#endif /* __UTIL_SET_H__ */
diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h
index cc7f436c8fe..565ea768089 100644
--- a/intern/cycles/util/util_simd.h
+++ b/intern/cycles/util/util_simd.h
@@ -219,9 +219,9 @@ __forceinline size_t __bscf(size_t& v)
return i;
}
-#endif /* __KERNEL_64_BIT__ */
+#endif /* __KERNEL_64_BIT__ */
-#else /* _WIN32 */
+#else /* _WIN32 */
__forceinline unsigned int __popcnt(unsigned int in) {
int r = 0; asm ("popcnt %1,%0" : "=r"(r) : "r"(in)); return r;
@@ -344,7 +344,7 @@ __forceinline size_t __bscf(size_t& v)
}
#endif
-#endif /* _WIN32 */
+#endif /* _WIN32 */
/* Test __KERNEL_SSE41__ for MSVC which does not define __SSE4_1__, and test
* __SSE4_1__ to avoid OpenImageIO conflicts with our emulation macros on other
@@ -442,7 +442,7 @@ __forceinline __m128 _mm_round_ps_emu( __m128 value, const int flags)
return value;
}
-#endif /* !(defined(__KERNEL_SSE41__) || defined(__SSE4_1__) || defined(__SSE4_2__)) */
+#endif /* !(defined(__KERNEL_SSE41__) || defined(__SSE4_1__) || defined(__SSE4_2__)) */
#else /* __KERNEL_SSE2__ */
@@ -470,7 +470,7 @@ ccl_device_inline int __bsr(int value)
return bit;
}
-#endif /* __KERNEL_SSE2__ */
+#endif /* __KERNEL_SSE2__ */
/* quiet unused define warnings */
#if defined(__KERNEL_SSE2__) || \
@@ -484,6 +484,6 @@ ccl_device_inline int __bsr(int value)
CCL_NAMESPACE_END
-#endif /* __KERNEL_GPU__ */
+#endif /* __KERNEL_GPU__ */
-#endif /* __UTIL_SIMD_TYPES_H__ */
+#endif /* __UTIL_SIMD_TYPES_H__ */
diff --git a/intern/cycles/util/util_sky_model.h b/intern/cycles/util/util_sky_model.h
index 237e4e61bf5..2e593f58c39 100644
--- a/intern/cycles/util/util_sky_model.h
+++ b/intern/cycles/util/util_sky_model.h
@@ -437,6 +437,6 @@ double arhosekskymodel_solar_radiance(ArHosekSkyModelState* state,
double wavelength);
-#endif // _SKY_MODEL_H_
+#endif // _SKY_MODEL_H_
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_sseb.h b/intern/cycles/util/util_sseb.h
index 115b133c662..f6810505126 100644
--- a/intern/cycles/util/util_sseb.h
+++ b/intern/cycles/util/util_sseb.h
@@ -177,7 +177,7 @@ __forceinline size_t movemask( const sseb& a ) { return _mm_movemask_ps(a); }
ccl_device_inline void print_sseb(const char *label, const sseb &a)
{
- printf("%s: %df %df %df %d\n",
+ printf("%s: %d %d %d %d\n",
label, a[0], a[1], a[2], a[3]);
}
diff --git a/intern/cycles/util/util_ssef.h b/intern/cycles/util/util_ssef.h
index b5623860e33..66670c9a779 100644
--- a/intern/cycles/util/util_ssef.h
+++ b/intern/cycles/util/util_ssef.h
@@ -44,8 +44,8 @@ struct ssef
__forceinline ssef& operator=(const ssef& other) { m128 = other.m128; return *this; }
__forceinline ssef(const __m128 a) : m128(a) {}
- __forceinline operator const __m128&(void) const { return m128; }
- __forceinline operator __m128&(void) { return m128; }
+ __forceinline operator const __m128&() const { return m128; }
+ __forceinline operator __m128&() { return m128; }
__forceinline ssef (float a) : m128(_mm_set1_ps(a)) {}
__forceinline ssef (float a, float b, float c, float d) : m128(_mm_setr_ps(a, b, c, d)) {}
@@ -517,12 +517,12 @@ ccl_device_inline float len3(const ssef& a)
/* faster version for SSSE3 */
typedef ssei shuffle_swap_t;
-ccl_device_inline shuffle_swap_t shuffle_swap_identity(void)
+ccl_device_inline shuffle_swap_t shuffle_swap_identity()
{
return _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
}
-ccl_device_inline shuffle_swap_t shuffle_swap_swap(void)
+ccl_device_inline shuffle_swap_t shuffle_swap_swap()
{
return _mm_set_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
}
@@ -537,12 +537,12 @@ ccl_device_inline const ssef shuffle_swap(const ssef& a, const shuffle_swap_t& s
/* somewhat slower version for SSE2 */
typedef int shuffle_swap_t;
-ccl_device_inline shuffle_swap_t shuffle_swap_identity(void)
+ccl_device_inline shuffle_swap_t shuffle_swap_identity()
{
return 0;
}
-ccl_device_inline shuffle_swap_t shuffle_swap_swap(void)
+ccl_device_inline shuffle_swap_t shuffle_swap_swap()
{
return 1;
}
diff --git a/intern/cycles/util/util_stack_allocator.h b/intern/cycles/util/util_stack_allocator.h
index 79a535bd170..4e978e18bee 100644
--- a/intern/cycles/util/util_stack_allocator.h
+++ b/intern/cycles/util/util_stack_allocator.h
@@ -53,7 +53,7 @@ public:
T *allocate(size_t n, const void *hint = 0)
{
- (void)hint;
+ (void) hint;
if(n == 0) {
return NULL;
}
@@ -164,4 +164,4 @@ private:
CCL_NAMESPACE_END
-#endif /* __UTIL_GUARDED_ALLOCATOR_H__ */
+#endif /* __UTIL_STACK_ALLOCATOR_H__ */
diff --git a/intern/cycles/util/util_static_assert.h b/intern/cycles/util/util_static_assert.h
index dc3cb3f6ecc..b1c6c374693 100644
--- a/intern/cycles/util/util_static_assert.h
+++ b/intern/cycles/util/util_static_assert.h
@@ -47,4 +47,4 @@ CCL_NAMESPACE_BEGIN
CCL_NAMESPACE_END
-#endif /* __UTIL_STATIC_ASSERT_H__ */
+#endif /* __UTIL_STATIC_ASSERT_H__ */
diff --git a/intern/cycles/util/util_stats.h b/intern/cycles/util/util_stats.h
index 7667f58eb7d..0ba58422a67 100644
--- a/intern/cycles/util/util_stats.h
+++ b/intern/cycles/util/util_stats.h
@@ -44,4 +44,4 @@ public:
CCL_NAMESPACE_END
-#endif /* __UTIL_STATS_H__ */
+#endif /* __UTIL_STATS_H__ */
diff --git a/intern/cycles/util/util_string.h b/intern/cycles/util/util_string.h
index 3a4f4398158..f17112c30d2 100644
--- a/intern/cycles/util/util_string.h
+++ b/intern/cycles/util/util_string.h
@@ -74,4 +74,4 @@ string string_human_readable_number(size_t num);
CCL_NAMESPACE_END
-#endif /* __UTIL_STRING_H__ */
+#endif /* __UTIL_STRING_H__ */
diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp
index 1b039888452..34f428f111c 100644
--- a/intern/cycles/util/util_system.cpp
+++ b/intern/cycles/util/util_system.cpp
@@ -52,14 +52,14 @@ int system_cpu_group_thread_count(int group)
util_windows_init_numa_groups();
return GetActiveProcessorCount(group);
#elif defined(__APPLE__)
- (void)group;
+ (void) group;
int count;
size_t len = sizeof(count);
int mib[2] = { CTL_HW, HW_NCPU };
sysctl(mib, 2, &count, &len, NULL, 0);
return count;
#else
- (void)group;
+ (void) group;
return sysconf(_SC_NPROCESSORS_ONLN);
#endif
}
diff --git a/intern/cycles/util/util_system.h b/intern/cycles/util/util_system.h
index 42ad72356b9..241ac897157 100644
--- a/intern/cycles/util/util_system.h
+++ b/intern/cycles/util/util_system.h
@@ -46,4 +46,4 @@ size_t system_physical_ram();
CCL_NAMESPACE_END
-#endif /* __UTIL_SYSTEM_H__ */
+#endif /* __UTIL_SYSTEM_H__ */
diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h
index f752e81128d..233cfe33305 100644
--- a/intern/cycles/util/util_texture.h
+++ b/intern/cycles/util/util_texture.h
@@ -89,4 +89,4 @@ typedef struct TextureInfo {
CCL_NAMESPACE_END
-#endif /* __UTIL_TEXTURE_H__ */
+#endif /* __UTIL_TEXTURE_H__ */
diff --git a/intern/cycles/util/util_thread.cpp b/intern/cycles/util/util_thread.cpp
index 16a8591a8a9..37d8bdbd4b0 100644
--- a/intern/cycles/util/util_thread.cpp
+++ b/intern/cycles/util/util_thread.cpp
@@ -21,7 +21,7 @@
CCL_NAMESPACE_BEGIN
-thread::thread(function<void(void)> run_cb, int group)
+thread::thread(function<void()> run_cb, int group)
: run_cb_(run_cb),
joined_(false),
group_(group)
diff --git a/intern/cycles/util/util_thread.h b/intern/cycles/util/util_thread.h
index f39fcfb4279..6250bb95dcf 100644
--- a/intern/cycles/util/util_thread.h
+++ b/intern/cycles/util/util_thread.h
@@ -46,14 +46,14 @@ typedef std::condition_variable thread_condition_variable;
class thread {
public:
- thread(function<void(void)> run_cb, int group = -1);
+ thread(function<void()> run_cb, int group = -1);
~thread();
static void *run(void *arg);
bool join();
protected:
- function<void(void)> run_cb_;
+ function<void()> run_cb_;
std::thread thread_;
bool joined_;
int group_;
@@ -138,4 +138,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __UTIL_THREAD_H__ */
+#endif /* __UTIL_THREAD_H__ */
diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h
index e781f85dded..e4cadd3e81a 100644
--- a/intern/cycles/util/util_transform.h
+++ b/intern/cycles/util/util_transform.h
@@ -424,6 +424,31 @@ ccl_device void transform_motion_array_interpolate(Transform *tfm,
#ifndef __KERNEL_GPU__
+#ifdef WITH_EMBREE
+ccl_device void transform_motion_array_interpolate_straight(Transform *tfm,
+ const ccl_global DecomposedTransform *motion,
+ uint numsteps,
+ float time)
+{
+ /* Figure out which steps we need to interpolate. */
+ int maxstep = numsteps - 1;
+ int step = min((int)(time*maxstep), maxstep - 1);
+ float t = time * maxstep - step;
+
+ const ccl_global DecomposedTransform *a = motion + step;
+ const ccl_global DecomposedTransform *b = motion + step + 1;
+ Transform step1, step2;
+
+ transform_compose(&step1, a);
+ transform_compose(&step2, b);
+
+ /* matrix lerp */
+ tfm->x = (1.0f - t) * step1.x + t * step2.x;
+ tfm->y = (1.0f - t) * step1.y + t * step2.y;
+ tfm->z = (1.0f - t) * step1.z + t * step2.z;
+}
+#endif
+
class BoundBox2D;
ccl_device_inline bool operator==(const DecomposedTransform& A, const DecomposedTransform& B)
@@ -470,4 +495,4 @@ OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_direction_transposed)
CCL_NAMESPACE_END
-#endif /* __UTIL_TRANSFORM_H__ */
+#endif /* __UTIL_TRANSFORM_H__ */
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index 96c549b9be5..535048d8f8c 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -156,4 +156,4 @@ CCL_NAMESPACE_END
#endif
#endif
-#endif /* __UTIL_TYPES_H__ */
+#endif /* __UTIL_TYPES_H__ */
diff --git a/intern/cycles/util/util_types_float3.h b/intern/cycles/util/util_types_float3.h
index 28146ad04f7..ed2300e7996 100644
--- a/intern/cycles/util/util_types_float3.h
+++ b/intern/cycles/util/util_types_float3.h
@@ -35,8 +35,8 @@ struct ccl_try_align(16) float3 {
__forceinline float3(const float3& a);
__forceinline explicit float3(const __m128& a);
- __forceinline operator const __m128&(void) const;
- __forceinline operator __m128&(void);
+ __forceinline operator const __m128&() const;
+ __forceinline operator __m128&();
__forceinline float3& operator =(const float3& a);
#else /* __KERNEL_SSE__ */
diff --git a/intern/cycles/util/util_types_float3_impl.h b/intern/cycles/util/util_types_float3_impl.h
index 45f61767d3f..2e840a5c399 100644
--- a/intern/cycles/util/util_types_float3_impl.h
+++ b/intern/cycles/util/util_types_float3_impl.h
@@ -43,12 +43,12 @@ __forceinline float3::float3(const __m128& a)
{
}
-__forceinline float3::operator const __m128&(void) const
+__forceinline float3::operator const __m128&() const
{
return m128;
}
-__forceinline float3::operator __m128&(void)
+__forceinline float3::operator __m128&()
{
return m128;
}
diff --git a/intern/cycles/util/util_types_float4.h b/intern/cycles/util/util_types_float4.h
index 154391f6881..5c10d483c2e 100644
--- a/intern/cycles/util/util_types_float4.h
+++ b/intern/cycles/util/util_types_float4.h
@@ -36,8 +36,8 @@ struct ccl_try_align(16) float4 {
__forceinline float4();
__forceinline explicit float4(const __m128& a);
- __forceinline operator const __m128&(void) const;
- __forceinline operator __m128&(void);
+ __forceinline operator const __m128&() const;
+ __forceinline operator __m128&();
__forceinline float4& operator =(const float4& a);
diff --git a/intern/cycles/util/util_types_float4_impl.h b/intern/cycles/util/util_types_float4_impl.h
index a49fac65b10..a83148031f1 100644
--- a/intern/cycles/util/util_types_float4_impl.h
+++ b/intern/cycles/util/util_types_float4_impl.h
@@ -38,12 +38,12 @@ __forceinline float4::float4(const __m128& a)
{
}
-__forceinline float4::operator const __m128&(void) const
+__forceinline float4::operator const __m128&() const
{
return m128;
}
-__forceinline float4::operator __m128&(void)
+__forceinline float4::operator __m128&()
{
return m128;
}
diff --git a/intern/cycles/util/util_types_float8.h b/intern/cycles/util/util_types_float8.h
index 64ec5a71355..08720b8ff48 100644
--- a/intern/cycles/util/util_types_float8.h
+++ b/intern/cycles/util/util_types_float8.h
@@ -37,7 +37,7 @@ CCL_NAMESPACE_BEGIN
#ifndef __KERNEL_GPU__
-struct ccl_try_align(16) float8 {
+struct ccl_try_align(32) float8 {
#ifdef __KERNEL_AVX2__
union {
__m256 m256;
@@ -48,8 +48,8 @@ struct ccl_try_align(16) float8 {
__forceinline float8(const float8& a);
__forceinline explicit float8(const __m256& a);
- __forceinline operator const __m256&(void) const;
- __forceinline operator __m256&(void);
+ __forceinline operator const __m256&() const;
+ __forceinline operator __m256&();
__forceinline float8& operator =(const float8& a);
diff --git a/intern/cycles/util/util_types_float8_impl.h b/intern/cycles/util/util_types_float8_impl.h
index ebf8260bc7c..84fe233c334 100644
--- a/intern/cycles/util/util_types_float8_impl.h
+++ b/intern/cycles/util/util_types_float8_impl.h
@@ -55,12 +55,12 @@ __forceinline float8::float8(const __m256& f)
{
}
-__forceinline float8::operator const __m256&(void) const
+__forceinline float8::operator const __m256&() const
{
return m256;
}
-__forceinline float8::operator __m256&(void)
+__forceinline float8::operator __m256&()
{
return m256;
}
diff --git a/intern/cycles/util/util_types_int3.h b/intern/cycles/util/util_types_int3.h
index 9d43b201c02..f68074b982b 100644
--- a/intern/cycles/util/util_types_int3.h
+++ b/intern/cycles/util/util_types_int3.h
@@ -35,8 +35,8 @@ struct ccl_try_align(16) int3 {
__forceinline int3(const int3& a);
__forceinline explicit int3(const __m128i& a);
- __forceinline operator const __m128i&(void) const;
- __forceinline operator __m128i&(void);
+ __forceinline operator const __m128i&() const;
+ __forceinline operator __m128i&();
__forceinline int3& operator =(const int3& a);
#else /* __KERNEL_SSE__ */
diff --git a/intern/cycles/util/util_types_int3_impl.h b/intern/cycles/util/util_types_int3_impl.h
index ada50c4812c..1b195ca753f 100644
--- a/intern/cycles/util/util_types_int3_impl.h
+++ b/intern/cycles/util/util_types_int3_impl.h
@@ -43,12 +43,12 @@ __forceinline int3::int3(const int3& a)
{
}
-__forceinline int3::operator const __m128i&(void) const
+__forceinline int3::operator const __m128i&() const
{
return m128;
}
-__forceinline int3::operator __m128i&(void)
+__forceinline int3::operator __m128i&()
{
return m128;
}
diff --git a/intern/cycles/util/util_types_int4.h b/intern/cycles/util/util_types_int4.h
index cdd0ecbdae5..52e6fed8c14 100644
--- a/intern/cycles/util/util_types_int4.h
+++ b/intern/cycles/util/util_types_int4.h
@@ -26,6 +26,7 @@ CCL_NAMESPACE_BEGIN
#ifndef __KERNEL_GPU__
struct float3;
+struct float4;
struct ccl_try_align(16) int4 {
#ifdef __KERNEL_SSE__
@@ -38,8 +39,8 @@ struct ccl_try_align(16) int4 {
__forceinline int4(const int4& a);
__forceinline explicit int4(const __m128i& a);
- __forceinline operator const __m128i&(void) const;
- __forceinline operator __m128i&(void);
+ __forceinline operator const __m128i&() const;
+ __forceinline operator __m128i&();
__forceinline int4& operator=(const int4& a);
#else /* __KERNEL_SSE__ */
@@ -53,6 +54,7 @@ struct ccl_try_align(16) int4 {
ccl_device_inline int4 make_int4(int i);
ccl_device_inline int4 make_int4(int x, int y, int z, int w);
ccl_device_inline int4 make_int4(const float3& f);
+ccl_device_inline int4 make_int4(const float4& f);
ccl_device_inline void print_int4(const char *label, const int4& a);
#endif /* __KERNEL_GPU__ */
diff --git a/intern/cycles/util/util_types_int4_impl.h b/intern/cycles/util/util_types_int4_impl.h
index 07cdc88f2dc..c058f86c400 100644
--- a/intern/cycles/util/util_types_int4_impl.h
+++ b/intern/cycles/util/util_types_int4_impl.h
@@ -43,12 +43,12 @@ __forceinline int4::int4(const __m128i& a)
{
}
-__forceinline int4::operator const __m128i&(void) const
+__forceinline int4::operator const __m128i&() const
{
return m128;
}
-__forceinline int4::operator __m128i&(void)
+__forceinline int4::operator __m128i&()
{
return m128;
}
@@ -104,6 +104,16 @@ ccl_device_inline int4 make_int4(const float3& f)
return a;
}
+ccl_device_inline int4 make_int4(const float4& f)
+{
+#ifdef __KERNEL_SSE__
+ int4 a(_mm_cvtps_epi32(f.m128));
+#else
+ int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w};
+#endif
+ return a;
+}
+
ccl_device_inline void print_int4(const char *label, const int4& a)
{
printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w);
diff --git a/intern/cycles/util/util_vector.h b/intern/cycles/util/util_vector.h
index 0b33221ad4d..18fa231d6e7 100644
--- a/intern/cycles/util/util_vector.h
+++ b/intern/cycles/util/util_vector.h
@@ -17,8 +17,6 @@
#ifndef __UTIL_VECTOR_H__
#define __UTIL_VECTOR_H__
-/* Vector */
-
#include <cassert>
#include <cstring>
#include <vector>
@@ -29,12 +27,9 @@
CCL_NAMESPACE_BEGIN
-/* Vector
- *
- * Own subclass-ed vestion of std::vector. Subclass is needed because:
+/* Own subclassed version of std::vector. Subclass is needed because:
*
* - Use own allocator which keeps track of used/peak memory.
- *
* - Have method to ensure capacity is re-set to 0.
*/
template<typename value_type,
@@ -42,30 +37,16 @@ template<typename value_type,
class vector : public std::vector<value_type, allocator_type>
{
public:
- /* Default constructor. */
- explicit vector() : std::vector<value_type, allocator_type>() { }
-
- /* Fill constructor. */
- explicit vector(size_t n, const value_type& val = value_type())
- : std::vector<value_type, allocator_type>(n, val) { }
-
- /* Range constructor. */
- template <class InputIterator>
- vector(InputIterator first, InputIterator last)
- : std::vector<value_type, allocator_type>(first, last) { }
-
- /* Copy constructor. */
- vector(const vector &x) : std::vector<value_type, allocator_type>(x) { }
+ typedef std::vector<value_type, allocator_type> BaseClass;
- void shrink_to_fit(void)
- {
- std::vector<value_type, allocator_type>::shrink_to_fit();
- }
+ /* Inherit all constructors from base class. */
+ using BaseClass::vector;
- void free_memory(void)
+ /* Try as hard as possible to use zero memory. */
+ void free_memory()
{
- std::vector<value_type, allocator_type>::resize(0);
- shrink_to_fit();
+ BaseClass::resize(0);
+ BaseClass::shrink_to_fit();
}
/* Some external API might demand working with std::vector. */
@@ -75,265 +56,6 @@ public:
}
};
-/* Array
- *
- * Simplified version of vector, serving multiple purposes:
- * - somewhat faster in that it does not clear memory on resize/alloc,
- * this was actually showing up in profiles quite significantly. it
- * also does not run any constructors/destructors
- * - if this is used, we are not tempted to use inefficient operations
- * - aligned allocation for CPU native data types */
-
-template<typename T, size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES>
-class array
-{
-public:
- array()
- : data_(NULL),
- datasize_(0),
- capacity_(0)
- {}
-
- explicit array(size_t newsize)
- {
- if(newsize == 0) {
- data_ = NULL;
- datasize_ = 0;
- capacity_ = 0;
- }
- else {
- data_ = mem_allocate(newsize);
- datasize_ = newsize;
- capacity_ = datasize_;
- }
- }
-
- array(const array& from)
- {
- if(from.datasize_ == 0) {
- data_ = NULL;
- datasize_ = 0;
- capacity_ = 0;
- }
- else {
- data_ = mem_allocate(from.datasize_);
- memcpy(data_, from.data_, from.datasize_*sizeof(T));
- datasize_ = from.datasize_;
- capacity_ = datasize_;
- }
- }
-
- array& operator=(const array& from)
- {
- if(this != &from) {
- resize(from.size());
- memcpy((void*)data_, from.data_, datasize_*sizeof(T));
- }
-
- return *this;
- }
-
- array& operator=(const vector<T>& from)
- {
- resize(from.size());
-
- if(from.size() > 0) {
- memcpy(data_, &from[0], datasize_*sizeof(T));
- }
-
- return *this;
- }
-
- ~array()
- {
- mem_free(data_, capacity_);
- }
-
- bool operator==(const array<T>& other) const
- {
- if(datasize_ != other.datasize_) {
- return false;
- }
-
- return memcmp(data_, other.data_, datasize_*sizeof(T)) == 0;
- }
-
- bool operator!=(const array<T>& other) const
- {
- return !(*this == other);
- }
-
- void steal_data(array& from)
- {
- if(this != &from) {
- clear();
-
- data_ = from.data_;
- datasize_ = from.datasize_;
- capacity_ = from.capacity_;
-
- from.data_ = NULL;
- from.datasize_ = 0;
- from.capacity_ = 0;
- }
- }
-
- T *steal_pointer()
- {
- T *ptr = data_;
- data_ = NULL;
- clear();
- return ptr;
- }
-
- T* resize(size_t newsize)
- {
- if(newsize == 0) {
- clear();
- }
- else if(newsize != datasize_) {
- if(newsize > capacity_) {
- T *newdata = mem_allocate(newsize);
- if(newdata == NULL) {
- /* Allocation failed, likely out of memory. */
- clear();
- return NULL;
- }
- else if(data_ != NULL) {
- memcpy((void *)newdata,
- data_,
- ((datasize_ < newsize)? datasize_: newsize)*sizeof(T));
- mem_free(data_, capacity_);
- }
- data_ = newdata;
- capacity_ = newsize;
- }
- datasize_ = newsize;
- }
- return data_;
- }
-
- T* resize(size_t newsize, const T& value)
- {
- size_t oldsize = size();
- resize(newsize);
-
- for(size_t i = oldsize; i < size(); i++) {
- data_[i] = value;
- }
-
- return data_;
- }
-
- void clear()
- {
- if(data_ != NULL) {
- mem_free(data_, capacity_);
- data_ = NULL;
- }
- datasize_ = 0;
- capacity_ = 0;
- }
-
- size_t empty() const
- {
- return datasize_ == 0;
- }
-
- size_t size() const
- {
- return datasize_;
- }
-
- T* data()
- {
- return data_;
- }
-
- const T* data() const
- {
- return data_;
- }
-
- T& operator[](size_t i) const
- {
- assert(i < datasize_);
- return data_[i];
- }
-
- void reserve(size_t newcapacity)
- {
- if(newcapacity > capacity_) {
- T *newdata = mem_allocate(newcapacity);
- if(data_ != NULL) {
- memcpy(newdata, data_, ((datasize_ < newcapacity)? datasize_: newcapacity)*sizeof(T));
- mem_free(data_, capacity_);
- }
- data_ = newdata;
- capacity_ = newcapacity;
- }
- }
-
- size_t capacity() const
- {
- return capacity_;
- }
-
- // do not use this method unless you are sure the code is not performance critical
- void push_back_slow(const T& t)
- {
- if(capacity_ == datasize_)
- {
- reserve(datasize_ == 0 ? 1 : (size_t)((datasize_ + 1) * 1.2));
- }
-
- data_[datasize_++] = t;
- }
-
- void push_back_reserved(const T& t)
- {
- assert(datasize_ < capacity_);
- push_back_slow(t);
- }
-
- void append(const array<T>& from)
- {
- if(from.size()) {
- size_t old_size = size();
- resize(old_size + from.size());
- memcpy(data_ + old_size, from.data(), sizeof(T) * from.size());
- }
- }
-
-protected:
- inline T* mem_allocate(size_t N)
- {
- if(N == 0) {
- return NULL;
- }
- T *mem = (T*)util_aligned_malloc(sizeof(T)*N, alignment);
- if(mem != NULL) {
- util_guarded_mem_alloc(sizeof(T)*N);
- }
- else {
- throw std::bad_alloc();
- }
- return mem;
- }
-
- inline void mem_free(T *mem, size_t N)
- {
- if(mem != NULL) {
- util_guarded_mem_free(sizeof(T)*N);
- util_aligned_free(mem);
- }
- }
-
- T *data_;
- size_t datasize_;
- size_t capacity_;
-};
-
CCL_NAMESPACE_END
-#endif /* __UTIL_VECTOR_H__ */
+#endif /* __UTIL_VECTOR_H__ */
diff --git a/intern/cycles/util/util_version.h b/intern/cycles/util/util_version.h
index 112255f447b..980c5a269e6 100644
--- a/intern/cycles/util/util_version.h
+++ b/intern/cycles/util/util_version.h
@@ -34,4 +34,4 @@ CCL_NAMESPACE_BEGIN
CCL_NAMESPACE_END
-#endif /* __UTIL_VERSION_H__ */
+#endif /* __UTIL_VERSION_H__ */
diff --git a/intern/cycles/util/util_view.cpp b/intern/cycles/util/util_view.cpp
index 3836cc86ee0..9dffd7a80bd 100644
--- a/intern/cycles/util/util_view.cpp
+++ b/intern/cycles/util/util_view.cpp
@@ -215,7 +215,7 @@ static void view_motion(int x, int y)
V.mouseY = y;
}
-static void view_idle(void)
+static void view_idle()
{
if(V.redraw) {
V.redraw = false;
diff --git a/intern/cycles/util/util_view.h b/intern/cycles/util/util_view.h
index e603e605776..ae50b098b39 100644
--- a/intern/cycles/util/util_view.h
+++ b/intern/cycles/util/util_view.h
@@ -22,10 +22,10 @@
CCL_NAMESPACE_BEGIN
-typedef void (*ViewInitFunc)(void);
-typedef void (*ViewExitFunc)(void);
+typedef void (*ViewInitFunc)();
+typedef void (*ViewExitFunc)();
typedef void (*ViewResizeFunc)(int width, int height);
-typedef void (*ViewDisplayFunc)(void);
+typedef void (*ViewDisplayFunc)();
typedef void (*ViewKeyboardFunc)(unsigned char key);
typedef void (*ViewMotionFunc)(int x, int y, int button);
@@ -40,4 +40,4 @@ void view_redraw();
CCL_NAMESPACE_END
-#endif /*__UTIL_VIEW_H__*/
+#endif /*__UTIL_VIEW_H__*/
diff --git a/intern/cycles/util/util_windows.h b/intern/cycles/util/util_windows.h
index 3b23ce8a3cb..9b9268fed7a 100644
--- a/intern/cycles/util/util_windows.h
+++ b/intern/cycles/util/util_windows.h
@@ -56,4 +56,4 @@ CCL_NAMESPACE_END
#endif /* WIN32 */
-#endif /* __UTIL_WINDOWS_H__ */
+#endif /* __UTIL_WINDOWS_H__ */
diff --git a/intern/cycles/util/util_xml.h b/intern/cycles/util/util_xml.h
index 6f06f17937b..c8a3a495f30 100644
--- a/intern/cycles/util/util_xml.h
+++ b/intern/cycles/util/util_xml.h
@@ -38,4 +38,4 @@ using PUGIXML_NAMESPACE::xml_parse_result;
CCL_NAMESPACE_END
-#endif /* __UTIL_XML_H__ */
+#endif /* __UTIL_XML_H__ */
diff --git a/intern/elbeem/extern/LBM_fluidsim.h b/intern/elbeem/extern/LBM_fluidsim.h
index 374e11d1c06..05ac143c7a1 100644
--- a/intern/elbeem/extern/LBM_fluidsim.h
+++ b/intern/elbeem/extern/LBM_fluidsim.h
@@ -1,7 +1,4 @@
/*
- * BKE_fluidsim.h
- *
- *
* ***** BEGIN GPL LICENSE BLOCK *****
*
* This program is free software; you can redistribute it and/or
@@ -44,5 +41,3 @@ int performElbeemSimulation(char *cfgfilename);
#endif
-
-
diff --git a/intern/ghost/GHOST_C-api.h b/intern/ghost/GHOST_C-api.h
index 86e64b7eeb0..bc5c67f4bed 100644
--- a/intern/ghost/GHOST_C-api.h
+++ b/intern/ghost/GHOST_C-api.h
@@ -908,6 +908,11 @@ extern int GHOST_SupportsNativeDialogs(void);
extern int GHOST_UseNativePixels(void);
/**
+ * Focus windows after opening, or put them in the background.
+ */
+extern void GHOST_UseWindowFocus(int use_focus);
+
+/**
* If window was opened using native pixel size, it returns scaling factor.
*/
extern float GHOST_GetNativePixelSize(GHOST_WindowHandle windowhandle);
diff --git a/intern/ghost/GHOST_ISystem.h b/intern/ghost/GHOST_ISystem.h
index be2a94bd508..9b017c2da38 100644
--- a/intern/ghost/GHOST_ISystem.h
+++ b/intern/ghost/GHOST_ISystem.h
@@ -306,6 +306,11 @@ public:
*/
virtual bool useNativePixel(void) = 0;
+ /**
+ * Focus windows after opening, or put them in the background.
+ */
+ virtual void useWindowFocus(const bool use_focus) = 0;
+
/***************************************************************************************
* Event management functionality
***************************************************************************************/
diff --git a/intern/ghost/intern/GHOST_C-api.cpp b/intern/ghost/intern/GHOST_C-api.cpp
index 797fb16dd6f..e60a8a9d3f3 100644
--- a/intern/ghost/intern/GHOST_C-api.cpp
+++ b/intern/ghost/intern/GHOST_C-api.cpp
@@ -911,6 +911,12 @@ int GHOST_UseNativePixels(void)
return system->useNativePixel();
}
+void GHOST_UseWindowFocus(int use_focus)
+{
+ GHOST_ISystem *system = GHOST_ISystem::getSystem();
+ return system->useWindowFocus(use_focus);
+}
+
float GHOST_GetNativePixelSize(GHOST_WindowHandle windowhandle)
{
GHOST_IWindow *window = (GHOST_IWindow *) windowhandle;
diff --git a/intern/ghost/intern/GHOST_DropTargetWin32.h b/intern/ghost/intern/GHOST_DropTargetWin32.h
index 3d7be45799f..5dcefcaf25e 100644
--- a/intern/ghost/intern/GHOST_DropTargetWin32.h
+++ b/intern/ghost/intern/GHOST_DropTargetWin32.h
@@ -60,7 +60,7 @@ public:
* - Determining the effect of the drop on the target application.
* - Incorporating any valid dropped data when the drop occurs.
* - Communicating target feedback to the source so the source application
- * can provide appropriate visual feedback such as setting the cursor.
+ * can provide appropriate visual feedback such as setting the cursor.
* - Implementing drag scrolling.
* - Registering and revoking its application windows as drop targets.
*
diff --git a/intern/ghost/intern/GHOST_EventTrackpad.h b/intern/ghost/intern/GHOST_EventTrackpad.h
index faf0f1697d0..25988090c01 100644
--- a/intern/ghost/intern/GHOST_EventTrackpad.h
+++ b/intern/ghost/intern/GHOST_EventTrackpad.h
@@ -20,8 +20,8 @@
*
* The Original Code is: all of this file.
*
- * Contributor(s): James Deery 11/2009
- * Damien Plisson 12/2009
+ * Contributor(s): James Deery 11/2009
+ * Damien Plisson 12/2009
*
* ***** END GPL LICENSE BLOCK *****
*/
diff --git a/intern/ghost/intern/GHOST_System.cpp b/intern/ghost/intern/GHOST_System.cpp
index 0629eacc3ff..39f915bb0c2 100644
--- a/intern/ghost/intern/GHOST_System.cpp
+++ b/intern/ghost/intern/GHOST_System.cpp
@@ -48,6 +48,7 @@
GHOST_System::GHOST_System()
: m_nativePixel(false),
+ m_windowFocus(true),
m_displayManager(NULL),
m_timerManager(NULL),
m_windowManager(NULL),
@@ -390,3 +391,8 @@ bool GHOST_System::useNativePixel(void)
m_nativePixel = true;
return 1;
}
+
+void GHOST_System::useWindowFocus(const bool use_focus)
+{
+ m_windowFocus = use_focus;
+}
diff --git a/intern/ghost/intern/GHOST_System.h b/intern/ghost/intern/GHOST_System.h
index 464d9269f28..ee3c30c35b4 100644
--- a/intern/ghost/intern/GHOST_System.h
+++ b/intern/ghost/intern/GHOST_System.h
@@ -177,6 +177,12 @@ public:
bool useNativePixel(void);
bool m_nativePixel;
+ /**
+ * Focus windows after opening, or put them in the background.
+ */
+ void useWindowFocus(const bool use_focus);
+ bool m_windowFocus;
+
/***************************************************************************************
* Event management functionality
***************************************************************************************/
diff --git a/intern/ghost/intern/GHOST_SystemCocoa.h b/intern/ghost/intern/GHOST_SystemCocoa.h
index d31df79bc2b..146fa841555 100644
--- a/intern/ghost/intern/GHOST_SystemCocoa.h
+++ b/intern/ghost/intern/GHOST_SystemCocoa.h
@@ -20,8 +20,8 @@
*
* The Original Code is: all of this file.
*
- * Contributor(s): Maarten Gribnau 05/2001
- * Damien Plisson 09/2009
+ * Contributor(s): Maarten Gribnau 05/2001
+ * Damien Plisson 09/2009
*
* ***** END GPL LICENSE BLOCK *****
*/
diff --git a/intern/ghost/intern/GHOST_SystemCocoa.mm b/intern/ghost/intern/GHOST_SystemCocoa.mm
index 916da546669..697c0fae809 100644
--- a/intern/ghost/intern/GHOST_SystemCocoa.mm
+++ b/intern/ghost/intern/GHOST_SystemCocoa.mm
@@ -304,11 +304,13 @@ extern "C" int GHOST_HACK_getFirstFile(char buf[FIRSTFILEBUFLG])
- (void)applicationDidFinishLaunching:(NSNotification *)aNotification
{
- // raise application to front, convenient when starting from the terminal
- // and important for launching the animation player. we call this after the
- // application finishes launching, as doing it earlier can make us end up
- // with a frontmost window but an inactive application
- [NSApp activateIgnoringOtherApps:YES];
+ if (systemCocoa->m_windowFocus) {
+ // Raise application to front, convenient when starting from the terminal
+ // and important for launching the animation player. We call this after the
+ // application finishes launching, as doing it earlier can make us end up
+ // with a frontmost window but an inactive application.
+ [NSApp activateIgnoringOtherApps:YES];
+ }
}
- (BOOL)application:(NSApplication *)theApplication openFile:(NSString *)filename
diff --git a/intern/ghost/intern/GHOST_SystemX11.h b/intern/ghost/intern/GHOST_SystemX11.h
index 3a65ff2a843..e9312ceb683 100644
--- a/intern/ghost/intern/GHOST_SystemX11.h
+++ b/intern/ghost/intern/GHOST_SystemX11.h
@@ -149,16 +149,16 @@ public:
* Create a new window.
* The new window is added to the list of windows managed.
* Never explicitly delete the window, use disposeWindow() instead.
- * \param title The name of the window (displayed in the title bar of the window if the OS supports it).
- * \param left The coordinate of the left edge of the window.
- * \param top The coordinate of the top edge of the window.
- * \param width The width the window.
- * \param height The height the window.
- * \param state The state of the window when opened.
- * \param type The type of drawing context installed in this window.
- * \param stereoVisual Create a stereo visual for quad buffered stereo.
- * \param exclusive Use to show the window ontop and ignore others
- * (used fullscreen).
+ * \param title The name of the window (displayed in the title bar of the window if the OS supports it).
+ * \param left The coordinate of the left edge of the window.
+ * \param top The coordinate of the top edge of the window.
+ * \param width The width of the window.
+ * \param height The height of the window.
+ * \param state The state of the window when opened.
+ * \param type The type of drawing context installed in this window.
+ * \param stereoVisual Create a stereo visual for quad buffered stereo.
+ * \param exclusive Use to show the window ontop and ignore others
+ * (used fullscreen).
* \param parentWindow Parent (embedder) window
* \return The new window (or 0 if creation failed).
*/
diff --git a/intern/ghost/intern/GHOST_Window.h b/intern/ghost/intern/GHOST_Window.h
index 59d3123b9de..2fa30049eca 100644
--- a/intern/ghost/intern/GHOST_Window.h
+++ b/intern/ghost/intern/GHOST_Window.h
@@ -55,14 +55,14 @@ public:
* Constructor.
* Creates a new window and opens it.
* To check if the window was created properly, use the getValid() method.
- * \param width The width the window.
- * \param heigh The height the window.
- * \param state The state the window is initially opened with.
- * \param type The type of drawing context installed in this window.
- * \param stereoVisual Stereo visual for quad buffered stereo.
- * \param exclusive Use to show the window ontop and ignore others
- * (used fullscreen).
- * \param numOfAASamples Number of samples used for AA (zero if no AA)
+ * \param width The width of the window.
+ * \param height The height of the window.
+ * \param state The state the window is initially opened with.
+ * \param type The type of drawing context installed in this window.
+ * \param stereoVisual Stereo visual for quad buffered stereo.
+ * \param exclusive Use to show the window ontop and ignore others
+ * (used fullscreen).
+ * \param numOfAASamples Number of samples used for AA (zero if no AA)
*/
GHOST_Window(
GHOST_TUns32 width,
diff --git a/intern/ghost/intern/GHOST_WindowCocoa.mm b/intern/ghost/intern/GHOST_WindowCocoa.mm
index 20060ac1267..79d2f304e60 100644
--- a/intern/ghost/intern/GHOST_WindowCocoa.mm
+++ b/intern/ghost/intern/GHOST_WindowCocoa.mm
@@ -551,7 +551,15 @@ GHOST_WindowCocoa::GHOST_WindowCocoa(
//Creates the OpenGL View inside the window
m_openGLView = [[CocoaOpenGLView alloc] initWithFrame:rect];
-
+
+ if (m_systemCocoa->m_nativePixel) {
+ // Needs to happen early when building with the 10.14 SDK, otherwise
+ // has no effect until resizing the window.
+ if ([m_openGLView respondsToSelector:@selector(setWantsBestResolutionOpenGLSurface:)]) {
+ [m_openGLView setWantsBestResolutionOpenGLSurface:YES];
+ }
+ }
+
[m_openGLView setSystemAndWindowCocoa:systemCocoa windowCocoa:this];
[m_window setContentView:m_openGLView];
@@ -563,14 +571,9 @@ GHOST_WindowCocoa::GHOST_WindowCocoa(
updateDrawingContext();
activateDrawingContext();
- // XXX jwilkins: This seems like it belongs in GHOST_ContextCGL, but probably not GHOST_ContextEGL
if (m_systemCocoa->m_nativePixel) {
- if ([m_openGLView respondsToSelector:@selector(setWantsBestResolutionOpenGLSurface:)]) {
- [m_openGLView setWantsBestResolutionOpenGLSurface:YES];
-
- NSRect backingBounds = [m_openGLView convertRectToBacking:[m_openGLView bounds]];
- m_nativePixelSize = (float)backingBounds.size.width / (float)rect.size.width;
- }
+ NSRect backingBounds = [m_openGLView convertRectToBacking:[m_openGLView bounds]];
+ m_nativePixelSize = (float)backingBounds.size.width / (float)rect.size.width;
}
setTitle(title);
diff --git a/intern/ghost/intern/GHOST_WindowWin32.cpp b/intern/ghost/intern/GHOST_WindowWin32.cpp
index 92de41a859b..983fffc10e6 100644
--- a/intern/ghost/intern/GHOST_WindowWin32.cpp
+++ b/intern/ghost/intern/GHOST_WindowWin32.cpp
@@ -201,6 +201,11 @@ GHOST_WindowWin32::GHOST_WindowWin32(GHOST_SystemWin32 *system,
// Store a pointer to this class in the window structure
::SetWindowLongPtr(m_hWnd, GWLP_USERDATA, (LONG_PTR) this);
+ if (!m_system->m_windowFocus) {
+ // Lower to bottom and don't activate if we don't want focus
+ ::SetWindowPos(m_hWnd, HWND_BOTTOM, 0, 0, 0, 0, SWP_NOMOVE | SWP_NOSIZE | SWP_NOACTIVATE);
+ }
+
// Store the device context
m_hDC = ::GetDC(m_hWnd);
@@ -214,11 +219,11 @@ GHOST_WindowWin32::GHOST_WindowWin32(GHOST_SystemWin32 *system,
nCmdShow = SW_SHOWMAXIMIZED;
break;
case GHOST_kWindowStateMinimized:
- nCmdShow = SW_SHOWMINIMIZED;
+ nCmdShow = (m_system->m_windowFocus) ? SW_SHOWMINIMIZED : SW_SHOWMINNOACTIVE;
break;
case GHOST_kWindowStateNormal:
default:
- nCmdShow = SW_SHOWNORMAL;
+ nCmdShow = (m_system->m_windowFocus) ? SW_SHOWNORMAL : SW_SHOWNOACTIVATE;
break;
}
@@ -1105,12 +1110,12 @@ GHOST_TSuccess GHOST_WindowWin32::endProgressBar()
#ifdef WITH_INPUT_IME
void GHOST_WindowWin32::beginIME(GHOST_TInt32 x, GHOST_TInt32 y, GHOST_TInt32 w, GHOST_TInt32 h, int completed)
{
- m_imeImput.BeginIME(m_hWnd, GHOST_Rect(x, y - h, x, y), (bool)completed);
+ m_imeInput.BeginIME(m_hWnd, GHOST_Rect(x, y - h, x, y), (bool)completed);
}
void GHOST_WindowWin32::endIME()
{
- m_imeImput.EndIME(m_hWnd);
+ m_imeInput.EndIME(m_hWnd);
}
#endif /* WITH_INPUT_IME */
diff --git a/intern/ghost/intern/GHOST_WindowWin32.h b/intern/ghost/intern/GHOST_WindowWin32.h
index c72669ed898..8b0ba2f1934 100644
--- a/intern/ghost/intern/GHOST_WindowWin32.h
+++ b/intern/ghost/intern/GHOST_WindowWin32.h
@@ -225,10 +225,10 @@ public:
* capturing).
*
* \param press
- * 0 - mouse pressed
- * 1 - mouse released
- * 2 - operator grab
- * 3 - operator ungrab
+ * 0 - mouse pressed
+ * 1 - mouse released
+ * 2 - operator grab
+ * 3 - operator ungrab
*/
void registerMouseClickEvent(int press);
@@ -265,7 +265,7 @@ public:
bool m_inLiveResize;
#ifdef WITH_INPUT_IME
- GHOST_ImeWin32 *getImeInput() {return &m_imeImput;}
+ GHOST_ImeWin32 *getImeInput() {return &m_imeInput;}
void beginIME(
GHOST_TInt32 x, GHOST_TInt32 y,
@@ -369,7 +369,7 @@ private:
#ifdef WITH_INPUT_IME
/** Handle input method editors event */
- GHOST_ImeWin32 m_imeImput;
+ GHOST_ImeWin32 m_imeInput;
#endif
bool m_debug_context;
};
diff --git a/intern/ghost/intern/GHOST_WindowX11.cpp b/intern/ghost/intern/GHOST_WindowX11.cpp
index 623d57705b2..a4ccdef3788 100644
--- a/intern/ghost/intern/GHOST_WindowX11.cpp
+++ b/intern/ghost/intern/GHOST_WindowX11.cpp
@@ -517,7 +517,7 @@ GHOST_WindowX11(GHOST_SystemX11 *system,
natom++;
}
- if (m_system->m_atom.WM_TAKE_FOCUS) {
+ if (m_system->m_atom.WM_TAKE_FOCUS && m_system->m_windowFocus) {
atoms[natom] = m_system->m_atom.WM_TAKE_FOCUS;
natom++;
}
@@ -532,7 +532,7 @@ GHOST_WindowX11(GHOST_SystemX11 *system,
{
XWMHints *xwmhints = XAllocWMHints();
xwmhints->initial_state = NormalState;
- xwmhints->input = True;
+ xwmhints->input = (m_system->m_windowFocus) ? True : False;
xwmhints->flags = InputHint | StateHint;
XSetWMHints(display, m_window, xwmhints);
XFree(xwmhints);
@@ -586,11 +586,15 @@ GHOST_WindowX11(GHOST_SystemX11 *system,
setTitle(title);
- if (exclusive) {
+ if (exclusive && system->m_windowFocus) {
XMapRaised(m_display, m_window);
}
else {
XMapWindow(m_display, m_window);
+
+ if (!system->m_windowFocus) {
+ XLowerWindow(m_display, m_window);
+ }
}
GHOST_PRINT("Mapped window\n");
diff --git a/intern/guardedalloc/CMakeLists.txt b/intern/guardedalloc/CMakeLists.txt
index 10ed4287185..3cec2fd1016 100644
--- a/intern/guardedalloc/CMakeLists.txt
+++ b/intern/guardedalloc/CMakeLists.txt
@@ -53,6 +53,11 @@ if(WIN32 AND NOT UNIX)
)
endif()
+# Jemalloc 5.0.0+ needs extra configuration.
+if(WITH_MEM_JEMALLOC AND NOT ("${JEMALLOC_VERSION}" VERSION_LESS "5.0.0"))
+ add_definitions(-DWITH_JEMALLOC_CONF)
+endif()
+
blender_add_lib(bf_intern_guardedalloc "${SRC}" "${INC}" "${INC_SYS}")
# Override C++ alloc, optional.
diff --git a/intern/guardedalloc/intern/mallocn.c b/intern/guardedalloc/intern/mallocn.c
index a95cc9163c4..8c17da853e5 100644
--- a/intern/guardedalloc/intern/mallocn.c
+++ b/intern/guardedalloc/intern/mallocn.c
@@ -37,6 +37,13 @@
#include "mallocn_intern.h"
+#ifdef WITH_JEMALLOC_CONF
+/* If jemalloc is used, it reads this global variable and enables background
+ * threads to purge dirty pages. Otherwise we release memory too slowly or not
+ * at all if the thread that did the allocation stays inactive. */
+const char *malloc_conf = "background_thread:true,dirty_decay_ms:4000";
+#endif
+
size_t (*MEM_allocN_len)(const void *vmemh) = MEM_lockfree_allocN_len;
void (*MEM_freeN)(void *vmemh) = MEM_lockfree_freeN;
void *(*MEM_dupallocN)(const void *vmemh) = MEM_lockfree_dupallocN;
diff --git a/intern/locale/boost_locale_wrapper.cpp b/intern/locale/boost_locale_wrapper.cpp
index 0707c0dd3e3..3fd8f146aa3 100644
--- a/intern/locale/boost_locale_wrapper.cpp
+++ b/intern/locale/boost_locale_wrapper.cpp
@@ -112,13 +112,17 @@ const char *bl_locale_pgettext(const char *msgctxt, const char *msgid)
return r;
return msgid;
}
- catch(std::bad_cast const &e) { /* if std::has_facet<char_message_facet>(l) == false, LC_ALL = "C" case */
-// std::cout << "bl_locale_pgettext(" << msgid << "): " << e.what() << " \n";
+ catch(const std::bad_cast &e) { /* if std::has_facet<char_message_facet>(l) == false, LC_ALL = "C" case */
+#ifndef NDEBUG
+ std::cout << "bl_locale_pgettext(" << msgid << "): " << e.what() << " \n";
+#endif
(void)e;
return msgid;
}
- catch(std::exception const &e) {
-// std::cout << "bl_locale_pgettext(" << msgctxt << ", " << msgid << "): " << e.what() << " \n";
+ catch(const std::exception &e) {
+#ifndef NDEBUG
+ std::cout << "bl_locale_pgettext(" << msgctxt << ", " << msgid << "): " << e.what() << " \n";
+#endif
(void)e;
return msgid;
}
diff --git a/intern/smoke/intern/FLUID_3D.cpp b/intern/smoke/intern/FLUID_3D.cpp
index 8a27818ff36..fd0a7e2005f 100644
--- a/intern/smoke/intern/FLUID_3D.cpp
+++ b/intern/smoke/intern/FLUID_3D.cpp
@@ -38,7 +38,7 @@
#if PARALLEL==1
#include <omp.h>
-#endif // PARALLEL
+#endif // PARALLEL
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
@@ -51,13 +51,13 @@ FLUID_3D::FLUID_3D(int *res, float dx, float dtdef, int init_heat, int init_fire
_dt = dtdef; // just in case. set in step from a RNA factor
_iterations = 100;
- _tempAmb = 0;
+ _tempAmb = 0;
_heatDiffusion = 1e-3;
_totalTime = 0.0f;
_totalSteps = 0;
_res = Vec3Int(_xRes,_yRes,_zRes);
_maxRes = MAX3(_xRes, _yRes, _zRes);
-
+
// initialize wavelet turbulence
/*
if(amplify)
@@ -65,7 +65,7 @@ FLUID_3D::FLUID_3D(int *res, float dx, float dtdef, int init_heat, int init_fire
else
_wTurbulence = NULL;
*/
-
+
// scale the constants according to the refinement of the grid
if (!dx)
_dx = 1.0f / (float)_maxRes;
@@ -218,7 +218,7 @@ void FLUID_3D::initColors(float init_r, float init_g, float init_b)
void FLUID_3D::setBorderObstacles()
{
-
+
// set side obstacles
unsigned int index;
for (int y = 0; y < _yRes; y++)
@@ -331,7 +331,7 @@ void FLUID_3D::step(float dt, float gravity[3])
// If border rules have been changed
if (_colloPrev != *_borderColli) {
printf("Border collisions changed\n");
-
+
// DG TODO: Need to check that no animated obstacle flags are overwritten
setBorderCollisions();
}
@@ -490,7 +490,7 @@ void FLUID_3D::step(float dt, float gravity[3])
for (int i=1; i<stepParts; i++)
{
int zPos=(int)((float)i*partSize + 0.5f);
-
+
artificialDampingExactSL(zPos);
}
@@ -620,7 +620,7 @@ void FLUID_3D::artificialDampingSL(int zBegin, int zEnd) {
void FLUID_3D::artificialDampingExactSL(int pos) {
const float w = 0.9;
int index, x,y,z;
-
+
size_t posslab;
@@ -650,7 +650,7 @@ void FLUID_3D::artificialDampingExactSL(int pos) {
_zVelocityTemp[index+1] + _zVelocityTemp[index-1] +
_zVelocityTemp[index+_res[0]] + _zVelocityTemp[index-_res[0]] +
_zVelocityTemp[index+_slabSize] + _zVelocityTemp[index-_slabSize] );
-
+
}
}
@@ -677,7 +677,7 @@ void FLUID_3D::artificialDampingExactSL(int pos) {
_zVelocityTemp[index+1] + _zVelocityTemp[index-1] +
_zVelocityTemp[index+_res[0]] + _zVelocityTemp[index-_res[0]] +
_zVelocityTemp[index+_slabSize] + _zVelocityTemp[index-_slabSize] );
-
+
}
}
@@ -759,7 +759,7 @@ void FLUID_3D::wipeBoundaries(int zBegin, int zEnd)
void FLUID_3D::wipeBoundariesSL(int zBegin, int zEnd)
{
-
+
/////////////////////////////////////
// setZeroBorder to all:
/////////////////////////////////////
@@ -933,16 +933,16 @@ void FLUID_3D::project()
memset(_pressure, 0, sizeof(float)*_totalCells);
memset(_divergence, 0, sizeof(float)*_totalCells);
-
+
// set velocity and pressure inside of obstacles to zero
setObstacleBoundaries(_pressure, 0, _zRes);
-
+
// copy out the boundaries
if(!_domainBcLeft) setNeumannX(_xVelocity, _res, 0, _zRes);
- else setZeroX(_xVelocity, _res, 0, _zRes);
+ else setZeroX(_xVelocity, _res, 0, _zRes);
if(!_domainBcFront) setNeumannY(_yVelocity, _res, 0, _zRes);
- else setZeroY(_yVelocity, _res, 0, _zRes);
+ else setZeroY(_yVelocity, _res, 0, _zRes);
if(!_domainBcTop) setNeumannZ(_zVelocity, _res, 0, _zRes);
else setZeroZ(_zVelocity, _res, 0, _zRes);
@@ -953,13 +953,13 @@ void FLUID_3D::project()
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
{
-
+
if(_obstacles[index])
{
_divergence[index] = 0.0f;
continue;
}
-
+
float xright = _xVelocity[index + 1];
float xleft = _xVelocity[index - 1];
@@ -1058,7 +1058,7 @@ void FLUID_3D::project()
//////////////////////////////////////////////////////////////////////
void FLUID_3D::setObstacleVelocity(int zBegin, int zEnd)
{
-
+
// completely TODO <-- who wrote this and what is here TODO? DG
const size_t index_ = _slabSize + _xRes + 1;
@@ -1082,7 +1082,7 @@ void FLUID_3D::setObstacleVelocity(int zBegin, int zEnd)
{
if (!_obstacles[index])
{
- // if(_obstacles[index+1]) xright = - _xVelocityOb[index];
+ // if(_obstacles[index+1]) xright = - _xVelocityOb[index];
if((_obstacles[index - 1] & 8) && abs(_xVelocityOb[index - 1]) > FLT_EPSILON )
{
// printf("velocity x!\n");
@@ -1221,7 +1221,7 @@ void FLUID_3D::setObstaclePressure(float *_pressure, int zBegin, int zEnd)
_pressure[index] += _pressure[index + _slabSize];
pcnt += 1.0f;
}
-
+
if(pcnt > 0.000001f)
_pressure[index] /= pcnt;
@@ -1254,7 +1254,7 @@ void FLUID_3D::setObstacleBoundaries(float *_pressure, int zBegin, int zEnd)
for (int z = zBegin + bb; z < zEnd - bt; z++)
{
size_t index = index_ +(z-1)*_slabSize;
-
+
for (int y = 1; y < _yRes - 1; y++, index += 2)
{
for (int x = 1; x < _xRes - 1; x++, index++)
@@ -1563,7 +1563,7 @@ void FLUID_3D::addVorticity(int zBegin, int zEnd)
// calculate normalized vorticity vectors
float eps = _vorticityEps;
-
+
//index = _slabSize + _xRes + 1;
vIndex=_slabSize + _xRes + 1;
@@ -1618,7 +1618,7 @@ void FLUID_3D::addVorticity(int zBegin, int zEnd)
} // y loop
//vIndex+=2*_xRes;
} // z loop
-
+
if (_xVorticity) delete[] _xVorticity;
if (_yVorticity) delete[] _yVorticity;
if (_zVorticity) delete[] _zVorticity;
@@ -1704,10 +1704,10 @@ void FLUID_3D::advectMacCormackEnd2(int zBegin, int zEnd)
/* set boundary conditions for velocity */
if(!_domainBcLeft) copyBorderX(_xVelocityTemp, res, zBegin, zEnd);
- else setZeroX(_xVelocityTemp, res, zBegin, zEnd);
+ else setZeroX(_xVelocityTemp, res, zBegin, zEnd);
if(!_domainBcFront) copyBorderY(_yVelocityTemp, res, zBegin, zEnd);
- else setZeroY(_yVelocityTemp, res, zBegin, zEnd);
+ else setZeroY(_yVelocityTemp, res, zBegin, zEnd);
if(!_domainBcTop) copyBorderZ(_zVelocityTemp, res, zBegin, zEnd);
else setZeroZ(_zVelocityTemp, res, zBegin, zEnd);
@@ -1778,9 +1778,9 @@ void FLUID_3D::updateFlame(float *react, float *flame, int total_cells)
for (int index = 0; index < total_cells; index++)
{
/* model flame temperature curve from the reaction coordinate (fuel)
- * TODO: Would probably be best to get rid of whole "flame" data field.
- * Currently it's just sqrt mirror of reaction coordinate, and therefore
- * basically just waste of memory and disk space...
+ * TODO: Would probably be best to get rid of whole "flame" data field.
+ * Currently it's just sqrt mirror of reaction coordinate, and therefore
+ * basically just waste of memory and disk space...
*/
if (react[index]>0.0f) {
/* do a smooth falloff for rest of the values */