From fd25e883e2807a151f673b87c152a59701a0df80 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Sun, 24 Oct 2021 14:19:19 +0200 Subject: Cycles: remove prefix from source code file names Remove prefix of filenames that is the same as the folder name. This used to help when #includes were using individual files, but now they are always relative to the cycles root directory and so the prefixes are redundant. For patches and branches, git merge and rebase should be able to detect the renames and move over code to the right file. --- intern/cycles/util/CMakeLists.txt | 240 +++--- intern/cycles/util/algorithm.h | 33 + intern/cycles/util/aligned_malloc.cpp | 76 ++ intern/cycles/util/aligned_malloc.h | 50 ++ intern/cycles/util/args.h | 31 + intern/cycles/util/array.h | 318 +++++++ intern/cycles/util/atomic.h | 68 ++ intern/cycles/util/avxb.h | 243 ++++++ intern/cycles/util/avxf.h | 392 +++++++++ intern/cycles/util/avxi.h | 745 +++++++++++++++++ intern/cycles/util/boundbox.h | 282 +++++++ intern/cycles/util/color.h | 296 +++++++ intern/cycles/util/debug.cpp | 124 +++ intern/cycles/util/debug.h | 167 ++++ intern/cycles/util/defines.h | 146 ++++ intern/cycles/util/deque.h | 28 + intern/cycles/util/disjoint_set.h | 75 ++ intern/cycles/util/foreach.h | 24 + intern/cycles/util/function.h | 39 + intern/cycles/util/guarded_allocator.cpp | 48 ++ intern/cycles/util/guarded_allocator.h | 185 +++++ intern/cycles/util/half.h | 169 ++++ intern/cycles/util/hash.h | 389 +++++++++ intern/cycles/util/ies.cpp | 411 +++++++++ intern/cycles/util/ies.h | 59 ++ intern/cycles/util/image.h | 98 +++ intern/cycles/util/image_impl.h | 175 ++++ intern/cycles/util/list.h | 28 + intern/cycles/util/log.cpp | 96 +++ intern/cycles/util/log.h | 101 +++ intern/cycles/util/map.h | 39 + intern/cycles/util/math.h | 870 +++++++++++++++++++ intern/cycles/util/math_cdf.cpp | 70 ++ intern/cycles/util/math_cdf.h | 75 ++ intern/cycles/util/math_fast.h | 652 +++++++++++++++ intern/cycles/util/math_float2.h | 269 
++++++ intern/cycles/util/math_float3.h | 530 ++++++++++++ intern/cycles/util/math_float4.h | 536 ++++++++++++ intern/cycles/util/math_int2.h | 73 ++ intern/cycles/util/math_int3.h | 110 +++ intern/cycles/util/math_int4.h | 156 ++++ intern/cycles/util/math_intersect.h | 249 ++++++ intern/cycles/util/math_matrix.h | 454 ++++++++++ intern/cycles/util/md5.cpp | 387 +++++++++ intern/cycles/util/md5.h | 61 ++ intern/cycles/util/murmurhash.cpp | 126 +++ intern/cycles/util/murmurhash.h | 29 + intern/cycles/util/opengl.h | 25 + intern/cycles/util/openimagedenoise.h | 44 + intern/cycles/util/openvdb.h | 68 ++ intern/cycles/util/optimization.h | 77 ++ intern/cycles/util/param.h | 40 + intern/cycles/util/path.cpp | 781 +++++++++++++++++ intern/cycles/util/path.h | 74 ++ intern/cycles/util/profiling.cpp | 174 ++++ intern/cycles/util/profiling.h | 180 ++++ intern/cycles/util/progress.h | 370 +++++++++ intern/cycles/util/projection.h | 217 +++++ intern/cycles/util/queue.h | 28 + intern/cycles/util/rect.h | 75 ++ intern/cycles/util/semaphore.h | 61 ++ intern/cycles/util/set.h | 34 + intern/cycles/util/simd.cpp | 44 + intern/cycles/util/simd.h | 572 +++++++++++++ intern/cycles/util/sseb.h | 358 ++++++++ intern/cycles/util/ssef.h | 1104 +++++++++++++++++++++++++ intern/cycles/util/ssei.h | 646 +++++++++++++++ intern/cycles/util/stack_allocator.h | 165 ++++ intern/cycles/util/static_assert.h | 36 + intern/cycles/util/stats.h | 54 ++ intern/cycles/util/string.cpp | 268 ++++++ intern/cycles/util/string.h | 81 ++ intern/cycles/util/system.cpp | 415 ++++++++++ intern/cycles/util/system.h | 73 ++ intern/cycles/util/task.cpp | 251 ++++++ intern/cycles/util/task.h | 148 ++++ intern/cycles/util/tbb.h | 55 ++ intern/cycles/util/texture.h | 99 +++ intern/cycles/util/thread.cpp | 72 ++ intern/cycles/util/thread.h | 90 ++ intern/cycles/util/time.cpp | 139 ++++ intern/cycles/util/time.h | 91 ++ intern/cycles/util/transform.cpp | 345 ++++++++ intern/cycles/util/transform.h | 512 ++++++++++++ 
intern/cycles/util/types.h | 138 ++++ intern/cycles/util/types_float2.h | 40 + intern/cycles/util/types_float2_impl.h | 59 ++ intern/cycles/util/types_float3.h | 60 ++ intern/cycles/util/types_float3_impl.h | 103 +++ intern/cycles/util/types_float4.h | 63 ++ intern/cycles/util/types_float4_impl.h | 109 +++ intern/cycles/util/types_float8.h | 74 ++ intern/cycles/util/types_float8_impl.h | 112 +++ intern/cycles/util/types_int2.h | 39 + intern/cycles/util/types_int2_impl.h | 50 ++ intern/cycles/util/types_int3.h | 60 ++ intern/cycles/util/types_int3_impl.h | 104 +++ intern/cycles/util/types_int4.h | 66 ++ intern/cycles/util/types_int4_impl.h | 123 +++ intern/cycles/util/types_uchar2.h | 39 + intern/cycles/util/types_uchar2_impl.h | 50 ++ intern/cycles/util/types_uchar3.h | 39 + intern/cycles/util/types_uchar3_impl.h | 50 ++ intern/cycles/util/types_uchar4.h | 39 + intern/cycles/util/types_uchar4_impl.h | 50 ++ intern/cycles/util/types_uint2.h | 39 + intern/cycles/util/types_uint2_impl.h | 48 ++ intern/cycles/util/types_uint3.h | 39 + intern/cycles/util/types_uint3_impl.h | 48 ++ intern/cycles/util/types_uint4.h | 39 + intern/cycles/util/types_uint4_impl.h | 48 ++ intern/cycles/util/types_ushort4.h | 36 + intern/cycles/util/types_vector3.h | 39 + intern/cycles/util/types_vector3_impl.h | 43 + intern/cycles/util/unique_ptr.h | 29 + intern/cycles/util/util_algorithm.h | 33 - intern/cycles/util/util_aligned_malloc.cpp | 76 -- intern/cycles/util/util_aligned_malloc.h | 50 -- intern/cycles/util/util_args.h | 31 - intern/cycles/util/util_array.h | 318 ------- intern/cycles/util/util_atomic.h | 68 -- intern/cycles/util/util_avxb.h | 243 ------ intern/cycles/util/util_avxf.h | 392 --------- intern/cycles/util/util_avxi.h | 745 ----------------- intern/cycles/util/util_boundbox.h | 282 ------- intern/cycles/util/util_color.h | 296 ------- intern/cycles/util/util_debug.cpp | 124 --- intern/cycles/util/util_debug.h | 167 ---- intern/cycles/util/util_defines.h | 146 ---- 
intern/cycles/util/util_deque.h | 28 - intern/cycles/util/util_disjoint_set.h | 75 -- intern/cycles/util/util_foreach.h | 24 - intern/cycles/util/util_function.h | 39 - intern/cycles/util/util_guarded_allocator.cpp | 48 -- intern/cycles/util/util_guarded_allocator.h | 185 ----- intern/cycles/util/util_half.h | 169 ---- intern/cycles/util/util_hash.h | 389 --------- intern/cycles/util/util_ies.cpp | 411 --------- intern/cycles/util/util_ies.h | 59 -- intern/cycles/util/util_image.h | 98 --- intern/cycles/util/util_image_impl.h | 175 ---- intern/cycles/util/util_list.h | 28 - intern/cycles/util/util_logging.cpp | 96 --- intern/cycles/util/util_logging.h | 101 --- intern/cycles/util/util_map.h | 39 - intern/cycles/util/util_math.h | 870 ------------------- intern/cycles/util/util_math_cdf.cpp | 70 -- intern/cycles/util/util_math_cdf.h | 75 -- intern/cycles/util/util_math_fast.h | 652 --------------- intern/cycles/util/util_math_float2.h | 269 ------ intern/cycles/util/util_math_float3.h | 530 ------------ intern/cycles/util/util_math_float4.h | 536 ------------ intern/cycles/util/util_math_int2.h | 73 -- intern/cycles/util/util_math_int3.h | 110 --- intern/cycles/util/util_math_int4.h | 156 ---- intern/cycles/util/util_math_intersect.h | 249 ------ intern/cycles/util/util_math_matrix.h | 454 ---------- intern/cycles/util/util_md5.cpp | 387 --------- intern/cycles/util/util_md5.h | 61 -- intern/cycles/util/util_murmurhash.cpp | 126 --- intern/cycles/util/util_murmurhash.h | 29 - intern/cycles/util/util_opengl.h | 25 - intern/cycles/util/util_openimagedenoise.h | 44 - intern/cycles/util/util_openvdb.h | 68 -- intern/cycles/util/util_optimization.h | 77 -- intern/cycles/util/util_param.h | 40 - intern/cycles/util/util_path.cpp | 781 ----------------- intern/cycles/util/util_path.h | 74 -- intern/cycles/util/util_profiling.cpp | 174 ---- intern/cycles/util/util_profiling.h | 180 ---- intern/cycles/util/util_progress.h | 370 --------- intern/cycles/util/util_projection.h | 
217 ----- intern/cycles/util/util_queue.h | 28 - intern/cycles/util/util_rect.h | 75 -- intern/cycles/util/util_semaphore.h | 61 -- intern/cycles/util/util_set.h | 34 - intern/cycles/util/util_simd.cpp | 44 - intern/cycles/util/util_simd.h | 572 ------------- intern/cycles/util/util_sseb.h | 358 -------- intern/cycles/util/util_ssef.h | 1104 ------------------------- intern/cycles/util/util_ssei.h | 646 --------------- intern/cycles/util/util_stack_allocator.h | 165 ---- intern/cycles/util/util_static_assert.h | 36 - intern/cycles/util/util_stats.h | 54 -- intern/cycles/util/util_string.cpp | 268 ------ intern/cycles/util/util_string.h | 84 -- intern/cycles/util/util_system.cpp | 415 ---------- intern/cycles/util/util_system.h | 73 -- intern/cycles/util/util_task.cpp | 251 ------ intern/cycles/util/util_task.h | 148 ---- intern/cycles/util/util_tbb.h | 55 -- intern/cycles/util/util_texture.h | 99 --- intern/cycles/util/util_thread.cpp | 72 -- intern/cycles/util/util_thread.h | 90 -- intern/cycles/util/util_time.cpp | 139 ---- intern/cycles/util/util_time.h | 91 -- intern/cycles/util/util_transform.cpp | 345 -------- intern/cycles/util/util_transform.h | 512 ------------ intern/cycles/util/util_types.h | 138 ---- intern/cycles/util/util_types_float2.h | 40 - intern/cycles/util/util_types_float2_impl.h | 59 -- intern/cycles/util/util_types_float3.h | 60 -- intern/cycles/util/util_types_float3_impl.h | 103 --- intern/cycles/util/util_types_float4.h | 63 -- intern/cycles/util/util_types_float4_impl.h | 109 --- intern/cycles/util/util_types_float8.h | 74 -- intern/cycles/util/util_types_float8_impl.h | 112 --- intern/cycles/util/util_types_int2.h | 39 - intern/cycles/util/util_types_int2_impl.h | 50 -- intern/cycles/util/util_types_int3.h | 60 -- intern/cycles/util/util_types_int3_impl.h | 104 --- intern/cycles/util/util_types_int4.h | 66 -- intern/cycles/util/util_types_int4_impl.h | 123 --- intern/cycles/util/util_types_uchar2.h | 39 - 
intern/cycles/util/util_types_uchar2_impl.h | 50 -- intern/cycles/util/util_types_uchar3.h | 39 - intern/cycles/util/util_types_uchar3_impl.h | 50 -- intern/cycles/util/util_types_uchar4.h | 39 - intern/cycles/util/util_types_uchar4_impl.h | 50 -- intern/cycles/util/util_types_uint2.h | 39 - intern/cycles/util/util_types_uint2_impl.h | 48 -- intern/cycles/util/util_types_uint3.h | 39 - intern/cycles/util/util_types_uint3_impl.h | 48 -- intern/cycles/util/util_types_uint4.h | 39 - intern/cycles/util/util_types_uint4_impl.h | 48 -- intern/cycles/util/util_types_ushort4.h | 36 - intern/cycles/util/util_types_vector3.h | 39 - intern/cycles/util/util_types_vector3_impl.h | 43 - intern/cycles/util/util_unique_ptr.h | 29 - intern/cycles/util/util_vector.h | 59 -- intern/cycles/util/util_version.h | 35 - intern/cycles/util/util_view.cpp | 282 ------- intern/cycles/util/util_view.h | 48 -- intern/cycles/util/util_windows.cpp | 54 -- intern/cycles/util/util_windows.h | 42 - intern/cycles/util/util_xml.h | 41 - intern/cycles/util/vector.h | 59 ++ intern/cycles/util/version.h | 35 + intern/cycles/util/view.cpp | 282 +++++++ intern/cycles/util/view.h | 48 ++ intern/cycles/util/windows.cpp | 54 ++ intern/cycles/util/windows.h | 42 + intern/cycles/util/xml.h | 41 + 243 files changed, 20004 insertions(+), 20007 deletions(-) create mode 100644 intern/cycles/util/algorithm.h create mode 100644 intern/cycles/util/aligned_malloc.cpp create mode 100644 intern/cycles/util/aligned_malloc.h create mode 100644 intern/cycles/util/args.h create mode 100644 intern/cycles/util/array.h create mode 100644 intern/cycles/util/atomic.h create mode 100644 intern/cycles/util/avxb.h create mode 100644 intern/cycles/util/avxf.h create mode 100644 intern/cycles/util/avxi.h create mode 100644 intern/cycles/util/boundbox.h create mode 100644 intern/cycles/util/color.h create mode 100644 intern/cycles/util/debug.cpp create mode 100644 intern/cycles/util/debug.h create mode 100644 
intern/cycles/util/defines.h create mode 100644 intern/cycles/util/deque.h create mode 100644 intern/cycles/util/disjoint_set.h create mode 100644 intern/cycles/util/foreach.h create mode 100644 intern/cycles/util/function.h create mode 100644 intern/cycles/util/guarded_allocator.cpp create mode 100644 intern/cycles/util/guarded_allocator.h create mode 100644 intern/cycles/util/half.h create mode 100644 intern/cycles/util/hash.h create mode 100644 intern/cycles/util/ies.cpp create mode 100644 intern/cycles/util/ies.h create mode 100644 intern/cycles/util/image.h create mode 100644 intern/cycles/util/image_impl.h create mode 100644 intern/cycles/util/list.h create mode 100644 intern/cycles/util/log.cpp create mode 100644 intern/cycles/util/log.h create mode 100644 intern/cycles/util/map.h create mode 100644 intern/cycles/util/math.h create mode 100644 intern/cycles/util/math_cdf.cpp create mode 100644 intern/cycles/util/math_cdf.h create mode 100644 intern/cycles/util/math_fast.h create mode 100644 intern/cycles/util/math_float2.h create mode 100644 intern/cycles/util/math_float3.h create mode 100644 intern/cycles/util/math_float4.h create mode 100644 intern/cycles/util/math_int2.h create mode 100644 intern/cycles/util/math_int3.h create mode 100644 intern/cycles/util/math_int4.h create mode 100644 intern/cycles/util/math_intersect.h create mode 100644 intern/cycles/util/math_matrix.h create mode 100644 intern/cycles/util/md5.cpp create mode 100644 intern/cycles/util/md5.h create mode 100644 intern/cycles/util/murmurhash.cpp create mode 100644 intern/cycles/util/murmurhash.h create mode 100644 intern/cycles/util/opengl.h create mode 100644 intern/cycles/util/openimagedenoise.h create mode 100644 intern/cycles/util/openvdb.h create mode 100644 intern/cycles/util/optimization.h create mode 100644 intern/cycles/util/param.h create mode 100644 intern/cycles/util/path.cpp create mode 100644 intern/cycles/util/path.h create mode 100644 intern/cycles/util/profiling.cpp 
create mode 100644 intern/cycles/util/profiling.h create mode 100644 intern/cycles/util/progress.h create mode 100644 intern/cycles/util/projection.h create mode 100644 intern/cycles/util/queue.h create mode 100644 intern/cycles/util/rect.h create mode 100644 intern/cycles/util/semaphore.h create mode 100644 intern/cycles/util/set.h create mode 100644 intern/cycles/util/simd.cpp create mode 100644 intern/cycles/util/simd.h create mode 100644 intern/cycles/util/sseb.h create mode 100644 intern/cycles/util/ssef.h create mode 100644 intern/cycles/util/ssei.h create mode 100644 intern/cycles/util/stack_allocator.h create mode 100644 intern/cycles/util/static_assert.h create mode 100644 intern/cycles/util/stats.h create mode 100644 intern/cycles/util/string.cpp create mode 100644 intern/cycles/util/string.h create mode 100644 intern/cycles/util/system.cpp create mode 100644 intern/cycles/util/system.h create mode 100644 intern/cycles/util/task.cpp create mode 100644 intern/cycles/util/task.h create mode 100644 intern/cycles/util/tbb.h create mode 100644 intern/cycles/util/texture.h create mode 100644 intern/cycles/util/thread.cpp create mode 100644 intern/cycles/util/thread.h create mode 100644 intern/cycles/util/time.cpp create mode 100644 intern/cycles/util/time.h create mode 100644 intern/cycles/util/transform.cpp create mode 100644 intern/cycles/util/transform.h create mode 100644 intern/cycles/util/types.h create mode 100644 intern/cycles/util/types_float2.h create mode 100644 intern/cycles/util/types_float2_impl.h create mode 100644 intern/cycles/util/types_float3.h create mode 100644 intern/cycles/util/types_float3_impl.h create mode 100644 intern/cycles/util/types_float4.h create mode 100644 intern/cycles/util/types_float4_impl.h create mode 100644 intern/cycles/util/types_float8.h create mode 100644 intern/cycles/util/types_float8_impl.h create mode 100644 intern/cycles/util/types_int2.h create mode 100644 intern/cycles/util/types_int2_impl.h create mode 100644 
intern/cycles/util/types_int3.h create mode 100644 intern/cycles/util/types_int3_impl.h create mode 100644 intern/cycles/util/types_int4.h create mode 100644 intern/cycles/util/types_int4_impl.h create mode 100644 intern/cycles/util/types_uchar2.h create mode 100644 intern/cycles/util/types_uchar2_impl.h create mode 100644 intern/cycles/util/types_uchar3.h create mode 100644 intern/cycles/util/types_uchar3_impl.h create mode 100644 intern/cycles/util/types_uchar4.h create mode 100644 intern/cycles/util/types_uchar4_impl.h create mode 100644 intern/cycles/util/types_uint2.h create mode 100644 intern/cycles/util/types_uint2_impl.h create mode 100644 intern/cycles/util/types_uint3.h create mode 100644 intern/cycles/util/types_uint3_impl.h create mode 100644 intern/cycles/util/types_uint4.h create mode 100644 intern/cycles/util/types_uint4_impl.h create mode 100644 intern/cycles/util/types_ushort4.h create mode 100644 intern/cycles/util/types_vector3.h create mode 100644 intern/cycles/util/types_vector3_impl.h create mode 100644 intern/cycles/util/unique_ptr.h delete mode 100644 intern/cycles/util/util_algorithm.h delete mode 100644 intern/cycles/util/util_aligned_malloc.cpp delete mode 100644 intern/cycles/util/util_aligned_malloc.h delete mode 100644 intern/cycles/util/util_args.h delete mode 100644 intern/cycles/util/util_array.h delete mode 100644 intern/cycles/util/util_atomic.h delete mode 100644 intern/cycles/util/util_avxb.h delete mode 100644 intern/cycles/util/util_avxf.h delete mode 100644 intern/cycles/util/util_avxi.h delete mode 100644 intern/cycles/util/util_boundbox.h delete mode 100644 intern/cycles/util/util_color.h delete mode 100644 intern/cycles/util/util_debug.cpp delete mode 100644 intern/cycles/util/util_debug.h delete mode 100644 intern/cycles/util/util_defines.h delete mode 100644 intern/cycles/util/util_deque.h delete mode 100644 intern/cycles/util/util_disjoint_set.h delete mode 100644 intern/cycles/util/util_foreach.h delete mode 100644 
intern/cycles/util/util_function.h delete mode 100644 intern/cycles/util/util_guarded_allocator.cpp delete mode 100644 intern/cycles/util/util_guarded_allocator.h delete mode 100644 intern/cycles/util/util_half.h delete mode 100644 intern/cycles/util/util_hash.h delete mode 100644 intern/cycles/util/util_ies.cpp delete mode 100644 intern/cycles/util/util_ies.h delete mode 100644 intern/cycles/util/util_image.h delete mode 100644 intern/cycles/util/util_image_impl.h delete mode 100644 intern/cycles/util/util_list.h delete mode 100644 intern/cycles/util/util_logging.cpp delete mode 100644 intern/cycles/util/util_logging.h delete mode 100644 intern/cycles/util/util_map.h delete mode 100644 intern/cycles/util/util_math.h delete mode 100644 intern/cycles/util/util_math_cdf.cpp delete mode 100644 intern/cycles/util/util_math_cdf.h delete mode 100644 intern/cycles/util/util_math_fast.h delete mode 100644 intern/cycles/util/util_math_float2.h delete mode 100644 intern/cycles/util/util_math_float3.h delete mode 100644 intern/cycles/util/util_math_float4.h delete mode 100644 intern/cycles/util/util_math_int2.h delete mode 100644 intern/cycles/util/util_math_int3.h delete mode 100644 intern/cycles/util/util_math_int4.h delete mode 100644 intern/cycles/util/util_math_intersect.h delete mode 100644 intern/cycles/util/util_math_matrix.h delete mode 100644 intern/cycles/util/util_md5.cpp delete mode 100644 intern/cycles/util/util_md5.h delete mode 100644 intern/cycles/util/util_murmurhash.cpp delete mode 100644 intern/cycles/util/util_murmurhash.h delete mode 100644 intern/cycles/util/util_opengl.h delete mode 100644 intern/cycles/util/util_openimagedenoise.h delete mode 100644 intern/cycles/util/util_openvdb.h delete mode 100644 intern/cycles/util/util_optimization.h delete mode 100644 intern/cycles/util/util_param.h delete mode 100644 intern/cycles/util/util_path.cpp delete mode 100644 intern/cycles/util/util_path.h delete mode 100644 intern/cycles/util/util_profiling.cpp 
delete mode 100644 intern/cycles/util/util_profiling.h delete mode 100644 intern/cycles/util/util_progress.h delete mode 100644 intern/cycles/util/util_projection.h delete mode 100644 intern/cycles/util/util_queue.h delete mode 100644 intern/cycles/util/util_rect.h delete mode 100644 intern/cycles/util/util_semaphore.h delete mode 100644 intern/cycles/util/util_set.h delete mode 100644 intern/cycles/util/util_simd.cpp delete mode 100644 intern/cycles/util/util_simd.h delete mode 100644 intern/cycles/util/util_sseb.h delete mode 100644 intern/cycles/util/util_ssef.h delete mode 100644 intern/cycles/util/util_ssei.h delete mode 100644 intern/cycles/util/util_stack_allocator.h delete mode 100644 intern/cycles/util/util_static_assert.h delete mode 100644 intern/cycles/util/util_stats.h delete mode 100644 intern/cycles/util/util_string.cpp delete mode 100644 intern/cycles/util/util_string.h delete mode 100644 intern/cycles/util/util_system.cpp delete mode 100644 intern/cycles/util/util_system.h delete mode 100644 intern/cycles/util/util_task.cpp delete mode 100644 intern/cycles/util/util_task.h delete mode 100644 intern/cycles/util/util_tbb.h delete mode 100644 intern/cycles/util/util_texture.h delete mode 100644 intern/cycles/util/util_thread.cpp delete mode 100644 intern/cycles/util/util_thread.h delete mode 100644 intern/cycles/util/util_time.cpp delete mode 100644 intern/cycles/util/util_time.h delete mode 100644 intern/cycles/util/util_transform.cpp delete mode 100644 intern/cycles/util/util_transform.h delete mode 100644 intern/cycles/util/util_types.h delete mode 100644 intern/cycles/util/util_types_float2.h delete mode 100644 intern/cycles/util/util_types_float2_impl.h delete mode 100644 intern/cycles/util/util_types_float3.h delete mode 100644 intern/cycles/util/util_types_float3_impl.h delete mode 100644 intern/cycles/util/util_types_float4.h delete mode 100644 intern/cycles/util/util_types_float4_impl.h delete mode 100644 
intern/cycles/util/util_types_float8.h delete mode 100644 intern/cycles/util/util_types_float8_impl.h delete mode 100644 intern/cycles/util/util_types_int2.h delete mode 100644 intern/cycles/util/util_types_int2_impl.h delete mode 100644 intern/cycles/util/util_types_int3.h delete mode 100644 intern/cycles/util/util_types_int3_impl.h delete mode 100644 intern/cycles/util/util_types_int4.h delete mode 100644 intern/cycles/util/util_types_int4_impl.h delete mode 100644 intern/cycles/util/util_types_uchar2.h delete mode 100644 intern/cycles/util/util_types_uchar2_impl.h delete mode 100644 intern/cycles/util/util_types_uchar3.h delete mode 100644 intern/cycles/util/util_types_uchar3_impl.h delete mode 100644 intern/cycles/util/util_types_uchar4.h delete mode 100644 intern/cycles/util/util_types_uchar4_impl.h delete mode 100644 intern/cycles/util/util_types_uint2.h delete mode 100644 intern/cycles/util/util_types_uint2_impl.h delete mode 100644 intern/cycles/util/util_types_uint3.h delete mode 100644 intern/cycles/util/util_types_uint3_impl.h delete mode 100644 intern/cycles/util/util_types_uint4.h delete mode 100644 intern/cycles/util/util_types_uint4_impl.h delete mode 100644 intern/cycles/util/util_types_ushort4.h delete mode 100644 intern/cycles/util/util_types_vector3.h delete mode 100644 intern/cycles/util/util_types_vector3_impl.h delete mode 100644 intern/cycles/util/util_unique_ptr.h delete mode 100644 intern/cycles/util/util_vector.h delete mode 100644 intern/cycles/util/util_version.h delete mode 100644 intern/cycles/util/util_view.cpp delete mode 100644 intern/cycles/util/util_view.h delete mode 100644 intern/cycles/util/util_windows.cpp delete mode 100644 intern/cycles/util/util_windows.h delete mode 100644 intern/cycles/util/util_xml.h create mode 100644 intern/cycles/util/vector.h create mode 100644 intern/cycles/util/version.h create mode 100644 intern/cycles/util/view.cpp create mode 100644 intern/cycles/util/view.h create mode 100644 
intern/cycles/util/windows.cpp create mode 100644 intern/cycles/util/windows.h create mode 100644 intern/cycles/util/xml.h (limited to 'intern/cycles/util') diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index 18e14913884..b68646a44d5 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -22,23 +22,23 @@ set(INC_SYS ) set(SRC - util_aligned_malloc.cpp - util_debug.cpp - util_ies.cpp - util_logging.cpp - util_math_cdf.cpp - util_md5.cpp - util_murmurhash.cpp - util_path.cpp - util_profiling.cpp - util_string.cpp - util_simd.cpp - util_system.cpp - util_task.cpp - util_thread.cpp - util_time.cpp - util_transform.cpp - util_windows.cpp + aligned_malloc.cpp + debug.cpp + ies.cpp + log.cpp + math_cdf.cpp + md5.cpp + murmurhash.cpp + path.cpp + profiling.cpp + string.cpp + simd.cpp + system.cpp + task.cpp + thread.cpp + time.cpp + transform.cpp + windows.cpp ) set(LIB @@ -48,7 +48,7 @@ set(LIB if(WITH_CYCLES_STANDALONE) if(WITH_CYCLES_STANDALONE_GUI) list(APPEND SRC - util_view.cpp + view.cpp ) endif() endif() @@ -64,108 +64,108 @@ else() endif() set(SRC_HEADERS - util_algorithm.h - util_aligned_malloc.h - util_args.h - util_array.h - util_atomic.h - util_boundbox.h - util_debug.h - util_defines.h - util_deque.h - util_disjoint_set.h - util_guarded_allocator.cpp - util_foreach.h - util_function.h - util_guarded_allocator.h - util_half.h - util_hash.h - util_ies.h - util_image.h - util_image_impl.h - util_list.h - util_logging.h - util_map.h - util_math.h - util_math_cdf.h - util_math_fast.h - util_math_intersect.h - util_math_float2.h - util_math_float3.h - util_math_float4.h - util_math_int2.h - util_math_int3.h - util_math_int4.h - util_math_matrix.h - util_md5.h - util_murmurhash.h - util_openimagedenoise.h - util_opengl.h - util_openvdb.h - util_optimization.h - util_param.h - util_path.h - util_profiling.h - util_progress.h - util_projection.h - util_queue.h - util_rect.h - util_set.h - 
util_simd.h - util_avxf.h - util_avxb.h - util_avxi.h - util_semaphore.h - util_sseb.h - util_ssef.h - util_ssei.h - util_stack_allocator.h - util_static_assert.h - util_stats.h - util_string.h - util_system.h - util_task.h - util_tbb.h - util_texture.h - util_thread.h - util_time.h - util_transform.h - util_types.h - util_types_float2.h - util_types_float2_impl.h - util_types_float3.h - util_types_float3_impl.h - util_types_float4.h - util_types_float4_impl.h - util_types_float8.h - util_types_float8_impl.h - util_types_int2.h - util_types_int2_impl.h - util_types_int3.h - util_types_int3_impl.h - util_types_int4.h - util_types_int4_impl.h - util_types_uchar2.h - util_types_uchar2_impl.h - util_types_uchar3.h - util_types_uchar3_impl.h - util_types_uchar4.h - util_types_uchar4_impl.h - util_types_uint2.h - util_types_uint2_impl.h - util_types_uint3.h - util_types_uint3_impl.h - util_types_uint4.h - util_types_uint4_impl.h - util_types_ushort4.h - util_types_vector3.h - util_types_vector3_impl.h - util_unique_ptr.h - util_vector.h - util_version.h - util_view.h - util_windows.h - util_xml.h + algorithm.h + aligned_malloc.h + args.h + array.h + atomic.h + boundbox.h + debug.h + defines.h + deque.h + disjoint_set.h + guarded_allocator.cpp + foreach.h + function.h + guarded_allocator.h + half.h + hash.h + ies.h + image.h + image_impl.h + list.h + log.h + map.h + math.h + math_cdf.h + math_fast.h + math_intersect.h + math_float2.h + math_float3.h + math_float4.h + math_int2.h + math_int3.h + math_int4.h + math_matrix.h + md5.h + murmurhash.h + openimagedenoise.h + opengl.h + openvdb.h + optimization.h + param.h + path.h + profiling.h + progress.h + projection.h + queue.h + rect.h + set.h + simd.h + avxf.h + avxb.h + avxi.h + semaphore.h + sseb.h + ssef.h + ssei.h + stack_allocator.h + static_assert.h + stats.h + string.h + system.h + task.h + tbb.h + texture.h + thread.h + time.h + transform.h + types.h + types_float2.h + types_float2_impl.h + types_float3.h + 
types_float3_impl.h + types_float4.h + types_float4_impl.h + types_float8.h + types_float8_impl.h + types_int2.h + types_int2_impl.h + types_int3.h + types_int3_impl.h + types_int4.h + types_int4_impl.h + types_uchar2.h + types_uchar2_impl.h + types_uchar3.h + types_uchar3_impl.h + types_uchar4.h + types_uchar4_impl.h + types_uint2.h + types_uint2_impl.h + types_uint3.h + types_uint3_impl.h + types_uint4.h + types_uint4_impl.h + types_ushort4.h + types_vector3.h + types_vector3_impl.h + unique_ptr.h + vector.h + version.h + view.h + windows.h + xml.h ) include_directories(${INC}) diff --git a/intern/cycles/util/algorithm.h b/intern/cycles/util/algorithm.h new file mode 100644 index 00000000000..63abd4e92a3 --- /dev/null +++ b/intern/cycles/util/algorithm.h @@ -0,0 +1,33 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __UTIL_ALGORITHM_H__ +#define __UTIL_ALGORITHM_H__ + +#include + +CCL_NAMESPACE_BEGIN + +using std::max; +using std::min; +using std::remove; +using std::sort; +using std::stable_sort; +using std::swap; + +CCL_NAMESPACE_END + +#endif /* __UTIL_ALGORITHM_H__ */ diff --git a/intern/cycles/util/aligned_malloc.cpp b/intern/cycles/util/aligned_malloc.cpp new file mode 100644 index 00000000000..2b05559b55f --- /dev/null +++ b/intern/cycles/util/aligned_malloc.cpp @@ -0,0 +1,76 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util/aligned_malloc.h" +#include "util/guarded_allocator.h" + +#include + +/* Adopted from Libmv. */ + +#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__NetBSD__) +/* Needed for memalign on Linux and _aligned_alloc on Windows. */ +# ifdef FREE_WINDOWS +/* Make sure _aligned_malloc is included. */ +# ifdef __MSVCRT_VERSION__ +# undef __MSVCRT_VERSION__ +# endif +# define __MSVCRT_VERSION__ 0x0700 +# endif /* FREE_WINDOWS */ +# include +#else +/* Apple's malloc is 16-byte aligned, and does not have malloc.h, so include + * stdilb instead. 
+ */ +# include +#endif + +CCL_NAMESPACE_BEGIN + +void *util_aligned_malloc(size_t size, int alignment) +{ +#ifdef WITH_BLENDER_GUARDEDALLOC + return MEM_mallocN_aligned(size, alignment, "Cycles Aligned Alloc"); +#elif defined(_WIN32) + return _aligned_malloc(size, alignment); +#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__APPLE__) + void *result; + if (posix_memalign(&result, alignment, size)) { + /* Non-zero means allocation error + * either no allocation or bad alignment value. + */ + return NULL; + } + return result; +#else /* This is for Linux. */ + return memalign(alignment, size); +#endif +} + +void util_aligned_free(void *ptr) +{ +#if defined(WITH_BLENDER_GUARDEDALLOC) + if (ptr != NULL) { + MEM_freeN(ptr); + } +#elif defined(_WIN32) + _aligned_free(ptr); +#else + free(ptr); +#endif +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/aligned_malloc.h b/intern/cycles/util/aligned_malloc.h new file mode 100644 index 00000000000..66c2ac1c593 --- /dev/null +++ b/intern/cycles/util/aligned_malloc.h @@ -0,0 +1,50 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_ALIGNED_MALLOC_H__ +#define __UTIL_ALIGNED_MALLOC_H__ + +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* Minimum alignment needed by all CPU native data types (SSE, AVX). */ +#define MIN_ALIGNMENT_CPU_DATA_TYPES 16 + +/* Allocate block of size bytes at least aligned to a given value. 
*/ +void *util_aligned_malloc(size_t size, int alignment); + +/* Free memory allocated by util_aligned_malloc. */ +void util_aligned_free(void *ptr); + +/* Aligned new operator. */ +template T *util_aligned_new(Args... args) +{ + void *mem = util_aligned_malloc(sizeof(T), alignof(T)); + return new (mem) T(args...); +} + +template void util_aligned_delete(T *t) +{ + if (t) { + t->~T(); + util_aligned_free(t); + } +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_ALIGNED_MALLOC_H__ */ diff --git a/intern/cycles/util/args.h b/intern/cycles/util/args.h new file mode 100644 index 00000000000..be6f2c2b9f1 --- /dev/null +++ b/intern/cycles/util/args.h @@ -0,0 +1,31 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_ARGS_H__ +#define __UTIL_ARGS_H__ + +/* Argument Parsing for command line, we use the OpenImageIO + * library because it has nice functions to do this. */ + +#include + +CCL_NAMESPACE_BEGIN + +OIIO_NAMESPACE_USING + +CCL_NAMESPACE_END + +#endif /* __UTIL_ARGS_H__ */ diff --git a/intern/cycles/util/array.h b/intern/cycles/util/array.h new file mode 100644 index 00000000000..4c905b09138 --- /dev/null +++ b/intern/cycles/util/array.h @@ -0,0 +1,318 @@ +/* + * Copyright 2011-2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_ARRAY_H__ +#define __UTIL_ARRAY_H__ + +#include +#include + +#include "util/aligned_malloc.h" +#include "util/guarded_allocator.h" +#include "util/types.h" +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +/* Simplified version of vector, serving multiple purposes: + * - somewhat faster in that it does not clear memory on resize/alloc, + * this was actually showing up in profiles quite significantly. it + * also does not run any constructors/destructors + * - if this is used, we are not tempted to use inefficient operations + * - aligned allocation for CPU native data types */ + +template class array { + public: + array() : data_(NULL), datasize_(0), capacity_(0) + { + } + + explicit array(size_t newsize) + { + if (newsize == 0) { + data_ = NULL; + datasize_ = 0; + capacity_ = 0; + } + else { + data_ = mem_allocate(newsize); + datasize_ = newsize; + capacity_ = datasize_; + } + } + + array(const array &from) + { + if (from.datasize_ == 0) { + data_ = NULL; + datasize_ = 0; + capacity_ = 0; + } + else { + data_ = mem_allocate(from.datasize_); + if (from.datasize_ > 0) { + memcpy(data_, from.data_, from.datasize_ * sizeof(T)); + } + datasize_ = from.datasize_; + capacity_ = datasize_; + } + } + + array &operator=(const array &from) + { + if (this != &from) { + resize(from.size()); + if (datasize_ > 0) { + memcpy((void *)data_, from.data_, datasize_ * sizeof(T)); + } + } + + return *this; + } + + array &operator=(const vector &from) + { + resize(from.size()); + + if (from.size() > 0 && datasize_ > 0) { + memcpy(data_, 
&from[0], datasize_ * sizeof(T)); + } + + return *this; + } + + ~array() + { + mem_free(data_, capacity_); + } + + bool operator==(const array &other) const + { + if (datasize_ != other.datasize_) { + return false; + } + if (datasize_ == 0) { + return true; + } + + return memcmp(data_, other.data_, datasize_ * sizeof(T)) == 0; + } + + bool operator!=(const array &other) const + { + return !(*this == other); + } + + void steal_data(array &from) + { + if (this != &from) { + clear(); + + data_ = from.data_; + datasize_ = from.datasize_; + capacity_ = from.capacity_; + + from.data_ = NULL; + from.datasize_ = 0; + from.capacity_ = 0; + } + } + + void set_data(T *ptr_, size_t datasize) + { + clear(); + data_ = ptr_; + datasize_ = datasize; + capacity_ = datasize; + } + + T *steal_pointer() + { + T *ptr = data_; + data_ = NULL; + clear(); + return ptr; + } + + T *resize(size_t newsize) + { + if (newsize == 0) { + clear(); + } + else if (newsize != datasize_) { + if (newsize > capacity_) { + T *newdata = mem_allocate(newsize); + if (newdata == NULL) { + /* Allocation failed, likely out of memory. */ + clear(); + return NULL; + } + else if (data_ != NULL) { + memcpy( + (void *)newdata, data_, ((datasize_ < newsize) ? 
datasize_ : newsize) * sizeof(T)); + mem_free(data_, capacity_); + } + data_ = newdata; + capacity_ = newsize; + } + datasize_ = newsize; + } + return data_; + } + + T *resize(size_t newsize, const T &value) + { + size_t oldsize = size(); + resize(newsize); + + for (size_t i = oldsize; i < size(); i++) { + data_[i] = value; + } + + return data_; + } + + void clear() + { + if (data_ != NULL) { + mem_free(data_, capacity_); + data_ = NULL; + } + datasize_ = 0; + capacity_ = 0; + } + + size_t empty() const + { + return datasize_ == 0; + } + + size_t size() const + { + return datasize_; + } + + T *data() + { + return data_; + } + + const T *data() const + { + return data_; + } + + T &operator[](size_t i) const + { + assert(i < datasize_); + return data_[i]; + } + + T *begin() + { + return data_; + } + + const T *begin() const + { + return data_; + } + + T *end() + { + return data_ + datasize_; + } + + const T *end() const + { + return data_ + datasize_; + } + + void reserve(size_t newcapacity) + { + if (newcapacity > capacity_) { + T *newdata = mem_allocate(newcapacity); + if (data_ != NULL) { + memcpy(newdata, data_, ((datasize_ < newcapacity) ? datasize_ : newcapacity) * sizeof(T)); + mem_free(data_, capacity_); + } + data_ = newdata; + capacity_ = newcapacity; + } + } + + size_t capacity() const + { + return capacity_; + } + + // do not use this method unless you are sure the code is not performance critical + void push_back_slow(const T &t) + { + if (capacity_ == datasize_) { + reserve(datasize_ == 0 ? 
1 : (size_t)((datasize_ + 1) * 1.2)); + } + + data_[datasize_++] = t; + } + + void push_back_reserved(const T &t) + { + assert(datasize_ < capacity_); + push_back_slow(t); + } + + void append(const array &from) + { + if (from.size()) { + size_t old_size = size(); + resize(old_size + from.size()); + memcpy(data_ + old_size, from.data(), sizeof(T) * from.size()); + } + } + + protected: + inline T *mem_allocate(size_t N) + { + if (N == 0) { + return NULL; + } + T *mem = (T *)util_aligned_malloc(sizeof(T) * N, alignment); + if (mem != NULL) { + util_guarded_mem_alloc(sizeof(T) * N); + } + else { + throw std::bad_alloc(); + } + return mem; + } + + inline void mem_free(T *mem, size_t N) + { + if (mem != NULL) { + util_guarded_mem_free(sizeof(T) * N); + util_aligned_free(mem); + } + } + + T *data_; + size_t datasize_; + size_t capacity_; +}; + +CCL_NAMESPACE_END + +#endif /* __UTIL_ARRAY_H__ */ diff --git a/intern/cycles/util/atomic.h b/intern/cycles/util/atomic.h new file mode 100644 index 00000000000..faba411c769 --- /dev/null +++ b/intern/cycles/util/atomic.h @@ -0,0 +1,68 @@ +/* + * Copyright 2014 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_ATOMIC_H__ +#define __UTIL_ATOMIC_H__ + +#ifndef __KERNEL_GPU__ + +/* Using atomic ops header from Blender. 
*/ +# include "atomic_ops.h" + +# define atomic_add_and_fetch_float(p, x) atomic_add_and_fetch_fl((p), (x)) +# define atomic_compare_and_swap_float(p, old_val, new_val) \ + atomic_cas_float((p), (old_val), (new_val)) + +# define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1) +# define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_add_uint32((p), -1) + +# define CCL_LOCAL_MEM_FENCE 0 +# define ccl_barrier(flags) ((void)0) + +#else /* __KERNEL_GPU__ */ + +# if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) + +# define atomic_add_and_fetch_float(p, x) (atomicAdd((float *)(p), (float)(x)) + (float)(x)) + +# define atomic_fetch_and_add_uint32(p, x) atomicAdd((unsigned int *)(p), (unsigned int)(x)) +# define atomic_fetch_and_sub_uint32(p, x) atomicSub((unsigned int *)(p), (unsigned int)(x)) +# define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1) +# define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_sub_uint32((p), 1) +# define atomic_fetch_and_or_uint32(p, x) atomicOr((unsigned int *)(p), (unsigned int)(x)) + +ccl_device_inline float atomic_compare_and_swap_float(volatile float *dest, + const float old_val, + const float new_val) +{ + union { + unsigned int int_value; + float float_value; + } new_value, prev_value, result; + prev_value.float_value = old_val; + new_value.float_value = new_val; + result.int_value = atomicCAS((unsigned int *)dest, prev_value.int_value, new_value.int_value); + return result.float_value; +} + +# define CCL_LOCAL_MEM_FENCE +# define ccl_barrier(flags) __syncthreads() + +# endif /* __KERNEL_CUDA__ */ + +#endif /* __KERNEL_GPU__ */ + +#endif /* __UTIL_ATOMIC_H__ */ diff --git a/intern/cycles/util/avxb.h b/intern/cycles/util/avxb.h new file mode 100644 index 00000000000..15215d04ca3 --- /dev/null +++ b/intern/cycles/util/avxb.h @@ -0,0 +1,243 @@ +/* + * Copyright 2011-2013 Intel Corporation + * Modifications Copyright 2014, Blender Foundation. 
+ * + * Licensed under the Apache License, Version 2.0(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_AVXB_H__ +#define __UTIL_AVXB_H__ + +CCL_NAMESPACE_BEGIN + +struct avxf; + +/*! 4-wide SSE bool type. */ +struct avxb { + typedef avxb Mask; // mask type + typedef avxf Float; // float type + + enum { size = 8 }; // number of SIMD elements + union { + __m256 m256; + int32_t v[8]; + }; // data + + //////////////////////////////////////////////////////////////////////////////// + /// Constructors, Assignment & Cast Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline avxb() + { + } + __forceinline avxb(const avxb &other) + { + m256 = other.m256; + } + __forceinline avxb &operator=(const avxb &other) + { + m256 = other.m256; + return *this; + } + + __forceinline avxb(const __m256 input) : m256(input) + { + } + __forceinline avxb(const __m128 &a, const __m128 &b) + : m256(_mm256_insertf128_ps(_mm256_castps128_ps256(a), b, 1)) + { + } + __forceinline operator const __m256 &(void) const + { + return m256; + } + __forceinline operator const __m256i(void) const + { + return _mm256_castps_si256(m256); + } + __forceinline operator const __m256d(void) const + { + return _mm256_castps_pd(m256); + } + + //////////////////////////////////////////////////////////////////////////////// + /// Constants + //////////////////////////////////////////////////////////////////////////////// + + __forceinline avxb(FalseTy) : m256(_mm256_setzero_ps()) + { + 
} + __forceinline avxb(TrueTy) : m256(_mm256_castsi256_ps(_mm256_set1_epi32(-1))) + { + } + + //////////////////////////////////////////////////////////////////////////////// + /// Array Access + //////////////////////////////////////////////////////////////////////////////// + + __forceinline bool operator[](const size_t i) const + { + assert(i < 8); + return (_mm256_movemask_ps(m256) >> i) & 1; + } + __forceinline int32_t &operator[](const size_t i) + { + assert(i < 8); + return v[i]; + } +}; + +//////////////////////////////////////////////////////////////////////////////// +/// Unary Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const avxb operator!(const avxb &a) +{ + return _mm256_xor_ps(a, avxb(True)); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Binary Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const avxb operator&(const avxb &a, const avxb &b) +{ + return _mm256_and_ps(a, b); +} +__forceinline const avxb operator|(const avxb &a, const avxb &b) +{ + return _mm256_or_ps(a, b); +} +__forceinline const avxb operator^(const avxb &a, const avxb &b) +{ + return _mm256_xor_ps(a, b); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Assignment Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const avxb operator&=(avxb &a, const avxb &b) +{ + return a = a & b; +} +__forceinline const avxb operator|=(avxb &a, const avxb &b) +{ + return a = a | b; +} +__forceinline const avxb operator^=(avxb &a, const avxb &b) +{ + return a = a ^ b; +} + +//////////////////////////////////////////////////////////////////////////////// +/// Comparison Operators + Select +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const avxb operator!=(const avxb &a, const 
avxb &b) +{ + return _mm256_xor_ps(a, b); +} +__forceinline const avxb operator==(const avxb &a, const avxb &b) +{ +#ifdef __KERNEL_AVX2__ + return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b)); +#else + __m128i a_lo = _mm_castps_si128(_mm256_extractf128_ps(a, 0)); + __m128i a_hi = _mm_castps_si128(_mm256_extractf128_ps(a, 1)); + __m128i b_lo = _mm_castps_si128(_mm256_extractf128_ps(b, 0)); + __m128i b_hi = _mm_castps_si128(_mm256_extractf128_ps(b, 1)); + __m128i c_lo = _mm_cmpeq_epi32(a_lo, b_lo); + __m128i c_hi = _mm_cmpeq_epi32(a_hi, b_hi); + __m256i result = _mm256_insertf128_si256(_mm256_castsi128_si256(c_lo), c_hi, 1); + return _mm256_castsi256_ps(result); +#endif +} + +__forceinline const avxb select(const avxb &m, const avxb &t, const avxb &f) +{ +#if defined(__KERNEL_SSE41__) + return _mm256_blendv_ps(f, t, m); +#else + return _mm256_or_ps(_mm256_and_ps(m, t), _mm256_andnot_ps(m, f)); +#endif +} + +//////////////////////////////////////////////////////////////////////////////// +/// Movement/Shifting/Shuffling Functions +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const avxb unpacklo(const avxb &a, const avxb &b) +{ + return _mm256_unpacklo_ps(a, b); +} +__forceinline const avxb unpackhi(const avxb &a, const avxb &b) +{ + return _mm256_unpackhi_ps(a, b); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Reduction Operations +//////////////////////////////////////////////////////////////////////////////// + +#if defined(__KERNEL_SSE41__) +__forceinline uint32_t popcnt(const avxb &a) +{ + return _mm_popcnt_u32(_mm256_movemask_ps(a)); +} +#else +__forceinline uint32_t popcnt(const avxb &a) +{ + return bool(a[0]) + bool(a[1]) + bool(a[2]) + bool(a[3]) + bool(a[4]) + bool(a[5]) + bool(a[6]) + + bool(a[7]); +} +#endif + +__forceinline bool reduce_and(const avxb &a) +{ + return _mm256_movemask_ps(a) == 0xf; +} +__forceinline bool reduce_or(const avxb &a) +{ + return 
_mm256_movemask_ps(a) != 0x0; +} +__forceinline bool all(const avxb &b) +{ + return _mm256_movemask_ps(b) == 0xf; +} +__forceinline bool any(const avxb &b) +{ + return _mm256_movemask_ps(b) != 0x0; +} +__forceinline bool none(const avxb &b) +{ + return _mm256_movemask_ps(b) == 0x0; +} + +__forceinline uint32_t movemask(const avxb &a) +{ + return _mm256_movemask_ps(a); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Debug Functions +//////////////////////////////////////////////////////////////////////////////// + +ccl_device_inline void print_avxb(const char *label, const avxb &a) +{ + printf("%s: %d %d %d %d %d %d %d %d\n", label, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]); +} + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/util/avxf.h b/intern/cycles/util/avxf.h new file mode 100644 index 00000000000..1fb3ded422f --- /dev/null +++ b/intern/cycles/util/avxf.h @@ -0,0 +1,392 @@ +/* + * Copyright 2016 Intel Corporation + * + * Licensed under the Apache License, Version 2.0(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_AVXF_H__ +#define __UTIL_AVXF_H__ + +CCL_NAMESPACE_BEGIN + +struct avxb; + +struct avxf { + typedef avxf Float; + + enum { size = 8 }; /* Number of SIMD elements. 
*/ + + union { + __m256 m256; + float f[8]; + int i[8]; + }; + + __forceinline avxf() + { + } + __forceinline avxf(const avxf &other) + { + m256 = other.m256; + } + __forceinline avxf &operator=(const avxf &other) + { + m256 = other.m256; + return *this; + } + + __forceinline avxf(const __m256 a) : m256(a) + { + } + __forceinline avxf(const __m256i a) : m256(_mm256_castsi256_ps(a)) + { + } + + __forceinline operator const __m256 &() const + { + return m256; + } + __forceinline operator __m256 &() + { + return m256; + } + + __forceinline avxf(float a) : m256(_mm256_set1_ps(a)) + { + } + + __forceinline avxf(float high32x4, float low32x4) + : m256(_mm256_set_ps( + high32x4, high32x4, high32x4, high32x4, low32x4, low32x4, low32x4, low32x4)) + { + } + + __forceinline avxf(float a3, float a2, float a1, float a0) + : m256(_mm256_set_ps(a3, a2, a1, a0, a3, a2, a1, a0)) + { + } + + __forceinline avxf( + float a7, float a6, float a5, float a4, float a3, float a2, float a1, float a0) + : m256(_mm256_set_ps(a7, a6, a5, a4, a3, a2, a1, a0)) + { + } + + __forceinline avxf(float3 a) : m256(_mm256_set_ps(a.w, a.z, a.y, a.x, a.w, a.z, a.y, a.x)) + { + } + + __forceinline avxf(int a3, int a2, int a1, int a0) + { + const __m256i foo = _mm256_set_epi32(a3, a2, a1, a0, a3, a2, a1, a0); + m256 = _mm256_castsi256_ps(foo); + } + + __forceinline avxf(int a7, int a6, int a5, int a4, int a3, int a2, int a1, int a0) + { + const __m256i foo = _mm256_set_epi32(a7, a6, a5, a4, a3, a2, a1, a0); + m256 = _mm256_castsi256_ps(foo); + } + + __forceinline avxf(__m128 a, __m128 b) + { + const __m256 foo = _mm256_castps128_ps256(a); + m256 = _mm256_insertf128_ps(foo, b, 1); + } + + __forceinline const float &operator[](const size_t i) const + { + assert(i < 8); + return f[i]; + } + __forceinline float &operator[](const size_t i) + { + assert(i < 8); + return f[i]; + } +}; + +__forceinline avxf cross(const avxf &a, const avxf &b) +{ + avxf r(0.0, + a[4] * b[5] - a[5] * b[4], + a[6] * b[4] - a[4] * b[6], 
+ a[5] * b[6] - a[6] * b[5], + 0.0, + a[0] * b[1] - a[1] * b[0], + a[2] * b[0] - a[0] * b[2], + a[1] * b[2] - a[2] * b[1]); + return r; +} + +__forceinline void dot3(const avxf &a, const avxf &b, float &den, float &den2) +{ + const avxf t = _mm256_mul_ps(a.m256, b.m256); + den = ((float *)&t)[0] + ((float *)&t)[1] + ((float *)&t)[2]; + den2 = ((float *)&t)[4] + ((float *)&t)[5] + ((float *)&t)[6]; +} + +//////////////////////////////////////////////////////////////////////////////// +/// Unary Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const avxf cast(const __m256i &a) +{ + return _mm256_castsi256_ps(a); +} + +__forceinline const avxf mm256_sqrt(const avxf &a) +{ + return _mm256_sqrt_ps(a.m256); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Binary Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const avxf operator+(const avxf &a, const avxf &b) +{ + return _mm256_add_ps(a.m256, b.m256); +} +__forceinline const avxf operator+(const avxf &a, const float &b) +{ + return a + avxf(b); +} +__forceinline const avxf operator+(const float &a, const avxf &b) +{ + return avxf(a) + b; +} + +__forceinline const avxf operator-(const avxf &a, const avxf &b) +{ + return _mm256_sub_ps(a.m256, b.m256); +} +__forceinline const avxf operator-(const avxf &a, const float &b) +{ + return a - avxf(b); +} +__forceinline const avxf operator-(const float &a, const avxf &b) +{ + return avxf(a) - b; +} + +__forceinline const avxf operator*(const avxf &a, const avxf &b) +{ + return _mm256_mul_ps(a.m256, b.m256); +} +__forceinline const avxf operator*(const avxf &a, const float &b) +{ + return a * avxf(b); +} +__forceinline const avxf operator*(const float &a, const avxf &b) +{ + return avxf(a) * b; +} + +__forceinline const avxf operator/(const avxf &a, const avxf &b) +{ + return _mm256_div_ps(a.m256, b.m256); +} 
+__forceinline const avxf operator/(const avxf &a, const float &b) +{ + return a / avxf(b); +} +__forceinline const avxf operator/(const float &a, const avxf &b) +{ + return avxf(a) / b; +} + +__forceinline const avxf operator|(const avxf &a, const avxf &b) +{ + return _mm256_or_ps(a.m256, b.m256); +} + +__forceinline const avxf operator^(const avxf &a, const avxf &b) +{ + return _mm256_xor_ps(a.m256, b.m256); +} + +__forceinline const avxf operator&(const avxf &a, const avxf &b) +{ + return _mm256_and_ps(a.m256, b.m256); +} + +__forceinline const avxf max(const avxf &a, const avxf &b) +{ + return _mm256_max_ps(a.m256, b.m256); +} +__forceinline const avxf min(const avxf &a, const avxf &b) +{ + return _mm256_min_ps(a.m256, b.m256); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Movement/Shifting/Shuffling Functions +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const avxf shuffle(const avxf &a, const __m256i &shuf) +{ + return _mm256_permutevar_ps(a, shuf); +} + +template +__forceinline const avxf shuffle(const avxf &a) +{ + return _mm256_permutevar_ps(a, _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0)); +} + +template +__forceinline const avxf shuffle(const avxf &a, const avxf &b) +{ + return _mm256_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); +} +template +__forceinline const avxf shuffle(const avxf &a) +{ + return shuffle(a, a); +} +template __forceinline const avxf shuffle(const avxf &a, const avxf &b) +{ + return shuffle(a, b); +} +template __forceinline const avxf shuffle(const avxf &a) +{ + return shuffle(a, a); +} + +template __forceinline float extract(const avxf &a) +{ + __m256 b = shuffle(a).m256; + return _mm256_cvtss_f32(b); +} +template<> __forceinline float extract<0>(const avxf &a) +{ + return _mm256_cvtss_f32(a.m256); +} + +__forceinline ssef low(const avxf &a) +{ + return _mm256_extractf128_ps(a.m256, 0); +} +__forceinline ssef high(const avxf &a) 
+{ + return _mm256_extractf128_ps(a.m256, 1); +} + +template +__forceinline const avxf permute(const avxf &a) +{ +#ifdef __KERNEL_AVX2__ + return _mm256_permutevar8x32_ps(a, _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0)); +#else + float temp[8]; + _mm256_storeu_ps((float *)&temp, a); + return avxf(temp[i7], temp[i6], temp[i5], temp[i4], temp[i3], temp[i2], temp[i1], temp[i0]); +#endif +} + +template +ccl_device_inline const avxf set_sign_bit(const avxf &a) +{ + return a ^ avxf(S7 << 31, S6 << 31, S5 << 31, S4 << 31, S3 << 31, S2 << 31, S1 << 31, S0 << 31); +} + +template +ccl_device_inline const avxf blend(const avxf &a, const avxf &b) +{ + return _mm256_blend_ps( + a, b, S7 << 0 | S6 << 1 | S5 << 2 | S4 << 3 | S3 << 4 | S2 << 5 | S1 << 6 | S0 << 7); +} + +template +ccl_device_inline const avxf blend(const avxf &a, const avxf &b) +{ + return blend(a, b); +} + +//#if defined(__KERNEL_SSE41__) +__forceinline avxf maxi(const avxf &a, const avxf &b) +{ + const avxf ci = _mm256_max_ps(a, b); + return ci; +} + +__forceinline avxf mini(const avxf &a, const avxf &b) +{ + const avxf ci = _mm256_min_ps(a, b); + return ci; +} +//#endif + +//////////////////////////////////////////////////////////////////////////////// +/// Ternary Operators +//////////////////////////////////////////////////////////////////////////////// +__forceinline const avxf madd(const avxf &a, const avxf &b, const avxf &c) +{ +#ifdef __KERNEL_AVX2__ + return _mm256_fmadd_ps(a, b, c); +#else + return c + (a * b); +#endif +} + +__forceinline const avxf nmadd(const avxf &a, const avxf &b, const avxf &c) +{ +#ifdef __KERNEL_AVX2__ + return _mm256_fnmadd_ps(a, b, c); +#else + return c - (a * b); +#endif +} +__forceinline const avxf msub(const avxf &a, const avxf &b, const avxf &c) +{ +#ifdef __KERNEL_AVX2__ + return _mm256_fmsub_ps(a, b, c); +#else + return (a * b) - c; +#endif +} + +//////////////////////////////////////////////////////////////////////////////// +/// Comparison Operators + Select 
+//////////////////////////////////////////////////////////////////////////////// +__forceinline const avxb operator<=(const avxf &a, const avxf &b) +{ + return _mm256_cmp_ps(a.m256, b.m256, _CMP_LE_OS); +} + +__forceinline const avxf select(const avxb &m, const avxf &t, const avxf &f) +{ + return _mm256_blendv_ps(f, t, m); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Common Functions +//////////////////////////////////////////////////////////////////////////////// + +__forceinline avxf mix(const avxf &a, const avxf &b, const avxf &t) +{ + return madd(t, b, (avxf(1.0f) - t) * a); +} + +#ifndef _mm256_set_m128 +# define _mm256_set_m128(/* __m128 */ hi, /* __m128 */ lo) \ + _mm256_insertf128_ps(_mm256_castps128_ps256(lo), (hi), 0x1) +#endif + +#define _mm256_loadu2_m128(/* float const* */ hiaddr, /* float const* */ loaddr) \ + _mm256_set_m128(_mm_loadu_ps(hiaddr), _mm_loadu_ps(loaddr)) + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/util/avxi.h b/intern/cycles/util/avxi.h new file mode 100644 index 00000000000..0ae4bf271c8 --- /dev/null +++ b/intern/cycles/util/avxi.h @@ -0,0 +1,745 @@ +/* + * Copyright 2009-2013 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __UTIL_AVXI_H__ +#define __UTIL_AVXI_H__ + +CCL_NAMESPACE_BEGIN + +struct avxb; + +struct avxi { + typedef avxb Mask; // mask type for us + enum { size = 8 }; // number of SIMD elements + union { // data + __m256i m256; +#if !defined(__KERNEL_AVX2__) + struct { + __m128i l, h; + }; +#endif + int32_t v[8]; + }; + + //////////////////////////////////////////////////////////////////////////////// + /// Constructors, Assignment & Cast Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline avxi() + { + } + __forceinline avxi(const avxi &a) + { + m256 = a.m256; + } + __forceinline avxi &operator=(const avxi &a) + { + m256 = a.m256; + return *this; + } + + __forceinline avxi(const __m256i a) : m256(a) + { + } + __forceinline operator const __m256i &(void) const + { + return m256; + } + __forceinline operator __m256i &(void) + { + return m256; + } + + __forceinline explicit avxi(const ssei &a) + : m256(_mm256_insertf128_si256(_mm256_castsi128_si256(a), a, 1)) + { + } + __forceinline avxi(const ssei &a, const ssei &b) + : m256(_mm256_insertf128_si256(_mm256_castsi128_si256(a), b, 1)) + { + } +#if defined(__KERNEL_AVX2__) + __forceinline avxi(const __m128i &a, const __m128i &b) + : m256(_mm256_insertf128_si256(_mm256_castsi128_si256(a), b, 1)) + { + } +#else + __forceinline avxi(const __m128i &a, const __m128i &b) : l(a), h(b) + { + } +#endif + __forceinline explicit avxi(const int32_t *const a) + : m256(_mm256_castps_si256(_mm256_loadu_ps((const float *)a))) + { + } + __forceinline avxi(int32_t a) : m256(_mm256_set1_epi32(a)) + { + } + __forceinline avxi(int32_t a, int32_t b) : m256(_mm256_set_epi32(b, a, b, a, b, a, b, a)) + { + } + __forceinline avxi(int32_t a, int32_t b, int32_t c, int32_t d) + : m256(_mm256_set_epi32(d, c, b, a, d, c, b, a)) + { + } + __forceinline avxi( + int32_t a, int32_t b, int32_t c, int32_t d, int32_t e, int32_t f, int32_t g, int32_t h) + : m256(_mm256_set_epi32(h, g, f, e, d, 
c, b, a)) + { + } + + __forceinline explicit avxi(const __m256 a) : m256(_mm256_cvtps_epi32(a)) + { + } + + //////////////////////////////////////////////////////////////////////////////// + /// Constants + //////////////////////////////////////////////////////////////////////////////// + + __forceinline avxi(ZeroTy) : m256(_mm256_setzero_si256()) + { + } +#if defined(__KERNEL_AVX2__) + __forceinline avxi(OneTy) : m256(_mm256_set1_epi32(1)) + { + } + __forceinline avxi(PosInfTy) : m256(_mm256_set1_epi32(pos_inf)) + { + } + __forceinline avxi(NegInfTy) : m256(_mm256_set1_epi32(neg_inf)) + { + } +#else + __forceinline avxi(OneTy) : m256(_mm256_set_epi32(1, 1, 1, 1, 1, 1, 1, 1)) + { + } + __forceinline avxi(PosInfTy) + : m256(_mm256_set_epi32( + pos_inf, pos_inf, pos_inf, pos_inf, pos_inf, pos_inf, pos_inf, pos_inf)) + { + } + __forceinline avxi(NegInfTy) + : m256(_mm256_set_epi32( + neg_inf, neg_inf, neg_inf, neg_inf, neg_inf, neg_inf, neg_inf, neg_inf)) + { + } +#endif + __forceinline avxi(StepTy) : m256(_mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0)) + { + } + + //////////////////////////////////////////////////////////////////////////////// + /// Array Access + //////////////////////////////////////////////////////////////////////////////// + + __forceinline const int32_t &operator[](const size_t i) const + { + assert(i < 8); + return v[i]; + } + __forceinline int32_t &operator[](const size_t i) + { + assert(i < 8); + return v[i]; + } +}; + +//////////////////////////////////////////////////////////////////////////////// +/// Unary Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const avxi cast(const __m256 &a) +{ + return _mm256_castps_si256(a); +} +__forceinline const avxi operator+(const avxi &a) +{ + return a; +} +#if defined(__KERNEL_AVX2__) +__forceinline const avxi operator-(const avxi &a) +{ + return _mm256_sub_epi32(_mm256_setzero_si256(), a.m256); +} +__forceinline const avxi abs(const avxi &a) +{ + 
return _mm256_abs_epi32(a.m256); +} +#else +__forceinline const avxi operator-(const avxi &a) +{ + return avxi(_mm_sub_epi32(_mm_setzero_si128(), a.l), _mm_sub_epi32(_mm_setzero_si128(), a.h)); +} +__forceinline const avxi abs(const avxi &a) +{ + return avxi(_mm_abs_epi32(a.l), _mm_abs_epi32(a.h)); +} +#endif + +//////////////////////////////////////////////////////////////////////////////// +/// Binary Operators +//////////////////////////////////////////////////////////////////////////////// + +#if defined(__KERNEL_AVX2__) +__forceinline const avxi operator+(const avxi &a, const avxi &b) +{ + return _mm256_add_epi32(a.m256, b.m256); +} +#else +__forceinline const avxi operator+(const avxi &a, const avxi &b) +{ + return avxi(_mm_add_epi32(a.l, b.l), _mm_add_epi32(a.h, b.h)); +} +#endif +__forceinline const avxi operator+(const avxi &a, const int32_t b) +{ + return a + avxi(b); +} +__forceinline const avxi operator+(const int32_t a, const avxi &b) +{ + return avxi(a) + b; +} + +#if defined(__KERNEL_AVX2__) +__forceinline const avxi operator-(const avxi &a, const avxi &b) +{ + return _mm256_sub_epi32(a.m256, b.m256); +} +#else +__forceinline const avxi operator-(const avxi &a, const avxi &b) +{ + return avxi(_mm_sub_epi32(a.l, b.l), _mm_sub_epi32(a.h, b.h)); +} +#endif +__forceinline const avxi operator-(const avxi &a, const int32_t b) +{ + return a - avxi(b); +} +__forceinline const avxi operator-(const int32_t a, const avxi &b) +{ + return avxi(a) - b; +} + +#if defined(__KERNEL_AVX2__) +__forceinline const avxi operator*(const avxi &a, const avxi &b) +{ + return _mm256_mullo_epi32(a.m256, b.m256); +} +#else +__forceinline const avxi operator*(const avxi &a, const avxi &b) +{ + return avxi(_mm_mullo_epi32(a.l, b.l), _mm_mullo_epi32(a.h, b.h)); +} +#endif +__forceinline const avxi operator*(const avxi &a, const int32_t b) +{ + return a * avxi(b); +} +__forceinline const avxi operator*(const int32_t a, const avxi &b) +{ + return avxi(a) * b; +} + +#if 
defined(__KERNEL_AVX2__) +__forceinline const avxi operator&(const avxi &a, const avxi &b) +{ + return _mm256_and_si256(a.m256, b.m256); +} +#else +__forceinline const avxi operator&(const avxi &a, const avxi &b) +{ + return _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); +} +#endif +__forceinline const avxi operator&(const avxi &a, const int32_t b) +{ + return a & avxi(b); +} +__forceinline const avxi operator&(const int32_t a, const avxi &b) +{ + return avxi(a) & b; +} + +#if defined(__KERNEL_AVX2__) +__forceinline const avxi operator|(const avxi &a, const avxi &b) +{ + return _mm256_or_si256(a.m256, b.m256); +} +#else +__forceinline const avxi operator|(const avxi &a, const avxi &b) +{ + return _mm256_castps_si256(_mm256_or_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); +} +#endif +__forceinline const avxi operator|(const avxi &a, const int32_t b) +{ + return a | avxi(b); +} +__forceinline const avxi operator|(const int32_t a, const avxi &b) +{ + return avxi(a) | b; +} + +#if defined(__KERNEL_AVX2__) +__forceinline const avxi operator^(const avxi &a, const avxi &b) +{ + return _mm256_xor_si256(a.m256, b.m256); +} +#else +__forceinline const avxi operator^(const avxi &a, const avxi &b) +{ + return _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); +} +#endif +__forceinline const avxi operator^(const avxi &a, const int32_t b) +{ + return a ^ avxi(b); +} +__forceinline const avxi operator^(const int32_t a, const avxi &b) +{ + return avxi(a) ^ b; +} + +#if defined(__KERNEL_AVX2__) +__forceinline const avxi operator<<(const avxi &a, const int32_t n) +{ + return _mm256_slli_epi32(a.m256, n); +} +__forceinline const avxi operator>>(const avxi &a, const int32_t n) +{ + return _mm256_srai_epi32(a.m256, n); +} + +__forceinline const avxi sra(const avxi &a, const int32_t b) +{ + return _mm256_srai_epi32(a.m256, b); +} +__forceinline const avxi srl(const avxi &a, const int32_t b) +{ + return 
_mm256_srli_epi32(a.m256, b); +} +#else +__forceinline const avxi operator<<(const avxi &a, const int32_t n) +{ + return avxi(_mm_slli_epi32(a.l, n), _mm_slli_epi32(a.h, n)); +} +__forceinline const avxi operator>>(const avxi &a, const int32_t n) +{ + return avxi(_mm_srai_epi32(a.l, n), _mm_srai_epi32(a.h, n)); +} + +__forceinline const avxi sra(const avxi &a, const int32_t b) +{ + return avxi(_mm_srai_epi32(a.l, b), _mm_srai_epi32(a.h, b)); +} +__forceinline const avxi srl(const avxi &a, const int32_t b) +{ + return avxi(_mm_srli_epi32(a.l, b), _mm_srli_epi32(a.h, b)); +} +#endif + +#if defined(__KERNEL_AVX2__) +__forceinline const avxi min(const avxi &a, const avxi &b) +{ + return _mm256_min_epi32(a.m256, b.m256); +} +#else +__forceinline const avxi min(const avxi &a, const avxi &b) +{ + return avxi(_mm_min_epi32(a.l, b.l), _mm_min_epi32(a.h, b.h)); +} +#endif +__forceinline const avxi min(const avxi &a, const int32_t b) +{ + return min(a, avxi(b)); +} +__forceinline const avxi min(const int32_t a, const avxi &b) +{ + return min(avxi(a), b); +} + +#if defined(__KERNEL_AVX2__) +__forceinline const avxi max(const avxi &a, const avxi &b) +{ + return _mm256_max_epi32(a.m256, b.m256); +} +#else +__forceinline const avxi max(const avxi &a, const avxi &b) +{ + return avxi(_mm_max_epi32(a.l, b.l), _mm_max_epi32(a.h, b.h)); +} +#endif +__forceinline const avxi max(const avxi &a, const int32_t b) +{ + return max(a, avxi(b)); +} +__forceinline const avxi max(const int32_t a, const avxi &b) +{ + return max(avxi(a), b); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Assignment Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline avxi &operator+=(avxi &a, const avxi &b) +{ + return a = a + b; +} +__forceinline avxi &operator+=(avxi &a, const int32_t b) +{ + return a = a + b; +} + +__forceinline avxi &operator-=(avxi &a, const avxi &b) +{ + return a = a - b; +} +__forceinline 
avxi &operator-=(avxi &a, const int32_t b) +{ + return a = a - b; +} + +__forceinline avxi &operator*=(avxi &a, const avxi &b) +{ + return a = a * b; +} +__forceinline avxi &operator*=(avxi &a, const int32_t b) +{ + return a = a * b; +} + +__forceinline avxi &operator&=(avxi &a, const avxi &b) +{ + return a = a & b; +} +__forceinline avxi &operator&=(avxi &a, const int32_t b) +{ + return a = a & b; +} + +__forceinline avxi &operator|=(avxi &a, const avxi &b) +{ + return a = a | b; +} +__forceinline avxi &operator|=(avxi &a, const int32_t b) +{ + return a = a | b; +} + +__forceinline avxi &operator^=(avxi &a, const avxi &b) +{ + return a = a ^ b; +} +__forceinline avxi &operator^=(avxi &a, const int32_t b) +{ + return a = a ^ b; +} + +__forceinline avxi &operator<<=(avxi &a, const int32_t b) +{ + return a = a << b; +} +__forceinline avxi &operator>>=(avxi &a, const int32_t b) +{ + return a = a >> b; +} + +//////////////////////////////////////////////////////////////////////////////// +/// Comparison Operators + Select +//////////////////////////////////////////////////////////////////////////////// + +#if defined(__KERNEL_AVX2__) +__forceinline const avxb operator==(const avxi &a, const avxi &b) +{ + return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a.m256, b.m256)); +} +#else +__forceinline const avxb operator==(const avxi &a, const avxi &b) +{ + return avxb(_mm_castsi128_ps(_mm_cmpeq_epi32(a.l, b.l)), + _mm_castsi128_ps(_mm_cmpeq_epi32(a.h, b.h))); +} +#endif +__forceinline const avxb operator==(const avxi &a, const int32_t b) +{ + return a == avxi(b); +} +__forceinline const avxb operator==(const int32_t a, const avxi &b) +{ + return avxi(a) == b; +} + +__forceinline const avxb operator!=(const avxi &a, const avxi &b) +{ + return !(a == b); +} +__forceinline const avxb operator!=(const avxi &a, const int32_t b) +{ + return a != avxi(b); +} +__forceinline const avxb operator!=(const int32_t a, const avxi &b) +{ + return avxi(a) != b; +} + +#if 
defined(__KERNEL_AVX2__) +__forceinline const avxb operator<(const avxi &a, const avxi &b) +{ + return _mm256_castsi256_ps(_mm256_cmpgt_epi32(b.m256, a.m256)); +} +#else +__forceinline const avxb operator<(const avxi &a, const avxi &b) +{ + return avxb(_mm_castsi128_ps(_mm_cmplt_epi32(a.l, b.l)), + _mm_castsi128_ps(_mm_cmplt_epi32(a.h, b.h))); +} +#endif +__forceinline const avxb operator<(const avxi &a, const int32_t b) +{ + return a < avxi(b); +} +__forceinline const avxb operator<(const int32_t a, const avxi &b) +{ + return avxi(a) < b; +} + +__forceinline const avxb operator>=(const avxi &a, const avxi &b) +{ + return !(a < b); +} +__forceinline const avxb operator>=(const avxi &a, const int32_t b) +{ + return a >= avxi(b); +} +__forceinline const avxb operator>=(const int32_t a, const avxi &b) +{ + return avxi(a) >= b; +} + +#if defined(__KERNEL_AVX2__) +__forceinline const avxb operator>(const avxi &a, const avxi &b) +{ + return _mm256_castsi256_ps(_mm256_cmpgt_epi32(a.m256, b.m256)); +} +#else +__forceinline const avxb operator>(const avxi &a, const avxi &b) +{ + return avxb(_mm_castsi128_ps(_mm_cmpgt_epi32(a.l, b.l)), + _mm_castsi128_ps(_mm_cmpgt_epi32(a.h, b.h))); +} +#endif +__forceinline const avxb operator>(const avxi &a, const int32_t b) +{ + return a > avxi(b); +} +__forceinline const avxb operator>(const int32_t a, const avxi &b) +{ + return avxi(a) > b; +} + +__forceinline const avxb operator<=(const avxi &a, const avxi &b) +{ + return !(a > b); +} +__forceinline const avxb operator<=(const avxi &a, const int32_t b) +{ + return a <= avxi(b); +} +__forceinline const avxb operator<=(const int32_t a, const avxi &b) +{ + return avxi(a) <= b; +} + +__forceinline const avxi select(const avxb &m, const avxi &t, const avxi &f) +{ + return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(f), _mm256_castsi256_ps(t), m)); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Movement/Shifting/Shuffling Functions 
+//////////////////////////////////////////////////////////////////////////////// + +#if defined(__KERNEL_AVX2__) +__forceinline avxi unpacklo(const avxi &a, const avxi &b) +{ + return _mm256_unpacklo_epi32(a.m256, b.m256); +} +__forceinline avxi unpackhi(const avxi &a, const avxi &b) +{ + return _mm256_unpackhi_epi32(a.m256, b.m256); +} +#else +__forceinline avxi unpacklo(const avxi &a, const avxi &b) +{ + return _mm256_castps_si256(_mm256_unpacklo_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); +} +__forceinline avxi unpackhi(const avxi &a, const avxi &b) +{ + return _mm256_castps_si256(_mm256_unpackhi_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); +} +#endif + +template __forceinline const avxi shuffle(const avxi &a) +{ + return _mm256_castps_si256(_mm256_permute_ps(_mm256_castsi256_ps(a), _MM_SHUFFLE(i, i, i, i))); +} + +template __forceinline const avxi shuffle(const avxi &a) +{ + return _mm256_permute2f128_si256(a, a, (i1 << 4) | (i0 << 0)); +} + +template __forceinline const avxi shuffle(const avxi &a, const avxi &b) +{ + return _mm256_permute2f128_si256(a, b, (i1 << 4) | (i0 << 0)); +} + +template +__forceinline const avxi shuffle(const avxi &a) +{ + return _mm256_castps_si256( + _mm256_permute_ps(_mm256_castsi256_ps(a), _MM_SHUFFLE(i3, i2, i1, i0))); +} + +template +__forceinline const avxi shuffle(const avxi &a, const avxi &b) +{ + return _mm256_castps_si256(_mm256_shuffle_ps( + _mm256_castsi256_ps(a), _mm256_castsi256_ps(b), _MM_SHUFFLE(i3, i2, i1, i0))); +} + +template<> __forceinline const avxi shuffle<0, 0, 2, 2>(const avxi &b) +{ + return _mm256_castps_si256(_mm256_moveldup_ps(_mm256_castsi256_ps(b))); +} +template<> __forceinline const avxi shuffle<1, 1, 3, 3>(const avxi &b) +{ + return _mm256_castps_si256(_mm256_movehdup_ps(_mm256_castsi256_ps(b))); +} +template<> __forceinline const avxi shuffle<0, 1, 0, 1>(const avxi &b) +{ + return _mm256_castps_si256( + _mm256_castpd_ps(_mm256_movedup_pd(_mm256_castps_pd(_mm256_castsi256_ps(b))))); 
+} + +__forceinline const avxi broadcast(const int *ptr) +{ + return _mm256_castps_si256(_mm256_broadcast_ss((const float *)ptr)); +} +template __forceinline const avxi insert(const avxi &a, const ssei &b) +{ + return _mm256_insertf128_si256(a, b, i); +} +template __forceinline const ssei extract(const avxi &a) +{ + return _mm256_extractf128_si256(a, i); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Reductions +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const avxi vreduce_min2(const avxi &v) +{ + return min(v, shuffle<1, 0, 3, 2>(v)); +} +__forceinline const avxi vreduce_min4(const avxi &v) +{ + avxi v1 = vreduce_min2(v); + return min(v1, shuffle<2, 3, 0, 1>(v1)); +} +__forceinline const avxi vreduce_min(const avxi &v) +{ + avxi v1 = vreduce_min4(v); + return min(v1, shuffle<1, 0>(v1)); +} + +__forceinline const avxi vreduce_max2(const avxi &v) +{ + return max(v, shuffle<1, 0, 3, 2>(v)); +} +__forceinline const avxi vreduce_max4(const avxi &v) +{ + avxi v1 = vreduce_max2(v); + return max(v1, shuffle<2, 3, 0, 1>(v1)); +} +__forceinline const avxi vreduce_max(const avxi &v) +{ + avxi v1 = vreduce_max4(v); + return max(v1, shuffle<1, 0>(v1)); +} + +__forceinline const avxi vreduce_add2(const avxi &v) +{ + return v + shuffle<1, 0, 3, 2>(v); +} +__forceinline const avxi vreduce_add4(const avxi &v) +{ + avxi v1 = vreduce_add2(v); + return v1 + shuffle<2, 3, 0, 1>(v1); +} +__forceinline const avxi vreduce_add(const avxi &v) +{ + avxi v1 = vreduce_add4(v); + return v1 + shuffle<1, 0>(v1); +} + +__forceinline int reduce_min(const avxi &v) +{ + return extract<0>(extract<0>(vreduce_min(v))); +} +__forceinline int reduce_max(const avxi &v) +{ + return extract<0>(extract<0>(vreduce_max(v))); +} +__forceinline int reduce_add(const avxi &v) +{ + return extract<0>(extract<0>(vreduce_add(v))); +} + +__forceinline uint32_t select_min(const avxi &v) +{ + return __bsf(movemask(v == 
vreduce_min(v))); +} +__forceinline uint32_t select_max(const avxi &v) +{ + return __bsf(movemask(v == vreduce_max(v))); +} + +__forceinline uint32_t select_min(const avxb &valid, const avxi &v) +{ + const avxi a = select(valid, v, avxi(pos_inf)); + return __bsf(movemask(valid & (a == vreduce_min(a)))); +} +__forceinline uint32_t select_max(const avxb &valid, const avxi &v) +{ + const avxi a = select(valid, v, avxi(neg_inf)); + return __bsf(movemask(valid & (a == vreduce_max(a)))); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Output Operators +//////////////////////////////////////////////////////////////////////////////// + +ccl_device_inline void print_avxi(const char *label, const avxi &a) +{ + printf("%s: %d %d %d %d %d %d %d %d\n", label, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]); +} + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/util/boundbox.h b/intern/cycles/util/boundbox.h new file mode 100644 index 00000000000..ed81e4cf8c3 --- /dev/null +++ b/intern/cycles/util/boundbox.h @@ -0,0 +1,282 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __UTIL_BOUNDBOX_H__ +#define __UTIL_BOUNDBOX_H__ + +#include +#include + +#include "util/math.h" +#include "util/string.h" +#include "util/transform.h" +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* 3D BoundBox */ + +class BoundBox { + public: + float3 min, max; + + __forceinline BoundBox() + { + } + + __forceinline BoundBox(const float3 &pt) : min(pt), max(pt) + { + } + + __forceinline BoundBox(const float3 &min_, const float3 &max_) : min(min_), max(max_) + { + } + + enum empty_t { empty = 0 }; + + __forceinline BoundBox(empty_t) + : min(make_float3(FLT_MAX, FLT_MAX, FLT_MAX)), max(make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX)) + { + } + + __forceinline void grow(const float3 &pt) + { + /* the order of arguments to min is such that if pt is nan, it will not + * influence the resulting bounding box */ + min = ccl::min(pt, min); + max = ccl::max(pt, max); + } + + __forceinline void grow(const float3 &pt, float border) + { + float3 shift = make_float3(border, border, border); + min = ccl::min(pt - shift, min); + max = ccl::max(pt + shift, max); + } + + __forceinline void grow(const BoundBox &bbox) + { + grow(bbox.min); + grow(bbox.max); + } + + __forceinline void grow_safe(const float3 &pt) + { + /* the order of arguments to min is such that if pt is nan, it will not + * influence the resulting bounding box */ + if (isfinite(pt.x) && isfinite(pt.y) && isfinite(pt.z)) { + min = ccl::min(pt, min); + max = ccl::max(pt, max); + } + } + + __forceinline void grow_safe(const float3 &pt, float border) + { + if (isfinite(pt.x) && isfinite(pt.y) && isfinite(pt.z) && isfinite(border)) { + float3 shift = make_float3(border, border, border); + min = ccl::min(pt - shift, min); + max = ccl::max(pt + shift, max); + } + } + + __forceinline void grow_safe(const BoundBox &bbox) + { + grow_safe(bbox.min); + grow_safe(bbox.max); + } + + __forceinline void intersect(const BoundBox &bbox) + { + min = ccl::max(min, bbox.min); + max = ccl::min(max, bbox.max); + } + + /* 
todo: avoid using this */ + __forceinline float safe_area() const + { + if (!((min.x <= max.x) && (min.y <= max.y) && (min.z <= max.z))) + return 0.0f; + + return area(); + } + + __forceinline float area() const + { + return half_area() * 2.0f; + } + + __forceinline float half_area() const + { + float3 d = max - min; + return (d.x * d.z + d.y * d.z + d.x * d.y); + } + + __forceinline float3 center() const + { + return 0.5f * (min + max); + } + + __forceinline float3 center2() const + { + return min + max; + } + + __forceinline float3 size() const + { + return max - min; + } + + __forceinline bool valid() const + { + return (min.x <= max.x) && (min.y <= max.y) && (min.z <= max.z) && + (isfinite(min.x) && isfinite(min.y) && isfinite(min.z)) && + (isfinite(max.x) && isfinite(max.y) && isfinite(max.z)); + } + + BoundBox transformed(const Transform *tfm) const + { + BoundBox result = BoundBox::empty; + + for (int i = 0; i < 8; i++) { + float3 p; + + p.x = (i & 1) ? min.x : max.x; + p.y = (i & 2) ? min.y : max.y; + p.z = (i & 4) ? 
min.z : max.z; + + result.grow(transform_point(tfm, p)); + } + + return result; + } + + __forceinline bool intersects(const BoundBox &other) + { + float3 center_diff = center() - other.center(), total_size = (size() + other.size()) * 0.5f; + return fabsf(center_diff.x) <= total_size.x && fabsf(center_diff.y) <= total_size.y && + fabsf(center_diff.z) <= total_size.z; + } +}; + +__forceinline BoundBox merge(const BoundBox &bbox, const float3 &pt) +{ + return BoundBox(min(bbox.min, pt), max(bbox.max, pt)); +} + +__forceinline BoundBox merge(const BoundBox &a, const BoundBox &b) +{ + return BoundBox(min(a.min, b.min), max(a.max, b.max)); +} + +__forceinline BoundBox merge(const BoundBox &a, + const BoundBox &b, + const BoundBox &c, + const BoundBox &d) +{ + return merge(merge(a, b), merge(c, d)); +} + +__forceinline BoundBox intersect(const BoundBox &a, const BoundBox &b) +{ + return BoundBox(max(a.min, b.min), min(a.max, b.max)); +} + +__forceinline BoundBox intersect(const BoundBox &a, const BoundBox &b, const BoundBox &c) +{ + return intersect(a, intersect(b, c)); +} + +/* 2D BoundBox */ + +class BoundBox2D { + public: + float left; + float right; + float bottom; + float top; + + BoundBox2D() : left(0.0f), right(1.0f), bottom(0.0f), top(1.0f) + { + } + + bool operator==(const BoundBox2D &other) const + { + return (left == other.left && right == other.right && bottom == other.bottom && + top == other.top); + } + + float width() + { + return right - left; + } + + float height() + { + return top - bottom; + } + + BoundBox2D operator*(float f) const + { + BoundBox2D result; + + result.left = left * f; + result.right = right * f; + result.bottom = bottom * f; + result.top = top * f; + + return result; + } + + BoundBox2D subset(const BoundBox2D &other) const + { + BoundBox2D subset; + + subset.left = left + other.left * (right - left); + subset.right = left + other.right * (right - left); + subset.bottom = bottom + other.bottom * (top - bottom); + subset.top = bottom + 
other.top * (top - bottom); + + return subset; + } + + BoundBox2D make_relative_to(const BoundBox2D &other) const + { + BoundBox2D result; + + result.left = ((left - other.left) / (other.right - other.left)); + result.right = ((right - other.left) / (other.right - other.left)); + result.bottom = ((bottom - other.bottom) / (other.top - other.bottom)); + result.top = ((top - other.bottom) / (other.top - other.bottom)); + + return result; + } + + BoundBox2D clamp(float mn = 0.0f, float mx = 1.0f) + { + BoundBox2D result; + + result.left = ccl::clamp(left, mn, mx); + result.right = ccl::clamp(right, mn, mx); + result.bottom = ccl::clamp(bottom, mn, mx); + result.top = ccl::clamp(top, mn, mx); + + return result; + } +}; + +CCL_NAMESPACE_END + +#endif /* __UTIL_BOUNDBOX_H__ */ diff --git a/intern/cycles/util/color.h b/intern/cycles/util/color.h new file mode 100644 index 00000000000..e2a5c5b9c4a --- /dev/null +++ b/intern/cycles/util/color.h @@ -0,0 +1,296 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_COLOR_H__ +#define __UTIL_COLOR_H__ + +#include "util/math.h" +#include "util/types.h" + +#if !defined(__KERNEL_GPU__) && defined(__KERNEL_SSE2__) +# include "util/simd.h" +#endif + +CCL_NAMESPACE_BEGIN + +ccl_device uchar float_to_byte(float val) +{ + return ((val <= 0.0f) ? 0 : + ((val > (1.0f - 0.5f / 255.0f)) ? 
255 : (uchar)((255.0f * val) + 0.5f))); +} + +ccl_device uchar4 color_float_to_byte(float3 c) +{ + uchar r, g, b; + + r = float_to_byte(c.x); + g = float_to_byte(c.y); + b = float_to_byte(c.z); + + return make_uchar4(r, g, b, 0); +} + +ccl_device uchar4 color_float4_to_uchar4(float4 c) +{ + uchar r, g, b, a; + + r = float_to_byte(c.x); + g = float_to_byte(c.y); + b = float_to_byte(c.z); + a = float_to_byte(c.w); + + return make_uchar4(r, g, b, a); +} + +ccl_device_inline float3 color_byte_to_float(uchar4 c) +{ + return make_float3(c.x * (1.0f / 255.0f), c.y * (1.0f / 255.0f), c.z * (1.0f / 255.0f)); +} + +ccl_device_inline float4 color_uchar4_to_float4(uchar4 c) +{ + return make_float4( + c.x * (1.0f / 255.0f), c.y * (1.0f / 255.0f), c.z * (1.0f / 255.0f), c.w * (1.0f / 255.0f)); +} + +ccl_device float color_srgb_to_linear(float c) +{ + if (c < 0.04045f) + return (c < 0.0f) ? 0.0f : c * (1.0f / 12.92f); + else + return powf((c + 0.055f) * (1.0f / 1.055f), 2.4f); +} + +ccl_device float color_linear_to_srgb(float c) +{ + if (c < 0.0031308f) + return (c < 0.0f) ? 
0.0f : c * 12.92f; + else + return 1.055f * powf(c, 1.0f / 2.4f) - 0.055f; +} + +ccl_device float3 rgb_to_hsv(float3 rgb) +{ + float cmax, cmin, h, s, v, cdelta; + float3 c; + + cmax = fmaxf(rgb.x, fmaxf(rgb.y, rgb.z)); + cmin = min(rgb.x, min(rgb.y, rgb.z)); + cdelta = cmax - cmin; + + v = cmax; + + if (cmax != 0.0f) { + s = cdelta / cmax; + } + else { + s = 0.0f; + h = 0.0f; + } + + if (s != 0.0f) { + float3 cmax3 = make_float3(cmax, cmax, cmax); + c = (cmax3 - rgb) / cdelta; + + if (rgb.x == cmax) + h = c.z - c.y; + else if (rgb.y == cmax) + h = 2.0f + c.x - c.z; + else + h = 4.0f + c.y - c.x; + + h /= 6.0f; + + if (h < 0.0f) + h += 1.0f; + } + else { + h = 0.0f; + } + + return make_float3(h, s, v); +} + +ccl_device float3 hsv_to_rgb(float3 hsv) +{ + float i, f, p, q, t, h, s, v; + float3 rgb; + + h = hsv.x; + s = hsv.y; + v = hsv.z; + + if (s != 0.0f) { + if (h == 1.0f) + h = 0.0f; + + h *= 6.0f; + i = floorf(h); + f = h - i; + rgb = make_float3(f, f, f); + p = v * (1.0f - s); + q = v * (1.0f - (s * f)); + t = v * (1.0f - (s * (1.0f - f))); + + if (i == 0.0f) + rgb = make_float3(v, t, p); + else if (i == 1.0f) + rgb = make_float3(q, v, p); + else if (i == 2.0f) + rgb = make_float3(p, v, t); + else if (i == 3.0f) + rgb = make_float3(p, q, v); + else if (i == 4.0f) + rgb = make_float3(t, p, v); + else + rgb = make_float3(v, p, q); + } + else { + rgb = make_float3(v, v, v); + } + + return rgb; +} + +ccl_device float3 xyY_to_xyz(float x, float y, float Y) +{ + float X, Z; + + if (y != 0.0f) + X = (x / y) * Y; + else + X = 0.0f; + + if (y != 0.0f && Y != 0.0f) + Z = (1.0f - x - y) / y * Y; + else + Z = 0.0f; + + return make_float3(X, Y, Z); +} + +#ifdef __KERNEL_SSE2__ +/* + * Calculate initial guess for arg^exp based on float representation + * This method gives a constant bias, + * which can be easily compensated by multiplication with bias_coeff. + * Gives better results for exponents near 1 (e. g. 4/5). 
+ * exp = exponent, encoded as uint32_t + * e2coeff = 2^(127/exponent - 127) * bias_coeff^(1/exponent), encoded as uint32_t + */ +template ccl_device_inline ssef fastpow(const ssef &arg) +{ + ssef ret; + ret = arg * cast(ssei(e2coeff)); + ret = ssef(cast(ret)); + ret = ret * cast(ssei(exp)); + ret = cast(ssei(ret)); + return ret; +} + +/* Improve x ^ 1.0f/5.0f solution with Newton-Raphson method */ +ccl_device_inline ssef improve_5throot_solution(const ssef &old_result, const ssef &x) +{ + ssef approx2 = old_result * old_result; + ssef approx4 = approx2 * approx2; + ssef t = x / approx4; + ssef summ = madd(ssef(4.0f), old_result, t); + return summ * ssef(1.0f / 5.0f); +} + +/* Calculate powf(x, 2.4). Working domain: 1e-10 < x < 1e+10 */ +ccl_device_inline ssef fastpow24(const ssef &arg) +{ + /* max, avg and |avg| errors were calculated in gcc without FMA instructions + * The final precision should be better than powf in glibc */ + + /* Calculate x^4/5, coefficient 0.994 was constructed manually to minimize avg error */ + /* 0x3F4CCCCD = 4/5 */ + /* 0x4F55A7FB = 2^(127/(4/5) - 127) * 0.994^(1/(4/5)) */ + ssef x = fastpow<0x3F4CCCCD, 0x4F55A7FB>(arg); // error max = 0.17 avg = 0.0018 |avg| = 0.05 + ssef arg2 = arg * arg; + ssef arg4 = arg2 * arg2; + + /* error max = 0.018 avg = 0.0031 |avg| = 0.0031 */ + x = improve_5throot_solution(x, arg4); + /* error max = 0.00021 avg = 1.6e-05 |avg| = 1.6e-05 */ + x = improve_5throot_solution(x, arg4); + /* error max = 6.1e-07 avg = 5.2e-08 |avg| = 1.1e-07 */ + x = improve_5throot_solution(x, arg4); + + return x * (x * x); +} + +ccl_device ssef color_srgb_to_linear(const ssef &c) +{ + sseb cmp = c < ssef(0.04045f); + ssef lt = max(c * ssef(1.0f / 12.92f), ssef(0.0f)); + ssef gtebase = (c + ssef(0.055f)) * ssef(1.0f / 1.055f); /* fma */ + ssef gte = fastpow24(gtebase); + return select(cmp, lt, gte); +} +#endif /* __KERNEL_SSE2__ */ + +ccl_device float3 color_srgb_to_linear_v3(float3 c) +{ + return make_float3( + 
color_srgb_to_linear(c.x), color_srgb_to_linear(c.y), color_srgb_to_linear(c.z)); +} + +ccl_device float3 color_linear_to_srgb_v3(float3 c) +{ + return make_float3( + color_linear_to_srgb(c.x), color_linear_to_srgb(c.y), color_linear_to_srgb(c.z)); +} + +ccl_device float4 color_linear_to_srgb_v4(float4 c) +{ + return make_float4( + color_linear_to_srgb(c.x), color_linear_to_srgb(c.y), color_linear_to_srgb(c.z), c.w); +} + +ccl_device float4 color_srgb_to_linear_v4(float4 c) +{ +#ifdef __KERNEL_SSE2__ + ssef r_ssef; + float4 &r = (float4 &)r_ssef; + r = c; + r_ssef = color_srgb_to_linear(r_ssef); + r.w = c.w; + return r; +#else + return make_float4( + color_srgb_to_linear(c.x), color_srgb_to_linear(c.y), color_srgb_to_linear(c.z), c.w); +#endif +} + +ccl_device float3 color_highlight_compress(float3 color, ccl_private float3 *variance) +{ + color += one_float3(); + if (variance) { + *variance *= sqr3(one_float3() / color); + } + return log3(color); +} + +ccl_device float3 color_highlight_uncompress(float3 color) +{ + return exp3(color) - one_float3(); +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_COLOR_H__ */ diff --git a/intern/cycles/util/debug.cpp b/intern/cycles/util/debug.cpp new file mode 100644 index 00000000000..b49df3d42bc --- /dev/null +++ b/intern/cycles/util/debug.cpp @@ -0,0 +1,124 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "util/debug.h" + +#include + +#include "bvh/params.h" + +#include "util/log.h" +#include "util/string.h" + +CCL_NAMESPACE_BEGIN + +DebugFlags::CPU::CPU() + : avx2(true), avx(true), sse41(true), sse3(true), sse2(true), bvh_layout(BVH_LAYOUT_AUTO) +{ + reset(); +} + +void DebugFlags::CPU::reset() +{ +#define STRINGIFY(x) #x +#define CHECK_CPU_FLAGS(flag, env) \ + do { \ + flag = (getenv(env) == NULL); \ + if (!flag) { \ + VLOG(1) << "Disabling " << STRINGIFY(flag) << " instruction set."; \ + } \ + } while (0) + + CHECK_CPU_FLAGS(avx2, "CYCLES_CPU_NO_AVX2"); + CHECK_CPU_FLAGS(avx, "CYCLES_CPU_NO_AVX"); + CHECK_CPU_FLAGS(sse41, "CYCLES_CPU_NO_SSE41"); + CHECK_CPU_FLAGS(sse3, "CYCLES_CPU_NO_SSE3"); + CHECK_CPU_FLAGS(sse2, "CYCLES_CPU_NO_SSE2"); + +#undef STRINGIFY +#undef CHECK_CPU_FLAGS + + bvh_layout = BVH_LAYOUT_AUTO; +} + +DebugFlags::CUDA::CUDA() : adaptive_compile(false) +{ + reset(); +} + +DebugFlags::HIP::HIP() : adaptive_compile(false) +{ + reset(); +} + +void DebugFlags::CUDA::reset() +{ + if (getenv("CYCLES_CUDA_ADAPTIVE_COMPILE") != NULL) + adaptive_compile = true; +} + +void DebugFlags::HIP::reset() +{ + if (getenv("CYCLES_HIP_ADAPTIVE_COMPILE") != NULL) + adaptive_compile = true; +} + +DebugFlags::OptiX::OptiX() +{ + reset(); +} + +void DebugFlags::OptiX::reset() +{ + use_debug = false; +} + +DebugFlags::DebugFlags() : viewport_static_bvh(false), running_inside_blender(false) +{ + /* Nothing for now. 
*/ +} + +void DebugFlags::reset() +{ + viewport_static_bvh = false; + cpu.reset(); + cuda.reset(); + optix.reset(); +} + +std::ostream &operator<<(std::ostream &os, DebugFlagsConstRef debug_flags) +{ + os << "CPU flags:\n" + << " AVX2 : " << string_from_bool(debug_flags.cpu.avx2) << "\n" + << " AVX : " << string_from_bool(debug_flags.cpu.avx) << "\n" + << " SSE4.1 : " << string_from_bool(debug_flags.cpu.sse41) << "\n" + << " SSE3 : " << string_from_bool(debug_flags.cpu.sse3) << "\n" + << " SSE2 : " << string_from_bool(debug_flags.cpu.sse2) << "\n" + << " BVH layout : " << bvh_layout_name(debug_flags.cpu.bvh_layout) << "\n"; + + os << "CUDA flags:\n" + << " Adaptive Compile : " << string_from_bool(debug_flags.cuda.adaptive_compile) << "\n"; + + os << "OptiX flags:\n" + << " Debug : " << string_from_bool(debug_flags.optix.use_debug) << "\n"; + + os << "HIP flags:\n" + << " HIP streams : " << string_from_bool(debug_flags.hip.adaptive_compile) << "\n"; + + return os; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/debug.h b/intern/cycles/util/debug.h new file mode 100644 index 00000000000..58b2b047261 --- /dev/null +++ b/intern/cycles/util/debug.h @@ -0,0 +1,167 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __UTIL_DEBUG_H__ +#define __UTIL_DEBUG_H__ + +#include +#include + +#include "bvh/params.h" + +CCL_NAMESPACE_BEGIN + +/* Global storage for all sort of flags used to fine-tune behavior of particular + * areas for the development purposes, without officially exposing settings to + * the interface. + */ +class DebugFlags { + public: + /* Use static BVH in viewport, to match final render exactly. */ + bool viewport_static_bvh; + + bool running_inside_blender; + + /* Descriptor of CPU feature-set to be used. */ + struct CPU { + CPU(); + + /* Reset flags to their defaults. */ + void reset(); + + /* Flags describing which instructions sets are allowed for use. */ + bool avx2; + bool avx; + bool sse41; + bool sse3; + bool sse2; + + /* Check functions to see whether instructions up to the given one + * are allowed for use. + */ + bool has_avx2() + { + return has_avx() && avx2; + } + bool has_avx() + { + return has_sse41() && avx; + } + bool has_sse41() + { + return has_sse3() && sse41; + } + bool has_sse3() + { + return has_sse2() && sse3; + } + bool has_sse2() + { + return sse2; + } + + /* Requested BVH layout. + * + * By default the fastest will be used. For debugging the BVH used by other + * CPUs and GPUs can be selected here instead. + */ + BVHLayout bvh_layout; + }; + + /* Descriptor of CUDA feature-set to be used. */ + struct CUDA { + CUDA(); + + /* Reset flags to their defaults. */ + void reset(); + + /* Whether adaptive feature based runtime compile is enabled or not. + * Requires the CUDA Toolkit and only works on Linux at the moment. */ + bool adaptive_compile; + }; + + /* Descriptor of HIP feature-set to be used. */ + struct HIP { + HIP(); + + /* Reset flags to their defaults. */ + void reset(); + + /* Whether adaptive feature based runtime compile is enabled or not.*/ + bool adaptive_compile; + }; + + /* Descriptor of OptiX feature-set to be used. */ + struct OptiX { + OptiX(); + + /* Reset flags to their defaults. 
*/ + void reset(); + + /* Load OptiX module with debug capabilities. Will lower logging verbosity level, enable + * validations, and lower optimization level. */ + bool use_debug; + }; + + /* Get instance of debug flags registry. */ + static DebugFlags &get() + { + static DebugFlags instance; + return instance; + } + + /* Reset flags to their defaults. */ + void reset(); + + /* Requested CPU flags. */ + CPU cpu; + + /* Requested CUDA flags. */ + CUDA cuda; + + /* Requested OptiX flags. */ + OptiX optix; + + /* Requested HIP flags. */ + HIP hip; + + private: + DebugFlags(); + +#if (__cplusplus > 199711L) + public: + explicit DebugFlags(DebugFlags const & /*other*/) = delete; + void operator=(DebugFlags const & /*other*/) = delete; +#else + private: + explicit DebugFlags(DebugFlags const & /*other*/); + void operator=(DebugFlags const & /*other*/); +#endif +}; + +typedef DebugFlags &DebugFlagsRef; +typedef const DebugFlags &DebugFlagsConstRef; + +inline DebugFlags &DebugFlags() +{ + return DebugFlags::get(); +} + +std::ostream &operator<<(std::ostream &os, DebugFlagsConstRef debug_flags); + +CCL_NAMESPACE_END + +#endif /* __UTIL_DEBUG_H__ */ diff --git a/intern/cycles/util/defines.h b/intern/cycles/util/defines.h new file mode 100644 index 00000000000..9b1698d461a --- /dev/null +++ b/intern/cycles/util/defines.h @@ -0,0 +1,146 @@ + +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* clang-format off */ + +/* #define __forceinline triggers a bug in some clang-format versions, disable + * format for entire file to keep results consistent. */ + +#ifndef __UTIL_DEFINES_H__ +#define __UTIL_DEFINES_H__ + +/* Bitness */ + +#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || \ + defined(_M_X64) || defined(__aarch64__) +# define __KERNEL_64_BIT__ +#endif + +/* Qualifiers for kernel code shared by CPU and GPU */ + +#ifndef __KERNEL_GPU__ +# define ccl_device static inline +# define ccl_device_noinline static +# define ccl_device_noinline_cpu ccl_device_noinline +# define ccl_global +# define ccl_static_constant static const +# define ccl_constant const +# define ccl_local +# define ccl_local_param +# define ccl_private +# define ccl_restrict __restrict +# define ccl_optional_struct_init +# define ccl_loop_no_unroll +# define ccl_attr_maybe_unused [[maybe_unused]] +# define __KERNEL_WITH_SSE_ALIGN__ + +# if defined(_WIN32) && !defined(FREE_WINDOWS) +# define ccl_device_inline static __forceinline +# define ccl_device_forceinline static __forceinline +# define ccl_align(...) __declspec(align(__VA_ARGS__)) +# ifdef __KERNEL_64_BIT__ +# define ccl_try_align(...) __declspec(align(__VA_ARGS__)) +# else /* __KERNEL_64_BIT__ */ +# undef __KERNEL_WITH_SSE_ALIGN__ +/* No support for function arguments (error C2719). */ +# define ccl_try_align(...) +# endif /* __KERNEL_64_BIT__ */ +# define ccl_may_alias +# define ccl_always_inline __forceinline +# define ccl_never_inline __declspec(noinline) +# else /* _WIN32 && !FREE_WINDOWS */ +# define ccl_device_inline static inline __attribute__((always_inline)) +# define ccl_device_forceinline static inline __attribute__((always_inline)) +# define ccl_align(...) __attribute__((aligned(__VA_ARGS__))) +# ifndef FREE_WINDOWS64 +# define __forceinline inline __attribute__((always_inline)) +# endif +# define ccl_try_align(...) 
__attribute__((aligned(__VA_ARGS__))) +# define ccl_may_alias __attribute__((__may_alias__)) +# define ccl_always_inline __attribute__((always_inline)) +# define ccl_never_inline __attribute__((noinline)) +# endif /* _WIN32 && !FREE_WINDOWS */ + +/* Use to suppress '-Wimplicit-fallthrough' (in place of 'break'). */ +# ifndef ATTR_FALLTHROUGH +# if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */ +# define ATTR_FALLTHROUGH __attribute__((fallthrough)) +# else +# define ATTR_FALLTHROUGH ((void)0) +# endif +# endif +#endif /* __KERNEL_GPU__ */ + +/* macros */ + +/* hints for branch prediction, only use in code that runs a _lot_ */ +#if defined(__GNUC__) && defined(__KERNEL_CPU__) +# define LIKELY(x) __builtin_expect(!!(x), 1) +# define UNLIKELY(x) __builtin_expect(!!(x), 0) +#else +# define LIKELY(x) (x) +# define UNLIKELY(x) (x) +#endif + +#if defined(__GNUC__) || defined(__clang__) +# if defined(__cplusplus) +/* Some magic to be sure we don't have reference in the type. */ +template static inline T decltype_helper(T x) +{ + return x; +} +# define TYPEOF(x) decltype(decltype_helper(x)) +# else +# define TYPEOF(x) typeof(x) +# endif +#endif + +/* Causes warning: + * incompatible types when assigning to type 'Foo' from type 'Bar' + * ... 
the compiler optimizes away the temp var */ +#ifdef __GNUC__ +# define CHECK_TYPE(var, type) \ + { \ + TYPEOF(var) * __tmp; \ + __tmp = (type *)NULL; \ + (void)__tmp; \ + } \ + (void)0 + +# define CHECK_TYPE_PAIR(var_a, var_b) \ + { \ + TYPEOF(var_a) * __tmp; \ + __tmp = (typeof(var_b) *)NULL; \ + (void)__tmp; \ + } \ + (void)0 +#else +# define CHECK_TYPE(var, type) +# define CHECK_TYPE_PAIR(var_a, var_b) +#endif + +/* can be used in simple macros */ +#define CHECK_TYPE_INLINE(val, type) ((void)(((type)0) != (val))) + +#ifndef __KERNEL_GPU__ +# include +# define util_assert(statement) assert(statement) +#else +# define util_assert(statement) +#endif + +#endif /* __UTIL_DEFINES_H__ */ diff --git a/intern/cycles/util/deque.h b/intern/cycles/util/deque.h new file mode 100644 index 00000000000..ccac961aa7d --- /dev/null +++ b/intern/cycles/util/deque.h @@ -0,0 +1,28 @@ +/* + * Copyright 2011-2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __UTIL_DEQUE_H__ +#define __UTIL_DEQUE_H__ + +#include + +CCL_NAMESPACE_BEGIN + +using std::deque; + +CCL_NAMESPACE_END + +#endif /* __UTIL_DEQUE_H__ */ diff --git a/intern/cycles/util/disjoint_set.h b/intern/cycles/util/disjoint_set.h new file mode 100644 index 00000000000..5226423d7cd --- /dev/null +++ b/intern/cycles/util/disjoint_set.h @@ -0,0 +1,75 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_DISJOINT_SET_H__ +#define __UTIL_DISJOINT_SET_H__ + +#include "util/array.h" +#include + +CCL_NAMESPACE_BEGIN + +class DisjointSet { + private: + array parents; + array ranks; + + public: + DisjointSet(size_t size) : parents(size), ranks(size) + { + for (size_t i = 0; i < size; i++) { + parents[i] = i; + ranks[i] = 0; + } + } + + size_t find(size_t x) + { + size_t root = x; + while (parents[root] != root) { + root = parents[root]; + } + while (parents[x] != root) { + size_t parent = parents[x]; + parents[x] = root; + x = parent; + } + return root; + } + + void join(size_t x, size_t y) + { + size_t x_root = find(x); + size_t y_root = find(y); + + if (x_root == y_root) { + return; + } + + if (ranks[x_root] < ranks[y_root]) { + std::swap(x_root, y_root); + } + parents[y_root] = x_root; + + if (ranks[x_root] == ranks[y_root]) { + ranks[x_root]++; + } + } +}; + +CCL_NAMESPACE_END + +#endif /* __UTIL_DISJOINT_SET_H__ */ diff --git a/intern/cycles/util/foreach.h 
b/intern/cycles/util/foreach.h new file mode 100644 index 00000000000..d907974be91 --- /dev/null +++ b/intern/cycles/util/foreach.h @@ -0,0 +1,24 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_FOREACH_H__ +#define __UTIL_FOREACH_H__ + +/* Nice foreach() loops for STL data structures. */ + +#define foreach(x, y) for (x : y) + +#endif /* __UTIL_FOREACH_H__ */ diff --git a/intern/cycles/util/function.h b/intern/cycles/util/function.h new file mode 100644 index 00000000000..f3cc00329ad --- /dev/null +++ b/intern/cycles/util/function.h @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __UTIL_FUNCTION_H__ +#define __UTIL_FUNCTION_H__ + +#include + +CCL_NAMESPACE_BEGIN + +#define function_bind std::bind +#define function_null nullptr +using std::function; +using std::placeholders::_1; +using std::placeholders::_2; +using std::placeholders::_3; +using std::placeholders::_4; +using std::placeholders::_5; +using std::placeholders::_6; +using std::placeholders::_7; +using std::placeholders::_8; +using std::placeholders::_9; + +CCL_NAMESPACE_END + +#endif /* __UTIL_FUNCTION_H__ */ diff --git a/intern/cycles/util/guarded_allocator.cpp b/intern/cycles/util/guarded_allocator.cpp new file mode 100644 index 00000000000..4063b301331 --- /dev/null +++ b/intern/cycles/util/guarded_allocator.cpp @@ -0,0 +1,48 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util/guarded_allocator.h" +#include "util/stats.h" + +CCL_NAMESPACE_BEGIN + +static Stats global_stats(Stats::static_init); + +/* Internal API. */ + +void util_guarded_mem_alloc(size_t n) +{ + global_stats.mem_alloc(n); +} + +void util_guarded_mem_free(size_t n) +{ + global_stats.mem_free(n); +} + +/* Public API. 
*/ + +size_t util_guarded_get_mem_used() +{ + return global_stats.mem_used; +} + +size_t util_guarded_get_mem_peak() +{ + return global_stats.mem_peak; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/guarded_allocator.h b/intern/cycles/util/guarded_allocator.h new file mode 100644 index 00000000000..f78cc5f5da9 --- /dev/null +++ b/intern/cycles/util/guarded_allocator.h @@ -0,0 +1,185 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_GUARDED_ALLOCATOR_H__ +#define __UTIL_GUARDED_ALLOCATOR_H__ + +#include +#include +#include + +#ifdef WITH_BLENDER_GUARDEDALLOC +# include "../../guardedalloc/MEM_guardedalloc.h" +#endif + +CCL_NAMESPACE_BEGIN + +/* Internal use only. */ +void util_guarded_mem_alloc(size_t n); +void util_guarded_mem_free(size_t n); + +/* Guarded allocator for the use with STL. 
*/ +template class GuardedAllocator { + public: + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef T *pointer; + typedef const T *const_pointer; + typedef T &reference; + typedef const T &const_reference; + typedef T value_type; + + GuardedAllocator() + { + } + GuardedAllocator(const GuardedAllocator &) + { + } + + T *allocate(size_t n, const void *hint = 0) + { + (void)hint; + size_t size = n * sizeof(T); + util_guarded_mem_alloc(size); + if (n == 0) { + return NULL; + } + T *mem; +#ifdef WITH_BLENDER_GUARDEDALLOC + /* C++ standard requires allocation functions to allocate memory suitably + * aligned for any standard type. This is 16 bytes for 64 bit platform as + * far as i concerned. We might over-align on 32bit here, but that should + * be all safe actually. + */ + mem = (T *)MEM_mallocN_aligned(size, 16, "Cycles Alloc"); +#else + mem = (T *)malloc(size); +#endif + if (mem == NULL) { + throw std::bad_alloc(); + } + return mem; + } + + void deallocate(T *p, size_t n) + { + util_guarded_mem_free(n * sizeof(T)); + if (p != NULL) { +#ifdef WITH_BLENDER_GUARDEDALLOC + MEM_freeN(p); +#else + free(p); +#endif + } + } + + T *address(T &x) const + { + return &x; + } + + const T *address(const T &x) const + { + return &x; + } + + GuardedAllocator &operator=(const GuardedAllocator &) + { + return *this; + } + + size_t max_size() const + { + return size_t(-1); + } + + template struct rebind { + typedef GuardedAllocator other; + }; + + template GuardedAllocator(const GuardedAllocator &) + { + } + + template GuardedAllocator &operator=(const GuardedAllocator &) + { + return *this; + } + + inline bool operator==(GuardedAllocator const & /*other*/) const + { + return true; + } + inline bool operator!=(GuardedAllocator const &other) const + { + return !operator==(other); + } + +#ifdef _MSC_VER + /* Welcome to the black magic here. 
+ * + * The issue is that MSVC C++ allocates container proxy on any + * vector initialization, including static vectors which don't + * have any data yet. This leads to several issues: + * + * - Static objects initialization fiasco (global_stats from + * util_stats.h might not be initialized yet). + * - If main() function changes allocator type (for example, + * this might happen with `blender --debug-memory`) nobody + * will know how to convert already allocated memory to a new + * guarded allocator. + * + * Here we work this around by making it so container proxy does + * not use guarded allocation. A bit fragile, unfortunately. + */ + template<> struct rebind { + typedef std::allocator other; + }; + + operator std::allocator() const + { + return std::allocator(); + } +#endif +}; + +/* Get memory usage and peak from the guarded STL allocator. */ +size_t util_guarded_get_mem_used(); +size_t util_guarded_get_mem_peak(); + +/* Call given function and keep track if it runs out of memory. + * + * If it does run out of memory, stop execution and set progress + * to do a global cancel. + * + * It's not fully robust, but good enough to catch obvious issues + * when running out of memory. + */ +#define MEM_GUARDED_CALL(progress, func, ...) \ + do { \ + try { \ + (func)(__VA_ARGS__); \ + } \ + catch (std::bad_alloc &) { \ + fprintf(stderr, "Error: run out of memory!\n"); \ + fflush(stderr); \ + (progress)->set_error("Out of memory"); \ + } \ + } while (false) + +CCL_NAMESPACE_END + +#endif /* __UTIL_GUARDED_ALLOCATOR_H__ */ diff --git a/intern/cycles/util/half.h b/intern/cycles/util/half.h new file mode 100644 index 00000000000..016975e3c25 --- /dev/null +++ b/intern/cycles/util/half.h @@ -0,0 +1,169 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_HALF_H__ +#define __UTIL_HALF_H__ + +#include "util/math.h" +#include "util/types.h" + +#if !defined(__KERNEL_GPU__) && defined(__KERNEL_SSE2__) +# include "util/simd.h" +#endif + +CCL_NAMESPACE_BEGIN + +/* Half Floats */ + +/* CUDA has its own half data type, no need to define then */ +#if !defined(__KERNEL_CUDA__) && !defined(__KERNEL_HIP__) +/* Implementing this as a class rather than a typedef so that the compiler can tell it apart from + * unsigned shorts. */ +class half { + public: + half() : v(0) + { + } + half(const unsigned short &i) : v(i) + { + } + operator unsigned short() + { + return v; + } + half &operator=(const unsigned short &i) + { + v = i; + return *this; + } + + private: + unsigned short v; +}; +#endif + +struct half4 { + half x, y, z, w; +}; + +/* Conversion to/from half float for image textures + * + * Simplified float to half for fast sampling on processor without a native + * instruction, and eliminating any NaN and inf values. */ + +ccl_device_inline half float_to_half_image(float f) +{ +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) + return __float2half(f); +#else + const uint u = __float_as_uint(f); + /* Sign bit, shifted to its position. */ + uint sign_bit = u & 0x80000000; + sign_bit >>= 16; + /* Exponent. */ + uint exponent_bits = u & 0x7f800000; + /* Non-sign bits. */ + uint value_bits = u & 0x7fffffff; + value_bits >>= 13; /* Align mantissa on MSB. */ + value_bits -= 0x1c000; /* Adjust bias. */ + /* Flush-to-zero. */ + value_bits = (exponent_bits < 0x38800000) ? 
0 : value_bits; + /* Clamp-to-max. */ + value_bits = (exponent_bits > 0x47000000) ? 0x7bff : value_bits; + /* Denormals-as-zero. */ + value_bits = (exponent_bits == 0 ? 0 : value_bits); + /* Re-insert sign bit and return. */ + return (value_bits | sign_bit); +#endif +} + +ccl_device_inline float half_to_float_image(half h) +{ +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) + return __half2float(h); +#else + const int x = ((h & 0x8000) << 16) | (((h & 0x7c00) + 0x1C000) << 13) | ((h & 0x03FF) << 13); + return __int_as_float(x); +#endif +} + +ccl_device_inline float4 half4_to_float4_image(const half4 h) +{ + /* Unable to use because it gives different results half_to_float_image, can we + * modify float_to_half_image so the conversion results are identical? */ +#if 0 /* defined(__KERNEL_AVX2__) */ + /* CPU: AVX. */ + __m128i x = _mm_castpd_si128(_mm_load_sd((const double *)&h)); + return float4(_mm_cvtph_ps(x)); +#endif + + const float4 f = make_float4(half_to_float_image(h.x), + half_to_float_image(h.y), + half_to_float_image(h.z), + half_to_float_image(h.w)); + return f; +} + +/* Conversion to half float texture for display. + * + * Simplified float to half for fast display texture conversion on processors + * without a native instruction. Assumes no negative, no NaN, no inf, and sets + * denormal to 0. */ + +ccl_device_inline half float_to_half_display(const float f) +{ +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) + return __float2half(f); +#else + const int x = __float_as_int((f > 0.0f) ? ((f < 65504.0f) ? f : 65504.0f) : 0.0f); + const int absolute = x & 0x7FFFFFFF; + const int Z = absolute + 0xC8000000; + const int result = (absolute < 0x38800000) ? 0 : Z; + const int rshift = (result >> 13); + return (rshift & 0x7FFF); +#endif +} + +ccl_device_inline half4 float4_to_half4_display(const float4 f) +{ +#ifdef __KERNEL_SSE2__ + /* CPU: SSE and AVX. 
*/ + ssef x = min(max(load4f(f), 0.0f), 65504.0f); +# ifdef __KERNEL_AVX2__ + ssei rpack = _mm_cvtps_ph(x, 0); +# else + ssei absolute = cast(x) & 0x7FFFFFFF; + ssei Z = absolute + 0xC8000000; + ssei result = andnot(absolute < 0x38800000, Z); + ssei rshift = (result >> 13) & 0x7FFF; + ssei rpack = _mm_packs_epi32(rshift, rshift); +# endif + half4 h; + _mm_storel_pi((__m64 *)&h, _mm_castsi128_ps(rpack)); + return h; +#else + /* GPU and scalar fallback. */ + const half4 h = {float_to_half_display(f.x), + float_to_half_display(f.y), + float_to_half_display(f.z), + float_to_half_display(f.w)}; + return h; +#endif +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_HALF_H__ */ diff --git a/intern/cycles/util/hash.h b/intern/cycles/util/hash.h new file mode 100644 index 00000000000..013a0f90a27 --- /dev/null +++ b/intern/cycles/util/hash.h @@ -0,0 +1,389 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __UTIL_HASH_H__ +#define __UTIL_HASH_H__ + +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* ***** Jenkins Lookup3 Hash Functions ***** */ + +/* Source: http://burtleburtle.net/bob/c/lookup3.c */ + +#define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k)))) + +#define mix(a, b, c) \ + { \ + a -= c; \ + a ^= rot(c, 4); \ + c += b; \ + b -= a; \ + b ^= rot(a, 6); \ + a += c; \ + c -= b; \ + c ^= rot(b, 8); \ + b += a; \ + a -= c; \ + a ^= rot(c, 16); \ + c += b; \ + b -= a; \ + b ^= rot(a, 19); \ + a += c; \ + c -= b; \ + c ^= rot(b, 4); \ + b += a; \ + } \ + ((void)0) + +#define final(a, b, c) \ + { \ + c ^= b; \ + c -= rot(b, 14); \ + a ^= c; \ + a -= rot(c, 11); \ + b ^= a; \ + b -= rot(a, 25); \ + c ^= b; \ + c -= rot(b, 16); \ + a ^= c; \ + a -= rot(c, 4); \ + b ^= a; \ + b -= rot(a, 14); \ + c ^= b; \ + c -= rot(b, 24); \ + } \ + ((void)0) + +ccl_device_inline uint hash_uint(uint kx) +{ + uint a, b, c; + a = b = c = 0xdeadbeef + (1 << 2) + 13; + + a += kx; + final(a, b, c); + + return c; +} + +ccl_device_inline uint hash_uint2(uint kx, uint ky) +{ + uint a, b, c; + a = b = c = 0xdeadbeef + (2 << 2) + 13; + + b += ky; + a += kx; + final(a, b, c); + + return c; +} + +ccl_device_inline uint hash_uint3(uint kx, uint ky, uint kz) +{ + uint a, b, c; + a = b = c = 0xdeadbeef + (3 << 2) + 13; + + c += kz; + b += ky; + a += kx; + final(a, b, c); + + return c; +} + +ccl_device_inline uint hash_uint4(uint kx, uint ky, uint kz, uint kw) +{ + uint a, b, c; + a = b = c = 0xdeadbeef + (4 << 2) + 13; + + a += kx; + b += ky; + c += kz; + mix(a, b, c); + + a += kw; + final(a, b, c); + + return c; +} + +#undef rot +#undef final +#undef mix + +/* Hashing uint or uint[234] into a float in the range [0, 1]. 
*/ + +ccl_device_inline float hash_uint_to_float(uint kx) +{ + return (float)hash_uint(kx) / (float)0xFFFFFFFFu; +} + +ccl_device_inline float hash_uint2_to_float(uint kx, uint ky) +{ + return (float)hash_uint2(kx, ky) / (float)0xFFFFFFFFu; +} + +ccl_device_inline float hash_uint3_to_float(uint kx, uint ky, uint kz) +{ + return (float)hash_uint3(kx, ky, kz) / (float)0xFFFFFFFFu; +} + +ccl_device_inline float hash_uint4_to_float(uint kx, uint ky, uint kz, uint kw) +{ + return (float)hash_uint4(kx, ky, kz, kw) / (float)0xFFFFFFFFu; +} + +/* Hashing float or float[234] into a float in the range [0, 1]. */ + +ccl_device_inline float hash_float_to_float(float k) +{ + return hash_uint_to_float(__float_as_uint(k)); +} + +ccl_device_inline float hash_float2_to_float(float2 k) +{ + return hash_uint2_to_float(__float_as_uint(k.x), __float_as_uint(k.y)); +} + +ccl_device_inline float hash_float3_to_float(float3 k) +{ + return hash_uint3_to_float(__float_as_uint(k.x), __float_as_uint(k.y), __float_as_uint(k.z)); +} + +ccl_device_inline float hash_float4_to_float(float4 k) +{ + return hash_uint4_to_float( + __float_as_uint(k.x), __float_as_uint(k.y), __float_as_uint(k.z), __float_as_uint(k.w)); +} + +/* Hashing float[234] into float[234] of components in the range [0, 1]. 
*/ + +ccl_device_inline float2 hash_float2_to_float2(float2 k) +{ + return make_float2(hash_float2_to_float(k), hash_float3_to_float(make_float3(k.x, k.y, 1.0))); +} + +ccl_device_inline float3 hash_float3_to_float3(float3 k) +{ + return make_float3(hash_float3_to_float(k), + hash_float4_to_float(make_float4(k.x, k.y, k.z, 1.0)), + hash_float4_to_float(make_float4(k.x, k.y, k.z, 2.0))); +} + +ccl_device_inline float4 hash_float4_to_float4(float4 k) +{ + return make_float4(hash_float4_to_float(k), + hash_float4_to_float(make_float4(k.w, k.x, k.y, k.z)), + hash_float4_to_float(make_float4(k.z, k.w, k.x, k.y)), + hash_float4_to_float(make_float4(k.y, k.z, k.w, k.x))); +} + +/* Hashing float or float[234] into float3 of components in range [0, 1]. */ + +ccl_device_inline float3 hash_float_to_float3(float k) +{ + return make_float3(hash_float_to_float(k), + hash_float2_to_float(make_float2(k, 1.0)), + hash_float2_to_float(make_float2(k, 2.0))); +} + +ccl_device_inline float3 hash_float2_to_float3(float2 k) +{ + return make_float3(hash_float2_to_float(k), + hash_float3_to_float(make_float3(k.x, k.y, 1.0)), + hash_float3_to_float(make_float3(k.x, k.y, 2.0))); +} + +ccl_device_inline float3 hash_float4_to_float3(float4 k) +{ + return make_float3(hash_float4_to_float(k), + hash_float4_to_float(make_float4(k.z, k.x, k.w, k.y)), + hash_float4_to_float(make_float4(k.w, k.z, k.y, k.x))); +} + +/* SSE Versions Of Jenkins Lookup3 Hash Functions */ + +#ifdef __KERNEL_SSE2__ +# define rot(x, k) (((x) << (k)) | (srl(x, 32 - (k)))) + +# define mix(a, b, c) \ + { \ + a -= c; \ + a ^= rot(c, 4); \ + c += b; \ + b -= a; \ + b ^= rot(a, 6); \ + a += c; \ + c -= b; \ + c ^= rot(b, 8); \ + b += a; \ + a -= c; \ + a ^= rot(c, 16); \ + c += b; \ + b -= a; \ + b ^= rot(a, 19); \ + a += c; \ + c -= b; \ + c ^= rot(b, 4); \ + b += a; \ + } + +# define final(a, b, c) \ + { \ + c ^= b; \ + c -= rot(b, 14); \ + a ^= c; \ + a -= rot(c, 11); \ + b ^= a; \ + b -= rot(a, 25); \ + c ^= b; \ + c -= 
rot(b, 16); \ + a ^= c; \ + a -= rot(c, 4); \ + b ^= a; \ + b -= rot(a, 14); \ + c ^= b; \ + c -= rot(b, 24); \ + } + +ccl_device_inline ssei hash_ssei(ssei kx) +{ + ssei a, b, c; + a = b = c = ssei(0xdeadbeef + (1 << 2) + 13); + + a += kx; + final(a, b, c); + + return c; +} + +ccl_device_inline ssei hash_ssei2(ssei kx, ssei ky) +{ + ssei a, b, c; + a = b = c = ssei(0xdeadbeef + (2 << 2) + 13); + + b += ky; + a += kx; + final(a, b, c); + + return c; +} + +ccl_device_inline ssei hash_ssei3(ssei kx, ssei ky, ssei kz) +{ + ssei a, b, c; + a = b = c = ssei(0xdeadbeef + (3 << 2) + 13); + + c += kz; + b += ky; + a += kx; + final(a, b, c); + + return c; +} + +ccl_device_inline ssei hash_ssei4(ssei kx, ssei ky, ssei kz, ssei kw) +{ + ssei a, b, c; + a = b = c = ssei(0xdeadbeef + (4 << 2) + 13); + + a += kx; + b += ky; + c += kz; + mix(a, b, c); + + a += kw; + final(a, b, c); + + return c; +} + +# if defined(__KERNEL_AVX__) +ccl_device_inline avxi hash_avxi(avxi kx) +{ + avxi a, b, c; + a = b = c = avxi(0xdeadbeef + (1 << 2) + 13); + + a += kx; + final(a, b, c); + + return c; +} + +ccl_device_inline avxi hash_avxi2(avxi kx, avxi ky) +{ + avxi a, b, c; + a = b = c = avxi(0xdeadbeef + (2 << 2) + 13); + + b += ky; + a += kx; + final(a, b, c); + + return c; +} + +ccl_device_inline avxi hash_avxi3(avxi kx, avxi ky, avxi kz) +{ + avxi a, b, c; + a = b = c = avxi(0xdeadbeef + (3 << 2) + 13); + + c += kz; + b += ky; + a += kx; + final(a, b, c); + + return c; +} + +ccl_device_inline avxi hash_avxi4(avxi kx, avxi ky, avxi kz, avxi kw) +{ + avxi a, b, c; + a = b = c = avxi(0xdeadbeef + (4 << 2) + 13); + + a += kx; + b += ky; + c += kz; + mix(a, b, c); + + a += kw; + final(a, b, c); + + return c; +} +# endif + +# undef rot +# undef final +# undef mix + +#endif + +#ifndef __KERNEL_GPU__ +static inline uint hash_string(const char *str) +{ + uint i = 0, c; + + while ((c = *str++)) + i = i * 37 + c; + + return i; +} +#endif + +CCL_NAMESPACE_END + +#endif /* __UTIL_HASH_H__ */ diff --git 
a/intern/cycles/util/ies.cpp b/intern/cycles/util/ies.cpp new file mode 100644 index 00000000000..5e879478df5 --- /dev/null +++ b/intern/cycles/util/ies.cpp @@ -0,0 +1,411 @@ +/* + * Copyright 2011-2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "util/foreach.h" +#include "util/ies.h" +#include "util/math.h" +#include "util/string.h" + +CCL_NAMESPACE_BEGIN + +// NOTE: For some reason gcc-7.2 does not instantiate this version of allocator +// here (used in IESTextParser). Works fine for gcc-6, gcc-7.3 and gcc-8. +// +// TODO(sergey): Get to the root of this issue, or confirm this is a compiler +// issue. 
+template class GuardedAllocator; + +bool IESFile::load(const string &ies) +{ + clear(); + if (!parse(ies) || !process()) { + clear(); + return false; + } + return true; +} + +void IESFile::clear() +{ + intensity.clear(); + v_angles.clear(); + h_angles.clear(); +} + +int IESFile::packed_size() +{ + if (v_angles.size() && h_angles.size() > 0) { + return 2 + h_angles.size() + v_angles.size() + h_angles.size() * v_angles.size(); + } + return 0; +} + +void IESFile::pack(float *data) +{ + if (v_angles.size() && h_angles.size()) { + *(data++) = __int_as_float(h_angles.size()); + *(data++) = __int_as_float(v_angles.size()); + + memcpy(data, &h_angles[0], h_angles.size() * sizeof(float)); + data += h_angles.size(); + memcpy(data, &v_angles[0], v_angles.size() * sizeof(float)); + data += v_angles.size(); + + for (int h = 0; h < intensity.size(); h++) { + memcpy(data, &intensity[h][0], v_angles.size() * sizeof(float)); + data += v_angles.size(); + } + } +} + +class IESTextParser { + public: + vector text; + char *data; + + IESTextParser(const string &str) : text(str.begin(), str.end()) + { + std::replace(text.begin(), text.end(), ',', ' '); + data = strstr(&text[0], "\nTILT="); + } + + bool eof() + { + return (data == NULL) || (data[0] == '\0'); + } + + double get_double() + { + if (eof()) { + return 0.0; + } + char *old_data = data; + double val = strtod(data, &data); + if (data == old_data) { + data = NULL; + return 0.0; + } + return val; + } + + long get_long() + { + if (eof()) { + return 0; + } + char *old_data = data; + long val = strtol(data, &data, 10); + if (data == old_data) { + data = NULL; + return 0; + } + return val; + } +}; + +bool IESFile::parse(const string &ies) +{ + if (ies.empty()) { + return false; + } + + IESTextParser parser(ies); + if (parser.eof()) { + return false; + } + + /* Handle the tilt data block. 
*/ + if (strncmp(parser.data, "\nTILT=INCLUDE", 13) == 0) { + parser.data += 13; + parser.get_double(); /* Lamp to Luminaire geometry */ + int num_tilt = parser.get_long(); /* Amount of tilt angles and factors */ + /* Skip over angles and factors. */ + for (int i = 0; i < 2 * num_tilt; i++) { + parser.get_double(); + } + } + else { + /* Skip to next line. */ + parser.data = strstr(parser.data + 1, "\n"); + } + + if (parser.eof()) { + return false; + } + parser.data++; + + parser.get_long(); /* Number of lamps */ + parser.get_double(); /* Lumens per lamp */ + double factor = parser.get_double(); /* Candela multiplier */ + int v_angles_num = parser.get_long(); /* Number of vertical angles */ + int h_angles_num = parser.get_long(); /* Number of horizontal angles */ + type = (IESType)parser.get_long(); /* Photometric type */ + + /* TODO(lukas): Test whether the current type B processing can also deal with type A files. + * In theory the only difference should be orientation which we ignore anyways, but with IES you + * never know... + */ + if (type != TYPE_B && type != TYPE_C) { + return false; + } + + parser.get_long(); /* Unit of the geometry data */ + parser.get_double(); /* Width */ + parser.get_double(); /* Length */ + parser.get_double(); /* Height */ + factor *= parser.get_double(); /* Ballast factor */ + factor *= parser.get_double(); /* Ballast-Lamp Photometric factor */ + parser.get_double(); /* Input Watts */ + + /* Intensity values in IES files are specified in candela (lumen/sr), a photometric quantity. + * Cycles expects radiometric quantities, though, which requires a conversion. + * However, the Luminous efficacy (ratio of lumens per Watt) depends on the spectral distribution + * of the light source since lumens take human perception into account. + * Since this spectral distribution is not known from the IES file, a typical one must be + * assumed. 
The D65 standard illuminant has a Luminous efficacy of 177.83, which is used here to + * convert to Watt/sr. A more advanced approach would be to add a Blackbody Temperature input to + * the node and numerically integrate the Luminous efficacy from the resulting spectral + * distribution. Also, the Watt/sr value must be multiplied by 4*pi to get the Watt value that + * Cycles expects for lamp strength. Therefore, the conversion here uses 4*pi/177.83 as a Candela + * to Watt factor. + */ + factor *= 0.0706650768394; + + v_angles.reserve(v_angles_num); + for (int i = 0; i < v_angles_num; i++) { + v_angles.push_back((float)parser.get_double()); + } + + h_angles.reserve(h_angles_num); + for (int i = 0; i < h_angles_num; i++) { + h_angles.push_back((float)parser.get_double()); + } + + intensity.resize(h_angles_num); + for (int i = 0; i < h_angles_num; i++) { + intensity[i].reserve(v_angles_num); + for (int j = 0; j < v_angles_num; j++) { + intensity[i].push_back((float)(factor * parser.get_double())); + } + } + + return !parser.eof(); +} + +bool IESFile::process_type_b() +{ + vector> newintensity; + newintensity.resize(v_angles.size()); + for (int i = 0; i < v_angles.size(); i++) { + newintensity[i].reserve(h_angles.size()); + for (int j = 0; j < h_angles.size(); j++) { + newintensity[i].push_back(intensity[j][i]); + } + } + intensity.swap(newintensity); + h_angles.swap(v_angles); + + float h_first = h_angles[0], h_last = h_angles[h_angles.size() - 1]; + if (h_last != 90.0f) { + return false; + } + + if (h_first == 0.0f) { + /* The range in the file corresponds to 90°-180°, we need to mirror that to get the + * full 180° range. 
*/ + vector new_h_angles; + vector> new_intensity; + int hnum = h_angles.size(); + new_h_angles.reserve(2 * hnum - 1); + new_intensity.reserve(2 * hnum - 1); + for (int i = hnum - 1; i > 0; i--) { + new_h_angles.push_back(90.0f - h_angles[i]); + new_intensity.push_back(intensity[i]); + } + for (int i = 0; i < hnum; i++) { + new_h_angles.push_back(90.0f + h_angles[i]); + new_intensity.push_back(intensity[i]); + } + h_angles.swap(new_h_angles); + intensity.swap(new_intensity); + } + else if (h_first == -90.0f) { + /* We have full 180° coverage, so just shift to match the angle range convention. */ + for (int i = 0; i < h_angles.size(); i++) { + h_angles[i] += 90.0f; + } + } + /* To get correct results with the cubic interpolation in the kernel, the horizontal range + * has to cover all 360°. Therefore, we copy the 0° entry to 360° to ensure full coverage + * and seamless interpolation. */ + h_angles.push_back(360.0f); + intensity.push_back(intensity[0]); + + float v_first = v_angles[0], v_last = v_angles[v_angles.size() - 1]; + if (v_last != 90.0f) { + return false; + } + + if (v_first == 0.0f) { + /* The range in the file corresponds to 90°-180°, we need to mirror that to get the + * full 180° range. */ + vector new_v_angles; + int hnum = h_angles.size(); + int vnum = v_angles.size(); + new_v_angles.reserve(2 * vnum - 1); + for (int i = vnum - 1; i > 0; i--) { + new_v_angles.push_back(90.0f - v_angles[i]); + } + for (int i = 0; i < vnum; i++) { + new_v_angles.push_back(90.0f + v_angles[i]); + } + for (int i = 0; i < hnum; i++) { + vector new_intensity; + new_intensity.reserve(2 * vnum - 1); + for (int j = vnum - 2; j >= 0; j--) { + new_intensity.push_back(intensity[i][j]); + } + new_intensity.insert(new_intensity.end(), intensity[i].begin(), intensity[i].end()); + intensity[i].swap(new_intensity); + } + v_angles.swap(new_v_angles); + } + else if (v_first == -90.0f) { + /* We have full 180° coverage, so just shift to match the angle range convention. 
*/ + for (int i = 0; i < v_angles.size(); i++) { + v_angles[i] += 90.0f; + } + } + + return true; +} + +bool IESFile::process_type_c() +{ + if (h_angles[0] == 90.0f) { + /* Some files are stored from 90° to 270°, so we just rotate them to the regular 0°-180° range + * here. */ + for (int i = 0; i < h_angles.size(); i++) { + h_angles[i] -= 90.0f; + } + } + + if (h_angles[0] != 0.0f) { + return false; + } + + if (h_angles.size() == 1) { + h_angles.push_back(360.0f); + intensity.push_back(intensity[0]); + } + + if (h_angles[h_angles.size() - 1] == 90.0f) { + /* Only one quadrant is defined, so we need to mirror twice (from one to two, then to four). + * Since the two->four mirroring step might also be required if we get an input of two + * quadrants, we only do the first mirror here and later do the second mirror in either case. + */ + int hnum = h_angles.size(); + for (int i = hnum - 2; i >= 0; i--) { + h_angles.push_back(180.0f - h_angles[i]); + intensity.push_back(intensity[i]); + } + } + + if (h_angles[h_angles.size() - 1] == 180.0f) { + /* Mirror half to the full range. */ + int hnum = h_angles.size(); + for (int i = hnum - 2; i >= 0; i--) { + h_angles.push_back(360.0f - h_angles[i]); + intensity.push_back(intensity[i]); + } + } + + /* Some files skip the 360° entry (contrary to standard) because it's supposed to be identical to + * the 0° entry. If the file has a discernible order in its spacing, just fix this. 
*/ + if (h_angles[h_angles.size() - 1] != 360.0f) { + int hnum = h_angles.size(); + float last_step = h_angles[hnum - 1] - h_angles[hnum - 2]; + float first_step = h_angles[1] - h_angles[0]; + float difference = 360.0f - h_angles[hnum - 1]; + if (last_step == difference || first_step == difference) { + h_angles.push_back(360.0f); + intensity.push_back(intensity[0]); + } + else { + return false; + } + } + + float v_first = v_angles[0], v_last = v_angles[v_angles.size() - 1]; + if (v_first == 90.0f) { + if (v_last == 180.0f) { + /* Flip to ensure that vertical angles always start at 0°. */ + for (int i = 0; i < v_angles.size(); i++) { + v_angles[i] = 180.0f - v_angles[i]; + } + } + else { + return false; + } + } + else if (v_first != 0.0f) { + return false; + } + + return true; +} + +bool IESFile::process() +{ + if (h_angles.size() == 0 || v_angles.size() == 0) { + return false; + } + + if (type == TYPE_B) { + if (!process_type_b()) { + return false; + } + } + else { + assert(type == TYPE_C); + if (!process_type_c()) { + return false; + } + } + + assert(v_angles[0] == 0.0f); + assert(h_angles[0] == 0.0f); + assert(h_angles[h_angles.size() - 1] == 360.0f); + + /* Convert from deg to rad. */ + for (int i = 0; i < v_angles.size(); i++) { + v_angles[i] *= M_PI_F / 180.f; + } + for (int i = 0; i < h_angles.size(); i++) { + h_angles[i] *= M_PI_F / 180.f; + } + + return true; +} + +IESFile::~IESFile() +{ + clear(); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/ies.h b/intern/cycles/util/ies.h new file mode 100644 index 00000000000..7be072dd5f5 --- /dev/null +++ b/intern/cycles/util/ies.h @@ -0,0 +1,59 @@ +/* + * Copyright 2011-2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_IES_H__ +#define __UTIL_IES_H__ + +#include "util/string.h" +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +class IESFile { + public: + IESFile() + { + } + ~IESFile(); + + int packed_size(); + void pack(float *data); + + bool load(const string &ies); + void clear(); + + protected: + bool parse(const string &ies); + bool process(); + bool process_type_b(); + bool process_type_c(); + + /* The brightness distribution is stored in spherical coordinates. + * The horizontal angles correspond to theta in the regular notation + * and always span the full range from 0° to 360°. + * The vertical angles correspond to phi and always start at 0°. */ + vector v_angles, h_angles; + /* The actual values are stored here, with every entry storing the values + * of one horizontal segment. */ + vector> intensity; + + /* Types of angle representation in IES files. Currently, only B and C are supported. */ + enum IESType { TYPE_A = 3, TYPE_B = 2, TYPE_C = 1 } type; +}; + +CCL_NAMESPACE_END + +#endif /* __UTIL_IES_H__ */ diff --git a/intern/cycles/util/image.h b/intern/cycles/util/image.h new file mode 100644 index 00000000000..69fc3a50c1d --- /dev/null +++ b/intern/cycles/util/image.h @@ -0,0 +1,98 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_IMAGE_H__ +# define __UTIL_IMAGE_H__ + +/* OpenImageIO is used for all image file reading and writing. */ + +# include + +# include "util/half.h" +# include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +OIIO_NAMESPACE_USING + +template +void util_image_resize_pixels(const vector &input_pixels, + const size_t input_width, + const size_t input_height, + const size_t input_depth, + const size_t components, + vector *output_pixels, + size_t *output_width, + size_t *output_height, + size_t *output_depth); + +/* Cast input pixel from unknown storage to float. */ +template inline float util_image_cast_to_float(T value); + +template<> inline float util_image_cast_to_float(float value) +{ + return value; +} +template<> inline float util_image_cast_to_float(uchar value) +{ + return (float)value / 255.0f; +} +template<> inline float util_image_cast_to_float(uint16_t value) +{ + return (float)value / 65535.0f; +} +template<> inline float util_image_cast_to_float(half value) +{ + return half_to_float_image(value); +} + +/* Cast float value to output pixel type. 
*/ +template inline T util_image_cast_from_float(float value); + +template<> inline float util_image_cast_from_float(float value) +{ + return value; +} +template<> inline uchar util_image_cast_from_float(float value) +{ + if (value < 0.0f) { + return 0; + } + else if (value > (1.0f - 0.5f / 255.0f)) { + return 255; + } + return (uchar)((255.0f * value) + 0.5f); +} +template<> inline uint16_t util_image_cast_from_float(float value) +{ + if (value < 0.0f) { + return 0; + } + else if (value > (1.0f - 0.5f / 65535.0f)) { + return 65535; + } + return (uint16_t)((65535.0f * value) + 0.5f); +} +template<> inline half util_image_cast_from_float(float value) +{ + return float_to_half_image(value); +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_IMAGE_H__ */ + +#include "util/image_impl.h" diff --git a/intern/cycles/util/image_impl.h b/intern/cycles/util/image_impl.h new file mode 100644 index 00000000000..3d8eed80775 --- /dev/null +++ b/intern/cycles/util/image_impl.h @@ -0,0 +1,175 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __UTIL_IMAGE_IMPL_H__ +#define __UTIL_IMAGE_IMPL_H__ + +#include "util/algorithm.h" +#include "util/half.h" +#include "util/image.h" + +CCL_NAMESPACE_BEGIN + +namespace { + +template +const T *util_image_read(const vector &pixels, + const size_t width, + const size_t height, + const size_t /*depth*/, + const size_t components, + const size_t x, + const size_t y, + const size_t z) +{ + const size_t index = ((size_t)z * (width * height) + (size_t)y * width + (size_t)x) * components; + return &pixels[index]; +} + +template +void util_image_downscale_sample(const vector &pixels, + const size_t width, + const size_t height, + const size_t depth, + const size_t components, + const size_t kernel_size, + const float x, + const float y, + const float z, + T *result) +{ + assert(components <= 4); + const size_t ix = (size_t)x, iy = (size_t)y, iz = (size_t)z; + /* TODO(sergey): Support something smarter than box filer. */ + float accum[4] = {0}; + size_t count = 0; + for (size_t dz = 0; dz < kernel_size; ++dz) { + for (size_t dy = 0; dy < kernel_size; ++dy) { + for (size_t dx = 0; dx < kernel_size; ++dx) { + const size_t nx = ix + dx, ny = iy + dy, nz = iz + dz; + if (nx >= width || ny >= height || nz >= depth) { + continue; + } + const T *pixel = util_image_read(pixels, width, height, depth, components, nx, ny, nz); + for (size_t k = 0; k < components; ++k) { + accum[k] += util_image_cast_to_float(pixel[k]); + } + ++count; + } + } + } + if (count != 0) { + const float inv_count = 1.0f / (float)count; + for (size_t k = 0; k < components; ++k) { + result[k] = util_image_cast_from_float(accum[k] * inv_count); + } + } + else { + for (size_t k = 0; k < components; ++k) { + result[k] = T(0.0f); + } + } +} + +template +void util_image_downscale_pixels(const vector &input_pixels, + const size_t input_width, + const size_t input_height, + const size_t input_depth, + const size_t components, + const float inv_scale_factor, + const size_t output_width, + const size_t 
output_height, + const size_t output_depth, + vector *output_pixels) +{ + const size_t kernel_size = (size_t)(inv_scale_factor + 0.5f); + for (size_t z = 0; z < output_depth; ++z) { + for (size_t y = 0; y < output_height; ++y) { + for (size_t x = 0; x < output_width; ++x) { + const float input_x = (float)x * inv_scale_factor, input_y = (float)y * inv_scale_factor, + input_z = (float)z * inv_scale_factor; + const size_t output_index = (z * output_width * output_height + y * output_width + x) * + components; + util_image_downscale_sample(input_pixels, + input_width, + input_height, + input_depth, + components, + kernel_size, + input_x, + input_y, + input_z, + &output_pixels->at(output_index)); + } + } + } +} + +} /* namespace */ + +template +void util_image_resize_pixels(const vector &input_pixels, + const size_t input_width, + const size_t input_height, + const size_t input_depth, + const size_t components, + const float scale_factor, + vector *output_pixels, + size_t *output_width, + size_t *output_height, + size_t *output_depth) +{ + /* Early output for case when no scaling is applied. */ + if (scale_factor == 1.0f) { + *output_width = input_width; + *output_height = input_height; + *output_depth = input_depth; + *output_pixels = input_pixels; + return; + } + /* First of all, we calculate output image dimensions. + * We clamp them to be 1 pixel at least so we do not generate degenerate + * image. + */ + *output_width = max((size_t)((float)input_width * scale_factor), (size_t)1); + *output_height = max((size_t)((float)input_height * scale_factor), (size_t)1); + *output_depth = max((size_t)((float)input_depth * scale_factor), (size_t)1); + /* Prepare pixel storage for the result. 
*/ + const size_t num_output_pixels = ((*output_width) * (*output_height) * (*output_depth)) * + components; + output_pixels->resize(num_output_pixels); + if (scale_factor < 1.0f) { + const float inv_scale_factor = 1.0f / scale_factor; + util_image_downscale_pixels(input_pixels, + input_width, + input_height, + input_depth, + components, + inv_scale_factor, + *output_width, + *output_height, + *output_depth, + output_pixels); + } + else { + /* TODO(sergey): Needs implementation. */ + } +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_IMAGE_IMPL_H__ */ diff --git a/intern/cycles/util/list.h b/intern/cycles/util/list.h new file mode 100644 index 00000000000..f555b001186 --- /dev/null +++ b/intern/cycles/util/list.h @@ -0,0 +1,28 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_LIST_H__ +#define __UTIL_LIST_H__ + +#include + +CCL_NAMESPACE_BEGIN + +using std::list; + +CCL_NAMESPACE_END + +#endif /* __UTIL_LIST_H__ */ diff --git a/intern/cycles/util/log.cpp b/intern/cycles/util/log.cpp new file mode 100644 index 00000000000..68a5a3f576f --- /dev/null +++ b/intern/cycles/util/log.cpp @@ -0,0 +1,96 @@ +/* + * Copyright 2011-2014 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util/log.h" + +#include "util/math.h" +#include "util/string.h" + +#include +#ifdef _MSC_VER +# define snprintf _snprintf +#endif + +CCL_NAMESPACE_BEGIN + +#ifdef WITH_CYCLES_LOGGING +static bool is_verbosity_set() +{ + using CYCLES_GFLAGS_NAMESPACE::GetCommandLineOption; + + std::string verbosity; + if (!GetCommandLineOption("v", &verbosity)) { + return false; + } + return verbosity != "0"; +} +#endif + +void util_logging_init(const char *argv0) +{ +#ifdef WITH_CYCLES_LOGGING + using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption; + + google::InitGoogleLogging(argv0); + SetCommandLineOption("logtostderr", "1"); + if (!is_verbosity_set()) { + SetCommandLineOption("v", "0"); + } + SetCommandLineOption("stderrthreshold", "0"); + SetCommandLineOption("minloglevel", "0"); +#else + (void)argv0; +#endif +} + +void util_logging_start() +{ +#ifdef WITH_CYCLES_LOGGING + using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption; + SetCommandLineOption("logtostderr", "1"); + if (!is_verbosity_set()) { + SetCommandLineOption("v", "2"); + } + SetCommandLineOption("stderrthreshold", "0"); + SetCommandLineOption("minloglevel", "0"); +#endif +} + +void util_logging_verbosity_set(int verbosity) +{ +#ifdef WITH_CYCLES_LOGGING + using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption; + char val[10]; + snprintf(val, sizeof(val), "%d", verbosity); + SetCommandLineOption("v", val); +#else + (void)verbosity; +#endif +} + +std::ostream &operator<<(std::ostream &os, const int2 &value) +{ + os << "(" << value.x << ", " << value.y << ")"; + return os; +} + 
+std::ostream &operator<<(std::ostream &os, const float3 &value) +{ + os << "(" << value.x << ", " << value.y << ", " << value.z << ")"; + return os; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/log.h b/intern/cycles/util/log.h new file mode 100644 index 00000000000..35c2d436d09 --- /dev/null +++ b/intern/cycles/util/log.h @@ -0,0 +1,101 @@ +/* + * Copyright 2011-2014 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_LOGGING_H__ +#define __UTIL_LOGGING_H__ + +#if defined(WITH_CYCLES_LOGGING) && !defined(__KERNEL_GPU__) +# include +# include +#endif + +#include + +CCL_NAMESPACE_BEGIN + +#if !defined(WITH_CYCLES_LOGGING) || defined(__KERNEL_GPU__) +class StubStream { + public: + template StubStream &operator<<(const T &) + { + return *this; + } +}; + +class LogMessageVoidify { + public: + LogMessageVoidify() + { + } + void operator&(const StubStream &) + { + } +}; + +# define LOG_SUPPRESS() (true) ? 
((void)0) : LogMessageVoidify() & StubStream() +# define LOG(severity) LOG_SUPPRESS() +# define VLOG(severity) LOG_SUPPRESS() +# define VLOG_IF(severity, condition) LOG_SUPPRESS() +# define VLOG_IS_ON(severity) false + +# define CHECK(expression) LOG_SUPPRESS() + +# define CHECK_NOTNULL(expression) LOG_SUPPRESS() +# define CHECK_NULL(expression) LOG_SUPPRESS() + +# define CHECK_NEAR(actual, expected, eps) LOG_SUPPRESS() + +# define CHECK_GE(a, b) LOG_SUPPRESS() +# define CHECK_NE(a, b) LOG_SUPPRESS() +# define CHECK_EQ(a, b) LOG_SUPPRESS() +# define CHECK_GT(a, b) LOG_SUPPRESS() +# define CHECK_LT(a, b) LOG_SUPPRESS() +# define CHECK_LE(a, b) LOG_SUPPRESS() + +# define DCHECK(expression) LOG_SUPPRESS() + +# define DCHECK_NOTNULL(expression) LOG_SUPPRESS() +# define DCHECK_NULL(expression) LOG_SUPPRESS() + +# define DCHECK_NEAR(actual, expected, eps) LOG_SUPPRESS() + +# define DCHECK_GE(a, b) LOG_SUPPRESS() +# define DCHECK_NE(a, b) LOG_SUPPRESS() +# define DCHECK_EQ(a, b) LOG_SUPPRESS() +# define DCHECK_GT(a, b) LOG_SUPPRESS() +# define DCHECK_LT(a, b) LOG_SUPPRESS() +# define DCHECK_LE(a, b) LOG_SUPPRESS() + +# define LOG_ASSERT(expression) LOG_SUPPRESS() +#endif + +#define VLOG_ONCE(level, flag) \ + if (!flag) \ + flag = true, VLOG(level) + +struct int2; +struct float3; + +void util_logging_init(const char *argv0); +void util_logging_start(); +void util_logging_verbosity_set(int verbosity); + +std::ostream &operator<<(std::ostream &os, const int2 &value); +std::ostream &operator<<(std::ostream &os, const float3 &value); + +CCL_NAMESPACE_END + +#endif /* __UTIL_LOGGING_H__ */ diff --git a/intern/cycles/util/map.h b/intern/cycles/util/map.h new file mode 100644 index 00000000000..f1b2522362f --- /dev/null +++ b/intern/cycles/util/map.h @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_MAP_H__ +#define __UTIL_MAP_H__ + +#include +#include + +CCL_NAMESPACE_BEGIN + +using std::map; +using std::pair; +using std::unordered_map; +using std::unordered_multimap; + +template static void map_free_memory(T &data) +{ + /* Use swap() trick to actually free all internal memory. */ + T empty_data; + data.swap(empty_data); +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_MAP_H__ */ diff --git a/intern/cycles/util/math.h b/intern/cycles/util/math.h new file mode 100644 index 00000000000..e7fc492733f --- /dev/null +++ b/intern/cycles/util/math.h @@ -0,0 +1,870 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_MATH_H__ +#define __UTIL_MATH_H__ + +/* Math + * + * Basic math functions on scalar and vector types. This header is used by + * both the kernel code when compiled as C++, and other C++ non-kernel code. 
*/ + +#ifndef __KERNEL_GPU__ +# include +#endif + +#ifdef __HIP__ +# include +#endif + +#include +#include +#include + +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* Float Pi variations */ + +/* Division */ +#ifndef M_PI_F +# define M_PI_F (3.1415926535897932f) /* pi */ +#endif +#ifndef M_PI_2_F +# define M_PI_2_F (1.5707963267948966f) /* pi/2 */ +#endif +#ifndef M_PI_4_F +# define M_PI_4_F (0.7853981633974830f) /* pi/4 */ +#endif +#ifndef M_1_PI_F +# define M_1_PI_F (0.3183098861837067f) /* 1/pi */ +#endif +#ifndef M_2_PI_F +# define M_2_PI_F (0.6366197723675813f) /* 2/pi */ +#endif +#ifndef M_1_2PI_F +# define M_1_2PI_F (0.1591549430918953f) /* 1/(2*pi) */ +#endif +#ifndef M_SQRT_PI_8_F +# define M_SQRT_PI_8_F (0.6266570686577501f) /* sqrt(pi/8) */ +#endif +#ifndef M_LN_2PI_F +# define M_LN_2PI_F (1.8378770664093454f) /* ln(2*pi) */ +#endif + +/* Multiplication */ +#ifndef M_2PI_F +# define M_2PI_F (6.2831853071795864f) /* 2*pi */ +#endif +#ifndef M_4PI_F +# define M_4PI_F (12.566370614359172f) /* 4*pi */ +#endif + +/* Float sqrt variations */ +#ifndef M_SQRT2_F +# define M_SQRT2_F (1.4142135623730950f) /* sqrt(2) */ +#endif +#ifndef M_LN2_F +# define M_LN2_F (0.6931471805599453f) /* ln(2) */ +#endif +#ifndef M_LN10_F +# define M_LN10_F (2.3025850929940457f) /* ln(10) */ +#endif + +/* Scalar */ + +#ifndef __HIP__ +# ifdef _WIN32 +ccl_device_inline float fmaxf(float a, float b) +{ + return (a > b) ? a : b; +} + +ccl_device_inline float fminf(float a, float b) +{ + return (a < b) ? a : b; +} + +# endif /* _WIN32 */ +#endif /* __HIP__ */ + +#ifndef __KERNEL_GPU__ +using std::isfinite; +using std::isnan; +using std::sqrt; + +ccl_device_inline int abs(int x) +{ + return (x > 0) ? x : -x; +} + +ccl_device_inline int max(int a, int b) +{ + return (a > b) ? a : b; +} + +ccl_device_inline int min(int a, int b) +{ + return (a < b) ? a : b; +} + +ccl_device_inline uint min(uint a, uint b) +{ + return (a < b) ? 
a : b; +} + +ccl_device_inline float max(float a, float b) +{ + return (a > b) ? a : b; +} + +ccl_device_inline float min(float a, float b) +{ + return (a < b) ? a : b; +} + +ccl_device_inline double max(double a, double b) +{ + return (a > b) ? a : b; +} + +ccl_device_inline double min(double a, double b) +{ + return (a < b) ? a : b; +} + +/* These 2 guys are templated for usage with registers data. + * + * NOTE: Since this is CPU-only functions it is ok to use references here. + * But for other devices we'll need to be careful about this. + */ + +template ccl_device_inline T min4(const T &a, const T &b, const T &c, const T &d) +{ + return min(min(a, b), min(c, d)); +} + +template ccl_device_inline T max4(const T &a, const T &b, const T &c, const T &d) +{ + return max(max(a, b), max(c, d)); +} +#endif /* __KERNEL_GPU__ */ + +ccl_device_inline float min4(float a, float b, float c, float d) +{ + return min(min(a, b), min(c, d)); +} + +ccl_device_inline float max4(float a, float b, float c, float d) +{ + return max(max(a, b), max(c, d)); +} + +/* Int/Float conversion */ + +ccl_device_inline int as_int(uint i) +{ + union { + uint ui; + int i; + } u; + u.ui = i; + return u.i; +} + +ccl_device_inline uint as_uint(int i) +{ + union { + uint ui; + int i; + } u; + u.i = i; + return u.ui; +} + +ccl_device_inline uint as_uint(float f) +{ + union { + uint i; + float f; + } u; + u.f = f; + return u.i; +} + +#ifndef __HIP__ +ccl_device_inline int __float_as_int(float f) +{ + union { + int i; + float f; + } u; + u.f = f; + return u.i; +} + +ccl_device_inline float __int_as_float(int i) +{ + union { + int i; + float f; + } u; + u.i = i; + return u.f; +} + +ccl_device_inline uint __float_as_uint(float f) +{ + union { + uint i; + float f; + } u; + u.f = f; + return u.i; +} + +ccl_device_inline float __uint_as_float(uint i) +{ + union { + uint i; + float f; + } u; + u.i = i; + return u.f; +} +#endif + +ccl_device_inline int4 __float4_as_int4(float4 f) +{ +#ifdef __KERNEL_SSE__ + 
return int4(_mm_castps_si128(f.m128)); +#else + return make_int4( + __float_as_int(f.x), __float_as_int(f.y), __float_as_int(f.z), __float_as_int(f.w)); +#endif +} + +ccl_device_inline float4 __int4_as_float4(int4 i) +{ +#ifdef __KERNEL_SSE__ + return float4(_mm_castsi128_ps(i.m128)); +#else + return make_float4( + __int_as_float(i.x), __int_as_float(i.y), __int_as_float(i.z), __int_as_float(i.w)); +#endif +} + +template ccl_device_inline uint pointer_pack_to_uint_0(T *ptr) +{ + return ((uint64_t)ptr) & 0xFFFFFFFF; +} + +template ccl_device_inline uint pointer_pack_to_uint_1(T *ptr) +{ + return (((uint64_t)ptr) >> 32) & 0xFFFFFFFF; +} + +template ccl_device_inline T *pointer_unpack_from_uint(const uint a, const uint b) +{ + return (T *)(((uint64_t)b << 32) | a); +} + +ccl_device_inline uint uint16_pack_to_uint(const uint a, const uint b) +{ + return (a << 16) | b; +} + +ccl_device_inline uint uint16_unpack_from_uint_0(const uint i) +{ + return i >> 16; +} + +ccl_device_inline uint uint16_unpack_from_uint_1(const uint i) +{ + return i & 0xFFFF; +} + +/* Versions of functions which are safe for fast math. */ +ccl_device_inline bool isnan_safe(float f) +{ + unsigned int x = __float_as_uint(f); + return (x << 1) > 0xff000000u; +} + +ccl_device_inline bool isfinite_safe(float f) +{ + /* By IEEE 754 rule, 2*Inf equals Inf */ + unsigned int x = __float_as_uint(f); + return (f == f) && (x == 0 || x == (1u << 31) || (f != 2.0f * f)) && !((x << 1) > 0xff000000u); +} + +ccl_device_inline float ensure_finite(float v) +{ + return isfinite_safe(v) ? 
v : 0.0f; +} + +ccl_device_inline int clamp(int a, int mn, int mx) +{ + return min(max(a, mn), mx); +} + +ccl_device_inline float clamp(float a, float mn, float mx) +{ + return min(max(a, mn), mx); +} + +ccl_device_inline float mix(float a, float b, float t) +{ + return a + t * (b - a); +} + +ccl_device_inline float smoothstep(float edge0, float edge1, float x) +{ + float result; + if (x < edge0) + result = 0.0f; + else if (x >= edge1) + result = 1.0f; + else { + float t = (x - edge0) / (edge1 - edge0); + result = (3.0f - 2.0f * t) * (t * t); + } + return result; +} + +#ifndef __KERNEL_CUDA__ +ccl_device_inline float saturate(float a) +{ + return clamp(a, 0.0f, 1.0f); +} +#endif /* __KERNEL_CUDA__ */ + +ccl_device_inline int float_to_int(float f) +{ + return (int)f; +} + +ccl_device_inline int floor_to_int(float f) +{ + return float_to_int(floorf(f)); +} + +ccl_device_inline int quick_floor_to_int(float x) +{ + return float_to_int(x) - ((x < 0) ? 1 : 0); +} + +ccl_device_inline float floorfrac(float x, ccl_private int *i) +{ + *i = quick_floor_to_int(x); + return x - *i; +} + +ccl_device_inline int ceil_to_int(float f) +{ + return float_to_int(ceilf(f)); +} + +ccl_device_inline float fractf(float x) +{ + return x - floorf(x); +} + +/* Adapted from godot-engine math_funcs.h. */ +ccl_device_inline float wrapf(float value, float max, float min) +{ + float range = max - min; + return (range != 0.0f) ? value - (range * floorf((value - min) / range)) : min; +} + +ccl_device_inline float pingpongf(float a, float b) +{ + return (b != 0.0f) ? fabsf(fractf((a - b) / (b * 2.0f)) * b * 2.0f - b) : 0.0f; +} + +ccl_device_inline float smoothminf(float a, float b, float k) +{ + if (k != 0.0f) { + float h = fmaxf(k - fabsf(a - b), 0.0f) / k; + return fminf(a, b) - h * h * h * k * (1.0f / 6.0f); + } + else { + return fminf(a, b); + } +} + +ccl_device_inline float signf(float f) +{ + return (f < 0.0f) ? 
-1.0f : 1.0f; +} + +ccl_device_inline float nonzerof(float f, float eps) +{ + if (fabsf(f) < eps) + return signf(f) * eps; + else + return f; +} + +/* `signum` function testing for zero. Matches GLSL and OSL functions. */ +ccl_device_inline float compatible_signf(float f) +{ + if (f == 0.0f) { + return 0.0f; + } + else { + return signf(f); + } +} + +ccl_device_inline float smoothstepf(float f) +{ + float ff = f * f; + return (3.0f * ff - 2.0f * ff * f); +} + +ccl_device_inline int mod(int x, int m) +{ + return (x % m + m) % m; +} + +ccl_device_inline float3 float2_to_float3(const float2 a) +{ + return make_float3(a.x, a.y, 0.0f); +} + +ccl_device_inline float3 float4_to_float3(const float4 a) +{ + return make_float3(a.x, a.y, a.z); +} + +ccl_device_inline float4 float3_to_float4(const float3 a) +{ + return make_float4(a.x, a.y, a.z, 1.0f); +} + +ccl_device_inline float inverse_lerp(float a, float b, float x) +{ + return (x - a) / (b - a); +} + +/* Cubic interpolation between b and c, a and d are the previous and next point. 
*/ +ccl_device_inline float cubic_interp(float a, float b, float c, float d, float x) +{ + return 0.5f * + (((d + 3.0f * (b - c) - a) * x + (2.0f * a - 5.0f * b + 4.0f * c - d)) * x + + (c - a)) * + x + + b; +} + +CCL_NAMESPACE_END + +#include "util/math_int2.h" +#include "util/math_int3.h" +#include "util/math_int4.h" + +#include "util/math_float2.h" +#include "util/math_float3.h" +#include "util/math_float4.h" + +#include "util/rect.h" + +CCL_NAMESPACE_BEGIN + +/* Interpolation */ + +template A lerp(const A &a, const A &b, const B &t) +{ + return (A)(a * ((B)1 - t) + b * t); +} + +/* Triangle */ + +ccl_device_inline float triangle_area(ccl_private const float3 &v1, + ccl_private const float3 &v2, + ccl_private const float3 &v3) +{ + return len(cross(v3 - v2, v1 - v2)) * 0.5f; +} + +/* Orthonormal vectors */ + +ccl_device_inline void make_orthonormals(const float3 N, + ccl_private float3 *a, + ccl_private float3 *b) +{ +#if 0 + if (fabsf(N.y) >= 0.999f) { + *a = make_float3(1, 0, 0); + *b = make_float3(0, 0, 1); + return; + } + if (fabsf(N.z) >= 0.999f) { + *a = make_float3(1, 0, 0); + *b = make_float3(0, 1, 0); + return; + } +#endif + + if (N.x != N.y || N.x != N.z) + *a = make_float3(N.z - N.y, N.x - N.z, N.y - N.x); //(1,1,1)x N + else + *a = make_float3(N.z - N.y, N.x + N.z, -N.y - N.x); //(-1,1,1)x N + + *a = normalize(*a); + *b = cross(N, *a); +} + +/* Color division */ + +ccl_device_inline float3 safe_invert_color(float3 a) +{ + float x, y, z; + + x = (a.x != 0.0f) ? 1.0f / a.x : 0.0f; + y = (a.y != 0.0f) ? 1.0f / a.y : 0.0f; + z = (a.z != 0.0f) ? 1.0f / a.z : 0.0f; + + return make_float3(x, y, z); +} + +ccl_device_inline float3 safe_divide_color(float3 a, float3 b) +{ + float x, y, z; + + x = (b.x != 0.0f) ? a.x / b.x : 0.0f; + y = (b.y != 0.0f) ? a.y / b.y : 0.0f; + z = (b.z != 0.0f) ? a.z / b.z : 0.0f; + + return make_float3(x, y, z); +} + +ccl_device_inline float3 safe_divide_even_color(float3 a, float3 b) +{ + float x, y, z; + + x = (b.x != 0.0f) ? 
a.x / b.x : 0.0f; + y = (b.y != 0.0f) ? a.y / b.y : 0.0f; + z = (b.z != 0.0f) ? a.z / b.z : 0.0f; + + /* try to get gray even if b is zero */ + if (b.x == 0.0f) { + if (b.y == 0.0f) { + x = z; + y = z; + } + else if (b.z == 0.0f) { + x = y; + z = y; + } + else + x = 0.5f * (y + z); + } + else if (b.y == 0.0f) { + if (b.z == 0.0f) { + y = x; + z = x; + } + else + y = 0.5f * (x + z); + } + else if (b.z == 0.0f) { + z = 0.5f * (x + y); + } + + return make_float3(x, y, z); +} + +/* Rotation of point around axis and angle */ + +ccl_device_inline float3 rotate_around_axis(float3 p, float3 axis, float angle) +{ + float costheta = cosf(angle); + float sintheta = sinf(angle); + float3 r; + + r.x = ((costheta + (1 - costheta) * axis.x * axis.x) * p.x) + + (((1 - costheta) * axis.x * axis.y - axis.z * sintheta) * p.y) + + (((1 - costheta) * axis.x * axis.z + axis.y * sintheta) * p.z); + + r.y = (((1 - costheta) * axis.x * axis.y + axis.z * sintheta) * p.x) + + ((costheta + (1 - costheta) * axis.y * axis.y) * p.y) + + (((1 - costheta) * axis.y * axis.z - axis.x * sintheta) * p.z); + + r.z = (((1 - costheta) * axis.x * axis.z - axis.y * sintheta) * p.x) + + (((1 - costheta) * axis.y * axis.z + axis.x * sintheta) * p.y) + + ((costheta + (1 - costheta) * axis.z * axis.z) * p.z); + + return r; +} + +/* NaN-safe math ops */ + +ccl_device_inline float safe_sqrtf(float f) +{ + return sqrtf(max(f, 0.0f)); +} + +ccl_device_inline float inversesqrtf(float f) +{ + return (f > 0.0f) ? 
1.0f / sqrtf(f) : 0.0f; +} + +ccl_device float safe_asinf(float a) +{ + return asinf(clamp(a, -1.0f, 1.0f)); +} + +ccl_device float safe_acosf(float a) +{ + return acosf(clamp(a, -1.0f, 1.0f)); +} + +ccl_device float compatible_powf(float x, float y) +{ +#ifdef __KERNEL_GPU__ + if (y == 0.0f) /* x^0 -> 1, including 0^0 */ + return 1.0f; + + /* GPU pow doesn't accept negative x, do manual checks here */ + if (x < 0.0f) { + if (fmodf(-y, 2.0f) == 0.0f) + return powf(-x, y); + else + return -powf(-x, y); + } + else if (x == 0.0f) + return 0.0f; +#endif + return powf(x, y); +} + +ccl_device float safe_powf(float a, float b) +{ + if (UNLIKELY(a < 0.0f && b != float_to_int(b))) + return 0.0f; + + return compatible_powf(a, b); +} + +ccl_device float safe_divide(float a, float b) +{ + return (b != 0.0f) ? a / b : 0.0f; +} + +ccl_device float safe_logf(float a, float b) +{ + if (UNLIKELY(a <= 0.0f || b <= 0.0f)) + return 0.0f; + + return safe_divide(logf(a), logf(b)); +} + +ccl_device float safe_modulo(float a, float b) +{ + return (b != 0.0f) ? 
fmodf(a, b) : 0.0f; +} + +ccl_device_inline float sqr(float a) +{ + return a * a; +} + +ccl_device_inline float pow20(float a) +{ + return sqr(sqr(sqr(sqr(a)) * a)); +} + +ccl_device_inline float pow22(float a) +{ + return sqr(a * sqr(sqr(sqr(a)) * a)); +} + +ccl_device_inline float beta(float x, float y) +{ + return expf(lgammaf(x) + lgammaf(y) - lgammaf(x + y)); +} + +ccl_device_inline float xor_signmask(float x, int y) +{ + return __int_as_float(__float_as_int(x) ^ y); +} + +ccl_device float bits_to_01(uint bits) +{ + return bits * (1.0f / (float)0xFFFFFFFF); +} + +ccl_device_inline uint count_leading_zeros(uint x) +{ +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) + return __clz(x); +#else + assert(x != 0); +# ifdef _MSC_VER + unsigned long leading_zero = 0; + _BitScanReverse(&leading_zero, x); + return (31 - leading_zero); +# else + return __builtin_clz(x); +# endif +#endif +} + +ccl_device_inline uint count_trailing_zeros(uint x) +{ +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) + return (__ffs(x) - 1); +#else + assert(x != 0); +# ifdef _MSC_VER + unsigned long ctz = 0; + _BitScanForward(&ctz, x); + return ctz; +# else + return __builtin_ctz(x); +# endif +#endif +} + +ccl_device_inline uint find_first_set(uint x) +{ +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) + return __ffs(x); +#else +# ifdef _MSC_VER + return (x != 0) ? 
(32 - count_leading_zeros(x & (-x))) : 0; +# else + return __builtin_ffs(x); +# endif +#endif +} + +/* projections */ +ccl_device_inline float2 map_to_tube(const float3 co) +{ + float len, u, v; + len = sqrtf(co.x * co.x + co.y * co.y); + if (len > 0.0f) { + u = (1.0f - (atan2f(co.x / len, co.y / len) / M_PI_F)) * 0.5f; + v = (co.z + 1.0f) * 0.5f; + } + else { + u = v = 0.0f; + } + return make_float2(u, v); +} + +ccl_device_inline float2 map_to_sphere(const float3 co) +{ + float l = len(co); + float u, v; + if (l > 0.0f) { + if (UNLIKELY(co.x == 0.0f && co.y == 0.0f)) { + u = 0.0f; /* Otherwise domain error. */ + } + else { + u = (1.0f - atan2f(co.x, co.y) / M_PI_F) / 2.0f; + } + v = 1.0f - safe_acosf(co.z / l) / M_PI_F; + } + else { + u = v = 0.0f; + } + return make_float2(u, v); +} + +/* Compares two floats. + * Returns true if their absolute difference is smaller than abs_diff (for numbers near zero) + * or their relative difference is less than ulp_diff ULPs. + * Based on + * https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/ + */ + +ccl_device_inline float compare_floats(float a, float b, float abs_diff, int ulp_diff) +{ + if (fabsf(a - b) < abs_diff) { + return true; + } + + if ((a < 0.0f) != (b < 0.0f)) { + return false; + } + + return (abs(__float_as_int(a) - __float_as_int(b)) < ulp_diff); +} + +/* Calculate the angle between the two vectors a and b. + * The usual approach `acos(dot(a, b))` has severe precision issues for small angles, + * which are avoided by this method. + * Based on "Mangled Angles" from https://people.eecs.berkeley.edu/~wkahan/Mindless.pdf + */ +ccl_device_inline float precise_angle(float3 a, float3 b) +{ + return 2.0f * atan2f(len(a - b), len(a + b)); +} + +/* Return value which is greater than the given one and is a power of two. */ +ccl_device_inline uint next_power_of_two(uint x) +{ + return x == 0 ? 
1 : 1 << (32 - count_leading_zeros(x)); +} + +/* Return value which is lower than the given one and is a power of two. */ +ccl_device_inline uint prev_power_of_two(uint x) +{ + return x < 2 ? x : 1 << (31 - count_leading_zeros(x - 1)); +} + +#ifndef __has_builtin +# define __has_builtin(v) 0 +#endif + +/* Reverses the bits of a 32 bit integer. */ +ccl_device_inline uint32_t reverse_integer_bits(uint32_t x) +{ + /* Use a native instruction if it exists. */ +#if defined(__arm__) || defined(__aarch64__) + __asm__("rbit %w0, %w1" : "=r"(x) : "r"(x)); + return x; +#elif defined(__KERNEL_CUDA__) + return __brev(x); +#elif __has_builtin(__builtin_bitreverse32) + return __builtin_bitreverse32(x); +#else + /* Flip pairwise. */ + x = ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1); + /* Flip pairs. */ + x = ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2); + /* Flip nibbles. */ + x = ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4); + /* Flip bytes. CPUs have an instruction for that, pretty fast one. */ +# ifdef _MSC_VER + return _byteswap_ulong(x); +# elif defined(__INTEL_COMPILER) + return (uint32_t)_bswap((int)x); +# else + /* Assuming gcc or clang. */ + return __builtin_bswap32(x); +# endif +#endif +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_MATH_H__ */ diff --git a/intern/cycles/util/math_cdf.cpp b/intern/cycles/util/math_cdf.cpp new file mode 100644 index 00000000000..02c6646f824 --- /dev/null +++ b/intern/cycles/util/math_cdf.cpp @@ -0,0 +1,70 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util/math_cdf.h" + +#include "util/algorithm.h" +#include "util/math.h" + +CCL_NAMESPACE_BEGIN + +/* Invert pre-calculated CDF function. */ +void util_cdf_invert(const int resolution, + const float from, + const float to, + const vector &cdf, + const bool make_symmetric, + vector &inv_cdf) +{ + const float inv_resolution = 1.0f / (float)resolution; + const float range = to - from; + inv_cdf.resize(resolution); + if (make_symmetric) { + const int half_size = (resolution - 1) / 2; + for (int i = 0; i <= half_size; i++) { + float x = i / (float)half_size; + int index = upper_bound(cdf.begin(), cdf.end(), x) - cdf.begin(); + float t; + if (index < cdf.size() - 1) { + t = (x - cdf[index]) / (cdf[index + 1] - cdf[index]); + } + else { + t = 0.0f; + index = cdf.size() - 1; + } + float y = ((index + t) / (resolution - 1)) * (2.0f * range); + inv_cdf[half_size + i] = 0.5f * (1.0f + y); + inv_cdf[half_size - i] = 0.5f * (1.0f - y); + } + } + else { + for (int i = 0; i < resolution; i++) { + float x = from + range * (float)i * inv_resolution; + int index = upper_bound(cdf.begin(), cdf.end(), x) - cdf.begin(); + float t; + if (index < cdf.size() - 1) { + t = (x - cdf[index]) / (cdf[index + 1] - cdf[index]); + } + else { + t = 0.0f; + index = resolution; + } + inv_cdf[i] = (index + t) * inv_resolution; + } + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/math_cdf.h b/intern/cycles/util/math_cdf.h new file mode 100644 index 00000000000..4c57dac4bbe --- /dev/null +++ b/intern/cycles/util/math_cdf.h @@ -0,0 +1,75 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_MATH_CDF_H__ +#define __UTIL_MATH_CDF_H__ + +#include "util/algorithm.h" +#include "util/math.h" +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +/* Evaluate CDF of a given functor with given range and resolution. */ +template +void util_cdf_evaluate( + const int resolution, const float from, const float to, Functor functor, vector &cdf) +{ + const int cdf_count = resolution + 1; + const float range = to - from; + cdf.resize(cdf_count); + cdf[0] = 0.0f; + /* Actual CDF evaluation. */ + for (int i = 0; i < resolution; ++i) { + float x = from + range * (float)i / (resolution - 1); + float y = functor(x); + cdf[i + 1] = cdf[i] + fabsf(y); + } + /* Normalize the CDF. */ + for (int i = 0; i <= resolution; i++) { + cdf[i] /= cdf[resolution]; + } +} + +/* Invert pre-calculated CDF function. */ +void util_cdf_invert(const int resolution, + const float from, + const float to, + const vector &cdf, + const bool make_symmetric, + vector &inv_cdf); + +/* Evaluate inverted CDF of a given functor with given range and resolution. */ +template +void util_cdf_inverted(const int resolution, + const float from, + const float to, + Functor functor, + const bool make_symmetric, + vector &inv_cdf) +{ + vector cdf; + /* There is no much smartness going around lower resolution for the CDF table, + * this just to match the old code from pixel filter so it all stays exactly + * the same and no regression tests are failed. 
+ */ + util_cdf_evaluate(resolution - 1, from, to, functor, cdf); + util_cdf_invert(resolution, from, to, cdf, make_symmetric, inv_cdf); +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_MATH_H_CDF__ */ diff --git a/intern/cycles/util/math_fast.h b/intern/cycles/util/math_fast.h new file mode 100644 index 00000000000..cc924f36a71 --- /dev/null +++ b/intern/cycles/util/math_fast.h @@ -0,0 +1,652 @@ +/* + * Adapted from OpenImageIO library with this license: + * + * Copyright 2008-2014 Larry Gritz and the other authors and contributors. + * All Rights Reserved. + + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the software's owners nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * (This is the Modified BSD License) + * + * A few bits here are based upon code from NVIDIA that was also released + * under the same modified BSD license, and marked as: + * Copyright 2004 NVIDIA Corporation. All Rights Reserved. + * + * Some parts of this file were first open-sourced in Open Shading Language, + * then later moved here. The original copyright notice was: + * Copyright (c) 2009-2014 Sony Pictures Imageworks Inc., et al. + * + * Many of the math functions were copied from or inspired by other + * public domain sources or open source packages with compatible licenses. + * The individual functions give references were applicable. + */ + +#ifndef __UTIL_FAST_MATH__ +#define __UTIL_FAST_MATH__ + +CCL_NAMESPACE_BEGIN + +ccl_device_inline float madd(const float a, const float b, const float c) +{ + /* NOTE: In the future we may want to explicitly ask for a fused + * multiply-add in a specialized version for float. + * + * NOTE: GCC/ICC will turn this (for float) into a FMA unless + * explicitly asked not to, clang seems to leave the code alone. + */ + return a * b + c; +} + +ccl_device_inline float4 madd4(const float4 a, const float4 b, const float4 c) +{ + return a * b + c; +} + +/* + * FAST & APPROXIMATE MATH + * + * The functions named "fast_*" provide a set of replacements to libm that + * are much faster at the expense of some accuracy and robust handling of + * extreme values. 
One design goal for these approximation was to avoid + * branches as much as possible and operate on single precision values only + * so that SIMD versions should be straightforward ports We also try to + * implement "safe" semantics (ie: clamp to valid range where possible) + * natively since wrapping these inline calls in another layer would be + * wasteful. + * + * Some functions are fast_safe_*, which is both a faster approximation as + * well as clamped input domain to ensure no NaN, Inf, or divide by zero. + */ + +/* Round to nearest integer, returning as an int. */ +ccl_device_inline int fast_rint(float x) +{ + /* used by sin/cos/tan range reduction. */ +#ifdef __KERNEL_SSE4__ + /* Single `roundps` instruction on SSE4.1+ (for gcc/clang at least). */ + return float_to_int(rintf(x)); +#else + /* emulate rounding by adding/subtracting 0.5. */ + return float_to_int(x + copysignf(0.5f, x)); +#endif +} + +ccl_device float fast_sinf(float x) +{ + /* Very accurate argument reduction from SLEEF, + * starts failing around x=262000 + * + * Results on: [-2pi,2pi]. + * + * Examined 2173837240 values of sin: 0.00662760244 avg ulp diff, 2 max ulp, + * 1.19209e-07 max error + */ + int q = fast_rint(x * M_1_PI_F); + float qf = (float)q; + x = madd(qf, -0.78515625f * 4, x); + x = madd(qf, -0.00024187564849853515625f * 4, x); + x = madd(qf, -3.7747668102383613586e-08f * 4, x); + x = madd(qf, -1.2816720341285448015e-12f * 4, x); + x = M_PI_2_F - (M_PI_2_F - x); /* Crush denormals */ + float s = x * x; + if ((q & 1) != 0) + x = -x; + /* This polynomial approximation has very low error on [-pi/2,+pi/2] + * 1.19209e-07 max error in total over [-2pi,+2pi]. 
*/ + float u = 2.6083159809786593541503e-06f; + u = madd(u, s, -0.0001981069071916863322258f); + u = madd(u, s, +0.00833307858556509017944336f); + u = madd(u, s, -0.166666597127914428710938f); + u = madd(s, u * x, x); + /* For large x, the argument reduction can fail and the polynomial can be + * evaluated with arguments outside the valid internal. Just clamp the bad + * values away (setting to 0.0f means no branches need to be generated). */ + if (fabsf(u) > 1.0f) { + u = 0.0f; + } + return u; +} + +ccl_device float fast_cosf(float x) +{ + /* Same argument reduction as fast_sinf(). */ + int q = fast_rint(x * M_1_PI_F); + float qf = (float)q; + x = madd(qf, -0.78515625f * 4, x); + x = madd(qf, -0.00024187564849853515625f * 4, x); + x = madd(qf, -3.7747668102383613586e-08f * 4, x); + x = madd(qf, -1.2816720341285448015e-12f * 4, x); + x = M_PI_2_F - (M_PI_2_F - x); /* Crush denormals. */ + float s = x * x; + /* Polynomial from SLEEF's sincosf, max error is + * 4.33127e-07 over [-2pi,2pi] (98% of values are "exact"). */ + float u = -2.71811842367242206819355e-07f; + u = madd(u, s, +2.47990446951007470488548e-05f); + u = madd(u, s, -0.00138888787478208541870117f); + u = madd(u, s, +0.0416666641831398010253906f); + u = madd(u, s, -0.5f); + u = madd(u, s, +1.0f); + if ((q & 1) != 0) { + u = -u; + } + if (fabsf(u) > 1.0f) { + u = 0.0f; + } + return u; +} + +ccl_device void fast_sincosf(float x, ccl_private float *sine, ccl_private float *cosine) +{ + /* Same argument reduction as fast_sin. */ + int q = fast_rint(x * M_1_PI_F); + float qf = (float)q; + x = madd(qf, -0.78515625f * 4, x); + x = madd(qf, -0.00024187564849853515625f * 4, x); + x = madd(qf, -3.7747668102383613586e-08f * 4, x); + x = madd(qf, -1.2816720341285448015e-12f * 4, x); + x = M_PI_2_F - (M_PI_2_F - x); // crush denormals + float s = x * x; + /* NOTE: same exact polynomials as fast_sinf() and fast_cosf() above. 
*/ + if ((q & 1) != 0) { + x = -x; + } + float su = 2.6083159809786593541503e-06f; + su = madd(su, s, -0.0001981069071916863322258f); + su = madd(su, s, +0.00833307858556509017944336f); + su = madd(su, s, -0.166666597127914428710938f); + su = madd(s, su * x, x); + float cu = -2.71811842367242206819355e-07f; + cu = madd(cu, s, +2.47990446951007470488548e-05f); + cu = madd(cu, s, -0.00138888787478208541870117f); + cu = madd(cu, s, +0.0416666641831398010253906f); + cu = madd(cu, s, -0.5f); + cu = madd(cu, s, +1.0f); + if ((q & 1) != 0) { + cu = -cu; + } + if (fabsf(su) > 1.0f) { + su = 0.0f; + } + if (fabsf(cu) > 1.0f) { + cu = 0.0f; + } + *sine = su; + *cosine = cu; +} + +/* NOTE: this approximation is only valid on [-8192.0,+8192.0], it starts + * becoming really poor outside of this range because the reciprocal amplifies + * errors. + */ +ccl_device float fast_tanf(float x) +{ + /* Derived from SLEEF implementation. + * + * Note that we cannot apply the "denormal crush" trick everywhere because + * we sometimes need to take the reciprocal of the polynomial + */ + int q = fast_rint(x * 2.0f * M_1_PI_F); + float qf = (float)q; + x = madd(qf, -0.78515625f * 2, x); + x = madd(qf, -0.00024187564849853515625f * 2, x); + x = madd(qf, -3.7747668102383613586e-08f * 2, x); + x = madd(qf, -1.2816720341285448015e-12f * 2, x); + if ((q & 1) == 0) { + /* Crush denormals (only if we aren't inverting the result later). */ + x = M_PI_4_F - (M_PI_4_F - x); + } + float s = x * x; + float u = 0.00927245803177356719970703f; + u = madd(u, s, 0.00331984995864331722259521f); + u = madd(u, s, 0.0242998078465461730957031f); + u = madd(u, s, 0.0534495301544666290283203f); + u = madd(u, s, 0.133383005857467651367188f); + u = madd(u, s, 0.333331853151321411132812f); + u = madd(s, u * x, x); + if ((q & 1) != 0) { + u = -1.0f / u; + } + return u; +} + +/* Fast, approximate sin(x*M_PI) with maximum absolute error of 0.000918954611. 
+ * + * Adapted from http://devmaster.net/posts/9648/fast-and-accurate-sine-cosine#comment-76773 + */ +ccl_device float fast_sinpif(float x) +{ + /* Fast trick to strip the integral part off, so our domain is [-1, 1]. */ + const float z = x - ((x + 25165824.0f) - 25165824.0f); + const float y = z - z * fabsf(z); + const float Q = 3.10396624f; + const float P = 3.584135056f; /* P = 16-4*Q */ + return y * (Q + P * fabsf(y)); + + /* The original article used inferior constants for Q and P and + * so had max error 1.091e-3. + * + * The optimal value for Q was determined by exhaustive search, minimizing + * the absolute numerical error relative to float(std::sin(double(phi*M_PI))) + * over the interval [0,2] (which is where most of the invocations happen). + * + * The basic idea of this approximation starts with the coarse approximation: + * sin(pi*x) ~= f(x) = 4 * (x - x * abs(x)) + * + * This approximation always _over_ estimates the target. On the other hand, + * the curve: + * sin(pi*x) ~= f(x) * abs(f(x)) / 4 + * + * always lies _under_ the target. Thus we can simply numerically search for + * the optimal constant to LERP these curves into a more precise + * approximation. + * + * After folding the constants together and simplifying the resulting math, + * we end up with the compact implementation above. + * + * NOTE: this function actually computes sin(x * pi) which avoids one or two + * mults in many cases and guarantees exact values at integer periods. + */ +} + +/* Fast approximate cos(x*M_PI) with ~0.1% absolute error. */ +ccl_device_inline float fast_cospif(float x) +{ + return fast_sinpif(x + 0.5f); +} + +ccl_device float fast_acosf(float x) +{ + const float f = fabsf(x); + /* clamp and crush denormals. */ + const float m = (f < 1.0f) ? 
1.0f - (1.0f - f) : 1.0f; + /* Based on http://www.pouet.net/topic.php?which=9132&page=2 + * 85% accurate (ulp 0) + * Examined 2130706434 values of acos: + * 15.2000597 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // without "denormal crush" + * Examined 2130706434 values of acos: + * 15.2007108 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // with "denormal crush" + */ + const float a = sqrtf(1.0f - m) * + (1.5707963267f + m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f))); + return x < 0 ? M_PI_F - a : a; +} + +ccl_device float fast_asinf(float x) +{ + /* Based on acosf approximation above. + * Max error is 4.51133e-05 (ulps are higher because we are consistently off + * by a little amount). + */ + const float f = fabsf(x); + /* Clamp and crush denormals. */ + const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f; + const float a = M_PI_2_F - + sqrtf(1.0f - m) * (1.5707963267f + + m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f))); + return copysignf(a, x); +} + +ccl_device float fast_atanf(float x) +{ + const float a = fabsf(x); + const float k = a > 1.0f ? 1 / a : a; + const float s = 1.0f - (1.0f - k); /* Crush denormals. */ + const float t = s * s; + /* http://mathforum.org/library/drmath/view/62672.html + * Examined 4278190080 values of atan: + * 2.36864877 avg ulp diff, 302 max ulp, 6.55651e-06 max error // (with denormals) + * Examined 4278190080 values of atan: + * 171160502 avg ulp diff, 855638016 max ulp, 6.55651e-06 max error // (crush denormals) + */ + float r = s * madd(0.43157974f, t, 1.0f) / madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f); + if (a > 1.0f) { + r = M_PI_2_F - r; + } + return copysignf(r, x); +} + +ccl_device float fast_atan2f(float y, float x) +{ + /* Based on atan approximation above. + * + * The special cases around 0 and infinity were tested explicitly. + * + * The only case not handled correctly is x=NaN,y=0 which returns 0 instead + * of nan. 
+ */ + const float a = fabsf(x); + const float b = fabsf(y); + + const float k = (b == 0) ? 0.0f : ((a == b) ? 1.0f : (b > a ? a / b : b / a)); + const float s = 1.0f - (1.0f - k); /* Crush denormals */ + const float t = s * s; + + float r = s * madd(0.43157974f, t, 1.0f) / madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f); + + if (b > a) { + /* Account for arg reduction. */ + r = M_PI_2_F - r; + } + /* Test sign bit of x. */ + if (__float_as_uint(x) & 0x80000000u) { + r = M_PI_F - r; + } + return copysignf(r, y); +} + +/* Based on: + * + * https://github.com/LiraNuna/glsl-sse2/blob/master/source/vec4.h + */ +ccl_device float fast_log2f(float x) +{ + /* NOTE: clamp to avoid special cases and make result "safe" from large + * negative values/NAN's. */ + x = clamp(x, FLT_MIN, FLT_MAX); + unsigned bits = __float_as_uint(x); + int exponent = (int)(bits >> 23) - 127; + float f = __uint_as_float((bits & 0x007FFFFF) | 0x3f800000) - 1.0f; + /* Examined 2130706432 values of log2 on [1.17549435e-38,3.40282347e+38]: + * 0.0797524457 avg ulp diff, 3713596 max ulp, 7.62939e-06 max error. + * ulp histogram: + * 0 = 97.46% + * 1 = 2.29% + * 2 = 0.11% + */ + float f2 = f * f; + float f4 = f2 * f2; + float hi = madd(f, -0.00931049621349f, 0.05206469089414f); + float lo = madd(f, 0.47868480909345f, -0.72116591947498f); + hi = madd(f, hi, -0.13753123777116f); + hi = madd(f, hi, 0.24187369696082f); + hi = madd(f, hi, -0.34730547155299f); + lo = madd(f, lo, 1.442689881667200f); + return ((f4 * hi) + (f * lo)) + exponent; +} + +ccl_device_inline float fast_logf(float x) +{ + /* Examined 2130706432 values of logf on [1.17549435e-38,3.40282347e+38]: + * 0.313865375 avg ulp diff, 5148137 max ulp, 7.62939e-06 max error. + */ + return fast_log2f(x) * M_LN2_F; +} + +ccl_device_inline float fast_log10(float x) +{ + /* Examined 2130706432 values of log10f on [1.17549435e-38,3.40282347e+38]: + * 0.631237033 avg ulp diff, 4471615 max ulp, 3.8147e-06 max error. 
+ */ + return fast_log2f(x) * M_LN2_F / M_LN10_F; +} + +ccl_device float fast_logb(float x) +{ + /* Don't bother with denormals. */ + x = fabsf(x); + x = clamp(x, FLT_MIN, FLT_MAX); + unsigned bits = __float_as_uint(x); + return (float)((int)(bits >> 23) - 127); +} + +ccl_device float fast_exp2f(float x) +{ + /* Clamp to safe range for final addition. */ + x = clamp(x, -126.0f, 126.0f); + /* Range reduction. */ + int m = (int)x; + x -= m; + x = 1.0f - (1.0f - x); /* Crush denormals (does not affect max ulps!). */ + /* 5th degree polynomial generated with sollya + * Examined 2247622658 values of exp2 on [-126,126]: 2.75764912 avg ulp diff, + * 232 max ulp. + * + * ulp histogram: + * 0 = 87.81% + * 1 = 4.18% + */ + float r = 1.33336498402e-3f; + r = madd(x, r, 9.810352697968e-3f); + r = madd(x, r, 5.551834031939e-2f); + r = madd(x, r, 0.2401793301105f); + r = madd(x, r, 0.693144857883f); + r = madd(x, r, 1.0f); + /* Multiply by 2 ^ m by adding in the exponent. */ + /* NOTE: left-shift of negative number is undefined behavior. */ + return __uint_as_float(__float_as_uint(r) + ((unsigned)m << 23)); +} + +ccl_device_inline float fast_expf(float x) +{ + /* Examined 2237485550 values of exp on [-87.3300018,87.3300018]: + * 2.6666452 avg ulp diff, 230 max ulp. + */ + return fast_exp2f(x / M_LN2_F); +} + +#if defined(__KERNEL_CPU__) && !defined(_MSC_VER) +/* MSVC seems to have a code-gen bug here in at least SSE41/AVX, see + * T78047 and T78869 for details. Just disable for now, it only makes + * a small difference in denoising performance. 
*/ +ccl_device float4 fast_exp2f4(float4 x) +{ + const float4 one = make_float4(1.0f); + const float4 limit = make_float4(126.0f); + x = clamp(x, -limit, limit); + int4 m = make_int4(x); + x = one - (one - (x - make_float4(m))); + float4 r = make_float4(1.33336498402e-3f); + r = madd4(x, r, make_float4(9.810352697968e-3f)); + r = madd4(x, r, make_float4(5.551834031939e-2f)); + r = madd4(x, r, make_float4(0.2401793301105f)); + r = madd4(x, r, make_float4(0.693144857883f)); + r = madd4(x, r, make_float4(1.0f)); + return __int4_as_float4(__float4_as_int4(r) + (m << 23)); +} + +ccl_device_inline float4 fast_expf4(float4 x) +{ + return fast_exp2f4(x / M_LN2_F); +} +#else +ccl_device_inline float4 fast_expf4(float4 x) +{ + return make_float4(fast_expf(x.x), fast_expf(x.y), fast_expf(x.z), fast_expf(x.w)); +} +#endif + +ccl_device_inline float fast_exp10(float x) +{ + /* Examined 2217701018 values of exp10 on [-37.9290009,37.9290009]: + * 2.71732409 avg ulp diff, 232 max ulp. + */ + return fast_exp2f(x * M_LN10_F / M_LN2_F); +} + +ccl_device_inline float fast_expm1f(float x) +{ + if (fabsf(x) < 1e-5f) { + x = 1.0f - (1.0f - x); /* Crush denormals. */ + return madd(0.5f, x * x, x); + } + else { + return fast_expf(x) - 1.0f; + } +} + +ccl_device float fast_sinhf(float x) +{ + float a = fabsf(x); + if (a > 1.0f) { + /* Examined 53389559 values of sinh on [1,87.3300018]: + * 33.6886442 avg ulp diff, 178 max ulp. */ + float e = fast_expf(a); + return copysignf(0.5f * e - 0.5f / e, x); + } + else { + a = 1.0f - (1.0f - a); /* Crush denorms. */ + float a2 = a * a; + /* Degree 7 polynomial generated with sollya. */ + /* Examined 2130706434 values of sinh on [-1,1]: 1.19209e-07 max error. 
 */ + float r = 2.03945513931e-4f; + r = madd(r, a2, 8.32990277558e-3f); + r = madd(r, a2, 0.1666673421859f); + r = madd(r * a, a2, a); + return copysignf(r, x); + } +} + +ccl_device_inline float fast_coshf(float x) +{ + /* Examined 2237485550 values of cosh on [-87.3300018,87.3300018]: + * 1.78256726 avg ulp diff, 178 max ulp. + */ + float e = fast_expf(fabsf(x)); + return 0.5f * e + 0.5f / e; +} + +ccl_device_inline float fast_tanhf(float x) +{ + /* Examined 4278190080 values of tanh on [-3.40282347e+38,3.40282347e+38]: + * 3.12924e-06 max error. + */ + /* NOTE: ulp error is high because of sub-optimal handling around the origin. */ + float e = fast_expf(2.0f * fabsf(x)); + return copysignf(1.0f - 2.0f / (1.0f + e), x); +} + +ccl_device float fast_safe_powf(float x, float y) +{ + if (y == 0) + return 1.0f; /* x^0=1 */ + if (x == 0) + return 0.0f; /* 0^y=0 */ + float sign = 1.0f; + if (x < 0.0f) { + /* if x is negative, only deal with integer powers + * powf returns NaN for non-integers, we will return 0 instead. + */ + int ybits = __float_as_int(y) & 0x7fffffff; + if (ybits >= 0x4b800000) { + // always even int, keep positive + } + else if (ybits >= 0x3f800000) { + /* Bigger than 1, check. */ + int k = (ybits >> 23) - 127; /* Get exponent. */ + int j = ybits >> (23 - k); /* Shift out possible fractional bits. */ + if ((j << (23 - k)) == ybits) { /* rebuild number and check for a match. */ + /* +1 for even, -1 for odd. */ + sign = __int_as_float(0x3f800000 | (j << 31)); + } + else { + /* Not an integer. */ + return 0.0f; + } + } + else { + /* Not an integer. */ + return 0.0f; + } + } + return sign * fast_exp2f(y * fast_log2f(fabsf(x))); +} + +/* TODO(sergey): Check speed with our erf functions implementation from + * bsdf_microfacet.h. + */ + +ccl_device_inline float fast_erff(float x) +{ + /* Examined 1082130433 values of erff on [0,4]: 1.93715e-06 max error. */ + /* Abramowitz and Stegun, 7.1.28. 
*/ + const float a1 = 0.0705230784f; + const float a2 = 0.0422820123f; + const float a3 = 0.0092705272f; + const float a4 = 0.0001520143f; + const float a5 = 0.0002765672f; + const float a6 = 0.0000430638f; + const float a = fabsf(x); + if (a >= 12.3f) { + return copysignf(1.0f, x); + } + const float b = 1.0f - (1.0f - a); /* Crush denormals. */ + const float r = madd( + madd(madd(madd(madd(madd(a6, b, a5), b, a4), b, a3), b, a2), b, a1), b, 1.0f); + const float s = r * r; /* ^2 */ + const float t = s * s; /* ^4 */ + const float u = t * t; /* ^8 */ + const float v = u * u; /* ^16 */ + return copysignf(1.0f - 1.0f / v, x); +} + +ccl_device_inline float fast_erfcf(float x) +{ + /* Examined 2164260866 values of erfcf on [-4,4]: 1.90735e-06 max error. + * + * ulp histogram: + * + * 0 = 80.30% + */ + return 1.0f - fast_erff(x); +} + +ccl_device_inline float fast_ierff(float x) +{ + /* From: Approximating the `erfinv` function by Mike Giles. */ + /* To avoid trouble at the limit, clamp input to 1-eps. 
 */ + float a = fabsf(x); + if (a > 0.99999994f) { + a = 0.99999994f; + } + float w = -fast_logf((1.0f - a) * (1.0f + a)), p; + if (w < 5.0f) { + w = w - 2.5f; + p = 2.81022636e-08f; + p = madd(p, w, 3.43273939e-07f); + p = madd(p, w, -3.5233877e-06f); + p = madd(p, w, -4.39150654e-06f); + p = madd(p, w, 0.00021858087f); + p = madd(p, w, -0.00125372503f); + p = madd(p, w, -0.00417768164f); + p = madd(p, w, 0.246640727f); + p = madd(p, w, 1.50140941f); + } + else { + w = sqrtf(w) - 3.0f; + p = -0.000200214257f; + p = madd(p, w, 0.000100950558f); + p = madd(p, w, 0.00134934322f); + p = madd(p, w, -0.00367342844f); + p = madd(p, w, 0.00573950773f); + p = madd(p, w, -0.0076224613f); + p = madd(p, w, 0.00943887047f); + p = madd(p, w, 1.00167406f); + p = madd(p, w, 2.83297682f); + } + return p * x; +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_MATH_FAST_H__ */ diff --git a/intern/cycles/util/math_float2.h b/intern/cycles/util/math_float2.h new file mode 100644 index 00000000000..87141d5bc37 --- /dev/null +++ b/intern/cycles/util/math_float2.h @@ -0,0 +1,269 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_MATH_FLOAT2_H__ +#define __UTIL_MATH_FLOAT2_H__ + +#ifndef __UTIL_MATH_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +/******************************************************************************* + * Declaration. 
+ */ + +ccl_device_inline float2 operator-(const float2 &a); +ccl_device_inline float2 operator*(const float2 &a, const float2 &b); +ccl_device_inline float2 operator*(const float2 &a, float f); +ccl_device_inline float2 operator*(float f, const float2 &a); +ccl_device_inline float2 operator/(float f, const float2 &a); +ccl_device_inline float2 operator/(const float2 &a, float f); +ccl_device_inline float2 operator/(const float2 &a, const float2 &b); +ccl_device_inline float2 operator+(const float2 &a, const float f); +ccl_device_inline float2 operator+(const float2 &a, const float2 &b); +ccl_device_inline float2 operator-(const float2 &a, const float f); +ccl_device_inline float2 operator-(const float2 &a, const float2 &b); +ccl_device_inline float2 operator+=(float2 &a, const float2 &b); +ccl_device_inline float2 operator*=(float2 &a, const float2 &b); +ccl_device_inline float2 operator*=(float2 &a, float f); +ccl_device_inline float2 operator/=(float2 &a, const float2 &b); +ccl_device_inline float2 operator/=(float2 &a, float f); + +ccl_device_inline bool operator==(const float2 &a, const float2 &b); +ccl_device_inline bool operator!=(const float2 &a, const float2 &b); + +ccl_device_inline bool is_zero(const float2 &a); +ccl_device_inline float average(const float2 &a); +ccl_device_inline float distance(const float2 &a, const float2 &b); +ccl_device_inline float dot(const float2 &a, const float2 &b); +ccl_device_inline float cross(const float2 &a, const float2 &b); +ccl_device_inline float len(const float2 &a); +ccl_device_inline float2 normalize(const float2 &a); +ccl_device_inline float2 normalize_len(const float2 &a, float *t); +ccl_device_inline float2 safe_normalize(const float2 &a); +ccl_device_inline float2 min(const float2 &a, const float2 &b); +ccl_device_inline float2 max(const float2 &a, const float2 &b); +ccl_device_inline float2 clamp(const float2 &a, const float2 &mn, const float2 &mx); +ccl_device_inline float2 fabs(const float2 &a); 
+ccl_device_inline float2 as_float2(const float4 &a); +ccl_device_inline float2 interp(const float2 &a, const float2 &b, float t); +ccl_device_inline float2 floor(const float2 &a); + +ccl_device_inline float2 safe_divide_float2_float(const float2 a, const float b); + +/******************************************************************************* + * Definition. + */ + +ccl_device_inline float2 zero_float2() +{ + return make_float2(0.0f, 0.0f); +} + +ccl_device_inline float2 one_float2() +{ + return make_float2(1.0f, 1.0f); +} + +ccl_device_inline float2 operator-(const float2 &a) +{ + return make_float2(-a.x, -a.y); +} + +ccl_device_inline float2 operator*(const float2 &a, const float2 &b) +{ + return make_float2(a.x * b.x, a.y * b.y); +} + +ccl_device_inline float2 operator*(const float2 &a, float f) +{ + return make_float2(a.x * f, a.y * f); +} + +ccl_device_inline float2 operator*(float f, const float2 &a) +{ + return make_float2(a.x * f, a.y * f); +} + +ccl_device_inline float2 operator/(float f, const float2 &a) +{ + return make_float2(f / a.x, f / a.y); +} + +ccl_device_inline float2 operator/(const float2 &a, float f) +{ + float invf = 1.0f / f; + return make_float2(a.x * invf, a.y * invf); +} + +ccl_device_inline float2 operator/(const float2 &a, const float2 &b) +{ + return make_float2(a.x / b.x, a.y / b.y); +} + +ccl_device_inline float2 operator+(const float2 &a, const float f) +{ + return a + make_float2(f, f); +} + +ccl_device_inline float2 operator+(const float2 &a, const float2 &b) +{ + return make_float2(a.x + b.x, a.y + b.y); +} + +ccl_device_inline float2 operator-(const float2 &a, const float f) +{ + return a - make_float2(f, f); +} + +ccl_device_inline float2 operator-(const float2 &a, const float2 &b) +{ + return make_float2(a.x - b.x, a.y - b.y); +} + +ccl_device_inline float2 operator+=(float2 &a, const float2 &b) +{ + return a = a + b; +} + +ccl_device_inline float2 operator*=(float2 &a, const float2 &b) +{ + return a = a * b; +} + 
+ccl_device_inline float2 operator*=(float2 &a, float f) +{ + return a = a * f; +} + +ccl_device_inline float2 operator/=(float2 &a, const float2 &b) +{ + return a = a / b; +} + +ccl_device_inline float2 operator/=(float2 &a, float f) +{ + float invf = 1.0f / f; + return a = a * invf; +} + +ccl_device_inline bool operator==(const float2 &a, const float2 &b) +{ + return (a.x == b.x && a.y == b.y); +} + +ccl_device_inline bool operator!=(const float2 &a, const float2 &b) +{ + return !(a == b); +} + +ccl_device_inline bool is_zero(const float2 &a) +{ + return (a.x == 0.0f && a.y == 0.0f); +} + +ccl_device_inline float average(const float2 &a) +{ + return (a.x + a.y) * (1.0f / 2.0f); +} + +ccl_device_inline float distance(const float2 &a, const float2 &b) +{ + return len(a - b); +} + +ccl_device_inline float dot(const float2 &a, const float2 &b) +{ + return a.x * b.x + a.y * b.y; +} + +ccl_device_inline float cross(const float2 &a, const float2 &b) +{ + return (a.x * b.y - a.y * b.x); +} + +ccl_device_inline float len(const float2 &a) +{ + return sqrtf(dot(a, a)); +} + +ccl_device_inline float2 normalize(const float2 &a) +{ + return a / len(a); +} + +ccl_device_inline float2 normalize_len(const float2 &a, ccl_private float *t) +{ + *t = len(a); + return a / (*t); +} + +ccl_device_inline float2 safe_normalize(const float2 &a) +{ + float t = len(a); + return (t != 0.0f) ? 
a / t : a; +} + +ccl_device_inline float2 min(const float2 &a, const float2 &b) +{ + return make_float2(min(a.x, b.x), min(a.y, b.y)); +} + +ccl_device_inline float2 max(const float2 &a, const float2 &b) +{ + return make_float2(max(a.x, b.x), max(a.y, b.y)); +} + +ccl_device_inline float2 clamp(const float2 &a, const float2 &mn, const float2 &mx) +{ + return min(max(a, mn), mx); +} + +ccl_device_inline float2 fabs(const float2 &a) +{ + return make_float2(fabsf(a.x), fabsf(a.y)); +} + +ccl_device_inline float2 as_float2(const float4 &a) +{ + return make_float2(a.x, a.y); +} + +ccl_device_inline float2 interp(const float2 &a, const float2 &b, float t) +{ + return a + t * (b - a); +} + +ccl_device_inline float2 mix(const float2 &a, const float2 &b, float t) +{ + return a + t * (b - a); +} + +ccl_device_inline float2 floor(const float2 &a) +{ + return make_float2(floorf(a.x), floorf(a.y)); +} + +ccl_device_inline float2 safe_divide_float2_float(const float2 a, const float b) +{ + return (b != 0.0f) ? a / b : zero_float2(); +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_MATH_FLOAT2_H__ */ diff --git a/intern/cycles/util/math_float3.h b/intern/cycles/util/math_float3.h new file mode 100644 index 00000000000..e780d7e0a7c --- /dev/null +++ b/intern/cycles/util/math_float3.h @@ -0,0 +1,530 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __UTIL_MATH_FLOAT3_H__ +#define __UTIL_MATH_FLOAT3_H__ + +#ifndef __UTIL_MATH_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +/******************************************************************************* + * Declaration. + */ + +ccl_device_inline float3 operator-(const float3 &a); +ccl_device_inline float3 operator*(const float3 &a, const float3 &b); +ccl_device_inline float3 operator*(const float3 &a, const float f); +ccl_device_inline float3 operator*(const float f, const float3 &a); +ccl_device_inline float3 operator/(const float f, const float3 &a); +ccl_device_inline float3 operator/(const float3 &a, const float f); +ccl_device_inline float3 operator/(const float3 &a, const float3 &b); +ccl_device_inline float3 operator+(const float3 &a, const float f); +ccl_device_inline float3 operator+(const float3 &a, const float3 &b); +ccl_device_inline float3 operator-(const float3 &a, const float f); +ccl_device_inline float3 operator-(const float3 &a, const float3 &b); +ccl_device_inline float3 operator+=(float3 &a, const float3 &b); +ccl_device_inline float3 operator-=(float3 &a, const float3 &b); +ccl_device_inline float3 operator*=(float3 &a, const float3 &b); +ccl_device_inline float3 operator*=(float3 &a, float f); +ccl_device_inline float3 operator/=(float3 &a, const float3 &b); +ccl_device_inline float3 operator/=(float3 &a, float f); + +ccl_device_inline bool operator==(const float3 &a, const float3 &b); +ccl_device_inline bool operator!=(const float3 &a, const float3 &b); + +ccl_device_inline float distance(const float3 &a, const float3 &b); +ccl_device_inline float dot(const float3 &a, const float3 &b); +ccl_device_inline float dot_xy(const float3 &a, const float3 &b); +ccl_device_inline float3 cross(const float3 &a, const float3 &b); +ccl_device_inline float3 normalize(const float3 &a); +ccl_device_inline float3 min(const float3 &a, const float3 &b); +ccl_device_inline 
float3 max(const float3 &a, const float3 &b); +ccl_device_inline float3 clamp(const float3 &a, const float3 &mn, const float3 &mx); +ccl_device_inline float3 fabs(const float3 &a); +ccl_device_inline float3 mix(const float3 &a, const float3 &b, float t); +ccl_device_inline float3 rcp(const float3 &a); +ccl_device_inline float3 sqrt(const float3 &a); +ccl_device_inline float3 floor(const float3 &a); +ccl_device_inline float3 ceil(const float3 &a); + +ccl_device_inline float min3(float3 a); +ccl_device_inline float max3(float3 a); +ccl_device_inline float len(const float3 a); +ccl_device_inline float len_squared(const float3 a); + +ccl_device_inline float3 reflect(const float3 incident, const float3 normal); +ccl_device_inline float3 project(const float3 v, const float3 v_proj); + +ccl_device_inline float3 saturate3(float3 a); +ccl_device_inline float3 safe_normalize(const float3 a); +ccl_device_inline float3 normalize_len(const float3 a, float *t); +ccl_device_inline float3 safe_normalize_len(const float3 a, float *t); +ccl_device_inline float3 safe_divide_float3_float3(const float3 a, const float3 b); +ccl_device_inline float3 safe_divide_float3_float(const float3 a, const float b); +ccl_device_inline float3 interp(float3 a, float3 b, float t); +ccl_device_inline float3 sqr3(float3 a); + +ccl_device_inline bool is_zero(const float3 a); +ccl_device_inline float reduce_add(const float3 a); +ccl_device_inline float average(const float3 a); +ccl_device_inline bool isequal_float3(const float3 a, const float3 b); + +/******************************************************************************* + * Definition. 
+ */ + +ccl_device_inline float3 zero_float3() +{ +#ifdef __KERNEL_SSE__ + return float3(_mm_setzero_ps()); +#else + return make_float3(0.0f, 0.0f, 0.0f); +#endif +} + +ccl_device_inline float3 one_float3() +{ + return make_float3(1.0f, 1.0f, 1.0f); +} + +ccl_device_inline float3 operator-(const float3 &a) +{ +#ifdef __KERNEL_SSE__ + return float3(_mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000)))); +#else + return make_float3(-a.x, -a.y, -a.z); +#endif +} + +ccl_device_inline float3 operator*(const float3 &a, const float3 &b) +{ +#ifdef __KERNEL_SSE__ + return float3(_mm_mul_ps(a.m128, b.m128)); +#else + return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); +#endif +} + +ccl_device_inline float3 operator*(const float3 &a, const float f) +{ +#ifdef __KERNEL_SSE__ + return float3(_mm_mul_ps(a.m128, _mm_set1_ps(f))); +#else + return make_float3(a.x * f, a.y * f, a.z * f); +#endif +} + +ccl_device_inline float3 operator*(const float f, const float3 &a) +{ +#if defined(__KERNEL_SSE__) + return float3(_mm_mul_ps(_mm_set1_ps(f), a.m128)); +#else + return make_float3(a.x * f, a.y * f, a.z * f); +#endif +} + +ccl_device_inline float3 operator/(const float f, const float3 &a) +{ +#if defined(__KERNEL_SSE__) + return float3(_mm_div_ps(_mm_set1_ps(f), a.m128)); +#else + return make_float3(f / a.x, f / a.y, f / a.z); +#endif +} + +ccl_device_inline float3 operator/(const float3 &a, const float f) +{ + float invf = 1.0f / f; + return a * invf; +} + +ccl_device_inline float3 operator/(const float3 &a, const float3 &b) +{ +#if defined(__KERNEL_SSE__) + return float3(_mm_div_ps(a.m128, b.m128)); +#else + return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); +#endif +} + +ccl_device_inline float3 operator+(const float3 &a, const float f) +{ + return a + make_float3(f, f, f); +} + +ccl_device_inline float3 operator+(const float3 &a, const float3 &b) +{ +#ifdef __KERNEL_SSE__ + return float3(_mm_add_ps(a.m128, b.m128)); +#else + return make_float3(a.x + b.x, a.y + b.y, 
a.z + b.z); +#endif +} + +ccl_device_inline float3 operator-(const float3 &a, const float f) +{ + return a - make_float3(f, f, f); +} + +ccl_device_inline float3 operator-(const float3 &a, const float3 &b) +{ +#ifdef __KERNEL_SSE__ + return float3(_mm_sub_ps(a.m128, b.m128)); +#else + return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); +#endif +} + +ccl_device_inline float3 operator+=(float3 &a, const float3 &b) +{ + return a = a + b; +} + +ccl_device_inline float3 operator-=(float3 &a, const float3 &b) +{ + return a = a - b; +} + +ccl_device_inline float3 operator*=(float3 &a, const float3 &b) +{ + return a = a * b; +} + +ccl_device_inline float3 operator*=(float3 &a, float f) +{ + return a = a * f; +} + +ccl_device_inline float3 operator/=(float3 &a, const float3 &b) +{ + return a = a / b; +} + +ccl_device_inline float3 operator/=(float3 &a, float f) +{ + float invf = 1.0f / f; + return a = a * invf; +} + +ccl_device_inline bool operator==(const float3 &a, const float3 &b) +{ +#ifdef __KERNEL_SSE__ + return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 7) == 7; +#else + return (a.x == b.x && a.y == b.y && a.z == b.z); +#endif +} + +ccl_device_inline bool operator!=(const float3 &a, const float3 &b) +{ + return !(a == b); +} + +ccl_device_inline float distance(const float3 &a, const float3 &b) +{ + return len(a - b); +} + +ccl_device_inline float dot(const float3 &a, const float3 &b) +{ +#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) + return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F)); +#else + return a.x * b.x + a.y * b.y + a.z * b.z; +#endif +} + +ccl_device_inline float dot_xy(const float3 &a, const float3 &b) +{ +#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) + return _mm_cvtss_f32(_mm_hadd_ps(_mm_mul_ps(a, b), b)); +#else + return a.x * b.x + a.y * b.y; +#endif +} + +ccl_device_inline float3 cross(const float3 &a, const float3 &b) +{ + float3 r = make_float3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x); + return r; +} 
+ +ccl_device_inline float3 normalize(const float3 &a) +{ +#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) + __m128 norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F)); + return float3(_mm_div_ps(a.m128, norm)); +#else + return a / len(a); +#endif +} + +ccl_device_inline float3 min(const float3 &a, const float3 &b) +{ +#ifdef __KERNEL_SSE__ + return float3(_mm_min_ps(a.m128, b.m128)); +#else + return make_float3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); +#endif +} + +ccl_device_inline float3 max(const float3 &a, const float3 &b) +{ +#ifdef __KERNEL_SSE__ + return float3(_mm_max_ps(a.m128, b.m128)); +#else + return make_float3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); +#endif +} + +ccl_device_inline float3 clamp(const float3 &a, const float3 &mn, const float3 &mx) +{ + return min(max(a, mn), mx); +} + +ccl_device_inline float3 fabs(const float3 &a) +{ +#ifdef __KERNEL_SSE__ +# ifdef __KERNEL_NEON__ + return float3(vabsq_f32(a.m128)); +# else + __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); + return float3(_mm_and_ps(a.m128, mask)); +# endif +#else + return make_float3(fabsf(a.x), fabsf(a.y), fabsf(a.z)); +#endif +} + +ccl_device_inline float3 sqrt(const float3 &a) +{ +#ifdef __KERNEL_SSE__ + return float3(_mm_sqrt_ps(a)); +#else + return make_float3(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z)); +#endif +} + +ccl_device_inline float3 floor(const float3 &a) +{ +#ifdef __KERNEL_SSE__ + return float3(_mm_floor_ps(a)); +#else + return make_float3(floorf(a.x), floorf(a.y), floorf(a.z)); +#endif +} + +ccl_device_inline float3 ceil(const float3 &a) +{ +#ifdef __KERNEL_SSE__ + return float3(_mm_ceil_ps(a)); +#else + return make_float3(ceilf(a.x), ceilf(a.y), ceilf(a.z)); +#endif +} + +ccl_device_inline float3 mix(const float3 &a, const float3 &b, float t) +{ + return a + t * (b - a); +} + +ccl_device_inline float3 rcp(const float3 &a) +{ +#ifdef __KERNEL_SSE__ + /* Don't use _mm_rcp_ps due to poor precision. 
*/ + return float3(_mm_div_ps(_mm_set_ps1(1.0f), a.m128)); +#else + return make_float3(1.0f / a.x, 1.0f / a.y, 1.0f / a.z); +#endif +} + +ccl_device_inline float min3(float3 a) +{ + return min(min(a.x, a.y), a.z); +} + +ccl_device_inline float max3(float3 a) +{ + return max(max(a.x, a.y), a.z); +} + +ccl_device_inline float len(const float3 a) +{ +#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) + return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(a.m128, a.m128, 0x7F))); +#else + return sqrtf(dot(a, a)); +#endif +} + +ccl_device_inline float len_squared(const float3 a) +{ + return dot(a, a); +} + +ccl_device_inline float3 reflect(const float3 incident, const float3 normal) +{ + float3 unit_normal = normalize(normal); + return incident - 2.0f * unit_normal * dot(incident, unit_normal); +} + +ccl_device_inline float3 refract(const float3 incident, const float3 normal, const float eta) +{ + float k = 1.0f - eta * eta * (1.0f - dot(normal, incident) * dot(normal, incident)); + if (k < 0.0f) + return zero_float3(); + else + return eta * incident - (eta * dot(normal, incident) + sqrt(k)) * normal; +} + +ccl_device_inline float3 faceforward(const float3 vector, + const float3 incident, + const float3 reference) +{ + return (dot(reference, incident) < 0.0f) ? vector : -vector; +} + +ccl_device_inline float3 project(const float3 v, const float3 v_proj) +{ + float len_squared = dot(v_proj, v_proj); + return (len_squared != 0.0f) ? (dot(v, v_proj) / len_squared) * v_proj : zero_float3(); +} + +ccl_device_inline float3 saturate3(float3 a) +{ + return make_float3(saturate(a.x), saturate(a.y), saturate(a.z)); +} + +ccl_device_inline float3 normalize_len(const float3 a, ccl_private float *t) +{ + *t = len(a); + float x = 1.0f / *t; + return a * x; +} + +ccl_device_inline float3 safe_normalize(const float3 a) +{ + float t = len(a); + return (t != 0.0f) ? 
a * (1.0f / t) : a; +} + +ccl_device_inline float3 safe_normalize_len(const float3 a, ccl_private float *t) +{ + *t = len(a); + return (*t != 0.0f) ? a / (*t) : a; +} + +ccl_device_inline float3 safe_divide_float3_float3(const float3 a, const float3 b) +{ + return make_float3((b.x != 0.0f) ? a.x / b.x : 0.0f, + (b.y != 0.0f) ? a.y / b.y : 0.0f, + (b.z != 0.0f) ? a.z / b.z : 0.0f); +} + +ccl_device_inline float3 safe_divide_float3_float(const float3 a, const float b) +{ + return (b != 0.0f) ? a / b : zero_float3(); +} + +ccl_device_inline float3 interp(float3 a, float3 b, float t) +{ + return a + t * (b - a); +} + +ccl_device_inline float3 sqr3(float3 a) +{ + return a * a; +} + +ccl_device_inline bool is_zero(const float3 a) +{ +#ifdef __KERNEL_SSE__ + return a == make_float3(0.0f); +#else + return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f); +#endif +} + +ccl_device_inline float reduce_add(const float3 a) +{ +#if defined(__KERNEL_SSE__) && defined(__KERNEL_NEON__) + __m128 t = a.m128; + t[3] = 0.0f; + return vaddvq_f32(t); +#else + return (a.x + a.y + a.z); +#endif +} + +ccl_device_inline float average(const float3 a) +{ + return reduce_add(a) * (1.0f / 3.0f); +} + +ccl_device_inline bool isequal_float3(const float3 a, const float3 b) +{ + return a == b; +} + +ccl_device_inline float3 pow3(float3 v, float e) +{ + return make_float3(powf(v.x, e), powf(v.y, e), powf(v.z, e)); +} + +ccl_device_inline float3 exp3(float3 v) +{ + return make_float3(expf(v.x), expf(v.y), expf(v.z)); +} + +ccl_device_inline float3 log3(float3 v) +{ + return make_float3(logf(v.x), logf(v.y), logf(v.z)); +} + +ccl_device_inline int3 quick_floor_to_int3(const float3 a) +{ +#ifdef __KERNEL_SSE__ + int3 b = int3(_mm_cvttps_epi32(a.m128)); + int3 isneg = int3(_mm_castps_si128(_mm_cmplt_ps(a.m128, _mm_set_ps1(0.0f)))); + /* Unsaturated add 0xffffffff is the same as subtract -1. 
*/ + return b + isneg; +#else + return make_int3(quick_floor_to_int(a.x), quick_floor_to_int(a.y), quick_floor_to_int(a.z)); +#endif +} + +ccl_device_inline bool isfinite3_safe(float3 v) +{ + return isfinite_safe(v.x) && isfinite_safe(v.y) && isfinite_safe(v.z); +} + +ccl_device_inline float3 ensure_finite3(float3 v) +{ + if (!isfinite_safe(v.x)) + v.x = 0.0f; + if (!isfinite_safe(v.y)) + v.y = 0.0f; + if (!isfinite_safe(v.z)) + v.z = 0.0f; + return v; +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_MATH_FLOAT3_H__ */ diff --git a/intern/cycles/util/math_float4.h b/intern/cycles/util/math_float4.h new file mode 100644 index 00000000000..c76959ee7ff --- /dev/null +++ b/intern/cycles/util/math_float4.h @@ -0,0 +1,536 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_MATH_FLOAT4_H__ +#define __UTIL_MATH_FLOAT4_H__ + +#ifndef __UTIL_MATH_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +/******************************************************************************* + * Declaration. 
+ */ + +ccl_device_inline float4 operator-(const float4 &a); +ccl_device_inline float4 operator*(const float4 &a, const float4 &b); +ccl_device_inline float4 operator*(const float4 &a, float f); +ccl_device_inline float4 operator*(float f, const float4 &a); +ccl_device_inline float4 operator/(const float4 &a, float f); +ccl_device_inline float4 operator/(const float4 &a, const float4 &b); +ccl_device_inline float4 operator+(const float4 &a, const float f); +ccl_device_inline float4 operator+(const float4 &a, const float4 &b); +ccl_device_inline float4 operator-(const float4 &a, const float f); +ccl_device_inline float4 operator-(const float4 &a, const float4 &b); +ccl_device_inline float4 operator+=(float4 &a, const float4 &b); +ccl_device_inline float4 operator*=(float4 &a, const float4 &b); +ccl_device_inline float4 operator*=(float4 &a, float f); +ccl_device_inline float4 operator/=(float4 &a, float f); + +ccl_device_inline int4 operator<(const float4 &a, const float4 &b); +ccl_device_inline int4 operator>=(const float4 &a, const float4 &b); +ccl_device_inline int4 operator<=(const float4 &a, const float4 &b); +ccl_device_inline bool operator==(const float4 &a, const float4 &b); + +ccl_device_inline float distance(const float4 &a, const float4 &b); +ccl_device_inline float dot(const float4 &a, const float4 &b); +ccl_device_inline float len_squared(const float4 &a); +ccl_device_inline float4 rcp(const float4 &a); +ccl_device_inline float4 sqrt(const float4 &a); +ccl_device_inline float4 sqr(const float4 &a); +ccl_device_inline float4 cross(const float4 &a, const float4 &b); +ccl_device_inline bool is_zero(const float4 &a); +ccl_device_inline float average(const float4 &a); +ccl_device_inline float len(const float4 &a); +ccl_device_inline float4 normalize(const float4 &a); +ccl_device_inline float4 safe_normalize(const float4 &a); +ccl_device_inline float4 min(const float4 &a, const float4 &b); +ccl_device_inline float4 max(const float4 &a, const float4 &b); 
+ccl_device_inline float4 clamp(const float4 &a, const float4 &mn, const float4 &mx); +ccl_device_inline float4 fabs(const float4 &a); +ccl_device_inline float4 floor(const float4 &a); +ccl_device_inline float4 mix(const float4 &a, const float4 &b, float t); + +ccl_device_inline float4 safe_divide_float4_float(const float4 a, const float b); + +#ifdef __KERNEL_SSE__ +template +__forceinline const float4 shuffle(const float4 &b); +template +__forceinline const float4 shuffle(const float4 &a, const float4 &b); + +template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 &b); + +template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 &a, const float4 &b); +template<> __forceinline const float4 shuffle<2, 3, 2, 3>(const float4 &a, const float4 &b); + +# ifdef __KERNEL_SSE3__ +template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4 &b); +template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4 &b); +# endif +#endif /* __KERNEL_SSE__ */ + +#ifndef __KERNEL_GPU__ +ccl_device_inline float4 select(const int4 &mask, const float4 &a, const float4 &b); +ccl_device_inline float4 reduce_min(const float4 &a); +ccl_device_inline float4 reduce_max(const float4 &a); +ccl_device_inline float4 reduce_add(const float4 &a); +#endif /* !__KERNEL_GPU__ */ + +/******************************************************************************* + * Definition. 
+ */ + +ccl_device_inline float4 zero_float4() +{ +#ifdef __KERNEL_SSE__ + return float4(_mm_setzero_ps()); +#else + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); +#endif +} + +ccl_device_inline float4 one_float4() +{ + return make_float4(1.0f, 1.0f, 1.0f, 1.0f); +} + +ccl_device_inline float4 operator-(const float4 &a) +{ +#ifdef __KERNEL_SSE__ + __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); + return float4(_mm_xor_ps(a.m128, mask)); +#else + return make_float4(-a.x, -a.y, -a.z, -a.w); +#endif +} + +ccl_device_inline float4 operator*(const float4 &a, const float4 &b) +{ +#ifdef __KERNEL_SSE__ + return float4(_mm_mul_ps(a.m128, b.m128)); +#else + return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); +#endif +} + +ccl_device_inline float4 operator*(const float4 &a, float f) +{ +#if defined(__KERNEL_SSE__) + return a * make_float4(f); +#else + return make_float4(a.x * f, a.y * f, a.z * f, a.w * f); +#endif +} + +ccl_device_inline float4 operator*(float f, const float4 &a) +{ + return a * f; +} + +ccl_device_inline float4 operator/(const float4 &a, float f) +{ + return a * (1.0f / f); +} + +ccl_device_inline float4 operator/(const float4 &a, const float4 &b) +{ +#ifdef __KERNEL_SSE__ + return float4(_mm_div_ps(a.m128, b.m128)); +#else + return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); +#endif +} + +ccl_device_inline float4 operator+(const float4 &a, const float f) +{ + return a + make_float4(f, f, f, f); +} + +ccl_device_inline float4 operator+(const float4 &a, const float4 &b) +{ +#ifdef __KERNEL_SSE__ + return float4(_mm_add_ps(a.m128, b.m128)); +#else + return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); +#endif +} + +ccl_device_inline float4 operator-(const float4 &a, const float f) +{ + return a - make_float4(f, f, f, f); +} + +ccl_device_inline float4 operator-(const float4 &a, const float4 &b) +{ +#ifdef __KERNEL_SSE__ + return float4(_mm_sub_ps(a.m128, b.m128)); +#else + return make_float4(a.x - b.x, a.y - b.y, 
a.z - b.z, a.w - b.w); +#endif +} + +ccl_device_inline float4 operator+=(float4 &a, const float4 &b) +{ + return a = a + b; +} + +ccl_device_inline float4 operator-=(float4 &a, const float4 &b) +{ + return a = a - b; +} + +ccl_device_inline float4 operator*=(float4 &a, const float4 &b) +{ + return a = a * b; +} + +ccl_device_inline float4 operator*=(float4 &a, float f) +{ + return a = a * f; +} + +ccl_device_inline float4 operator/=(float4 &a, float f) +{ + return a = a / f; +} + +ccl_device_inline int4 operator<(const float4 &a, const float4 &b) +{ +#ifdef __KERNEL_SSE__ + return int4(_mm_castps_si128(_mm_cmplt_ps(a.m128, b.m128))); +#else + return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w); +#endif +} + +ccl_device_inline int4 operator>=(const float4 &a, const float4 &b) +{ +#ifdef __KERNEL_SSE__ + return int4(_mm_castps_si128(_mm_cmpge_ps(a.m128, b.m128))); +#else + return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w); +#endif +} + +ccl_device_inline int4 operator<=(const float4 &a, const float4 &b) +{ +#ifdef __KERNEL_SSE__ + return int4(_mm_castps_si128(_mm_cmple_ps(a.m128, b.m128))); +#else + return make_int4(a.x <= b.x, a.y <= b.y, a.z <= b.z, a.w <= b.w); +#endif +} + +ccl_device_inline bool operator==(const float4 &a, const float4 &b) +{ +#ifdef __KERNEL_SSE__ + return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 15) == 15; +#else + return (a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w); +#endif +} + +ccl_device_inline float distance(const float4 &a, const float4 &b) +{ + return len(a - b); +} + +ccl_device_inline float dot(const float4 &a, const float4 &b) +{ +#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) +# if defined(__KERNEL_NEON__) + __m128 t = vmulq_f32(a, b); + return vaddvq_f32(t); +# else + return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF)); +# endif +#else + return (a.x * b.x + a.y * b.y) + (a.z * b.z + a.w * b.w); +#endif +} + +ccl_device_inline float len_squared(const float4 &a) +{ + return dot(a, a); +} + 
+ccl_device_inline float4 rcp(const float4 &a) +{ +#ifdef __KERNEL_SSE__ + /* Don't use _mm_rcp_ps due to poor precision. */ + return float4(_mm_div_ps(_mm_set_ps1(1.0f), a.m128)); +#else + return make_float4(1.0f / a.x, 1.0f / a.y, 1.0f / a.z, 1.0f / a.w); +#endif +} + +ccl_device_inline float4 sqrt(const float4 &a) +{ +#ifdef __KERNEL_SSE__ + return float4(_mm_sqrt_ps(a.m128)); +#else + return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w)); +#endif +} + +ccl_device_inline float4 sqr(const float4 &a) +{ + return a * a; +} + +ccl_device_inline float4 cross(const float4 &a, const float4 &b) +{ +#ifdef __KERNEL_SSE__ + return (shuffle<1, 2, 0, 0>(a) * shuffle<2, 0, 1, 0>(b)) - + (shuffle<2, 0, 1, 0>(a) * shuffle<1, 2, 0, 0>(b)); +#else + return make_float4(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x, 0.0f); +#endif +} + +ccl_device_inline bool is_zero(const float4 &a) +{ +#ifdef __KERNEL_SSE__ + return a == make_float4(0.0f); +#else + return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f); +#endif +} + +ccl_device_inline float4 reduce_add(const float4 &a) +{ +#if defined(__KERNEL_SSE__) +# if defined(__KERNEL_NEON__) + return float4(vdupq_n_f32(vaddvq_f32(a))); +# elif defined(__KERNEL_SSE3__) + float4 h(_mm_hadd_ps(a.m128, a.m128)); + return float4(_mm_hadd_ps(h.m128, h.m128)); +# else + float4 h(shuffle<1, 0, 3, 2>(a) + a); + return shuffle<2, 3, 0, 1>(h) + h; +# endif +#else + float sum = (a.x + a.y) + (a.z + a.w); + return make_float4(sum, sum, sum, sum); +#endif +} + +ccl_device_inline float average(const float4 &a) +{ + return reduce_add(a).x * 0.25f; +} + +ccl_device_inline float len(const float4 &a) +{ + return sqrtf(dot(a, a)); +} + +ccl_device_inline float4 normalize(const float4 &a) +{ + return a / len(a); +} + +ccl_device_inline float4 safe_normalize(const float4 &a) +{ + float t = len(a); + return (t != 0.0f) ? 
a / t : a; +} + +ccl_device_inline float4 min(const float4 &a, const float4 &b) +{ +#ifdef __KERNEL_SSE__ + return float4(_mm_min_ps(a.m128, b.m128)); +#else + return make_float4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); +#endif +} + +ccl_device_inline float4 max(const float4 &a, const float4 &b) +{ +#ifdef __KERNEL_SSE__ + return float4(_mm_max_ps(a.m128, b.m128)); +#else + return make_float4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); +#endif +} + +ccl_device_inline float4 clamp(const float4 &a, const float4 &mn, const float4 &mx) +{ + return min(max(a, mn), mx); +} + +ccl_device_inline float4 fabs(const float4 &a) +{ +#if defined(__KERNEL_SSE__) +# if defined(__KERNEL_NEON__) + return float4(vabsq_f32(a)); +# else + return float4(_mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)))); +# endif +#else + return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w)); +#endif +} + +ccl_device_inline float4 floor(const float4 &a) +{ +#ifdef __KERNEL_SSE__ + return float4(_mm_floor_ps(a)); +#else + return make_float4(floorf(a.x), floorf(a.y), floorf(a.z), floorf(a.w)); +#endif +} + +ccl_device_inline float4 mix(const float4 &a, const float4 &b, float t) +{ + return a + t * (b - a); +} + +#ifdef __KERNEL_SSE__ +template +__forceinline const float4 shuffle(const float4 &b) +{ +# if defined(__KERNEL_NEON__) + return float4(shuffle_neon<__m128, index_0, index_1, index_2, index_3>(b.m128)); +# else + return float4(_mm_castsi128_ps( + _mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(index_3, index_2, index_1, index_0)))); +# endif +} + +template +__forceinline const float4 shuffle(const float4 &a, const float4 &b) +{ +# if defined(__KERNEL_NEON__) + return float4(shuffle_neon<__m128, index_0, index_1, index_2, index_3>(a.m128, b.m128)); +# else + return float4(_mm_shuffle_ps(a.m128, b.m128, _MM_SHUFFLE(index_3, index_2, index_1, index_0))); +# endif +} + +template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const 
float4 &b) +{ + return float4(_mm_castpd_ps(_mm_movedup_pd(_mm_castps_pd(b)))); +} + +template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 &a, const float4 &b) +{ + return float4(_mm_movelh_ps(a.m128, b.m128)); +} + +template<> __forceinline const float4 shuffle<2, 3, 2, 3>(const float4 &a, const float4 &b) +{ + return float4(_mm_movehl_ps(b.m128, a.m128)); +} + +# ifdef __KERNEL_SSE3__ +template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4 &b) +{ + return float4(_mm_moveldup_ps(b)); +} + +template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4 &b) +{ + return float4(_mm_movehdup_ps(b)); +} +# endif /* __KERNEL_SSE3__ */ +#endif /* __KERNEL_SSE__ */ + +#ifndef __KERNEL_GPU__ +ccl_device_inline float4 select(const int4 &mask, const float4 &a, const float4 &b) +{ +# ifdef __KERNEL_SSE__ + return float4(_mm_blendv_ps(b.m128, a.m128, _mm_castsi128_ps(mask.m128))); +# else + return make_float4( + (mask.x) ? a.x : b.x, (mask.y) ? a.y : b.y, (mask.z) ? a.z : b.z, (mask.w) ? a.w : b.w); +# endif +} + +ccl_device_inline float4 mask(const int4 &mask, const float4 &a) +{ + /* Replace elements of x with zero where mask isn't set. 
*/ + return select(mask, a, make_float4(0.0f)); +} + +ccl_device_inline float4 reduce_min(const float4 &a) +{ +# if defined(__KERNEL_SSE__) +# if defined(__KERNEL_NEON__) + return float4(vdupq_n_f32(vminvq_f32(a))); +# else + float4 h = min(shuffle<1, 0, 3, 2>(a), a); + return min(shuffle<2, 3, 0, 1>(h), h); +# endif +# else + return make_float4(min(min(a.x, a.y), min(a.z, a.w))); +# endif +} + +ccl_device_inline float4 reduce_max(const float4 &a) +{ +# if defined(__KERNEL_SSE__) +# if defined(__KERNEL_NEON__) + return float4(vdupq_n_f32(vmaxvq_f32(a))); +# else + float4 h = max(shuffle<1, 0, 3, 2>(a), a); + return max(shuffle<2, 3, 0, 1>(h), h); +# endif +# else + return make_float4(max(max(a.x, a.y), max(a.z, a.w))); +# endif +} + +ccl_device_inline float4 load_float4(ccl_private const float *v) +{ +# ifdef __KERNEL_SSE__ + return float4(_mm_loadu_ps(v)); +# else + return make_float4(v[0], v[1], v[2], v[3]); +# endif +} + +#endif /* !__KERNEL_GPU__ */ + +ccl_device_inline float4 safe_divide_float4_float(const float4 a, const float b) +{ + return (b != 0.0f) ? a / b : zero_float4(); +} + +ccl_device_inline bool isfinite4_safe(float4 v) +{ + return isfinite_safe(v.x) && isfinite_safe(v.y) && isfinite_safe(v.z) && isfinite_safe(v.w); +} + +ccl_device_inline float4 ensure_finite4(float4 v) +{ + if (!isfinite_safe(v.x)) + v.x = 0.0f; + if (!isfinite_safe(v.y)) + v.y = 0.0f; + if (!isfinite_safe(v.z)) + v.z = 0.0f; + if (!isfinite_safe(v.w)) + v.w = 0.0f; + return v; +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_MATH_FLOAT4_H__ */ diff --git a/intern/cycles/util/math_int2.h b/intern/cycles/util/math_int2.h new file mode 100644 index 00000000000..5b04be92152 --- /dev/null +++ b/intern/cycles/util/math_int2.h @@ -0,0 +1,73 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_MATH_INT2_H__ +#define __UTIL_MATH_INT2_H__ + +#ifndef __UTIL_MATH_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +/******************************************************************************* + * Declaration. + */ + +ccl_device_inline bool operator==(const int2 a, const int2 b); +ccl_device_inline int2 operator+(const int2 &a, const int2 &b); +ccl_device_inline int2 operator+=(int2 &a, const int2 &b); +ccl_device_inline int2 operator-(const int2 &a, const int2 &b); +ccl_device_inline int2 operator*(const int2 &a, const int2 &b); +ccl_device_inline int2 operator/(const int2 &a, const int2 &b); + +/******************************************************************************* + * Definition. 
+ */ + +ccl_device_inline bool operator==(const int2 a, const int2 b) +{ + return (a.x == b.x && a.y == b.y); +} + +ccl_device_inline int2 operator+(const int2 &a, const int2 &b) +{ + return make_int2(a.x + b.x, a.y + b.y); +} + +ccl_device_inline int2 operator+=(int2 &a, const int2 &b) +{ + return a = a + b; +} + +ccl_device_inline int2 operator-(const int2 &a, const int2 &b) +{ + return make_int2(a.x - b.x, a.y - b.y); +} + +ccl_device_inline int2 operator*(const int2 &a, const int2 &b) +{ + return make_int2(a.x * b.x, a.y * b.y); +} + +ccl_device_inline int2 operator/(const int2 &a, const int2 &b) +{ + return make_int2(a.x / b.x, a.y / b.y); +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_MATH_INT2_H__ */ diff --git a/intern/cycles/util/math_int3.h b/intern/cycles/util/math_int3.h new file mode 100644 index 00000000000..128f2cb53b8 --- /dev/null +++ b/intern/cycles/util/math_int3.h @@ -0,0 +1,110 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_MATH_INT3_H__ +#define __UTIL_MATH_INT3_H__ + +#ifndef __UTIL_MATH_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +/******************************************************************************* + * Declaration. 
+ */ + +ccl_device_inline int3 min(int3 a, int3 b); +ccl_device_inline int3 max(int3 a, int3 b); +ccl_device_inline int3 clamp(const int3 &a, int mn, int mx); +ccl_device_inline int3 clamp(const int3 &a, int3 &mn, int mx); + +/******************************************************************************* + * Definition. + */ + +ccl_device_inline int3 min(int3 a, int3 b) +{ +#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) + return int3(_mm_min_epi32(a.m128, b.m128)); +#else + return make_int3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); +#endif +} + +ccl_device_inline int3 max(int3 a, int3 b) +{ +#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) + return int3(_mm_max_epi32(a.m128, b.m128)); +#else + return make_int3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); +#endif +} + +ccl_device_inline int3 clamp(const int3 &a, int mn, int mx) +{ +#ifdef __KERNEL_SSE__ + return min(max(a, make_int3(mn)), make_int3(mx)); +#else + return make_int3(clamp(a.x, mn, mx), clamp(a.y, mn, mx), clamp(a.z, mn, mx)); +#endif +} + +ccl_device_inline int3 clamp(const int3 &a, int3 &mn, int mx) +{ +#ifdef __KERNEL_SSE__ + return min(max(a, mn), make_int3(mx)); +#else + return make_int3(clamp(a.x, mn.x, mx), clamp(a.y, mn.y, mx), clamp(a.z, mn.z, mx)); +#endif +} + +ccl_device_inline bool operator==(const int3 &a, const int3 &b) +{ + return a.x == b.x && a.y == b.y && a.z == b.z; +} + +ccl_device_inline bool operator!=(const int3 &a, const int3 &b) +{ + return !(a == b); +} + +ccl_device_inline bool operator<(const int3 &a, const int3 &b) +{ + return a.x < b.x && a.y < b.y && a.z < b.z; +} + +ccl_device_inline int3 operator+(const int3 &a, const int3 &b) +{ +#ifdef __KERNEL_SSE__ + return int3(_mm_add_epi32(a.m128, b.m128)); +#else + return make_int3(a.x + b.x, a.y + b.y, a.z + b.z); +#endif +} + +ccl_device_inline int3 operator-(const int3 &a, const int3 &b) +{ +#ifdef __KERNEL_SSE__ + return int3(_mm_sub_epi32(a.m128, b.m128)); +#else + return make_int3(a.x - b.x, a.y - 
b.y, a.z - b.z); +#endif +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_MATH_INT3_H__ */ diff --git a/intern/cycles/util/math_int4.h b/intern/cycles/util/math_int4.h new file mode 100644 index 00000000000..9e3f001efc2 --- /dev/null +++ b/intern/cycles/util/math_int4.h @@ -0,0 +1,156 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_MATH_INT4_H__ +#define __UTIL_MATH_INT4_H__ + +#ifndef __UTIL_MATH_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +/******************************************************************************* + * Declaration. 
+ */ + +#ifndef __KERNEL_GPU__ +ccl_device_inline int4 operator+(const int4 &a, const int4 &b); +ccl_device_inline int4 operator+=(int4 &a, const int4 &b); +ccl_device_inline int4 operator>>(const int4 &a, int i); +ccl_device_inline int4 operator<<(const int4 &a, int i); +ccl_device_inline int4 operator<(const int4 &a, const int4 &b); +ccl_device_inline int4 operator>=(const int4 &a, const int4 &b); +ccl_device_inline int4 operator&(const int4 &a, const int4 &b); +ccl_device_inline int4 min(int4 a, int4 b); +ccl_device_inline int4 max(int4 a, int4 b); +ccl_device_inline int4 clamp(const int4 &a, const int4 &mn, const int4 &mx); +ccl_device_inline int4 select(const int4 &mask, const int4 &a, const int4 &b); +#endif /* __KERNEL_GPU__ */ + +/******************************************************************************* + * Definition. + */ + +#ifndef __KERNEL_GPU__ +ccl_device_inline int4 operator+(const int4 &a, const int4 &b) +{ +# ifdef __KERNEL_SSE__ + return int4(_mm_add_epi32(a.m128, b.m128)); +# else + return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); +# endif +} + +ccl_device_inline int4 operator+=(int4 &a, const int4 &b) +{ + return a = a + b; +} + +ccl_device_inline int4 operator>>(const int4 &a, int i) +{ +# ifdef __KERNEL_SSE__ + return int4(_mm_srai_epi32(a.m128, i)); +# else + return make_int4(a.x >> i, a.y >> i, a.z >> i, a.w >> i); +# endif +} + +ccl_device_inline int4 operator<<(const int4 &a, int i) +{ +# ifdef __KERNEL_SSE__ + return int4(_mm_slli_epi32(a.m128, i)); +# else + return make_int4(a.x << i, a.y << i, a.z << i, a.w << i); +# endif +} + +ccl_device_inline int4 operator<(const int4 &a, const int4 &b) +{ +# ifdef __KERNEL_SSE__ + return int4(_mm_cmplt_epi32(a.m128, b.m128)); +# else + return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w); +# endif +} + +ccl_device_inline int4 operator>=(const int4 &a, const int4 &b) +{ +# ifdef __KERNEL_SSE__ + return int4(_mm_xor_si128(_mm_set1_epi32(0xffffffff), _mm_cmplt_epi32(a.m128, 
b.m128))); +# else + return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w); +# endif +} + +ccl_device_inline int4 operator&(const int4 &a, const int4 &b) +{ +# ifdef __KERNEL_SSE__ + return int4(_mm_and_si128(a.m128, b.m128)); +# else + return make_int4(a.x & b.x, a.y & b.y, a.z & b.z, a.w & b.w); +# endif +} + +ccl_device_inline int4 min(int4 a, int4 b) +{ +# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) + return int4(_mm_min_epi32(a.m128, b.m128)); +# else + return make_int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); +# endif +} + +ccl_device_inline int4 max(int4 a, int4 b) +{ +# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) + return int4(_mm_max_epi32(a.m128, b.m128)); +# else + return make_int4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); +# endif +} + +ccl_device_inline int4 clamp(const int4 &a, const int4 &mn, const int4 &mx) +{ + return min(max(a, mn), mx); +} + +ccl_device_inline int4 select(const int4 &mask, const int4 &a, const int4 &b) +{ +# ifdef __KERNEL_SSE__ + const __m128 m = _mm_cvtepi32_ps(mask); + /* TODO(sergey): avoid cvt. */ + return int4(_mm_castps_si128( + _mm_or_ps(_mm_and_ps(m, _mm_castsi128_ps(a)), _mm_andnot_ps(m, _mm_castsi128_ps(b))))); +# else + return make_int4( + (mask.x) ? a.x : b.x, (mask.y) ? a.y : b.y, (mask.z) ? a.z : b.z, (mask.w) ? 
a.w : b.w); +# endif +} + +ccl_device_inline int4 load_int4(const int *v) +{ +# ifdef __KERNEL_SSE__ + return int4(_mm_loadu_si128((__m128i *)v)); +# else + return make_int4(v[0], v[1], v[2], v[3]); +# endif +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_MATH_INT4_H__ */ diff --git a/intern/cycles/util/math_intersect.h b/intern/cycles/util/math_intersect.h new file mode 100644 index 00000000000..0c431a36afb --- /dev/null +++ b/intern/cycles/util/math_intersect.h @@ -0,0 +1,249 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_MATH_INTERSECT_H__ +#define __UTIL_MATH_INTERSECT_H__ + +CCL_NAMESPACE_BEGIN + +/* Ray Intersection */ + +ccl_device bool ray_sphere_intersect(float3 ray_P, + float3 ray_D, + float ray_t, + float3 sphere_P, + float sphere_radius, + ccl_private float3 *isect_P, + ccl_private float *isect_t) +{ + const float3 d = sphere_P - ray_P; + const float radiussq = sphere_radius * sphere_radius; + const float tsq = dot(d, d); + + if (tsq > radiussq) { + /* Ray origin outside sphere. */ + const float tp = dot(d, ray_D); + if (tp < 0.0f) { + /* Ray points away from sphere. */ + return false; + } + const float dsq = tsq - tp * tp; /* Pythagoras. */ + if (dsq > radiussq) { + /* Closest point on ray outside sphere. 
*/ + return false; + } + const float t = tp - sqrtf(radiussq - dsq); /* pythagoras */ + if (t < ray_t) { + *isect_t = t; + *isect_P = ray_P + ray_D * t; + return true; + } + } + return false; +} + +ccl_device bool ray_aligned_disk_intersect(float3 ray_P, + float3 ray_D, + float ray_t, + float3 disk_P, + float disk_radius, + ccl_private float3 *isect_P, + ccl_private float *isect_t) +{ + /* Aligned disk normal. */ + float disk_t; + const float3 disk_N = normalize_len(ray_P - disk_P, &disk_t); + const float div = dot(ray_D, disk_N); + if (UNLIKELY(div == 0.0f)) { + return false; + } + /* Compute t to intersection point. */ + const float t = -disk_t / div; + if (t < 0.0f || t > ray_t) { + return false; + } + /* Test if within radius. */ + float3 P = ray_P + ray_D * t; + if (len_squared(P - disk_P) > disk_radius * disk_radius) { + return false; + } + *isect_P = P; + *isect_t = t; + return true; +} + +ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P, + float3 ray_dir, + float ray_t, +#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + const ssef *ssef_verts, +#else + const float3 tri_a, + const float3 tri_b, + const float3 tri_c, +#endif + ccl_private float *isect_u, + ccl_private float *isect_v, + ccl_private float *isect_t) +{ +#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + typedef ssef float3; + const float3 tri_a(ssef_verts[0]); + const float3 tri_b(ssef_verts[1]); + const float3 tri_c(ssef_verts[2]); + const float3 P(ray_P); + const float3 dir(ray_dir); +#else +# define dot3(a, b) dot(a, b) + const float3 P = ray_P; + const float3 dir = ray_dir; +#endif + + /* Calculate vertices relative to ray origin. */ + const float3 v0 = tri_c - P; + const float3 v1 = tri_a - P; + const float3 v2 = tri_b - P; + + /* Calculate triangle edges. */ + const float3 e0 = v2 - v0; + const float3 e1 = v0 - v1; + const float3 e2 = v1 - v2; + + /* Perform edge tests. 
*/ +#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + const float3 crossU = cross(v2 + v0, e0); + const float3 crossV = cross(v0 + v1, e1); + const float3 crossW = cross(v1 + v2, e2); + + ssef crossX(crossU); + ssef crossY(crossV); + ssef crossZ(crossW); + ssef zero = _mm_setzero_ps(); + _MM_TRANSPOSE4_PS(crossX, crossY, crossZ, zero); + + const ssef dirX(ray_dir.x); + const ssef dirY(ray_dir.y); + const ssef dirZ(ray_dir.z); + + ssef UVWW = madd(crossX, dirX, madd(crossY, dirY, crossZ * dirZ)); +#else /* __KERNEL_SSE2__ */ + const float U = dot(cross(v2 + v0, e0), ray_dir); + const float V = dot(cross(v0 + v1, e1), ray_dir); + const float W = dot(cross(v1 + v2, e2), ray_dir); +#endif /* __KERNEL_SSE2__ */ + +#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + int uvw_sign = movemask(UVWW) & 0x7; + if (uvw_sign != 0) { + if (uvw_sign != 0x7) { + return false; + } + } +#else + const float minUVW = min(U, min(V, W)); + const float maxUVW = max(U, max(V, W)); + + if (minUVW < 0.0f && maxUVW > 0.0f) { + return false; + } +#endif + + /* Calculate geometry normal and denominator. */ + const float3 Ng1 = cross(e1, e0); + // const Vec3vfM Ng1 = stable_triangle_normal(e2,e1,e0); + const float3 Ng = Ng1 + Ng1; + const float den = dot3(Ng, dir); + /* Avoid division by 0. */ + if (UNLIKELY(den == 0.0f)) { + return false; + } + + /* Perform depth test. 
*/ + const float T = dot3(v0, Ng); + const int sign_den = (__float_as_int(den) & 0x80000000); + const float sign_T = xor_signmask(T, sign_den); + if ((sign_T < 0.0f) || (sign_T > ray_t * xor_signmask(den, sign_den))) { + return false; + } + + const float inv_den = 1.0f / den; +#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + UVWW *= inv_den; + _mm_store_ss(isect_u, UVWW); + _mm_store_ss(isect_v, shuffle<1, 1, 3, 3>(UVWW)); +#else + *isect_u = U * inv_den; + *isect_v = V * inv_den; +#endif + *isect_t = T * inv_den; + return true; + +#undef dot3 +} + +/* Tests for an intersection between a ray and a quad defined by + * its midpoint, normal and sides. + * If ellipse is true, hits outside the ellipse that's enclosed by the + * quad are rejected. + */ +ccl_device bool ray_quad_intersect(float3 ray_P, + float3 ray_D, + float ray_mint, + float ray_maxt, + float3 quad_P, + float3 quad_u, + float3 quad_v, + float3 quad_n, + ccl_private float3 *isect_P, + ccl_private float *isect_t, + ccl_private float *isect_u, + ccl_private float *isect_v, + bool ellipse) +{ + /* Perform intersection test. */ + float t = -(dot(ray_P, quad_n) - dot(quad_P, quad_n)) / dot(ray_D, quad_n); + if (t < ray_mint || t > ray_maxt) { + return false; + } + const float3 hit = ray_P + t * ray_D; + const float3 inplane = hit - quad_P; + const float u = dot(inplane, quad_u) / dot(quad_u, quad_u); + if (u < -0.5f || u > 0.5f) { + return false; + } + const float v = dot(inplane, quad_v) / dot(quad_v, quad_v); + if (v < -0.5f || v > 0.5f) { + return false; + } + if (ellipse && (u * u + v * v > 0.25f)) { + return false; + } + /* Store the result. */ + /* TODO(sergey): Check whether we can avoid some checks here. 
*/ + if (isect_P != NULL) + *isect_P = hit; + if (isect_t != NULL) + *isect_t = t; + if (isect_u != NULL) + *isect_u = u + 0.5f; + if (isect_v != NULL) + *isect_v = v + 0.5f; + return true; +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_MATH_INTERSECT_H__ */ diff --git a/intern/cycles/util/math_matrix.h b/intern/cycles/util/math_matrix.h new file mode 100644 index 00000000000..bff7ddb4cee --- /dev/null +++ b/intern/cycles/util/math_matrix.h @@ -0,0 +1,454 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_MATH_MATRIX_H__ +#define __UTIL_MATH_MATRIX_H__ + +CCL_NAMESPACE_BEGIN + +#define MAT(A, size, row, col) A[(row) * (size) + (col)] + +/* Variants that use a constant stride on GPUS. */ +#ifdef __KERNEL_GPU__ +# define MATS(A, n, r, c, s) A[((r) * (n) + (c)) * (s)] +/* Element access when only the lower-triangular elements are stored. */ +# define MATHS(A, r, c, s) A[((r) * ((r) + 1) / 2 + (c)) * (s)] +# define VECS(V, i, s) V[(i) * (s)] +#else +# define MATS(A, n, r, c, s) MAT(A, n, r, c) +# define MATHS(A, r, c, s) A[(r) * ((r) + 1) / 2 + (c)] +# define VECS(V, i, s) V[i] +#endif + +/* Zeroing helpers. 
*/ + +ccl_device_inline void math_vector_zero(ccl_private float *v, int n) +{ + for (int i = 0; i < n; i++) { + v[i] = 0.0f; + } +} + +ccl_device_inline void math_matrix_zero(ccl_private float *A, int n) +{ + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + MAT(A, n, row, col) = 0.0f; + } + } +} + +/* Elementary vector operations. */ + +ccl_device_inline void math_vector_add(ccl_private float *a, + ccl_private const float *ccl_restrict b, + int n) +{ + for (int i = 0; i < n; i++) { + a[i] += b[i]; + } +} + +ccl_device_inline void math_vector_mul(ccl_private float *a, + ccl_private const float *ccl_restrict b, + int n) +{ + for (int i = 0; i < n; i++) { + a[i] *= b[i]; + } +} + +ccl_device_inline void math_vector_mul_strided(ccl_global float *a, + ccl_private const float *ccl_restrict b, + int astride, + int n) +{ + for (int i = 0; i < n; i++) { + a[i * astride] *= b[i]; + } +} + +ccl_device_inline void math_vector_scale(ccl_private float *a, float b, int n) +{ + for (int i = 0; i < n; i++) { + a[i] *= b; + } +} + +ccl_device_inline void math_vector_max(ccl_private float *a, + ccl_private const float *ccl_restrict b, + int n) +{ + for (int i = 0; i < n; i++) { + a[i] = max(a[i], b[i]); + } +} + +ccl_device_inline void math_vec3_add(ccl_private float3 *v, int n, ccl_private float *x, float3 w) +{ + for (int i = 0; i < n; i++) { + v[i] += w * x[i]; + } +} + +ccl_device_inline void math_vec3_add_strided( + ccl_global float3 *v, int n, ccl_private float *x, float3 w, int stride) +{ + for (int i = 0; i < n; i++) { + ccl_global float *elem = (ccl_global float *)(v + i * stride); + atomic_add_and_fetch_float(elem + 0, w.x * x[i]); + atomic_add_and_fetch_float(elem + 1, w.y * x[i]); + atomic_add_and_fetch_float(elem + 2, w.z * x[i]); + } +} + +/* Elementary matrix operations. + * Note: TriMatrix refers to a square matrix that is symmetric, + * and therefore its upper-triangular part isn't stored. 
*/ + +ccl_device_inline void math_trimatrix_add_diagonal(ccl_global float *A, + int n, + float val, + int stride) +{ + for (int row = 0; row < n; row++) { + MATHS(A, row, row, stride) += val; + } +} + +/* Add Gramian matrix of v to A. + * The Gramian matrix of v is vt*v, so element (i,j) is v[i]*v[j]. */ +ccl_device_inline void math_matrix_add_gramian(ccl_private float *A, + int n, + ccl_private const float *ccl_restrict v, + float weight) +{ + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + MAT(A, n, row, col) += v[row] * v[col] * weight; + } + } +} + +/* Add Gramian matrix of v to A. + * The Gramian matrix of v is vt*v, so element (i,j) is v[i]*v[j]. */ +ccl_device_inline void math_trimatrix_add_gramian_strided( + ccl_global float *A, int n, ccl_private const float *ccl_restrict v, float weight, int stride) +{ + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + atomic_add_and_fetch_float(&MATHS(A, row, col, stride), v[row] * v[col] * weight); + } + } +} + +ccl_device_inline void math_trimatrix_add_gramian(ccl_global float *A, + int n, + ccl_private const float *ccl_restrict v, + float weight) +{ + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + MATHS(A, row, col, 1) += v[row] * v[col] * weight; + } + } +} + +/* Transpose matrix A in place. */ +ccl_device_inline void math_matrix_transpose(ccl_global float *A, int n, int stride) +{ + for (int i = 0; i < n; i++) { + for (int j = 0; j < i; j++) { + float temp = MATS(A, n, i, j, stride); + MATS(A, n, i, j, stride) = MATS(A, n, j, i, stride); + MATS(A, n, j, i, stride) = temp; + } + } +} + +/* Solvers for matrix problems */ + +/* In-place Cholesky-Banachiewicz decomposition of the square, positive-definite matrix A + * into a lower triangular matrix L so that A = L*L^T. A is being overwritten by L. + * Also, only the lower triangular part of A is ever accessed. 
*/ +ccl_device void math_trimatrix_cholesky(ccl_global float *A, int n, int stride) +{ + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + float sum_col = MATHS(A, row, col, stride); + for (int k = 0; k < col; k++) { + sum_col -= MATHS(A, row, k, stride) * MATHS(A, col, k, stride); + } + if (row == col) { + sum_col = sqrtf(max(sum_col, 0.0f)); + } + else { + sum_col /= MATHS(A, col, col, stride); + } + MATHS(A, row, col, stride) = sum_col; + } + } +} + +/* Solve A*S=y for S given A and y, + * where A is symmetrical positive-semi-definite and both inputs are destroyed in the process. + * + * We can apply Cholesky decomposition to find a lower triangular L so that L*Lt = A. + * With that we get (L*Lt)*S = L*(Lt*S) = L*b = y, defining b as Lt*S. + * Since L is lower triangular, finding b is relatively easy since y is known. + * Then, the remaining problem is Lt*S = b, which again can be solved easily. + * + * This is useful for solving the normal equation S=inv(Xt*W*X)*Xt*W*y, since Xt*W*X is + * symmetrical positive-semidefinite by construction, + * so we can just use this function with A=Xt*W*X and y=Xt*W*y. */ +ccl_device_inline void math_trimatrix_vec3_solve(ccl_global float *A, + ccl_global float3 *y, + int n, + int stride) +{ + /* Since the first entry of the design row is always 1, the upper-left element of XtWX is a good + * heuristic for the amount of pixels considered (with weighting), + * therefore the amount of correction is scaled based on it. */ + math_trimatrix_add_diagonal(A, n, 3e-7f * A[0], stride); /* Improve the numerical stability. */ + math_trimatrix_cholesky(A, n, stride); /* Replace A with L so that L*Lt = A. */ + + /* Use forward substitution to solve L*b = y, replacing y by b. 
*/ + for (int row = 0; row < n; row++) { + float3 sum = VECS(y, row, stride); + for (int col = 0; col < row; col++) + sum -= MATHS(A, row, col, stride) * VECS(y, col, stride); + VECS(y, row, stride) = sum / MATHS(A, row, row, stride); + } + + /* Use backward substitution to solve Lt*S = b, replacing b by S. */ + for (int row = n - 1; row >= 0; row--) { + float3 sum = VECS(y, row, stride); + for (int col = row + 1; col < n; col++) + sum -= MATHS(A, col, row, stride) * VECS(y, col, stride); + VECS(y, row, stride) = sum / MATHS(A, row, row, stride); + } +} + +/* Perform the Jacobi Eigenvalue Method on matrix A. + * A is assumed to be a symmetrical matrix, therefore only the lower-triangular part is ever + * accessed. The algorithm overwrites the contents of A. + * + * After returning, A will be overwritten with D, which is (almost) diagonal, + * and V will contain the eigenvectors of the original A in its rows (!), + * so that A = V^T*D*V. Therefore, the diagonal elements of D are the (sorted) eigenvalues of A. + */ +ccl_device void math_matrix_jacobi_eigendecomposition(ccl_private float *A, + ccl_global float *V, + int n, + int v_stride) +{ + const float singular_epsilon = 1e-9f; + + for (int row = 0; row < n; row++) { + for (int col = 0; col < n; col++) { + MATS(V, n, row, col, v_stride) = (col == row) ? 1.0f : 0.0f; + } + } + + for (int sweep = 0; sweep < 8; sweep++) { + float off_diagonal = 0.0f; + for (int row = 1; row < n; row++) { + for (int col = 0; col < row; col++) { + off_diagonal += fabsf(MAT(A, n, row, col)); + } + } + if (off_diagonal < 1e-7f) { + /* The matrix has nearly reached diagonal form. + * Since the eigenvalues are only used to determine truncation, their exact values aren't + * required - a relative error of a few ULPs won't matter at all. */ + break; + } + + /* Set the threshold for the small element rotation skip in the first sweep: + * Skip all elements that are less than a tenth of the average off-diagonal element. 
*/ + float threshold = 0.2f * off_diagonal / (n * n); + + for (int row = 1; row < n; row++) { + for (int col = 0; col < row; col++) { + /* Perform a Jacobi rotation on this element that reduces it to zero. */ + float element = MAT(A, n, row, col); + float abs_element = fabsf(element); + + /* If we're in a later sweep and the element already is very small, + * just set it to zero and skip the rotation. */ + if (sweep > 3 && abs_element <= singular_epsilon * fabsf(MAT(A, n, row, row)) && + abs_element <= singular_epsilon * fabsf(MAT(A, n, col, col))) { + MAT(A, n, row, col) = 0.0f; + continue; + } + + if (element == 0.0f) { + continue; + } + + /* If we're in one of the first sweeps and the element is smaller than the threshold, + * skip it. */ + if (sweep < 3 && (abs_element < threshold)) { + continue; + } + + /* Determine rotation: The rotation is characterized by its angle phi - or, + * in the actual implementation, sin(phi) and cos(phi). + * To find those, we first compute their ratio - that might be unstable if the angle + * approaches 90°, so there's a fallback for that case. + * Then, we compute sin(phi) and cos(phi) themselves. */ + float singular_diff = MAT(A, n, row, row) - MAT(A, n, col, col); + float ratio; + if (abs_element > singular_epsilon * fabsf(singular_diff)) { + float cot_2phi = 0.5f * singular_diff / element; + ratio = 1.0f / (fabsf(cot_2phi) + sqrtf(1.0f + cot_2phi * cot_2phi)); + if (cot_2phi < 0.0f) + ratio = -ratio; /* Copy sign. */ + } + else { + ratio = element / singular_diff; + } + + float c = 1.0f / sqrtf(1.0f + ratio * ratio); + float s = ratio * c; + /* To improve numerical stability by avoiding cancellation, the update equations are + * reformulized to use sin(phi) and tan(phi/2) instead. */ + float tan_phi_2 = s / (1.0f + c); + + /* Update the singular values in the diagonal. 
*/ + float singular_delta = ratio * element; + MAT(A, n, row, row) += singular_delta; + MAT(A, n, col, col) -= singular_delta; + + /* Set the element itself to zero. */ + MAT(A, n, row, col) = 0.0f; + + /* Perform the actual rotations on the matrices. */ +#define ROT(M, r1, c1, r2, c2, stride) \ + { \ + float M1 = MATS(M, n, r1, c1, stride); \ + float M2 = MATS(M, n, r2, c2, stride); \ + MATS(M, n, r1, c1, stride) -= s * (M2 + tan_phi_2 * M1); \ + MATS(M, n, r2, c2, stride) += s * (M1 - tan_phi_2 * M2); \ + } + + /* Split into three parts to ensure correct accesses since we only store the + * lower-triangular part of A. */ + for (int i = 0; i < col; i++) + ROT(A, col, i, row, i, 1); + for (int i = col + 1; i < row; i++) + ROT(A, i, col, row, i, 1); + for (int i = row + 1; i < n; i++) + ROT(A, i, col, i, row, 1); + + for (int i = 0; i < n; i++) + ROT(V, col, i, row, i, v_stride); +#undef ROT + } + } + } + + /* Sort eigenvalues and the associated eigenvectors. */ + for (int i = 0; i < n - 1; i++) { + float v = MAT(A, n, i, i); + int k = i; + for (int j = i; j < n; j++) { + if (MAT(A, n, j, j) >= v) { + v = MAT(A, n, j, j); + k = j; + } + } + if (k != i) { + /* Swap eigenvalues. */ + MAT(A, n, k, k) = MAT(A, n, i, i); + MAT(A, n, i, i) = v; + /* Swap eigenvectors. */ + for (int j = 0; j < n; j++) { + float v = MATS(V, n, i, j, v_stride); + MATS(V, n, i, j, v_stride) = MATS(V, n, k, j, v_stride); + MATS(V, n, k, j, v_stride) = v; + } + } + } +} + +#ifdef __KERNEL_SSE3__ +ccl_device_inline void math_vector_zero_sse(float4 *A, int n) +{ + for (int i = 0; i < n; i++) { + A[i] = make_float4(0.0f); + } +} + +ccl_device_inline void math_matrix_zero_sse(float4 *A, int n) +{ + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + MAT(A, n, row, col) = make_float4(0.0f); + } + } +} + +/* Add Gramian matrix of v to A. + * The Gramian matrix of v is v^T*v, so element (i,j) is v[i]*v[j]. 
*/ +ccl_device_inline void math_matrix_add_gramian_sse(float4 *A, + int n, + const float4 *ccl_restrict v, + float4 weight) +{ + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + MAT(A, n, row, col) = MAT(A, n, row, col) + v[row] * v[col] * weight; + } + } +} + +ccl_device_inline void math_vector_add_sse(float4 *V, int n, const float4 *ccl_restrict a) +{ + for (int i = 0; i < n; i++) { + V[i] += a[i]; + } +} + +ccl_device_inline void math_vector_mul_sse(float4 *V, int n, const float4 *ccl_restrict a) +{ + for (int i = 0; i < n; i++) { + V[i] *= a[i]; + } +} + +ccl_device_inline void math_vector_max_sse(float4 *a, const float4 *ccl_restrict b, int n) +{ + for (int i = 0; i < n; i++) { + a[i] = max(a[i], b[i]); + } +} + +ccl_device_inline void math_matrix_hsum(float *A, int n, const float4 *ccl_restrict B) +{ + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + MAT(A, n, row, col) = reduce_add(MAT(B, n, row, col))[0]; + } + } +} +#endif + +#undef MAT + +CCL_NAMESPACE_END + +#endif /* __UTIL_MATH_MATRIX_H__ */ diff --git a/intern/cycles/util/md5.cpp b/intern/cycles/util/md5.cpp new file mode 100644 index 00000000000..47e489b1aed --- /dev/null +++ b/intern/cycles/util/md5.cpp @@ -0,0 +1,387 @@ +/* + * Copyright (C) 1999, 2002 Aladdin Enterprises. All rights reserved. + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. 
+ * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + * + * L. Peter Deutsch + * ghost@aladdin.com + */ + +/* Minor modifications done to remove some code and change style. */ + +#include "util/md5.h" +#include "util/path.h" + +#include +#include + +CCL_NAMESPACE_BEGIN + +#define T_MASK ((uint32_t)~0) +#define T1 /* 0xd76aa478 */ (T_MASK ^ 0x28955b87) +#define T2 /* 0xe8c7b756 */ (T_MASK ^ 0x173848a9) +#define T3 0x242070db +#define T4 /* 0xc1bdceee */ (T_MASK ^ 0x3e423111) +#define T5 /* 0xf57c0faf */ (T_MASK ^ 0x0a83f050) +#define T6 0x4787c62a +#define T7 /* 0xa8304613 */ (T_MASK ^ 0x57cfb9ec) +#define T8 /* 0xfd469501 */ (T_MASK ^ 0x02b96afe) +#define T9 0x698098d8 +#define T10 /* 0x8b44f7af */ (T_MASK ^ 0x74bb0850) +#define T11 /* 0xffff5bb1 */ (T_MASK ^ 0x0000a44e) +#define T12 /* 0x895cd7be */ (T_MASK ^ 0x76a32841) +#define T13 0x6b901122 +#define T14 /* 0xfd987193 */ (T_MASK ^ 0x02678e6c) +#define T15 /* 0xa679438e */ (T_MASK ^ 0x5986bc71) +#define T16 0x49b40821 +#define T17 /* 0xf61e2562 */ (T_MASK ^ 0x09e1da9d) +#define T18 /* 0xc040b340 */ (T_MASK ^ 0x3fbf4cbf) +#define T19 0x265e5a51 +#define T20 /* 0xe9b6c7aa */ (T_MASK ^ 0x16493855) +#define T21 /* 0xd62f105d */ (T_MASK ^ 0x29d0efa2) +#define T22 0x02441453 +#define T23 /* 0xd8a1e681 */ (T_MASK ^ 0x275e197e) +#define T24 /* 0xe7d3fbc8 */ (T_MASK ^ 0x182c0437) +#define T25 0x21e1cde6 +#define T26 /* 0xc33707d6 */ (T_MASK ^ 0x3cc8f829) +#define T27 /* 0xf4d50d87 */ (T_MASK ^ 0x0b2af278) +#define T28 0x455a14ed +#define T29 /* 0xa9e3e905 */ (T_MASK ^ 0x561c16fa) +#define T30 /* 0xfcefa3f8 */ (T_MASK ^ 0x03105c07) +#define T31 0x676f02d9 +#define T32 /* 0x8d2a4c8a */ (T_MASK ^ 0x72d5b375) +#define T33 /* 0xfffa3942 */ (T_MASK ^ 0x0005c6bd) +#define T34 /* 0x8771f681 */ (T_MASK ^ 0x788e097e) +#define T35 0x6d9d6122 +#define T36 /* 0xfde5380c */ 
(T_MASK ^ 0x021ac7f3) +#define T37 /* 0xa4beea44 */ (T_MASK ^ 0x5b4115bb) +#define T38 0x4bdecfa9 +#define T39 /* 0xf6bb4b60 */ (T_MASK ^ 0x0944b49f) +#define T40 /* 0xbebfbc70 */ (T_MASK ^ 0x4140438f) +#define T41 0x289b7ec6 +#define T42 /* 0xeaa127fa */ (T_MASK ^ 0x155ed805) +#define T43 /* 0xd4ef3085 */ (T_MASK ^ 0x2b10cf7a) +#define T44 0x04881d05 +#define T45 /* 0xd9d4d039 */ (T_MASK ^ 0x262b2fc6) +#define T46 /* 0xe6db99e5 */ (T_MASK ^ 0x1924661a) +#define T47 0x1fa27cf8 +#define T48 /* 0xc4ac5665 */ (T_MASK ^ 0x3b53a99a) +#define T49 /* 0xf4292244 */ (T_MASK ^ 0x0bd6ddbb) +#define T50 0x432aff97 +#define T51 /* 0xab9423a7 */ (T_MASK ^ 0x546bdc58) +#define T52 /* 0xfc93a039 */ (T_MASK ^ 0x036c5fc6) +#define T53 0x655b59c3 +#define T54 /* 0x8f0ccc92 */ (T_MASK ^ 0x70f3336d) +#define T55 /* 0xffeff47d */ (T_MASK ^ 0x00100b82) +#define T56 /* 0x85845dd1 */ (T_MASK ^ 0x7a7ba22e) +#define T57 0x6fa87e4f +#define T58 /* 0xfe2ce6e0 */ (T_MASK ^ 0x01d3191f) +#define T59 /* 0xa3014314 */ (T_MASK ^ 0x5cfebceb) +#define T60 0x4e0811a1 +#define T61 /* 0xf7537e82 */ (T_MASK ^ 0x08ac817d) +#define T62 /* 0xbd3af235 */ (T_MASK ^ 0x42c50dca) +#define T63 0x2ad7d2bb +#define T64 /* 0xeb86d391 */ (T_MASK ^ 0x14792c6e) + +void MD5Hash::process(const uint8_t *data /*[64]*/) +{ + uint32_t a = abcd[0], b = abcd[1], c = abcd[2], d = abcd[3]; + uint32_t t; + /* Define storage for little-endian or both types of CPUs. */ + uint32_t xbuf[16]; + const uint32_t *X; + + { + /* + * Determine dynamically whether this is a big-endian or + * little-endian machine, since we can use a more efficient + * algorithm on the latter. + */ + static const int w = 1; + + if (*((const uint8_t *)&w)) /* dynamic little-endian */ + { + /* + * On little-endian machines, we can process properly aligned + * data without copying it. 
+ */ + if (!((data - (const uint8_t *)0) & 3)) { + /* data are properly aligned */ + X = (const uint32_t *)data; + } + else { + /* not aligned */ + memcpy(xbuf, data, 64); + X = xbuf; + } + } + else { /* dynamic big-endian */ + /* + * On big-endian machines, we must arrange the bytes in the + * right order. + */ + const uint8_t *xp = data; + int i; + + X = xbuf; /* (dynamic only) */ + for (i = 0; i < 16; ++i, xp += 4) + xbuf[i] = xp[0] + (xp[1] << 8) + (xp[2] << 16) + (xp[3] << 24); + } + } + +#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) + + /* Round 1. */ + /* Let [abcd k s i] denote the operation + * a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */ +#define F(x, y, z) (((x) & (y)) | (~(x) & (z))) +#define SET(a, b, c, d, k, s, Ti) \ + t = a + F(b, c, d) + X[k] + Ti; \ + a = ROTATE_LEFT(t, s) + b + /* Do the following 16 operations. */ + SET(a, b, c, d, 0, 7, T1); + SET(d, a, b, c, 1, 12, T2); + SET(c, d, a, b, 2, 17, T3); + SET(b, c, d, a, 3, 22, T4); + SET(a, b, c, d, 4, 7, T5); + SET(d, a, b, c, 5, 12, T6); + SET(c, d, a, b, 6, 17, T7); + SET(b, c, d, a, 7, 22, T8); + SET(a, b, c, d, 8, 7, T9); + SET(d, a, b, c, 9, 12, T10); + SET(c, d, a, b, 10, 17, T11); + SET(b, c, d, a, 11, 22, T12); + SET(a, b, c, d, 12, 7, T13); + SET(d, a, b, c, 13, 12, T14); + SET(c, d, a, b, 14, 17, T15); + SET(b, c, d, a, 15, 22, T16); +#undef SET + + /* Round 2. */ + /* Let [abcd k s i] denote the operation + * a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). */ +#define G(x, y, z) (((x) & (z)) | ((y) & ~(z))) +#define SET(a, b, c, d, k, s, Ti) \ + t = a + G(b, c, d) + X[k] + Ti; \ + a = ROTATE_LEFT(t, s) + b + /* Do the following 16 operations. 
*/ + SET(a, b, c, d, 1, 5, T17); + SET(d, a, b, c, 6, 9, T18); + SET(c, d, a, b, 11, 14, T19); + SET(b, c, d, a, 0, 20, T20); + SET(a, b, c, d, 5, 5, T21); + SET(d, a, b, c, 10, 9, T22); + SET(c, d, a, b, 15, 14, T23); + SET(b, c, d, a, 4, 20, T24); + SET(a, b, c, d, 9, 5, T25); + SET(d, a, b, c, 14, 9, T26); + SET(c, d, a, b, 3, 14, T27); + SET(b, c, d, a, 8, 20, T28); + SET(a, b, c, d, 13, 5, T29); + SET(d, a, b, c, 2, 9, T30); + SET(c, d, a, b, 7, 14, T31); + SET(b, c, d, a, 12, 20, T32); +#undef SET + + /* Round 3. */ + /* Let [abcd k s t] denote the operation + * a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s). */ +#define H(x, y, z) ((x) ^ (y) ^ (z)) +#define SET(a, b, c, d, k, s, Ti) \ + t = a + H(b, c, d) + X[k] + Ti; \ + a = ROTATE_LEFT(t, s) + b + /* Do the following 16 operations. */ + SET(a, b, c, d, 5, 4, T33); + SET(d, a, b, c, 8, 11, T34); + SET(c, d, a, b, 11, 16, T35); + SET(b, c, d, a, 14, 23, T36); + SET(a, b, c, d, 1, 4, T37); + SET(d, a, b, c, 4, 11, T38); + SET(c, d, a, b, 7, 16, T39); + SET(b, c, d, a, 10, 23, T40); + SET(a, b, c, d, 13, 4, T41); + SET(d, a, b, c, 0, 11, T42); + SET(c, d, a, b, 3, 16, T43); + SET(b, c, d, a, 6, 23, T44); + SET(a, b, c, d, 9, 4, T45); + SET(d, a, b, c, 12, 11, T46); + SET(c, d, a, b, 15, 16, T47); + SET(b, c, d, a, 2, 23, T48); +#undef SET + + /* Round 4. */ + /* Let [abcd k s t] denote the operation + * a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). */ +#define I(x, y, z) ((y) ^ ((x) | ~(z))) +#define SET(a, b, c, d, k, s, Ti) \ + t = a + I(b, c, d) + X[k] + Ti; \ + a = ROTATE_LEFT(t, s) + b + /* Do the following 16 operations. 
*/ + SET(a, b, c, d, 0, 6, T49); + SET(d, a, b, c, 7, 10, T50); + SET(c, d, a, b, 14, 15, T51); + SET(b, c, d, a, 5, 21, T52); + SET(a, b, c, d, 12, 6, T53); + SET(d, a, b, c, 3, 10, T54); + SET(c, d, a, b, 10, 15, T55); + SET(b, c, d, a, 1, 21, T56); + SET(a, b, c, d, 8, 6, T57); + SET(d, a, b, c, 15, 10, T58); + SET(c, d, a, b, 6, 15, T59); + SET(b, c, d, a, 13, 21, T60); + SET(a, b, c, d, 4, 6, T61); + SET(d, a, b, c, 11, 10, T62); + SET(c, d, a, b, 2, 15, T63); + SET(b, c, d, a, 9, 21, T64); +#undef SET + + /* Then perform the following additions. (That is increment each + * of the four registers by the value it had before this block + * was started.) */ + abcd[0] += a; + abcd[1] += b; + abcd[2] += c; + abcd[3] += d; +} + +MD5Hash::MD5Hash() +{ + count[0] = count[1] = 0; + abcd[0] = 0x67452301; + abcd[1] = /*0xefcdab89*/ T_MASK ^ 0x10325476; + abcd[2] = /*0x98badcfe*/ T_MASK ^ 0x67452301; + abcd[3] = 0x10325476; +} + +MD5Hash::~MD5Hash() +{ +} + +void MD5Hash::append(const uint8_t *data, int nbytes) +{ + const uint8_t *p = data; + int left = nbytes; + int offset = (count[0] >> 3) & 63; + uint32_t nbits = (uint32_t)(nbytes << 3); + + if (nbytes <= 0) + return; + + /* Update the message length. */ + count[1] += nbytes >> 29; + count[0] += nbits; + if (count[0] < nbits) + count[1]++; + + /* Process an initial partial block. */ + if (offset) { + int copy = (offset + nbytes > 64 ? 64 - offset : nbytes); + + memcpy(buf + offset, p, copy); + if (offset + copy < 64) + return; + p += copy; + left -= copy; + process(buf); + } + + /* Process full blocks. */ + for (; left >= 64; p += 64, left -= 64) + process(p); + + /* Process a final partial block. 
*/ + if (left) + memcpy(buf, p, left); +} + +void MD5Hash::append(const string &str) +{ + if (str.size()) { + append((const uint8_t *)str.c_str(), str.size()); + } +} + +bool MD5Hash::append_file(const string &filepath) +{ + FILE *f = path_fopen(filepath, "rb"); + + if (!f) { + fprintf(stderr, "MD5: failed to open file %s\n", filepath.c_str()); + return false; + } + + const size_t buffer_size = 1024; + uint8_t buffer[buffer_size]; + size_t n; + + do { + n = fread(buffer, 1, buffer_size, f); + append(buffer, n); + } while (n == buffer_size); + + bool success = (ferror(f) == 0); + + fclose(f); + + return success; +} + +void MD5Hash::finish(uint8_t digest[16]) +{ + static const uint8_t pad[64] = {0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + uint8_t data[8]; + int i; + + /* Save the length before padding. */ + for (i = 0; i < 8; ++i) + data[i] = (uint8_t)(count[i >> 2] >> ((i & 3) << 3)); + + /* Pad to 56 bytes mod 64. */ + append(pad, ((55 - (count[0] >> 3)) & 63) + 1); + /* Append the length. */ + append(data, 8); + + for (i = 0; i < 16; ++i) + digest[i] = (uint8_t)(abcd[i >> 2] >> ((i & 3) << 3)); +} + +string MD5Hash::get_hex() +{ + uint8_t digest[16]; + char buf[16 * 2 + 1]; + + finish(digest); + + for (int i = 0; i < 16; i++) + sprintf(buf + i * 2, "%02X", (unsigned int)digest[i]); + buf[sizeof(buf) - 1] = '\0'; + + return string(buf); +} + +string util_md5_string(const string &str) +{ + MD5Hash md5; + md5.append((uint8_t *)str.c_str(), str.size()); + return md5.get_hex(); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/md5.h b/intern/cycles/util/md5.h new file mode 100644 index 00000000000..cc7cbef6a49 --- /dev/null +++ b/intern/cycles/util/md5.h @@ -0,0 +1,61 @@ +/* + * Copyright (C) 1999, 2002 Aladdin Enterprises. All rights reserved. 
+ * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + * + * L. Peter Deutsch + * ghost@aladdin.com + */ + +/* MD5 + * + * Simple MD5 hash computation, used by disk cache. Adapted from external + * code, with minor code modifications done to remove some unused code and + * change code style. */ + +#ifndef __UTIL_MD5_H__ +#define __UTIL_MD5_H__ + +#include "util/string.h" +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +class MD5Hash { + public: + MD5Hash(); + ~MD5Hash(); + + void append(const uint8_t *data, int size); + void append(const string &str); + bool append_file(const string &filepath); + string get_hex(); + + protected: + void process(const uint8_t *data); + void finish(uint8_t digest[16]); + + uint32_t count[2]; /* message length in bits, LSW first. 
*/ + uint32_t abcd[4]; /* digest buffer */ + uint8_t buf[64]; /* accumulate block */ +}; + +string util_md5_string(const string &str); + +CCL_NAMESPACE_END + +#endif /* __UTIL_MD5_H__ */ diff --git a/intern/cycles/util/murmurhash.cpp b/intern/cycles/util/murmurhash.cpp new file mode 100644 index 00000000000..9ba0a282cc2 --- /dev/null +++ b/intern/cycles/util/murmurhash.cpp @@ -0,0 +1,126 @@ +/* + * Copyright 2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This is taken from alShaders/Cryptomatte/MurmurHash3.h: + * + * MurmurHash3 was written by Austin Appleby, and is placed in the public + * domain. The author hereby disclaims copyright to this source code. + */ + +#include +#include + +#include "util/algorithm.h" +#include "util/murmurhash.h" + +#if defined(_MSC_VER) +# define ROTL32(x, y) _rotl(x, y) +# define ROTL64(x, y) _rotl64(x, y) +# define BIG_CONSTANT(x) (x) +#else +ccl_device_inline uint32_t rotl32(uint32_t x, int8_t r) +{ + return (x << r) | (x >> (32 - r)); +} +# define ROTL32(x, y) rotl32(x, y) +# define BIG_CONSTANT(x) (x##LLU) +#endif + +CCL_NAMESPACE_BEGIN + +/* Block read - if your platform needs to do endian-swapping or can only + * handle aligned reads, do the conversion here. 
*/ +ccl_device_inline uint32_t mm_hash_getblock32(const uint32_t *p, int i) +{ + return p[i]; +} + +/* Finalization mix - force all bits of a hash block to avalanche */ +ccl_device_inline uint32_t mm_hash_fmix32(uint32_t h) +{ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + return h; +} + +uint32_t util_murmur_hash3(const void *key, int len, uint32_t seed) +{ + const uint8_t *data = (const uint8_t *)key; + const int nblocks = len / 4; + + uint32_t h1 = seed; + + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; + + const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4); + + for (int i = -nblocks; i; i++) { + uint32_t k1 = mm_hash_getblock32(blocks, i); + + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + + h1 ^= k1; + h1 = ROTL32(h1, 13); + h1 = h1 * 5 + 0xe6546b64; + } + + const uint8_t *tail = (const uint8_t *)(data + nblocks * 4); + + uint32_t k1 = 0; + + switch (len & 3) { + case 3: + k1 ^= tail[2] << 16; + ATTR_FALLTHROUGH; + case 2: + k1 ^= tail[1] << 8; + ATTR_FALLTHROUGH; + case 1: + k1 ^= tail[0]; + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + h1 ^= k1; + } + + h1 ^= len; + h1 = mm_hash_fmix32(h1); + return h1; +} + +/* This is taken from the cryptomatte specification 1.0 */ +float util_hash_to_float(uint32_t hash) +{ + uint32_t mantissa = hash & ((1 << 23) - 1); + uint32_t exponent = (hash >> 23) & ((1 << 8) - 1); + exponent = max(exponent, (uint32_t)1); + exponent = min(exponent, (uint32_t)254); + exponent = exponent << 23; + uint32_t sign = (hash >> 31); + sign = sign << 31; + uint32_t float_bits = sign | exponent | mantissa; + float f; + memcpy(&f, &float_bits, sizeof(uint32_t)); + return f; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/murmurhash.h b/intern/cycles/util/murmurhash.h new file mode 100644 index 00000000000..7c303db6ffa --- /dev/null +++ b/intern/cycles/util/murmurhash.h @@ -0,0 +1,29 @@ +/* + * Copyright 2018 Blender Foundation + * + * Licensed under the Apache 
License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_MURMURHASH_H__ +#define __UTIL_MURMURHASH_H__ + +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +uint32_t util_murmur_hash3(const void *key, int len, uint32_t seed); +float util_hash_to_float(uint32_t hash); + +CCL_NAMESPACE_END + +#endif /* __UTIL_MURMURHASH_H__ */ diff --git a/intern/cycles/util/opengl.h b/intern/cycles/util/opengl.h new file mode 100644 index 00000000000..7a8d5eec1f9 --- /dev/null +++ b/intern/cycles/util/opengl.h @@ -0,0 +1,25 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_OPENGL_H__ +#define __UTIL_OPENGL_H__ + +/* OpenGL header includes, used everywhere we use OpenGL, to deal with + * platform differences in one central place. 
*/ + +#include + +#endif /* __UTIL_OPENGL_H__ */ diff --git a/intern/cycles/util/openimagedenoise.h b/intern/cycles/util/openimagedenoise.h new file mode 100644 index 00000000000..cc7b14ae18f --- /dev/null +++ b/intern/cycles/util/openimagedenoise.h @@ -0,0 +1,44 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_OPENIMAGEDENOISE_H__ +#define __UTIL_OPENIMAGEDENOISE_H__ + +#ifdef WITH_OPENIMAGEDENOISE +# include +#endif + +#include "util/system.h" + +CCL_NAMESPACE_BEGIN + +static inline bool openimagedenoise_supported() +{ +#ifdef WITH_OPENIMAGEDENOISE +# ifdef __APPLE__ + /* Always supported through Accelerate framework BNNS. */ + return true; +# else + return system_cpu_support_sse41(); +# endif +#else + return false; +#endif +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_OPENIMAGEDENOISE_H__ */ diff --git a/intern/cycles/util/openvdb.h b/intern/cycles/util/openvdb.h new file mode 100644 index 00000000000..ae5326e3199 --- /dev/null +++ b/intern/cycles/util/openvdb.h @@ -0,0 +1,68 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_OPENVDB_H__ +#define __UTIL_OPENVDB_H__ + +#ifdef WITH_OPENVDB +# include + +namespace openvdb { + +using Vec4fTree = tree::Tree4::Type; +using Vec4fGrid = Grid; + +/* Apply operation to known grid types. */ +template +bool grid_type_operation(const openvdb::GridBase::ConstPtr &grid, OpType &&op) +{ + if (grid->isType()) { + return op.template operator()(grid); + } + else if (grid->isType()) { + return op.template operator()(grid); + } + else if (grid->isType()) { + return op.template operator()(grid); + } + else if (grid->isType()) { + return op.template operator()(grid); + } + else if (grid->isType()) { + return op.template operator()(grid); + } + else if (grid->isType()) { + return op.template operator()(grid); + } + else if (grid->isType()) { + return op.template operator()(grid); + } + else if (grid->isType()) { + return op.template operator()(grid); + } + else if (grid->isType()) { + return op.template operator()(grid); + } + else { + return false; + } +} + +}; // namespace openvdb + +#endif + +#endif /* __UTIL_OPENVDB_H__ */ diff --git a/intern/cycles/util/optimization.h b/intern/cycles/util/optimization.h new file mode 100644 index 00000000000..7ecd3893cf4 --- /dev/null +++ b/intern/cycles/util/optimization.h @@ -0,0 +1,77 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_OPTIMIZATION_H__ +#define __UTIL_OPTIMIZATION_H__ + +#ifndef __KERNEL_GPU__ + +/* x86 + * + * Compile a regular, SSE2 and SSE3 kernel. */ + +# if defined(i386) || defined(_M_IX86) + +/* We require minimum SSE2 support on x86, so auto enable. */ +# define __KERNEL_SSE2__ +# ifdef WITH_KERNEL_SSE2 +# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 +# endif +# ifdef WITH_KERNEL_SSE3 +# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 +# endif + +/* x86-64 + * + * Compile a regular (includes SSE2), SSE3, SSE 4.1, AVX and AVX2 kernel. */ + +# elif defined(__x86_64__) || defined(_M_X64) + +/* SSE2 is always available on x86-64 CPUs, so auto enable */ +# define __KERNEL_SSE2__ +/* no SSE2 kernel on x86-64, part of regular kernel */ +# ifdef WITH_KERNEL_SSE3 +# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 +# endif +# ifdef WITH_KERNEL_SSE41 +# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 +# endif +# ifdef WITH_KERNEL_AVX +# define WITH_CYCLES_OPTIMIZED_KERNEL_AVX +# endif +# ifdef WITH_KERNEL_AVX2 +# define WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 +# endif + +/* Arm Neon + * + * Compile a SSE4 kernel emulated with Neon. Most code is shared with + * SSE, some specializations for performance and compatibility are made + * by testing for __KERNEL_NEON__. 
*/ + +# elif defined(__ARM_NEON) && defined(WITH_SSE2NEON) + +# define __KERNEL_NEON__ +# define __KERNEL_SSE__ +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSE41__ + +# endif + +#endif + +#endif /* __UTIL_OPTIMIZATION_H__ */ diff --git a/intern/cycles/util/param.h b/intern/cycles/util/param.h new file mode 100644 index 00000000000..3f8e2d6d700 --- /dev/null +++ b/intern/cycles/util/param.h @@ -0,0 +1,40 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_PARAM_H__ +#define __UTIL_PARAM_H__ + +/* Parameter value lists from OpenImageIO are used to store custom properties + * on various data, which can then later be used in shaders. 
 */ + +#include <OpenImageIO/paramlist.h> +#include <OpenImageIO/typedesc.h> +#include <OpenImageIO/ustring.h> + +CCL_NAMESPACE_BEGIN + +OIIO_NAMESPACE_USING + +static constexpr TypeDesc TypeFloat2(TypeDesc::FLOAT, TypeDesc::VEC2); +static constexpr TypeDesc TypeRGBA(TypeDesc::FLOAT, TypeDesc::VEC4, TypeDesc::COLOR); +static constexpr TypeDesc TypeFloatArray4(TypeDesc::FLOAT, + TypeDesc::SCALAR, + TypeDesc::NOSEMANTICS, + 4); + +CCL_NAMESPACE_END + +#endif /* __UTIL_PARAM_H__ */ diff --git a/intern/cycles/util/path.cpp b/intern/cycles/util/path.cpp new file mode 100644 index 00000000000..5704c4ef8ef --- /dev/null +++ b/intern/cycles/util/path.cpp @@ -0,0 +1,781 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "util/path.h" +#include "util/md5.h" +#include "util/string.h" + +#include +#include +#include + +OIIO_NAMESPACE_USING + +#include + +#include + +#if defined(_WIN32) +# define DIR_SEP '\\' +# define DIR_SEP_ALT '/' +# include +#else +# define DIR_SEP '/' +# include +# include +# include +# include +#endif + +#ifdef HAVE_SHLWAPI_H +# include +#endif + +#include "util/map.h" +#include "util/windows.h" + +CCL_NAMESPACE_BEGIN + +#ifdef _WIN32 +# if defined(_MSC_VER) || defined(__MINGW64__) +typedef struct _stat64 path_stat_t; +# elif defined(__MINGW32__) +typedef struct _stati64 path_stat_t; +# else +typedef struct _stat path_stat_t; +# endif +# ifndef S_ISDIR +# define S_ISDIR(x) (((x)&_S_IFDIR) == _S_IFDIR) +# endif +#else +typedef struct stat path_stat_t; +#endif + +static string cached_path = ""; +static string cached_user_path = ""; +static string cached_temp_path = ""; +static string cached_xdg_cache_path = ""; + +namespace { + +#ifdef _WIN32 +class directory_iterator { + public: + class path_info { + public: + path_info(const string &path, const WIN32_FIND_DATAW &find_data) + : path_(path), find_data_(find_data) + { + } + + string path() + { + return path_join(path_, string_from_wstring(find_data_.cFileName)); + } + + protected: + const string &path_; + const WIN32_FIND_DATAW &find_data_; + }; + + directory_iterator() : path_info_("", find_data_), h_find_(INVALID_HANDLE_VALUE) + { + } + + explicit directory_iterator(const string &path) : path_(path), path_info_(path, find_data_) + { + string wildcard = path; + if (wildcard[wildcard.size() - 1] != DIR_SEP) { + wildcard += DIR_SEP; + } + wildcard += "*"; + h_find_ = FindFirstFileW(string_to_wstring(wildcard).c_str(), &find_data_); + if (h_find_ != INVALID_HANDLE_VALUE) { + skip_dots(); + } + } + + ~directory_iterator() + { + if (h_find_ != INVALID_HANDLE_VALUE) { + FindClose(h_find_); + } + } + + directory_iterator &operator++() + { + step(); + return *this; + } + + path_info *operator->() + { + 
return &path_info_; + } + + bool operator!=(const directory_iterator &other) + { + return h_find_ != other.h_find_; + } + + protected: + bool step() + { + if (do_step()) { + return skip_dots(); + } + return false; + } + + bool do_step() + { + if (h_find_ != INVALID_HANDLE_VALUE) { + bool result = FindNextFileW(h_find_, &find_data_) == TRUE; + if (!result) { + FindClose(h_find_); + h_find_ = INVALID_HANDLE_VALUE; + } + return result; + } + return false; + } + + bool skip_dots() + { + while (wcscmp(find_data_.cFileName, L".") == 0 || wcscmp(find_data_.cFileName, L"..") == 0) { + if (!do_step()) { + return false; + } + } + return true; + } + + string path_; + path_info path_info_; + WIN32_FIND_DATAW find_data_; + HANDLE h_find_; +}; +#else /* _WIN32 */ + +class directory_iterator { + public: + class path_info { + public: + explicit path_info(const string &path) : path_(path), entry_(NULL) + { + } + + string path() + { + return path_join(path_, entry_->d_name); + } + + void current_entry_set(const struct dirent *entry) + { + entry_ = entry; + } + + protected: + const string &path_; + const struct dirent *entry_; + }; + + directory_iterator() : path_info_(""), name_list_(NULL), num_entries_(-1), cur_entry_(-1) + { + } + + explicit directory_iterator(const string &path) : path_(path), path_info_(path_), cur_entry_(0) + { + num_entries_ = scandir(path.c_str(), &name_list_, NULL, alphasort); + if (num_entries_ < 0) { + perror("scandir"); + } + else { + skip_dots(); + } + } + + ~directory_iterator() + { + destroy_name_list(); + } + + directory_iterator &operator++() + { + step(); + return *this; + } + + path_info *operator->() + { + path_info_.current_entry_set(name_list_[cur_entry_]); + return &path_info_; + } + + bool operator!=(const directory_iterator &other) + { + return name_list_ != other.name_list_; + } + + protected: + bool step() + { + if (do_step()) { + return skip_dots(); + } + return false; + } + + bool do_step() + { + ++cur_entry_; + if (cur_entry_ >= 
num_entries_) { + destroy_name_list(); + return false; + } + return true; + } + + /* Skip . and .. folders. */ + bool skip_dots() + { + while (strcmp(name_list_[cur_entry_]->d_name, ".") == 0 || + strcmp(name_list_[cur_entry_]->d_name, "..") == 0) { + if (!step()) { + return false; + } + } + return true; + } + + void destroy_name_list() + { + if (name_list_ == NULL) { + return; + } + for (int i = 0; i < num_entries_; ++i) { + free(name_list_[i]); + } + free(name_list_); + name_list_ = NULL; + } + + string path_; + path_info path_info_; + struct dirent **name_list_; + int num_entries_, cur_entry_; +}; + +#endif /* _WIN32 */ + +size_t find_last_slash(const string &path) +{ + for (size_t i = 0; i < path.size(); ++i) { + size_t index = path.size() - 1 - i; +#ifdef _WIN32 + if (path[index] == DIR_SEP || path[index] == DIR_SEP_ALT) +#else + if (path[index] == DIR_SEP) +#endif + { + return index; + } + } + return string::npos; +} + +} /* namespace */ + +static char *path_specials(const string &sub) +{ + static bool env_init = false; + static char *env_shader_path; + static char *env_source_path; + if (!env_init) { + env_shader_path = getenv("CYCLES_SHADER_PATH"); + /* NOTE: It is KERNEL in env variable for compatibility reasons. 
*/ + env_source_path = getenv("CYCLES_KERNEL_PATH"); + env_init = true; + } + if (env_shader_path != NULL && sub == "shader") { + return env_shader_path; + } + else if (env_shader_path != NULL && sub == "source") { + return env_source_path; + } + return NULL; +} + +#if defined(__linux__) || defined(__APPLE__) +static string path_xdg_cache_get() +{ + const char *home = getenv("XDG_CACHE_HOME"); + if (home) { + return string(home); + } + else { + home = getenv("HOME"); + if (home == NULL) { + home = getpwuid(getuid())->pw_dir; + } + return path_join(string(home), ".cache"); + } +} +#endif + +void path_init(const string &path, const string &user_path, const string &temp_path) +{ + cached_path = path; + cached_user_path = user_path; + cached_temp_path = temp_path; + +#ifdef _MSC_VER + // workaround for https://svn.boost.org/trac/boost/ticket/6320 + // indirectly init boost codec here since it's not thread safe, and can + // cause crashes when it happens in multithreaded image load + OIIO::Filesystem::exists(path); +#endif +} + +string path_get(const string &sub) +{ + char *special = path_specials(sub); + if (special != NULL) + return special; + + if (cached_path == "") + cached_path = path_dirname(Sysutil::this_program_path()); + + return path_join(cached_path, sub); +} + +string path_user_get(const string &sub) +{ + if (cached_user_path == "") + cached_user_path = path_dirname(Sysutil::this_program_path()); + + return path_join(cached_user_path, sub); +} + +string path_cache_get(const string &sub) +{ +#if defined(__linux__) || defined(__APPLE__) + if (cached_xdg_cache_path == "") { + cached_xdg_cache_path = path_xdg_cache_get(); + } + string result = path_join(cached_xdg_cache_path, "cycles"); + return path_join(result, sub); +#else + /* TODO(sergey): What that should be on Windows? 
*/ + return path_user_get(path_join("cache", sub)); +#endif +} + +string path_temp_get(const string &sub) +{ + if (cached_temp_path == "") { + cached_temp_path = Filesystem::temp_directory_path(); + } + + return path_join(cached_temp_path, sub); +} + +#if defined(__linux__) || defined(__APPLE__) +string path_xdg_home_get(const string &sub = ""); +#endif + +string path_filename(const string &path) +{ + size_t index = find_last_slash(path); + if (index != string::npos) { + /* Corner cases to match boost behavior. */ +#ifndef _WIN32 + if (index == 0 && path.size() == 1) { + return path; + } +#endif + if (index == path.size() - 1) { +#ifdef _WIN32 + if (index == 2) { + return string(1, DIR_SEP); + } +#endif + return "."; + } + return path.substr(index + 1, path.size() - index - 1); + } + return path; +} + +string path_dirname(const string &path) +{ + size_t index = find_last_slash(path); + if (index != string::npos) { +#ifndef _WIN32 + if (index == 0 && path.size() > 1) { + return string(1, DIR_SEP); + } +#endif + return path.substr(0, index); + } + return ""; +} + +string path_join(const string &dir, const string &file) +{ + if (dir.size() == 0) { + return file; + } + if (file.size() == 0) { + return dir; + } + string result = dir; +#ifndef _WIN32 + if (result[result.size() - 1] != DIR_SEP && file[0] != DIR_SEP) +#else + if (result[result.size() - 1] != DIR_SEP && result[result.size() - 1] != DIR_SEP_ALT && + file[0] != DIR_SEP && file[0] != DIR_SEP_ALT) +#endif + { + result += DIR_SEP; + } + result += file; + return result; +} + +string path_escape(const string &path) +{ + string result = path; + string_replace(result, " ", "\\ "); + return result; +} + +bool path_is_relative(const string &path) +{ +#ifdef _WIN32 +# ifdef HAVE_SHLWAPI_H + return PathIsRelative(path.c_str()); +# else /* HAVE_SHLWAPI_H */ + if (path.size() >= 3) { + return !(((path[0] >= 'a' && path[0] <= 'z') || (path[0] >= 'A' && path[0] <= 'Z')) && + path[1] == ':' && path[2] == DIR_SEP); + } + 
return true; +# endif /* HAVE_SHLWAPI_H */ +#else /* _WIN32 */ + if (path.size() == 0) { + return 1; + } + return path[0] != DIR_SEP; +#endif /* _WIN32 */ +} + +#ifdef _WIN32 +/* Add a slash if the UNC path points to a share. */ +static string path_unc_add_slash_to_share(const string &path) +{ + size_t slash_after_server = path.find(DIR_SEP, 2); + if (slash_after_server != string::npos) { + size_t slash_after_share = path.find(DIR_SEP, slash_after_server + 1); + if (slash_after_share == string::npos) { + return path + DIR_SEP; + } + } + return path; +} + +/* Convert: + * \\?\UNC\server\share\folder\... to \\server\share\folder\... + * \\?\C:\ to C:\ and \\?\C:\folder\... to C:\folder\... + */ +static string path_unc_to_short(const string &path) +{ + size_t len = path.size(); + if ((len > 3) && (path[0] == DIR_SEP) && (path[1] == DIR_SEP) && (path[2] == '?') && + ((path[3] == DIR_SEP) || (path[3] == DIR_SEP_ALT))) { + if ((len > 5) && (path[5] == ':')) { + return path.substr(4, len - 4); + } + else if ((len > 7) && (path.substr(4, 3) == "UNC") && + ((path[7] == DIR_SEP) || (path[7] == DIR_SEP_ALT))) { + return "\\\\" + path.substr(8, len - 8); + } + } + return path; +} + +static string path_cleanup_unc(const string &path) +{ + string result = path_unc_to_short(path); + if (path.size() > 2) { + /* It's possible path is now a non-UNC. */ + if (result[0] == DIR_SEP && result[1] == DIR_SEP) { + return path_unc_add_slash_to_share(result); + } + } + return result; +} + +/* Make path compatible for stat() functions. */ +static string path_make_compatible(const string &path) +{ + string result = path; + /* In Windows stat() doesn't recognize dir ending on a slash. */ + if (result.size() > 3 && result[result.size() - 1] == DIR_SEP) { + result.resize(result.size() - 1); + } + /* Clean up UNC path. 
 */ + if ((path.size() >= 3) && (path[0] == DIR_SEP) && (path[1] == DIR_SEP)) { + result = path_cleanup_unc(result); + } + /* Make sure volume-only path ends up with a directory separator. */ + if (result.size() == 2 && result[1] == ':') { + result += DIR_SEP; + } + return result; +} + +static int path_wstat(const wstring &path_wc, path_stat_t *st) +{ +# if defined(_MSC_VER) || defined(__MINGW64__) + return _wstat64(path_wc.c_str(), st); +# elif defined(__MINGW32__) + return _wstati64(path_wc.c_str(), st); +# else + return _wstat(path_wc.c_str(), st); +# endif +} + +static int path_stat(const string &path, path_stat_t *st) +{ + wstring path_wc = string_to_wstring(path); + return path_wstat(path_wc, st); +} +#else /* _WIN32 */ +static int path_stat(const string &path, path_stat_t *st) +{ + return stat(path.c_str(), st); +} +#endif /* _WIN32 */ + +size_t path_file_size(const string &path) +{ + path_stat_t st; + if (path_stat(path, &st) != 0) { + return -1; + } + return st.st_size; +} + +bool path_exists(const string &path) +{ +#ifdef _WIN32 + string fixed_path = path_make_compatible(path); + wstring path_wc = string_to_wstring(fixed_path); + path_stat_t st; + if (path_wstat(path_wc, &st) != 0) { + return false; + } + return st.st_mode != 0; +#else /* _WIN32 */ + struct stat st; + if (stat(path.c_str(), &st) != 0) { + return 0; + } + return st.st_mode != 0; +#endif /* _WIN32 */ +} + +bool path_is_directory(const string &path) +{ + path_stat_t st; + if (path_stat(path, &st) != 0) { + return false; + } + return S_ISDIR(st.st_mode); +} + +static void path_files_md5_hash_recursive(MD5Hash &hash, const string &dir) +{ + if (path_exists(dir)) { + directory_iterator it(dir), it_end; + + for (; it != it_end; ++it) { + if (path_is_directory(it->path())) { + path_files_md5_hash_recursive(hash, it->path()); + } + else { + string filepath = it->path(); + + hash.append((const uint8_t *)filepath.c_str(), filepath.size()); + hash.append_file(filepath); + } + } + } +} + +string 
path_files_md5_hash(const string &dir) +{ + /* computes md5 hash of all files in the directory */ + MD5Hash hash; + + path_files_md5_hash_recursive(hash, dir); + + return hash.get_hex(); +} + +static bool create_directories_recursivey(const string &path) +{ + if (path_is_directory(path)) { + /* Directory already exists, nothing to do. */ + return true; + } + if (path_exists(path)) { + /* File exists and it's not a directory. */ + return false; + } + + string parent = path_dirname(path); + if (parent.size() > 0 && parent != path) { + if (!create_directories_recursivey(parent)) { + return false; + } + } + +#ifdef _WIN32 + wstring path_wc = string_to_wstring(path); + return _wmkdir(path_wc.c_str()) == 0; +#else + return mkdir(path.c_str(), 0777) == 0; +#endif +} + +void path_create_directories(const string &filepath) +{ + string path = path_dirname(filepath); + create_directories_recursivey(path); +} + +bool path_write_binary(const string &path, const vector &binary) +{ + path_create_directories(path); + + /* write binary file from memory */ + FILE *f = path_fopen(path, "wb"); + + if (!f) + return false; + + if (binary.size() > 0) + fwrite(&binary[0], sizeof(uint8_t), binary.size(), f); + + fclose(f); + + return true; +} + +bool path_write_text(const string &path, string &text) +{ + vector binary(text.length(), 0); + std::copy(text.begin(), text.end(), binary.begin()); + + return path_write_binary(path, binary); +} + +bool path_read_binary(const string &path, vector &binary) +{ + /* read binary file into memory */ + FILE *f = path_fopen(path, "rb"); + + if (!f) { + binary.resize(0); + return false; + } + + binary.resize(path_file_size(path)); + + if (binary.size() == 0) { + fclose(f); + return false; + } + + if (fread(&binary[0], sizeof(uint8_t), binary.size(), f) != binary.size()) { + fclose(f); + return false; + } + + fclose(f); + + return true; +} + +bool path_read_text(const string &path, string &text) +{ + vector binary; + + if (!path_exists(path) || 
!path_read_binary(path, binary)) + return false; + + const char *str = (const char *)&binary[0]; + size_t size = binary.size(); + text = string(str, size); + + return true; +} + +uint64_t path_modified_time(const string &path) +{ + path_stat_t st; + if (path_stat(path, &st) != 0) { + return 0; + } + return st.st_mtime; +} + +bool path_remove(const string &path) +{ + return remove(path.c_str()) == 0; +} + +FILE *path_fopen(const string &path, const string &mode) +{ +#ifdef _WIN32 + wstring path_wc = string_to_wstring(path); + wstring mode_wc = string_to_wstring(mode); + return _wfopen(path_wc.c_str(), mode_wc.c_str()); +#else + return fopen(path.c_str(), mode.c_str()); +#endif +} + +void path_cache_clear_except(const string &name, const set &except) +{ + string dir = path_user_get("cache"); + + if (path_exists(dir)) { + directory_iterator it(dir), it_end; + + for (; it != it_end; ++it) { + string filename = path_filename(it->path()); + + if (string_startswith(filename, name.c_str())) + if (except.find(filename) == except.end()) + path_remove(it->path()); + } + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/path.h b/intern/cycles/util/path.h new file mode 100644 index 00000000000..a1394555302 --- /dev/null +++ b/intern/cycles/util/path.h @@ -0,0 +1,74 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __UTIL_PATH_H__ +#define __UTIL_PATH_H__ + +/* Utility functions to get paths to files distributed with the program. For + * the standalone apps, paths are relative to the executable, for dynamically + * linked libraries, the path to the library may be set with path_init, which + * then makes all paths relative to that. */ + +#include + +#include "util/set.h" +#include "util/string.h" +#include "util/types.h" +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +/* program paths */ +void path_init(const string &path = "", const string &user_path = "", const string &tmp_path = ""); +string path_get(const string &sub = ""); +string path_user_get(const string &sub = ""); +string path_temp_get(const string &sub = ""); +string path_cache_get(const string &sub = ""); + +/* path string manipulation */ +string path_filename(const string &path); +string path_dirname(const string &path); +string path_join(const string &dir, const string &file); +string path_escape(const string &path); +bool path_is_relative(const string &path); + +/* file info */ +size_t path_file_size(const string &path); +bool path_exists(const string &path); +bool path_is_directory(const string &path); +string path_files_md5_hash(const string &dir); +uint64_t path_modified_time(const string &path); + +/* directory utility */ +void path_create_directories(const string &path); + +/* file read/write utilities */ +FILE *path_fopen(const string &path, const string &mode); + +bool path_write_binary(const string &path, const vector &binary); +bool path_write_text(const string &path, string &text); +bool path_read_binary(const string &path, vector &binary); +bool path_read_text(const string &path, string &text); + +/* File manipulation. 
*/ +bool path_remove(const string &path); + +/* cache utility */ +void path_cache_clear_except(const string &name, const set &except); + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/util/profiling.cpp b/intern/cycles/util/profiling.cpp new file mode 100644 index 00000000000..55b35b7320f --- /dev/null +++ b/intern/cycles/util/profiling.cpp @@ -0,0 +1,174 @@ +/* + * Copyright 2011-2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util/profiling.h" +#include "util/algorithm.h" +#include "util/foreach.h" +#include "util/set.h" + +CCL_NAMESPACE_BEGIN + +Profiler::Profiler() : do_stop_worker(true), worker(NULL) +{ +} + +Profiler::~Profiler() +{ + assert(worker == NULL); +} + +void Profiler::run() +{ + uint64_t updates = 0; + auto start_time = std::chrono::system_clock::now(); + while (!do_stop_worker) { + thread_scoped_lock lock(mutex); + foreach (ProfilingState *state, states) { + uint32_t cur_event = state->event; + int32_t cur_shader = state->shader; + int32_t cur_object = state->object; + + /* The state reads/writes should be atomic, but just to be sure + * check the values for validity anyways. 
*/ + if (cur_event < PROFILING_NUM_EVENTS) { + event_samples[cur_event]++; + } + + if (cur_shader >= 0 && cur_shader < shader_samples.size()) { + shader_samples[cur_shader]++; + } + + if (cur_object >= 0 && cur_object < object_samples.size()) { + object_samples[cur_object]++; + } + } + lock.unlock(); + + /* Relative waits always overshoot a bit, so just waiting 1ms every + * time would cause the sampling to drift over time. + * By keeping track of the absolute time, the wait times correct themselves - + * if one wait overshoots a lot, the next one will be shorter to compensate. */ + updates++; + std::this_thread::sleep_until(start_time + updates * std::chrono::milliseconds(1)); + } +} + +void Profiler::reset(int num_shaders, int num_objects) +{ + bool running = (worker != NULL); + if (running) { + stop(); + } + + /* Resize and clear the accumulation vectors. */ + shader_hits.assign(num_shaders, 0); + object_hits.assign(num_objects, 0); + + event_samples.assign(PROFILING_NUM_EVENTS, 0); + shader_samples.assign(num_shaders, 0); + object_samples.assign(num_objects, 0); + + if (running) { + start(); + } +} + +void Profiler::start() +{ + assert(worker == NULL); + do_stop_worker = false; + worker = new thread(function_bind(&Profiler::run, this)); +} + +void Profiler::stop() +{ + if (worker != NULL) { + do_stop_worker = true; + + worker->join(); + delete worker; + worker = NULL; + } +} + +void Profiler::add_state(ProfilingState *state) +{ + thread_scoped_lock lock(mutex); + + /* Add the ProfilingState from the list of sampled states. */ + assert(std::find(states.begin(), states.end(), state) == states.end()); + states.push_back(state); + + /* Resize thread-local hit counters. */ + state->shader_hits.assign(shader_hits.size(), 0); + state->object_hits.assign(object_hits.size(), 0); + + /* Initialize the state. 
*/ + state->event = PROFILING_UNKNOWN; + state->shader = -1; + state->object = -1; + state->active = true; +} + +void Profiler::remove_state(ProfilingState *state) +{ + thread_scoped_lock lock(mutex); + + /* Remove the ProfilingState from the list of sampled states. */ + states.erase(std::remove(states.begin(), states.end(), state), states.end()); + state->active = false; + + /* Merge thread-local hit counters. */ + assert(shader_hits.size() == state->shader_hits.size()); + for (int i = 0; i < shader_hits.size(); i++) { + shader_hits[i] += state->shader_hits[i]; + } + + assert(object_hits.size() == state->object_hits.size()); + for (int i = 0; i < object_hits.size(); i++) { + object_hits[i] += state->object_hits[i]; + } +} + +uint64_t Profiler::get_event(ProfilingEvent event) +{ + assert(worker == NULL); + return event_samples[event]; +} + +bool Profiler::get_shader(int shader, uint64_t &samples, uint64_t &hits) +{ + assert(worker == NULL); + if (shader_samples[shader] == 0) { + return false; + } + samples = shader_samples[shader]; + hits = shader_hits[shader]; + return true; +} + +bool Profiler::get_object(int object, uint64_t &samples, uint64_t &hits) +{ + assert(worker == NULL); + if (object_samples[object] == 0) { + return false; + } + samples = object_samples[object]; + hits = object_hits[object]; + return true; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/profiling.h b/intern/cycles/util/profiling.h new file mode 100644 index 00000000000..b30aac90879 --- /dev/null +++ b/intern/cycles/util/profiling.h @@ -0,0 +1,180 @@ +/* + * Copyright 2011-2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_PROFILING_H__ +#define __UTIL_PROFILING_H__ + +#include + +#include "util/map.h" +#include "util/thread.h" +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +enum ProfilingEvent : uint32_t { + PROFILING_UNKNOWN, + PROFILING_RAY_SETUP, + + PROFILING_INTERSECT_CLOSEST, + PROFILING_INTERSECT_SUBSURFACE, + PROFILING_INTERSECT_SHADOW, + PROFILING_INTERSECT_VOLUME_STACK, + + PROFILING_SHADE_SURFACE_SETUP, + PROFILING_SHADE_SURFACE_EVAL, + PROFILING_SHADE_SURFACE_DIRECT_LIGHT, + PROFILING_SHADE_SURFACE_INDIRECT_LIGHT, + PROFILING_SHADE_SURFACE_AO, + PROFILING_SHADE_SURFACE_PASSES, + + PROFILING_SHADE_VOLUME_SETUP, + PROFILING_SHADE_VOLUME_INTEGRATE, + PROFILING_SHADE_VOLUME_DIRECT_LIGHT, + PROFILING_SHADE_VOLUME_INDIRECT_LIGHT, + + PROFILING_SHADE_SHADOW_SETUP, + PROFILING_SHADE_SHADOW_SURFACE, + PROFILING_SHADE_SHADOW_VOLUME, + + PROFILING_SHADE_LIGHT_SETUP, + PROFILING_SHADE_LIGHT_EVAL, + + PROFILING_NUM_EVENTS, +}; + +/* Contains the current execution state of a worker thread. + * These values are constantly updated by the worker. + * Periodically the profiler thread will wake up, read them + * and update its internal counters based on it. + * + * Atomics aren't needed here since we're only doing direct + * writes and reads to (4-byte-aligned) uint32_t, which is + * guaranteed to be atomic on x86 since the 486. + * Memory ordering is not guaranteed but does not matter. 
+ * + * And even on other architectures, the extremely rare corner + * case of reading an intermediate state could at worst result + * in a single incorrect sample. */ +struct ProfilingState { + volatile uint32_t event = PROFILING_UNKNOWN; + volatile int32_t shader = -1; + volatile int32_t object = -1; + volatile bool active = false; + + vector shader_hits; + vector object_hits; +}; + +class Profiler { + public: + Profiler(); + ~Profiler(); + + void reset(int num_shaders, int num_objects); + + void start(); + void stop(); + + void add_state(ProfilingState *state); + void remove_state(ProfilingState *state); + + uint64_t get_event(ProfilingEvent event); + bool get_shader(int shader, uint64_t &samples, uint64_t &hits); + bool get_object(int object, uint64_t &samples, uint64_t &hits); + + protected: + void run(); + + /* Tracks how often the worker was in each ProfilingEvent while sampling, + * so multiplying the values by the sample frequency (currently 1ms) + * gives the approximate time spent in each state. */ + vector event_samples; + vector shader_samples; + vector object_samples; + + /* Tracks the total amounts every object/shader was hit. + * Used to evaluate relative cost, written by the render thread. + * Indexed by the shader and object IDs that the kernel also uses + * to index __object_flag and __shaders. 
*/ + vector shader_hits; + vector object_hits; + + volatile bool do_stop_worker; + thread *worker; + + thread_mutex mutex; + vector states; +}; + +class ProfilingHelper { + public: + ProfilingHelper(ProfilingState *state, ProfilingEvent event) : state(state) + { + previous_event = state->event; + state->event = event; + } + + ~ProfilingHelper() + { + state->event = previous_event; + } + + inline void set_event(ProfilingEvent event) + { + state->event = event; + } + + protected: + ProfilingState *state; + uint32_t previous_event; +}; + +class ProfilingWithShaderHelper : public ProfilingHelper { + public: + ProfilingWithShaderHelper(ProfilingState *state, ProfilingEvent event) + : ProfilingHelper(state, event) + { + } + + ~ProfilingWithShaderHelper() + { + state->object = -1; + state->shader = -1; + } + + inline void set_shader(int object, int shader) + { + if (state->active) { + state->shader = shader; + state->object = object; + + if (shader >= 0) { + assert(shader < state->shader_hits.size()); + state->shader_hits[shader]++; + } + + if (object >= 0) { + assert(object < state->object_hits.size()); + state->object_hits[object]++; + } + } + } +}; + +CCL_NAMESPACE_END + +#endif /* __UTIL_PROFILING_H__ */ diff --git a/intern/cycles/util/progress.h b/intern/cycles/util/progress.h new file mode 100644 index 00000000000..4b0ff08aa7e --- /dev/null +++ b/intern/cycles/util/progress.h @@ -0,0 +1,370 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_PROGRESS_H__ +#define __UTIL_PROGRESS_H__ + +/* Progress + * + * Simple class to communicate progress status messages, timing information, + * update notifications from a job running in another thread. All methods + * except for the constructor/destructor are thread safe. */ + +#include "util/function.h" +#include "util/string.h" +#include "util/thread.h" +#include "util/time.h" + +CCL_NAMESPACE_BEGIN + +class Progress { + public: + Progress() + { + pixel_samples = 0; + total_pixel_samples = 0; + current_tile_sample = 0; + rendered_tiles = 0; + denoised_tiles = 0; + start_time = time_dt(); + render_start_time = time_dt(); + end_time = 0.0; + status = "Initializing"; + substatus = ""; + sync_status = ""; + sync_substatus = ""; + update_cb = function_null; + cancel = false; + cancel_message = ""; + error = false; + error_message = ""; + cancel_cb = function_null; + } + + Progress(Progress &progress) + { + *this = progress; + } + + Progress &operator=(Progress &progress) + { + thread_scoped_lock lock(progress.progress_mutex); + + progress.get_status(status, substatus); + + pixel_samples = progress.pixel_samples; + total_pixel_samples = progress.total_pixel_samples; + current_tile_sample = progress.get_current_sample(); + + return *this; + } + + void reset() + { + pixel_samples = 0; + total_pixel_samples = 0; + current_tile_sample = 0; + rendered_tiles = 0; + denoised_tiles = 0; + start_time = time_dt(); + render_start_time = time_dt(); + end_time = 0.0; + status = "Initializing"; + substatus = ""; + sync_status = ""; + sync_substatus = ""; + cancel = false; + cancel_message = ""; + error = false; + error_message = ""; + } + + /* cancel */ + void set_cancel(const string &cancel_message_) + { + thread_scoped_lock lock(progress_mutex); + cancel_message = cancel_message_; + cancel = true; + } + + bool get_cancel() const + { + if (!cancel && 
cancel_cb) + cancel_cb(); + + return cancel; + } + + string get_cancel_message() const + { + thread_scoped_lock lock(progress_mutex); + return cancel_message; + } + + void set_cancel_callback(function<void()> function) + { + cancel_cb = function; + } + + /* error */ + void set_error(const string &error_message_) + { + thread_scoped_lock lock(progress_mutex); + error_message = error_message_; + error = true; + /* If error happens we also stop rendering. */ + cancel_message = error_message_; + cancel = true; + } + + bool get_error() const + { + return error; + } + + string get_error_message() const + { + thread_scoped_lock lock(progress_mutex); + return error_message; + } + + /* tile and timing information */ + + void set_start_time() + { + thread_scoped_lock lock(progress_mutex); + + start_time = time_dt(); + end_time = 0.0; + } + + void set_render_start_time() + { + thread_scoped_lock lock(progress_mutex); + + render_start_time = time_dt(); + } + + void add_skip_time(const scoped_timer &start_timer, bool only_render) + { + double skip_time = time_dt() - start_timer.get_start(); + + render_start_time += skip_time; + if (!only_render) { + start_time += skip_time; + } + } + + void get_time(double &total_time_, double &render_time_) const + { + thread_scoped_lock lock(progress_mutex); + + double time = (end_time > 0) ?
end_time : time_dt(); + + total_time_ = time - start_time; + render_time_ = time - render_start_time; + } + + void set_end_time() + { + end_time = time_dt(); + } + + void reset_sample() + { + thread_scoped_lock lock(progress_mutex); + + pixel_samples = 0; + current_tile_sample = 0; + rendered_tiles = 0; + denoised_tiles = 0; + } + + void set_total_pixel_samples(uint64_t total_pixel_samples_) + { + thread_scoped_lock lock(progress_mutex); + + total_pixel_samples = total_pixel_samples_; + } + + float get_progress() const + { + thread_scoped_lock lock(progress_mutex); + + if (total_pixel_samples > 0) { + return ((float)pixel_samples) / total_pixel_samples; + } + return 0.0f; + } + + void add_samples(uint64_t pixel_samples_, int tile_sample) + { + thread_scoped_lock lock(progress_mutex); + + pixel_samples += pixel_samples_; + current_tile_sample = tile_sample; + } + + void add_samples_update(uint64_t pixel_samples_, int tile_sample) + { + add_samples(pixel_samples_, tile_sample); + set_update(); + } + + void add_finished_tile(bool denoised) + { + thread_scoped_lock lock(progress_mutex); + + if (denoised) { + denoised_tiles++; + } + else { + rendered_tiles++; + } + } + + int get_current_sample() const + { + thread_scoped_lock lock(progress_mutex); + /* Note that the value here always belongs to the last tile that updated, + * so it's only useful if there is only one active tile. 
*/ + return current_tile_sample; + } + + int get_rendered_tiles() const + { + thread_scoped_lock lock(progress_mutex); + return rendered_tiles; + } + + int get_denoised_tiles() const + { + thread_scoped_lock lock(progress_mutex); + return denoised_tiles; + } + + /* status messages */ + + void set_status(const string &status_, const string &substatus_ = "") + { + { + thread_scoped_lock lock(progress_mutex); + status = status_; + substatus = substatus_; + } + + set_update(); + } + + void set_substatus(const string &substatus_) + { + { + thread_scoped_lock lock(progress_mutex); + substatus = substatus_; + } + + set_update(); + } + + void set_sync_status(const string &status_, const string &substatus_ = "") + { + { + thread_scoped_lock lock(progress_mutex); + sync_status = status_; + sync_substatus = substatus_; + } + + set_update(); + } + + void set_sync_substatus(const string &substatus_) + { + { + thread_scoped_lock lock(progress_mutex); + sync_substatus = substatus_; + } + + set_update(); + } + + void get_status(string &status_, string &substatus_) const + { + thread_scoped_lock lock(progress_mutex); + + if (sync_status != "") { + status_ = sync_status; + substatus_ = sync_substatus; + } + else { + status_ = status; + substatus_ = substatus; + } + } + + /* callback */ + + void set_update() + { + if (update_cb) { + thread_scoped_lock lock(update_mutex); + update_cb(); + } + } + + void set_update_callback(function<void()> function) + { + update_cb = function; + } + + protected: + mutable thread_mutex progress_mutex; + mutable thread_mutex update_mutex; + function<void()> update_cb; + function<void()> cancel_cb; + + /* pixel_samples counts how many samples have been rendered over all pixel, not just per pixel. + * This makes the progress estimate more accurate when tiles with different sizes are used. + * + * total_pixel_samples is the total amount of pixel samples that will be rendered.
*/ + uint64_t pixel_samples, total_pixel_samples; + /* Stores the current sample count of the last tile that called the update function. + * It's used to display the sample count if only one tile is active. */ + int current_tile_sample; + /* Stores the number of tiles that's already finished. + * Used to determine whether all but the last tile are finished rendering, + * in which case the current_tile_sample is displayed. */ + int rendered_tiles, denoised_tiles; + + double start_time, render_start_time; + /* End time written when render is done, so it doesn't keep increasing on redraws. */ + double end_time; + + string status; + string substatus; + + string sync_status; + string sync_substatus; + + volatile bool cancel; + string cancel_message; + + volatile bool error; + string error_message; +}; + +CCL_NAMESPACE_END + +#endif /* __UTIL_PROGRESS_H__ */ diff --git a/intern/cycles/util/projection.h b/intern/cycles/util/projection.h new file mode 100644 index 00000000000..8d822a3777d --- /dev/null +++ b/intern/cycles/util/projection.h @@ -0,0 +1,217 @@ +/* + * Copyright 2011-2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_PROJECTION_H__ +#define __UTIL_PROJECTION_H__ + +#include "util/transform.h" + +CCL_NAMESPACE_BEGIN + +/* 4x4 projection matrix, perspective or orthographic. 
*/ + +typedef struct ProjectionTransform { + float4 x, y, z, w; /* rows */ + +#ifndef __KERNEL_GPU__ + ProjectionTransform() + { + } + + explicit ProjectionTransform(const Transform &tfm) + : x(tfm.x), y(tfm.y), z(tfm.z), w(make_float4(0.0f, 0.0f, 0.0f, 1.0f)) + { + } +#endif +} ProjectionTransform; + +typedef struct PerspectiveMotionTransform { + ProjectionTransform pre; + ProjectionTransform post; +} PerspectiveMotionTransform; + +/* Functions */ + +ccl_device_inline float3 transform_perspective(ccl_private const ProjectionTransform *t, + const float3 a) +{ + float4 b = make_float4(a.x, a.y, a.z, 1.0f); + float3 c = make_float3(dot(t->x, b), dot(t->y, b), dot(t->z, b)); + float w = dot(t->w, b); + + return (w != 0.0f) ? c / w : zero_float3(); +} + +ccl_device_inline float3 transform_perspective_direction(ccl_private const ProjectionTransform *t, + const float3 a) +{ + float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z, + a.x * t->y.x + a.y * t->y.y + a.z * t->y.z, + a.x * t->z.x + a.y * t->z.y + a.z * t->z.z); + + return c; +} + +#ifndef __KERNEL_GPU__ + +ccl_device_inline Transform projection_to_transform(const ProjectionTransform &a) +{ + Transform tfm = {a.x, a.y, a.z}; + return tfm; +} + +ccl_device_inline ProjectionTransform projection_transpose(const ProjectionTransform &a) +{ + ProjectionTransform t; + + t.x.x = a.x.x; + t.x.y = a.y.x; + t.x.z = a.z.x; + t.x.w = a.w.x; + t.y.x = a.x.y; + t.y.y = a.y.y; + t.y.z = a.z.y; + t.y.w = a.w.y; + t.z.x = a.x.z; + t.z.y = a.y.z; + t.z.z = a.z.z; + t.z.w = a.w.z; + t.w.x = a.x.w; + t.w.y = a.y.w; + t.w.z = a.z.w; + t.w.w = a.w.w; + + return t; +} + +ProjectionTransform projection_inverse(const ProjectionTransform &a); + +ccl_device_inline ProjectionTransform make_projection(float a, + float b, + float c, + float d, + float e, + float f, + float g, + float h, + float i, + float j, + float k, + float l, + float m, + float n, + float o, + float p) +{ + ProjectionTransform t; + + t.x.x = a; + t.x.y = b; + 
t.x.z = c; + t.x.w = d; + t.y.x = e; + t.y.y = f; + t.y.z = g; + t.y.w = h; + t.z.x = i; + t.z.y = j; + t.z.z = k; + t.z.w = l; + t.w.x = m; + t.w.y = n; + t.w.z = o; + t.w.w = p; + + return t; +} +ccl_device_inline ProjectionTransform projection_identity() +{ + return make_projection(1.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 1.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 1.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 1.0f); +} + +ccl_device_inline ProjectionTransform operator*(const ProjectionTransform &a, + const ProjectionTransform &b) +{ + ProjectionTransform c = projection_transpose(b); + ProjectionTransform t; + + t.x = make_float4(dot(a.x, c.x), dot(a.x, c.y), dot(a.x, c.z), dot(a.x, c.w)); + t.y = make_float4(dot(a.y, c.x), dot(a.y, c.y), dot(a.y, c.z), dot(a.y, c.w)); + t.z = make_float4(dot(a.z, c.x), dot(a.z, c.y), dot(a.z, c.z), dot(a.z, c.w)); + t.w = make_float4(dot(a.w, c.x), dot(a.w, c.y), dot(a.w, c.z), dot(a.w, c.w)); + + return t; +} + +ccl_device_inline ProjectionTransform operator*(const ProjectionTransform &a, const Transform &b) +{ + return a * ProjectionTransform(b); +} + +ccl_device_inline ProjectionTransform operator*(const Transform &a, const ProjectionTransform &b) +{ + return ProjectionTransform(a) * b; +} + +ccl_device_inline void print_projection(const char *label, const ProjectionTransform &t) +{ + print_float4(label, t.x); + print_float4(label, t.y); + print_float4(label, t.z); + print_float4(label, t.w); + printf("\n"); +} + +ccl_device_inline ProjectionTransform projection_perspective(float fov, float n, float f) +{ + ProjectionTransform persp = make_projection( + 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, f / (f - n), -f * n / (f - n), 0, 0, 1, 0); + + float inv_angle = 1.0f / tanf(0.5f * fov); + + Transform scale = transform_scale(inv_angle, inv_angle, 1); + + return scale * persp; +} + +ccl_device_inline ProjectionTransform projection_orthographic(float znear, float zfar) +{ + Transform t = transform_scale(1.0f, 1.0f, 1.0f / (zfar - znear)) * + 
transform_translate(0.0f, 0.0f, -znear); + + return ProjectionTransform(t); +} + +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_PROJECTION_H__ */ diff --git a/intern/cycles/util/queue.h b/intern/cycles/util/queue.h new file mode 100644 index 00000000000..622f4fe3e47 --- /dev/null +++ b/intern/cycles/util/queue.h @@ -0,0 +1,28 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_QUEUE_H__ +#define __UTIL_QUEUE_H__ + +#include <queue> + +CCL_NAMESPACE_BEGIN + +using std::queue; + +CCL_NAMESPACE_END + +#endif /* __UTIL_QUEUE_H__ */ diff --git a/intern/cycles/util/rect.h b/intern/cycles/util/rect.h new file mode 100644 index 00000000000..79d64b917b7 --- /dev/null +++ b/intern/cycles/util/rect.h @@ -0,0 +1,75 @@ +/* + * Copyright 2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef __UTIL_RECT_H__ +#define __UTIL_RECT_H__ + +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* Rectangles are represented as a int4 containing the coordinates of the lower-left and + * upper-right corners in the order (x0, y0, x1, y1). */ + +ccl_device_inline int4 rect_from_shape(int x0, int y0, int w, int h) +{ + return make_int4(x0, y0, x0 + w, y0 + h); +} + +ccl_device_inline int4 rect_expand(int4 rect, int d) +{ + return make_int4(rect.x - d, rect.y - d, rect.z + d, rect.w + d); +} + +/* Returns the intersection of two rects. */ +ccl_device_inline int4 rect_clip(int4 a, int4 b) +{ + return make_int4(max(a.x, b.x), max(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); +} + +ccl_device_inline bool rect_is_valid(int4 rect) +{ + return (rect.z > rect.x) && (rect.w > rect.y); +} + +/* Returns the local row-major index of the pixel inside the rect. */ +ccl_device_inline int coord_to_local_index(int4 rect, int x, int y) +{ + int w = rect.z - rect.x; + return (y - rect.y) * w + (x - rect.x); +} + +/* Finds the coordinates of a pixel given by its row-major index in the rect, + * and returns whether the pixel is inside it. */ +ccl_device_inline bool local_index_to_coord(int4 rect, + int idx, + ccl_private int *x, + ccl_private int *y) +{ + int w = rect.z - rect.x; + *x = (idx % w) + rect.x; + *y = (idx / w) + rect.y; + return (*y < rect.w); +} + +ccl_device_inline int rect_size(int4 rect) +{ + return (rect.z - rect.x) * (rect.w - rect.y); +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_RECT_H__ */ diff --git a/intern/cycles/util/semaphore.h b/intern/cycles/util/semaphore.h new file mode 100644 index 00000000000..8da8a232ba2 --- /dev/null +++ b/intern/cycles/util/semaphore.h @@ -0,0 +1,61 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_SEMAPHORE_H__ +#define __UTIL_SEMAPHORE_H__ + +#include "util/thread.h" + +CCL_NAMESPACE_BEGIN + +/* Counting Semaphore + * + * To restrict concurrent access to a resource to a specified number + * of threads. Similar to std::counting_semaphore from C++20. */ + +class thread_counting_semaphore { + public: + explicit thread_counting_semaphore(const int count) : count(count) + { + } + + thread_counting_semaphore(const thread_counting_semaphore &) = delete; + + void acquire() + { + thread_scoped_lock lock(mutex); + while (count == 0) { + condition.wait(lock); + } + count--; + } + + void release() + { + thread_scoped_lock lock(mutex); + count++; + condition.notify_one(); + } + + protected: + thread_mutex mutex; + thread_condition_variable condition; + int count; +}; + +CCL_NAMESPACE_END + +#endif /* __UTIL_SEMAPHORE_H__ */ diff --git a/intern/cycles/util/set.h b/intern/cycles/util/set.h new file mode 100644 index 00000000000..298e1f7729a --- /dev/null +++ b/intern/cycles/util/set.h @@ -0,0 +1,34 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_SET_H__ +#define __UTIL_SET_H__ + +#include <set> +#include <unordered_set> + +#if defined(_MSC_VER) && (_MSC_VER >= 1900) +# include <iterator> +#endif + +CCL_NAMESPACE_BEGIN + +using std::set; +using std::unordered_set; + +CCL_NAMESPACE_END + +#endif /* __UTIL_SET_H__ */ diff --git a/intern/cycles/util/simd.cpp b/intern/cycles/util/simd.cpp new file mode 100644 index 00000000000..089444bb6cc --- /dev/null +++ b/intern/cycles/util/simd.cpp @@ -0,0 +1,44 @@ +/* + * Copyright 2011-2013 Intel Corporation + * Modifications Copyright 2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#if (defined(WITH_KERNEL_SSE2)) || (defined(WITH_KERNEL_NATIVE) && defined(__SSE2__)) + +# define __KERNEL_SSE2__ +# include "util/simd.h" + +CCL_NAMESPACE_BEGIN + +const __m128 _mm_lookupmask_ps[16] = {_mm_castsi128_ps(_mm_set_epi32(0, 0, 0, 0)), + _mm_castsi128_ps(_mm_set_epi32(0, 0, 0, -1)), + _mm_castsi128_ps(_mm_set_epi32(0, 0, -1, 0)), + _mm_castsi128_ps(_mm_set_epi32(0, 0, -1, -1)), + _mm_castsi128_ps(_mm_set_epi32(0, -1, 0, 0)), + _mm_castsi128_ps(_mm_set_epi32(0, -1, 0, -1)), + _mm_castsi128_ps(_mm_set_epi32(0, -1, -1, 0)), + _mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1)), + _mm_castsi128_ps(_mm_set_epi32(-1, 0, 0, 0)), + _mm_castsi128_ps(_mm_set_epi32(-1, 0, 0, -1)), + _mm_castsi128_ps(_mm_set_epi32(-1, 0, -1, 0)), + _mm_castsi128_ps(_mm_set_epi32(-1, 0, -1, -1)), + _mm_castsi128_ps(_mm_set_epi32(-1, -1, 0, 0)), + _mm_castsi128_ps(_mm_set_epi32(-1, -1, 0, -1)), + _mm_castsi128_ps(_mm_set_epi32(-1, -1, -1, 0)), + _mm_castsi128_ps(_mm_set_epi32(-1, -1, -1, -1))}; + +CCL_NAMESPACE_END + +#endif // WITH_KERNEL_SSE2 diff --git a/intern/cycles/util/simd.h b/intern/cycles/util/simd.h new file mode 100644 index 00000000000..cc4950891d0 --- /dev/null +++ b/intern/cycles/util/simd.h @@ -0,0 +1,572 @@ +/* + * Copyright 2011-2013 Intel Corporation + * Modifications Copyright 2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __UTIL_SIMD_TYPES_H__ +#define __UTIL_SIMD_TYPES_H__ + +#include +#include + +#include "util/defines.h" + +/* SSE Intrinsics includes + * + * We assume __KERNEL_SSEX__ flags to have been defined at this point. + * + * MinGW64 has conflicting declarations for these SSE headers in . + * Since we can't avoid including , better only include that */ +#if defined(FREE_WINDOWS64) +# include "util/windows.h" +#elif defined(_MSC_VER) +# include +#elif (defined(__x86_64__) || defined(__i386__)) +# include +#elif defined(__KERNEL_NEON__) +# define SSE2NEON_PRECISE_MINMAX 1 +# include +#endif + +/* Floating Point Control, for Embree. */ +#if defined(__x86_64__) || defined(_M_X64) +# define SIMD_SET_FLUSH_TO_ZERO \ + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); \ + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); +#else +# define SIMD_SET_FLUSH_TO_ZERO +#endif + +CCL_NAMESPACE_BEGIN + +/* Data structures used by SSE classes. */ +#ifdef __KERNEL_SSE2__ + +extern const __m128 _mm_lookupmask_ps[16]; + +static struct TrueTy { + __forceinline operator bool() const + { + return true; + } +} True ccl_attr_maybe_unused; + +static struct FalseTy { + __forceinline operator bool() const + { + return false; + } +} False ccl_attr_maybe_unused; + +static struct ZeroTy { + __forceinline operator float() const + { + return 0; + } + __forceinline operator int() const + { + return 0; + } +} zero ccl_attr_maybe_unused; + +static struct OneTy { + __forceinline operator float() const + { + return 1; + } + __forceinline operator int() const + { + return 1; + } +} one ccl_attr_maybe_unused; + +static struct NegInfTy { + __forceinline operator float() const + { + return -std::numeric_limits::infinity(); + } + __forceinline operator int() const + { + return std::numeric_limits::min(); + } +} neg_inf ccl_attr_maybe_unused; + +static struct PosInfTy { + __forceinline operator float() const + { + return std::numeric_limits::infinity(); + } + __forceinline operator int() const + { + 
return std::numeric_limits::max(); + } +} inf ccl_attr_maybe_unused, pos_inf ccl_attr_maybe_unused; + +static struct StepTy { +} step ccl_attr_maybe_unused; + +#endif + +/* Utilities used by Neon */ +#if defined(__KERNEL_NEON__) +template type shuffle_neon(const type &a) +{ + if (i0 == i1 && i0 == i2 && i0 == i3) { + return type(vdupq_laneq_s32(int32x4_t(a), i0)); + } + static const uint8_t tbl[16] = {(i0 * 4) + 0, + (i0 * 4) + 1, + (i0 * 4) + 2, + (i0 * 4) + 3, + (i1 * 4) + 0, + (i1 * 4) + 1, + (i1 * 4) + 2, + (i1 * 4) + 3, + (i2 * 4) + 0, + (i2 * 4) + 1, + (i2 * 4) + 2, + (i2 * 4) + 3, + (i3 * 4) + 0, + (i3 * 4) + 1, + (i3 * 4) + 2, + (i3 * 4) + 3}; + + return type(vqtbl1q_s8(int8x16_t(a), *(uint8x16_t *)tbl)); +} + +template +type shuffle_neon(const type &a, const type &b) +{ + if (&a == &b) { + static const uint8_t tbl[16] = {(i0 * 4) + 0, + (i0 * 4) + 1, + (i0 * 4) + 2, + (i0 * 4) + 3, + (i1 * 4) + 0, + (i1 * 4) + 1, + (i1 * 4) + 2, + (i1 * 4) + 3, + (i2 * 4) + 0, + (i2 * 4) + 1, + (i2 * 4) + 2, + (i2 * 4) + 3, + (i3 * 4) + 0, + (i3 * 4) + 1, + (i3 * 4) + 2, + (i3 * 4) + 3}; + + return type(vqtbl1q_s8(int8x16_t(b), *(uint8x16_t *)tbl)); + } + else { + + static const uint8_t tbl[16] = {(i0 * 4) + 0, + (i0 * 4) + 1, + (i0 * 4) + 2, + (i0 * 4) + 3, + (i1 * 4) + 0, + (i1 * 4) + 1, + (i1 * 4) + 2, + (i1 * 4) + 3, + (i2 * 4) + 0 + 16, + (i2 * 4) + 1 + 16, + (i2 * 4) + 2 + 16, + (i2 * 4) + 3 + 16, + (i3 * 4) + 0 + 16, + (i3 * 4) + 1 + 16, + (i3 * 4) + 2 + 16, + (i3 * 4) + 3 + 16}; + + return type(vqtbl2q_s8((int8x16x2_t){int8x16_t(a), int8x16_t(b)}, *(uint8x16_t *)tbl)); + } +} +#endif /* __KERNEL_NEON */ + +/* Intrinsics Functions + * + * For fast bit operations. 
*/ + +#if defined(__BMI__) && defined(__GNUC__) +# ifndef _tzcnt_u32 +# define _tzcnt_u32 __tzcnt_u32 +# endif +# ifndef _tzcnt_u64 +# define _tzcnt_u64 __tzcnt_u64 +# endif +#endif + +#if defined(__LZCNT__) +# define _lzcnt_u32 __lzcnt32 +# define _lzcnt_u64 __lzcnt64 +#endif + +#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__clang__) +/* Intrinsic functions on Windows. */ +__forceinline uint32_t __bsf(uint32_t v) +{ +# if defined(__KERNEL_AVX2__) + return _tzcnt_u32(v); +# else + unsigned long r = 0; + _BitScanForward(&r, v); + return r; +# endif +} + +__forceinline uint32_t __bsr(uint32_t v) +{ + unsigned long r = 0; + _BitScanReverse(&r, v); + return r; +} + +__forceinline uint32_t __btc(uint32_t v, uint32_t i) +{ + long r = v; + _bittestandcomplement(&r, i); + return r; +} + +__forceinline uint32_t bitscan(uint32_t v) +{ +# if defined(__KERNEL_AVX2__) + return _tzcnt_u32(v); +# else + return __bsf(v); +# endif +} + +# if defined(__KERNEL_64_BIT__) + +__forceinline uint64_t __bsf(uint64_t v) +{ +# if defined(__KERNEL_AVX2__) + return _tzcnt_u64(v); +# else + unsigned long r = 0; + _BitScanForward64(&r, v); + return r; +# endif +} + +__forceinline uint64_t __bsr(uint64_t v) +{ + unsigned long r = 0; + _BitScanReverse64(&r, v); + return r; +} + +__forceinline uint64_t __btc(uint64_t v, uint64_t i) +{ + uint64_t r = v; + _bittestandcomplement64((__int64 *)&r, i); + return r; +} + +__forceinline uint64_t bitscan(uint64_t v) +{ +# if defined(__KERNEL_AVX2__) +# if defined(__KERNEL_64_BIT__) + return _tzcnt_u64(v); +# else + return _tzcnt_u32(v); +# endif +# else + return __bsf(v); +# endif +} + +# endif /* __KERNEL_64_BIT__ */ + +#elif (defined(__x86_64__) || defined(__i386__)) && defined(__KERNEL_SSE2__) +/* Intrinsic functions with x86 SSE. 
*/ + +__forceinline uint32_t __bsf(const uint32_t v) +{ + uint32_t r = 0; + asm("bsf %1,%0" : "=r"(r) : "r"(v)); + return r; +} + +__forceinline uint32_t __bsr(const uint32_t v) +{ + uint32_t r = 0; + asm("bsr %1,%0" : "=r"(r) : "r"(v)); + return r; +} + +__forceinline uint32_t __btc(const uint32_t v, uint32_t i) +{ + uint32_t r = 0; + asm("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); + return r; +} + +# if (defined(__KERNEL_64_BIT__) || defined(__APPLE__)) && \ + !(defined(__ILP32__) && defined(__x86_64__)) +__forceinline uint64_t __bsf(const uint64_t v) +{ + uint64_t r = 0; + asm("bsf %1,%0" : "=r"(r) : "r"(v)); + return r; +} +# endif + +__forceinline uint64_t __bsr(const uint64_t v) +{ + uint64_t r = 0; + asm("bsr %1,%0" : "=r"(r) : "r"(v)); + return r; +} + +__forceinline uint64_t __btc(const uint64_t v, const uint64_t i) +{ + uint64_t r = 0; + asm("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); + return r; +} + +__forceinline uint32_t bitscan(uint32_t v) +{ +# if defined(__KERNEL_AVX2__) + return _tzcnt_u32(v); +# else + return __bsf(v); +# endif +} + +# if (defined(__KERNEL_64_BIT__) || defined(__APPLE__)) && \ + !(defined(__ILP32__) && defined(__x86_64__)) +__forceinline uint64_t bitscan(uint64_t v) +{ +# if defined(__KERNEL_AVX2__) +# if defined(__KERNEL_64_BIT__) + return _tzcnt_u64(v); +# else + return _tzcnt_u32(v); +# endif +# else + return __bsf(v); +# endif +} +# endif + +#else +/* Intrinsic functions fallback for arbitrary processor. 
*/ +__forceinline uint32_t __bsf(const uint32_t x) +{ + for (int i = 0; i < 32; i++) { + if (x & (1U << i)) + return i; + } + return 32; +} + +__forceinline uint32_t __bsr(const uint32_t x) +{ + for (int i = 0; i < 32; i++) { + if (x & (1U << (31 - i))) + return (31 - i); + } + return 32; +} + +__forceinline uint32_t __btc(const uint32_t x, const uint32_t bit) +{ + uint32_t mask = 1U << bit; + return x & (~mask); +} + +__forceinline uint32_t __bsf(const uint64_t x) +{ + for (int i = 0; i < 64; i++) { + if (x & (1UL << i)) + return i; + } + return 64; +} + +__forceinline uint32_t __bsr(const uint64_t x) +{ + for (int i = 0; i < 64; i++) { + if (x & (1UL << (63 - i))) + return (63 - i); + } + return 64; +} + +__forceinline uint64_t __btc(const uint64_t x, const uint32_t bit) +{ + uint64_t mask = 1UL << bit; + return x & (~mask); +} + +__forceinline uint32_t bitscan(uint32_t value) +{ + assert(value != 0); + uint32_t bit = 0; + while ((value & (1 << bit)) == 0) { + ++bit; + } + return bit; +} + +__forceinline uint64_t bitscan(uint64_t value) +{ + assert(value != 0); + uint64_t bit = 0; + while ((value & (1 << bit)) == 0) { + ++bit; + } + return bit; +} + +#endif /* Intrinsics */ + +/* SSE compatibility. + * + * Various utilities to smooth over differences between SSE versions and + * implementations. */ +#ifdef __KERNEL_SSE2__ + +/* Test __KERNEL_SSE41__ for MSVC which does not define __SSE4_1__, and test + * __SSE4_1__ to avoid OpenImageIO conflicts with our emulation macros on other + * platforms when compiling code outside the kernel. 
*/ +# if !(defined(__KERNEL_SSE41__) || defined(__SSE4_1__) || defined(__SSE4_2__)) + +/* Emulation of SSE4 functions with SSE2 */ + +# define _MM_FROUND_TO_NEAREST_INT 0x00 +# define _MM_FROUND_TO_NEG_INF 0x01 +# define _MM_FROUND_TO_POS_INF 0x02 +# define _MM_FROUND_TO_ZERO 0x03 +# define _MM_FROUND_CUR_DIRECTION 0x04 + +# undef _mm_blendv_ps +# define _mm_blendv_ps _mm_blendv_ps_emu +__forceinline __m128 _mm_blendv_ps_emu(__m128 value, __m128 input, __m128 mask) +{ + __m128i isignmask = _mm_set1_epi32(0x80000000); + __m128 signmask = _mm_castsi128_ps(isignmask); + __m128i iandsign = _mm_castps_si128(_mm_and_ps(mask, signmask)); + __m128i icmpmask = _mm_cmpeq_epi32(iandsign, isignmask); + __m128 cmpmask = _mm_castsi128_ps(icmpmask); + return _mm_or_ps(_mm_and_ps(cmpmask, input), _mm_andnot_ps(cmpmask, value)); +} + +# undef _mm_blend_ps +# define _mm_blend_ps _mm_blend_ps_emu +__forceinline __m128 _mm_blend_ps_emu(__m128 value, __m128 input, const int mask) +{ + assert(mask < 0x10); + return _mm_blendv_ps(value, input, _mm_lookupmask_ps[mask]); +} + +# undef _mm_blendv_epi8 +# define _mm_blendv_epi8 _mm_blendv_epi8_emu +__forceinline __m128i _mm_blendv_epi8_emu(__m128i value, __m128i input, __m128i mask) +{ + return _mm_or_si128(_mm_and_si128(mask, input), _mm_andnot_si128(mask, value)); +} + +# undef _mm_min_epi32 +# define _mm_min_epi32 _mm_min_epi32_emu +__forceinline __m128i _mm_min_epi32_emu(__m128i value, __m128i input) +{ + return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(value, input)); +} + +# undef _mm_max_epi32 +# define _mm_max_epi32 _mm_max_epi32_emu +__forceinline __m128i _mm_max_epi32_emu(__m128i value, __m128i input) +{ + return _mm_blendv_epi8(value, input, _mm_cmplt_epi32(value, input)); +} + +# ifndef __KERNEL_NEON__ +# undef _mm_extract_epi32 +# define _mm_extract_epi32 _mm_extract_epi32_emu +__forceinline int _mm_extract_epi32_emu(__m128i input, const int index) +{ + switch (index) { + case 0: + return _mm_cvtsi128_si32(input); + case 1: 
+ return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(1, 1, 1, 1))); + case 2: + return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(2, 2, 2, 2))); + case 3: + return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(3, 3, 3, 3))); + default: + assert(false); + return 0; + } +} +# endif + +# undef _mm_insert_epi32 +# define _mm_insert_epi32 _mm_insert_epi32_emu +__forceinline __m128i _mm_insert_epi32_emu(__m128i value, int input, const int index) +{ + assert(index >= 0 && index < 4); + ((int *)&value)[index] = input; + return value; +} + +# undef _mm_insert_ps +# define _mm_insert_ps _mm_insert_ps_emu +__forceinline __m128 _mm_insert_ps_emu(__m128 value, __m128 input, const int index) +{ + assert(index < 0x100); + ((float *)&value)[(index >> 4) & 0x3] = ((float *)&input)[index >> 6]; + return _mm_andnot_ps(_mm_lookupmask_ps[index & 0xf], value); +} + +# undef _mm_round_ps +# define _mm_round_ps _mm_round_ps_emu +__forceinline __m128 _mm_round_ps_emu(__m128 value, const int flags) +{ + switch (flags) { + case _MM_FROUND_TO_NEAREST_INT: + return _mm_cvtepi32_ps(_mm_cvtps_epi32(value)); + case _MM_FROUND_TO_NEG_INF: + return _mm_cvtepi32_ps(_mm_cvtps_epi32(_mm_add_ps(value, _mm_set1_ps(-0.5f)))); + case _MM_FROUND_TO_POS_INF: + return _mm_cvtepi32_ps(_mm_cvtps_epi32(_mm_add_ps(value, _mm_set1_ps(0.5f)))); + case _MM_FROUND_TO_ZERO: + return _mm_cvtepi32_ps(_mm_cvttps_epi32(value)); + } + return value; +} + +# endif /* !(defined(__KERNEL_SSE41__) || defined(__SSE4_1__) || defined(__SSE4_2__)) */ + +/* Older GCC versions do not have _mm256_cvtss_f32 yet, so define it ourselves. + * _mm256_castps256_ps128 generates no instructions so this is just as efficient. 
*/ +# if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__) +# undef _mm256_cvtss_f32 +# define _mm256_cvtss_f32(a) (_mm_cvtss_f32(_mm256_castps256_ps128(a))) +# endif + +#endif /* __KERNEL_SSE2__ */ + +/* quiet unused define warnings */ +#if defined(__KERNEL_SSE2__) || defined(__KERNEL_SSE3__) || defined(__KERNEL_SSSE3__) || \ + defined(__KERNEL_SSE41__) || defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__) +/* do nothing */ +#endif + +CCL_NAMESPACE_END + +#endif /* __UTIL_SIMD_TYPES_H__ */ diff --git a/intern/cycles/util/sseb.h b/intern/cycles/util/sseb.h new file mode 100644 index 00000000000..6afce4f8909 --- /dev/null +++ b/intern/cycles/util/sseb.h @@ -0,0 +1,358 @@ +/* + * Copyright 2011-2013 Intel Corporation + * Modifications Copyright 2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_SSEB_H__ +#define __UTIL_SSEB_H__ + +CCL_NAMESPACE_BEGIN + +#ifdef __KERNEL_SSE2__ + +struct ssei; +struct ssef; + +/*! 4-wide SSE bool type. 
*/ +struct sseb { + typedef sseb Mask; // mask type + typedef ssei Int; // int type + typedef ssef Float; // float type + + enum { size = 4 }; // number of SIMD elements + union { + __m128 m128; + int32_t v[4]; + }; // data + + //////////////////////////////////////////////////////////////////////////////// + /// Constructors, Assignment & Cast Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline sseb() + { + } + __forceinline sseb(const sseb &other) + { + m128 = other.m128; + } + __forceinline sseb &operator=(const sseb &other) + { + m128 = other.m128; + return *this; + } + + __forceinline sseb(const __m128 input) : m128(input) + { + } + __forceinline operator const __m128 &(void) const + { + return m128; + } + __forceinline operator const __m128i(void) const + { + return _mm_castps_si128(m128); + } + __forceinline operator const __m128d(void) const + { + return _mm_castps_pd(m128); + } + + __forceinline sseb(bool a) + : m128(_mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)]) + { + } + __forceinline sseb(bool a, bool b) + : m128(_mm_lookupmask_ps[(size_t(b) << 3) | (size_t(a) << 2) | (size_t(b) << 1) | size_t(a)]) + { + } + __forceinline sseb(bool a, bool b, bool c, bool d) + : m128(_mm_lookupmask_ps[(size_t(d) << 3) | (size_t(c) << 2) | (size_t(b) << 1) | size_t(a)]) + { + } + __forceinline sseb(int mask) + { + assert(mask >= 0 && mask < 16); + m128 = _mm_lookupmask_ps[mask]; + } + + //////////////////////////////////////////////////////////////////////////////// + /// Constants + //////////////////////////////////////////////////////////////////////////////// + + __forceinline sseb(FalseTy) : m128(_mm_setzero_ps()) + { + } + __forceinline sseb(TrueTy) + : m128(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()))) + { + } + + //////////////////////////////////////////////////////////////////////////////// + /// Array Access + 
//////////////////////////////////////////////////////////////////////////////// + + __forceinline bool operator[](const size_t i) const + { + assert(i < 4); + return (_mm_movemask_ps(m128) >> i) & 1; + } + __forceinline int32_t &operator[](const size_t i) + { + assert(i < 4); + return v[i]; + } +}; + +//////////////////////////////////////////////////////////////////////////////// +/// Unary Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const sseb operator!(const sseb &a) +{ + return _mm_xor_ps(a, sseb(True)); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Binary Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const sseb operator&(const sseb &a, const sseb &b) +{ + return _mm_and_ps(a, b); +} +__forceinline const sseb operator|(const sseb &a, const sseb &b) +{ + return _mm_or_ps(a, b); +} +__forceinline const sseb operator^(const sseb &a, const sseb &b) +{ + return _mm_xor_ps(a, b); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Assignment Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const sseb operator&=(sseb &a, const sseb &b) +{ + return a = a & b; +} +__forceinline const sseb operator|=(sseb &a, const sseb &b) +{ + return a = a | b; +} +__forceinline const sseb operator^=(sseb &a, const sseb &b) +{ + return a = a ^ b; +} + +//////////////////////////////////////////////////////////////////////////////// +/// Comparison Operators + Select +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const sseb operator!=(const sseb &a, const sseb &b) +{ + return _mm_xor_ps(a, b); +} +__forceinline const sseb operator==(const sseb &a, const sseb &b) +{ + return _mm_castsi128_ps(_mm_cmpeq_epi32(a, b)); +} + +__forceinline const sseb select(const sseb &m, 
const sseb &t, const sseb &f) +{ +# if defined(__KERNEL_SSE41__) + return _mm_blendv_ps(f, t, m); +# else + return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); +# endif +} + +//////////////////////////////////////////////////////////////////////////////// +/// Movement/Shifting/Shuffling Functions +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const sseb unpacklo(const sseb &a, const sseb &b) +{ + return _mm_unpacklo_ps(a, b); +} +__forceinline const sseb unpackhi(const sseb &a, const sseb &b) +{ + return _mm_unpackhi_ps(a, b); +} + +template +__forceinline const sseb shuffle(const sseb &a) +{ +# ifdef __KERNEL_NEON__ + return shuffle_neon(a); +# else + return _mm_castsi128_ps(_mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0))); +# endif +} + +# ifndef __KERNEL_NEON__ +template<> __forceinline const sseb shuffle<0, 1, 0, 1>(const sseb &a) +{ + return _mm_movelh_ps(a, a); +} + +template<> __forceinline const sseb shuffle<2, 3, 2, 3>(const sseb &a) +{ + return _mm_movehl_ps(a, a); +} +# endif + +template +__forceinline const sseb shuffle(const sseb &a, const sseb &b) +{ +# ifdef __KERNEL_NEON__ + return shuffle_neon(a, b); +# else + return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); +# endif +} + +# ifndef __KERNEL_NEON__ +template<> __forceinline const sseb shuffle<0, 1, 0, 1>(const sseb &a, const sseb &b) +{ + return _mm_movelh_ps(a, b); +} + +template<> __forceinline const sseb shuffle<2, 3, 2, 3>(const sseb &a, const sseb &b) +{ + return _mm_movehl_ps(b, a); +} +# endif + +# if defined(__KERNEL_SSE3__) && !defined(__KERNEL_NEON__) +template<> __forceinline const sseb shuffle<0, 0, 2, 2>(const sseb &a) +{ + return _mm_moveldup_ps(a); +} +template<> __forceinline const sseb shuffle<1, 1, 3, 3>(const sseb &a) +{ + return _mm_movehdup_ps(a); +} +# endif + +# if defined(__KERNEL_SSE41__) +template +__forceinline const sseb insert(const sseb &a, const sseb &b) +{ +# ifdef __KERNEL_NEON__ + sseb res = a; + 
if (clr) + res[dst] = 0; + else + res[dst] = b[src]; + return res; +# else + return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr); +# endif +} +template __forceinline const sseb insert(const sseb &a, const sseb &b) +{ + return insert(a, b); +} +template __forceinline const sseb insert(const sseb &a, const bool b) +{ + return insert(a, sseb(b)); +} +# endif + +//////////////////////////////////////////////////////////////////////////////// +/// Reduction Operations +//////////////////////////////////////////////////////////////////////////////// + +# if defined(__KERNEL_SSE41__) +__forceinline uint32_t popcnt(const sseb &a) +{ +# if defined(__KERNEL_NEON__) + const int32x4_t mask = {1, 1, 1, 1}; + int32x4_t t = vandq_s32(vreinterpretq_s32_m128(a.m128), mask); + return vaddvq_s32(t); +# else + return _mm_popcnt_u32(_mm_movemask_ps(a)); +# endif +} +# else +__forceinline uint32_t popcnt(const sseb &a) +{ + return bool(a[0]) + bool(a[1]) + bool(a[2]) + bool(a[3]); +} +# endif + +__forceinline bool reduce_and(const sseb &a) +{ +# if defined(__KERNEL_NEON__) + return vaddvq_s32(vreinterpretq_s32_m128(a.m128)) == -4; +# else + return _mm_movemask_ps(a) == 0xf; +# endif +} +__forceinline bool reduce_or(const sseb &a) +{ +# if defined(__KERNEL_NEON__) + return vaddvq_s32(vreinterpretq_s32_m128(a.m128)) != 0x0; +# else + return _mm_movemask_ps(a) != 0x0; +# endif +} +__forceinline bool all(const sseb &b) +{ +# if defined(__KERNEL_NEON__) + return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) == -4; +# else + return _mm_movemask_ps(b) == 0xf; +# endif +} +__forceinline bool any(const sseb &b) +{ +# if defined(__KERNEL_NEON__) + return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) != 0x0; +# else + return _mm_movemask_ps(b) != 0x0; +# endif +} +__forceinline bool none(const sseb &b) +{ +# if defined(__KERNEL_NEON__) + return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) == 0x0; +# else + return _mm_movemask_ps(b) == 0x0; +# endif +} + +__forceinline uint32_t movemask(const sseb 
&a) +{ + return _mm_movemask_ps(a); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Debug Functions +//////////////////////////////////////////////////////////////////////////////// + +ccl_device_inline void print_sseb(const char *label, const sseb &a) +{ + printf("%s: %d %d %d %d\n", label, a[0], a[1], a[2], a[3]); +} + +#endif + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/util/ssef.h b/intern/cycles/util/ssef.h new file mode 100644 index 00000000000..ea5e78b54d2 --- /dev/null +++ b/intern/cycles/util/ssef.h @@ -0,0 +1,1104 @@ +/* + * Copyright 2011-2013 Intel Corporation + * Modifications Copyright 2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_SSEF_H__ +#define __UTIL_SSEF_H__ + +#include "util/ssei.h" + +CCL_NAMESPACE_BEGIN + +#ifdef __KERNEL_SSE2__ + +struct sseb; +struct ssef; + +/*! 4-wide SSE float type. 
*/ +struct ssef { + typedef sseb Mask; // mask type + typedef ssei Int; // int type + typedef ssef Float; // float type + + enum { size = 4 }; // number of SIMD elements + union { + __m128 m128; + float f[4]; + int i[4]; + }; // data + + //////////////////////////////////////////////////////////////////////////////// + /// Constructors, Assignment & Cast Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline ssef() + { + } + __forceinline ssef(const ssef &other) + { + m128 = other.m128; + } + __forceinline ssef &operator=(const ssef &other) + { + m128 = other.m128; + return *this; + } + + __forceinline ssef(const __m128 a) : m128(a) + { + } + __forceinline operator const __m128 &() const + { + return m128; + } + __forceinline operator __m128 &() + { + return m128; + } + + __forceinline ssef(float a) : m128(_mm_set1_ps(a)) + { + } + __forceinline ssef(float a, float b, float c, float d) : m128(_mm_setr_ps(a, b, c, d)) + { + } + + __forceinline explicit ssef(const __m128i a) : m128(_mm_cvtepi32_ps(a)) + { + } + + //////////////////////////////////////////////////////////////////////////////// + /// Loads and Stores + //////////////////////////////////////////////////////////////////////////////// + +# if defined(__KERNEL_AVX__) + static __forceinline ssef broadcast(const void *const a) + { + return _mm_broadcast_ss((float *)a); + } +# else + static __forceinline ssef broadcast(const void *const a) + { + return _mm_set1_ps(*(float *)a); + } +# endif + + //////////////////////////////////////////////////////////////////////////////// + /// Array Access + //////////////////////////////////////////////////////////////////////////////// + + __forceinline const float &operator[](const size_t i) const + { + assert(i < 4); + return f[i]; + } + __forceinline float &operator[](const size_t i) + { + assert(i < 4); + return f[i]; + } +}; + +//////////////////////////////////////////////////////////////////////////////// 
+/// Unary Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const ssef cast(const __m128i &a) +{ + return _mm_castsi128_ps(a); +} +__forceinline const ssef operator+(const ssef &a) +{ + return a; +} +__forceinline const ssef operator-(const ssef &a) +{ + return _mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))); +} +__forceinline const ssef abs(const ssef &a) +{ + return _mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff))); +} +# if defined(__KERNEL_SSE41__) +__forceinline const ssef sign(const ssef &a) +{ + return _mm_blendv_ps(ssef(1.0f), -ssef(1.0f), _mm_cmplt_ps(a, ssef(0.0f))); +} +# endif +__forceinline const ssef signmsk(const ssef &a) +{ + return _mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))); +} + +__forceinline const ssef rcp(const ssef &a) +{ + const ssef r = _mm_rcp_ps(a.m128); + return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a)); +} +__forceinline const ssef sqr(const ssef &a) +{ + return _mm_mul_ps(a, a); +} +__forceinline const ssef mm_sqrt(const ssef &a) +{ + return _mm_sqrt_ps(a.m128); +} +__forceinline const ssef rsqrt(const ssef &a) +{ + const ssef r = _mm_rsqrt_ps(a.m128); + return _mm_add_ps( + _mm_mul_ps(_mm_set_ps(1.5f, 1.5f, 1.5f, 1.5f), r), + _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set_ps(-0.5f, -0.5f, -0.5f, -0.5f)), r), + _mm_mul_ps(r, r))); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Binary Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const ssef operator+(const ssef &a, const ssef &b) +{ + return _mm_add_ps(a.m128, b.m128); +} +__forceinline const ssef operator+(const ssef &a, const float &b) +{ + return a + ssef(b); +} +__forceinline const ssef operator+(const float &a, const ssef &b) +{ + return ssef(a) + b; +} + +__forceinline const ssef operator-(const ssef &a, const ssef &b) +{ + return 
_mm_sub_ps(a.m128, b.m128); +} +__forceinline const ssef operator-(const ssef &a, const float &b) +{ + return a - ssef(b); +} +__forceinline const ssef operator-(const float &a, const ssef &b) +{ + return ssef(a) - b; +} + +__forceinline const ssef operator*(const ssef &a, const ssef &b) +{ + return _mm_mul_ps(a.m128, b.m128); +} +__forceinline const ssef operator*(const ssef &a, const float &b) +{ + return a * ssef(b); +} +__forceinline const ssef operator*(const float &a, const ssef &b) +{ + return ssef(a) * b; +} + +__forceinline const ssef operator/(const ssef &a, const ssef &b) +{ + return _mm_div_ps(a.m128, b.m128); +} +__forceinline const ssef operator/(const ssef &a, const float &b) +{ + return a / ssef(b); +} +__forceinline const ssef operator/(const float &a, const ssef &b) +{ + return ssef(a) / b; +} + +__forceinline const ssef operator^(const ssef &a, const ssef &b) +{ + return _mm_xor_ps(a.m128, b.m128); +} +__forceinline const ssef operator^(const ssef &a, const ssei &b) +{ + return _mm_xor_ps(a.m128, _mm_castsi128_ps(b.m128)); +} + +__forceinline const ssef operator&(const ssef &a, const ssef &b) +{ + return _mm_and_ps(a.m128, b.m128); +} +__forceinline const ssef operator&(const ssef &a, const ssei &b) +{ + return _mm_and_ps(a.m128, _mm_castsi128_ps(b.m128)); +} + +__forceinline const ssef operator|(const ssef &a, const ssef &b) +{ + return _mm_or_ps(a.m128, b.m128); +} +__forceinline const ssef operator|(const ssef &a, const ssei &b) +{ + return _mm_or_ps(a.m128, _mm_castsi128_ps(b.m128)); +} + +__forceinline const ssef andnot(const ssef &a, const ssef &b) +{ + return _mm_andnot_ps(a.m128, b.m128); +} + +__forceinline const ssef min(const ssef &a, const ssef &b) +{ + return _mm_min_ps(a.m128, b.m128); +} +__forceinline const ssef min(const ssef &a, const float &b) +{ + return _mm_min_ps(a.m128, ssef(b)); +} +__forceinline const ssef min(const float &a, const ssef &b) +{ + return _mm_min_ps(ssef(a), b.m128); +} + +__forceinline const ssef max(const 
ssef &a, const ssef &b) +{ + return _mm_max_ps(a.m128, b.m128); +} +__forceinline const ssef max(const ssef &a, const float &b) +{ + return _mm_max_ps(a.m128, ssef(b)); +} +__forceinline const ssef max(const float &a, const ssef &b) +{ + return _mm_max_ps(ssef(a), b.m128); +} + +# if defined(__KERNEL_SSE41__) +__forceinline ssef mini(const ssef &a, const ssef &b) +{ + const ssei ai = _mm_castps_si128(a); + const ssei bi = _mm_castps_si128(b); + const ssei ci = _mm_min_epi32(ai, bi); + return _mm_castsi128_ps(ci); +} +# endif + +# if defined(__KERNEL_SSE41__) +__forceinline ssef maxi(const ssef &a, const ssef &b) +{ + const ssei ai = _mm_castps_si128(a); + const ssei bi = _mm_castps_si128(b); + const ssei ci = _mm_max_epi32(ai, bi); + return _mm_castsi128_ps(ci); +} +# endif + +//////////////////////////////////////////////////////////////////////////////// +/// Ternary Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const ssef madd(const ssef &a, const ssef &b, const ssef &c) +{ +# if defined(__KERNEL_NEON__) + return vfmaq_f32(c, a, b); +# elif defined(__KERNEL_AVX2__) + return _mm_fmadd_ps(a, b, c); +# else + return a * b + c; +# endif +} +__forceinline const ssef msub(const ssef &a, const ssef &b, const ssef &c) +{ +# if defined(__KERNEL_NEON__) + return vfmaq_f32(vnegq_f32(c), a, b); +# elif defined(__KERNEL_AVX2__) + return _mm_fmsub_ps(a, b, c); +# else + return a * b - c; +# endif +} +__forceinline const ssef nmadd(const ssef &a, const ssef &b, const ssef &c) +{ +# if defined(__KERNEL_NEON__) + return vfmsq_f32(c, a, b); +# elif defined(__KERNEL_AVX2__) + return _mm_fnmadd_ps(a, b, c); +# else + return c - a * b; +# endif +} +__forceinline const ssef nmsub(const ssef &a, const ssef &b, const ssef &c) +{ +# if defined(__KERNEL_NEON__) + return vfmsq_f32(vnegq_f32(c), a, b); +# elif defined(__KERNEL_AVX2__) + return _mm_fnmsub_ps(a, b, c); +# else + return -a * b - c; +# endif +} + 
+//////////////////////////////////////////////////////////////////////////////// +/// Assignment Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline ssef &operator+=(ssef &a, const ssef &b) +{ + return a = a + b; +} +__forceinline ssef &operator+=(ssef &a, const float &b) +{ + return a = a + b; +} + +__forceinline ssef &operator-=(ssef &a, const ssef &b) +{ + return a = a - b; +} +__forceinline ssef &operator-=(ssef &a, const float &b) +{ + return a = a - b; +} + +__forceinline ssef &operator*=(ssef &a, const ssef &b) +{ + return a = a * b; +} +__forceinline ssef &operator*=(ssef &a, const float &b) +{ + return a = a * b; +} + +__forceinline ssef &operator/=(ssef &a, const ssef &b) +{ + return a = a / b; +} +__forceinline ssef &operator/=(ssef &a, const float &b) +{ + return a = a / b; +} + +//////////////////////////////////////////////////////////////////////////////// +/// Comparison Operators + Select +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const sseb operator==(const ssef &a, const ssef &b) +{ + return _mm_cmpeq_ps(a.m128, b.m128); +} +__forceinline const sseb operator==(const ssef &a, const float &b) +{ + return a == ssef(b); +} +__forceinline const sseb operator==(const float &a, const ssef &b) +{ + return ssef(a) == b; +} + +__forceinline const sseb operator!=(const ssef &a, const ssef &b) +{ + return _mm_cmpneq_ps(a.m128, b.m128); +} +__forceinline const sseb operator!=(const ssef &a, const float &b) +{ + return a != ssef(b); +} +__forceinline const sseb operator!=(const float &a, const ssef &b) +{ + return ssef(a) != b; +} + +__forceinline const sseb operator<(const ssef &a, const ssef &b) +{ + return _mm_cmplt_ps(a.m128, b.m128); +} +__forceinline const sseb operator<(const ssef &a, const float &b) +{ + return a < ssef(b); +} +__forceinline const sseb operator<(const float &a, const ssef &b) +{ + return ssef(a) < b; +} + +__forceinline 
const sseb operator>=(const ssef &a, const ssef &b) +{ + return _mm_cmpnlt_ps(a.m128, b.m128); +} +__forceinline const sseb operator>=(const ssef &a, const float &b) +{ + return a >= ssef(b); +} +__forceinline const sseb operator>=(const float &a, const ssef &b) +{ + return ssef(a) >= b; +} + +__forceinline const sseb operator>(const ssef &a, const ssef &b) +{ + return _mm_cmpnle_ps(a.m128, b.m128); +} +__forceinline const sseb operator>(const ssef &a, const float &b) +{ + return a > ssef(b); +} +__forceinline const sseb operator>(const float &a, const ssef &b) +{ + return ssef(a) > b; +} + +__forceinline const sseb operator<=(const ssef &a, const ssef &b) +{ + return _mm_cmple_ps(a.m128, b.m128); +} +__forceinline const sseb operator<=(const ssef &a, const float &b) +{ + return a <= ssef(b); +} +__forceinline const sseb operator<=(const float &a, const ssef &b) +{ + return ssef(a) <= b; +} + +__forceinline const ssef select(const sseb &m, const ssef &t, const ssef &f) +{ +# ifdef __KERNEL_SSE41__ + return _mm_blendv_ps(f, t, m); +# else + return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); +# endif +} + +__forceinline const ssef select(const ssef &m, const ssef &t, const ssef &f) +{ +# ifdef __KERNEL_SSE41__ + return _mm_blendv_ps(f, t, m); +# else + return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); +# endif +} + +__forceinline const ssef select(const int mask, const ssef &t, const ssef &f) +{ +# if defined(__KERNEL_SSE41__) && \ + ((!defined(__clang__) && !defined(_MSC_VER)) || defined(__INTEL_COMPILER)) + return _mm_blend_ps(f, t, mask); +# else + return select(sseb(mask), t, f); +# endif +} + +//////////////////////////////////////////////////////////////////////////////// +/// Rounding Functions +//////////////////////////////////////////////////////////////////////////////// + +# if defined(__KERNEL_SSE41__) +__forceinline const ssef round_even(const ssef &a) +{ +# ifdef __KERNEL_NEON__ + return vrndnq_f32(a); +# else + return _mm_round_ps(a, 
_MM_FROUND_TO_NEAREST_INT);
+#  endif
+}
+__forceinline const ssef round_down(const ssef &a)
+{
+#  ifdef __KERNEL_NEON__
+  return vrndmq_f32(a);
+#  else
+  return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF);
+#  endif
+}
+__forceinline const ssef round_up(const ssef &a)
+{
+#  ifdef __KERNEL_NEON__
+  return vrndpq_f32(a);
+#  else
+  return _mm_round_ps(a, _MM_FROUND_TO_POS_INF);
+#  endif
+}
+__forceinline const ssef round_zero(const ssef &a)
+{
+#  ifdef __KERNEL_NEON__
+  return vrndq_f32(a);
+#  else
+  return _mm_round_ps(a, _MM_FROUND_TO_ZERO);
+#  endif
+}
+__forceinline const ssef floor(const ssef &a)
+{
+#  ifdef __KERNEL_NEON__
+  return vrndmq_f32(a);
+#  else
+  return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF);
+#  endif
+}
+__forceinline const ssef ceil(const ssef &a)
+{
+#  ifdef __KERNEL_NEON__
+  return vrndpq_f32(a);
+#  else
+  return _mm_round_ps(a, _MM_FROUND_TO_POS_INF);
+#  endif
+}
+# endif
+
+__forceinline ssei truncatei(const ssef &a)
+{
+  return _mm_cvttps_epi32(a.m128);
+}
+
+/* This is about 25% faster than straightforward floor to integer conversion
+ * due to better pipelining.
+ *
+ * Unsaturated add 0xffffffff (a < 0) is the same as subtract -1.
+ */ +__forceinline ssei floori(const ssef &a) +{ + return truncatei(a) + cast((a < 0.0f).m128); +} + +__forceinline ssef floorfrac(const ssef &x, ssei *i) +{ + *i = floori(x); + return x - ssef(*i); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Common Functions +//////////////////////////////////////////////////////////////////////////////// + +__forceinline ssef mix(const ssef &a, const ssef &b, const ssef &t) +{ + return madd(t, b, (ssef(1.0f) - t) * a); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Movement/Shifting/Shuffling Functions +//////////////////////////////////////////////////////////////////////////////// + +__forceinline ssef unpacklo(const ssef &a, const ssef &b) +{ + return _mm_unpacklo_ps(a.m128, b.m128); +} +__forceinline ssef unpackhi(const ssef &a, const ssef &b) +{ + return _mm_unpackhi_ps(a.m128, b.m128); +} + +template +__forceinline const ssef shuffle(const ssef &b) +{ +# ifdef __KERNEL_NEON__ + return shuffle_neon(b.m128); +# else + return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(i3, i2, i1, i0))); +# endif +} + +template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef &a) +{ + return _mm_movelh_ps(a, a); +} + +template<> __forceinline const ssef shuffle<2, 3, 2, 3>(const ssef &a) +{ + return _mm_movehl_ps(a, a); +} + +template +__forceinline const ssef shuffle(const ssef &a, const ssef &b) +{ +# ifdef __KERNEL_NEON__ + return shuffle_neon(a, b); +# else + return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); +# endif +} + +template __forceinline const ssef shuffle(const ssef &a, const ssef &b) +{ +# ifdef __KERNEL_NEON__ + return shuffle_neon(a, b); +# else + return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i0, i0, i0, i0)); +# endif +} + +# ifndef __KERNEL_NEON__ +template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef &a, const ssef &b) +{ + return _mm_movelh_ps(a, b); +} + +template<> __forceinline 
const ssef shuffle<2, 3, 2, 3>(const ssef &a, const ssef &b) +{ + return _mm_movehl_ps(b, a); +} +# endif + +# if defined(__KERNEL_SSSE3__) +__forceinline const ssef shuffle8(const ssef &a, const ssei &shuf) +{ + return _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(a), shuf)); +} +# endif + +# if defined(__KERNEL_SSE3__) +template<> __forceinline const ssef shuffle<0, 0, 2, 2>(const ssef &b) +{ + return _mm_moveldup_ps(b); +} +template<> __forceinline const ssef shuffle<1, 1, 3, 3>(const ssef &b) +{ + return _mm_movehdup_ps(b); +} +# endif + +template __forceinline const ssef shuffle(const ssef &b) +{ + return shuffle(b); +} + +# if defined(__KERNEL_AVX__) +__forceinline const ssef shuffle(const ssef &a, const ssei &shuf) +{ + return _mm_permutevar_ps(a, shuf); +} +# endif + +template __forceinline float extract(const ssef &a) +{ + return _mm_cvtss_f32(shuffle(a)); +} +template<> __forceinline float extract<0>(const ssef &a) +{ + return _mm_cvtss_f32(a); +} + +# if defined(__KERNEL_SSE41__) +template +__forceinline const ssef insert(const ssef &a, const ssef &b) +{ +# ifdef __KERNEL_NEON__ + ssef res = a; + if (clr) + res[dst] = 0; + else + res[dst] = b[src]; + return res; +# else + return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr); +# endif +} +template __forceinline const ssef insert(const ssef &a, const ssef &b) +{ + return insert(a, b); +} +template __forceinline const ssef insert(const ssef &a, const float b) +{ + return insert(a, _mm_set_ss(b)); +} +# else +template __forceinline const ssef insert(const ssef &a, const float b) +{ + ssef c = a; + c[dst] = b; + return c; +} +# endif + +//////////////////////////////////////////////////////////////////////////////// +/// Transpose +//////////////////////////////////////////////////////////////////////////////// + +__forceinline void transpose(const ssef &r0, + const ssef &r1, + const ssef &r2, + const ssef &r3, + ssef &c0, + ssef &c1, + ssef &c2, + ssef &c3) +{ + ssef l02 = unpacklo(r0, r2); + ssef 
h02 = unpackhi(r0, r2); + ssef l13 = unpacklo(r1, r3); + ssef h13 = unpackhi(r1, r3); + c0 = unpacklo(l02, l13); + c1 = unpackhi(l02, l13); + c2 = unpacklo(h02, h13); + c3 = unpackhi(h02, h13); +} + +__forceinline void transpose( + const ssef &r0, const ssef &r1, const ssef &r2, const ssef &r3, ssef &c0, ssef &c1, ssef &c2) +{ + ssef l02 = unpacklo(r0, r2); + ssef h02 = unpackhi(r0, r2); + ssef l13 = unpacklo(r1, r3); + ssef h13 = unpackhi(r1, r3); + c0 = unpacklo(l02, l13); + c1 = unpackhi(l02, l13); + c2 = unpacklo(h02, h13); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Reductions +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const ssef vreduce_min(const ssef &v) +{ +# ifdef __KERNEL_NEON__ + return vdupq_n_f32(vminvq_f32(v)); +# else + ssef h = min(shuffle<1, 0, 3, 2>(v), v); + return min(shuffle<2, 3, 0, 1>(h), h); +# endif +} +__forceinline const ssef vreduce_max(const ssef &v) +{ +# ifdef __KERNEL_NEON__ + return vdupq_n_f32(vmaxvq_f32(v)); +# else + ssef h = max(shuffle<1, 0, 3, 2>(v), v); + return max(shuffle<2, 3, 0, 1>(h), h); +# endif +} +__forceinline const ssef vreduce_add(const ssef &v) +{ +# ifdef __KERNEL_NEON__ + return vdupq_n_f32(vaddvq_f32(v)); +# else + ssef h = shuffle<1, 0, 3, 2>(v) + v; + return shuffle<2, 3, 0, 1>(h) + h; +# endif +} + +__forceinline float reduce_min(const ssef &v) +{ +# ifdef __KERNEL_NEON__ + return vminvq_f32(v); +# else + return _mm_cvtss_f32(vreduce_min(v)); +# endif +} +__forceinline float reduce_max(const ssef &v) +{ +# ifdef __KERNEL_NEON__ + return vmaxvq_f32(v); +# else + return _mm_cvtss_f32(vreduce_max(v)); +# endif +} +__forceinline float reduce_add(const ssef &v) +{ +# ifdef __KERNEL_NEON__ + return vaddvq_f32(v); +# else + return _mm_cvtss_f32(vreduce_add(v)); +# endif +} + +__forceinline uint32_t select_min(const ssef &v) +{ + return __bsf(movemask(v == vreduce_min(v))); +} +__forceinline uint32_t 
select_max(const ssef &v) +{ + return __bsf(movemask(v == vreduce_max(v))); +} + +__forceinline uint32_t select_min(const sseb &valid, const ssef &v) +{ + const ssef a = select(valid, v, ssef(pos_inf)); + return __bsf(movemask(valid & (a == vreduce_min(a)))); +} +__forceinline uint32_t select_max(const sseb &valid, const ssef &v) +{ + const ssef a = select(valid, v, ssef(neg_inf)); + return __bsf(movemask(valid & (a == vreduce_max(a)))); +} + +__forceinline uint32_t movemask(const ssef &a) +{ + return _mm_movemask_ps(a); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Memory load and store operations +//////////////////////////////////////////////////////////////////////////////// + +__forceinline ssef load4f(const float4 &a) +{ +# ifdef __KERNEL_WITH_SSE_ALIGN__ + return _mm_load_ps(&a.x); +# else + return _mm_loadu_ps(&a.x); +# endif +} + +__forceinline ssef load4f(const float3 &a) +{ +# ifdef __KERNEL_WITH_SSE_ALIGN__ + return _mm_load_ps(&a.x); +# else + return _mm_loadu_ps(&a.x); +# endif +} + +__forceinline ssef load4f(const void *const a) +{ + return _mm_load_ps((float *)a); +} + +__forceinline ssef load1f_first(const float a) +{ + return _mm_set_ss(a); +} + +__forceinline void store4f(void *ptr, const ssef &v) +{ + _mm_store_ps((float *)ptr, v); +} + +__forceinline ssef loadu4f(const void *const a) +{ + return _mm_loadu_ps((float *)a); +} + +__forceinline void storeu4f(void *ptr, const ssef &v) +{ + _mm_storeu_ps((float *)ptr, v); +} + +__forceinline void store4f(const sseb &mask, void *ptr, const ssef &f) +{ +# if defined(__KERNEL_AVX__) + _mm_maskstore_ps((float *)ptr, (__m128i)mask, f); +# else + *(ssef *)ptr = select(mask, f, *(ssef *)ptr); +# endif +} + +__forceinline ssef load4f_nt(void *ptr) +{ +# if defined(__KERNEL_SSE41__) + return _mm_castsi128_ps(_mm_stream_load_si128((__m128i *)ptr)); +# else + return _mm_load_ps((float *)ptr); +# endif +} + +__forceinline void store4f_nt(void *ptr, const ssef &v) +{ 
+# if defined(__KERNEL_SSE41__) + _mm_stream_ps((float *)ptr, v); +# else + _mm_store_ps((float *)ptr, v); +# endif +} + +//////////////////////////////////////////////////////////////////////////////// +/// Euclidian Space Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline float dot(const ssef &a, const ssef &b) +{ + return reduce_add(a * b); +} + +/* calculate shuffled cross product, useful when order of components does not matter */ +__forceinline ssef cross_zxy(const ssef &a, const ssef &b) +{ + const ssef a0 = a; + const ssef b0 = shuffle<1, 2, 0, 3>(b); + const ssef a1 = shuffle<1, 2, 0, 3>(a); + const ssef b1 = b; + return msub(a0, b0, a1 * b1); +} + +__forceinline ssef cross(const ssef &a, const ssef &b) +{ + return shuffle<1, 2, 0, 3>(cross_zxy(a, b)); +} + +ccl_device_inline const ssef dot3_splat(const ssef &a, const ssef &b) +{ +# ifdef __KERNEL_SSE41__ + return _mm_dp_ps(a.m128, b.m128, 0x7f); +# else + ssef t = a * b; + return ssef(((float *)&t)[0] + ((float *)&t)[1] + ((float *)&t)[2]); +# endif +} + +/* squared length taking only specified axes into account */ +template ccl_device_inline float len_squared(const ssef &a) +{ +# ifndef __KERNEL_SSE41__ + float4 &t = (float4 &)a; + return (X ? t.x * t.x : 0.0f) + (Y ? t.y * t.y : 0.0f) + (Z ? t.z * t.z : 0.0f) + + (W ? 
t.w * t.w : 0.0f); +# else + return extract<0>( + ssef(_mm_dp_ps(a.m128, a.m128, (X << 4) | (Y << 5) | (Z << 6) | (W << 7) | 0xf))); +# endif +} + +ccl_device_inline float dot3(const ssef &a, const ssef &b) +{ +# ifdef __KERNEL_SSE41__ + return extract<0>(ssef(_mm_dp_ps(a.m128, b.m128, 0x7f))); +# else + ssef t = a * b; + return ((float *)&t)[0] + ((float *)&t)[1] + ((float *)&t)[2]; +# endif +} + +ccl_device_inline const ssef len3_squared_splat(const ssef &a) +{ + return dot3_splat(a, a); +} + +ccl_device_inline float len3_squared(const ssef &a) +{ + return dot3(a, a); +} + +ccl_device_inline float len3(const ssef &a) +{ + return extract<0>(mm_sqrt(dot3_splat(a, a))); +} + +/* SSE shuffle utility functions */ + +# ifdef __KERNEL_SSSE3__ + +/* faster version for SSSE3 */ +typedef ssei shuffle_swap_t; + +ccl_device_inline shuffle_swap_t shuffle_swap_identity() +{ + return _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); +} + +ccl_device_inline shuffle_swap_t shuffle_swap_swap() +{ + return _mm_set_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); +} + +ccl_device_inline const ssef shuffle_swap(const ssef &a, const shuffle_swap_t &shuf) +{ + return cast(_mm_shuffle_epi8(cast(a), shuf)); +} + +# else + +/* somewhat slower version for SSE2 */ +typedef int shuffle_swap_t; + +ccl_device_inline shuffle_swap_t shuffle_swap_identity() +{ + return 0; +} + +ccl_device_inline shuffle_swap_t shuffle_swap_swap() +{ + return 1; +} + +ccl_device_inline const ssef shuffle_swap(const ssef &a, shuffle_swap_t shuf) +{ + /* shuffle value must be a constant, so we need to branch */ + if (shuf) + return shuffle<1, 0, 3, 2>(a); + else + return shuffle<3, 2, 1, 0>(a); +} + +# endif + +# if defined(__KERNEL_SSE41__) && !defined(__KERNEL_NEON__) + +ccl_device_inline void gen_idirsplat_swap(const ssef &pn, + const shuffle_swap_t &shuf_identity, + const shuffle_swap_t &shuf_swap, + const float3 &idir, + ssef idirsplat[3], + shuffle_swap_t shufflexyz[3]) +{ + const 
__m128 idirsplat_raw[] = {_mm_set_ps1(idir.x), _mm_set_ps1(idir.y), _mm_set_ps1(idir.z)}; + idirsplat[0] = _mm_xor_ps(idirsplat_raw[0], pn); + idirsplat[1] = _mm_xor_ps(idirsplat_raw[1], pn); + idirsplat[2] = _mm_xor_ps(idirsplat_raw[2], pn); + + const ssef signmask = cast(ssei(0x80000000)); + const ssef shuf_identity_f = cast(shuf_identity); + const ssef shuf_swap_f = cast(shuf_swap); + + shufflexyz[0] = _mm_castps_si128( + _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[0], signmask))); + shufflexyz[1] = _mm_castps_si128( + _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[1], signmask))); + shufflexyz[2] = _mm_castps_si128( + _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[2], signmask))); +} + +# else + +ccl_device_inline void gen_idirsplat_swap(const ssef &pn, + const shuffle_swap_t &shuf_identity, + const shuffle_swap_t &shuf_swap, + const float3 &idir, + ssef idirsplat[3], + shuffle_swap_t shufflexyz[3]) +{ + idirsplat[0] = ssef(idir.x) ^ pn; + idirsplat[1] = ssef(idir.y) ^ pn; + idirsplat[2] = ssef(idir.z) ^ pn; + + shufflexyz[0] = (idir.x >= 0) ? shuf_identity : shuf_swap; + shufflexyz[1] = (idir.y >= 0) ? shuf_identity : shuf_swap; + shufflexyz[2] = (idir.z >= 0) ? 
shuf_identity : shuf_swap; +} + +# endif + +ccl_device_inline const ssef uint32_to_float(const ssei &in) +{ + ssei a = _mm_srli_epi32(in, 16); + ssei b = _mm_and_si128(in, _mm_set1_epi32(0x0000ffff)); + ssei c = _mm_or_si128(a, _mm_set1_epi32(0x53000000)); + ssef d = _mm_cvtepi32_ps(b); + ssef e = _mm_sub_ps(_mm_castsi128_ps(c), _mm_castsi128_ps(_mm_set1_epi32(0x53000000))); + return _mm_add_ps(e, d); +} + +template +ccl_device_inline const ssef set_sign_bit(const ssef &a) +{ + return cast(cast(a) ^ ssei(S1 << 31, S2 << 31, S3 << 31, S4 << 31)); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Debug Functions +//////////////////////////////////////////////////////////////////////////////// + +ccl_device_inline void print_ssef(const char *label, const ssef &a) +{ + printf( + "%s: %.8f %.8f %.8f %.8f\n", label, (double)a[0], (double)a[1], (double)a[2], (double)a[3]); +} + +#endif + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/util/ssei.h b/intern/cycles/util/ssei.h new file mode 100644 index 00000000000..94412fb77e7 --- /dev/null +++ b/intern/cycles/util/ssei.h @@ -0,0 +1,646 @@ +/* + * Copyright 2011-2013 Intel Corporation + * Modifications Copyright 2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_SSEI_H__ +#define __UTIL_SSEI_H__ + +CCL_NAMESPACE_BEGIN + +#ifdef __KERNEL_SSE2__ + +struct sseb; +struct ssef; + +/*! 4-wide SSE integer type. 
*/ +struct ssei { + typedef sseb Mask; // mask type + typedef ssei Int; // int type + typedef ssef Float; // float type + + enum { size = 4 }; // number of SIMD elements + union { + __m128i m128; + int32_t i[4]; + }; // data + + //////////////////////////////////////////////////////////////////////////////// + /// Constructors, Assignment & Cast Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline ssei() + { + } + __forceinline ssei(const ssei &a) + { + m128 = a.m128; + } + __forceinline ssei &operator=(const ssei &a) + { + m128 = a.m128; + return *this; + } + + __forceinline ssei(const __m128i a) : m128(a) + { + } + __forceinline operator const __m128i &(void) const + { + return m128; + } + __forceinline operator __m128i &(void) + { + return m128; + } + + __forceinline ssei(const int a) : m128(_mm_set1_epi32(a)) + { + } + __forceinline ssei(int a, int b, int c, int d) : m128(_mm_setr_epi32(a, b, c, d)) + { + } + + __forceinline explicit ssei(const __m128 a) : m128(_mm_cvtps_epi32(a)) + { + } + + //////////////////////////////////////////////////////////////////////////////// + /// Array Access + //////////////////////////////////////////////////////////////////////////////// + + __forceinline const int32_t &operator[](const size_t index) const + { + assert(index < 4); + return i[index]; + } + __forceinline int32_t &operator[](const size_t index) + { + assert(index < 4); + return i[index]; + } +}; + +//////////////////////////////////////////////////////////////////////////////// +/// Unary Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const ssei cast(const __m128 &a) +{ + return _mm_castps_si128(a); +} +__forceinline const ssei operator+(const ssei &a) +{ + return a; +} +__forceinline const ssei operator-(const ssei &a) +{ + return _mm_sub_epi32(_mm_setzero_si128(), a.m128); +} +# if defined(__KERNEL_SSSE3__) +__forceinline const ssei abs(const 
ssei &a) +{ + return _mm_abs_epi32(a.m128); +} +# endif + +//////////////////////////////////////////////////////////////////////////////// +/// Binary Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const ssei operator+(const ssei &a, const ssei &b) +{ + return _mm_add_epi32(a.m128, b.m128); +} +__forceinline const ssei operator+(const ssei &a, const int32_t &b) +{ + return a + ssei(b); +} +__forceinline const ssei operator+(const int32_t &a, const ssei &b) +{ + return ssei(a) + b; +} + +__forceinline const ssei operator-(const ssei &a, const ssei &b) +{ + return _mm_sub_epi32(a.m128, b.m128); +} +__forceinline const ssei operator-(const ssei &a, const int32_t &b) +{ + return a - ssei(b); +} +__forceinline const ssei operator-(const int32_t &a, const ssei &b) +{ + return ssei(a) - b; +} + +# if defined(__KERNEL_SSE41__) +__forceinline const ssei operator*(const ssei &a, const ssei &b) +{ + return _mm_mullo_epi32(a.m128, b.m128); +} +__forceinline const ssei operator*(const ssei &a, const int32_t &b) +{ + return a * ssei(b); +} +__forceinline const ssei operator*(const int32_t &a, const ssei &b) +{ + return ssei(a) * b; +} +# endif + +__forceinline const ssei operator&(const ssei &a, const ssei &b) +{ + return _mm_and_si128(a.m128, b.m128); +} +__forceinline const ssei operator&(const ssei &a, const int32_t &b) +{ + return a & ssei(b); +} +__forceinline const ssei operator&(const int32_t &a, const ssei &b) +{ + return ssei(a) & b; +} + +__forceinline const ssei operator|(const ssei &a, const ssei &b) +{ + return _mm_or_si128(a.m128, b.m128); +} +__forceinline const ssei operator|(const ssei &a, const int32_t &b) +{ + return a | ssei(b); +} +__forceinline const ssei operator|(const int32_t &a, const ssei &b) +{ + return ssei(a) | b; +} + +__forceinline const ssei operator^(const ssei &a, const ssei &b) +{ + return _mm_xor_si128(a.m128, b.m128); +} +__forceinline const ssei operator^(const ssei &a, const 
int32_t &b) +{ + return a ^ ssei(b); +} +__forceinline const ssei operator^(const int32_t &a, const ssei &b) +{ + return ssei(a) ^ b; +} + +__forceinline const ssei operator<<(const ssei &a, const int32_t &n) +{ + return _mm_slli_epi32(a.m128, n); +} +__forceinline const ssei operator>>(const ssei &a, const int32_t &n) +{ + return _mm_srai_epi32(a.m128, n); +} + +__forceinline const ssei andnot(const ssei &a, const ssei &b) +{ + return _mm_andnot_si128(a.m128, b.m128); +} +__forceinline const ssei andnot(const sseb &a, const ssei &b) +{ + return _mm_andnot_si128(cast(a.m128), b.m128); +} +__forceinline const ssei andnot(const ssei &a, const sseb &b) +{ + return _mm_andnot_si128(a.m128, cast(b.m128)); +} + +__forceinline const ssei sra(const ssei &a, const int32_t &b) +{ + return _mm_srai_epi32(a.m128, b); +} +__forceinline const ssei srl(const ssei &a, const int32_t &b) +{ + return _mm_srli_epi32(a.m128, b); +} + +# if defined(__KERNEL_SSE41__) +__forceinline const ssei min(const ssei &a, const ssei &b) +{ + return _mm_min_epi32(a.m128, b.m128); +} +__forceinline const ssei min(const ssei &a, const int32_t &b) +{ + return min(a, ssei(b)); +} +__forceinline const ssei min(const int32_t &a, const ssei &b) +{ + return min(ssei(a), b); +} + +__forceinline const ssei max(const ssei &a, const ssei &b) +{ + return _mm_max_epi32(a.m128, b.m128); +} +__forceinline const ssei max(const ssei &a, const int32_t &b) +{ + return max(a, ssei(b)); +} +__forceinline const ssei max(const int32_t &a, const ssei &b) +{ + return max(ssei(a), b); +} +# endif + +//////////////////////////////////////////////////////////////////////////////// +/// Assignment Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline ssei &operator+=(ssei &a, const ssei &b) +{ + return a = a + b; +} +__forceinline ssei &operator+=(ssei &a, const int32_t &b) +{ + return a = a + b; +} + +__forceinline ssei &operator-=(ssei &a, const ssei &b) +{ + return a = a 
- b; +} +__forceinline ssei &operator-=(ssei &a, const int32_t &b) +{ + return a = a - b; +} + +# if defined(__KERNEL_SSE41__) +__forceinline ssei &operator*=(ssei &a, const ssei &b) +{ + return a = a * b; +} +__forceinline ssei &operator*=(ssei &a, const int32_t &b) +{ + return a = a * b; +} +# endif + +__forceinline ssei &operator&=(ssei &a, const ssei &b) +{ + return a = a & b; +} +__forceinline ssei &operator&=(ssei &a, const int32_t &b) +{ + return a = a & b; +} + +__forceinline ssei &operator|=(ssei &a, const ssei &b) +{ + return a = a | b; +} +__forceinline ssei &operator|=(ssei &a, const int32_t &b) +{ + return a = a | b; +} + +__forceinline ssei &operator^=(ssei &a, const ssei &b) +{ + return a = a ^ b; +} +__forceinline ssei &operator^=(ssei &a, const int32_t &b) +{ + return a = a ^ b; +} + +__forceinline ssei &operator<<=(ssei &a, const int32_t &b) +{ + return a = a << b; +} +__forceinline ssei &operator>>=(ssei &a, const int32_t &b) +{ + return a = a >> b; +} + +//////////////////////////////////////////////////////////////////////////////// +/// Comparison Operators + Select +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const sseb operator==(const ssei &a, const ssei &b) +{ + return _mm_castsi128_ps(_mm_cmpeq_epi32(a.m128, b.m128)); +} +__forceinline const sseb operator==(const ssei &a, const int32_t &b) +{ + return a == ssei(b); +} +__forceinline const sseb operator==(const int32_t &a, const ssei &b) +{ + return ssei(a) == b; +} + +__forceinline const sseb operator!=(const ssei &a, const ssei &b) +{ + return !(a == b); +} +__forceinline const sseb operator!=(const ssei &a, const int32_t &b) +{ + return a != ssei(b); +} +__forceinline const sseb operator!=(const int32_t &a, const ssei &b) +{ + return ssei(a) != b; +} + +__forceinline const sseb operator<(const ssei &a, const ssei &b) +{ + return _mm_castsi128_ps(_mm_cmplt_epi32(a.m128, b.m128)); +} +__forceinline const sseb operator<(const ssei &a, 
const int32_t &b) +{ + return a < ssei(b); +} +__forceinline const sseb operator<(const int32_t &a, const ssei &b) +{ + return ssei(a) < b; +} + +__forceinline const sseb operator>=(const ssei &a, const ssei &b) +{ + return !(a < b); +} +__forceinline const sseb operator>=(const ssei &a, const int32_t &b) +{ + return a >= ssei(b); +} +__forceinline const sseb operator>=(const int32_t &a, const ssei &b) +{ + return ssei(a) >= b; +} + +__forceinline const sseb operator>(const ssei &a, const ssei &b) +{ + return _mm_castsi128_ps(_mm_cmpgt_epi32(a.m128, b.m128)); +} +__forceinline const sseb operator>(const ssei &a, const int32_t &b) +{ + return a > ssei(b); +} +__forceinline const sseb operator>(const int32_t &a, const ssei &b) +{ + return ssei(a) > b; +} + +__forceinline const sseb operator<=(const ssei &a, const ssei &b) +{ + return !(a > b); +} +__forceinline const sseb operator<=(const ssei &a, const int32_t &b) +{ + return a <= ssei(b); +} +__forceinline const sseb operator<=(const int32_t &a, const ssei &b) +{ + return ssei(a) <= b; +} + +__forceinline const ssei select(const sseb &m, const ssei &t, const ssei &f) +{ +# ifdef __KERNEL_SSE41__ + return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), m)); +# else + return _mm_or_si128(_mm_and_si128(m, t), _mm_andnot_si128(m, f)); +# endif +} + +__forceinline const ssei select(const int mask, const ssei &t, const ssei &f) +{ +# if defined(__KERNEL_SSE41__) && \ + ((!defined(__clang__) && !defined(_MSC_VER)) || defined(__INTEL_COMPILER)) + return _mm_castps_si128(_mm_blend_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), mask)); +# else + return select(sseb(mask), t, f); +# endif +} + +//////////////////////////////////////////////////////////////////////////////// +// Movement/Shifting/Shuffling Functions +//////////////////////////////////////////////////////////////////////////////// + +__forceinline ssei unpacklo(const ssei &a, const ssei &b) +{ + return _mm_unpacklo_epi32(a, b); +} 
+__forceinline ssei unpackhi(const ssei &a, const ssei &b) +{ + return _mm_unpackhi_epi32(a, b); +} + +template +__forceinline const ssei shuffle(const ssei &a) +{ +# ifdef __KERNEL_NEON__ + int32x4_t result = shuffle_neon(vreinterpretq_s32_m128i(a)); + return vreinterpretq_m128i_s32(result); +# else + return _mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)); +# endif +} + +template +__forceinline const ssei shuffle(const ssei &a, const ssei &b) +{ +# ifdef __KERNEL_NEON__ + int32x4_t result = shuffle_neon(vreinterpretq_s32_m128i(a), + vreinterpretq_s32_m128i(b)); + return vreinterpretq_m128i_s32(result); +# else + return _mm_castps_si128( + _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0))); +# endif +} + +template __forceinline const ssei shuffle(const ssei &b) +{ + return shuffle(b); +} + +# if defined(__KERNEL_SSE41__) +template __forceinline int extract(const ssei &b) +{ + return _mm_extract_epi32(b, src); +} +template __forceinline const ssei insert(const ssei &a, const int32_t b) +{ + return _mm_insert_epi32(a, b, dst); +} +# else +template __forceinline int extract(const ssei &b) +{ + return b[src]; +} +template __forceinline const ssei insert(const ssei &a, const int32_t b) +{ + ssei c = a; + c[dst] = b; + return c; +} +# endif + +//////////////////////////////////////////////////////////////////////////////// +/// Reductions +//////////////////////////////////////////////////////////////////////////////// + +# if defined(__KERNEL_SSE41__) +__forceinline const ssei vreduce_min(const ssei &v) +{ + ssei h = min(shuffle<1, 0, 3, 2>(v), v); + return min(shuffle<2, 3, 0, 1>(h), h); +} +__forceinline const ssei vreduce_max(const ssei &v) +{ + ssei h = max(shuffle<1, 0, 3, 2>(v), v); + return max(shuffle<2, 3, 0, 1>(h), h); +} +__forceinline const ssei vreduce_add(const ssei &v) +{ + ssei h = shuffle<1, 0, 3, 2>(v) + v; + return shuffle<2, 3, 0, 1>(h) + h; +} + +__forceinline int reduce_min(const ssei &v) +{ +# ifdef 
__KERNEL_NEON__ + return vminvq_s32(vreinterpretq_s32_m128i(v)); +# else + return extract<0>(vreduce_min(v)); +# endif +} +__forceinline int reduce_max(const ssei &v) +{ +# ifdef __KERNEL_NEON__ + return vmaxvq_s32(vreinterpretq_s32_m128i(v)); +# else + return extract<0>(vreduce_max(v)); +# endif +} +__forceinline int reduce_add(const ssei &v) +{ +# ifdef __KERNEL_NEON__ + return vaddvq_s32(vreinterpretq_s32_m128i(v)); +# else + return extract<0>(vreduce_add(v)); +# endif +} + +__forceinline uint32_t select_min(const ssei &v) +{ + return __bsf(movemask(v == vreduce_min(v))); +} +__forceinline uint32_t select_max(const ssei &v) +{ + return __bsf(movemask(v == vreduce_max(v))); +} + +__forceinline uint32_t select_min(const sseb &valid, const ssei &v) +{ + const ssei a = select(valid, v, ssei((int)pos_inf)); + return __bsf(movemask(valid & (a == vreduce_min(a)))); +} +__forceinline uint32_t select_max(const sseb &valid, const ssei &v) +{ + const ssei a = select(valid, v, ssei((int)neg_inf)); + return __bsf(movemask(valid & (a == vreduce_max(a)))); +} + +# else + +__forceinline int ssei_min(int a, int b) +{ + return (a < b) ? a : b; +} +__forceinline int ssei_max(int a, int b) +{ + return (a > b) ? 
a : b; +} +__forceinline int reduce_min(const ssei &v) +{ + return ssei_min(ssei_min(v[0], v[1]), ssei_min(v[2], v[3])); +} +__forceinline int reduce_max(const ssei &v) +{ + return ssei_max(ssei_max(v[0], v[1]), ssei_max(v[2], v[3])); +} +__forceinline int reduce_add(const ssei &v) +{ + return v[0] + v[1] + v[2] + v[3]; +} + +# endif + +//////////////////////////////////////////////////////////////////////////////// +/// Memory load and store operations +//////////////////////////////////////////////////////////////////////////////// + +__forceinline ssei load4i(const void *const a) +{ + return _mm_load_si128((__m128i *)a); +} + +__forceinline void store4i(void *ptr, const ssei &v) +{ + _mm_store_si128((__m128i *)ptr, v); +} + +__forceinline void storeu4i(void *ptr, const ssei &v) +{ + _mm_storeu_si128((__m128i *)ptr, v); +} + +__forceinline void store4i(const sseb &mask, void *ptr, const ssei &i) +{ +# if defined(__KERNEL_AVX__) + _mm_maskstore_ps((float *)ptr, (__m128i)mask, _mm_castsi128_ps(i)); +# else + *(ssei *)ptr = select(mask, i, *(ssei *)ptr); +# endif +} + +__forceinline ssei load4i_nt(void *ptr) +{ +# if defined(__KERNEL_SSE41__) + return _mm_stream_load_si128((__m128i *)ptr); +# else + return _mm_load_si128((__m128i *)ptr); +# endif +} + +__forceinline void store4i_nt(void *ptr, const ssei &v) +{ +# if defined(__KERNEL_SSE41__) + _mm_stream_ps((float *)ptr, _mm_castsi128_ps(v)); +# else + _mm_store_si128((__m128i *)ptr, v); +# endif +} + +//////////////////////////////////////////////////////////////////////////////// +/// Debug Functions +//////////////////////////////////////////////////////////////////////////////// + +ccl_device_inline void print_ssei(const char *label, const ssei &a) +{ + printf("%s: %df %df %df %d\n", label, a[0], a[1], a[2], a[3]); +} + +#endif + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/util/stack_allocator.h b/intern/cycles/util/stack_allocator.h new file mode 100644 index 00000000000..ef31c0fe5e2 --- /dev/null 
+++ b/intern/cycles/util/stack_allocator.h @@ -0,0 +1,165 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_STACK_ALLOCATOR_H__ +#define __UTIL_STACK_ALLOCATOR_H__ + +#include +#include + +CCL_NAMESPACE_BEGIN + +/* Stack allocator for the use with STL. */ +template class ccl_try_align(16) StackAllocator +{ + public: + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef T *pointer; + typedef const T *const_pointer; + typedef T &reference; + typedef const T &const_reference; + typedef T value_type; + + /* Allocator construction/destruction. */ + + StackAllocator() : pointer_(0), use_stack_(true) + { + } + + StackAllocator(const StackAllocator &) : pointer_(0), use_stack_(true) + { + } + + template + StackAllocator(const StackAllocator &) : pointer_(0), use_stack_(false) + { + } + + /* Memory allocation/deallocation. 
*/ + + T *allocate(size_t n, const void *hint = 0) + { + (void)hint; + if (n == 0) { + return NULL; + } + if (pointer_ + n >= SIZE || use_stack_ == false) { + size_t size = n * sizeof(T); + util_guarded_mem_alloc(size); + T *mem; +#ifdef WITH_BLENDER_GUARDEDALLOC + mem = (T *)MEM_mallocN_aligned(size, 16, "Cycles Alloc"); +#else + mem = (T *)malloc(size); +#endif + if (mem == NULL) { + throw std::bad_alloc(); + } + return mem; + } + T *mem = &data_[pointer_]; + pointer_ += n; + return mem; + } + + void deallocate(T * p, size_t n) + { + if (p == NULL) { + return; + } + if (p < data_ || p >= data_ + SIZE) { + util_guarded_mem_free(n * sizeof(T)); +#ifdef WITH_BLENDER_GUARDEDALLOC + MEM_freeN(p); +#else + free(p); +#endif + return; + } + /* We don't support memory free for the stack allocator. */ + } + + /* Address of an reference. */ + + T *address(T & x) const + { + return &x; + } + + const T *address(const T &x) const + { + return &x; + } + + /* Object construction/destruction. */ + + void construct(T * p, const T &val) + { + if (p != NULL) { + new ((T *)p) T(val); + } + } + + void destroy(T * p) + { + p->~T(); + } + + /* Maximum allocation size. */ + + size_t max_size() const + { + return size_t(-1); + } + + /* Rebind to other type of allocator. 
*/ + + template struct rebind { + typedef StackAllocator other; + }; + + /* Operators */ + + template inline StackAllocator &operator=(const StackAllocator &) + { + return *this; + } + + StackAllocator &operator=(const StackAllocator &) + { + return *this; + } + + inline bool operator==(StackAllocator const & /*other*/) const + { + return true; + } + + inline bool operator!=(StackAllocator const &other) const + { + return !operator==(other); + } + + private: + int pointer_; + bool use_stack_; + T data_[SIZE]; +}; + +CCL_NAMESPACE_END + +#endif /* __UTIL_STACK_ALLOCATOR_H__ */ diff --git a/intern/cycles/util/static_assert.h b/intern/cycles/util/static_assert.h new file mode 100644 index 00000000000..7df52d462b7 --- /dev/null +++ b/intern/cycles/util/static_assert.h @@ -0,0 +1,36 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* clang-format off */ + +/* #define static_assert triggers a bug in some clang-format versions, disable + * format for entire file to keep results consistent. 
*/ + +#ifndef __UTIL_STATIC_ASSERT_H__ +#define __UTIL_STATIC_ASSERT_H__ + +CCL_NAMESPACE_BEGIN + +#if defined(CYCLES_CUBIN_CC) +# define static_assert(statement, message) +#endif + +#define static_assert_align(st, align) \ + static_assert((sizeof(st) % (align) == 0), "Structure must be strictly aligned") // NOLINT + +CCL_NAMESPACE_END + +#endif /* __UTIL_STATIC_ASSERT_H__ */ diff --git a/intern/cycles/util/stats.h b/intern/cycles/util/stats.h new file mode 100644 index 00000000000..590973f1cbc --- /dev/null +++ b/intern/cycles/util/stats.h @@ -0,0 +1,54 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __UTIL_STATS_H__ +#define __UTIL_STATS_H__ + +#include "util/atomic.h" +#include "util/profiling.h" + +CCL_NAMESPACE_BEGIN + +class Stats { + public: + enum static_init_t { static_init = 0 }; + + Stats() : mem_used(0), mem_peak(0) + { + } + explicit Stats(static_init_t) + { + } + + void mem_alloc(size_t size) + { + atomic_add_and_fetch_z(&mem_used, size); + atomic_fetch_and_update_max_z(&mem_peak, mem_used); + } + + void mem_free(size_t size) + { + assert(mem_used >= size); + atomic_sub_and_fetch_z(&mem_used, size); + } + + size_t mem_used; + size_t mem_peak; +}; + +CCL_NAMESPACE_END + +#endif /* __UTIL_STATS_H__ */ diff --git a/intern/cycles/util/string.cpp b/intern/cycles/util/string.cpp new file mode 100644 index 00000000000..b98272f7759 --- /dev/null +++ b/intern/cycles/util/string.cpp @@ -0,0 +1,268 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include + +#include "util/foreach.h" +#include "util/string.h" +#include "util/windows.h" + +#ifdef _WIN32 +# ifndef vsnprintf +# define vsnprintf _vsnprintf +# endif +#endif /* _WIN32 */ + +CCL_NAMESPACE_BEGIN + +string string_printf(const char *format, ...) 
+{ + vector str(128, 0); + + while (1) { + va_list args; + int result; + + va_start(args, format); + result = vsnprintf(&str[0], str.size(), format, args); + va_end(args); + + if (result == -1) { + /* not enough space or formatting error */ + if (str.size() > 65536) { + assert(0); + return string(""); + } + + str.resize(str.size() * 2, 0); + continue; + } + else if (result >= (int)str.size()) { + /* not enough space */ + str.resize(result + 1, 0); + continue; + } + + return string(&str[0]); + } +} + +bool string_iequals(const string &a, const string &b) +{ + if (a.size() == b.size()) { + for (size_t i = 0; i < a.size(); i++) + if (toupper(a[i]) != toupper(b[i])) + return false; + + return true; + } + + return false; +} + +void string_split(vector &tokens, + const string &str, + const string &separators, + bool skip_empty_tokens) +{ + size_t token_start = 0, token_length = 0; + for (size_t i = 0; i < str.size(); ++i) { + const char ch = str[i]; + if (separators.find(ch) == string::npos) { + /* Current character is not a separator, + * append it to token by increasing token length. + */ + ++token_length; + } + else { + /* Current character is a separator, + * append current token to the list. + */ + if (!skip_empty_tokens || token_length > 0) { + string token = str.substr(token_start, token_length); + tokens.push_back(token); + } + token_start = i + 1; + token_length = 0; + } + } + /* Append token from the tail of the string if exists. 
*/ + if (token_length) { + string token = str.substr(token_start, token_length); + tokens.push_back(token); + } +} + +bool string_startswith(const string_view s, const string_view start) +{ + const size_t len = start.size(); + + if (len > s.size()) { + return false; + } + + return strncmp(s.c_str(), start.data(), len) == 0; +} + +bool string_endswith(const string_view s, const string_view end) +{ + const size_t len = end.size(); + + if (len > s.size()) { + return false; + } + + return strncmp(s.c_str() + s.size() - len, end.data(), len) == 0; +} + +string string_strip(const string &s) +{ + string result = s; + result.erase(0, result.find_first_not_of(' ')); + result.erase(result.find_last_not_of(' ') + 1); + return result; +} + +void string_replace(string &haystack, const string &needle, const string &other) +{ + size_t i = 0, index; + while ((index = haystack.find(needle, i)) != string::npos) { + haystack.replace(index, needle.size(), other); + i = index + other.size(); + } +} + +string string_remove_trademark(const string &s) +{ + string result = s; + + /* Special case, so we don't leave sequential spaces behind. */ + /* TODO(sergey): Consider using regex perhaps? */ + string_replace(result, " (TM)", ""); + string_replace(result, " (R)", ""); + + string_replace(result, "(TM)", ""); + string_replace(result, "(R)", ""); + + return string_strip(result); +} + +string string_from_bool(bool var) +{ + if (var) + return "True"; + else + return "False"; +} + +string to_string(const char *str) +{ + return string(str); +} + +string string_to_lower(const string &s) +{ + string r = s; + std::transform(r.begin(), r.end(), r.begin(), [](char c) { return std::tolower(c); }); + return r; +} + +/* Wide char strings helpers for Windows. 
*/ + +#ifdef _WIN32 + +wstring string_to_wstring(const string &str) +{ + const int length_wc = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), NULL, 0); + wstring str_wc(length_wc, 0); + MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), &str_wc[0], length_wc); + return str_wc; +} + +string string_from_wstring(const wstring &str) +{ + int length_mb = WideCharToMultiByte(CP_UTF8, 0, str.c_str(), str.size(), NULL, 0, NULL, NULL); + string str_mb(length_mb, 0); + WideCharToMultiByte(CP_UTF8, 0, str.c_str(), str.size(), &str_mb[0], length_mb, NULL, NULL); + return str_mb; +} + +string string_to_ansi(const string &str) +{ + const int length_wc = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), NULL, 0); + wstring str_wc(length_wc, 0); + MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), &str_wc[0], length_wc); + + int length_mb = WideCharToMultiByte( + CP_ACP, 0, str_wc.c_str(), str_wc.size(), NULL, 0, NULL, NULL); + + string str_mb(length_mb, 0); + WideCharToMultiByte(CP_ACP, 0, str_wc.c_str(), str_wc.size(), &str_mb[0], length_mb, NULL, NULL); + + return str_mb; +} + +#endif /* _WIN32 */ + +string string_human_readable_size(size_t size) +{ + static const char suffixes[] = "BKMGTPEZY"; + + const char *suffix = suffixes; + size_t r = 0; + + while (size >= 1024) { + r = size % 1024; + size /= 1024; + suffix++; + } + + if (*suffix != 'B') + return string_printf("%.2f%c", double(size * 1024 + r) / 1024.0, *suffix); + else + return string_printf("%zu", size); +} + +string string_human_readable_number(size_t num) +{ + if (num == 0) { + return "0"; + } + + /* Add thousands separators. 
*/ + char buf[32]; + + char *p = buf + 31; + *p = '\0'; + + int i = -1; + while (num) { + if (++i && i % 3 == 0) + *(--p) = ','; + + *(--p) = '0' + (num % 10); + + num /= 10; + } + + return p; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/string.h b/intern/cycles/util/string.h new file mode 100644 index 00000000000..cc20a6df120 --- /dev/null +++ b/intern/cycles/util/string.h @@ -0,0 +1,81 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +/* Use string view implementation from OIIO. + * Ideally, need to switch to `std::string_view`, but this first requires getting rid of using + * namespace OIIO as it causes symbol collision. */ +#include + +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +using std::istringstream; +using std::ostringstream; +using std::string; +using std::stringstream; +using std::to_string; + +using OIIO::string_view; + +#ifdef __GNUC__ +# define PRINTF_ATTRIBUTE __attribute__((format(printf, 1, 2))) +#else +# define PRINTF_ATTRIBUTE +#endif + +string string_printf(const char *format, ...) 
PRINTF_ATTRIBUTE; + +bool string_iequals(const string &a, const string &b); +void string_split(vector &tokens, + const string &str, + const string &separators = "\t ", + bool skip_empty_tokens = true); +void string_replace(string &haystack, const string &needle, const string &other); +bool string_startswith(string_view s, string_view start); +bool string_endswith(string_view s, string_view end); +string string_strip(const string &s); +string string_remove_trademark(const string &s); +string string_from_bool(const bool var); +string to_string(const char *str); +string string_to_lower(const string &s); + +/* Wide char strings are only used on Windows to deal with non-ASCII + * characters in file names and such. No reason to use such strings + * for something else at this moment. + * + * Please note that strings are expected to be in UTF-8 codepage, and + * if ANSI is needed then explicit conversion required. + */ +#ifdef _WIN32 +using std::wstring; +wstring string_to_wstring(const string &path); +string string_from_wstring(const wstring &path); +string string_to_ansi(const string &str); +#endif + +/* Make a string from a size in bytes in human readable form. */ +string string_human_readable_size(size_t size); +/* Make a string from a unit-less quantity in human readable form. */ +string string_human_readable_number(size_t num); + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/system.cpp b/intern/cycles/util/system.cpp new file mode 100644 index 00000000000..f12e15e756f --- /dev/null +++ b/intern/cycles/util/system.cpp @@ -0,0 +1,415 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util/system.h" + +#include "util/log.h" +#include "util/string.h" +#include "util/types.h" + +#include + +#include +OIIO_NAMESPACE_USING + +#ifdef _WIN32 +# if (!defined(FREE_WINDOWS)) +# include +# endif +# include "util/windows.h" +#elif defined(__APPLE__) +# include +# include +# include +#else +# include +# include +#endif + +CCL_NAMESPACE_BEGIN + +bool system_cpu_ensure_initialized() +{ + static bool is_initialized = false; + static bool result = false; + if (is_initialized) { + return result; + } + is_initialized = true; + const NUMAAPI_Result numa_result = numaAPI_Initialize(); + result = (numa_result == NUMAAPI_SUCCESS); + return result; +} + +/* Fallback solution, which doesn't use NUMA/CPU groups. */ +static int system_cpu_thread_count_fallback() +{ +#ifdef _WIN32 + SYSTEM_INFO info; + GetSystemInfo(&info); + return info.dwNumberOfProcessors; +#elif defined(__APPLE__) + int count; + size_t len = sizeof(count); + int mib[2] = {CTL_HW, HW_NCPU}; + sysctl(mib, 2, &count, &len, NULL, 0); + return count; +#else + return sysconf(_SC_NPROCESSORS_ONLN); +#endif +} + +int system_cpu_thread_count() +{ + const int num_nodes = system_cpu_num_numa_nodes(); + int num_threads = 0; + for (int node = 0; node < num_nodes; ++node) { + if (!system_cpu_is_numa_node_available(node)) { + continue; + } + num_threads += system_cpu_num_numa_node_processors(node); + } + return num_threads; +} + +int system_cpu_num_numa_nodes() +{ + if (!system_cpu_ensure_initialized()) { + /* Fallback to a single node with all the threads. 
*/ + return 1; + } + return numaAPI_GetNumNodes(); +} + +bool system_cpu_is_numa_node_available(int node) +{ + if (!system_cpu_ensure_initialized()) { + return true; + } + return numaAPI_IsNodeAvailable(node); +} + +int system_cpu_num_numa_node_processors(int node) +{ + if (!system_cpu_ensure_initialized()) { + return system_cpu_thread_count_fallback(); + } + return numaAPI_GetNumNodeProcessors(node); +} + +bool system_cpu_run_thread_on_node(int node) +{ + if (!system_cpu_ensure_initialized()) { + return true; + } + return numaAPI_RunThreadOnNode(node); +} + +int system_console_width() +{ + int columns = 0; + +#ifdef _WIN32 + CONSOLE_SCREEN_BUFFER_INFO csbi; + if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi)) { + columns = csbi.dwSize.X; + } +#else + struct winsize w; + if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == 0) { + columns = w.ws_col; + } +#endif + + return (columns > 0) ? columns : 80; +} + +int system_cpu_num_active_group_processors() +{ + if (!system_cpu_ensure_initialized()) { + return system_cpu_thread_count_fallback(); + } + return numaAPI_GetNumCurrentNodesProcessors(); +} + +/* Equivalent of Windows __cpuid for x86 processors on other platforms. */ +#if (!defined(_WIN32) || defined(FREE_WINDOWS)) && (defined(__x86_64__) || defined(__i386__)) +static void __cpuid(int data[4], int selector) +{ +# if defined(__x86_64__) + asm("cpuid" : "=a"(data[0]), "=b"(data[1]), "=c"(data[2]), "=d"(data[3]) : "a"(selector)); +# elif defined(__i386__) + asm("pushl %%ebx \n\t" + "cpuid \n\t" + "movl %%ebx, %1 \n\t" + "popl %%ebx \n\t" + : "=a"(data[0]), "=r"(data[1]), "=c"(data[2]), "=d"(data[3]) + : "a"(selector) + : "ebx"); +# else + data[0] = data[1] = data[2] = data[3] = 0; +# endif +} +#endif + +string system_cpu_brand_string() +{ +#if defined(__APPLE__) + /* Get from system on macOS. 
*/ + char modelname[512] = ""; + size_t bufferlen = 512; + if (sysctlbyname("machdep.cpu.brand_string", &modelname, &bufferlen, NULL, 0) == 0) { + return modelname; + } +#elif defined(WIN32) || defined(__x86_64__) || defined(__i386__) + /* Get from intrinsics on Windows and x86. */ + char buf[49] = {0}; + int result[4] = {0}; + + __cpuid(result, 0x80000000); + + if (result[0] != 0 && result[0] >= (int)0x80000004) { + __cpuid((int *)(buf + 0), 0x80000002); + __cpuid((int *)(buf + 16), 0x80000003); + __cpuid((int *)(buf + 32), 0x80000004); + + string brand = buf; + + /* Make it a bit more presentable. */ + brand = string_remove_trademark(brand); + + return brand; + } +#else + /* Get from /proc/cpuinfo on Unix systems. */ + FILE *cpuinfo = fopen("/proc/cpuinfo", "r"); + if (cpuinfo != nullptr) { + char cpuinfo_buf[513] = ""; + fread(cpuinfo_buf, sizeof(cpuinfo_buf) - 1, 1, cpuinfo); + fclose(cpuinfo); + + char *modelname = strstr(cpuinfo_buf, "model name"); + if (modelname != nullptr) { + modelname = strchr(modelname, ':'); + if (modelname != nullptr) { + modelname += 2; + char *modelname_end = strchr(modelname, '\n'); + if (modelname_end != nullptr) { + *modelname_end = '\0'; + return modelname; + } + } + } + } +#endif + return "Unknown CPU"; +} + +int system_cpu_bits() +{ + return (sizeof(void *) * 8); +} + +#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) + +struct CPUCapabilities { + bool x64; + bool mmx; + bool sse; + bool sse2; + bool sse3; + bool ssse3; + bool sse41; + bool sse42; + bool sse4a; + bool avx; + bool f16c; + bool avx2; + bool xop; + bool fma3; + bool fma4; + bool bmi1; + bool bmi2; +}; + +static CPUCapabilities &system_cpu_capabilities() +{ + static CPUCapabilities caps; + static bool caps_init = false; + + if (!caps_init) { + int result[4], num; + + memset(&caps, 0, sizeof(caps)); + + __cpuid(result, 0); + num = result[0]; + + if (num >= 1) { + __cpuid(result, 0x00000001); + caps.mmx = (result[3] & ((int)1 << 
23)) != 0; + caps.sse = (result[3] & ((int)1 << 25)) != 0; + caps.sse2 = (result[3] & ((int)1 << 26)) != 0; + caps.sse3 = (result[2] & ((int)1 << 0)) != 0; + + caps.ssse3 = (result[2] & ((int)1 << 9)) != 0; + caps.sse41 = (result[2] & ((int)1 << 19)) != 0; + caps.sse42 = (result[2] & ((int)1 << 20)) != 0; + + caps.fma3 = (result[2] & ((int)1 << 12)) != 0; + caps.avx = false; + bool os_uses_xsave_xrestore = (result[2] & ((int)1 << 27)) != 0; + bool cpu_avx_support = (result[2] & ((int)1 << 28)) != 0; + + if (os_uses_xsave_xrestore && cpu_avx_support) { + // Check if the OS will save the YMM registers + uint32_t xcr_feature_mask; +# if defined(__GNUC__) + int edx; /* not used */ + /* actual opcode for xgetbv */ + __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr_feature_mask), "=d"(edx) : "c"(0)); +# elif defined(_MSC_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) + /* Minimum VS2010 SP1 compiler is required. */ + xcr_feature_mask = (uint32_t)_xgetbv(_XCR_XFEATURE_ENABLED_MASK); +# else + xcr_feature_mask = 0; +# endif + caps.avx = (xcr_feature_mask & 0x6) == 0x6; + } + + caps.f16c = (result[2] & ((int)1 << 29)) != 0; + + __cpuid(result, 0x00000007); + caps.bmi1 = (result[1] & ((int)1 << 3)) != 0; + caps.bmi2 = (result[1] & ((int)1 << 8)) != 0; + caps.avx2 = (result[1] & ((int)1 << 5)) != 0; + } + + caps_init = true; + } + + return caps; +} + +bool system_cpu_support_sse2() +{ + CPUCapabilities &caps = system_cpu_capabilities(); + return caps.sse && caps.sse2; +} + +bool system_cpu_support_sse3() +{ + CPUCapabilities &caps = system_cpu_capabilities(); + return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3; +} + +bool system_cpu_support_sse41() +{ + CPUCapabilities &caps = system_cpu_capabilities(); + return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41; +} + +bool system_cpu_support_avx() +{ + CPUCapabilities &caps = system_cpu_capabilities(); + return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx; +} + +bool 
system_cpu_support_avx2() +{ + CPUCapabilities &caps = system_cpu_capabilities(); + return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx && caps.f16c && + caps.avx2 && caps.fma3 && caps.bmi1 && caps.bmi2; +} +#else + +bool system_cpu_support_sse2() +{ + return false; +} + +bool system_cpu_support_sse3() +{ + return false; +} + +bool system_cpu_support_sse41() +{ + return false; +} + +bool system_cpu_support_avx() +{ + return false; +} +bool system_cpu_support_avx2() +{ + return false; +} + +#endif + +bool system_call_self(const vector &args) +{ + /* Escape program and arguments in case they contain spaces. */ + string cmd = "\"" + Sysutil::this_program_path() + "\""; + + for (int i = 0; i < args.size(); i++) { + cmd += " \"" + args[i] + "\""; + } + +#ifdef _WIN32 + /* Use cmd /S to avoid issues with spaces in arguments. */ + cmd = "cmd /S /C \"" + cmd + " > nul \""; +#else + /* Quiet output. */ + cmd += " > /dev/null"; +#endif + + return (system(cmd.c_str()) == 0); +} + +size_t system_physical_ram() +{ +#ifdef _WIN32 + MEMORYSTATUSEX ram; + ram.dwLength = sizeof(ram); + GlobalMemoryStatusEx(&ram); + return ram.ullTotalPhys; +#elif defined(__APPLE__) + uint64_t ram = 0; + size_t len = sizeof(ram); + if (sysctlbyname("hw.memsize", &ram, &len, NULL, 0) == 0) { + return ram; + } + return 0; +#else + size_t ps = sysconf(_SC_PAGESIZE); + size_t pn = sysconf(_SC_PHYS_PAGES); + return ps * pn; +#endif +} + +uint64_t system_self_process_id() +{ +#ifdef _WIN32 + return GetCurrentProcessId(); +#else + return getpid(); +#endif +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/system.h b/intern/cycles/util/system.h new file mode 100644 index 00000000000..425c7255cbe --- /dev/null +++ b/intern/cycles/util/system.h @@ -0,0 +1,73 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_SYSTEM_H__ +#define __UTIL_SYSTEM_H__ + +#include "util/string.h" +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +/* Make sure CPU groups / NUMA API is initialized. */ +bool system_cpu_ensure_initialized(); + +/* Get total number of threads in all NUMA nodes / CPU groups. */ +int system_cpu_thread_count(); + +/* Get width in characters of the current console output. */ +int system_console_width(); + +/* Get number of available nodes. + * + * This is in fact an index of last node plus one and it's not guaranteed + * that all nodes up to this one are available. */ +int system_cpu_num_numa_nodes(); + +/* Returns truth if the given node is available for compute. */ +bool system_cpu_is_numa_node_available(int node); + +/* Get number of available processors on a given node. */ +int system_cpu_num_numa_node_processors(int node); + +/* Runs the current thread and its children on a specific node. + * + * Returns truth if affinity has successfully changed. */ +bool system_cpu_run_thread_on_node(int node); + +/* Number of processors within the current CPU group (or within active thread + * thread affinity). */ +int system_cpu_num_active_group_processors(); + +string system_cpu_brand_string(); +int system_cpu_bits(); +bool system_cpu_support_sse2(); +bool system_cpu_support_sse3(); +bool system_cpu_support_sse41(); +bool system_cpu_support_avx(); +bool system_cpu_support_avx2(); + +size_t system_physical_ram(); + +/* Start a new process of the current application with the given arguments. 
*/ +bool system_call_self(const vector &args); + +/* Get identifier of the currently running process. */ +uint64_t system_self_process_id(); + +CCL_NAMESPACE_END + +#endif /* __UTIL_SYSTEM_H__ */ diff --git a/intern/cycles/util/task.cpp b/intern/cycles/util/task.cpp new file mode 100644 index 00000000000..ce61bf8d6c4 --- /dev/null +++ b/intern/cycles/util/task.cpp @@ -0,0 +1,251 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util/task.h" +#include "util/foreach.h" +#include "util/log.h" +#include "util/system.h" +#include "util/time.h" + +CCL_NAMESPACE_BEGIN + +/* Task Pool */ + +TaskPool::TaskPool() : start_time(time_dt()), num_tasks_pushed(0) +{ +} + +TaskPool::~TaskPool() +{ + cancel(); +} + +void TaskPool::push(TaskRunFunction &&task) +{ + tbb_group.run(std::move(task)); + num_tasks_pushed++; +} + +void TaskPool::wait_work(Summary *stats) +{ + tbb_group.wait(); + + if (stats != NULL) { + stats->time_total = time_dt() - start_time; + stats->num_tasks_handled = num_tasks_pushed; + } + + num_tasks_pushed = 0; +} + +void TaskPool::cancel() +{ + if (num_tasks_pushed > 0) { + tbb_group.cancel(); + tbb_group.wait(); + num_tasks_pushed = 0; + } +} + +bool TaskPool::canceled() +{ + return tbb::is_current_task_group_canceling(); +} + +/* Task Scheduler */ + +thread_mutex TaskScheduler::mutex; +int TaskScheduler::users = 0; +int TaskScheduler::active_num_threads = 0; +tbb::global_control 
*TaskScheduler::global_control = nullptr; + +void TaskScheduler::init(int num_threads) +{ + thread_scoped_lock lock(mutex); + /* Multiple cycles instances can use this task scheduler, sharing the same + * threads, so we keep track of the number of users. */ + ++users; + if (users != 1) { + return; + } + if (num_threads > 0) { + /* Automatic number of threads. */ + VLOG(1) << "Overriding number of TBB threads to " << num_threads << "."; + global_control = new tbb::global_control(tbb::global_control::max_allowed_parallelism, + num_threads); + active_num_threads = num_threads; + } + else { + active_num_threads = system_cpu_thread_count(); + } +} + +void TaskScheduler::exit() +{ + thread_scoped_lock lock(mutex); + users--; + if (users == 0) { + delete global_control; + global_control = nullptr; + active_num_threads = 0; + } +} + +void TaskScheduler::free_memory() +{ + assert(users == 0); +} + +int TaskScheduler::num_threads() +{ + return active_num_threads; +} + +/* Dedicated Task Pool */ + +DedicatedTaskPool::DedicatedTaskPool() +{ + do_cancel = false; + do_exit = false; + num = 0; + + worker_thread = new thread(function_bind(&DedicatedTaskPool::thread_run, this)); +} + +DedicatedTaskPool::~DedicatedTaskPool() +{ + wait(); + + do_exit = true; + queue_cond.notify_all(); + + worker_thread->join(); + delete worker_thread; +} + +void DedicatedTaskPool::push(TaskRunFunction &&task, bool front) +{ + num_increase(); + + /* add task to queue */ + queue_mutex.lock(); + if (front) + queue.emplace_front(std::move(task)); + else + queue.emplace_back(std::move(task)); + + queue_cond.notify_one(); + queue_mutex.unlock(); +} + +void DedicatedTaskPool::wait() +{ + thread_scoped_lock num_lock(num_mutex); + + while (num) + num_cond.wait(num_lock); +} + +void DedicatedTaskPool::cancel() +{ + do_cancel = true; + + clear(); + wait(); + + do_cancel = false; +} + +bool DedicatedTaskPool::canceled() +{ + return do_cancel; +} + +void DedicatedTaskPool::num_decrease(int done) +{ + 
thread_scoped_lock num_lock(num_mutex); + num -= done; + + assert(num >= 0); + if (num == 0) + num_cond.notify_all(); +} + +void DedicatedTaskPool::num_increase() +{ + thread_scoped_lock num_lock(num_mutex); + num++; + num_cond.notify_all(); +} + +bool DedicatedTaskPool::thread_wait_pop(TaskRunFunction &task) +{ + thread_scoped_lock queue_lock(queue_mutex); + + while (queue.empty() && !do_exit) + queue_cond.wait(queue_lock); + + if (queue.empty()) { + assert(do_exit); + return false; + } + + task = queue.front(); + queue.pop_front(); + + return true; +} + +void DedicatedTaskPool::thread_run() +{ + TaskRunFunction task; + + /* keep popping off tasks */ + while (thread_wait_pop(task)) { + /* run task */ + task(); + + /* delete task */ + task = nullptr; + + /* notify task was done */ + num_decrease(1); + } +} + +void DedicatedTaskPool::clear() +{ + thread_scoped_lock queue_lock(queue_mutex); + + /* erase all tasks from the queue */ + int done = queue.size(); + queue.clear(); + + queue_lock.unlock(); + + /* notify done */ + num_decrease(done); +} + +string TaskPool::Summary::full_report() const +{ + string report = ""; + report += string_printf("Total time: %f\n", time_total); + report += string_printf("Tasks handled: %d\n", num_tasks_handled); + return report; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/task.h b/intern/cycles/util/task.h new file mode 100644 index 00000000000..1a8f512b83a --- /dev/null +++ b/intern/cycles/util/task.h @@ -0,0 +1,148 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TASK_H__ +#define __UTIL_TASK_H__ + +#include "util/list.h" +#include "util/string.h" +#include "util/tbb.h" +#include "util/thread.h" +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +class TaskPool; +class TaskScheduler; + +typedef function TaskRunFunction; + +/* Task Pool + * + * Pool of tasks that will be executed by the central TaskScheduler. For each + * pool, we can wait for all tasks to be done, or cancel them before they are + * done. + * + * TaskRunFunction may be created with std::bind or lambda expressions. */ + +class TaskPool { + public: + struct Summary { + /* Time spent to handle all tasks. */ + double time_total; + + /* Number of all tasks handled by this pool. */ + int num_tasks_handled; + + /* A full multi-line description of the state of the pool after + * all work is done. + */ + string full_report() const; + }; + + TaskPool(); + ~TaskPool(); + + void push(TaskRunFunction &&task); + + void wait_work(Summary *stats = NULL); /* work and wait until all tasks are done */ + void cancel(); /* cancel all tasks and wait until they are no longer executing */ + + static bool canceled(); /* For worker threads, test if current task pool canceled. */ + + protected: + tbb::task_group tbb_group; + + /* ** Statistics ** */ + + /* Time stamp of first task pushed. */ + double start_time; + + /* Number of all tasks pushed to the pool. Cleared after wait_work() and cancel(). */ + int num_tasks_pushed; +}; + +/* Task Scheduler + * + * Central scheduler that holds running threads ready to execute tasks. A single + * queue holds the task from all pools. */ + +class TaskScheduler { + public: + static void init(int num_threads = 0); + static void exit(); + static void free_memory(); + + /* Approximate number of threads that will work on task, which may be lower + * or higher than the actual number of threads. 
Use as little as possible and + * leave splitting up tasks to the scheduler. */ + static int num_threads(); + + protected: + static thread_mutex mutex; + static int users; + static int active_num_threads; + +#ifdef WITH_TBB_GLOBAL_CONTROL + static tbb::global_control *global_control; +#endif +}; + +/* Dedicated Task Pool + * + * Like a TaskPool, but will launch one dedicated thread to execute all tasks. + * + * The run callback that actually executes the task may be created like this: + * function_bind(&MyClass::task_execute, this, _1, _2) */ + +class DedicatedTaskPool { + public: + DedicatedTaskPool(); + ~DedicatedTaskPool(); + + void push(TaskRunFunction &&run, bool front = false); + + void wait(); /* wait until all tasks are done */ + void cancel(); /* cancel all tasks, keep worker thread running */ + + bool canceled(); /* for worker thread, test if canceled */ + + protected: + void num_decrease(int done); + void num_increase(); + + void thread_run(); + bool thread_wait_pop(TaskRunFunction &task); + + void clear(); + + thread_mutex num_mutex; + thread_condition_variable num_cond; + + list queue; + thread_mutex queue_mutex; + thread_condition_variable queue_cond; + + int num; + bool do_cancel; + bool do_exit; + + thread *worker_thread; +}; + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/util/tbb.h b/intern/cycles/util/tbb.h new file mode 100644 index 00000000000..6fc3b8daad3 --- /dev/null +++ b/intern/cycles/util/tbb.h @@ -0,0 +1,55 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TBB_H__ +#define __UTIL_TBB_H__ + +/* TBB includes , do it ourselves first so we are sure + * WIN32_LEAN_AND_MEAN and similar are defined beforehand. */ +#include "util/windows.h" + +#include +#include +#include +#include +#include + +#if TBB_INTERFACE_VERSION_MAJOR >= 10 +# define WITH_TBB_GLOBAL_CONTROL +# include +#endif + +CCL_NAMESPACE_BEGIN + +using tbb::blocked_range; +using tbb::enumerable_thread_specific; +using tbb::parallel_for; + +static inline void parallel_for_cancel() +{ +#if TBB_INTERFACE_VERSION_MAJOR >= 12 + tbb::task_group_context *ctx = tbb::task::current_context(); + if (ctx) { + ctx->cancel_group_execution(); + } +#else + tbb::task::self().cancel_group_execution(); +#endif +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_TBB_H__ */ diff --git a/intern/cycles/util/texture.h b/intern/cycles/util/texture.h new file mode 100644 index 00000000000..5e37b79e340 --- /dev/null +++ b/intern/cycles/util/texture.h @@ -0,0 +1,99 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TEXTURE_H__ +#define __UTIL_TEXTURE_H__ + +#include "util/transform.h" + +CCL_NAMESPACE_BEGIN + +/* Color to use when textures are not found. 
*/ +#define TEX_IMAGE_MISSING_R 1 +#define TEX_IMAGE_MISSING_G 0 +#define TEX_IMAGE_MISSING_B 1 +#define TEX_IMAGE_MISSING_A 1 + +/* Interpolation types for textures + * cuda also use texture space to store other objects */ +typedef enum InterpolationType { + INTERPOLATION_NONE = -1, + INTERPOLATION_LINEAR = 0, + INTERPOLATION_CLOSEST = 1, + INTERPOLATION_CUBIC = 2, + INTERPOLATION_SMART = 3, + + INTERPOLATION_NUM_TYPES, +} InterpolationType; + +typedef enum ImageDataType { + IMAGE_DATA_TYPE_FLOAT4 = 0, + IMAGE_DATA_TYPE_BYTE4 = 1, + IMAGE_DATA_TYPE_HALF4 = 2, + IMAGE_DATA_TYPE_FLOAT = 3, + IMAGE_DATA_TYPE_BYTE = 4, + IMAGE_DATA_TYPE_HALF = 5, + IMAGE_DATA_TYPE_USHORT4 = 6, + IMAGE_DATA_TYPE_USHORT = 7, + IMAGE_DATA_TYPE_NANOVDB_FLOAT = 8, + IMAGE_DATA_TYPE_NANOVDB_FLOAT3 = 9, + + IMAGE_DATA_NUM_TYPES +} ImageDataType; + +/* Alpha types + * How to treat alpha in images. */ +typedef enum ImageAlphaType { + IMAGE_ALPHA_UNASSOCIATED = 0, + IMAGE_ALPHA_ASSOCIATED = 1, + IMAGE_ALPHA_CHANNEL_PACKED = 2, + IMAGE_ALPHA_IGNORE = 3, + IMAGE_ALPHA_AUTO = 4, + + IMAGE_ALPHA_NUM_TYPES, +} ImageAlphaType; + +/* Extension types for textures. + * + * Defines how the image is extrapolated past its original bounds. */ +typedef enum ExtensionType { + /* Cause the image to repeat horizontally and vertically. */ + EXTENSION_REPEAT = 0, + /* Extend by repeating edge pixels of the image. */ + EXTENSION_EXTEND = 1, + /* Clip to image size and set exterior pixels as transparent. */ + EXTENSION_CLIP = 2, + + EXTENSION_NUM_TYPES, +} ExtensionType; + +typedef struct TextureInfo { + /* Pointer, offset or texture depending on device. */ + uint64_t data; + /* Data Type */ + uint data_type; + /* Interpolation and extension type. */ + uint interpolation, extension; + /* Dimensions. */ + uint width, height, depth; + /* Transform for 3D textures. 
*/ + uint use_transform_3d; + Transform transform_3d; +} TextureInfo; + +CCL_NAMESPACE_END + +#endif /* __UTIL_TEXTURE_H__ */ diff --git a/intern/cycles/util/thread.cpp b/intern/cycles/util/thread.cpp new file mode 100644 index 00000000000..24a0600425d --- /dev/null +++ b/intern/cycles/util/thread.cpp @@ -0,0 +1,72 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util/thread.h" + +#include "util/system.h" +#include "util/windows.h" + +CCL_NAMESPACE_BEGIN + +thread::thread(function run_cb, int node) : run_cb_(run_cb), joined_(false), node_(node) +{ +#ifdef __APPLE__ + /* Set the stack size to 2MB to match Linux. The default 512KB on macOS is + * too small for Embree, and consistent stack size also makes things more + * predictable in general. 
*/ + pthread_attr_t attribute; + pthread_attr_init(&attribute); + pthread_attr_setstacksize(&attribute, 1024 * 1024 * 2); + pthread_create(&pthread_id, &attribute, run, (void *)this); +#else + std_thread = std::thread(&thread::run, this); +#endif +} + +thread::~thread() +{ + if (!joined_) { + join(); + } +} + +void *thread::run(void *arg) +{ + thread *self = (thread *)(arg); + if (self->node_ != -1) { + system_cpu_run_thread_on_node(self->node_); + } + self->run_cb_(); + return NULL; +} + +bool thread::join() +{ + joined_ = true; +#ifdef __APPLE__ + return pthread_join(pthread_id, NULL) == 0; +#else + try { + std_thread.join(); + return true; + } + catch (const std::system_error &) { + return false; + } +#endif +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/thread.h b/intern/cycles/util/thread.h new file mode 100644 index 00000000000..09686e4b23f --- /dev/null +++ b/intern/cycles/util/thread.h @@ -0,0 +1,90 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_THREAD_H__ +#define __UTIL_THREAD_H__ + +#include +#include +#include +#include +#include + +#ifdef _WIN32 +# include "util/windows.h" +#else +# include +#endif + +/* NOTE: Use tbb/spin_mutex.h instead of util_tbb.h because some of the TBB + * functionality requires RTTI, which is disabled for OSL kernel. 
*/ +#include + +#include "util/function.h" + +CCL_NAMESPACE_BEGIN + +typedef std::mutex thread_mutex; +typedef std::unique_lock thread_scoped_lock; +typedef std::condition_variable thread_condition_variable; + +/* Own thread implementation similar to std::thread, so we can set a + * custom stack size on macOS. */ + +class thread { + public: + /* NOTE: Node index of -1 means that affinity will be inherited from the + * parent thread and no override on top of that will happen. */ + thread(function run_cb, int node = -1); + ~thread(); + + static void *run(void *arg); + bool join(); + + protected: + function run_cb_; +#ifdef __APPLE__ + pthread_t pthread_id; +#else + std::thread std_thread; +#endif + bool joined_; + int node_; +}; + +using thread_spin_lock = tbb::spin_mutex; + +class thread_scoped_spin_lock { + public: + explicit thread_scoped_spin_lock(thread_spin_lock &lock) : lock_(lock) + { + lock_.lock(); + } + + ~thread_scoped_spin_lock() + { + lock_.unlock(); + } + + /* TODO(sergey): Implement manual control over lock/unlock. */ + + protected: + thread_spin_lock &lock_; +}; + +CCL_NAMESPACE_END + +#endif /* __UTIL_THREAD_H__ */ diff --git a/intern/cycles/util/time.cpp b/intern/cycles/util/time.cpp new file mode 100644 index 00000000000..62d14b063be --- /dev/null +++ b/intern/cycles/util/time.cpp @@ -0,0 +1,139 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "util/time.h" + +#include + +#if !defined(_WIN32) +# include +# include +#endif + +#include "util/math.h" +#include "util/string.h" +#include "util/windows.h" + +CCL_NAMESPACE_BEGIN + +#ifdef _WIN32 +double time_dt() +{ + __int64 frequency, counter; + + QueryPerformanceFrequency((LARGE_INTEGER *)&frequency); + QueryPerformanceCounter((LARGE_INTEGER *)&counter); + + return (double)counter / (double)frequency; +} + +void time_sleep(double t) +{ + Sleep((int)(t * 1000)); +} +#else +double time_dt() +{ + struct timeval now; + gettimeofday(&now, NULL); + + return now.tv_sec + now.tv_usec * 1e-6; +} + +/* sleep t seconds */ +void time_sleep(double t) +{ + /* get whole seconds */ + int s = (int)t; + + if (s >= 1) { + sleep(s); + + /* adjust parameter to remove whole seconds */ + t -= s; + } + + /* get microseconds */ + int us = (int)(t * 1e6); + if (us > 0) + usleep(us); +} +#endif + +/* Time in format "hours:minutes:seconds.hundreds" */ + +string time_human_readable_from_seconds(const double seconds) +{ + const int h = (((int)seconds) / (60 * 60)); + const int m = (((int)seconds) / 60) % 60; + const int s = (((int)seconds) % 60); + const int r = (((int)(seconds * 100)) % 100); + + if (h > 0) { + return string_printf("%.2d:%.2d:%.2d.%.2d", h, m, s, r); + } + else { + return string_printf("%.2d:%.2d.%.2d", m, s, r); + } +} + +double time_human_readable_to_seconds(const string &time_string) +{ + /* Those are multiplies of a corresponding token surrounded by : in the + * time string, which denotes how to convert value to seconds. + * Effectively: seconds, minutes, hours, days in seconds. */ + const int multipliers[] = {1, 60, 60 * 60, 24 * 60 * 60}; + const int num_multiplies = sizeof(multipliers) / sizeof(*multipliers); + if (time_string.empty()) { + return 0.0; + } + double result = 0.0; + /* Split fractions of a second from the encoded time. 
*/ + vector fraction_tokens; + string_split(fraction_tokens, time_string, ".", false); + const int num_fraction_tokens = fraction_tokens.size(); + if (num_fraction_tokens == 0) { + /* Time string is malformed. */ + return 0.0; + } + else if (fraction_tokens.size() == 1) { + /* There is no fraction of a second specified, the rest of the code + * handles this normally. */ + } + else if (fraction_tokens.size() == 2) { + result = atof(fraction_tokens[1].c_str()); + result *= pow(0.1, fraction_tokens[1].length()); + } + else { + /* This is not a valid string, the result can not be reliable. */ + return 0.0; + } + /* Split hours, minutes and seconds. + * Hours part is optional. */ + vector tokens; + string_split(tokens, fraction_tokens[0], ":", false); + const int num_tokens = tokens.size(); + if (num_tokens > num_multiplies) { + /* Can not reliably represent the value. */ + return 0.0; + } + for (int i = 0; i < num_tokens; ++i) { + result += atoi(tokens[num_tokens - i - 1].c_str()) * multipliers[i]; + } + return result; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/time.h b/intern/cycles/util/time.h new file mode 100644 index 00000000000..380921664e8 --- /dev/null +++ b/intern/cycles/util/time.h @@ -0,0 +1,91 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __UTIL_TIME_H__ +#define __UTIL_TIME_H__ + +#include "util/function.h" +#include "util/string.h" + +CCL_NAMESPACE_BEGIN + +/* Give current time in seconds in double precision, with good accuracy. */ + +double time_dt(); + +/* Sleep for the specified number of seconds. */ + +void time_sleep(double t); + +/* Scoped timer. */ + +class scoped_timer { + public: + explicit scoped_timer(double *value = NULL) : value_(value) + { + time_start_ = time_dt(); + } + + ~scoped_timer() + { + if (value_ != NULL) { + *value_ = get_time(); + } + } + + double get_start() const + { + return time_start_; + } + + double get_time() const + { + return time_dt() - time_start_; + } + + protected: + double *value_; + double time_start_; +}; + +class scoped_callback_timer { + public: + using callback_type = function; + + explicit scoped_callback_timer(callback_type cb) : cb(cb) + { + } + + ~scoped_callback_timer() + { + if (cb) { + cb(timer.get_time()); + } + } + + protected: + scoped_timer timer; + callback_type cb; +}; + +/* Make human readable string from time, compatible with Blender metadata. */ + +string time_human_readable_from_seconds(const double seconds); +double time_human_readable_to_seconds(const string &str); + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/util/transform.cpp b/intern/cycles/util/transform.cpp new file mode 100644 index 00000000000..bd990cb0f79 --- /dev/null +++ b/intern/cycles/util/transform.cpp @@ -0,0 +1,345 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Adapted from code with license: + * + * Copyright (c) 2002, Industrial Light & Magic, a division of Lucas + * Digital Ltd. LLC. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Industrial Light & Magic nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "util/transform.h" +#include "util/projection.h" + +#include "util/boundbox.h" +#include "util/math.h" + +CCL_NAMESPACE_BEGIN + +/* Transform Inverse */ + +static bool transform_matrix4_gj_inverse(float R[][4], float M[][4]) +{ + /* forward elimination */ + for (int i = 0; i < 4; i++) { + int pivot = i; + float pivotsize = M[i][i]; + + if (pivotsize < 0) + pivotsize = -pivotsize; + + for (int j = i + 1; j < 4; j++) { + float tmp = M[j][i]; + + if (tmp < 0) + tmp = -tmp; + + if (tmp > pivotsize) { + pivot = j; + pivotsize = tmp; + } + } + + if (UNLIKELY(pivotsize == 0.0f)) + return false; + + if (pivot != i) { + for (int j = 0; j < 4; j++) { + float tmp; + + tmp = M[i][j]; + M[i][j] = M[pivot][j]; + M[pivot][j] = tmp; + + tmp = R[i][j]; + R[i][j] = R[pivot][j]; + R[pivot][j] = tmp; + } + } + + for (int j = i + 1; j < 4; j++) { + float f = M[j][i] / M[i][i]; + + for (int k = 0; k < 4; k++) { + M[j][k] -= f * M[i][k]; + R[j][k] -= f * R[i][k]; + } + } + } + + /* backward substitution */ + for (int i = 3; i >= 0; --i) { + float f; + + if (UNLIKELY((f = M[i][i]) == 0.0f)) + return false; + + for (int j = 0; j < 4; j++) { + M[i][j] /= f; + R[i][j] /= f; + } + + for (int j = 0; j < i; j++) { + f = M[j][i]; + + for (int k = 0; k < 4; k++) { + M[j][k] -= f * M[i][k]; + R[j][k] -= f * R[i][k]; + } + } + } + + return true; +} + +ProjectionTransform projection_inverse(const ProjectionTransform &tfm) +{ + ProjectionTransform tfmR = projection_identity(); + float M[4][4], R[4][4]; + + memcpy(R, &tfmR, sizeof(R)); + memcpy(M, &tfm, sizeof(M)); + + if (UNLIKELY(!transform_matrix4_gj_inverse(R, M))) { + /* matrix is degenerate (e.g. 
0 scale on some axis), ideally we should + * never be in this situation, but try to invert it anyway with tweak */ + M[0][0] += 1e-8f; + M[1][1] += 1e-8f; + M[2][2] += 1e-8f; + + if (UNLIKELY(!transform_matrix4_gj_inverse(R, M))) { + return projection_identity(); + } + } + + memcpy(&tfmR, R, sizeof(R)); + + return tfmR; +} + +Transform transform_inverse(const Transform &tfm) +{ + ProjectionTransform projection(tfm); + return projection_to_transform(projection_inverse(projection)); +} + +Transform transform_transposed_inverse(const Transform &tfm) +{ + ProjectionTransform projection(tfm); + ProjectionTransform iprojection = projection_inverse(projection); + return projection_to_transform(projection_transpose(iprojection)); +} + +/* Motion Transform */ + +float4 transform_to_quat(const Transform &tfm) +{ + double trace = (double)(tfm[0][0] + tfm[1][1] + tfm[2][2]); + float4 qt; + + if (trace > 0.0) { + double s = sqrt(trace + 1.0); + + qt.w = (float)(s / 2.0); + s = 0.5 / s; + + qt.x = (float)((double)(tfm[2][1] - tfm[1][2]) * s); + qt.y = (float)((double)(tfm[0][2] - tfm[2][0]) * s); + qt.z = (float)((double)(tfm[1][0] - tfm[0][1]) * s); + } + else { + int i = 0; + + if (tfm[1][1] > tfm[i][i]) + i = 1; + if (tfm[2][2] > tfm[i][i]) + i = 2; + + int j = (i + 1) % 3; + int k = (j + 1) % 3; + + double s = sqrt((double)(tfm[i][i] - (tfm[j][j] + tfm[k][k])) + 1.0); + + double q[3]; + q[i] = s * 0.5; + if (s != 0.0) + s = 0.5 / s; + + double w = (double)(tfm[k][j] - tfm[j][k]) * s; + q[j] = (double)(tfm[j][i] + tfm[i][j]) * s; + q[k] = (double)(tfm[k][i] + tfm[i][k]) * s; + + qt.x = (float)q[0]; + qt.y = (float)q[1]; + qt.z = (float)q[2]; + qt.w = (float)w; + } + + return qt; +} + +static void transform_decompose(DecomposedTransform *decomp, const Transform *tfm) +{ + /* extract translation */ + decomp->y = make_float4(tfm->x.w, tfm->y.w, tfm->z.w, 0.0f); + + /* extract rotation */ + Transform M = *tfm; + M.x.w = 0.0f; + M.y.w = 0.0f; + M.z.w = 0.0f; + +#if 0 + Transform R 
= M; + float norm; + int iteration = 0; + + do { + Transform Rnext; + Transform Rit = transform_transposed_inverse(R); + + for (int i = 0; i < 3; i++) + for (int j = 0; j < 4; j++) + Rnext[i][j] = 0.5f * (R[i][j] + Rit[i][j]); + + norm = 0.0f; + for (int i = 0; i < 3; i++) { + norm = max(norm, + fabsf(R[i][0] - Rnext[i][0]) + fabsf(R[i][1] - Rnext[i][1]) + + fabsf(R[i][2] - Rnext[i][2])); + } + + R = Rnext; + iteration++; + } while (iteration < 100 && norm > 1e-4f); + + if (transform_negative_scale(R)) + R = R * transform_scale(-1.0f, -1.0f, -1.0f); + + decomp->x = transform_to_quat(R); + + /* extract scale and pack it */ + Transform scale = transform_inverse(R) * M; + decomp->y.w = scale.x.x; + decomp->z = make_float4(scale.x.y, scale.x.z, scale.y.x, scale.y.y); + decomp->w = make_float4(scale.y.z, scale.z.x, scale.z.y, scale.z.z); +#else + float3 colx = transform_get_column(&M, 0); + float3 coly = transform_get_column(&M, 1); + float3 colz = transform_get_column(&M, 2); + + /* extract scale and shear first */ + float3 scale, shear; + scale.x = len(colx); + colx = safe_divide_float3_float(colx, scale.x); + shear.z = dot(colx, coly); + coly -= shear.z * colx; + scale.y = len(coly); + coly = safe_divide_float3_float(coly, scale.y); + shear.y = dot(colx, colz); + colz -= shear.y * colx; + shear.x = dot(coly, colz); + colz -= shear.x * coly; + scale.z = len(colz); + colz = safe_divide_float3_float(colz, scale.z); + + transform_set_column(&M, 0, colx); + transform_set_column(&M, 1, coly); + transform_set_column(&M, 2, colz); + + if (transform_negative_scale(M)) { + scale *= -1.0f; + M = M * transform_scale(-1.0f, -1.0f, -1.0f); + } + + decomp->x = transform_to_quat(M); + + decomp->y.w = scale.x; + decomp->z = make_float4(shear.z, shear.y, 0.0f, scale.y); + decomp->w = make_float4(shear.x, 0.0f, 0.0f, scale.z); +#endif +} + +void transform_motion_decompose(DecomposedTransform *decomp, const Transform *motion, size_t size) +{ + /* Decompose and correct rotation. 
*/ + for (size_t i = 0; i < size; i++) { + transform_decompose(decomp + i, motion + i); + + if (i > 0) { + /* Ensure rotation around shortest angle, negated quaternions are the same + * but this means we don't have to do the check in quat_interpolate */ + if (dot(decomp[i - 1].x, decomp[i].x) < 0.0f) + decomp[i].x = -decomp[i].x; + } + } + + /* Copy rotation to decomposed transform where scale is degenerate. This avoids weird object + * rotation interpolation when the scale goes to 0 for a time step. + * + * Note that this is very simple and naive implementation, which only deals with degenerated + * scale happening only on one frame. It is possible to improve it further by interpolating + * rotation into s degenerated range using rotation from time-steps from adjacent non-degenerated + * time steps. */ + for (size_t i = 0; i < size; i++) { + const float3 scale = make_float3(decomp[i].y.w, decomp[i].z.w, decomp[i].w.w); + if (!is_zero(scale)) { + continue; + } + + if (i > 0) { + decomp[i].x = decomp[i - 1].x; + } + else if (i < size - 1) { + decomp[i].x = decomp[i + 1].x; + } + } +} + +Transform transform_from_viewplane(BoundBox2D &viewplane) +{ + return transform_scale(1.0f / (viewplane.right - viewplane.left), + 1.0f / (viewplane.top - viewplane.bottom), + 1.0f) * + transform_translate(-viewplane.left, -viewplane.bottom, 0.0f); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/transform.h b/intern/cycles/util/transform.h new file mode 100644 index 00000000000..7bfe747fcfb --- /dev/null +++ b/intern/cycles/util/transform.h @@ -0,0 +1,512 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TRANSFORM_H__ +#define __UTIL_TRANSFORM_H__ + +#ifndef __KERNEL_GPU__ +# include +#endif + +#include "util/math.h" +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* Affine transformation, stored as 4x3 matrix. */ + +typedef struct Transform { + float4 x, y, z; + +#ifndef __KERNEL_GPU__ + float4 operator[](int i) const + { + return *(&x + i); + } + float4 &operator[](int i) + { + return *(&x + i); + } +#endif +} Transform; + +/* Transform decomposed in rotation/translation/scale. we use the same data + * structure as Transform, and tightly pack decomposition into it. first the + * rotation (4), then translation (3), then 3x3 scale matrix (9). */ + +typedef struct DecomposedTransform { + float4 x, y, z, w; +} DecomposedTransform; + +/* Functions */ + +ccl_device_inline float3 transform_point(ccl_private const Transform *t, const float3 a) +{ + /* TODO(sergey): Disabled for now, causes crashes in certain cases. 
*/ +#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) + ssef x, y, z, w, aa; + aa = a.m128; + + x = _mm_loadu_ps(&t->x.x); + y = _mm_loadu_ps(&t->y.x); + z = _mm_loadu_ps(&t->z.x); + w = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f); + + _MM_TRANSPOSE4_PS(x, y, z, w); + + ssef tmp = shuffle<0>(aa) * x; + tmp = madd(shuffle<1>(aa), y, tmp); + tmp = madd(shuffle<2>(aa), z, tmp); + tmp += w; + + return float3(tmp.m128); +#else + float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z + t->x.w, + a.x * t->y.x + a.y * t->y.y + a.z * t->y.z + t->y.w, + a.x * t->z.x + a.y * t->z.y + a.z * t->z.z + t->z.w); + + return c; +#endif +} + +ccl_device_inline float3 transform_direction(ccl_private const Transform *t, const float3 a) +{ +#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) + ssef x, y, z, w, aa; + aa = a.m128; + x = _mm_loadu_ps(&t->x.x); + y = _mm_loadu_ps(&t->y.x); + z = _mm_loadu_ps(&t->z.x); + w = _mm_setzero_ps(); + + _MM_TRANSPOSE4_PS(x, y, z, w); + + ssef tmp = shuffle<0>(aa) * x; + tmp = madd(shuffle<1>(aa), y, tmp); + tmp = madd(shuffle<2>(aa), z, tmp); + + return float3(tmp.m128); +#else + float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z, + a.x * t->y.x + a.y * t->y.y + a.z * t->y.z, + a.x * t->z.x + a.y * t->z.y + a.z * t->z.z); + + return c; +#endif +} + +ccl_device_inline float3 transform_direction_transposed(ccl_private const Transform *t, + const float3 a) +{ + float3 x = make_float3(t->x.x, t->y.x, t->z.x); + float3 y = make_float3(t->x.y, t->y.y, t->z.y); + float3 z = make_float3(t->x.z, t->y.z, t->z.z); + + return make_float3(dot(x, a), dot(y, a), dot(z, a)); +} + +ccl_device_inline Transform make_transform(float a, + float b, + float c, + float d, + float e, + float f, + float g, + float h, + float i, + float j, + float k, + float l) +{ + Transform t; + + t.x.x = a; + t.x.y = b; + t.x.z = c; + t.x.w = d; + t.y.x = e; + t.y.y = f; + t.y.z = g; + t.y.w = h; + t.z.x = i; + t.z.y = j; + t.z.z = k; + t.z.w = l; + + return t; 
+} + +ccl_device_inline Transform euler_to_transform(const float3 euler) +{ + float cx = cosf(euler.x); + float cy = cosf(euler.y); + float cz = cosf(euler.z); + float sx = sinf(euler.x); + float sy = sinf(euler.y); + float sz = sinf(euler.z); + + Transform t; + t.x.x = cy * cz; + t.y.x = cy * sz; + t.z.x = -sy; + + t.x.y = sy * sx * cz - cx * sz; + t.y.y = sy * sx * sz + cx * cz; + t.z.y = cy * sx; + + t.x.z = sy * cx * cz + sx * sz; + t.y.z = sy * cx * sz - sx * cz; + t.z.z = cy * cx; + + t.x.w = t.y.w = t.z.w = 0.0f; + return t; +} + +/* Constructs a coordinate frame from a normalized normal. */ +ccl_device_inline Transform make_transform_frame(float3 N) +{ + const float3 dx0 = cross(make_float3(1.0f, 0.0f, 0.0f), N); + const float3 dx1 = cross(make_float3(0.0f, 1.0f, 0.0f), N); + const float3 dx = normalize((dot(dx0, dx0) > dot(dx1, dx1)) ? dx0 : dx1); + const float3 dy = normalize(cross(N, dx)); + return make_transform(dx.x, dx.y, dx.z, 0.0f, dy.x, dy.y, dy.z, 0.0f, N.x, N.y, N.z, 0.0f); +} + +#ifndef __KERNEL_GPU__ + +ccl_device_inline Transform transform_zero() +{ + Transform zero = {zero_float4(), zero_float4(), zero_float4()}; + return zero; +} + +ccl_device_inline Transform operator*(const Transform a, const Transform b) +{ + float4 c_x = make_float4(b.x.x, b.y.x, b.z.x, 0.0f); + float4 c_y = make_float4(b.x.y, b.y.y, b.z.y, 0.0f); + float4 c_z = make_float4(b.x.z, b.y.z, b.z.z, 0.0f); + float4 c_w = make_float4(b.x.w, b.y.w, b.z.w, 1.0f); + + Transform t; + t.x = make_float4(dot(a.x, c_x), dot(a.x, c_y), dot(a.x, c_z), dot(a.x, c_w)); + t.y = make_float4(dot(a.y, c_x), dot(a.y, c_y), dot(a.y, c_z), dot(a.y, c_w)); + t.z = make_float4(dot(a.z, c_x), dot(a.z, c_y), dot(a.z, c_z), dot(a.z, c_w)); + + return t; +} + +ccl_device_inline void print_transform(const char *label, const Transform &t) +{ + print_float4(label, t.x); + print_float4(label, t.y); + print_float4(label, t.z); + printf("\n"); +} + +ccl_device_inline Transform transform_translate(float3 t) 
+{ + return make_transform(1, 0, 0, t.x, 0, 1, 0, t.y, 0, 0, 1, t.z); +} + +ccl_device_inline Transform transform_translate(float x, float y, float z) +{ + return transform_translate(make_float3(x, y, z)); +} + +ccl_device_inline Transform transform_scale(float3 s) +{ + return make_transform(s.x, 0, 0, 0, 0, s.y, 0, 0, 0, 0, s.z, 0); +} + +ccl_device_inline Transform transform_scale(float x, float y, float z) +{ + return transform_scale(make_float3(x, y, z)); +} + +ccl_device_inline Transform transform_rotate(float angle, float3 axis) +{ + float s = sinf(angle); + float c = cosf(angle); + float t = 1.0f - c; + + axis = normalize(axis); + + return make_transform(axis.x * axis.x * t + c, + axis.x * axis.y * t - s * axis.z, + axis.x * axis.z * t + s * axis.y, + 0.0f, + + axis.y * axis.x * t + s * axis.z, + axis.y * axis.y * t + c, + axis.y * axis.z * t - s * axis.x, + 0.0f, + + axis.z * axis.x * t - s * axis.y, + axis.z * axis.y * t + s * axis.x, + axis.z * axis.z * t + c, + 0.0f); +} + +/* Euler is assumed to be in XYZ order. 
*/ +ccl_device_inline Transform transform_euler(float3 euler) +{ + return transform_rotate(euler.z, make_float3(0.0f, 0.0f, 1.0f)) * + transform_rotate(euler.y, make_float3(0.0f, 1.0f, 0.0f)) * + transform_rotate(euler.x, make_float3(1.0f, 0.0f, 0.0f)); +} + +ccl_device_inline Transform transform_identity() +{ + return transform_scale(1.0f, 1.0f, 1.0f); +} + +ccl_device_inline bool operator==(const Transform &A, const Transform &B) +{ + return memcmp(&A, &B, sizeof(Transform)) == 0; +} + +ccl_device_inline bool operator!=(const Transform &A, const Transform &B) +{ + return !(A == B); +} + +ccl_device_inline float3 transform_get_column(const Transform *t, int column) +{ + return make_float3(t->x[column], t->y[column], t->z[column]); +} + +ccl_device_inline void transform_set_column(Transform *t, int column, float3 value) +{ + t->x[column] = value.x; + t->y[column] = value.y; + t->z[column] = value.z; +} + +Transform transform_inverse(const Transform &a); +Transform transform_transposed_inverse(const Transform &a); + +ccl_device_inline bool transform_uniform_scale(const Transform &tfm, float &scale) +{ + /* the epsilon here is quite arbitrary, but this function is only used for + * surface area and bump, where we expect it to not be so sensitive */ + float eps = 1e-6f; + + float sx = len_squared(float4_to_float3(tfm.x)); + float sy = len_squared(float4_to_float3(tfm.y)); + float sz = len_squared(float4_to_float3(tfm.z)); + float stx = len_squared(transform_get_column(&tfm, 0)); + float sty = len_squared(transform_get_column(&tfm, 1)); + float stz = len_squared(transform_get_column(&tfm, 2)); + + if (fabsf(sx - sy) < eps && fabsf(sx - sz) < eps && fabsf(sx - stx) < eps && + fabsf(sx - sty) < eps && fabsf(sx - stz) < eps) { + scale = sx; + return true; + } + + return false; +} + +ccl_device_inline bool transform_negative_scale(const Transform &tfm) +{ + float3 c0 = transform_get_column(&tfm, 0); + float3 c1 = transform_get_column(&tfm, 1); + float3 c2 = 
transform_get_column(&tfm, 2); + + return (dot(cross(c0, c1), c2) < 0.0f); +} + +ccl_device_inline Transform transform_clear_scale(const Transform &tfm) +{ + Transform ntfm = tfm; + + transform_set_column(&ntfm, 0, normalize(transform_get_column(&ntfm, 0))); + transform_set_column(&ntfm, 1, normalize(transform_get_column(&ntfm, 1))); + transform_set_column(&ntfm, 2, normalize(transform_get_column(&ntfm, 2))); + + return ntfm; +} + +ccl_device_inline Transform transform_empty() +{ + return make_transform(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +} + +#endif + +/* Motion Transform */ + +ccl_device_inline float4 quat_interpolate(float4 q1, float4 q2, float t) +{ + /* Optix is using lerp to interpolate motion transformations. */ +#ifdef __KERNEL_OPTIX__ + return normalize((1.0f - t) * q1 + t * q2); +#else /* __KERNEL_OPTIX__ */ + /* note: this does not ensure rotation around shortest angle, q1 and q2 + * are assumed to be matched already in transform_motion_decompose */ + float costheta = dot(q1, q2); + + /* possible optimization: it might be possible to precompute theta/qperp */ + + if (costheta > 0.9995f) { + /* linear interpolation in degenerate case */ + return normalize((1.0f - t) * q1 + t * q2); + } + else { + /* slerp */ + float theta = acosf(clamp(costheta, -1.0f, 1.0f)); + float4 qperp = normalize(q2 - q1 * costheta); + float thetap = theta * t; + return q1 * cosf(thetap) + qperp * sinf(thetap); + } +#endif /* __KERNEL_OPTIX__ */ +} + +ccl_device_inline Transform transform_quick_inverse(Transform M) +{ + /* possible optimization: can we avoid doing this altogether and construct + * the inverse matrix directly from negated translation, transposed rotation, + * scale can be inverted but what about shearing? 
*/ + Transform R; + float det = M.x.x * (M.z.z * M.y.y - M.z.y * M.y.z) - M.y.x * (M.z.z * M.x.y - M.z.y * M.x.z) + + M.z.x * (M.y.z * M.x.y - M.y.y * M.x.z); + if (det == 0.0f) { + M.x.x += 1e-8f; + M.y.y += 1e-8f; + M.z.z += 1e-8f; + det = M.x.x * (M.z.z * M.y.y - M.z.y * M.y.z) - M.y.x * (M.z.z * M.x.y - M.z.y * M.x.z) + + M.z.x * (M.y.z * M.x.y - M.y.y * M.x.z); + } + det = (det != 0.0f) ? 1.0f / det : 0.0f; + + float3 Rx = det * make_float3(M.z.z * M.y.y - M.z.y * M.y.z, + M.z.y * M.x.z - M.z.z * M.x.y, + M.y.z * M.x.y - M.y.y * M.x.z); + float3 Ry = det * make_float3(M.z.x * M.y.z - M.z.z * M.y.x, + M.z.z * M.x.x - M.z.x * M.x.z, + M.y.x * M.x.z - M.y.z * M.x.x); + float3 Rz = det * make_float3(M.z.y * M.y.x - M.z.x * M.y.y, + M.z.x * M.x.y - M.z.y * M.x.x, + M.y.y * M.x.x - M.y.x * M.x.y); + float3 T = -make_float3(M.x.w, M.y.w, M.z.w); + + R.x = make_float4(Rx.x, Rx.y, Rx.z, dot(Rx, T)); + R.y = make_float4(Ry.x, Ry.y, Ry.z, dot(Ry, T)); + R.z = make_float4(Rz.x, Rz.y, Rz.z, dot(Rz, T)); + + return R; +} + +ccl_device_inline void transform_compose(ccl_private Transform *tfm, + ccl_private const DecomposedTransform *decomp) +{ + /* rotation */ + float q0, q1, q2, q3, qda, qdb, qdc, qaa, qab, qac, qbb, qbc, qcc; + + q0 = M_SQRT2_F * decomp->x.w; + q1 = M_SQRT2_F * decomp->x.x; + q2 = M_SQRT2_F * decomp->x.y; + q3 = M_SQRT2_F * decomp->x.z; + + qda = q0 * q1; + qdb = q0 * q2; + qdc = q0 * q3; + qaa = q1 * q1; + qab = q1 * q2; + qac = q1 * q3; + qbb = q2 * q2; + qbc = q2 * q3; + qcc = q3 * q3; + + float3 rotation_x = make_float3(1.0f - qbb - qcc, -qdc + qab, qdb + qac); + float3 rotation_y = make_float3(qdc + qab, 1.0f - qaa - qcc, -qda + qbc); + float3 rotation_z = make_float3(-qdb + qac, qda + qbc, 1.0f - qaa - qbb); + + /* scale */ + float3 scale_x = make_float3(decomp->y.w, decomp->z.z, decomp->w.y); + float3 scale_y = make_float3(decomp->z.x, decomp->z.w, decomp->w.z); + float3 scale_z = make_float3(decomp->z.y, decomp->w.x, decomp->w.w); + + /* compose 
with translation */ + tfm->x = make_float4( + dot(rotation_x, scale_x), dot(rotation_x, scale_y), dot(rotation_x, scale_z), decomp->y.x); + tfm->y = make_float4( + dot(rotation_y, scale_x), dot(rotation_y, scale_y), dot(rotation_y, scale_z), decomp->y.y); + tfm->z = make_float4( + dot(rotation_z, scale_x), dot(rotation_z, scale_y), dot(rotation_z, scale_z), decomp->y.z); +} + +/* Interpolate from array of decomposed transforms. */ +ccl_device void transform_motion_array_interpolate(Transform *tfm, + const DecomposedTransform *motion, + uint numsteps, + float time) +{ + /* Figure out which steps we need to interpolate. */ + int maxstep = numsteps - 1; + int step = min((int)(time * maxstep), maxstep - 1); + float t = time * maxstep - step; + + const DecomposedTransform *a = motion + step; + const DecomposedTransform *b = motion + step + 1; + + /* Interpolate rotation, translation and scale. */ + DecomposedTransform decomp; + decomp.x = quat_interpolate(a->x, b->x, t); + decomp.y = (1.0f - t) * a->y + t * b->y; + decomp.z = (1.0f - t) * a->z + t * b->z; + decomp.w = (1.0f - t) * a->w + t * b->w; + + /* Compose rotation, translation, scale into matrix. 
*/ + transform_compose(tfm, &decomp); +} + +ccl_device_inline bool transform_isfinite_safe(ccl_private Transform *tfm) +{ + return isfinite4_safe(tfm->x) && isfinite4_safe(tfm->y) && isfinite4_safe(tfm->z); +} + +ccl_device_inline bool transform_decomposed_isfinite_safe(ccl_private DecomposedTransform *decomp) +{ + return isfinite4_safe(decomp->x) && isfinite4_safe(decomp->y) && isfinite4_safe(decomp->z) && + isfinite4_safe(decomp->w); +} + +#ifndef __KERNEL_GPU__ + +class BoundBox2D; + +ccl_device_inline bool operator==(const DecomposedTransform &A, const DecomposedTransform &B) +{ + return memcmp(&A, &B, sizeof(DecomposedTransform)) == 0; +} + +float4 transform_to_quat(const Transform &tfm); +void transform_motion_decompose(DecomposedTransform *decomp, const Transform *motion, size_t size); +Transform transform_from_viewplane(BoundBox2D &viewplane); + +#endif + +/* TODO: This can be removed when we know if no devices will require explicit + * address space qualifiers for this case. */ + +#define transform_point_auto transform_point +#define transform_direction_auto transform_direction +#define transform_direction_transposed_auto transform_direction_transposed + +CCL_NAMESPACE_END + +#endif /* __UTIL_TRANSFORM_H__ */ diff --git a/intern/cycles/util/types.h b/intern/cycles/util/types.h new file mode 100644 index 00000000000..697dc2b44ea --- /dev/null +++ b/intern/cycles/util/types.h @@ -0,0 +1,138 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_H__ +#define __UTIL_TYPES_H__ + +#include + +/* Standard Integer Types */ + +#if !defined(__KERNEL_GPU__) +# include +#endif + +#include "util/defines.h" + +#ifndef __KERNEL_GPU__ +# include "util/optimization.h" +# include "util/simd.h" +#endif + +CCL_NAMESPACE_BEGIN + +/* Types + * + * Define simpler unsigned type names, and integer with defined number of bits. + * Also vector types, named to be compatible with OpenCL builtin types, while + * working for CUDA and C++ too. */ + +/* Shorter Unsigned Names */ + +typedef unsigned char uchar; +typedef unsigned int uint; +typedef unsigned short ushort; + +/* Fixed Bits Types */ + +#ifndef __KERNEL_GPU__ +/* Generic Memory Pointer */ + +typedef uint64_t device_ptr; +#endif /* __KERNEL_GPU__ */ + +ccl_device_inline size_t align_up(size_t offset, size_t alignment) +{ + return (offset + alignment - 1) & ~(alignment - 1); +} + +ccl_device_inline size_t divide_up(size_t x, size_t y) +{ + return (x + y - 1) / y; +} + +ccl_device_inline size_t round_up(size_t x, size_t multiple) +{ + return ((x + multiple - 1) / multiple) * multiple; +} + +ccl_device_inline size_t round_down(size_t x, size_t multiple) +{ + return (x / multiple) * multiple; +} + +ccl_device_inline bool is_power_of_two(size_t x) +{ + return (x & (x - 1)) == 0; +} + +CCL_NAMESPACE_END + +/* Vectorized types declaration. 
*/ +#include "util/types_uchar2.h" +#include "util/types_uchar3.h" +#include "util/types_uchar4.h" + +#include "util/types_int2.h" +#include "util/types_int3.h" +#include "util/types_int4.h" + +#include "util/types_uint2.h" +#include "util/types_uint3.h" +#include "util/types_uint4.h" + +#include "util/types_ushort4.h" + +#include "util/types_float2.h" +#include "util/types_float3.h" +#include "util/types_float4.h" +#include "util/types_float8.h" + +#include "util/types_vector3.h" + +/* Vectorized types implementation. */ +#include "util/types_uchar2_impl.h" +#include "util/types_uchar3_impl.h" +#include "util/types_uchar4_impl.h" + +#include "util/types_int2_impl.h" +#include "util/types_int3_impl.h" +#include "util/types_int4_impl.h" + +#include "util/types_uint2_impl.h" +#include "util/types_uint3_impl.h" +#include "util/types_uint4_impl.h" + +#include "util/types_float2_impl.h" +#include "util/types_float3_impl.h" +#include "util/types_float4_impl.h" +#include "util/types_float8_impl.h" + +#include "util/types_vector3_impl.h" + +/* SSE types. */ +#ifndef __KERNEL_GPU__ +# include "util/sseb.h" +# include "util/ssef.h" +# include "util/ssei.h" +# if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__) +# include "util/avxb.h" +# include "util/avxf.h" +# include "util/avxi.h" +# endif +#endif + +#endif /* __UTIL_TYPES_H__ */ diff --git a/intern/cycles/util/types_float2.h b/intern/cycles/util/types_float2.h new file mode 100644 index 00000000000..e71204bef5b --- /dev/null +++ b/intern/cycles/util/types_float2.h @@ -0,0 +1,40 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_FLOAT2_H__ +#define __UTIL_TYPES_FLOAT2_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +struct float2 { + float x, y; + + __forceinline float operator[](int i) const; + __forceinline float &operator[](int i); +}; + +ccl_device_inline float2 make_float2(float x, float y); +ccl_device_inline void print_float2(const char *label, const float2 &a); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_FLOAT2_H__ */ diff --git a/intern/cycles/util/types_float2_impl.h b/intern/cycles/util/types_float2_impl.h new file mode 100644 index 00000000000..c02c13f8c47 --- /dev/null +++ b/intern/cycles/util/types_float2_impl.h @@ -0,0 +1,59 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __UTIL_TYPES_FLOAT2_IMPL_H__ +#define __UTIL_TYPES_FLOAT2_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +#ifndef __KERNEL_GPU__ +# include +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +__forceinline float float2::operator[](int i) const +{ + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); +} + +__forceinline float &float2::operator[](int i) +{ + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); +} + +ccl_device_inline float2 make_float2(float x, float y) +{ + float2 a = {x, y}; + return a; +} + +ccl_device_inline void print_float2(const char *label, const float2 &a) +{ + printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y); +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_FLOAT2_IMPL_H__ */ diff --git a/intern/cycles/util/types_float3.h b/intern/cycles/util/types_float3.h new file mode 100644 index 00000000000..f990367e7b8 --- /dev/null +++ b/intern/cycles/util/types_float3.h @@ -0,0 +1,60 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_FLOAT3_H__ +#define __UTIL_TYPES_FLOAT3_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +struct ccl_try_align(16) float3 +{ +# ifdef __KERNEL_SSE__ + union { + __m128 m128; + struct { + float x, y, z, w; + }; + }; + + __forceinline float3(); + __forceinline float3(const float3 &a); + __forceinline explicit float3(const __m128 &a); + + __forceinline operator const __m128 &() const; + __forceinline operator __m128 &(); + + __forceinline float3 &operator=(const float3 &a); +# else /* __KERNEL_SSE__ */ + float x, y, z, w; +# endif /* __KERNEL_SSE__ */ + + __forceinline float operator[](int i) const; + __forceinline float &operator[](int i); +}; + +ccl_device_inline float3 make_float3(float f); +ccl_device_inline float3 make_float3(float x, float y, float z); +ccl_device_inline void print_float3(const char *label, const float3 &a); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_FLOAT3_H__ */ diff --git a/intern/cycles/util/types_float3_impl.h b/intern/cycles/util/types_float3_impl.h new file mode 100644 index 00000000000..76a9067acc7 --- /dev/null +++ b/intern/cycles/util/types_float3_impl.h @@ -0,0 +1,103 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_FLOAT3_IMPL_H__ +#define __UTIL_TYPES_FLOAT3_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +#ifndef __KERNEL_GPU__ +# include +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +# ifdef __KERNEL_SSE__ +__forceinline float3::float3() +{ +} + +__forceinline float3::float3(const float3 &a) : m128(a.m128) +{ +} + +__forceinline float3::float3(const __m128 &a) : m128(a) +{ +} + +__forceinline float3::operator const __m128 &() const +{ + return m128; +} + +__forceinline float3::operator __m128 &() +{ + return m128; +} + +__forceinline float3 &float3::operator=(const float3 &a) +{ + m128 = a.m128; + return *this; +} +# endif /* __KERNEL_SSE__ */ + +__forceinline float float3::operator[](int i) const +{ + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); +} + +__forceinline float &float3::operator[](int i) +{ + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); +} + +ccl_device_inline float3 make_float3(float f) +{ +# ifdef __KERNEL_SSE__ + float3 a(_mm_set1_ps(f)); +# else + float3 a = {f, f, f, f}; +# endif + return a; +} + +ccl_device_inline float3 make_float3(float x, float y, float z) +{ +# ifdef __KERNEL_SSE__ + float3 a(_mm_set_ps(0.0f, z, y, x)); +# else + float3 a = {x, y, z, 0.0f}; +# endif + return a; +} + +ccl_device_inline void print_float3(const char *label, const float3 &a) +{ + printf("%s: %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z); +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_FLOAT3_IMPL_H__ */ diff --git a/intern/cycles/util/types_float4.h b/intern/cycles/util/types_float4.h new file mode 100644 index 00000000000..8d4e07e7e4d --- /dev/null +++ b/intern/cycles/util/types_float4.h @@ -0,0 +1,63 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_FLOAT4_H__ +#define __UTIL_TYPES_FLOAT4_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +struct int4; + +struct ccl_try_align(16) float4 +{ +# ifdef __KERNEL_SSE__ + union { + __m128 m128; + struct { + float x, y, z, w; + }; + }; + + __forceinline float4(); + __forceinline explicit float4(const __m128 &a); + + __forceinline operator const __m128 &() const; + __forceinline operator __m128 &(); + + __forceinline float4 &operator=(const float4 &a); + +# else /* __KERNEL_SSE__ */ + float x, y, z, w; +# endif /* __KERNEL_SSE__ */ + + __forceinline float operator[](int i) const; + __forceinline float &operator[](int i); +}; + +ccl_device_inline float4 make_float4(float f); +ccl_device_inline float4 make_float4(float x, float y, float z, float w); +ccl_device_inline float4 make_float4(const int4 &i); +ccl_device_inline void print_float4(const char *label, const float4 &a); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_FLOAT4_H__ */ diff --git a/intern/cycles/util/types_float4_impl.h b/intern/cycles/util/types_float4_impl.h new file mode 100644 index 00000000000..d75715332e5 --- /dev/null +++ b/intern/cycles/util/types_float4_impl.h @@ -0,0 +1,109 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_FLOAT4_IMPL_H__ +#define __UTIL_TYPES_FLOAT4_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +#ifndef __KERNEL_GPU__ +# include +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +# ifdef __KERNEL_SSE__ +__forceinline float4::float4() +{ +} + +__forceinline float4::float4(const __m128 &a) : m128(a) +{ +} + +__forceinline float4::operator const __m128 &() const +{ + return m128; +} + +__forceinline float4::operator __m128 &() +{ + return m128; +} + +__forceinline float4 &float4::operator=(const float4 &a) +{ + m128 = a.m128; + return *this; +} +# endif /* __KERNEL_SSE__ */ + +__forceinline float float4::operator[](int i) const +{ + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); +} + +__forceinline float &float4::operator[](int i) +{ + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); +} + +ccl_device_inline float4 make_float4(float f) +{ +# ifdef __KERNEL_SSE__ + float4 a(_mm_set1_ps(f)); +# else + float4 a = {f, f, f, f}; +# endif + return a; +} + +ccl_device_inline float4 make_float4(float x, float y, float z, float w) +{ +# ifdef __KERNEL_SSE__ + float4 a(_mm_set_ps(w, z, y, x)); +# else + float4 a = {x, y, z, w}; +# endif + return a; +} + +ccl_device_inline float4 make_float4(const int4 &i) +{ +# ifdef __KERNEL_SSE__ + float4 a(_mm_cvtepi32_ps(i.m128)); +# else + float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w}; +# endif + return a; +} + +ccl_device_inline void print_float4(const char *label, const 
float4 &a) +{ + printf("%s: %.8f %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z, (double)a.w); +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_FLOAT4_IMPL_H__ */ diff --git a/intern/cycles/util/types_float8.h b/intern/cycles/util/types_float8.h new file mode 100644 index 00000000000..cf1f66b7622 --- /dev/null +++ b/intern/cycles/util/types_float8.h @@ -0,0 +1,74 @@ +/* + * Original code Copyright 2017, Intel Corporation + * Modifications Copyright 2018, Blender Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __UTIL_TYPES_FLOAT8_H__ +#define __UTIL_TYPES_FLOAT8_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ + +struct ccl_try_align(32) float8 +{ +# ifdef __KERNEL_AVX2__ + union { + __m256 m256; + struct { + float a, b, c, d, e, f, g, h; + }; + }; + + __forceinline float8(); + __forceinline float8(const float8 &a); + __forceinline explicit float8(const __m256 &a); + + __forceinline operator const __m256 &() const; + __forceinline operator __m256 &(); + + __forceinline float8 &operator=(const float8 &a); + +# else /* __KERNEL_AVX2__ */ + float a, b, c, d, e, f, g, h; +# endif /* __KERNEL_AVX2__ */ + + __forceinline float operator[](int i) const; + __forceinline float &operator[](int i); +}; + +ccl_device_inline float8 make_float8(float f); +ccl_device_inline float8 +make_float8(float a, float b, float c, float d, float e, float f, float g, float h); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_FLOAT8_H__ */ diff --git a/intern/cycles/util/types_float8_impl.h b/intern/cycles/util/types_float8_impl.h new file mode 100644 index 00000000000..a795666adc7 --- /dev/null +++ b/intern/cycles/util/types_float8_impl.h @@ -0,0 +1,112 @@ +/* + * Original code Copyright 2017, Intel Corporation + * Modifications Copyright 2018, Blender Foundation. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __UTIL_TYPES_FLOAT8_IMPL_H__ +#define __UTIL_TYPES_FLOAT8_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +#ifndef __KERNEL_GPU__ +# include +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +# ifdef __KERNEL_AVX2__ +__forceinline float8::float8() +{ +} + +__forceinline float8::float8(const float8 &f) : m256(f.m256) +{ +} + +__forceinline float8::float8(const __m256 &f) : m256(f) +{ +} + +__forceinline float8::operator const __m256 &() const +{ + return m256; +} + +__forceinline float8::operator __m256 &() +{ + return m256; +} + +__forceinline float8 &float8::operator=(const float8 &f) +{ + m256 = f.m256; + return *this; +} +# endif /* __KERNEL_AVX2__ */ + +__forceinline float float8::operator[](int i) const +{ + util_assert(i >= 0); + util_assert(i < 8); + return *(&a + i); +} + +__forceinline float &float8::operator[](int i) +{ + util_assert(i >= 0); + util_assert(i < 8); + return *(&a + i); +} + +ccl_device_inline float8 make_float8(float f) +{ +# ifdef __KERNEL_AVX2__ + float8 r(_mm256_set1_ps(f)); +# else + float8 r = {f, f, f, f, f, f, f, f}; +# endif + return r; +} + +ccl_device_inline float8 +make_float8(float a, float b, float c, float d, float e, float f, float g, float h) +{ +# ifdef __KERNEL_AVX2__ + float8 r(_mm256_set_ps(a, b, c, d, e, f, g, h)); +# else + float8 r = {a, b, c, d, e, f, g, h}; +# endif + return r; +} + +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_FLOAT8_IMPL_H__ */ diff --git a/intern/cycles/util/types_int2.h b/intern/cycles/util/types_int2.h new file mode 100644 index 00000000000..75970577d77 --- /dev/null +++ b/intern/cycles/util/types_int2.h @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_INT2_H__ +#define __UTIL_TYPES_INT2_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +struct int2 { + int x, y; + + __forceinline int operator[](int i) const; + __forceinline int &operator[](int i); +}; + +ccl_device_inline int2 make_int2(int x, int y); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_INT2_H__ */ diff --git a/intern/cycles/util/types_int2_impl.h b/intern/cycles/util/types_int2_impl.h new file mode 100644 index 00000000000..efa63cdfd2a --- /dev/null +++ b/intern/cycles/util/types_int2_impl.h @@ -0,0 +1,50 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_INT2_IMPL_H__ +#define __UTIL_TYPES_INT2_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +int int2::operator[](int i) const +{ + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); +} + +int &int2::operator[](int i) +{ + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); +} + +ccl_device_inline int2 make_int2(int x, int y) +{ + int2 a = {x, y}; + return a; +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_INT2_IMPL_H__ */ diff --git a/intern/cycles/util/types_int3.h b/intern/cycles/util/types_int3.h new file mode 100644 index 00000000000..071a886136e --- /dev/null +++ b/intern/cycles/util/types_int3.h @@ -0,0 +1,60 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_INT3_H__ +#define __UTIL_TYPES_INT3_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +struct ccl_try_align(16) int3 +{ +# ifdef __KERNEL_SSE__ + union { + __m128i m128; + struct { + int x, y, z, w; + }; + }; + + __forceinline int3(); + __forceinline int3(const int3 &a); + __forceinline explicit int3(const __m128i &a); + + __forceinline operator const __m128i &() const; + __forceinline operator __m128i &(); + + __forceinline int3 &operator=(const int3 &a); +# else /* __KERNEL_SSE__ */ + int x, y, z, w; +# endif /* __KERNEL_SSE__ */ + + __forceinline int operator[](int i) const; + __forceinline int &operator[](int i); +}; + +ccl_device_inline int3 make_int3(int i); +ccl_device_inline int3 make_int3(int x, int y, int z); +ccl_device_inline void print_int3(const char *label, const int3 &a); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_INT3_H__ */ diff --git a/intern/cycles/util/types_int3_impl.h b/intern/cycles/util/types_int3_impl.h new file mode 100644 index 00000000000..c91c64b804e --- /dev/null +++ b/intern/cycles/util/types_int3_impl.h @@ -0,0 +1,104 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_INT3_IMPL_H__ +#define __UTIL_TYPES_INT3_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +#ifndef __KERNEL_GPU__ +# include +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +# ifdef __KERNEL_SSE__ +__forceinline int3::int3() +{ +} + +__forceinline int3::int3(const __m128i &a) : m128(a) +{ +} + +__forceinline int3::int3(const int3 &a) : m128(a.m128) +{ +} + +__forceinline int3::operator const __m128i &() const +{ + return m128; +} + +__forceinline int3::operator __m128i &() +{ + return m128; +} + +__forceinline int3 &int3::operator=(const int3 &a) +{ + m128 = a.m128; + return *this; +} +# endif /* __KERNEL_SSE__ */ + +__forceinline int int3::operator[](int i) const +{ + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); +} + +__forceinline int &int3::operator[](int i) +{ + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); +} + +ccl_device_inline int3 make_int3(int i) +{ +# ifdef __KERNEL_SSE__ + int3 a(_mm_set1_epi32(i)); +# else + int3 a = {i, i, i, i}; +# endif + return a; +} + +ccl_device_inline int3 make_int3(int x, int y, int z) +{ +# ifdef __KERNEL_SSE__ + int3 a(_mm_set_epi32(0, z, y, x)); +# else + int3 a = {x, y, z, 0}; +# endif + + return a; +} + +ccl_device_inline void print_int3(const char *label, const int3 &a) +{ + printf("%s: %d %d %d\n", label, a.x, a.y, a.z); +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_INT3_IMPL_H__ */ diff --git a/intern/cycles/util/types_int4.h b/intern/cycles/util/types_int4.h new file mode 100644 index 00000000000..cb497d70035 --- /dev/null +++ b/intern/cycles/util/types_int4.h @@ -0,0 +1,66 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_INT4_H__ +#define __UTIL_TYPES_INT4_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ + +struct float3; +struct float4; + +struct ccl_try_align(16) int4 +{ +# ifdef __KERNEL_SSE__ + union { + __m128i m128; + struct { + int x, y, z, w; + }; + }; + + __forceinline int4(); + __forceinline int4(const int4 &a); + __forceinline explicit int4(const __m128i &a); + + __forceinline operator const __m128i &() const; + __forceinline operator __m128i &(); + + __forceinline int4 &operator=(const int4 &a); +# else /* __KERNEL_SSE__ */ + int x, y, z, w; +# endif /* __KERNEL_SSE__ */ + + __forceinline int operator[](int i) const; + __forceinline int &operator[](int i); +}; + +ccl_device_inline int4 make_int4(int i); +ccl_device_inline int4 make_int4(int x, int y, int z, int w); +ccl_device_inline int4 make_int4(const float3 &f); +ccl_device_inline int4 make_int4(const float4 &f); +ccl_device_inline void print_int4(const char *label, const int4 &a); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_INT4_H__ */ diff --git a/intern/cycles/util/types_int4_impl.h b/intern/cycles/util/types_int4_impl.h new file mode 100644 index 00000000000..258b42c029e --- /dev/null +++ b/intern/cycles/util/types_int4_impl.h @@ -0,0 +1,123 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the 
License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_INT4_IMPL_H__ +#define __UTIL_TYPES_INT4_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +#ifndef __KERNEL_GPU__ +# include +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +# ifdef __KERNEL_SSE__ +__forceinline int4::int4() +{ +} + +__forceinline int4::int4(const int4 &a) : m128(a.m128) +{ +} + +__forceinline int4::int4(const __m128i &a) : m128(a) +{ +} + +__forceinline int4::operator const __m128i &() const +{ + return m128; +} + +__forceinline int4::operator __m128i &() +{ + return m128; +} + +__forceinline int4 &int4::operator=(const int4 &a) +{ + m128 = a.m128; + return *this; +} +# endif /* __KERNEL_SSE__ */ + +__forceinline int int4::operator[](int i) const +{ + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); +} + +__forceinline int &int4::operator[](int i) +{ + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); +} + +ccl_device_inline int4 make_int4(int i) +{ +# ifdef __KERNEL_SSE__ + int4 a(_mm_set1_epi32(i)); +# else + int4 a = {i, i, i, i}; +# endif + return a; +} + +ccl_device_inline int4 make_int4(int x, int y, int z, int w) +{ +# ifdef __KERNEL_SSE__ + int4 a(_mm_set_epi32(w, z, y, x)); +# else + int4 a = {x, y, z, w}; +# endif + return a; +} + +ccl_device_inline int4 make_int4(const float3 &f) +{ +# ifdef __KERNEL_SSE__ + int4 a(_mm_cvtps_epi32(f.m128)); +# else + int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; +# endif + return a; +} + +ccl_device_inline int4 make_int4(const float4 
&f) +{ +# ifdef __KERNEL_SSE__ + int4 a(_mm_cvtps_epi32(f.m128)); +# else + int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; +# endif + return a; +} + +ccl_device_inline void print_int4(const char *label, const int4 &a) +{ + printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w); +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_INT4_IMPL_H__ */ diff --git a/intern/cycles/util/types_uchar2.h b/intern/cycles/util/types_uchar2.h new file mode 100644 index 00000000000..0dc1d46bf29 --- /dev/null +++ b/intern/cycles/util/types_uchar2.h @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UCHAR2_H__ +#define __UTIL_TYPES_UCHAR2_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +struct uchar2 { + uchar x, y; + + __forceinline uchar operator[](int i) const; + __forceinline uchar &operator[](int i); +}; + +ccl_device_inline uchar2 make_uchar2(uchar x, uchar y); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_UCHAR2_H__ */ diff --git a/intern/cycles/util/types_uchar2_impl.h b/intern/cycles/util/types_uchar2_impl.h new file mode 100644 index 00000000000..234a71a2247 --- /dev/null +++ b/intern/cycles/util/types_uchar2_impl.h @@ -0,0 +1,50 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UCHAR2_IMPL_H__ +#define __UTIL_TYPES_UCHAR2_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +uchar uchar2::operator[](int i) const +{ + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); +} + +uchar &uchar2::operator[](int i) +{ + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); +} + +ccl_device_inline uchar2 make_uchar2(uchar x, uchar y) +{ + uchar2 a = {x, y}; + return a; +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_UCHAR2_IMPL_H__ */ diff --git a/intern/cycles/util/types_uchar3.h b/intern/cycles/util/types_uchar3.h new file mode 100644 index 00000000000..d3913afb3a2 --- /dev/null +++ b/intern/cycles/util/types_uchar3.h @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UCHAR3_H__ +#define __UTIL_TYPES_UCHAR3_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +struct uchar3 { + uchar x, y, z; + + __forceinline uchar operator[](int i) const; + __forceinline uchar &operator[](int i); +}; + +ccl_device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_UCHAR3_H__ */ diff --git a/intern/cycles/util/types_uchar3_impl.h b/intern/cycles/util/types_uchar3_impl.h new file mode 100644 index 00000000000..90f510e3b28 --- /dev/null +++ b/intern/cycles/util/types_uchar3_impl.h @@ -0,0 +1,50 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UCHAR3_IMPL_H__ +#define __UTIL_TYPES_UCHAR3_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +uchar uchar3::operator[](int i) const +{ + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); +} + +uchar &uchar3::operator[](int i) +{ + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); +} + +ccl_device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z) +{ + uchar3 a = {x, y, z}; + return a; +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_UCHAR3_IMPL_H__ */ diff --git a/intern/cycles/util/types_uchar4.h b/intern/cycles/util/types_uchar4.h new file mode 100644 index 00000000000..bfe1c06acd8 --- /dev/null +++ b/intern/cycles/util/types_uchar4.h @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UCHAR4_H__ +#define __UTIL_TYPES_UCHAR4_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +struct uchar4 { + uchar x, y, z, w; + + __forceinline uchar operator[](int i) const; + __forceinline uchar &operator[](int i); +}; + +ccl_device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_UCHAR4_H__ */ diff --git a/intern/cycles/util/types_uchar4_impl.h b/intern/cycles/util/types_uchar4_impl.h new file mode 100644 index 00000000000..d15c74bed03 --- /dev/null +++ b/intern/cycles/util/types_uchar4_impl.h @@ -0,0 +1,50 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UCHAR4_IMPL_H__ +#define __UTIL_TYPES_UCHAR4_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +uchar uchar4::operator[](int i) const +{ + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); +} + +uchar &uchar4::operator[](int i) +{ + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); +} + +ccl_device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w) +{ + uchar4 a = {x, y, z, w}; + return a; +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_UCHAR4_IMPL_H__ */ diff --git a/intern/cycles/util/types_uint2.h b/intern/cycles/util/types_uint2.h new file mode 100644 index 00000000000..7419977040b --- /dev/null +++ b/intern/cycles/util/types_uint2.h @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UINT2_H__ +#define __UTIL_TYPES_UINT2_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +struct uint2 { + uint x, y; + + __forceinline uint operator[](uint i) const; + __forceinline uint &operator[](uint i); +}; + +ccl_device_inline uint2 make_uint2(uint x, uint y); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_UINT2_H__ */ diff --git a/intern/cycles/util/types_uint2_impl.h b/intern/cycles/util/types_uint2_impl.h new file mode 100644 index 00000000000..8427f9694b5 --- /dev/null +++ b/intern/cycles/util/types_uint2_impl.h @@ -0,0 +1,48 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UINT2_IMPL_H__ +#define __UTIL_TYPES_UINT2_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +__forceinline uint uint2::operator[](uint i) const +{ + util_assert(i < 2); + return *(&x + i); +} + +__forceinline uint &uint2::operator[](uint i) +{ + util_assert(i < 2); + return *(&x + i); +} + +ccl_device_inline uint2 make_uint2(uint x, uint y) +{ + uint2 a = {x, y}; + return a; +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_UINT2_IMPL_H__ */ diff --git a/intern/cycles/util/types_uint3.h b/intern/cycles/util/types_uint3.h new file mode 100644 index 00000000000..1e97e7f2d36 --- /dev/null +++ b/intern/cycles/util/types_uint3.h @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UINT3_H__ +#define __UTIL_TYPES_UINT3_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +struct uint3 { + uint x, y, z; + + __forceinline uint operator[](uint i) const; + __forceinline uint &operator[](uint i); +}; + +ccl_device_inline uint3 make_uint3(uint x, uint y, uint z); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_UINT3_H__ */ diff --git a/intern/cycles/util/types_uint3_impl.h b/intern/cycles/util/types_uint3_impl.h new file mode 100644 index 00000000000..ba83cffe9a8 --- /dev/null +++ b/intern/cycles/util/types_uint3_impl.h @@ -0,0 +1,48 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UINT3_IMPL_H__ +#define __UTIL_TYPES_UINT3_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +__forceinline uint uint3::operator[](uint i) const +{ + util_assert(i < 3); + return *(&x + i); +} + +__forceinline uint &uint3::operator[](uint i) +{ + util_assert(i < 3); + return *(&x + i); +} + +ccl_device_inline uint3 make_uint3(uint x, uint y, uint z) +{ + uint3 a = {x, y, z}; + return a; +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_UINT3_IMPL_H__ */ diff --git a/intern/cycles/util/types_uint4.h b/intern/cycles/util/types_uint4.h new file mode 100644 index 00000000000..b135877b890 --- /dev/null +++ b/intern/cycles/util/types_uint4.h @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UINT4_H__ +#define __UTIL_TYPES_UINT4_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +struct uint4 { + uint x, y, z, w; + + __forceinline uint operator[](uint i) const; + __forceinline uint &operator[](uint i); +}; + +ccl_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_UINT4_H__ */ diff --git a/intern/cycles/util/types_uint4_impl.h b/intern/cycles/util/types_uint4_impl.h new file mode 100644 index 00000000000..b860fbfc49a --- /dev/null +++ b/intern/cycles/util/types_uint4_impl.h @@ -0,0 +1,48 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UINT4_IMPL_H__ +#define __UTIL_TYPES_UINT4_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+__forceinline uint uint4::operator[](uint i) const
+{
+  util_assert(i < 4);
+  return *(&x + i);
+}
+
+__forceinline uint &uint4::operator[](uint i)
+{
+  util_assert(i < 4);
+  return *(&x + i);
+}
+
+ccl_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w)
+{
+  uint4 a = {x, y, z, w};
+  return a;
+}
+#endif /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_TYPES_UINT4_IMPL_H__ */
diff --git a/intern/cycles/util/types_ushort4.h b/intern/cycles/util/types_ushort4.h
new file mode 100644
index 00000000000..8d080bcc1b9
--- /dev/null
+++ b/intern/cycles/util/types_ushort4.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_USHORT4_H__
+#define __UTIL_TYPES_USHORT4_H__
+
+#ifndef __UTIL_TYPES_H__
+# error "Do not include this file directly, include util/types.h instead."
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ + +struct ushort4 { + uint16_t x, y, z, w; +}; + +#endif + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_USHORT4_H__ */ diff --git a/intern/cycles/util/types_vector3.h b/intern/cycles/util/types_vector3.h new file mode 100644 index 00000000000..d46a0266855 --- /dev/null +++ b/intern/cycles/util/types_vector3.h @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_VECTOR3_H__ +#define __UTIL_TYPES_VECTOR3_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +template class vector3 { + public: + T x, y, z; + + __forceinline vector3(); + __forceinline vector3(const T &a); + __forceinline vector3(const T &x, const T &y, const T &z); +}; +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_VECTOR3_H__ */ diff --git a/intern/cycles/util/types_vector3_impl.h b/intern/cycles/util/types_vector3_impl.h new file mode 100644 index 00000000000..ff6dcd85b12 --- /dev/null +++ b/intern/cycles/util/types_vector3_impl.h @@ -0,0 +1,43 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_VECTOR3_IMPL_H__ +#define __UTIL_TYPES_VECTOR3_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +template ccl_always_inline vector3::vector3() +{ +} + +template ccl_always_inline vector3::vector3(const T &a) : x(a), y(a), z(a) +{ +} + +template +ccl_always_inline vector3::vector3(const T &x, const T &y, const T &z) : x(x), y(y), z(z) +{ +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_VECTOR3_IMPL_H__ */ diff --git a/intern/cycles/util/unique_ptr.h b/intern/cycles/util/unique_ptr.h new file mode 100644 index 00000000000..3181eafd43d --- /dev/null +++ b/intern/cycles/util/unique_ptr.h @@ -0,0 +1,29 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __UTIL_UNIQUE_PTR_H__ +#define __UTIL_UNIQUE_PTR_H__ + +#include + +CCL_NAMESPACE_BEGIN + +using std::make_unique; +using std::unique_ptr; + +CCL_NAMESPACE_END + +#endif /* __UTIL_UNIQUE_PTR_H__ */ diff --git a/intern/cycles/util/util_algorithm.h b/intern/cycles/util/util_algorithm.h deleted file mode 100644 index 63abd4e92a3..00000000000 --- a/intern/cycles/util/util_algorithm.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_ALGORITHM_H__ -#define __UTIL_ALGORITHM_H__ - -#include - -CCL_NAMESPACE_BEGIN - -using std::max; -using std::min; -using std::remove; -using std::sort; -using std::stable_sort; -using std::swap; - -CCL_NAMESPACE_END - -#endif /* __UTIL_ALGORITHM_H__ */ diff --git a/intern/cycles/util/util_aligned_malloc.cpp b/intern/cycles/util/util_aligned_malloc.cpp deleted file mode 100644 index 9b729cd4fc4..00000000000 --- a/intern/cycles/util/util_aligned_malloc.cpp +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "util/util_aligned_malloc.h" -#include "util/util_guarded_allocator.h" - -#include - -/* Adopted from Libmv. */ - -#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__NetBSD__) -/* Needed for memalign on Linux and _aligned_alloc on Windows. */ -# ifdef FREE_WINDOWS -/* Make sure _aligned_malloc is included. */ -# ifdef __MSVCRT_VERSION__ -# undef __MSVCRT_VERSION__ -# endif -# define __MSVCRT_VERSION__ 0x0700 -# endif /* FREE_WINDOWS */ -# include -#else -/* Apple's malloc is 16-byte aligned, and does not have malloc.h, so include - * stdilb instead. - */ -# include -#endif - -CCL_NAMESPACE_BEGIN - -void *util_aligned_malloc(size_t size, int alignment) -{ -#ifdef WITH_BLENDER_GUARDEDALLOC - return MEM_mallocN_aligned(size, alignment, "Cycles Aligned Alloc"); -#elif defined(_WIN32) - return _aligned_malloc(size, alignment); -#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__APPLE__) - void *result; - if (posix_memalign(&result, alignment, size)) { - /* Non-zero means allocation error - * either no allocation or bad alignment value. - */ - return NULL; - } - return result; -#else /* This is for Linux. 
*/ - return memalign(alignment, size); -#endif -} - -void util_aligned_free(void *ptr) -{ -#if defined(WITH_BLENDER_GUARDEDALLOC) - if (ptr != NULL) { - MEM_freeN(ptr); - } -#elif defined(_WIN32) - _aligned_free(ptr); -#else - free(ptr); -#endif -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_aligned_malloc.h b/intern/cycles/util/util_aligned_malloc.h deleted file mode 100644 index df7d93c056d..00000000000 --- a/intern/cycles/util/util_aligned_malloc.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_ALIGNED_MALLOC_H__ -#define __UTIL_ALIGNED_MALLOC_H__ - -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -/* Minimum alignment needed by all CPU native data types (SSE, AVX). */ -#define MIN_ALIGNMENT_CPU_DATA_TYPES 16 - -/* Allocate block of size bytes at least aligned to a given value. */ -void *util_aligned_malloc(size_t size, int alignment); - -/* Free memory allocated by util_aligned_malloc. */ -void util_aligned_free(void *ptr); - -/* Aligned new operator. */ -template T *util_aligned_new(Args... 
args) -{ - void *mem = util_aligned_malloc(sizeof(T), alignof(T)); - return new (mem) T(args...); -} - -template void util_aligned_delete(T *t) -{ - if (t) { - t->~T(); - util_aligned_free(t); - } -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_ALIGNED_MALLOC_H__ */ diff --git a/intern/cycles/util/util_args.h b/intern/cycles/util/util_args.h deleted file mode 100644 index be6f2c2b9f1..00000000000 --- a/intern/cycles/util/util_args.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_ARGS_H__ -#define __UTIL_ARGS_H__ - -/* Argument Parsing for command line, we use the OpenImageIO - * library because it has nice functions to do this. */ - -#include - -CCL_NAMESPACE_BEGIN - -OIIO_NAMESPACE_USING - -CCL_NAMESPACE_END - -#endif /* __UTIL_ARGS_H__ */ diff --git a/intern/cycles/util/util_array.h b/intern/cycles/util/util_array.h deleted file mode 100644 index 73f7d6cf7f8..00000000000 --- a/intern/cycles/util/util_array.h +++ /dev/null @@ -1,318 +0,0 @@ -/* - * Copyright 2011-2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_ARRAY_H__ -#define __UTIL_ARRAY_H__ - -#include -#include - -#include "util/util_aligned_malloc.h" -#include "util/util_guarded_allocator.h" -#include "util/util_types.h" -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -/* Simplified version of vector, serving multiple purposes: - * - somewhat faster in that it does not clear memory on resize/alloc, - * this was actually showing up in profiles quite significantly. it - * also does not run any constructors/destructors - * - if this is used, we are not tempted to use inefficient operations - * - aligned allocation for CPU native data types */ - -template class array { - public: - array() : data_(NULL), datasize_(0), capacity_(0) - { - } - - explicit array(size_t newsize) - { - if (newsize == 0) { - data_ = NULL; - datasize_ = 0; - capacity_ = 0; - } - else { - data_ = mem_allocate(newsize); - datasize_ = newsize; - capacity_ = datasize_; - } - } - - array(const array &from) - { - if (from.datasize_ == 0) { - data_ = NULL; - datasize_ = 0; - capacity_ = 0; - } - else { - data_ = mem_allocate(from.datasize_); - if (from.datasize_ > 0) { - memcpy(data_, from.data_, from.datasize_ * sizeof(T)); - } - datasize_ = from.datasize_; - capacity_ = datasize_; - } - } - - array &operator=(const array &from) - { - if (this != &from) { - resize(from.size()); - if (datasize_ > 0) { - memcpy((void *)data_, from.data_, datasize_ * sizeof(T)); - } - } - - return *this; - } - - array &operator=(const vector &from) - { - resize(from.size()); - - if (from.size() > 0 && datasize_ > 0) { - 
memcpy(data_, &from[0], datasize_ * sizeof(T)); - } - - return *this; - } - - ~array() - { - mem_free(data_, capacity_); - } - - bool operator==(const array &other) const - { - if (datasize_ != other.datasize_) { - return false; - } - if (datasize_ == 0) { - return true; - } - - return memcmp(data_, other.data_, datasize_ * sizeof(T)) == 0; - } - - bool operator!=(const array &other) const - { - return !(*this == other); - } - - void steal_data(array &from) - { - if (this != &from) { - clear(); - - data_ = from.data_; - datasize_ = from.datasize_; - capacity_ = from.capacity_; - - from.data_ = NULL; - from.datasize_ = 0; - from.capacity_ = 0; - } - } - - void set_data(T *ptr_, size_t datasize) - { - clear(); - data_ = ptr_; - datasize_ = datasize; - capacity_ = datasize; - } - - T *steal_pointer() - { - T *ptr = data_; - data_ = NULL; - clear(); - return ptr; - } - - T *resize(size_t newsize) - { - if (newsize == 0) { - clear(); - } - else if (newsize != datasize_) { - if (newsize > capacity_) { - T *newdata = mem_allocate(newsize); - if (newdata == NULL) { - /* Allocation failed, likely out of memory. */ - clear(); - return NULL; - } - else if (data_ != NULL) { - memcpy( - (void *)newdata, data_, ((datasize_ < newsize) ? 
datasize_ : newsize) * sizeof(T)); - mem_free(data_, capacity_); - } - data_ = newdata; - capacity_ = newsize; - } - datasize_ = newsize; - } - return data_; - } - - T *resize(size_t newsize, const T &value) - { - size_t oldsize = size(); - resize(newsize); - - for (size_t i = oldsize; i < size(); i++) { - data_[i] = value; - } - - return data_; - } - - void clear() - { - if (data_ != NULL) { - mem_free(data_, capacity_); - data_ = NULL; - } - datasize_ = 0; - capacity_ = 0; - } - - size_t empty() const - { - return datasize_ == 0; - } - - size_t size() const - { - return datasize_; - } - - T *data() - { - return data_; - } - - const T *data() const - { - return data_; - } - - T &operator[](size_t i) const - { - assert(i < datasize_); - return data_[i]; - } - - T *begin() - { - return data_; - } - - const T *begin() const - { - return data_; - } - - T *end() - { - return data_ + datasize_; - } - - const T *end() const - { - return data_ + datasize_; - } - - void reserve(size_t newcapacity) - { - if (newcapacity > capacity_) { - T *newdata = mem_allocate(newcapacity); - if (data_ != NULL) { - memcpy(newdata, data_, ((datasize_ < newcapacity) ? datasize_ : newcapacity) * sizeof(T)); - mem_free(data_, capacity_); - } - data_ = newdata; - capacity_ = newcapacity; - } - } - - size_t capacity() const - { - return capacity_; - } - - // do not use this method unless you are sure the code is not performance critical - void push_back_slow(const T &t) - { - if (capacity_ == datasize_) { - reserve(datasize_ == 0 ? 
1 : (size_t)((datasize_ + 1) * 1.2)); - } - - data_[datasize_++] = t; - } - - void push_back_reserved(const T &t) - { - assert(datasize_ < capacity_); - push_back_slow(t); - } - - void append(const array &from) - { - if (from.size()) { - size_t old_size = size(); - resize(old_size + from.size()); - memcpy(data_ + old_size, from.data(), sizeof(T) * from.size()); - } - } - - protected: - inline T *mem_allocate(size_t N) - { - if (N == 0) { - return NULL; - } - T *mem = (T *)util_aligned_malloc(sizeof(T) * N, alignment); - if (mem != NULL) { - util_guarded_mem_alloc(sizeof(T) * N); - } - else { - throw std::bad_alloc(); - } - return mem; - } - - inline void mem_free(T *mem, size_t N) - { - if (mem != NULL) { - util_guarded_mem_free(sizeof(T) * N); - util_aligned_free(mem); - } - } - - T *data_; - size_t datasize_; - size_t capacity_; -}; - -CCL_NAMESPACE_END - -#endif /* __UTIL_ARRAY_H__ */ diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h deleted file mode 100644 index faba411c769..00000000000 --- a/intern/cycles/util/util_atomic.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright 2014 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_ATOMIC_H__ -#define __UTIL_ATOMIC_H__ - -#ifndef __KERNEL_GPU__ - -/* Using atomic ops header from Blender. 
*/ -# include "atomic_ops.h" - -# define atomic_add_and_fetch_float(p, x) atomic_add_and_fetch_fl((p), (x)) -# define atomic_compare_and_swap_float(p, old_val, new_val) \ - atomic_cas_float((p), (old_val), (new_val)) - -# define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1) -# define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_add_uint32((p), -1) - -# define CCL_LOCAL_MEM_FENCE 0 -# define ccl_barrier(flags) ((void)0) - -#else /* __KERNEL_GPU__ */ - -# if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) - -# define atomic_add_and_fetch_float(p, x) (atomicAdd((float *)(p), (float)(x)) + (float)(x)) - -# define atomic_fetch_and_add_uint32(p, x) atomicAdd((unsigned int *)(p), (unsigned int)(x)) -# define atomic_fetch_and_sub_uint32(p, x) atomicSub((unsigned int *)(p), (unsigned int)(x)) -# define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1) -# define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_sub_uint32((p), 1) -# define atomic_fetch_and_or_uint32(p, x) atomicOr((unsigned int *)(p), (unsigned int)(x)) - -ccl_device_inline float atomic_compare_and_swap_float(volatile float *dest, - const float old_val, - const float new_val) -{ - union { - unsigned int int_value; - float float_value; - } new_value, prev_value, result; - prev_value.float_value = old_val; - new_value.float_value = new_val; - result.int_value = atomicCAS((unsigned int *)dest, prev_value.int_value, new_value.int_value); - return result.float_value; -} - -# define CCL_LOCAL_MEM_FENCE -# define ccl_barrier(flags) __syncthreads() - -# endif /* __KERNEL_CUDA__ */ - -#endif /* __KERNEL_GPU__ */ - -#endif /* __UTIL_ATOMIC_H__ */ diff --git a/intern/cycles/util/util_avxb.h b/intern/cycles/util/util_avxb.h deleted file mode 100644 index 15215d04ca3..00000000000 --- a/intern/cycles/util/util_avxb.h +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Copyright 2011-2013 Intel Corporation - * Modifications Copyright 2014, Blender Foundation. 
- * - * Licensed under the Apache License, Version 2.0(the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_AVXB_H__ -#define __UTIL_AVXB_H__ - -CCL_NAMESPACE_BEGIN - -struct avxf; - -/*! 4-wide SSE bool type. */ -struct avxb { - typedef avxb Mask; // mask type - typedef avxf Float; // float type - - enum { size = 8 }; // number of SIMD elements - union { - __m256 m256; - int32_t v[8]; - }; // data - - //////////////////////////////////////////////////////////////////////////////// - /// Constructors, Assignment & Cast Operators - //////////////////////////////////////////////////////////////////////////////// - - __forceinline avxb() - { - } - __forceinline avxb(const avxb &other) - { - m256 = other.m256; - } - __forceinline avxb &operator=(const avxb &other) - { - m256 = other.m256; - return *this; - } - - __forceinline avxb(const __m256 input) : m256(input) - { - } - __forceinline avxb(const __m128 &a, const __m128 &b) - : m256(_mm256_insertf128_ps(_mm256_castps128_ps256(a), b, 1)) - { - } - __forceinline operator const __m256 &(void) const - { - return m256; - } - __forceinline operator const __m256i(void) const - { - return _mm256_castps_si256(m256); - } - __forceinline operator const __m256d(void) const - { - return _mm256_castps_pd(m256); - } - - //////////////////////////////////////////////////////////////////////////////// - /// Constants - //////////////////////////////////////////////////////////////////////////////// - - __forceinline avxb(FalseTy) : m256(_mm256_setzero_ps()) - { - 
} - __forceinline avxb(TrueTy) : m256(_mm256_castsi256_ps(_mm256_set1_epi32(-1))) - { - } - - //////////////////////////////////////////////////////////////////////////////// - /// Array Access - //////////////////////////////////////////////////////////////////////////////// - - __forceinline bool operator[](const size_t i) const - { - assert(i < 8); - return (_mm256_movemask_ps(m256) >> i) & 1; - } - __forceinline int32_t &operator[](const size_t i) - { - assert(i < 8); - return v[i]; - } -}; - -//////////////////////////////////////////////////////////////////////////////// -/// Unary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const avxb operator!(const avxb &a) -{ - return _mm256_xor_ps(a, avxb(True)); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Binary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const avxb operator&(const avxb &a, const avxb &b) -{ - return _mm256_and_ps(a, b); -} -__forceinline const avxb operator|(const avxb &a, const avxb &b) -{ - return _mm256_or_ps(a, b); -} -__forceinline const avxb operator^(const avxb &a, const avxb &b) -{ - return _mm256_xor_ps(a, b); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Assignment Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const avxb operator&=(avxb &a, const avxb &b) -{ - return a = a & b; -} -__forceinline const avxb operator|=(avxb &a, const avxb &b) -{ - return a = a | b; -} -__forceinline const avxb operator^=(avxb &a, const avxb &b) -{ - return a = a ^ b; -} - -//////////////////////////////////////////////////////////////////////////////// -/// Comparison Operators + Select -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const avxb operator!=(const avxb &a, const 
avxb &b) -{ - return _mm256_xor_ps(a, b); -} -__forceinline const avxb operator==(const avxb &a, const avxb &b) -{ -#ifdef __KERNEL_AVX2__ - return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b)); -#else - __m128i a_lo = _mm_castps_si128(_mm256_extractf128_ps(a, 0)); - __m128i a_hi = _mm_castps_si128(_mm256_extractf128_ps(a, 1)); - __m128i b_lo = _mm_castps_si128(_mm256_extractf128_ps(b, 0)); - __m128i b_hi = _mm_castps_si128(_mm256_extractf128_ps(b, 1)); - __m128i c_lo = _mm_cmpeq_epi32(a_lo, b_lo); - __m128i c_hi = _mm_cmpeq_epi32(a_hi, b_hi); - __m256i result = _mm256_insertf128_si256(_mm256_castsi128_si256(c_lo), c_hi, 1); - return _mm256_castsi256_ps(result); -#endif -} - -__forceinline const avxb select(const avxb &m, const avxb &t, const avxb &f) -{ -#if defined(__KERNEL_SSE41__) - return _mm256_blendv_ps(f, t, m); -#else - return _mm256_or_ps(_mm256_and_ps(m, t), _mm256_andnot_ps(m, f)); -#endif -} - -//////////////////////////////////////////////////////////////////////////////// -/// Movement/Shifting/Shuffling Functions -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const avxb unpacklo(const avxb &a, const avxb &b) -{ - return _mm256_unpacklo_ps(a, b); -} -__forceinline const avxb unpackhi(const avxb &a, const avxb &b) -{ - return _mm256_unpackhi_ps(a, b); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Reduction Operations -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__KERNEL_SSE41__) -__forceinline uint32_t popcnt(const avxb &a) -{ - return _mm_popcnt_u32(_mm256_movemask_ps(a)); -} -#else -__forceinline uint32_t popcnt(const avxb &a) -{ - return bool(a[0]) + bool(a[1]) + bool(a[2]) + bool(a[3]) + bool(a[4]) + bool(a[5]) + bool(a[6]) + - bool(a[7]); -} -#endif - -__forceinline bool reduce_and(const avxb &a) -{ - return _mm256_movemask_ps(a) == 0xf; -} -__forceinline bool reduce_or(const avxb &a) -{ - return 
_mm256_movemask_ps(a) != 0x0; -} -__forceinline bool all(const avxb &b) -{ - return _mm256_movemask_ps(b) == 0xf; -} -__forceinline bool any(const avxb &b) -{ - return _mm256_movemask_ps(b) != 0x0; -} -__forceinline bool none(const avxb &b) -{ - return _mm256_movemask_ps(b) == 0x0; -} - -__forceinline uint32_t movemask(const avxb &a) -{ - return _mm256_movemask_ps(a); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Debug Functions -//////////////////////////////////////////////////////////////////////////////// - -ccl_device_inline void print_avxb(const char *label, const avxb &a) -{ - printf("%s: %d %d %d %d %d %d %d %d\n", label, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]); -} - -CCL_NAMESPACE_END - -#endif diff --git a/intern/cycles/util/util_avxf.h b/intern/cycles/util/util_avxf.h deleted file mode 100644 index 1fb3ded422f..00000000000 --- a/intern/cycles/util/util_avxf.h +++ /dev/null @@ -1,392 +0,0 @@ -/* - * Copyright 2016 Intel Corporation - * - * Licensed under the Apache License, Version 2.0(the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_AVXF_H__ -#define __UTIL_AVXF_H__ - -CCL_NAMESPACE_BEGIN - -struct avxb; - -struct avxf { - typedef avxf Float; - - enum { size = 8 }; /* Number of SIMD elements. 
*/ - - union { - __m256 m256; - float f[8]; - int i[8]; - }; - - __forceinline avxf() - { - } - __forceinline avxf(const avxf &other) - { - m256 = other.m256; - } - __forceinline avxf &operator=(const avxf &other) - { - m256 = other.m256; - return *this; - } - - __forceinline avxf(const __m256 a) : m256(a) - { - } - __forceinline avxf(const __m256i a) : m256(_mm256_castsi256_ps(a)) - { - } - - __forceinline operator const __m256 &() const - { - return m256; - } - __forceinline operator __m256 &() - { - return m256; - } - - __forceinline avxf(float a) : m256(_mm256_set1_ps(a)) - { - } - - __forceinline avxf(float high32x4, float low32x4) - : m256(_mm256_set_ps( - high32x4, high32x4, high32x4, high32x4, low32x4, low32x4, low32x4, low32x4)) - { - } - - __forceinline avxf(float a3, float a2, float a1, float a0) - : m256(_mm256_set_ps(a3, a2, a1, a0, a3, a2, a1, a0)) - { - } - - __forceinline avxf( - float a7, float a6, float a5, float a4, float a3, float a2, float a1, float a0) - : m256(_mm256_set_ps(a7, a6, a5, a4, a3, a2, a1, a0)) - { - } - - __forceinline avxf(float3 a) : m256(_mm256_set_ps(a.w, a.z, a.y, a.x, a.w, a.z, a.y, a.x)) - { - } - - __forceinline avxf(int a3, int a2, int a1, int a0) - { - const __m256i foo = _mm256_set_epi32(a3, a2, a1, a0, a3, a2, a1, a0); - m256 = _mm256_castsi256_ps(foo); - } - - __forceinline avxf(int a7, int a6, int a5, int a4, int a3, int a2, int a1, int a0) - { - const __m256i foo = _mm256_set_epi32(a7, a6, a5, a4, a3, a2, a1, a0); - m256 = _mm256_castsi256_ps(foo); - } - - __forceinline avxf(__m128 a, __m128 b) - { - const __m256 foo = _mm256_castps128_ps256(a); - m256 = _mm256_insertf128_ps(foo, b, 1); - } - - __forceinline const float &operator[](const size_t i) const - { - assert(i < 8); - return f[i]; - } - __forceinline float &operator[](const size_t i) - { - assert(i < 8); - return f[i]; - } -}; - -__forceinline avxf cross(const avxf &a, const avxf &b) -{ - avxf r(0.0, - a[4] * b[5] - a[5] * b[4], - a[6] * b[4] - a[4] * b[6], 
- a[5] * b[6] - a[6] * b[5], - 0.0, - a[0] * b[1] - a[1] * b[0], - a[2] * b[0] - a[0] * b[2], - a[1] * b[2] - a[2] * b[1]); - return r; -} - -__forceinline void dot3(const avxf &a, const avxf &b, float &den, float &den2) -{ - const avxf t = _mm256_mul_ps(a.m256, b.m256); - den = ((float *)&t)[0] + ((float *)&t)[1] + ((float *)&t)[2]; - den2 = ((float *)&t)[4] + ((float *)&t)[5] + ((float *)&t)[6]; -} - -//////////////////////////////////////////////////////////////////////////////// -/// Unary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const avxf cast(const __m256i &a) -{ - return _mm256_castsi256_ps(a); -} - -__forceinline const avxf mm256_sqrt(const avxf &a) -{ - return _mm256_sqrt_ps(a.m256); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Binary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const avxf operator+(const avxf &a, const avxf &b) -{ - return _mm256_add_ps(a.m256, b.m256); -} -__forceinline const avxf operator+(const avxf &a, const float &b) -{ - return a + avxf(b); -} -__forceinline const avxf operator+(const float &a, const avxf &b) -{ - return avxf(a) + b; -} - -__forceinline const avxf operator-(const avxf &a, const avxf &b) -{ - return _mm256_sub_ps(a.m256, b.m256); -} -__forceinline const avxf operator-(const avxf &a, const float &b) -{ - return a - avxf(b); -} -__forceinline const avxf operator-(const float &a, const avxf &b) -{ - return avxf(a) - b; -} - -__forceinline const avxf operator*(const avxf &a, const avxf &b) -{ - return _mm256_mul_ps(a.m256, b.m256); -} -__forceinline const avxf operator*(const avxf &a, const float &b) -{ - return a * avxf(b); -} -__forceinline const avxf operator*(const float &a, const avxf &b) -{ - return avxf(a) * b; -} - -__forceinline const avxf operator/(const avxf &a, const avxf &b) -{ - return _mm256_div_ps(a.m256, b.m256); -} 
-__forceinline const avxf operator/(const avxf &a, const float &b) -{ - return a / avxf(b); -} -__forceinline const avxf operator/(const float &a, const avxf &b) -{ - return avxf(a) / b; -} - -__forceinline const avxf operator|(const avxf &a, const avxf &b) -{ - return _mm256_or_ps(a.m256, b.m256); -} - -__forceinline const avxf operator^(const avxf &a, const avxf &b) -{ - return _mm256_xor_ps(a.m256, b.m256); -} - -__forceinline const avxf operator&(const avxf &a, const avxf &b) -{ - return _mm256_and_ps(a.m256, b.m256); -} - -__forceinline const avxf max(const avxf &a, const avxf &b) -{ - return _mm256_max_ps(a.m256, b.m256); -} -__forceinline const avxf min(const avxf &a, const avxf &b) -{ - return _mm256_min_ps(a.m256, b.m256); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Movement/Shifting/Shuffling Functions -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const avxf shuffle(const avxf &a, const __m256i &shuf) -{ - return _mm256_permutevar_ps(a, shuf); -} - -template -__forceinline const avxf shuffle(const avxf &a) -{ - return _mm256_permutevar_ps(a, _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0)); -} - -template -__forceinline const avxf shuffle(const avxf &a, const avxf &b) -{ - return _mm256_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); -} -template -__forceinline const avxf shuffle(const avxf &a) -{ - return shuffle(a, a); -} -template __forceinline const avxf shuffle(const avxf &a, const avxf &b) -{ - return shuffle(a, b); -} -template __forceinline const avxf shuffle(const avxf &a) -{ - return shuffle(a, a); -} - -template __forceinline float extract(const avxf &a) -{ - __m256 b = shuffle(a).m256; - return _mm256_cvtss_f32(b); -} -template<> __forceinline float extract<0>(const avxf &a) -{ - return _mm256_cvtss_f32(a.m256); -} - -__forceinline ssef low(const avxf &a) -{ - return _mm256_extractf128_ps(a.m256, 0); -} -__forceinline ssef high(const avxf &a) 
-{ - return _mm256_extractf128_ps(a.m256, 1); -} - -template -__forceinline const avxf permute(const avxf &a) -{ -#ifdef __KERNEL_AVX2__ - return _mm256_permutevar8x32_ps(a, _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0)); -#else - float temp[8]; - _mm256_storeu_ps((float *)&temp, a); - return avxf(temp[i7], temp[i6], temp[i5], temp[i4], temp[i3], temp[i2], temp[i1], temp[i0]); -#endif -} - -template -ccl_device_inline const avxf set_sign_bit(const avxf &a) -{ - return a ^ avxf(S7 << 31, S6 << 31, S5 << 31, S4 << 31, S3 << 31, S2 << 31, S1 << 31, S0 << 31); -} - -template -ccl_device_inline const avxf blend(const avxf &a, const avxf &b) -{ - return _mm256_blend_ps( - a, b, S7 << 0 | S6 << 1 | S5 << 2 | S4 << 3 | S3 << 4 | S2 << 5 | S1 << 6 | S0 << 7); -} - -template -ccl_device_inline const avxf blend(const avxf &a, const avxf &b) -{ - return blend(a, b); -} - -//#if defined(__KERNEL_SSE41__) -__forceinline avxf maxi(const avxf &a, const avxf &b) -{ - const avxf ci = _mm256_max_ps(a, b); - return ci; -} - -__forceinline avxf mini(const avxf &a, const avxf &b) -{ - const avxf ci = _mm256_min_ps(a, b); - return ci; -} -//#endif - -//////////////////////////////////////////////////////////////////////////////// -/// Ternary Operators -//////////////////////////////////////////////////////////////////////////////// -__forceinline const avxf madd(const avxf &a, const avxf &b, const avxf &c) -{ -#ifdef __KERNEL_AVX2__ - return _mm256_fmadd_ps(a, b, c); -#else - return c + (a * b); -#endif -} - -__forceinline const avxf nmadd(const avxf &a, const avxf &b, const avxf &c) -{ -#ifdef __KERNEL_AVX2__ - return _mm256_fnmadd_ps(a, b, c); -#else - return c - (a * b); -#endif -} -__forceinline const avxf msub(const avxf &a, const avxf &b, const avxf &c) -{ -#ifdef __KERNEL_AVX2__ - return _mm256_fmsub_ps(a, b, c); -#else - return (a * b) - c; -#endif -} - -//////////////////////////////////////////////////////////////////////////////// -/// Comparison Operators + Select 
-//////////////////////////////////////////////////////////////////////////////// -__forceinline const avxb operator<=(const avxf &a, const avxf &b) -{ - return _mm256_cmp_ps(a.m256, b.m256, _CMP_LE_OS); -} - -__forceinline const avxf select(const avxb &m, const avxf &t, const avxf &f) -{ - return _mm256_blendv_ps(f, t, m); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Common Functions -//////////////////////////////////////////////////////////////////////////////// - -__forceinline avxf mix(const avxf &a, const avxf &b, const avxf &t) -{ - return madd(t, b, (avxf(1.0f) - t) * a); -} - -#ifndef _mm256_set_m128 -# define _mm256_set_m128(/* __m128 */ hi, /* __m128 */ lo) \ - _mm256_insertf128_ps(_mm256_castps128_ps256(lo), (hi), 0x1) -#endif - -#define _mm256_loadu2_m128(/* float const* */ hiaddr, /* float const* */ loaddr) \ - _mm256_set_m128(_mm_loadu_ps(hiaddr), _mm_loadu_ps(loaddr)) - -CCL_NAMESPACE_END - -#endif diff --git a/intern/cycles/util/util_avxi.h b/intern/cycles/util/util_avxi.h deleted file mode 100644 index 0ae4bf271c8..00000000000 --- a/intern/cycles/util/util_avxi.h +++ /dev/null @@ -1,745 +0,0 @@ -/* - * Copyright 2009-2013 Intel Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __UTIL_AVXI_H__ -#define __UTIL_AVXI_H__ - -CCL_NAMESPACE_BEGIN - -struct avxb; - -struct avxi { - typedef avxb Mask; // mask type for us - enum { size = 8 }; // number of SIMD elements - union { // data - __m256i m256; -#if !defined(__KERNEL_AVX2__) - struct { - __m128i l, h; - }; -#endif - int32_t v[8]; - }; - - //////////////////////////////////////////////////////////////////////////////// - /// Constructors, Assignment & Cast Operators - //////////////////////////////////////////////////////////////////////////////// - - __forceinline avxi() - { - } - __forceinline avxi(const avxi &a) - { - m256 = a.m256; - } - __forceinline avxi &operator=(const avxi &a) - { - m256 = a.m256; - return *this; - } - - __forceinline avxi(const __m256i a) : m256(a) - { - } - __forceinline operator const __m256i &(void) const - { - return m256; - } - __forceinline operator __m256i &(void) - { - return m256; - } - - __forceinline explicit avxi(const ssei &a) - : m256(_mm256_insertf128_si256(_mm256_castsi128_si256(a), a, 1)) - { - } - __forceinline avxi(const ssei &a, const ssei &b) - : m256(_mm256_insertf128_si256(_mm256_castsi128_si256(a), b, 1)) - { - } -#if defined(__KERNEL_AVX2__) - __forceinline avxi(const __m128i &a, const __m128i &b) - : m256(_mm256_insertf128_si256(_mm256_castsi128_si256(a), b, 1)) - { - } -#else - __forceinline avxi(const __m128i &a, const __m128i &b) : l(a), h(b) - { - } -#endif - __forceinline explicit avxi(const int32_t *const a) - : m256(_mm256_castps_si256(_mm256_loadu_ps((const float *)a))) - { - } - __forceinline avxi(int32_t a) : m256(_mm256_set1_epi32(a)) - { - } - __forceinline avxi(int32_t a, int32_t b) : m256(_mm256_set_epi32(b, a, b, a, b, a, b, a)) - { - } - __forceinline avxi(int32_t a, int32_t b, int32_t c, int32_t d) - : m256(_mm256_set_epi32(d, c, b, a, d, c, b, a)) - { - } - __forceinline avxi( - int32_t a, int32_t b, int32_t c, int32_t d, int32_t e, int32_t f, int32_t g, int32_t h) - : m256(_mm256_set_epi32(h, g, f, e, d, 
c, b, a)) - { - } - - __forceinline explicit avxi(const __m256 a) : m256(_mm256_cvtps_epi32(a)) - { - } - - //////////////////////////////////////////////////////////////////////////////// - /// Constants - //////////////////////////////////////////////////////////////////////////////// - - __forceinline avxi(ZeroTy) : m256(_mm256_setzero_si256()) - { - } -#if defined(__KERNEL_AVX2__) - __forceinline avxi(OneTy) : m256(_mm256_set1_epi32(1)) - { - } - __forceinline avxi(PosInfTy) : m256(_mm256_set1_epi32(pos_inf)) - { - } - __forceinline avxi(NegInfTy) : m256(_mm256_set1_epi32(neg_inf)) - { - } -#else - __forceinline avxi(OneTy) : m256(_mm256_set_epi32(1, 1, 1, 1, 1, 1, 1, 1)) - { - } - __forceinline avxi(PosInfTy) - : m256(_mm256_set_epi32( - pos_inf, pos_inf, pos_inf, pos_inf, pos_inf, pos_inf, pos_inf, pos_inf)) - { - } - __forceinline avxi(NegInfTy) - : m256(_mm256_set_epi32( - neg_inf, neg_inf, neg_inf, neg_inf, neg_inf, neg_inf, neg_inf, neg_inf)) - { - } -#endif - __forceinline avxi(StepTy) : m256(_mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0)) - { - } - - //////////////////////////////////////////////////////////////////////////////// - /// Array Access - //////////////////////////////////////////////////////////////////////////////// - - __forceinline const int32_t &operator[](const size_t i) const - { - assert(i < 8); - return v[i]; - } - __forceinline int32_t &operator[](const size_t i) - { - assert(i < 8); - return v[i]; - } -}; - -//////////////////////////////////////////////////////////////////////////////// -/// Unary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const avxi cast(const __m256 &a) -{ - return _mm256_castps_si256(a); -} -__forceinline const avxi operator+(const avxi &a) -{ - return a; -} -#if defined(__KERNEL_AVX2__) -__forceinline const avxi operator-(const avxi &a) -{ - return _mm256_sub_epi32(_mm256_setzero_si256(), a.m256); -} -__forceinline const avxi abs(const avxi &a) -{ - 
return _mm256_abs_epi32(a.m256); -} -#else -__forceinline const avxi operator-(const avxi &a) -{ - return avxi(_mm_sub_epi32(_mm_setzero_si128(), a.l), _mm_sub_epi32(_mm_setzero_si128(), a.h)); -} -__forceinline const avxi abs(const avxi &a) -{ - return avxi(_mm_abs_epi32(a.l), _mm_abs_epi32(a.h)); -} -#endif - -//////////////////////////////////////////////////////////////////////////////// -/// Binary Operators -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__KERNEL_AVX2__) -__forceinline const avxi operator+(const avxi &a, const avxi &b) -{ - return _mm256_add_epi32(a.m256, b.m256); -} -#else -__forceinline const avxi operator+(const avxi &a, const avxi &b) -{ - return avxi(_mm_add_epi32(a.l, b.l), _mm_add_epi32(a.h, b.h)); -} -#endif -__forceinline const avxi operator+(const avxi &a, const int32_t b) -{ - return a + avxi(b); -} -__forceinline const avxi operator+(const int32_t a, const avxi &b) -{ - return avxi(a) + b; -} - -#if defined(__KERNEL_AVX2__) -__forceinline const avxi operator-(const avxi &a, const avxi &b) -{ - return _mm256_sub_epi32(a.m256, b.m256); -} -#else -__forceinline const avxi operator-(const avxi &a, const avxi &b) -{ - return avxi(_mm_sub_epi32(a.l, b.l), _mm_sub_epi32(a.h, b.h)); -} -#endif -__forceinline const avxi operator-(const avxi &a, const int32_t b) -{ - return a - avxi(b); -} -__forceinline const avxi operator-(const int32_t a, const avxi &b) -{ - return avxi(a) - b; -} - -#if defined(__KERNEL_AVX2__) -__forceinline const avxi operator*(const avxi &a, const avxi &b) -{ - return _mm256_mullo_epi32(a.m256, b.m256); -} -#else -__forceinline const avxi operator*(const avxi &a, const avxi &b) -{ - return avxi(_mm_mullo_epi32(a.l, b.l), _mm_mullo_epi32(a.h, b.h)); -} -#endif -__forceinline const avxi operator*(const avxi &a, const int32_t b) -{ - return a * avxi(b); -} -__forceinline const avxi operator*(const int32_t a, const avxi &b) -{ - return avxi(a) * b; -} - -#if 
defined(__KERNEL_AVX2__) -__forceinline const avxi operator&(const avxi &a, const avxi &b) -{ - return _mm256_and_si256(a.m256, b.m256); -} -#else -__forceinline const avxi operator&(const avxi &a, const avxi &b) -{ - return _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); -} -#endif -__forceinline const avxi operator&(const avxi &a, const int32_t b) -{ - return a & avxi(b); -} -__forceinline const avxi operator&(const int32_t a, const avxi &b) -{ - return avxi(a) & b; -} - -#if defined(__KERNEL_AVX2__) -__forceinline const avxi operator|(const avxi &a, const avxi &b) -{ - return _mm256_or_si256(a.m256, b.m256); -} -#else -__forceinline const avxi operator|(const avxi &a, const avxi &b) -{ - return _mm256_castps_si256(_mm256_or_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); -} -#endif -__forceinline const avxi operator|(const avxi &a, const int32_t b) -{ - return a | avxi(b); -} -__forceinline const avxi operator|(const int32_t a, const avxi &b) -{ - return avxi(a) | b; -} - -#if defined(__KERNEL_AVX2__) -__forceinline const avxi operator^(const avxi &a, const avxi &b) -{ - return _mm256_xor_si256(a.m256, b.m256); -} -#else -__forceinline const avxi operator^(const avxi &a, const avxi &b) -{ - return _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); -} -#endif -__forceinline const avxi operator^(const avxi &a, const int32_t b) -{ - return a ^ avxi(b); -} -__forceinline const avxi operator^(const int32_t a, const avxi &b) -{ - return avxi(a) ^ b; -} - -#if defined(__KERNEL_AVX2__) -__forceinline const avxi operator<<(const avxi &a, const int32_t n) -{ - return _mm256_slli_epi32(a.m256, n); -} -__forceinline const avxi operator>>(const avxi &a, const int32_t n) -{ - return _mm256_srai_epi32(a.m256, n); -} - -__forceinline const avxi sra(const avxi &a, const int32_t b) -{ - return _mm256_srai_epi32(a.m256, b); -} -__forceinline const avxi srl(const avxi &a, const int32_t b) -{ - return 
_mm256_srli_epi32(a.m256, b); -} -#else -__forceinline const avxi operator<<(const avxi &a, const int32_t n) -{ - return avxi(_mm_slli_epi32(a.l, n), _mm_slli_epi32(a.h, n)); -} -__forceinline const avxi operator>>(const avxi &a, const int32_t n) -{ - return avxi(_mm_srai_epi32(a.l, n), _mm_srai_epi32(a.h, n)); -} - -__forceinline const avxi sra(const avxi &a, const int32_t b) -{ - return avxi(_mm_srai_epi32(a.l, b), _mm_srai_epi32(a.h, b)); -} -__forceinline const avxi srl(const avxi &a, const int32_t b) -{ - return avxi(_mm_srli_epi32(a.l, b), _mm_srli_epi32(a.h, b)); -} -#endif - -#if defined(__KERNEL_AVX2__) -__forceinline const avxi min(const avxi &a, const avxi &b) -{ - return _mm256_min_epi32(a.m256, b.m256); -} -#else -__forceinline const avxi min(const avxi &a, const avxi &b) -{ - return avxi(_mm_min_epi32(a.l, b.l), _mm_min_epi32(a.h, b.h)); -} -#endif -__forceinline const avxi min(const avxi &a, const int32_t b) -{ - return min(a, avxi(b)); -} -__forceinline const avxi min(const int32_t a, const avxi &b) -{ - return min(avxi(a), b); -} - -#if defined(__KERNEL_AVX2__) -__forceinline const avxi max(const avxi &a, const avxi &b) -{ - return _mm256_max_epi32(a.m256, b.m256); -} -#else -__forceinline const avxi max(const avxi &a, const avxi &b) -{ - return avxi(_mm_max_epi32(a.l, b.l), _mm_max_epi32(a.h, b.h)); -} -#endif -__forceinline const avxi max(const avxi &a, const int32_t b) -{ - return max(a, avxi(b)); -} -__forceinline const avxi max(const int32_t a, const avxi &b) -{ - return max(avxi(a), b); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Assignment Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline avxi &operator+=(avxi &a, const avxi &b) -{ - return a = a + b; -} -__forceinline avxi &operator+=(avxi &a, const int32_t b) -{ - return a = a + b; -} - -__forceinline avxi &operator-=(avxi &a, const avxi &b) -{ - return a = a - b; -} -__forceinline 
avxi &operator-=(avxi &a, const int32_t b) -{ - return a = a - b; -} - -__forceinline avxi &operator*=(avxi &a, const avxi &b) -{ - return a = a * b; -} -__forceinline avxi &operator*=(avxi &a, const int32_t b) -{ - return a = a * b; -} - -__forceinline avxi &operator&=(avxi &a, const avxi &b) -{ - return a = a & b; -} -__forceinline avxi &operator&=(avxi &a, const int32_t b) -{ - return a = a & b; -} - -__forceinline avxi &operator|=(avxi &a, const avxi &b) -{ - return a = a | b; -} -__forceinline avxi &operator|=(avxi &a, const int32_t b) -{ - return a = a | b; -} - -__forceinline avxi &operator^=(avxi &a, const avxi &b) -{ - return a = a ^ b; -} -__forceinline avxi &operator^=(avxi &a, const int32_t b) -{ - return a = a ^ b; -} - -__forceinline avxi &operator<<=(avxi &a, const int32_t b) -{ - return a = a << b; -} -__forceinline avxi &operator>>=(avxi &a, const int32_t b) -{ - return a = a >> b; -} - -//////////////////////////////////////////////////////////////////////////////// -/// Comparison Operators + Select -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__KERNEL_AVX2__) -__forceinline const avxb operator==(const avxi &a, const avxi &b) -{ - return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a.m256, b.m256)); -} -#else -__forceinline const avxb operator==(const avxi &a, const avxi &b) -{ - return avxb(_mm_castsi128_ps(_mm_cmpeq_epi32(a.l, b.l)), - _mm_castsi128_ps(_mm_cmpeq_epi32(a.h, b.h))); -} -#endif -__forceinline const avxb operator==(const avxi &a, const int32_t b) -{ - return a == avxi(b); -} -__forceinline const avxb operator==(const int32_t a, const avxi &b) -{ - return avxi(a) == b; -} - -__forceinline const avxb operator!=(const avxi &a, const avxi &b) -{ - return !(a == b); -} -__forceinline const avxb operator!=(const avxi &a, const int32_t b) -{ - return a != avxi(b); -} -__forceinline const avxb operator!=(const int32_t a, const avxi &b) -{ - return avxi(a) != b; -} - -#if 
defined(__KERNEL_AVX2__) -__forceinline const avxb operator<(const avxi &a, const avxi &b) -{ - return _mm256_castsi256_ps(_mm256_cmpgt_epi32(b.m256, a.m256)); -} -#else -__forceinline const avxb operator<(const avxi &a, const avxi &b) -{ - return avxb(_mm_castsi128_ps(_mm_cmplt_epi32(a.l, b.l)), - _mm_castsi128_ps(_mm_cmplt_epi32(a.h, b.h))); -} -#endif -__forceinline const avxb operator<(const avxi &a, const int32_t b) -{ - return a < avxi(b); -} -__forceinline const avxb operator<(const int32_t a, const avxi &b) -{ - return avxi(a) < b; -} - -__forceinline const avxb operator>=(const avxi &a, const avxi &b) -{ - return !(a < b); -} -__forceinline const avxb operator>=(const avxi &a, const int32_t b) -{ - return a >= avxi(b); -} -__forceinline const avxb operator>=(const int32_t a, const avxi &b) -{ - return avxi(a) >= b; -} - -#if defined(__KERNEL_AVX2__) -__forceinline const avxb operator>(const avxi &a, const avxi &b) -{ - return _mm256_castsi256_ps(_mm256_cmpgt_epi32(a.m256, b.m256)); -} -#else -__forceinline const avxb operator>(const avxi &a, const avxi &b) -{ - return avxb(_mm_castsi128_ps(_mm_cmpgt_epi32(a.l, b.l)), - _mm_castsi128_ps(_mm_cmpgt_epi32(a.h, b.h))); -} -#endif -__forceinline const avxb operator>(const avxi &a, const int32_t b) -{ - return a > avxi(b); -} -__forceinline const avxb operator>(const int32_t a, const avxi &b) -{ - return avxi(a) > b; -} - -__forceinline const avxb operator<=(const avxi &a, const avxi &b) -{ - return !(a > b); -} -__forceinline const avxb operator<=(const avxi &a, const int32_t b) -{ - return a <= avxi(b); -} -__forceinline const avxb operator<=(const int32_t a, const avxi &b) -{ - return avxi(a) <= b; -} - -__forceinline const avxi select(const avxb &m, const avxi &t, const avxi &f) -{ - return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(f), _mm256_castsi256_ps(t), m)); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Movement/Shifting/Shuffling Functions 
-//////////////////////////////////////////////////////////////////////////////// - -#if defined(__KERNEL_AVX2__) -__forceinline avxi unpacklo(const avxi &a, const avxi &b) -{ - return _mm256_unpacklo_epi32(a.m256, b.m256); -} -__forceinline avxi unpackhi(const avxi &a, const avxi &b) -{ - return _mm256_unpackhi_epi32(a.m256, b.m256); -} -#else -__forceinline avxi unpacklo(const avxi &a, const avxi &b) -{ - return _mm256_castps_si256(_mm256_unpacklo_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); -} -__forceinline avxi unpackhi(const avxi &a, const avxi &b) -{ - return _mm256_castps_si256(_mm256_unpackhi_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); -} -#endif - -template __forceinline const avxi shuffle(const avxi &a) -{ - return _mm256_castps_si256(_mm256_permute_ps(_mm256_castsi256_ps(a), _MM_SHUFFLE(i, i, i, i))); -} - -template __forceinline const avxi shuffle(const avxi &a) -{ - return _mm256_permute2f128_si256(a, a, (i1 << 4) | (i0 << 0)); -} - -template __forceinline const avxi shuffle(const avxi &a, const avxi &b) -{ - return _mm256_permute2f128_si256(a, b, (i1 << 4) | (i0 << 0)); -} - -template -__forceinline const avxi shuffle(const avxi &a) -{ - return _mm256_castps_si256( - _mm256_permute_ps(_mm256_castsi256_ps(a), _MM_SHUFFLE(i3, i2, i1, i0))); -} - -template -__forceinline const avxi shuffle(const avxi &a, const avxi &b) -{ - return _mm256_castps_si256(_mm256_shuffle_ps( - _mm256_castsi256_ps(a), _mm256_castsi256_ps(b), _MM_SHUFFLE(i3, i2, i1, i0))); -} - -template<> __forceinline const avxi shuffle<0, 0, 2, 2>(const avxi &b) -{ - return _mm256_castps_si256(_mm256_moveldup_ps(_mm256_castsi256_ps(b))); -} -template<> __forceinline const avxi shuffle<1, 1, 3, 3>(const avxi &b) -{ - return _mm256_castps_si256(_mm256_movehdup_ps(_mm256_castsi256_ps(b))); -} -template<> __forceinline const avxi shuffle<0, 1, 0, 1>(const avxi &b) -{ - return _mm256_castps_si256( - _mm256_castpd_ps(_mm256_movedup_pd(_mm256_castps_pd(_mm256_castsi256_ps(b))))); 
-} - -__forceinline const avxi broadcast(const int *ptr) -{ - return _mm256_castps_si256(_mm256_broadcast_ss((const float *)ptr)); -} -template __forceinline const avxi insert(const avxi &a, const ssei &b) -{ - return _mm256_insertf128_si256(a, b, i); -} -template __forceinline const ssei extract(const avxi &a) -{ - return _mm256_extractf128_si256(a, i); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Reductions -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const avxi vreduce_min2(const avxi &v) -{ - return min(v, shuffle<1, 0, 3, 2>(v)); -} -__forceinline const avxi vreduce_min4(const avxi &v) -{ - avxi v1 = vreduce_min2(v); - return min(v1, shuffle<2, 3, 0, 1>(v1)); -} -__forceinline const avxi vreduce_min(const avxi &v) -{ - avxi v1 = vreduce_min4(v); - return min(v1, shuffle<1, 0>(v1)); -} - -__forceinline const avxi vreduce_max2(const avxi &v) -{ - return max(v, shuffle<1, 0, 3, 2>(v)); -} -__forceinline const avxi vreduce_max4(const avxi &v) -{ - avxi v1 = vreduce_max2(v); - return max(v1, shuffle<2, 3, 0, 1>(v1)); -} -__forceinline const avxi vreduce_max(const avxi &v) -{ - avxi v1 = vreduce_max4(v); - return max(v1, shuffle<1, 0>(v1)); -} - -__forceinline const avxi vreduce_add2(const avxi &v) -{ - return v + shuffle<1, 0, 3, 2>(v); -} -__forceinline const avxi vreduce_add4(const avxi &v) -{ - avxi v1 = vreduce_add2(v); - return v1 + shuffle<2, 3, 0, 1>(v1); -} -__forceinline const avxi vreduce_add(const avxi &v) -{ - avxi v1 = vreduce_add4(v); - return v1 + shuffle<1, 0>(v1); -} - -__forceinline int reduce_min(const avxi &v) -{ - return extract<0>(extract<0>(vreduce_min(v))); -} -__forceinline int reduce_max(const avxi &v) -{ - return extract<0>(extract<0>(vreduce_max(v))); -} -__forceinline int reduce_add(const avxi &v) -{ - return extract<0>(extract<0>(vreduce_add(v))); -} - -__forceinline uint32_t select_min(const avxi &v) -{ - return __bsf(movemask(v == 
vreduce_min(v))); -} -__forceinline uint32_t select_max(const avxi &v) -{ - return __bsf(movemask(v == vreduce_max(v))); -} - -__forceinline uint32_t select_min(const avxb &valid, const avxi &v) -{ - const avxi a = select(valid, v, avxi(pos_inf)); - return __bsf(movemask(valid & (a == vreduce_min(a)))); -} -__forceinline uint32_t select_max(const avxb &valid, const avxi &v) -{ - const avxi a = select(valid, v, avxi(neg_inf)); - return __bsf(movemask(valid & (a == vreduce_max(a)))); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Output Operators -//////////////////////////////////////////////////////////////////////////////// - -ccl_device_inline void print_avxi(const char *label, const avxi &a) -{ - printf("%s: %d %d %d %d %d %d %d %d\n", label, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]); -} - -CCL_NAMESPACE_END - -#endif diff --git a/intern/cycles/util/util_boundbox.h b/intern/cycles/util/util_boundbox.h deleted file mode 100644 index 7fab7bd5a15..00000000000 --- a/intern/cycles/util/util_boundbox.h +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __UTIL_BOUNDBOX_H__ -#define __UTIL_BOUNDBOX_H__ - -#include -#include - -#include "util/util_math.h" -#include "util/util_string.h" -#include "util/util_transform.h" -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -/* 3D BoundBox */ - -class BoundBox { - public: - float3 min, max; - - __forceinline BoundBox() - { - } - - __forceinline BoundBox(const float3 &pt) : min(pt), max(pt) - { - } - - __forceinline BoundBox(const float3 &min_, const float3 &max_) : min(min_), max(max_) - { - } - - enum empty_t { empty = 0 }; - - __forceinline BoundBox(empty_t) - : min(make_float3(FLT_MAX, FLT_MAX, FLT_MAX)), max(make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX)) - { - } - - __forceinline void grow(const float3 &pt) - { - /* the order of arguments to min is such that if pt is nan, it will not - * influence the resulting bounding box */ - min = ccl::min(pt, min); - max = ccl::max(pt, max); - } - - __forceinline void grow(const float3 &pt, float border) - { - float3 shift = make_float3(border, border, border); - min = ccl::min(pt - shift, min); - max = ccl::max(pt + shift, max); - } - - __forceinline void grow(const BoundBox &bbox) - { - grow(bbox.min); - grow(bbox.max); - } - - __forceinline void grow_safe(const float3 &pt) - { - /* the order of arguments to min is such that if pt is nan, it will not - * influence the resulting bounding box */ - if (isfinite(pt.x) && isfinite(pt.y) && isfinite(pt.z)) { - min = ccl::min(pt, min); - max = ccl::max(pt, max); - } - } - - __forceinline void grow_safe(const float3 &pt, float border) - { - if (isfinite(pt.x) && isfinite(pt.y) && isfinite(pt.z) && isfinite(border)) { - float3 shift = make_float3(border, border, border); - min = ccl::min(pt - shift, min); - max = ccl::max(pt + shift, max); - } - } - - __forceinline void grow_safe(const BoundBox &bbox) - { - grow_safe(bbox.min); - grow_safe(bbox.max); - } - - __forceinline void intersect(const BoundBox &bbox) - { - min = ccl::max(min, bbox.min); - max = ccl::min(max, 
bbox.max); - } - - /* todo: avoid using this */ - __forceinline float safe_area() const - { - if (!((min.x <= max.x) && (min.y <= max.y) && (min.z <= max.z))) - return 0.0f; - - return area(); - } - - __forceinline float area() const - { - return half_area() * 2.0f; - } - - __forceinline float half_area() const - { - float3 d = max - min; - return (d.x * d.z + d.y * d.z + d.x * d.y); - } - - __forceinline float3 center() const - { - return 0.5f * (min + max); - } - - __forceinline float3 center2() const - { - return min + max; - } - - __forceinline float3 size() const - { - return max - min; - } - - __forceinline bool valid() const - { - return (min.x <= max.x) && (min.y <= max.y) && (min.z <= max.z) && - (isfinite(min.x) && isfinite(min.y) && isfinite(min.z)) && - (isfinite(max.x) && isfinite(max.y) && isfinite(max.z)); - } - - BoundBox transformed(const Transform *tfm) const - { - BoundBox result = BoundBox::empty; - - for (int i = 0; i < 8; i++) { - float3 p; - - p.x = (i & 1) ? min.x : max.x; - p.y = (i & 2) ? min.y : max.y; - p.z = (i & 4) ? 
min.z : max.z; - - result.grow(transform_point(tfm, p)); - } - - return result; - } - - __forceinline bool intersects(const BoundBox &other) - { - float3 center_diff = center() - other.center(), total_size = (size() + other.size()) * 0.5f; - return fabsf(center_diff.x) <= total_size.x && fabsf(center_diff.y) <= total_size.y && - fabsf(center_diff.z) <= total_size.z; - } -}; - -__forceinline BoundBox merge(const BoundBox &bbox, const float3 &pt) -{ - return BoundBox(min(bbox.min, pt), max(bbox.max, pt)); -} - -__forceinline BoundBox merge(const BoundBox &a, const BoundBox &b) -{ - return BoundBox(min(a.min, b.min), max(a.max, b.max)); -} - -__forceinline BoundBox merge(const BoundBox &a, - const BoundBox &b, - const BoundBox &c, - const BoundBox &d) -{ - return merge(merge(a, b), merge(c, d)); -} - -__forceinline BoundBox intersect(const BoundBox &a, const BoundBox &b) -{ - return BoundBox(max(a.min, b.min), min(a.max, b.max)); -} - -__forceinline BoundBox intersect(const BoundBox &a, const BoundBox &b, const BoundBox &c) -{ - return intersect(a, intersect(b, c)); -} - -/* 2D BoundBox */ - -class BoundBox2D { - public: - float left; - float right; - float bottom; - float top; - - BoundBox2D() : left(0.0f), right(1.0f), bottom(0.0f), top(1.0f) - { - } - - bool operator==(const BoundBox2D &other) const - { - return (left == other.left && right == other.right && bottom == other.bottom && - top == other.top); - } - - float width() - { - return right - left; - } - - float height() - { - return top - bottom; - } - - BoundBox2D operator*(float f) const - { - BoundBox2D result; - - result.left = left * f; - result.right = right * f; - result.bottom = bottom * f; - result.top = top * f; - - return result; - } - - BoundBox2D subset(const BoundBox2D &other) const - { - BoundBox2D subset; - - subset.left = left + other.left * (right - left); - subset.right = left + other.right * (right - left); - subset.bottom = bottom + other.bottom * (top - bottom); - subset.top = bottom + 
other.top * (top - bottom); - - return subset; - } - - BoundBox2D make_relative_to(const BoundBox2D &other) const - { - BoundBox2D result; - - result.left = ((left - other.left) / (other.right - other.left)); - result.right = ((right - other.left) / (other.right - other.left)); - result.bottom = ((bottom - other.bottom) / (other.top - other.bottom)); - result.top = ((top - other.bottom) / (other.top - other.bottom)); - - return result; - } - - BoundBox2D clamp(float mn = 0.0f, float mx = 1.0f) - { - BoundBox2D result; - - result.left = ccl::clamp(left, mn, mx); - result.right = ccl::clamp(right, mn, mx); - result.bottom = ccl::clamp(bottom, mn, mx); - result.top = ccl::clamp(top, mn, mx); - - return result; - } -}; - -CCL_NAMESPACE_END - -#endif /* __UTIL_BOUNDBOX_H__ */ diff --git a/intern/cycles/util/util_color.h b/intern/cycles/util/util_color.h deleted file mode 100644 index 361c36d9061..00000000000 --- a/intern/cycles/util/util_color.h +++ /dev/null @@ -1,296 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_COLOR_H__ -#define __UTIL_COLOR_H__ - -#include "util/util_math.h" -#include "util/util_types.h" - -#if !defined(__KERNEL_GPU__) && defined(__KERNEL_SSE2__) -# include "util/util_simd.h" -#endif - -CCL_NAMESPACE_BEGIN - -ccl_device uchar float_to_byte(float val) -{ - return ((val <= 0.0f) ? 0 : - ((val > (1.0f - 0.5f / 255.0f)) ? 
255 : (uchar)((255.0f * val) + 0.5f))); -} - -ccl_device uchar4 color_float_to_byte(float3 c) -{ - uchar r, g, b; - - r = float_to_byte(c.x); - g = float_to_byte(c.y); - b = float_to_byte(c.z); - - return make_uchar4(r, g, b, 0); -} - -ccl_device uchar4 color_float4_to_uchar4(float4 c) -{ - uchar r, g, b, a; - - r = float_to_byte(c.x); - g = float_to_byte(c.y); - b = float_to_byte(c.z); - a = float_to_byte(c.w); - - return make_uchar4(r, g, b, a); -} - -ccl_device_inline float3 color_byte_to_float(uchar4 c) -{ - return make_float3(c.x * (1.0f / 255.0f), c.y * (1.0f / 255.0f), c.z * (1.0f / 255.0f)); -} - -ccl_device_inline float4 color_uchar4_to_float4(uchar4 c) -{ - return make_float4( - c.x * (1.0f / 255.0f), c.y * (1.0f / 255.0f), c.z * (1.0f / 255.0f), c.w * (1.0f / 255.0f)); -} - -ccl_device float color_srgb_to_linear(float c) -{ - if (c < 0.04045f) - return (c < 0.0f) ? 0.0f : c * (1.0f / 12.92f); - else - return powf((c + 0.055f) * (1.0f / 1.055f), 2.4f); -} - -ccl_device float color_linear_to_srgb(float c) -{ - if (c < 0.0031308f) - return (c < 0.0f) ? 
0.0f : c * 12.92f; - else - return 1.055f * powf(c, 1.0f / 2.4f) - 0.055f; -} - -ccl_device float3 rgb_to_hsv(float3 rgb) -{ - float cmax, cmin, h, s, v, cdelta; - float3 c; - - cmax = fmaxf(rgb.x, fmaxf(rgb.y, rgb.z)); - cmin = min(rgb.x, min(rgb.y, rgb.z)); - cdelta = cmax - cmin; - - v = cmax; - - if (cmax != 0.0f) { - s = cdelta / cmax; - } - else { - s = 0.0f; - h = 0.0f; - } - - if (s != 0.0f) { - float3 cmax3 = make_float3(cmax, cmax, cmax); - c = (cmax3 - rgb) / cdelta; - - if (rgb.x == cmax) - h = c.z - c.y; - else if (rgb.y == cmax) - h = 2.0f + c.x - c.z; - else - h = 4.0f + c.y - c.x; - - h /= 6.0f; - - if (h < 0.0f) - h += 1.0f; - } - else { - h = 0.0f; - } - - return make_float3(h, s, v); -} - -ccl_device float3 hsv_to_rgb(float3 hsv) -{ - float i, f, p, q, t, h, s, v; - float3 rgb; - - h = hsv.x; - s = hsv.y; - v = hsv.z; - - if (s != 0.0f) { - if (h == 1.0f) - h = 0.0f; - - h *= 6.0f; - i = floorf(h); - f = h - i; - rgb = make_float3(f, f, f); - p = v * (1.0f - s); - q = v * (1.0f - (s * f)); - t = v * (1.0f - (s * (1.0f - f))); - - if (i == 0.0f) - rgb = make_float3(v, t, p); - else if (i == 1.0f) - rgb = make_float3(q, v, p); - else if (i == 2.0f) - rgb = make_float3(p, v, t); - else if (i == 3.0f) - rgb = make_float3(p, q, v); - else if (i == 4.0f) - rgb = make_float3(t, p, v); - else - rgb = make_float3(v, p, q); - } - else { - rgb = make_float3(v, v, v); - } - - return rgb; -} - -ccl_device float3 xyY_to_xyz(float x, float y, float Y) -{ - float X, Z; - - if (y != 0.0f) - X = (x / y) * Y; - else - X = 0.0f; - - if (y != 0.0f && Y != 0.0f) - Z = (1.0f - x - y) / y * Y; - else - Z = 0.0f; - - return make_float3(X, Y, Z); -} - -#ifdef __KERNEL_SSE2__ -/* - * Calculate initial guess for arg^exp based on float representation - * This method gives a constant bias, - * which can be easily compensated by multiplication with bias_coeff. - * Gives better results for exponents near 1 (e. g. 4/5). 
- * exp = exponent, encoded as uint32_t - * e2coeff = 2^(127/exponent - 127) * bias_coeff^(1/exponent), encoded as uint32_t - */ -template ccl_device_inline ssef fastpow(const ssef &arg) -{ - ssef ret; - ret = arg * cast(ssei(e2coeff)); - ret = ssef(cast(ret)); - ret = ret * cast(ssei(exp)); - ret = cast(ssei(ret)); - return ret; -} - -/* Improve x ^ 1.0f/5.0f solution with Newton-Raphson method */ -ccl_device_inline ssef improve_5throot_solution(const ssef &old_result, const ssef &x) -{ - ssef approx2 = old_result * old_result; - ssef approx4 = approx2 * approx2; - ssef t = x / approx4; - ssef summ = madd(ssef(4.0f), old_result, t); - return summ * ssef(1.0f / 5.0f); -} - -/* Calculate powf(x, 2.4). Working domain: 1e-10 < x < 1e+10 */ -ccl_device_inline ssef fastpow24(const ssef &arg) -{ - /* max, avg and |avg| errors were calculated in gcc without FMA instructions - * The final precision should be better than powf in glibc */ - - /* Calculate x^4/5, coefficient 0.994 was constructed manually to minimize avg error */ - /* 0x3F4CCCCD = 4/5 */ - /* 0x4F55A7FB = 2^(127/(4/5) - 127) * 0.994^(1/(4/5)) */ - ssef x = fastpow<0x3F4CCCCD, 0x4F55A7FB>(arg); // error max = 0.17 avg = 0.0018 |avg| = 0.05 - ssef arg2 = arg * arg; - ssef arg4 = arg2 * arg2; - - /* error max = 0.018 avg = 0.0031 |avg| = 0.0031 */ - x = improve_5throot_solution(x, arg4); - /* error max = 0.00021 avg = 1.6e-05 |avg| = 1.6e-05 */ - x = improve_5throot_solution(x, arg4); - /* error max = 6.1e-07 avg = 5.2e-08 |avg| = 1.1e-07 */ - x = improve_5throot_solution(x, arg4); - - return x * (x * x); -} - -ccl_device ssef color_srgb_to_linear(const ssef &c) -{ - sseb cmp = c < ssef(0.04045f); - ssef lt = max(c * ssef(1.0f / 12.92f), ssef(0.0f)); - ssef gtebase = (c + ssef(0.055f)) * ssef(1.0f / 1.055f); /* fma */ - ssef gte = fastpow24(gtebase); - return select(cmp, lt, gte); -} -#endif /* __KERNEL_SSE2__ */ - -ccl_device float3 color_srgb_to_linear_v3(float3 c) -{ - return make_float3( - 
color_srgb_to_linear(c.x), color_srgb_to_linear(c.y), color_srgb_to_linear(c.z)); -} - -ccl_device float3 color_linear_to_srgb_v3(float3 c) -{ - return make_float3( - color_linear_to_srgb(c.x), color_linear_to_srgb(c.y), color_linear_to_srgb(c.z)); -} - -ccl_device float4 color_linear_to_srgb_v4(float4 c) -{ - return make_float4( - color_linear_to_srgb(c.x), color_linear_to_srgb(c.y), color_linear_to_srgb(c.z), c.w); -} - -ccl_device float4 color_srgb_to_linear_v4(float4 c) -{ -#ifdef __KERNEL_SSE2__ - ssef r_ssef; - float4 &r = (float4 &)r_ssef; - r = c; - r_ssef = color_srgb_to_linear(r_ssef); - r.w = c.w; - return r; -#else - return make_float4( - color_srgb_to_linear(c.x), color_srgb_to_linear(c.y), color_srgb_to_linear(c.z), c.w); -#endif -} - -ccl_device float3 color_highlight_compress(float3 color, ccl_private float3 *variance) -{ - color += one_float3(); - if (variance) { - *variance *= sqr3(one_float3() / color); - } - return log3(color); -} - -ccl_device float3 color_highlight_uncompress(float3 color) -{ - return exp3(color) - one_float3(); -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_COLOR_H__ */ diff --git a/intern/cycles/util/util_debug.cpp b/intern/cycles/util/util_debug.cpp deleted file mode 100644 index 2245668d02f..00000000000 --- a/intern/cycles/util/util_debug.cpp +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "util/util_debug.h" - -#include - -#include "bvh/bvh_params.h" - -#include "util/util_logging.h" -#include "util/util_string.h" - -CCL_NAMESPACE_BEGIN - -DebugFlags::CPU::CPU() - : avx2(true), avx(true), sse41(true), sse3(true), sse2(true), bvh_layout(BVH_LAYOUT_AUTO) -{ - reset(); -} - -void DebugFlags::CPU::reset() -{ -#define STRINGIFY(x) #x -#define CHECK_CPU_FLAGS(flag, env) \ - do { \ - flag = (getenv(env) == NULL); \ - if (!flag) { \ - VLOG(1) << "Disabling " << STRINGIFY(flag) << " instruction set."; \ - } \ - } while (0) - - CHECK_CPU_FLAGS(avx2, "CYCLES_CPU_NO_AVX2"); - CHECK_CPU_FLAGS(avx, "CYCLES_CPU_NO_AVX"); - CHECK_CPU_FLAGS(sse41, "CYCLES_CPU_NO_SSE41"); - CHECK_CPU_FLAGS(sse3, "CYCLES_CPU_NO_SSE3"); - CHECK_CPU_FLAGS(sse2, "CYCLES_CPU_NO_SSE2"); - -#undef STRINGIFY -#undef CHECK_CPU_FLAGS - - bvh_layout = BVH_LAYOUT_AUTO; -} - -DebugFlags::CUDA::CUDA() : adaptive_compile(false) -{ - reset(); -} - -DebugFlags::HIP::HIP() : adaptive_compile(false) -{ - reset(); -} - -void DebugFlags::CUDA::reset() -{ - if (getenv("CYCLES_CUDA_ADAPTIVE_COMPILE") != NULL) - adaptive_compile = true; -} - -void DebugFlags::HIP::reset() -{ - if (getenv("CYCLES_HIP_ADAPTIVE_COMPILE") != NULL) - adaptive_compile = true; -} - -DebugFlags::OptiX::OptiX() -{ - reset(); -} - -void DebugFlags::OptiX::reset() -{ - use_debug = false; -} - -DebugFlags::DebugFlags() : viewport_static_bvh(false), running_inside_blender(false) -{ - /* Nothing for now. 
*/ -} - -void DebugFlags::reset() -{ - viewport_static_bvh = false; - cpu.reset(); - cuda.reset(); - optix.reset(); -} - -std::ostream &operator<<(std::ostream &os, DebugFlagsConstRef debug_flags) -{ - os << "CPU flags:\n" - << " AVX2 : " << string_from_bool(debug_flags.cpu.avx2) << "\n" - << " AVX : " << string_from_bool(debug_flags.cpu.avx) << "\n" - << " SSE4.1 : " << string_from_bool(debug_flags.cpu.sse41) << "\n" - << " SSE3 : " << string_from_bool(debug_flags.cpu.sse3) << "\n" - << " SSE2 : " << string_from_bool(debug_flags.cpu.sse2) << "\n" - << " BVH layout : " << bvh_layout_name(debug_flags.cpu.bvh_layout) << "\n"; - - os << "CUDA flags:\n" - << " Adaptive Compile : " << string_from_bool(debug_flags.cuda.adaptive_compile) << "\n"; - - os << "OptiX flags:\n" - << " Debug : " << string_from_bool(debug_flags.optix.use_debug) << "\n"; - - os << "HIP flags:\n" - << " HIP streams : " << string_from_bool(debug_flags.hip.adaptive_compile) << "\n"; - - return os; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h deleted file mode 100644 index 81677201790..00000000000 --- a/intern/cycles/util/util_debug.h +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __UTIL_DEBUG_H__ -#define __UTIL_DEBUG_H__ - -#include -#include - -#include "bvh/bvh_params.h" - -CCL_NAMESPACE_BEGIN - -/* Global storage for all sort of flags used to fine-tune behavior of particular - * areas for the development purposes, without officially exposing settings to - * the interface. - */ -class DebugFlags { - public: - /* Use static BVH in viewport, to match final render exactly. */ - bool viewport_static_bvh; - - bool running_inside_blender; - - /* Descriptor of CPU feature-set to be used. */ - struct CPU { - CPU(); - - /* Reset flags to their defaults. */ - void reset(); - - /* Flags describing which instructions sets are allowed for use. */ - bool avx2; - bool avx; - bool sse41; - bool sse3; - bool sse2; - - /* Check functions to see whether instructions up to the given one - * are allowed for use. - */ - bool has_avx2() - { - return has_avx() && avx2; - } - bool has_avx() - { - return has_sse41() && avx; - } - bool has_sse41() - { - return has_sse3() && sse41; - } - bool has_sse3() - { - return has_sse2() && sse3; - } - bool has_sse2() - { - return sse2; - } - - /* Requested BVH layout. - * - * By default the fastest will be used. For debugging the BVH used by other - * CPUs and GPUs can be selected here instead. - */ - BVHLayout bvh_layout; - }; - - /* Descriptor of CUDA feature-set to be used. */ - struct CUDA { - CUDA(); - - /* Reset flags to their defaults. */ - void reset(); - - /* Whether adaptive feature based runtime compile is enabled or not. - * Requires the CUDA Toolkit and only works on Linux at the moment. */ - bool adaptive_compile; - }; - - /* Descriptor of HIP feature-set to be used. */ - struct HIP { - HIP(); - - /* Reset flags to their defaults. */ - void reset(); - - /* Whether adaptive feature based runtime compile is enabled or not.*/ - bool adaptive_compile; - }; - - /* Descriptor of OptiX feature-set to be used. */ - struct OptiX { - OptiX(); - - /* Reset flags to their defaults. 
*/ - void reset(); - - /* Load OptiX module with debug capabilities. Will lower logging verbosity level, enable - * validations, and lower optimization level. */ - bool use_debug; - }; - - /* Get instance of debug flags registry. */ - static DebugFlags &get() - { - static DebugFlags instance; - return instance; - } - - /* Reset flags to their defaults. */ - void reset(); - - /* Requested CPU flags. */ - CPU cpu; - - /* Requested CUDA flags. */ - CUDA cuda; - - /* Requested OptiX flags. */ - OptiX optix; - - /* Requested HIP flags. */ - HIP hip; - - private: - DebugFlags(); - -#if (__cplusplus > 199711L) - public: - explicit DebugFlags(DebugFlags const & /*other*/) = delete; - void operator=(DebugFlags const & /*other*/) = delete; -#else - private: - explicit DebugFlags(DebugFlags const & /*other*/); - void operator=(DebugFlags const & /*other*/); -#endif -}; - -typedef DebugFlags &DebugFlagsRef; -typedef const DebugFlags &DebugFlagsConstRef; - -inline DebugFlags &DebugFlags() -{ - return DebugFlags::get(); -} - -std::ostream &operator<<(std::ostream &os, DebugFlagsConstRef debug_flags); - -CCL_NAMESPACE_END - -#endif /* __UTIL_DEBUG_H__ */ diff --git a/intern/cycles/util/util_defines.h b/intern/cycles/util/util_defines.h deleted file mode 100644 index 9b1698d461a..00000000000 --- a/intern/cycles/util/util_defines.h +++ /dev/null @@ -1,146 +0,0 @@ - -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* clang-format off */ - -/* #define __forceinline triggers a bug in some clang-format versions, disable - * format for entire file to keep results consistent. */ - -#ifndef __UTIL_DEFINES_H__ -#define __UTIL_DEFINES_H__ - -/* Bitness */ - -#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || \ - defined(_M_X64) || defined(__aarch64__) -# define __KERNEL_64_BIT__ -#endif - -/* Qualifiers for kernel code shared by CPU and GPU */ - -#ifndef __KERNEL_GPU__ -# define ccl_device static inline -# define ccl_device_noinline static -# define ccl_device_noinline_cpu ccl_device_noinline -# define ccl_global -# define ccl_static_constant static const -# define ccl_constant const -# define ccl_local -# define ccl_local_param -# define ccl_private -# define ccl_restrict __restrict -# define ccl_optional_struct_init -# define ccl_loop_no_unroll -# define ccl_attr_maybe_unused [[maybe_unused]] -# define __KERNEL_WITH_SSE_ALIGN__ - -# if defined(_WIN32) && !defined(FREE_WINDOWS) -# define ccl_device_inline static __forceinline -# define ccl_device_forceinline static __forceinline -# define ccl_align(...) __declspec(align(__VA_ARGS__)) -# ifdef __KERNEL_64_BIT__ -# define ccl_try_align(...) __declspec(align(__VA_ARGS__)) -# else /* __KERNEL_64_BIT__ */ -# undef __KERNEL_WITH_SSE_ALIGN__ -/* No support for function arguments (error C2719). */ -# define ccl_try_align(...) -# endif /* __KERNEL_64_BIT__ */ -# define ccl_may_alias -# define ccl_always_inline __forceinline -# define ccl_never_inline __declspec(noinline) -# else /* _WIN32 && !FREE_WINDOWS */ -# define ccl_device_inline static inline __attribute__((always_inline)) -# define ccl_device_forceinline static inline __attribute__((always_inline)) -# define ccl_align(...) __attribute__((aligned(__VA_ARGS__))) -# ifndef FREE_WINDOWS64 -# define __forceinline inline __attribute__((always_inline)) -# endif -# define ccl_try_align(...) 
__attribute__((aligned(__VA_ARGS__))) -# define ccl_may_alias __attribute__((__may_alias__)) -# define ccl_always_inline __attribute__((always_inline)) -# define ccl_never_inline __attribute__((noinline)) -# endif /* _WIN32 && !FREE_WINDOWS */ - -/* Use to suppress '-Wimplicit-fallthrough' (in place of 'break'). */ -# ifndef ATTR_FALLTHROUGH -# if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */ -# define ATTR_FALLTHROUGH __attribute__((fallthrough)) -# else -# define ATTR_FALLTHROUGH ((void)0) -# endif -# endif -#endif /* __KERNEL_GPU__ */ - -/* macros */ - -/* hints for branch prediction, only use in code that runs a _lot_ */ -#if defined(__GNUC__) && defined(__KERNEL_CPU__) -# define LIKELY(x) __builtin_expect(!!(x), 1) -# define UNLIKELY(x) __builtin_expect(!!(x), 0) -#else -# define LIKELY(x) (x) -# define UNLIKELY(x) (x) -#endif - -#if defined(__GNUC__) || defined(__clang__) -# if defined(__cplusplus) -/* Some magic to be sure we don't have reference in the type. */ -template static inline T decltype_helper(T x) -{ - return x; -} -# define TYPEOF(x) decltype(decltype_helper(x)) -# else -# define TYPEOF(x) typeof(x) -# endif -#endif - -/* Causes warning: - * incompatible types when assigning to type 'Foo' from type 'Bar' - * ... 
the compiler optimizes away the temp var */ -#ifdef __GNUC__ -# define CHECK_TYPE(var, type) \ - { \ - TYPEOF(var) * __tmp; \ - __tmp = (type *)NULL; \ - (void)__tmp; \ - } \ - (void)0 - -# define CHECK_TYPE_PAIR(var_a, var_b) \ - { \ - TYPEOF(var_a) * __tmp; \ - __tmp = (typeof(var_b) *)NULL; \ - (void)__tmp; \ - } \ - (void)0 -#else -# define CHECK_TYPE(var, type) -# define CHECK_TYPE_PAIR(var_a, var_b) -#endif - -/* can be used in simple macros */ -#define CHECK_TYPE_INLINE(val, type) ((void)(((type)0) != (val))) - -#ifndef __KERNEL_GPU__ -# include -# define util_assert(statement) assert(statement) -#else -# define util_assert(statement) -#endif - -#endif /* __UTIL_DEFINES_H__ */ diff --git a/intern/cycles/util/util_deque.h b/intern/cycles/util/util_deque.h deleted file mode 100644 index ccac961aa7d..00000000000 --- a/intern/cycles/util/util_deque.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright 2011-2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __UTIL_DEQUE_H__ -#define __UTIL_DEQUE_H__ - -#include - -CCL_NAMESPACE_BEGIN - -using std::deque; - -CCL_NAMESPACE_END - -#endif /* __UTIL_DEQUE_H__ */ diff --git a/intern/cycles/util/util_disjoint_set.h b/intern/cycles/util/util_disjoint_set.h deleted file mode 100644 index 946632371d2..00000000000 --- a/intern/cycles/util/util_disjoint_set.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __UTIL_DISJOINT_SET_H__ -#define __UTIL_DISJOINT_SET_H__ - -#include "util_array.h" -#include - -CCL_NAMESPACE_BEGIN - -class DisjointSet { - private: - array parents; - array ranks; - - public: - DisjointSet(size_t size) : parents(size), ranks(size) - { - for (size_t i = 0; i < size; i++) { - parents[i] = i; - ranks[i] = 0; - } - } - - size_t find(size_t x) - { - size_t root = x; - while (parents[root] != root) { - root = parents[root]; - } - while (parents[x] != root) { - size_t parent = parents[x]; - parents[x] = root; - x = parent; - } - return root; - } - - void join(size_t x, size_t y) - { - size_t x_root = find(x); - size_t y_root = find(y); - - if (x_root == y_root) { - return; - } - - if (ranks[x_root] < ranks[y_root]) { - std::swap(x_root, y_root); - } - parents[y_root] = x_root; - - if (ranks[x_root] == ranks[y_root]) { - ranks[x_root]++; - } - } -}; - -CCL_NAMESPACE_END - -#endif /* __UTIL_DISJOINT_SET_H__ */ diff --git a/intern/cycles/util/util_foreach.h b/intern/cycles/util/util_foreach.h deleted file mode 100644 index d907974be91..00000000000 --- a/intern/cycles/util/util_foreach.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_FOREACH_H__ -#define __UTIL_FOREACH_H__ - -/* Nice foreach() loops for STL data structures. 
*/ - -#define foreach(x, y) for (x : y) - -#endif /* __UTIL_FOREACH_H__ */ diff --git a/intern/cycles/util/util_function.h b/intern/cycles/util/util_function.h deleted file mode 100644 index f3cc00329ad..00000000000 --- a/intern/cycles/util/util_function.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_FUNCTION_H__ -#define __UTIL_FUNCTION_H__ - -#include - -CCL_NAMESPACE_BEGIN - -#define function_bind std::bind -#define function_null nullptr -using std::function; -using std::placeholders::_1; -using std::placeholders::_2; -using std::placeholders::_3; -using std::placeholders::_4; -using std::placeholders::_5; -using std::placeholders::_6; -using std::placeholders::_7; -using std::placeholders::_8; -using std::placeholders::_9; - -CCL_NAMESPACE_END - -#endif /* __UTIL_FUNCTION_H__ */ diff --git a/intern/cycles/util/util_guarded_allocator.cpp b/intern/cycles/util/util_guarded_allocator.cpp deleted file mode 100644 index 1cb466a1ffa..00000000000 --- a/intern/cycles/util/util_guarded_allocator.cpp +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "util/util_guarded_allocator.h" -#include "util/util_stats.h" - -CCL_NAMESPACE_BEGIN - -static Stats global_stats(Stats::static_init); - -/* Internal API. */ - -void util_guarded_mem_alloc(size_t n) -{ - global_stats.mem_alloc(n); -} - -void util_guarded_mem_free(size_t n) -{ - global_stats.mem_free(n); -} - -/* Public API. */ - -size_t util_guarded_get_mem_used() -{ - return global_stats.mem_used; -} - -size_t util_guarded_get_mem_peak() -{ - return global_stats.mem_peak; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_guarded_allocator.h b/intern/cycles/util/util_guarded_allocator.h deleted file mode 100644 index f78cc5f5da9..00000000000 --- a/intern/cycles/util/util_guarded_allocator.h +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __UTIL_GUARDED_ALLOCATOR_H__ -#define __UTIL_GUARDED_ALLOCATOR_H__ - -#include -#include -#include - -#ifdef WITH_BLENDER_GUARDEDALLOC -# include "../../guardedalloc/MEM_guardedalloc.h" -#endif - -CCL_NAMESPACE_BEGIN - -/* Internal use only. */ -void util_guarded_mem_alloc(size_t n); -void util_guarded_mem_free(size_t n); - -/* Guarded allocator for the use with STL. */ -template class GuardedAllocator { - public: - typedef size_t size_type; - typedef ptrdiff_t difference_type; - typedef T *pointer; - typedef const T *const_pointer; - typedef T &reference; - typedef const T &const_reference; - typedef T value_type; - - GuardedAllocator() - { - } - GuardedAllocator(const GuardedAllocator &) - { - } - - T *allocate(size_t n, const void *hint = 0) - { - (void)hint; - size_t size = n * sizeof(T); - util_guarded_mem_alloc(size); - if (n == 0) { - return NULL; - } - T *mem; -#ifdef WITH_BLENDER_GUARDEDALLOC - /* C++ standard requires allocation functions to allocate memory suitably - * aligned for any standard type. This is 16 bytes for 64 bit platform as - * far as i concerned. We might over-align on 32bit here, but that should - * be all safe actually. 
- */ - mem = (T *)MEM_mallocN_aligned(size, 16, "Cycles Alloc"); -#else - mem = (T *)malloc(size); -#endif - if (mem == NULL) { - throw std::bad_alloc(); - } - return mem; - } - - void deallocate(T *p, size_t n) - { - util_guarded_mem_free(n * sizeof(T)); - if (p != NULL) { -#ifdef WITH_BLENDER_GUARDEDALLOC - MEM_freeN(p); -#else - free(p); -#endif - } - } - - T *address(T &x) const - { - return &x; - } - - const T *address(const T &x) const - { - return &x; - } - - GuardedAllocator &operator=(const GuardedAllocator &) - { - return *this; - } - - size_t max_size() const - { - return size_t(-1); - } - - template struct rebind { - typedef GuardedAllocator other; - }; - - template GuardedAllocator(const GuardedAllocator &) - { - } - - template GuardedAllocator &operator=(const GuardedAllocator &) - { - return *this; - } - - inline bool operator==(GuardedAllocator const & /*other*/) const - { - return true; - } - inline bool operator!=(GuardedAllocator const &other) const - { - return !operator==(other); - } - -#ifdef _MSC_VER - /* Welcome to the black magic here. - * - * The issue is that MSVC C++ allocates container proxy on any - * vector initialization, including static vectors which don't - * have any data yet. This leads to several issues: - * - * - Static objects initialization fiasco (global_stats from - * util_stats.h might not be initialized yet). - * - If main() function changes allocator type (for example, - * this might happen with `blender --debug-memory`) nobody - * will know how to convert already allocated memory to a new - * guarded allocator. - * - * Here we work this around by making it so container proxy does - * not use guarded allocation. A bit fragile, unfortunately. - */ - template<> struct rebind { - typedef std::allocator other; - }; - - operator std::allocator() const - { - return std::allocator(); - } -#endif -}; - -/* Get memory usage and peak from the guarded STL allocator. 
*/ -size_t util_guarded_get_mem_used(); -size_t util_guarded_get_mem_peak(); - -/* Call given function and keep track if it runs out of memory. - * - * If it does run out f memory, stop execution and set progress - * to do a global cancel. - * - * It's not fully robust, but good enough to catch obvious issues - * when running out of memory. - */ -#define MEM_GUARDED_CALL(progress, func, ...) \ - do { \ - try { \ - (func)(__VA_ARGS__); \ - } \ - catch (std::bad_alloc &) { \ - fprintf(stderr, "Error: run out of memory!\n"); \ - fflush(stderr); \ - (progress)->set_error("Out of memory"); \ - } \ - } while (false) - -CCL_NAMESPACE_END - -#endif /* __UTIL_GUARDED_ALLOCATOR_H__ */ diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h deleted file mode 100644 index 0db5acd319a..00000000000 --- a/intern/cycles/util/util_half.h +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __UTIL_HALF_H__ -#define __UTIL_HALF_H__ - -#include "util/util_math.h" -#include "util/util_types.h" - -#if !defined(__KERNEL_GPU__) && defined(__KERNEL_SSE2__) -# include "util/util_simd.h" -#endif - -CCL_NAMESPACE_BEGIN - -/* Half Floats */ - -/* CUDA has its own half data type, no need to define then */ -#if !defined(__KERNEL_CUDA__) && !defined(__KERNEL_HIP__) -/* Implementing this as a class rather than a typedef so that the compiler can tell it apart from - * unsigned shorts. */ -class half { - public: - half() : v(0) - { - } - half(const unsigned short &i) : v(i) - { - } - operator unsigned short() - { - return v; - } - half &operator=(const unsigned short &i) - { - v = i; - return *this; - } - - private: - unsigned short v; -}; -#endif - -struct half4 { - half x, y, z, w; -}; - -/* Conversion to/from half float for image textures - * - * Simplified float to half for fast sampling on processor without a native - * instruction, and eliminating any NaN and inf values. */ - -ccl_device_inline half float_to_half_image(float f) -{ -#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) - return __float2half(f); -#else - const uint u = __float_as_uint(f); - /* Sign bit, shifted to its position. */ - uint sign_bit = u & 0x80000000; - sign_bit >>= 16; - /* Exponent. */ - uint exponent_bits = u & 0x7f800000; - /* Non-sign bits. */ - uint value_bits = u & 0x7fffffff; - value_bits >>= 13; /* Align mantissa on MSB. */ - value_bits -= 0x1c000; /* Adjust bias. */ - /* Flush-to-zero. */ - value_bits = (exponent_bits < 0x38800000) ? 0 : value_bits; - /* Clamp-to-max. */ - value_bits = (exponent_bits > 0x47000000) ? 0x7bff : value_bits; - /* Denormals-as-zero. */ - value_bits = (exponent_bits == 0 ? 0 : value_bits); - /* Re-insert sign bit and return. 
*/ - return (value_bits | sign_bit); -#endif -} - -ccl_device_inline float half_to_float_image(half h) -{ -#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) - return __half2float(h); -#else - const int x = ((h & 0x8000) << 16) | (((h & 0x7c00) + 0x1C000) << 13) | ((h & 0x03FF) << 13); - return __int_as_float(x); -#endif -} - -ccl_device_inline float4 half4_to_float4_image(const half4 h) -{ - /* Unable to use because it gives different results half_to_float_image, can we - * modify float_to_half_image so the conversion results are identical? */ -#if 0 /* defined(__KERNEL_AVX2__) */ - /* CPU: AVX. */ - __m128i x = _mm_castpd_si128(_mm_load_sd((const double *)&h)); - return float4(_mm_cvtph_ps(x)); -#endif - - const float4 f = make_float4(half_to_float_image(h.x), - half_to_float_image(h.y), - half_to_float_image(h.z), - half_to_float_image(h.w)); - return f; -} - -/* Conversion to half float texture for display. - * - * Simplified float to half for fast display texture conversion on processors - * without a native instruction. Assumes no negative, no NaN, no inf, and sets - * denormal to 0. */ - -ccl_device_inline half float_to_half_display(const float f) -{ -#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) - return __float2half(f); -#else - const int x = __float_as_int((f > 0.0f) ? ((f < 65504.0f) ? f : 65504.0f) : 0.0f); - const int absolute = x & 0x7FFFFFFF; - const int Z = absolute + 0xC8000000; - const int result = (absolute < 0x38800000) ? 0 : Z; - const int rshift = (result >> 13); - return (rshift & 0x7FFF); -#endif -} - -ccl_device_inline half4 float4_to_half4_display(const float4 f) -{ -#ifdef __KERNEL_SSE2__ - /* CPU: SSE and AVX. 
*/ - ssef x = min(max(load4f(f), 0.0f), 65504.0f); -# ifdef __KERNEL_AVX2__ - ssei rpack = _mm_cvtps_ph(x, 0); -# else - ssei absolute = cast(x) & 0x7FFFFFFF; - ssei Z = absolute + 0xC8000000; - ssei result = andnot(absolute < 0x38800000, Z); - ssei rshift = (result >> 13) & 0x7FFF; - ssei rpack = _mm_packs_epi32(rshift, rshift); -# endif - half4 h; - _mm_storel_pi((__m64 *)&h, _mm_castsi128_ps(rpack)); - return h; -#else - /* GPU and scalar fallback. */ - const half4 h = {float_to_half_display(f.x), - float_to_half_display(f.y), - float_to_half_display(f.z), - float_to_half_display(f.w)}; - return h; -#endif -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_HALF_H__ */ diff --git a/intern/cycles/util/util_hash.h b/intern/cycles/util/util_hash.h deleted file mode 100644 index 0021eec169b..00000000000 --- a/intern/cycles/util/util_hash.h +++ /dev/null @@ -1,389 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __UTIL_HASH_H__ -#define __UTIL_HASH_H__ - -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -/* ***** Jenkins Lookup3 Hash Functions ***** */ - -/* Source: http://burtleburtle.net/bob/c/lookup3.c */ - -#define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k)))) - -#define mix(a, b, c) \ - { \ - a -= c; \ - a ^= rot(c, 4); \ - c += b; \ - b -= a; \ - b ^= rot(a, 6); \ - a += c; \ - c -= b; \ - c ^= rot(b, 8); \ - b += a; \ - a -= c; \ - a ^= rot(c, 16); \ - c += b; \ - b -= a; \ - b ^= rot(a, 19); \ - a += c; \ - c -= b; \ - c ^= rot(b, 4); \ - b += a; \ - } \ - ((void)0) - -#define final(a, b, c) \ - { \ - c ^= b; \ - c -= rot(b, 14); \ - a ^= c; \ - a -= rot(c, 11); \ - b ^= a; \ - b -= rot(a, 25); \ - c ^= b; \ - c -= rot(b, 16); \ - a ^= c; \ - a -= rot(c, 4); \ - b ^= a; \ - b -= rot(a, 14); \ - c ^= b; \ - c -= rot(b, 24); \ - } \ - ((void)0) - -ccl_device_inline uint hash_uint(uint kx) -{ - uint a, b, c; - a = b = c = 0xdeadbeef + (1 << 2) + 13; - - a += kx; - final(a, b, c); - - return c; -} - -ccl_device_inline uint hash_uint2(uint kx, uint ky) -{ - uint a, b, c; - a = b = c = 0xdeadbeef + (2 << 2) + 13; - - b += ky; - a += kx; - final(a, b, c); - - return c; -} - -ccl_device_inline uint hash_uint3(uint kx, uint ky, uint kz) -{ - uint a, b, c; - a = b = c = 0xdeadbeef + (3 << 2) + 13; - - c += kz; - b += ky; - a += kx; - final(a, b, c); - - return c; -} - -ccl_device_inline uint hash_uint4(uint kx, uint ky, uint kz, uint kw) -{ - uint a, b, c; - a = b = c = 0xdeadbeef + (4 << 2) + 13; - - a += kx; - b += ky; - c += kz; - mix(a, b, c); - - a += kw; - final(a, b, c); - - return c; -} - -#undef rot -#undef final -#undef mix - -/* Hashing uint or uint[234] into a float in the range [0, 1]. 
*/ - -ccl_device_inline float hash_uint_to_float(uint kx) -{ - return (float)hash_uint(kx) / (float)0xFFFFFFFFu; -} - -ccl_device_inline float hash_uint2_to_float(uint kx, uint ky) -{ - return (float)hash_uint2(kx, ky) / (float)0xFFFFFFFFu; -} - -ccl_device_inline float hash_uint3_to_float(uint kx, uint ky, uint kz) -{ - return (float)hash_uint3(kx, ky, kz) / (float)0xFFFFFFFFu; -} - -ccl_device_inline float hash_uint4_to_float(uint kx, uint ky, uint kz, uint kw) -{ - return (float)hash_uint4(kx, ky, kz, kw) / (float)0xFFFFFFFFu; -} - -/* Hashing float or float[234] into a float in the range [0, 1]. */ - -ccl_device_inline float hash_float_to_float(float k) -{ - return hash_uint_to_float(__float_as_uint(k)); -} - -ccl_device_inline float hash_float2_to_float(float2 k) -{ - return hash_uint2_to_float(__float_as_uint(k.x), __float_as_uint(k.y)); -} - -ccl_device_inline float hash_float3_to_float(float3 k) -{ - return hash_uint3_to_float(__float_as_uint(k.x), __float_as_uint(k.y), __float_as_uint(k.z)); -} - -ccl_device_inline float hash_float4_to_float(float4 k) -{ - return hash_uint4_to_float( - __float_as_uint(k.x), __float_as_uint(k.y), __float_as_uint(k.z), __float_as_uint(k.w)); -} - -/* Hashing float[234] into float[234] of components in the range [0, 1]. 
*/ - -ccl_device_inline float2 hash_float2_to_float2(float2 k) -{ - return make_float2(hash_float2_to_float(k), hash_float3_to_float(make_float3(k.x, k.y, 1.0))); -} - -ccl_device_inline float3 hash_float3_to_float3(float3 k) -{ - return make_float3(hash_float3_to_float(k), - hash_float4_to_float(make_float4(k.x, k.y, k.z, 1.0)), - hash_float4_to_float(make_float4(k.x, k.y, k.z, 2.0))); -} - -ccl_device_inline float4 hash_float4_to_float4(float4 k) -{ - return make_float4(hash_float4_to_float(k), - hash_float4_to_float(make_float4(k.w, k.x, k.y, k.z)), - hash_float4_to_float(make_float4(k.z, k.w, k.x, k.y)), - hash_float4_to_float(make_float4(k.y, k.z, k.w, k.x))); -} - -/* Hashing float or float[234] into float3 of components in range [0, 1]. */ - -ccl_device_inline float3 hash_float_to_float3(float k) -{ - return make_float3(hash_float_to_float(k), - hash_float2_to_float(make_float2(k, 1.0)), - hash_float2_to_float(make_float2(k, 2.0))); -} - -ccl_device_inline float3 hash_float2_to_float3(float2 k) -{ - return make_float3(hash_float2_to_float(k), - hash_float3_to_float(make_float3(k.x, k.y, 1.0)), - hash_float3_to_float(make_float3(k.x, k.y, 2.0))); -} - -ccl_device_inline float3 hash_float4_to_float3(float4 k) -{ - return make_float3(hash_float4_to_float(k), - hash_float4_to_float(make_float4(k.z, k.x, k.w, k.y)), - hash_float4_to_float(make_float4(k.w, k.z, k.y, k.x))); -} - -/* SSE Versions Of Jenkins Lookup3 Hash Functions */ - -#ifdef __KERNEL_SSE2__ -# define rot(x, k) (((x) << (k)) | (srl(x, 32 - (k)))) - -# define mix(a, b, c) \ - { \ - a -= c; \ - a ^= rot(c, 4); \ - c += b; \ - b -= a; \ - b ^= rot(a, 6); \ - a += c; \ - c -= b; \ - c ^= rot(b, 8); \ - b += a; \ - a -= c; \ - a ^= rot(c, 16); \ - c += b; \ - b -= a; \ - b ^= rot(a, 19); \ - a += c; \ - c -= b; \ - c ^= rot(b, 4); \ - b += a; \ - } - -# define final(a, b, c) \ - { \ - c ^= b; \ - c -= rot(b, 14); \ - a ^= c; \ - a -= rot(c, 11); \ - b ^= a; \ - b -= rot(a, 25); \ - c ^= b; \ - c -= 
rot(b, 16); \ - a ^= c; \ - a -= rot(c, 4); \ - b ^= a; \ - b -= rot(a, 14); \ - c ^= b; \ - c -= rot(b, 24); \ - } - -ccl_device_inline ssei hash_ssei(ssei kx) -{ - ssei a, b, c; - a = b = c = ssei(0xdeadbeef + (1 << 2) + 13); - - a += kx; - final(a, b, c); - - return c; -} - -ccl_device_inline ssei hash_ssei2(ssei kx, ssei ky) -{ - ssei a, b, c; - a = b = c = ssei(0xdeadbeef + (2 << 2) + 13); - - b += ky; - a += kx; - final(a, b, c); - - return c; -} - -ccl_device_inline ssei hash_ssei3(ssei kx, ssei ky, ssei kz) -{ - ssei a, b, c; - a = b = c = ssei(0xdeadbeef + (3 << 2) + 13); - - c += kz; - b += ky; - a += kx; - final(a, b, c); - - return c; -} - -ccl_device_inline ssei hash_ssei4(ssei kx, ssei ky, ssei kz, ssei kw) -{ - ssei a, b, c; - a = b = c = ssei(0xdeadbeef + (4 << 2) + 13); - - a += kx; - b += ky; - c += kz; - mix(a, b, c); - - a += kw; - final(a, b, c); - - return c; -} - -# if defined(__KERNEL_AVX__) -ccl_device_inline avxi hash_avxi(avxi kx) -{ - avxi a, b, c; - a = b = c = avxi(0xdeadbeef + (1 << 2) + 13); - - a += kx; - final(a, b, c); - - return c; -} - -ccl_device_inline avxi hash_avxi2(avxi kx, avxi ky) -{ - avxi a, b, c; - a = b = c = avxi(0xdeadbeef + (2 << 2) + 13); - - b += ky; - a += kx; - final(a, b, c); - - return c; -} - -ccl_device_inline avxi hash_avxi3(avxi kx, avxi ky, avxi kz) -{ - avxi a, b, c; - a = b = c = avxi(0xdeadbeef + (3 << 2) + 13); - - c += kz; - b += ky; - a += kx; - final(a, b, c); - - return c; -} - -ccl_device_inline avxi hash_avxi4(avxi kx, avxi ky, avxi kz, avxi kw) -{ - avxi a, b, c; - a = b = c = avxi(0xdeadbeef + (4 << 2) + 13); - - a += kx; - b += ky; - c += kz; - mix(a, b, c); - - a += kw; - final(a, b, c); - - return c; -} -# endif - -# undef rot -# undef final -# undef mix - -#endif - -#ifndef __KERNEL_GPU__ -static inline uint hash_string(const char *str) -{ - uint i = 0, c; - - while ((c = *str++)) - i = i * 37 + c; - - return i; -} -#endif - -CCL_NAMESPACE_END - -#endif /* __UTIL_HASH_H__ */ diff --git 
a/intern/cycles/util/util_ies.cpp b/intern/cycles/util/util_ies.cpp deleted file mode 100644 index 62d3d42186d..00000000000 --- a/intern/cycles/util/util_ies.cpp +++ /dev/null @@ -1,411 +0,0 @@ -/* - * Copyright 2011-2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include "util/util_foreach.h" -#include "util/util_ies.h" -#include "util/util_math.h" -#include "util/util_string.h" - -CCL_NAMESPACE_BEGIN - -// NOTE: For some reason gcc-7.2 does not instantiate this versio of allocator -// gere (used in IESTextParser). Works fine for gcc-6, gcc-7.3 and gcc-8. -// -// TODO(sergey): Get to the root of this issue, or confirm this i a compiler -// issue. 
-template class GuardedAllocator; - -bool IESFile::load(const string &ies) -{ - clear(); - if (!parse(ies) || !process()) { - clear(); - return false; - } - return true; -} - -void IESFile::clear() -{ - intensity.clear(); - v_angles.clear(); - h_angles.clear(); -} - -int IESFile::packed_size() -{ - if (v_angles.size() && h_angles.size() > 0) { - return 2 + h_angles.size() + v_angles.size() + h_angles.size() * v_angles.size(); - } - return 0; -} - -void IESFile::pack(float *data) -{ - if (v_angles.size() && h_angles.size()) { - *(data++) = __int_as_float(h_angles.size()); - *(data++) = __int_as_float(v_angles.size()); - - memcpy(data, &h_angles[0], h_angles.size() * sizeof(float)); - data += h_angles.size(); - memcpy(data, &v_angles[0], v_angles.size() * sizeof(float)); - data += v_angles.size(); - - for (int h = 0; h < intensity.size(); h++) { - memcpy(data, &intensity[h][0], v_angles.size() * sizeof(float)); - data += v_angles.size(); - } - } -} - -class IESTextParser { - public: - vector text; - char *data; - - IESTextParser(const string &str) : text(str.begin(), str.end()) - { - std::replace(text.begin(), text.end(), ',', ' '); - data = strstr(&text[0], "\nTILT="); - } - - bool eof() - { - return (data == NULL) || (data[0] == '\0'); - } - - double get_double() - { - if (eof()) { - return 0.0; - } - char *old_data = data; - double val = strtod(data, &data); - if (data == old_data) { - data = NULL; - return 0.0; - } - return val; - } - - long get_long() - { - if (eof()) { - return 0; - } - char *old_data = data; - long val = strtol(data, &data, 10); - if (data == old_data) { - data = NULL; - return 0; - } - return val; - } -}; - -bool IESFile::parse(const string &ies) -{ - if (ies.empty()) { - return false; - } - - IESTextParser parser(ies); - if (parser.eof()) { - return false; - } - - /* Handle the tilt data block. 
*/ - if (strncmp(parser.data, "\nTILT=INCLUDE", 13) == 0) { - parser.data += 13; - parser.get_double(); /* Lamp to Luminaire geometry */ - int num_tilt = parser.get_long(); /* Amount of tilt angles and factors */ - /* Skip over angles and factors. */ - for (int i = 0; i < 2 * num_tilt; i++) { - parser.get_double(); - } - } - else { - /* Skip to next line. */ - parser.data = strstr(parser.data + 1, "\n"); - } - - if (parser.eof()) { - return false; - } - parser.data++; - - parser.get_long(); /* Number of lamps */ - parser.get_double(); /* Lumens per lamp */ - double factor = parser.get_double(); /* Candela multiplier */ - int v_angles_num = parser.get_long(); /* Number of vertical angles */ - int h_angles_num = parser.get_long(); /* Number of horizontal angles */ - type = (IESType)parser.get_long(); /* Photometric type */ - - /* TODO(lukas): Test whether the current type B processing can also deal with type A files. - * In theory the only difference should be orientation which we ignore anyways, but with IES you - * never know... - */ - if (type != TYPE_B && type != TYPE_C) { - return false; - } - - parser.get_long(); /* Unit of the geometry data */ - parser.get_double(); /* Width */ - parser.get_double(); /* Length */ - parser.get_double(); /* Height */ - factor *= parser.get_double(); /* Ballast factor */ - factor *= parser.get_double(); /* Ballast-Lamp Photometric factor */ - parser.get_double(); /* Input Watts */ - - /* Intensity values in IES files are specified in candela (lumen/sr), a photometric quantity. - * Cycles expects radiometric quantities, though, which requires a conversion. - * However, the Luminous efficacy (ratio of lumens per Watt) depends on the spectral distribution - * of the light source since lumens take human perception into account. - * Since this spectral distribution is not known from the IES file, a typical one must be - * assumed. 
The D65 standard illuminant has a Luminous efficacy of 177.83, which is used here to - * convert to Watt/sr. A more advanced approach would be to add a Blackbody Temperature input to - * the node and numerically integrate the Luminous efficacy from the resulting spectral - * distribution. Also, the Watt/sr value must be multiplied by 4*pi to get the Watt value that - * Cycles expects for lamp strength. Therefore, the conversion here uses 4*pi/177.83 as a Candela - * to Watt factor. - */ - factor *= 0.0706650768394; - - v_angles.reserve(v_angles_num); - for (int i = 0; i < v_angles_num; i++) { - v_angles.push_back((float)parser.get_double()); - } - - h_angles.reserve(h_angles_num); - for (int i = 0; i < h_angles_num; i++) { - h_angles.push_back((float)parser.get_double()); - } - - intensity.resize(h_angles_num); - for (int i = 0; i < h_angles_num; i++) { - intensity[i].reserve(v_angles_num); - for (int j = 0; j < v_angles_num; j++) { - intensity[i].push_back((float)(factor * parser.get_double())); - } - } - - return !parser.eof(); -} - -bool IESFile::process_type_b() -{ - vector> newintensity; - newintensity.resize(v_angles.size()); - for (int i = 0; i < v_angles.size(); i++) { - newintensity[i].reserve(h_angles.size()); - for (int j = 0; j < h_angles.size(); j++) { - newintensity[i].push_back(intensity[j][i]); - } - } - intensity.swap(newintensity); - h_angles.swap(v_angles); - - float h_first = h_angles[0], h_last = h_angles[h_angles.size() - 1]; - if (h_last != 90.0f) { - return false; - } - - if (h_first == 0.0f) { - /* The range in the file corresponds to 90°-180°, we need to mirror that to get the - * full 180° range. 
*/ - vector new_h_angles; - vector> new_intensity; - int hnum = h_angles.size(); - new_h_angles.reserve(2 * hnum - 1); - new_intensity.reserve(2 * hnum - 1); - for (int i = hnum - 1; i > 0; i--) { - new_h_angles.push_back(90.0f - h_angles[i]); - new_intensity.push_back(intensity[i]); - } - for (int i = 0; i < hnum; i++) { - new_h_angles.push_back(90.0f + h_angles[i]); - new_intensity.push_back(intensity[i]); - } - h_angles.swap(new_h_angles); - intensity.swap(new_intensity); - } - else if (h_first == -90.0f) { - /* We have full 180° coverage, so just shift to match the angle range convention. */ - for (int i = 0; i < h_angles.size(); i++) { - h_angles[i] += 90.0f; - } - } - /* To get correct results with the cubic interpolation in the kernel, the horizontal range - * has to cover all 360°. Therefore, we copy the 0° entry to 360° to ensure full coverage - * and seamless interpolation. */ - h_angles.push_back(360.0f); - intensity.push_back(intensity[0]); - - float v_first = v_angles[0], v_last = v_angles[v_angles.size() - 1]; - if (v_last != 90.0f) { - return false; - } - - if (v_first == 0.0f) { - /* The range in the file corresponds to 90°-180°, we need to mirror that to get the - * full 180° range. */ - vector new_v_angles; - int hnum = h_angles.size(); - int vnum = v_angles.size(); - new_v_angles.reserve(2 * vnum - 1); - for (int i = vnum - 1; i > 0; i--) { - new_v_angles.push_back(90.0f - v_angles[i]); - } - for (int i = 0; i < vnum; i++) { - new_v_angles.push_back(90.0f + v_angles[i]); - } - for (int i = 0; i < hnum; i++) { - vector new_intensity; - new_intensity.reserve(2 * vnum - 1); - for (int j = vnum - 2; j >= 0; j--) { - new_intensity.push_back(intensity[i][j]); - } - new_intensity.insert(new_intensity.end(), intensity[i].begin(), intensity[i].end()); - intensity[i].swap(new_intensity); - } - v_angles.swap(new_v_angles); - } - else if (v_first == -90.0f) { - /* We have full 180° coverage, so just shift to match the angle range convention. 
*/ - for (int i = 0; i < v_angles.size(); i++) { - v_angles[i] += 90.0f; - } - } - - return true; -} - -bool IESFile::process_type_c() -{ - if (h_angles[0] == 90.0f) { - /* Some files are stored from 90° to 270°, so we just rotate them to the regular 0°-180° range - * here. */ - for (int i = 0; i < h_angles.size(); i++) { - h_angles[i] -= 90.0f; - } - } - - if (h_angles[0] != 0.0f) { - return false; - } - - if (h_angles.size() == 1) { - h_angles.push_back(360.0f); - intensity.push_back(intensity[0]); - } - - if (h_angles[h_angles.size() - 1] == 90.0f) { - /* Only one quadrant is defined, so we need to mirror twice (from one to two, then to four). - * Since the two->four mirroring step might also be required if we get an input of two - * quadrants, we only do the first mirror here and later do the second mirror in either case. - */ - int hnum = h_angles.size(); - for (int i = hnum - 2; i >= 0; i--) { - h_angles.push_back(180.0f - h_angles[i]); - intensity.push_back(intensity[i]); - } - } - - if (h_angles[h_angles.size() - 1] == 180.0f) { - /* Mirror half to the full range. */ - int hnum = h_angles.size(); - for (int i = hnum - 2; i >= 0; i--) { - h_angles.push_back(360.0f - h_angles[i]); - intensity.push_back(intensity[i]); - } - } - - /* Some files skip the 360° entry (contrary to standard) because it's supposed to be identical to - * the 0° entry. If the file has a discernible order in its spacing, just fix this. 
*/ - if (h_angles[h_angles.size() - 1] != 360.0f) { - int hnum = h_angles.size(); - float last_step = h_angles[hnum - 1] - h_angles[hnum - 2]; - float first_step = h_angles[1] - h_angles[0]; - float difference = 360.0f - h_angles[hnum - 1]; - if (last_step == difference || first_step == difference) { - h_angles.push_back(360.0f); - intensity.push_back(intensity[0]); - } - else { - return false; - } - } - - float v_first = v_angles[0], v_last = v_angles[v_angles.size() - 1]; - if (v_first == 90.0f) { - if (v_last == 180.0f) { - /* Flip to ensure that vertical angles always start at 0°. */ - for (int i = 0; i < v_angles.size(); i++) { - v_angles[i] = 180.0f - v_angles[i]; - } - } - else { - return false; - } - } - else if (v_first != 0.0f) { - return false; - } - - return true; -} - -bool IESFile::process() -{ - if (h_angles.size() == 0 || v_angles.size() == 0) { - return false; - } - - if (type == TYPE_B) { - if (!process_type_b()) { - return false; - } - } - else { - assert(type == TYPE_C); - if (!process_type_c()) { - return false; - } - } - - assert(v_angles[0] == 0.0f); - assert(h_angles[0] == 0.0f); - assert(h_angles[h_angles.size() - 1] == 360.0f); - - /* Convert from deg to rad. */ - for (int i = 0; i < v_angles.size(); i++) { - v_angles[i] *= M_PI_F / 180.f; - } - for (int i = 0; i < h_angles.size(); i++) { - h_angles[i] *= M_PI_F / 180.f; - } - - return true; -} - -IESFile::~IESFile() -{ - clear(); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_ies.h b/intern/cycles/util/util_ies.h deleted file mode 100644 index 95473103614..00000000000 --- a/intern/cycles/util/util_ies.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2011-2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_IES_H__ -#define __UTIL_IES_H__ - -#include "util/util_string.h" -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -class IESFile { - public: - IESFile() - { - } - ~IESFile(); - - int packed_size(); - void pack(float *data); - - bool load(const string &ies); - void clear(); - - protected: - bool parse(const string &ies); - bool process(); - bool process_type_b(); - bool process_type_c(); - - /* The brightness distribution is stored in spherical coordinates. - * The horizontal angles correspond to theta in the regular notation - * and always span the full range from 0° to 360°. - * The vertical angles correspond to phi and always start at 0°. */ - vector v_angles, h_angles; - /* The actual values are stored here, with every entry storing the values - * of one horizontal segment. */ - vector> intensity; - - /* Types of angle representation in IES files. Currently, only B and C are supported. */ - enum IESType { TYPE_A = 3, TYPE_B = 2, TYPE_C = 1 } type; -}; - -CCL_NAMESPACE_END - -#endif /* __UTIL_IES_H__ */ diff --git a/intern/cycles/util/util_image.h b/intern/cycles/util/util_image.h deleted file mode 100644 index b082b971613..00000000000 --- a/intern/cycles/util/util_image.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_IMAGE_H__ -# define __UTIL_IMAGE_H__ - -/* OpenImageIO is used for all image file reading and writing. */ - -# include - -# include "util/util_half.h" -# include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -OIIO_NAMESPACE_USING - -template -void util_image_resize_pixels(const vector &input_pixels, - const size_t input_width, - const size_t input_height, - const size_t input_depth, - const size_t components, - vector *output_pixels, - size_t *output_width, - size_t *output_height, - size_t *output_depth); - -/* Cast input pixel from unknown storage to float. */ -template inline float util_image_cast_to_float(T value); - -template<> inline float util_image_cast_to_float(float value) -{ - return value; -} -template<> inline float util_image_cast_to_float(uchar value) -{ - return (float)value / 255.0f; -} -template<> inline float util_image_cast_to_float(uint16_t value) -{ - return (float)value / 65535.0f; -} -template<> inline float util_image_cast_to_float(half value) -{ - return half_to_float_image(value); -} - -/* Cast float value to output pixel type. 
*/ -template inline T util_image_cast_from_float(float value); - -template<> inline float util_image_cast_from_float(float value) -{ - return value; -} -template<> inline uchar util_image_cast_from_float(float value) -{ - if (value < 0.0f) { - return 0; - } - else if (value > (1.0f - 0.5f / 255.0f)) { - return 255; - } - return (uchar)((255.0f * value) + 0.5f); -} -template<> inline uint16_t util_image_cast_from_float(float value) -{ - if (value < 0.0f) { - return 0; - } - else if (value > (1.0f - 0.5f / 65535.0f)) { - return 65535; - } - return (uint16_t)((65535.0f * value) + 0.5f); -} -template<> inline half util_image_cast_from_float(float value) -{ - return float_to_half_image(value); -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_IMAGE_H__ */ - -#include "util/util_image_impl.h" diff --git a/intern/cycles/util/util_image_impl.h b/intern/cycles/util/util_image_impl.h deleted file mode 100644 index 3eb30d070ea..00000000000 --- a/intern/cycles/util/util_image_impl.h +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __UTIL_IMAGE_IMPL_H__ -#define __UTIL_IMAGE_IMPL_H__ - -#include "util/util_algorithm.h" -#include "util/util_half.h" -#include "util/util_image.h" - -CCL_NAMESPACE_BEGIN - -namespace { - -template -const T *util_image_read(const vector &pixels, - const size_t width, - const size_t height, - const size_t /*depth*/, - const size_t components, - const size_t x, - const size_t y, - const size_t z) -{ - const size_t index = ((size_t)z * (width * height) + (size_t)y * width + (size_t)x) * components; - return &pixels[index]; -} - -template -void util_image_downscale_sample(const vector &pixels, - const size_t width, - const size_t height, - const size_t depth, - const size_t components, - const size_t kernel_size, - const float x, - const float y, - const float z, - T *result) -{ - assert(components <= 4); - const size_t ix = (size_t)x, iy = (size_t)y, iz = (size_t)z; - /* TODO(sergey): Support something smarter than box filer. */ - float accum[4] = {0}; - size_t count = 0; - for (size_t dz = 0; dz < kernel_size; ++dz) { - for (size_t dy = 0; dy < kernel_size; ++dy) { - for (size_t dx = 0; dx < kernel_size; ++dx) { - const size_t nx = ix + dx, ny = iy + dy, nz = iz + dz; - if (nx >= width || ny >= height || nz >= depth) { - continue; - } - const T *pixel = util_image_read(pixels, width, height, depth, components, nx, ny, nz); - for (size_t k = 0; k < components; ++k) { - accum[k] += util_image_cast_to_float(pixel[k]); - } - ++count; - } - } - } - if (count != 0) { - const float inv_count = 1.0f / (float)count; - for (size_t k = 0; k < components; ++k) { - result[k] = util_image_cast_from_float(accum[k] * inv_count); - } - } - else { - for (size_t k = 0; k < components; ++k) { - result[k] = T(0.0f); - } - } -} - -template -void util_image_downscale_pixels(const vector &input_pixels, - const size_t input_width, - const size_t input_height, - const size_t input_depth, - const size_t components, - const float inv_scale_factor, - const size_t output_width, - 
const size_t output_height, - const size_t output_depth, - vector *output_pixels) -{ - const size_t kernel_size = (size_t)(inv_scale_factor + 0.5f); - for (size_t z = 0; z < output_depth; ++z) { - for (size_t y = 0; y < output_height; ++y) { - for (size_t x = 0; x < output_width; ++x) { - const float input_x = (float)x * inv_scale_factor, input_y = (float)y * inv_scale_factor, - input_z = (float)z * inv_scale_factor; - const size_t output_index = (z * output_width * output_height + y * output_width + x) * - components; - util_image_downscale_sample(input_pixels, - input_width, - input_height, - input_depth, - components, - kernel_size, - input_x, - input_y, - input_z, - &output_pixels->at(output_index)); - } - } - } -} - -} /* namespace */ - -template -void util_image_resize_pixels(const vector &input_pixels, - const size_t input_width, - const size_t input_height, - const size_t input_depth, - const size_t components, - const float scale_factor, - vector *output_pixels, - size_t *output_width, - size_t *output_height, - size_t *output_depth) -{ - /* Early output for case when no scaling is applied. */ - if (scale_factor == 1.0f) { - *output_width = input_width; - *output_height = input_height; - *output_depth = input_depth; - *output_pixels = input_pixels; - return; - } - /* First of all, we calculate output image dimensions. - * We clamp them to be 1 pixel at least so we do not generate degenerate - * image. - */ - *output_width = max((size_t)((float)input_width * scale_factor), (size_t)1); - *output_height = max((size_t)((float)input_height * scale_factor), (size_t)1); - *output_depth = max((size_t)((float)input_depth * scale_factor), (size_t)1); - /* Prepare pixel storage for the result. 
*/ - const size_t num_output_pixels = ((*output_width) * (*output_height) * (*output_depth)) * - components; - output_pixels->resize(num_output_pixels); - if (scale_factor < 1.0f) { - const float inv_scale_factor = 1.0f / scale_factor; - util_image_downscale_pixels(input_pixels, - input_width, - input_height, - input_depth, - components, - inv_scale_factor, - *output_width, - *output_height, - *output_depth, - output_pixels); - } - else { - /* TODO(sergey): Needs implementation. */ - } -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_IMAGE_IMPL_H__ */ diff --git a/intern/cycles/util/util_list.h b/intern/cycles/util/util_list.h deleted file mode 100644 index f555b001186..00000000000 --- a/intern/cycles/util/util_list.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_LIST_H__ -#define __UTIL_LIST_H__ - -#include - -CCL_NAMESPACE_BEGIN - -using std::list; - -CCL_NAMESPACE_END - -#endif /* __UTIL_LIST_H__ */ diff --git a/intern/cycles/util/util_logging.cpp b/intern/cycles/util/util_logging.cpp deleted file mode 100644 index 8272728a7a0..00000000000 --- a/intern/cycles/util/util_logging.cpp +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright 2011-2014 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "util/util_logging.h" - -#include "util/util_math.h" -#include "util/util_string.h" - -#include -#ifdef _MSC_VER -# define snprintf _snprintf -#endif - -CCL_NAMESPACE_BEGIN - -#ifdef WITH_CYCLES_LOGGING -static bool is_verbosity_set() -{ - using CYCLES_GFLAGS_NAMESPACE::GetCommandLineOption; - - std::string verbosity; - if (!GetCommandLineOption("v", &verbosity)) { - return false; - } - return verbosity != "0"; -} -#endif - -void util_logging_init(const char *argv0) -{ -#ifdef WITH_CYCLES_LOGGING - using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption; - - google::InitGoogleLogging(argv0); - SetCommandLineOption("logtostderr", "1"); - if (!is_verbosity_set()) { - SetCommandLineOption("v", "0"); - } - SetCommandLineOption("stderrthreshold", "0"); - SetCommandLineOption("minloglevel", "0"); -#else - (void)argv0; -#endif -} - -void util_logging_start() -{ -#ifdef WITH_CYCLES_LOGGING - using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption; - SetCommandLineOption("logtostderr", "1"); - if (!is_verbosity_set()) { - SetCommandLineOption("v", "2"); - } - SetCommandLineOption("stderrthreshold", "0"); - SetCommandLineOption("minloglevel", "0"); -#endif -} - -void util_logging_verbosity_set(int verbosity) -{ -#ifdef WITH_CYCLES_LOGGING - using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption; - char val[10]; - snprintf(val, sizeof(val), "%d", verbosity); - SetCommandLineOption("v", val); -#else - (void)verbosity; -#endif -} - -std::ostream &operator<<(std::ostream &os, const int2 &value) -{ - os << "(" << value.x << ", " << value.y << ")"; - return 
os; -} - -std::ostream &operator<<(std::ostream &os, const float3 &value) -{ - os << "(" << value.x << ", " << value.y << ", " << value.z << ")"; - return os; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_logging.h b/intern/cycles/util/util_logging.h deleted file mode 100644 index 35c2d436d09..00000000000 --- a/intern/cycles/util/util_logging.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright 2011-2014 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_LOGGING_H__ -#define __UTIL_LOGGING_H__ - -#if defined(WITH_CYCLES_LOGGING) && !defined(__KERNEL_GPU__) -# include -# include -#endif - -#include - -CCL_NAMESPACE_BEGIN - -#if !defined(WITH_CYCLES_LOGGING) || defined(__KERNEL_GPU__) -class StubStream { - public: - template StubStream &operator<<(const T &) - { - return *this; - } -}; - -class LogMessageVoidify { - public: - LogMessageVoidify() - { - } - void operator&(const StubStream &) - { - } -}; - -# define LOG_SUPPRESS() (true) ? 
((void)0) : LogMessageVoidify() & StubStream() -# define LOG(severity) LOG_SUPPRESS() -# define VLOG(severity) LOG_SUPPRESS() -# define VLOG_IF(severity, condition) LOG_SUPPRESS() -# define VLOG_IS_ON(severity) false - -# define CHECK(expression) LOG_SUPPRESS() - -# define CHECK_NOTNULL(expression) LOG_SUPPRESS() -# define CHECK_NULL(expression) LOG_SUPPRESS() - -# define CHECK_NEAR(actual, expected, eps) LOG_SUPPRESS() - -# define CHECK_GE(a, b) LOG_SUPPRESS() -# define CHECK_NE(a, b) LOG_SUPPRESS() -# define CHECK_EQ(a, b) LOG_SUPPRESS() -# define CHECK_GT(a, b) LOG_SUPPRESS() -# define CHECK_LT(a, b) LOG_SUPPRESS() -# define CHECK_LE(a, b) LOG_SUPPRESS() - -# define DCHECK(expression) LOG_SUPPRESS() - -# define DCHECK_NOTNULL(expression) LOG_SUPPRESS() -# define DCHECK_NULL(expression) LOG_SUPPRESS() - -# define DCHECK_NEAR(actual, expected, eps) LOG_SUPPRESS() - -# define DCHECK_GE(a, b) LOG_SUPPRESS() -# define DCHECK_NE(a, b) LOG_SUPPRESS() -# define DCHECK_EQ(a, b) LOG_SUPPRESS() -# define DCHECK_GT(a, b) LOG_SUPPRESS() -# define DCHECK_LT(a, b) LOG_SUPPRESS() -# define DCHECK_LE(a, b) LOG_SUPPRESS() - -# define LOG_ASSERT(expression) LOG_SUPPRESS() -#endif - -#define VLOG_ONCE(level, flag) \ - if (!flag) \ - flag = true, VLOG(level) - -struct int2; -struct float3; - -void util_logging_init(const char *argv0); -void util_logging_start(); -void util_logging_verbosity_set(int verbosity); - -std::ostream &operator<<(std::ostream &os, const int2 &value); -std::ostream &operator<<(std::ostream &os, const float3 &value); - -CCL_NAMESPACE_END - -#endif /* __UTIL_LOGGING_H__ */ diff --git a/intern/cycles/util/util_map.h b/intern/cycles/util/util_map.h deleted file mode 100644 index f1b2522362f..00000000000 --- a/intern/cycles/util/util_map.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_MAP_H__ -#define __UTIL_MAP_H__ - -#include -#include - -CCL_NAMESPACE_BEGIN - -using std::map; -using std::pair; -using std::unordered_map; -using std::unordered_multimap; - -template static void map_free_memory(T &data) -{ - /* Use swap() trick to actually free all internal memory. */ - T empty_data; - data.swap(empty_data); -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_MAP_H__ */ diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h deleted file mode 100644 index 535b6881d3f..00000000000 --- a/intern/cycles/util/util_math.h +++ /dev/null @@ -1,870 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_MATH_H__ -#define __UTIL_MATH_H__ - -/* Math - * - * Basic math functions on scalar and vector types. This header is used by - * both the kernel code when compiled as C++, and other C++ non-kernel code. 
*/ - -#ifndef __KERNEL_GPU__ -# include -#endif - -#ifdef __HIP__ -# include -#endif - -#include -#include -#include - -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -/* Float Pi variations */ - -/* Division */ -#ifndef M_PI_F -# define M_PI_F (3.1415926535897932f) /* pi */ -#endif -#ifndef M_PI_2_F -# define M_PI_2_F (1.5707963267948966f) /* pi/2 */ -#endif -#ifndef M_PI_4_F -# define M_PI_4_F (0.7853981633974830f) /* pi/4 */ -#endif -#ifndef M_1_PI_F -# define M_1_PI_F (0.3183098861837067f) /* 1/pi */ -#endif -#ifndef M_2_PI_F -# define M_2_PI_F (0.6366197723675813f) /* 2/pi */ -#endif -#ifndef M_1_2PI_F -# define M_1_2PI_F (0.1591549430918953f) /* 1/(2*pi) */ -#endif -#ifndef M_SQRT_PI_8_F -# define M_SQRT_PI_8_F (0.6266570686577501f) /* sqrt(pi/8) */ -#endif -#ifndef M_LN_2PI_F -# define M_LN_2PI_F (1.8378770664093454f) /* ln(2*pi) */ -#endif - -/* Multiplication */ -#ifndef M_2PI_F -# define M_2PI_F (6.2831853071795864f) /* 2*pi */ -#endif -#ifndef M_4PI_F -# define M_4PI_F (12.566370614359172f) /* 4*pi */ -#endif - -/* Float sqrt variations */ -#ifndef M_SQRT2_F -# define M_SQRT2_F (1.4142135623730950f) /* sqrt(2) */ -#endif -#ifndef M_LN2_F -# define M_LN2_F (0.6931471805599453f) /* ln(2) */ -#endif -#ifndef M_LN10_F -# define M_LN10_F (2.3025850929940457f) /* ln(10) */ -#endif - -/* Scalar */ - -#ifndef __HIP__ -# ifdef _WIN32 -ccl_device_inline float fmaxf(float a, float b) -{ - return (a > b) ? a : b; -} - -ccl_device_inline float fminf(float a, float b) -{ - return (a < b) ? a : b; -} - -# endif /* _WIN32 */ -#endif /* __HIP__ */ - -#ifndef __KERNEL_GPU__ -using std::isfinite; -using std::isnan; -using std::sqrt; - -ccl_device_inline int abs(int x) -{ - return (x > 0) ? x : -x; -} - -ccl_device_inline int max(int a, int b) -{ - return (a > b) ? a : b; -} - -ccl_device_inline int min(int a, int b) -{ - return (a < b) ? a : b; -} - -ccl_device_inline uint min(uint a, uint b) -{ - return (a < b) ? 
a : b; -} - -ccl_device_inline float max(float a, float b) -{ - return (a > b) ? a : b; -} - -ccl_device_inline float min(float a, float b) -{ - return (a < b) ? a : b; -} - -ccl_device_inline double max(double a, double b) -{ - return (a > b) ? a : b; -} - -ccl_device_inline double min(double a, double b) -{ - return (a < b) ? a : b; -} - -/* These 2 guys are templated for usage with registers data. - * - * NOTE: Since this is CPU-only functions it is ok to use references here. - * But for other devices we'll need to be careful about this. - */ - -template ccl_device_inline T min4(const T &a, const T &b, const T &c, const T &d) -{ - return min(min(a, b), min(c, d)); -} - -template ccl_device_inline T max4(const T &a, const T &b, const T &c, const T &d) -{ - return max(max(a, b), max(c, d)); -} -#endif /* __KERNEL_GPU__ */ - -ccl_device_inline float min4(float a, float b, float c, float d) -{ - return min(min(a, b), min(c, d)); -} - -ccl_device_inline float max4(float a, float b, float c, float d) -{ - return max(max(a, b), max(c, d)); -} - -/* Int/Float conversion */ - -ccl_device_inline int as_int(uint i) -{ - union { - uint ui; - int i; - } u; - u.ui = i; - return u.i; -} - -ccl_device_inline uint as_uint(int i) -{ - union { - uint ui; - int i; - } u; - u.i = i; - return u.ui; -} - -ccl_device_inline uint as_uint(float f) -{ - union { - uint i; - float f; - } u; - u.f = f; - return u.i; -} - -#ifndef __HIP__ -ccl_device_inline int __float_as_int(float f) -{ - union { - int i; - float f; - } u; - u.f = f; - return u.i; -} - -ccl_device_inline float __int_as_float(int i) -{ - union { - int i; - float f; - } u; - u.i = i; - return u.f; -} - -ccl_device_inline uint __float_as_uint(float f) -{ - union { - uint i; - float f; - } u; - u.f = f; - return u.i; -} - -ccl_device_inline float __uint_as_float(uint i) -{ - union { - uint i; - float f; - } u; - u.i = i; - return u.f; -} -#endif - -ccl_device_inline int4 __float4_as_int4(float4 f) -{ -#ifdef __KERNEL_SSE__ - 
return int4(_mm_castps_si128(f.m128)); -#else - return make_int4( - __float_as_int(f.x), __float_as_int(f.y), __float_as_int(f.z), __float_as_int(f.w)); -#endif -} - -ccl_device_inline float4 __int4_as_float4(int4 i) -{ -#ifdef __KERNEL_SSE__ - return float4(_mm_castsi128_ps(i.m128)); -#else - return make_float4( - __int_as_float(i.x), __int_as_float(i.y), __int_as_float(i.z), __int_as_float(i.w)); -#endif -} - -template ccl_device_inline uint pointer_pack_to_uint_0(T *ptr) -{ - return ((uint64_t)ptr) & 0xFFFFFFFF; -} - -template ccl_device_inline uint pointer_pack_to_uint_1(T *ptr) -{ - return (((uint64_t)ptr) >> 32) & 0xFFFFFFFF; -} - -template ccl_device_inline T *pointer_unpack_from_uint(const uint a, const uint b) -{ - return (T *)(((uint64_t)b << 32) | a); -} - -ccl_device_inline uint uint16_pack_to_uint(const uint a, const uint b) -{ - return (a << 16) | b; -} - -ccl_device_inline uint uint16_unpack_from_uint_0(const uint i) -{ - return i >> 16; -} - -ccl_device_inline uint uint16_unpack_from_uint_1(const uint i) -{ - return i & 0xFFFF; -} - -/* Versions of functions which are safe for fast math. */ -ccl_device_inline bool isnan_safe(float f) -{ - unsigned int x = __float_as_uint(f); - return (x << 1) > 0xff000000u; -} - -ccl_device_inline bool isfinite_safe(float f) -{ - /* By IEEE 754 rule, 2*Inf equals Inf */ - unsigned int x = __float_as_uint(f); - return (f == f) && (x == 0 || x == (1u << 31) || (f != 2.0f * f)) && !((x << 1) > 0xff000000u); -} - -ccl_device_inline float ensure_finite(float v) -{ - return isfinite_safe(v) ? 
v : 0.0f; -} - -ccl_device_inline int clamp(int a, int mn, int mx) -{ - return min(max(a, mn), mx); -} - -ccl_device_inline float clamp(float a, float mn, float mx) -{ - return min(max(a, mn), mx); -} - -ccl_device_inline float mix(float a, float b, float t) -{ - return a + t * (b - a); -} - -ccl_device_inline float smoothstep(float edge0, float edge1, float x) -{ - float result; - if (x < edge0) - result = 0.0f; - else if (x >= edge1) - result = 1.0f; - else { - float t = (x - edge0) / (edge1 - edge0); - result = (3.0f - 2.0f * t) * (t * t); - } - return result; -} - -#ifndef __KERNEL_CUDA__ -ccl_device_inline float saturate(float a) -{ - return clamp(a, 0.0f, 1.0f); -} -#endif /* __KERNEL_CUDA__ */ - -ccl_device_inline int float_to_int(float f) -{ - return (int)f; -} - -ccl_device_inline int floor_to_int(float f) -{ - return float_to_int(floorf(f)); -} - -ccl_device_inline int quick_floor_to_int(float x) -{ - return float_to_int(x) - ((x < 0) ? 1 : 0); -} - -ccl_device_inline float floorfrac(float x, ccl_private int *i) -{ - *i = quick_floor_to_int(x); - return x - *i; -} - -ccl_device_inline int ceil_to_int(float f) -{ - return float_to_int(ceilf(f)); -} - -ccl_device_inline float fractf(float x) -{ - return x - floorf(x); -} - -/* Adapted from godot-engine math_funcs.h. */ -ccl_device_inline float wrapf(float value, float max, float min) -{ - float range = max - min; - return (range != 0.0f) ? value - (range * floorf((value - min) / range)) : min; -} - -ccl_device_inline float pingpongf(float a, float b) -{ - return (b != 0.0f) ? fabsf(fractf((a - b) / (b * 2.0f)) * b * 2.0f - b) : 0.0f; -} - -ccl_device_inline float smoothminf(float a, float b, float k) -{ - if (k != 0.0f) { - float h = fmaxf(k - fabsf(a - b), 0.0f) / k; - return fminf(a, b) - h * h * h * k * (1.0f / 6.0f); - } - else { - return fminf(a, b); - } -} - -ccl_device_inline float signf(float f) -{ - return (f < 0.0f) ? 
-1.0f : 1.0f; -} - -ccl_device_inline float nonzerof(float f, float eps) -{ - if (fabsf(f) < eps) - return signf(f) * eps; - else - return f; -} - -/* `signum` function testing for zero. Matches GLSL and OSL functions. */ -ccl_device_inline float compatible_signf(float f) -{ - if (f == 0.0f) { - return 0.0f; - } - else { - return signf(f); - } -} - -ccl_device_inline float smoothstepf(float f) -{ - float ff = f * f; - return (3.0f * ff - 2.0f * ff * f); -} - -ccl_device_inline int mod(int x, int m) -{ - return (x % m + m) % m; -} - -ccl_device_inline float3 float2_to_float3(const float2 a) -{ - return make_float3(a.x, a.y, 0.0f); -} - -ccl_device_inline float3 float4_to_float3(const float4 a) -{ - return make_float3(a.x, a.y, a.z); -} - -ccl_device_inline float4 float3_to_float4(const float3 a) -{ - return make_float4(a.x, a.y, a.z, 1.0f); -} - -ccl_device_inline float inverse_lerp(float a, float b, float x) -{ - return (x - a) / (b - a); -} - -/* Cubic interpolation between b and c, a and d are the previous and next point. 
*/ -ccl_device_inline float cubic_interp(float a, float b, float c, float d, float x) -{ - return 0.5f * - (((d + 3.0f * (b - c) - a) * x + (2.0f * a - 5.0f * b + 4.0f * c - d)) * x + - (c - a)) * - x + - b; -} - -CCL_NAMESPACE_END - -#include "util/util_math_int2.h" -#include "util/util_math_int3.h" -#include "util/util_math_int4.h" - -#include "util/util_math_float2.h" -#include "util/util_math_float3.h" -#include "util/util_math_float4.h" - -#include "util/util_rect.h" - -CCL_NAMESPACE_BEGIN - -/* Interpolation */ - -template A lerp(const A &a, const A &b, const B &t) -{ - return (A)(a * ((B)1 - t) + b * t); -} - -/* Triangle */ - -ccl_device_inline float triangle_area(ccl_private const float3 &v1, - ccl_private const float3 &v2, - ccl_private const float3 &v3) -{ - return len(cross(v3 - v2, v1 - v2)) * 0.5f; -} - -/* Orthonormal vectors */ - -ccl_device_inline void make_orthonormals(const float3 N, - ccl_private float3 *a, - ccl_private float3 *b) -{ -#if 0 - if (fabsf(N.y) >= 0.999f) { - *a = make_float3(1, 0, 0); - *b = make_float3(0, 0, 1); - return; - } - if (fabsf(N.z) >= 0.999f) { - *a = make_float3(1, 0, 0); - *b = make_float3(0, 1, 0); - return; - } -#endif - - if (N.x != N.y || N.x != N.z) - *a = make_float3(N.z - N.y, N.x - N.z, N.y - N.x); //(1,1,1)x N - else - *a = make_float3(N.z - N.y, N.x + N.z, -N.y - N.x); //(-1,1,1)x N - - *a = normalize(*a); - *b = cross(N, *a); -} - -/* Color division */ - -ccl_device_inline float3 safe_invert_color(float3 a) -{ - float x, y, z; - - x = (a.x != 0.0f) ? 1.0f / a.x : 0.0f; - y = (a.y != 0.0f) ? 1.0f / a.y : 0.0f; - z = (a.z != 0.0f) ? 1.0f / a.z : 0.0f; - - return make_float3(x, y, z); -} - -ccl_device_inline float3 safe_divide_color(float3 a, float3 b) -{ - float x, y, z; - - x = (b.x != 0.0f) ? a.x / b.x : 0.0f; - y = (b.y != 0.0f) ? a.y / b.y : 0.0f; - z = (b.z != 0.0f) ? 
a.z / b.z : 0.0f; - - return make_float3(x, y, z); -} - -ccl_device_inline float3 safe_divide_even_color(float3 a, float3 b) -{ - float x, y, z; - - x = (b.x != 0.0f) ? a.x / b.x : 0.0f; - y = (b.y != 0.0f) ? a.y / b.y : 0.0f; - z = (b.z != 0.0f) ? a.z / b.z : 0.0f; - - /* try to get gray even if b is zero */ - if (b.x == 0.0f) { - if (b.y == 0.0f) { - x = z; - y = z; - } - else if (b.z == 0.0f) { - x = y; - z = y; - } - else - x = 0.5f * (y + z); - } - else if (b.y == 0.0f) { - if (b.z == 0.0f) { - y = x; - z = x; - } - else - y = 0.5f * (x + z); - } - else if (b.z == 0.0f) { - z = 0.5f * (x + y); - } - - return make_float3(x, y, z); -} - -/* Rotation of point around axis and angle */ - -ccl_device_inline float3 rotate_around_axis(float3 p, float3 axis, float angle) -{ - float costheta = cosf(angle); - float sintheta = sinf(angle); - float3 r; - - r.x = ((costheta + (1 - costheta) * axis.x * axis.x) * p.x) + - (((1 - costheta) * axis.x * axis.y - axis.z * sintheta) * p.y) + - (((1 - costheta) * axis.x * axis.z + axis.y * sintheta) * p.z); - - r.y = (((1 - costheta) * axis.x * axis.y + axis.z * sintheta) * p.x) + - ((costheta + (1 - costheta) * axis.y * axis.y) * p.y) + - (((1 - costheta) * axis.y * axis.z - axis.x * sintheta) * p.z); - - r.z = (((1 - costheta) * axis.x * axis.z - axis.y * sintheta) * p.x) + - (((1 - costheta) * axis.y * axis.z + axis.x * sintheta) * p.y) + - ((costheta + (1 - costheta) * axis.z * axis.z) * p.z); - - return r; -} - -/* NaN-safe math ops */ - -ccl_device_inline float safe_sqrtf(float f) -{ - return sqrtf(max(f, 0.0f)); -} - -ccl_device_inline float inversesqrtf(float f) -{ - return (f > 0.0f) ? 
1.0f / sqrtf(f) : 0.0f; -} - -ccl_device float safe_asinf(float a) -{ - return asinf(clamp(a, -1.0f, 1.0f)); -} - -ccl_device float safe_acosf(float a) -{ - return acosf(clamp(a, -1.0f, 1.0f)); -} - -ccl_device float compatible_powf(float x, float y) -{ -#ifdef __KERNEL_GPU__ - if (y == 0.0f) /* x^0 -> 1, including 0^0 */ - return 1.0f; - - /* GPU pow doesn't accept negative x, do manual checks here */ - if (x < 0.0f) { - if (fmodf(-y, 2.0f) == 0.0f) - return powf(-x, y); - else - return -powf(-x, y); - } - else if (x == 0.0f) - return 0.0f; -#endif - return powf(x, y); -} - -ccl_device float safe_powf(float a, float b) -{ - if (UNLIKELY(a < 0.0f && b != float_to_int(b))) - return 0.0f; - - return compatible_powf(a, b); -} - -ccl_device float safe_divide(float a, float b) -{ - return (b != 0.0f) ? a / b : 0.0f; -} - -ccl_device float safe_logf(float a, float b) -{ - if (UNLIKELY(a <= 0.0f || b <= 0.0f)) - return 0.0f; - - return safe_divide(logf(a), logf(b)); -} - -ccl_device float safe_modulo(float a, float b) -{ - return (b != 0.0f) ? 
fmodf(a, b) : 0.0f; -} - -ccl_device_inline float sqr(float a) -{ - return a * a; -} - -ccl_device_inline float pow20(float a) -{ - return sqr(sqr(sqr(sqr(a)) * a)); -} - -ccl_device_inline float pow22(float a) -{ - return sqr(a * sqr(sqr(sqr(a)) * a)); -} - -ccl_device_inline float beta(float x, float y) -{ - return expf(lgammaf(x) + lgammaf(y) - lgammaf(x + y)); -} - -ccl_device_inline float xor_signmask(float x, int y) -{ - return __int_as_float(__float_as_int(x) ^ y); -} - -ccl_device float bits_to_01(uint bits) -{ - return bits * (1.0f / (float)0xFFFFFFFF); -} - -ccl_device_inline uint count_leading_zeros(uint x) -{ -#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) - return __clz(x); -#else - assert(x != 0); -# ifdef _MSC_VER - unsigned long leading_zero = 0; - _BitScanReverse(&leading_zero, x); - return (31 - leading_zero); -# else - return __builtin_clz(x); -# endif -#endif -} - -ccl_device_inline uint count_trailing_zeros(uint x) -{ -#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) - return (__ffs(x) - 1); -#else - assert(x != 0); -# ifdef _MSC_VER - unsigned long ctz = 0; - _BitScanForward(&ctz, x); - return ctz; -# else - return __builtin_ctz(x); -# endif -#endif -} - -ccl_device_inline uint find_first_set(uint x) -{ -#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) - return __ffs(x); -#else -# ifdef _MSC_VER - return (x != 0) ? 
(32 - count_leading_zeros(x & (-x))) : 0; -# else - return __builtin_ffs(x); -# endif -#endif -} - -/* projections */ -ccl_device_inline float2 map_to_tube(const float3 co) -{ - float len, u, v; - len = sqrtf(co.x * co.x + co.y * co.y); - if (len > 0.0f) { - u = (1.0f - (atan2f(co.x / len, co.y / len) / M_PI_F)) * 0.5f; - v = (co.z + 1.0f) * 0.5f; - } - else { - u = v = 0.0f; - } - return make_float2(u, v); -} - -ccl_device_inline float2 map_to_sphere(const float3 co) -{ - float l = len(co); - float u, v; - if (l > 0.0f) { - if (UNLIKELY(co.x == 0.0f && co.y == 0.0f)) { - u = 0.0f; /* Otherwise domain error. */ - } - else { - u = (1.0f - atan2f(co.x, co.y) / M_PI_F) / 2.0f; - } - v = 1.0f - safe_acosf(co.z / l) / M_PI_F; - } - else { - u = v = 0.0f; - } - return make_float2(u, v); -} - -/* Compares two floats. - * Returns true if their absolute difference is smaller than abs_diff (for numbers near zero) - * or their relative difference is less than ulp_diff ULPs. - * Based on - * https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/ - */ - -ccl_device_inline float compare_floats(float a, float b, float abs_diff, int ulp_diff) -{ - if (fabsf(a - b) < abs_diff) { - return true; - } - - if ((a < 0.0f) != (b < 0.0f)) { - return false; - } - - return (abs(__float_as_int(a) - __float_as_int(b)) < ulp_diff); -} - -/* Calculate the angle between the two vectors a and b. - * The usual approach `acos(dot(a, b))` has severe precision issues for small angles, - * which are avoided by this method. - * Based on "Mangled Angles" from https://people.eecs.berkeley.edu/~wkahan/Mindless.pdf - */ -ccl_device_inline float precise_angle(float3 a, float3 b) -{ - return 2.0f * atan2f(len(a - b), len(a + b)); -} - -/* Return value which is greater than the given one and is a power of two. */ -ccl_device_inline uint next_power_of_two(uint x) -{ - return x == 0 ? 
1 : 1 << (32 - count_leading_zeros(x)); -} - -/* Return value which is lower than the given one and is a power of two. */ -ccl_device_inline uint prev_power_of_two(uint x) -{ - return x < 2 ? x : 1 << (31 - count_leading_zeros(x - 1)); -} - -#ifndef __has_builtin -# define __has_builtin(v) 0 -#endif - -/* Reverses the bits of a 32 bit integer. */ -ccl_device_inline uint32_t reverse_integer_bits(uint32_t x) -{ - /* Use a native instruction if it exists. */ -#if defined(__arm__) || defined(__aarch64__) - __asm__("rbit %w0, %w1" : "=r"(x) : "r"(x)); - return x; -#elif defined(__KERNEL_CUDA__) - return __brev(x); -#elif __has_builtin(__builtin_bitreverse32) - return __builtin_bitreverse32(x); -#else - /* Flip pairwise. */ - x = ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1); - /* Flip pairs. */ - x = ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2); - /* Flip nibbles. */ - x = ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4); - /* Flip bytes. CPUs have an instruction for that, pretty fast one. */ -# ifdef _MSC_VER - return _byteswap_ulong(x); -# elif defined(__INTEL_COMPILER) - return (uint32_t)_bswap((int)x); -# else - /* Assuming gcc or clang. */ - return __builtin_bswap32(x); -# endif -#endif -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_MATH_H__ */ diff --git a/intern/cycles/util/util_math_cdf.cpp b/intern/cycles/util/util_math_cdf.cpp deleted file mode 100644 index a58bab188ef..00000000000 --- a/intern/cycles/util/util_math_cdf.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "util/util_math_cdf.h" - -#include "util/util_algorithm.h" -#include "util/util_math.h" - -CCL_NAMESPACE_BEGIN - -/* Invert pre-calculated CDF function. */ -void util_cdf_invert(const int resolution, - const float from, - const float to, - const vector &cdf, - const bool make_symmetric, - vector &inv_cdf) -{ - const float inv_resolution = 1.0f / (float)resolution; - const float range = to - from; - inv_cdf.resize(resolution); - if (make_symmetric) { - const int half_size = (resolution - 1) / 2; - for (int i = 0; i <= half_size; i++) { - float x = i / (float)half_size; - int index = upper_bound(cdf.begin(), cdf.end(), x) - cdf.begin(); - float t; - if (index < cdf.size() - 1) { - t = (x - cdf[index]) / (cdf[index + 1] - cdf[index]); - } - else { - t = 0.0f; - index = cdf.size() - 1; - } - float y = ((index + t) / (resolution - 1)) * (2.0f * range); - inv_cdf[half_size + i] = 0.5f * (1.0f + y); - inv_cdf[half_size - i] = 0.5f * (1.0f - y); - } - } - else { - for (int i = 0; i < resolution; i++) { - float x = from + range * (float)i * inv_resolution; - int index = upper_bound(cdf.begin(), cdf.end(), x) - cdf.begin(); - float t; - if (index < cdf.size() - 1) { - t = (x - cdf[index]) / (cdf[index + 1] - cdf[index]); - } - else { - t = 0.0f; - index = resolution; - } - inv_cdf[i] = (index + t) * inv_resolution; - } - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_math_cdf.h b/intern/cycles/util/util_math_cdf.h deleted file mode 100644 index 43995204263..00000000000 --- a/intern/cycles/util/util_math_cdf.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_MATH_CDF_H__ -#define __UTIL_MATH_CDF_H__ - -#include "util/util_algorithm.h" -#include "util/util_math.h" -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -/* Evaluate CDF of a given functor with given range and resolution. */ -template -void util_cdf_evaluate( - const int resolution, const float from, const float to, Functor functor, vector &cdf) -{ - const int cdf_count = resolution + 1; - const float range = to - from; - cdf.resize(cdf_count); - cdf[0] = 0.0f; - /* Actual CDF evaluation. */ - for (int i = 0; i < resolution; ++i) { - float x = from + range * (float)i / (resolution - 1); - float y = functor(x); - cdf[i + 1] = cdf[i] + fabsf(y); - } - /* Normalize the CDF. */ - for (int i = 0; i <= resolution; i++) { - cdf[i] /= cdf[resolution]; - } -} - -/* Invert pre-calculated CDF function. */ -void util_cdf_invert(const int resolution, - const float from, - const float to, - const vector &cdf, - const bool make_symmetric, - vector &inv_cdf); - -/* Evaluate inverted CDF of a given functor with given range and resolution. */ -template -void util_cdf_inverted(const int resolution, - const float from, - const float to, - Functor functor, - const bool make_symmetric, - vector &inv_cdf) -{ - vector cdf; - /* There is no much smartness going around lower resolution for the CDF table, - * this just to match the old code from pixel filter so it all stays exactly - * the same and no regression tests are failed. 
- */ - util_cdf_evaluate(resolution - 1, from, to, functor, cdf); - util_cdf_invert(resolution, from, to, cdf, make_symmetric, inv_cdf); -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_MATH_H_CDF__ */ diff --git a/intern/cycles/util/util_math_fast.h b/intern/cycles/util/util_math_fast.h deleted file mode 100644 index cc924f36a71..00000000000 --- a/intern/cycles/util/util_math_fast.h +++ /dev/null @@ -1,652 +0,0 @@ -/* - * Adapted from OpenImageIO library with this license: - * - * Copyright 2008-2014 Larry Gritz and the other authors and contributors. - * All Rights Reserved. - - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the software's owners nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * (This is the Modified BSD License) - * - * A few bits here are based upon code from NVIDIA that was also released - * under the same modified BSD license, and marked as: - * Copyright 2004 NVIDIA Corporation. All Rights Reserved. - * - * Some parts of this file were first open-sourced in Open Shading Language, - * then later moved here. The original copyright notice was: - * Copyright (c) 2009-2014 Sony Pictures Imageworks Inc., et al. - * - * Many of the math functions were copied from or inspired by other - * public domain sources or open source packages with compatible licenses. - * The individual functions give references were applicable. - */ - -#ifndef __UTIL_FAST_MATH__ -#define __UTIL_FAST_MATH__ - -CCL_NAMESPACE_BEGIN - -ccl_device_inline float madd(const float a, const float b, const float c) -{ - /* NOTE: In the future we may want to explicitly ask for a fused - * multiply-add in a specialized version for float. - * - * NOTE: GCC/ICC will turn this (for float) into a FMA unless - * explicitly asked not to, clang seems to leave the code alone. - */ - return a * b + c; -} - -ccl_device_inline float4 madd4(const float4 a, const float4 b, const float4 c) -{ - return a * b + c; -} - -/* - * FAST & APPROXIMATE MATH - * - * The functions named "fast_*" provide a set of replacements to libm that - * are much faster at the expense of some accuracy and robust handling of - * extreme values. 
One design goal for these approximation was to avoid - * branches as much as possible and operate on single precision values only - * so that SIMD versions should be straightforward ports We also try to - * implement "safe" semantics (ie: clamp to valid range where possible) - * natively since wrapping these inline calls in another layer would be - * wasteful. - * - * Some functions are fast_safe_*, which is both a faster approximation as - * well as clamped input domain to ensure no NaN, Inf, or divide by zero. - */ - -/* Round to nearest integer, returning as an int. */ -ccl_device_inline int fast_rint(float x) -{ - /* used by sin/cos/tan range reduction. */ -#ifdef __KERNEL_SSE4__ - /* Single `roundps` instruction on SSE4.1+ (for gcc/clang at least). */ - return float_to_int(rintf(x)); -#else - /* emulate rounding by adding/subtracting 0.5. */ - return float_to_int(x + copysignf(0.5f, x)); -#endif -} - -ccl_device float fast_sinf(float x) -{ - /* Very accurate argument reduction from SLEEF, - * starts failing around x=262000 - * - * Results on: [-2pi,2pi]. - * - * Examined 2173837240 values of sin: 0.00662760244 avg ulp diff, 2 max ulp, - * 1.19209e-07 max error - */ - int q = fast_rint(x * M_1_PI_F); - float qf = (float)q; - x = madd(qf, -0.78515625f * 4, x); - x = madd(qf, -0.00024187564849853515625f * 4, x); - x = madd(qf, -3.7747668102383613586e-08f * 4, x); - x = madd(qf, -1.2816720341285448015e-12f * 4, x); - x = M_PI_2_F - (M_PI_2_F - x); /* Crush denormals */ - float s = x * x; - if ((q & 1) != 0) - x = -x; - /* This polynomial approximation has very low error on [-pi/2,+pi/2] - * 1.19209e-07 max error in total over [-2pi,+2pi]. 
*/ - float u = 2.6083159809786593541503e-06f; - u = madd(u, s, -0.0001981069071916863322258f); - u = madd(u, s, +0.00833307858556509017944336f); - u = madd(u, s, -0.166666597127914428710938f); - u = madd(s, u * x, x); - /* For large x, the argument reduction can fail and the polynomial can be - * evaluated with arguments outside the valid internal. Just clamp the bad - * values away (setting to 0.0f means no branches need to be generated). */ - if (fabsf(u) > 1.0f) { - u = 0.0f; - } - return u; -} - -ccl_device float fast_cosf(float x) -{ - /* Same argument reduction as fast_sinf(). */ - int q = fast_rint(x * M_1_PI_F); - float qf = (float)q; - x = madd(qf, -0.78515625f * 4, x); - x = madd(qf, -0.00024187564849853515625f * 4, x); - x = madd(qf, -3.7747668102383613586e-08f * 4, x); - x = madd(qf, -1.2816720341285448015e-12f * 4, x); - x = M_PI_2_F - (M_PI_2_F - x); /* Crush denormals. */ - float s = x * x; - /* Polynomial from SLEEF's sincosf, max error is - * 4.33127e-07 over [-2pi,2pi] (98% of values are "exact"). */ - float u = -2.71811842367242206819355e-07f; - u = madd(u, s, +2.47990446951007470488548e-05f); - u = madd(u, s, -0.00138888787478208541870117f); - u = madd(u, s, +0.0416666641831398010253906f); - u = madd(u, s, -0.5f); - u = madd(u, s, +1.0f); - if ((q & 1) != 0) { - u = -u; - } - if (fabsf(u) > 1.0f) { - u = 0.0f; - } - return u; -} - -ccl_device void fast_sincosf(float x, ccl_private float *sine, ccl_private float *cosine) -{ - /* Same argument reduction as fast_sin. */ - int q = fast_rint(x * M_1_PI_F); - float qf = (float)q; - x = madd(qf, -0.78515625f * 4, x); - x = madd(qf, -0.00024187564849853515625f * 4, x); - x = madd(qf, -3.7747668102383613586e-08f * 4, x); - x = madd(qf, -1.2816720341285448015e-12f * 4, x); - x = M_PI_2_F - (M_PI_2_F - x); // crush denormals - float s = x * x; - /* NOTE: same exact polynomials as fast_sinf() and fast_cosf() above. 
*/ - if ((q & 1) != 0) { - x = -x; - } - float su = 2.6083159809786593541503e-06f; - su = madd(su, s, -0.0001981069071916863322258f); - su = madd(su, s, +0.00833307858556509017944336f); - su = madd(su, s, -0.166666597127914428710938f); - su = madd(s, su * x, x); - float cu = -2.71811842367242206819355e-07f; - cu = madd(cu, s, +2.47990446951007470488548e-05f); - cu = madd(cu, s, -0.00138888787478208541870117f); - cu = madd(cu, s, +0.0416666641831398010253906f); - cu = madd(cu, s, -0.5f); - cu = madd(cu, s, +1.0f); - if ((q & 1) != 0) { - cu = -cu; - } - if (fabsf(su) > 1.0f) { - su = 0.0f; - } - if (fabsf(cu) > 1.0f) { - cu = 0.0f; - } - *sine = su; - *cosine = cu; -} - -/* NOTE: this approximation is only valid on [-8192.0,+8192.0], it starts - * becoming really poor outside of this range because the reciprocal amplifies - * errors. - */ -ccl_device float fast_tanf(float x) -{ - /* Derived from SLEEF implementation. - * - * Note that we cannot apply the "denormal crush" trick everywhere because - * we sometimes need to take the reciprocal of the polynomial - */ - int q = fast_rint(x * 2.0f * M_1_PI_F); - float qf = (float)q; - x = madd(qf, -0.78515625f * 2, x); - x = madd(qf, -0.00024187564849853515625f * 2, x); - x = madd(qf, -3.7747668102383613586e-08f * 2, x); - x = madd(qf, -1.2816720341285448015e-12f * 2, x); - if ((q & 1) == 0) { - /* Crush denormals (only if we aren't inverting the result later). */ - x = M_PI_4_F - (M_PI_4_F - x); - } - float s = x * x; - float u = 0.00927245803177356719970703f; - u = madd(u, s, 0.00331984995864331722259521f); - u = madd(u, s, 0.0242998078465461730957031f); - u = madd(u, s, 0.0534495301544666290283203f); - u = madd(u, s, 0.133383005857467651367188f); - u = madd(u, s, 0.333331853151321411132812f); - u = madd(s, u * x, x); - if ((q & 1) != 0) { - u = -1.0f / u; - } - return u; -} - -/* Fast, approximate sin(x*M_PI) with maximum absolute error of 0.000918954611. 
- * - * Adapted from http://devmaster.net/posts/9648/fast-and-accurate-sine-cosine#comment-76773 - */ -ccl_device float fast_sinpif(float x) -{ - /* Fast trick to strip the integral part off, so our domain is [-1, 1]. */ - const float z = x - ((x + 25165824.0f) - 25165824.0f); - const float y = z - z * fabsf(z); - const float Q = 3.10396624f; - const float P = 3.584135056f; /* P = 16-4*Q */ - return y * (Q + P * fabsf(y)); - - /* The original article used inferior constants for Q and P and - * so had max error 1.091e-3. - * - * The optimal value for Q was determined by exhaustive search, minimizing - * the absolute numerical error relative to float(std::sin(double(phi*M_PI))) - * over the interval [0,2] (which is where most of the invocations happen). - * - * The basic idea of this approximation starts with the coarse approximation: - * sin(pi*x) ~= f(x) = 4 * (x - x * abs(x)) - * - * This approximation always _over_ estimates the target. On the other hand, - * the curve: - * sin(pi*x) ~= f(x) * abs(f(x)) / 4 - * - * always lies _under_ the target. Thus we can simply numerically search for - * the optimal constant to LERP these curves into a more precise - * approximation. - * - * After folding the constants together and simplifying the resulting math, - * we end up with the compact implementation above. - * - * NOTE: this function actually computes sin(x * pi) which avoids one or two - * mults in many cases and guarantees exact values at integer periods. - */ -} - -/* Fast approximate cos(x*M_PI) with ~0.1% absolute error. */ -ccl_device_inline float fast_cospif(float x) -{ - return fast_sinpif(x + 0.5f); -} - -ccl_device float fast_acosf(float x) -{ - const float f = fabsf(x); - /* clamp and crush denormals. */ - const float m = (f < 1.0f) ? 
1.0f - (1.0f - f) : 1.0f; - /* Based on http://www.pouet.net/topic.php?which=9132&page=2 - * 85% accurate (ulp 0) - * Examined 2130706434 values of acos: - * 15.2000597 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // without "denormal crush" - * Examined 2130706434 values of acos: - * 15.2007108 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // with "denormal crush" - */ - const float a = sqrtf(1.0f - m) * - (1.5707963267f + m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f))); - return x < 0 ? M_PI_F - a : a; -} - -ccl_device float fast_asinf(float x) -{ - /* Based on acosf approximation above. - * Max error is 4.51133e-05 (ulps are higher because we are consistently off - * by a little amount). - */ - const float f = fabsf(x); - /* Clamp and crush denormals. */ - const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f; - const float a = M_PI_2_F - - sqrtf(1.0f - m) * (1.5707963267f + - m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f))); - return copysignf(a, x); -} - -ccl_device float fast_atanf(float x) -{ - const float a = fabsf(x); - const float k = a > 1.0f ? 1 / a : a; - const float s = 1.0f - (1.0f - k); /* Crush denormals. */ - const float t = s * s; - /* http://mathforum.org/library/drmath/view/62672.html - * Examined 4278190080 values of atan: - * 2.36864877 avg ulp diff, 302 max ulp, 6.55651e-06 max error // (with denormals) - * Examined 4278190080 values of atan: - * 171160502 avg ulp diff, 855638016 max ulp, 6.55651e-06 max error // (crush denormals) - */ - float r = s * madd(0.43157974f, t, 1.0f) / madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f); - if (a > 1.0f) { - r = M_PI_2_F - r; - } - return copysignf(r, x); -} - -ccl_device float fast_atan2f(float y, float x) -{ - /* Based on atan approximation above. - * - * The special cases around 0 and infinity were tested explicitly. - * - * The only case not handled correctly is x=NaN,y=0 which returns 0 instead - * of nan. 
- */ - const float a = fabsf(x); - const float b = fabsf(y); - - const float k = (b == 0) ? 0.0f : ((a == b) ? 1.0f : (b > a ? a / b : b / a)); - const float s = 1.0f - (1.0f - k); /* Crush denormals */ - const float t = s * s; - - float r = s * madd(0.43157974f, t, 1.0f) / madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f); - - if (b > a) { - /* Account for arg reduction. */ - r = M_PI_2_F - r; - } - /* Test sign bit of x. */ - if (__float_as_uint(x) & 0x80000000u) { - r = M_PI_F - r; - } - return copysignf(r, y); -} - -/* Based on: - * - * https://github.com/LiraNuna/glsl-sse2/blob/master/source/vec4.h - */ -ccl_device float fast_log2f(float x) -{ - /* NOTE: clamp to avoid special cases and make result "safe" from large - * negative values/NAN's. */ - x = clamp(x, FLT_MIN, FLT_MAX); - unsigned bits = __float_as_uint(x); - int exponent = (int)(bits >> 23) - 127; - float f = __uint_as_float((bits & 0x007FFFFF) | 0x3f800000) - 1.0f; - /* Examined 2130706432 values of log2 on [1.17549435e-38,3.40282347e+38]: - * 0.0797524457 avg ulp diff, 3713596 max ulp, 7.62939e-06 max error. - * ulp histogram: - * 0 = 97.46% - * 1 = 2.29% - * 2 = 0.11% - */ - float f2 = f * f; - float f4 = f2 * f2; - float hi = madd(f, -0.00931049621349f, 0.05206469089414f); - float lo = madd(f, 0.47868480909345f, -0.72116591947498f); - hi = madd(f, hi, -0.13753123777116f); - hi = madd(f, hi, 0.24187369696082f); - hi = madd(f, hi, -0.34730547155299f); - lo = madd(f, lo, 1.442689881667200f); - return ((f4 * hi) + (f * lo)) + exponent; -} - -ccl_device_inline float fast_logf(float x) -{ - /* Examined 2130706432 values of logf on [1.17549435e-38,3.40282347e+38]: - * 0.313865375 avg ulp diff, 5148137 max ulp, 7.62939e-06 max error. - */ - return fast_log2f(x) * M_LN2_F; -} - -ccl_device_inline float fast_log10(float x) -{ - /* Examined 2130706432 values of log10f on [1.17549435e-38,3.40282347e+38]: - * 0.631237033 avg ulp diff, 4471615 max ulp, 3.8147e-06 max error. 
- */ - return fast_log2f(x) * M_LN2_F / M_LN10_F; -} - -ccl_device float fast_logb(float x) -{ - /* Don't bother with denormals. */ - x = fabsf(x); - x = clamp(x, FLT_MIN, FLT_MAX); - unsigned bits = __float_as_uint(x); - return (float)((int)(bits >> 23) - 127); -} - -ccl_device float fast_exp2f(float x) -{ - /* Clamp to safe range for final addition. */ - x = clamp(x, -126.0f, 126.0f); - /* Range reduction. */ - int m = (int)x; - x -= m; - x = 1.0f - (1.0f - x); /* Crush denormals (does not affect max ulps!). */ - /* 5th degree polynomial generated with sollya - * Examined 2247622658 values of exp2 on [-126,126]: 2.75764912 avg ulp diff, - * 232 max ulp. - * - * ulp histogram: - * 0 = 87.81% - * 1 = 4.18% - */ - float r = 1.33336498402e-3f; - r = madd(x, r, 9.810352697968e-3f); - r = madd(x, r, 5.551834031939e-2f); - r = madd(x, r, 0.2401793301105f); - r = madd(x, r, 0.693144857883f); - r = madd(x, r, 1.0f); - /* Multiply by 2 ^ m by adding in the exponent. */ - /* NOTE: left-shift of negative number is undefined behavior. */ - return __uint_as_float(__float_as_uint(r) + ((unsigned)m << 23)); -} - -ccl_device_inline float fast_expf(float x) -{ - /* Examined 2237485550 values of exp on [-87.3300018,87.3300018]: - * 2.6666452 avg ulp diff, 230 max ulp. - */ - return fast_exp2f(x / M_LN2_F); -} - -#if defined(__KERNEL_CPU__) && !defined(_MSC_VER) -/* MSVC seems to have a code-gen bug here in at least SSE41/AVX, see - * T78047 and T78869 for details. Just disable for now, it only makes - * a small difference in denoising performance. 
*/ -ccl_device float4 fast_exp2f4(float4 x) -{ - const float4 one = make_float4(1.0f); - const float4 limit = make_float4(126.0f); - x = clamp(x, -limit, limit); - int4 m = make_int4(x); - x = one - (one - (x - make_float4(m))); - float4 r = make_float4(1.33336498402e-3f); - r = madd4(x, r, make_float4(9.810352697968e-3f)); - r = madd4(x, r, make_float4(5.551834031939e-2f)); - r = madd4(x, r, make_float4(0.2401793301105f)); - r = madd4(x, r, make_float4(0.693144857883f)); - r = madd4(x, r, make_float4(1.0f)); - return __int4_as_float4(__float4_as_int4(r) + (m << 23)); -} - -ccl_device_inline float4 fast_expf4(float4 x) -{ - return fast_exp2f4(x / M_LN2_F); -} -#else -ccl_device_inline float4 fast_expf4(float4 x) -{ - return make_float4(fast_expf(x.x), fast_expf(x.y), fast_expf(x.z), fast_expf(x.w)); -} -#endif - -ccl_device_inline float fast_exp10(float x) -{ - /* Examined 2217701018 values of exp10 on [-37.9290009,37.9290009]: - * 2.71732409 avg ulp diff, 232 max ulp. - */ - return fast_exp2f(x * M_LN10_F / M_LN2_F); -} - -ccl_device_inline float fast_expm1f(float x) -{ - if (fabsf(x) < 1e-5f) { - x = 1.0f - (1.0f - x); /* Crush denormals. */ - return madd(0.5f, x * x, x); - } - else { - return fast_expf(x) - 1.0f; - } -} - -ccl_device float fast_sinhf(float x) -{ - float a = fabsf(x); - if (a > 1.0f) { - /* Examined 53389559 values of sinh on [1,87.3300018]: - * 33.6886442 avg ulp diff, 178 max ulp. */ - float e = fast_expf(a); - return copysignf(0.5f * e - 0.5f / e, x); - } - else { - a = 1.0f - (1.0f - a); /* Crush denorms. */ - float a2 = a * a; - /* Degree 7 polynomial generated with sollya. */ - /* Examined 2130706434 values of sinh on [-1,1]: 1.19209e-07 max error. 
*/ - float r = 2.03945513931e-4f; - r = madd(r, a2, 8.32990277558e-3f); - r = madd(r, a2, 0.1666673421859f); - r = madd(r * a, a2, a); - return copysignf(r, x); - } -} - -ccl_device_inline float fast_coshf(float x) -{ - /* Examined 2237485550 values of cosh on [-87.3300018,87.3300018]: - * 1.78256726 avg ulp diff, 178 max ulp. - */ - float e = fast_expf(fabsf(x)); - return 0.5f * e + 0.5f / e; -} - -ccl_device_inline float fast_tanhf(float x) -{ - /* Examined 4278190080 values of tanh on [-3.40282347e+38,3.40282347e+38]: - * 3.12924e-06 max error. - */ - /* NOTE: ulp error is high because of sub-optimal handling around the origin. */ - float e = fast_expf(2.0f * fabsf(x)); - return copysignf(1.0f - 2.0f / (1.0f + e), x); -} - -ccl_device float fast_safe_powf(float x, float y) -{ - if (y == 0) - return 1.0f; /* x^1=1 */ - if (x == 0) - return 0.0f; /* 0^y=0 */ - float sign = 1.0f; - if (x < 0.0f) { - /* if x is negative, only deal with integer powers - * powf returns NaN for non-integers, we will return 0 instead. - */ - int ybits = __float_as_int(y) & 0x7fffffff; - if (ybits >= 0x4b800000) { - // always even int, keep positive - } - else if (ybits >= 0x3f800000) { - /* Bigger than 1, check. */ - int k = (ybits >> 23) - 127; /* Get exponent. */ - int j = ybits >> (23 - k); /* Shift out possible fractional bits. */ - if ((j << (23 - k)) == ybits) { /* rebuild number and check for a match. */ - /* +1 for even, -1 for odd. */ - sign = __int_as_float(0x3f800000 | (j << 31)); - } - else { - /* Not an integer. */ - return 0.0f; - } - } - else { - /* Not an integer. */ - return 0.0f; - } - } - return sign * fast_exp2f(y * fast_log2f(fabsf(x))); -} - -/* TODO(sergey): Check speed with our erf functions implementation from - * bsdf_microfacet.h. - */ - -ccl_device_inline float fast_erff(float x) -{ - /* Examined 1082130433 values of erff on [0,4]: 1.93715e-06 max error. */ - /* Abramowitz and Stegun, 7.1.28. 
*/ - const float a1 = 0.0705230784f; - const float a2 = 0.0422820123f; - const float a3 = 0.0092705272f; - const float a4 = 0.0001520143f; - const float a5 = 0.0002765672f; - const float a6 = 0.0000430638f; - const float a = fabsf(x); - if (a >= 12.3f) { - return copysignf(1.0f, x); - } - const float b = 1.0f - (1.0f - a); /* Crush denormals. */ - const float r = madd( - madd(madd(madd(madd(madd(a6, b, a5), b, a4), b, a3), b, a2), b, a1), b, 1.0f); - const float s = r * r; /* ^2 */ - const float t = s * s; /* ^4 */ - const float u = t * t; /* ^8 */ - const float v = u * u; /* ^16 */ - return copysignf(1.0f - 1.0f / v, x); -} - -ccl_device_inline float fast_erfcf(float x) -{ - /* Examined 2164260866 values of erfcf on [-4,4]: 1.90735e-06 max error. - * - * ulp histogram: - * - * 0 = 80.30% - */ - return 1.0f - fast_erff(x); -} - -ccl_device_inline float fast_ierff(float x) -{ - /* From: Approximating the `erfinv` function by Mike Giles. */ - /* To avoid trouble at the limit, clamp input to 1-eps. 
*/ - float a = fabsf(x); - if (a > 0.99999994f) { - a = 0.99999994f; - } - float w = -fast_logf((1.0f - a) * (1.0f + a)), p; - if (w < 5.0f) { - w = w - 2.5f; - p = 2.81022636e-08f; - p = madd(p, w, 3.43273939e-07f); - p = madd(p, w, -3.5233877e-06f); - p = madd(p, w, -4.39150654e-06f); - p = madd(p, w, 0.00021858087f); - p = madd(p, w, -0.00125372503f); - p = madd(p, w, -0.00417768164f); - p = madd(p, w, 0.246640727f); - p = madd(p, w, 1.50140941f); - } - else { - w = sqrtf(w) - 3.0f; - p = -0.000200214257f; - p = madd(p, w, 0.000100950558f); - p = madd(p, w, 0.00134934322f); - p = madd(p, w, -0.00367342844f); - p = madd(p, w, 0.00573950773f); - p = madd(p, w, -0.0076224613f); - p = madd(p, w, 0.00943887047f); - p = madd(p, w, 1.00167406f); - p = madd(p, w, 2.83297682f); - } - return p * x; -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_FAST_MATH__ */ diff --git a/intern/cycles/util/util_math_float2.h b/intern/cycles/util/util_math_float2.h deleted file mode 100644 index 25eda840214..00000000000 --- a/intern/cycles/util/util_math_float2.h +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_MATH_FLOAT2_H__ -#define __UTIL_MATH_FLOAT2_H__ - -#ifndef __UTIL_MATH_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -CCL_NAMESPACE_BEGIN - -/******************************************************************************* - * Declaration. - */ - -ccl_device_inline float2 operator-(const float2 &a); -ccl_device_inline float2 operator*(const float2 &a, const float2 &b); -ccl_device_inline float2 operator*(const float2 &a, float f); -ccl_device_inline float2 operator*(float f, const float2 &a); -ccl_device_inline float2 operator/(float f, const float2 &a); -ccl_device_inline float2 operator/(const float2 &a, float f); -ccl_device_inline float2 operator/(const float2 &a, const float2 &b); -ccl_device_inline float2 operator+(const float2 &a, const float f); -ccl_device_inline float2 operator+(const float2 &a, const float2 &b); -ccl_device_inline float2 operator-(const float2 &a, const float f); -ccl_device_inline float2 operator-(const float2 &a, const float2 &b); -ccl_device_inline float2 operator+=(float2 &a, const float2 &b); -ccl_device_inline float2 operator*=(float2 &a, const float2 &b); -ccl_device_inline float2 operator*=(float2 &a, float f); -ccl_device_inline float2 operator/=(float2 &a, const float2 &b); -ccl_device_inline float2 operator/=(float2 &a, float f); - -ccl_device_inline bool operator==(const float2 &a, const float2 &b); -ccl_device_inline bool operator!=(const float2 &a, const float2 &b); - -ccl_device_inline bool is_zero(const float2 &a); -ccl_device_inline float average(const float2 &a); -ccl_device_inline float distance(const float2 &a, const float2 &b); -ccl_device_inline float dot(const float2 &a, const float2 &b); -ccl_device_inline float cross(const float2 &a, const float2 &b); -ccl_device_inline float len(const float2 &a); -ccl_device_inline float2 normalize(const float2 &a); -ccl_device_inline float2 normalize_len(const float2 &a, float *t); -ccl_device_inline float2 safe_normalize(const float2 &a); -ccl_device_inline float2 min(const float2 &a, const float2 &b); -ccl_device_inline float2 max(const float2 &a, const float2 &b); 
-ccl_device_inline float2 clamp(const float2 &a, const float2 &mn, const float2 &mx); -ccl_device_inline float2 fabs(const float2 &a); -ccl_device_inline float2 as_float2(const float4 &a); -ccl_device_inline float2 interp(const float2 &a, const float2 &b, float t); -ccl_device_inline float2 floor(const float2 &a); - -ccl_device_inline float2 safe_divide_float2_float(const float2 a, const float b); - -/******************************************************************************* - * Definition. - */ - -ccl_device_inline float2 zero_float2() -{ - return make_float2(0.0f, 0.0f); -} - -ccl_device_inline float2 one_float2() -{ - return make_float2(1.0f, 1.0f); -} - -ccl_device_inline float2 operator-(const float2 &a) -{ - return make_float2(-a.x, -a.y); -} - -ccl_device_inline float2 operator*(const float2 &a, const float2 &b) -{ - return make_float2(a.x * b.x, a.y * b.y); -} - -ccl_device_inline float2 operator*(const float2 &a, float f) -{ - return make_float2(a.x * f, a.y * f); -} - -ccl_device_inline float2 operator*(float f, const float2 &a) -{ - return make_float2(a.x * f, a.y * f); -} - -ccl_device_inline float2 operator/(float f, const float2 &a) -{ - return make_float2(f / a.x, f / a.y); -} - -ccl_device_inline float2 operator/(const float2 &a, float f) -{ - float invf = 1.0f / f; - return make_float2(a.x * invf, a.y * invf); -} - -ccl_device_inline float2 operator/(const float2 &a, const float2 &b) -{ - return make_float2(a.x / b.x, a.y / b.y); -} - -ccl_device_inline float2 operator+(const float2 &a, const float f) -{ - return a + make_float2(f, f); -} - -ccl_device_inline float2 operator+(const float2 &a, const float2 &b) -{ - return make_float2(a.x + b.x, a.y + b.y); -} - -ccl_device_inline float2 operator-(const float2 &a, const float f) -{ - return a - make_float2(f, f); -} - -ccl_device_inline float2 operator-(const float2 &a, const float2 &b) -{ - return make_float2(a.x - b.x, a.y - b.y); -} - -ccl_device_inline float2 operator+=(float2 &a, const 
float2 &b) -{ - return a = a + b; -} - -ccl_device_inline float2 operator*=(float2 &a, const float2 &b) -{ - return a = a * b; -} - -ccl_device_inline float2 operator*=(float2 &a, float f) -{ - return a = a * f; -} - -ccl_device_inline float2 operator/=(float2 &a, const float2 &b) -{ - return a = a / b; -} - -ccl_device_inline float2 operator/=(float2 &a, float f) -{ - float invf = 1.0f / f; - return a = a * invf; -} - -ccl_device_inline bool operator==(const float2 &a, const float2 &b) -{ - return (a.x == b.x && a.y == b.y); -} - -ccl_device_inline bool operator!=(const float2 &a, const float2 &b) -{ - return !(a == b); -} - -ccl_device_inline bool is_zero(const float2 &a) -{ - return (a.x == 0.0f && a.y == 0.0f); -} - -ccl_device_inline float average(const float2 &a) -{ - return (a.x + a.y) * (1.0f / 2.0f); -} - -ccl_device_inline float distance(const float2 &a, const float2 &b) -{ - return len(a - b); -} - -ccl_device_inline float dot(const float2 &a, const float2 &b) -{ - return a.x * b.x + a.y * b.y; -} - -ccl_device_inline float cross(const float2 &a, const float2 &b) -{ - return (a.x * b.y - a.y * b.x); -} - -ccl_device_inline float len(const float2 &a) -{ - return sqrtf(dot(a, a)); -} - -ccl_device_inline float2 normalize(const float2 &a) -{ - return a / len(a); -} - -ccl_device_inline float2 normalize_len(const float2 &a, ccl_private float *t) -{ - *t = len(a); - return a / (*t); -} - -ccl_device_inline float2 safe_normalize(const float2 &a) -{ - float t = len(a); - return (t != 0.0f) ? 
a / t : a; -} - -ccl_device_inline float2 min(const float2 &a, const float2 &b) -{ - return make_float2(min(a.x, b.x), min(a.y, b.y)); -} - -ccl_device_inline float2 max(const float2 &a, const float2 &b) -{ - return make_float2(max(a.x, b.x), max(a.y, b.y)); -} - -ccl_device_inline float2 clamp(const float2 &a, const float2 &mn, const float2 &mx) -{ - return min(max(a, mn), mx); -} - -ccl_device_inline float2 fabs(const float2 &a) -{ - return make_float2(fabsf(a.x), fabsf(a.y)); -} - -ccl_device_inline float2 as_float2(const float4 &a) -{ - return make_float2(a.x, a.y); -} - -ccl_device_inline float2 interp(const float2 &a, const float2 &b, float t) -{ - return a + t * (b - a); -} - -ccl_device_inline float2 mix(const float2 &a, const float2 &b, float t) -{ - return a + t * (b - a); -} - -ccl_device_inline float2 floor(const float2 &a) -{ - return make_float2(floorf(a.x), floorf(a.y)); -} - -ccl_device_inline float2 safe_divide_float2_float(const float2 a, const float b) -{ - return (b != 0.0f) ? a / b : zero_float2(); -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_MATH_FLOAT2_H__ */ diff --git a/intern/cycles/util/util_math_float3.h b/intern/cycles/util/util_math_float3.h deleted file mode 100644 index c3230a8068c..00000000000 --- a/intern/cycles/util/util_math_float3.h +++ /dev/null @@ -1,530 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __UTIL_MATH_FLOAT3_H__ -#define __UTIL_MATH_FLOAT3_H__ - -#ifndef __UTIL_MATH_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -/******************************************************************************* - * Declaration. - */ - -ccl_device_inline float3 operator-(const float3 &a); -ccl_device_inline float3 operator*(const float3 &a, const float3 &b); -ccl_device_inline float3 operator*(const float3 &a, const float f); -ccl_device_inline float3 operator*(const float f, const float3 &a); -ccl_device_inline float3 operator/(const float f, const float3 &a); -ccl_device_inline float3 operator/(const float3 &a, const float f); -ccl_device_inline float3 operator/(const float3 &a, const float3 &b); -ccl_device_inline float3 operator+(const float3 &a, const float f); -ccl_device_inline float3 operator+(const float3 &a, const float3 &b); -ccl_device_inline float3 operator-(const float3 &a, const float f); -ccl_device_inline float3 operator-(const float3 &a, const float3 &b); -ccl_device_inline float3 operator+=(float3 &a, const float3 &b); -ccl_device_inline float3 operator-=(float3 &a, const float3 &b); -ccl_device_inline float3 operator*=(float3 &a, const float3 &b); -ccl_device_inline float3 operator*=(float3 &a, float f); -ccl_device_inline float3 operator/=(float3 &a, const float3 &b); -ccl_device_inline float3 operator/=(float3 &a, float f); - -ccl_device_inline bool operator==(const float3 &a, const float3 &b); -ccl_device_inline bool operator!=(const float3 &a, const float3 &b); - -ccl_device_inline float distance(const float3 &a, const float3 &b); -ccl_device_inline float dot(const float3 &a, const float3 &b); -ccl_device_inline float dot_xy(const float3 &a, const float3 &b); -ccl_device_inline float3 cross(const float3 &a, const float3 &b); -ccl_device_inline float3 normalize(const float3 &a); -ccl_device_inline float3 min(const float3 &a, const float3 &b); -ccl_device_inline 
float3 max(const float3 &a, const float3 &b); -ccl_device_inline float3 clamp(const float3 &a, const float3 &mn, const float3 &mx); -ccl_device_inline float3 fabs(const float3 &a); -ccl_device_inline float3 mix(const float3 &a, const float3 &b, float t); -ccl_device_inline float3 rcp(const float3 &a); -ccl_device_inline float3 sqrt(const float3 &a); -ccl_device_inline float3 floor(const float3 &a); -ccl_device_inline float3 ceil(const float3 &a); - -ccl_device_inline float min3(float3 a); -ccl_device_inline float max3(float3 a); -ccl_device_inline float len(const float3 a); -ccl_device_inline float len_squared(const float3 a); - -ccl_device_inline float3 reflect(const float3 incident, const float3 normal); -ccl_device_inline float3 project(const float3 v, const float3 v_proj); - -ccl_device_inline float3 saturate3(float3 a); -ccl_device_inline float3 safe_normalize(const float3 a); -ccl_device_inline float3 normalize_len(const float3 a, float *t); -ccl_device_inline float3 safe_normalize_len(const float3 a, float *t); -ccl_device_inline float3 safe_divide_float3_float3(const float3 a, const float3 b); -ccl_device_inline float3 safe_divide_float3_float(const float3 a, const float b); -ccl_device_inline float3 interp(float3 a, float3 b, float t); -ccl_device_inline float3 sqr3(float3 a); - -ccl_device_inline bool is_zero(const float3 a); -ccl_device_inline float reduce_add(const float3 a); -ccl_device_inline float average(const float3 a); -ccl_device_inline bool isequal_float3(const float3 a, const float3 b); - -/******************************************************************************* - * Definition. 
- */ - -ccl_device_inline float3 zero_float3() -{ -#ifdef __KERNEL_SSE__ - return float3(_mm_setzero_ps()); -#else - return make_float3(0.0f, 0.0f, 0.0f); -#endif -} - -ccl_device_inline float3 one_float3() -{ - return make_float3(1.0f, 1.0f, 1.0f); -} - -ccl_device_inline float3 operator-(const float3 &a) -{ -#ifdef __KERNEL_SSE__ - return float3(_mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000)))); -#else - return make_float3(-a.x, -a.y, -a.z); -#endif -} - -ccl_device_inline float3 operator*(const float3 &a, const float3 &b) -{ -#ifdef __KERNEL_SSE__ - return float3(_mm_mul_ps(a.m128, b.m128)); -#else - return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); -#endif -} - -ccl_device_inline float3 operator*(const float3 &a, const float f) -{ -#ifdef __KERNEL_SSE__ - return float3(_mm_mul_ps(a.m128, _mm_set1_ps(f))); -#else - return make_float3(a.x * f, a.y * f, a.z * f); -#endif -} - -ccl_device_inline float3 operator*(const float f, const float3 &a) -{ -#if defined(__KERNEL_SSE__) - return float3(_mm_mul_ps(_mm_set1_ps(f), a.m128)); -#else - return make_float3(a.x * f, a.y * f, a.z * f); -#endif -} - -ccl_device_inline float3 operator/(const float f, const float3 &a) -{ -#if defined(__KERNEL_SSE__) - return float3(_mm_div_ps(_mm_set1_ps(f), a.m128)); -#else - return make_float3(f / a.x, f / a.y, f / a.z); -#endif -} - -ccl_device_inline float3 operator/(const float3 &a, const float f) -{ - float invf = 1.0f / f; - return a * invf; -} - -ccl_device_inline float3 operator/(const float3 &a, const float3 &b) -{ -#if defined(__KERNEL_SSE__) - return float3(_mm_div_ps(a.m128, b.m128)); -#else - return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); -#endif -} - -ccl_device_inline float3 operator+(const float3 &a, const float f) -{ - return a + make_float3(f, f, f); -} - -ccl_device_inline float3 operator+(const float3 &a, const float3 &b) -{ -#ifdef __KERNEL_SSE__ - return float3(_mm_add_ps(a.m128, b.m128)); -#else - return make_float3(a.x + b.x, a.y + b.y, 
a.z + b.z); -#endif -} - -ccl_device_inline float3 operator-(const float3 &a, const float f) -{ - return a - make_float3(f, f, f); -} - -ccl_device_inline float3 operator-(const float3 &a, const float3 &b) -{ -#ifdef __KERNEL_SSE__ - return float3(_mm_sub_ps(a.m128, b.m128)); -#else - return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); -#endif -} - -ccl_device_inline float3 operator+=(float3 &a, const float3 &b) -{ - return a = a + b; -} - -ccl_device_inline float3 operator-=(float3 &a, const float3 &b) -{ - return a = a - b; -} - -ccl_device_inline float3 operator*=(float3 &a, const float3 &b) -{ - return a = a * b; -} - -ccl_device_inline float3 operator*=(float3 &a, float f) -{ - return a = a * f; -} - -ccl_device_inline float3 operator/=(float3 &a, const float3 &b) -{ - return a = a / b; -} - -ccl_device_inline float3 operator/=(float3 &a, float f) -{ - float invf = 1.0f / f; - return a = a * invf; -} - -ccl_device_inline bool operator==(const float3 &a, const float3 &b) -{ -#ifdef __KERNEL_SSE__ - return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 7) == 7; -#else - return (a.x == b.x && a.y == b.y && a.z == b.z); -#endif -} - -ccl_device_inline bool operator!=(const float3 &a, const float3 &b) -{ - return !(a == b); -} - -ccl_device_inline float distance(const float3 &a, const float3 &b) -{ - return len(a - b); -} - -ccl_device_inline float dot(const float3 &a, const float3 &b) -{ -#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) - return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F)); -#else - return a.x * b.x + a.y * b.y + a.z * b.z; -#endif -} - -ccl_device_inline float dot_xy(const float3 &a, const float3 &b) -{ -#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) - return _mm_cvtss_f32(_mm_hadd_ps(_mm_mul_ps(a, b), b)); -#else - return a.x * b.x + a.y * b.y; -#endif -} - -ccl_device_inline float3 cross(const float3 &a, const float3 &b) -{ - float3 r = make_float3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x); - return r; -} 
- -ccl_device_inline float3 normalize(const float3 &a) -{ -#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) - __m128 norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F)); - return float3(_mm_div_ps(a.m128, norm)); -#else - return a / len(a); -#endif -} - -ccl_device_inline float3 min(const float3 &a, const float3 &b) -{ -#ifdef __KERNEL_SSE__ - return float3(_mm_min_ps(a.m128, b.m128)); -#else - return make_float3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); -#endif -} - -ccl_device_inline float3 max(const float3 &a, const float3 &b) -{ -#ifdef __KERNEL_SSE__ - return float3(_mm_max_ps(a.m128, b.m128)); -#else - return make_float3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); -#endif -} - -ccl_device_inline float3 clamp(const float3 &a, const float3 &mn, const float3 &mx) -{ - return min(max(a, mn), mx); -} - -ccl_device_inline float3 fabs(const float3 &a) -{ -#ifdef __KERNEL_SSE__ -# ifdef __KERNEL_NEON__ - return float3(vabsq_f32(a.m128)); -# else - __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); - return float3(_mm_and_ps(a.m128, mask)); -# endif -#else - return make_float3(fabsf(a.x), fabsf(a.y), fabsf(a.z)); -#endif -} - -ccl_device_inline float3 sqrt(const float3 &a) -{ -#ifdef __KERNEL_SSE__ - return float3(_mm_sqrt_ps(a)); -#else - return make_float3(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z)); -#endif -} - -ccl_device_inline float3 floor(const float3 &a) -{ -#ifdef __KERNEL_SSE__ - return float3(_mm_floor_ps(a)); -#else - return make_float3(floorf(a.x), floorf(a.y), floorf(a.z)); -#endif -} - -ccl_device_inline float3 ceil(const float3 &a) -{ -#ifdef __KERNEL_SSE__ - return float3(_mm_ceil_ps(a)); -#else - return make_float3(ceilf(a.x), ceilf(a.y), ceilf(a.z)); -#endif -} - -ccl_device_inline float3 mix(const float3 &a, const float3 &b, float t) -{ - return a + t * (b - a); -} - -ccl_device_inline float3 rcp(const float3 &a) -{ -#ifdef __KERNEL_SSE__ - /* Don't use _mm_rcp_ps due to poor precision. 
*/ - return float3(_mm_div_ps(_mm_set_ps1(1.0f), a.m128)); -#else - return make_float3(1.0f / a.x, 1.0f / a.y, 1.0f / a.z); -#endif -} - -ccl_device_inline float min3(float3 a) -{ - return min(min(a.x, a.y), a.z); -} - -ccl_device_inline float max3(float3 a) -{ - return max(max(a.x, a.y), a.z); -} - -ccl_device_inline float len(const float3 a) -{ -#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) - return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(a.m128, a.m128, 0x7F))); -#else - return sqrtf(dot(a, a)); -#endif -} - -ccl_device_inline float len_squared(const float3 a) -{ - return dot(a, a); -} - -ccl_device_inline float3 reflect(const float3 incident, const float3 normal) -{ - float3 unit_normal = normalize(normal); - return incident - 2.0f * unit_normal * dot(incident, unit_normal); -} - -ccl_device_inline float3 refract(const float3 incident, const float3 normal, const float eta) -{ - float k = 1.0f - eta * eta * (1.0f - dot(normal, incident) * dot(normal, incident)); - if (k < 0.0f) - return zero_float3(); - else - return eta * incident - (eta * dot(normal, incident) + sqrt(k)) * normal; -} - -ccl_device_inline float3 faceforward(const float3 vector, - const float3 incident, - const float3 reference) -{ - return (dot(reference, incident) < 0.0f) ? vector : -vector; -} - -ccl_device_inline float3 project(const float3 v, const float3 v_proj) -{ - float len_squared = dot(v_proj, v_proj); - return (len_squared != 0.0f) ? (dot(v, v_proj) / len_squared) * v_proj : zero_float3(); -} - -ccl_device_inline float3 saturate3(float3 a) -{ - return make_float3(saturate(a.x), saturate(a.y), saturate(a.z)); -} - -ccl_device_inline float3 normalize_len(const float3 a, ccl_private float *t) -{ - *t = len(a); - float x = 1.0f / *t; - return a * x; -} - -ccl_device_inline float3 safe_normalize(const float3 a) -{ - float t = len(a); - return (t != 0.0f) ? 
a * (1.0f / t) : a; -} - -ccl_device_inline float3 safe_normalize_len(const float3 a, ccl_private float *t) -{ - *t = len(a); - return (*t != 0.0f) ? a / (*t) : a; -} - -ccl_device_inline float3 safe_divide_float3_float3(const float3 a, const float3 b) -{ - return make_float3((b.x != 0.0f) ? a.x / b.x : 0.0f, - (b.y != 0.0f) ? a.y / b.y : 0.0f, - (b.z != 0.0f) ? a.z / b.z : 0.0f); -} - -ccl_device_inline float3 safe_divide_float3_float(const float3 a, const float b) -{ - return (b != 0.0f) ? a / b : zero_float3(); -} - -ccl_device_inline float3 interp(float3 a, float3 b, float t) -{ - return a + t * (b - a); -} - -ccl_device_inline float3 sqr3(float3 a) -{ - return a * a; -} - -ccl_device_inline bool is_zero(const float3 a) -{ -#ifdef __KERNEL_SSE__ - return a == make_float3(0.0f); -#else - return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f); -#endif -} - -ccl_device_inline float reduce_add(const float3 a) -{ -#if defined(__KERNEL_SSE__) && defined(__KERNEL_NEON__) - __m128 t = a.m128; - t[3] = 0.0f; - return vaddvq_f32(t); -#else - return (a.x + a.y + a.z); -#endif -} - -ccl_device_inline float average(const float3 a) -{ - return reduce_add(a) * (1.0f / 3.0f); -} - -ccl_device_inline bool isequal_float3(const float3 a, const float3 b) -{ - return a == b; -} - -ccl_device_inline float3 pow3(float3 v, float e) -{ - return make_float3(powf(v.x, e), powf(v.y, e), powf(v.z, e)); -} - -ccl_device_inline float3 exp3(float3 v) -{ - return make_float3(expf(v.x), expf(v.y), expf(v.z)); -} - -ccl_device_inline float3 log3(float3 v) -{ - return make_float3(logf(v.x), logf(v.y), logf(v.z)); -} - -ccl_device_inline int3 quick_floor_to_int3(const float3 a) -{ -#ifdef __KERNEL_SSE__ - int3 b = int3(_mm_cvttps_epi32(a.m128)); - int3 isneg = int3(_mm_castps_si128(_mm_cmplt_ps(a.m128, _mm_set_ps1(0.0f)))); - /* Unsaturated add 0xffffffff is the same as subtract -1. 
*/ - return b + isneg; -#else - return make_int3(quick_floor_to_int(a.x), quick_floor_to_int(a.y), quick_floor_to_int(a.z)); -#endif -} - -ccl_device_inline bool isfinite3_safe(float3 v) -{ - return isfinite_safe(v.x) && isfinite_safe(v.y) && isfinite_safe(v.z); -} - -ccl_device_inline float3 ensure_finite3(float3 v) -{ - if (!isfinite_safe(v.x)) - v.x = 0.0f; - if (!isfinite_safe(v.y)) - v.y = 0.0f; - if (!isfinite_safe(v.z)) - v.z = 0.0f; - return v; -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_MATH_FLOAT3_H__ */ diff --git a/intern/cycles/util/util_math_float4.h b/intern/cycles/util/util_math_float4.h deleted file mode 100644 index f30a78cfc69..00000000000 --- a/intern/cycles/util/util_math_float4.h +++ /dev/null @@ -1,536 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_MATH_FLOAT4_H__ -#define __UTIL_MATH_FLOAT4_H__ - -#ifndef __UTIL_MATH_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -/******************************************************************************* - * Declaration. 
- */ - -ccl_device_inline float4 operator-(const float4 &a); -ccl_device_inline float4 operator*(const float4 &a, const float4 &b); -ccl_device_inline float4 operator*(const float4 &a, float f); -ccl_device_inline float4 operator*(float f, const float4 &a); -ccl_device_inline float4 operator/(const float4 &a, float f); -ccl_device_inline float4 operator/(const float4 &a, const float4 &b); -ccl_device_inline float4 operator+(const float4 &a, const float f); -ccl_device_inline float4 operator+(const float4 &a, const float4 &b); -ccl_device_inline float4 operator-(const float4 &a, const float f); -ccl_device_inline float4 operator-(const float4 &a, const float4 &b); -ccl_device_inline float4 operator+=(float4 &a, const float4 &b); -ccl_device_inline float4 operator*=(float4 &a, const float4 &b); -ccl_device_inline float4 operator*=(float4 &a, float f); -ccl_device_inline float4 operator/=(float4 &a, float f); - -ccl_device_inline int4 operator<(const float4 &a, const float4 &b); -ccl_device_inline int4 operator>=(const float4 &a, const float4 &b); -ccl_device_inline int4 operator<=(const float4 &a, const float4 &b); -ccl_device_inline bool operator==(const float4 &a, const float4 &b); - -ccl_device_inline float distance(const float4 &a, const float4 &b); -ccl_device_inline float dot(const float4 &a, const float4 &b); -ccl_device_inline float len_squared(const float4 &a); -ccl_device_inline float4 rcp(const float4 &a); -ccl_device_inline float4 sqrt(const float4 &a); -ccl_device_inline float4 sqr(const float4 &a); -ccl_device_inline float4 cross(const float4 &a, const float4 &b); -ccl_device_inline bool is_zero(const float4 &a); -ccl_device_inline float average(const float4 &a); -ccl_device_inline float len(const float4 &a); -ccl_device_inline float4 normalize(const float4 &a); -ccl_device_inline float4 safe_normalize(const float4 &a); -ccl_device_inline float4 min(const float4 &a, const float4 &b); -ccl_device_inline float4 max(const float4 &a, const float4 &b); 
-ccl_device_inline float4 clamp(const float4 &a, const float4 &mn, const float4 &mx); -ccl_device_inline float4 fabs(const float4 &a); -ccl_device_inline float4 floor(const float4 &a); -ccl_device_inline float4 mix(const float4 &a, const float4 &b, float t); - -ccl_device_inline float4 safe_divide_float4_float(const float4 a, const float b); - -#ifdef __KERNEL_SSE__ -template -__forceinline const float4 shuffle(const float4 &b); -template -__forceinline const float4 shuffle(const float4 &a, const float4 &b); - -template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 &b); - -template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 &a, const float4 &b); -template<> __forceinline const float4 shuffle<2, 3, 2, 3>(const float4 &a, const float4 &b); - -# ifdef __KERNEL_SSE3__ -template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4 &b); -template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4 &b); -# endif -#endif /* __KERNEL_SSE__ */ - -#ifndef __KERNEL_GPU__ -ccl_device_inline float4 select(const int4 &mask, const float4 &a, const float4 &b); -ccl_device_inline float4 reduce_min(const float4 &a); -ccl_device_inline float4 reduce_max(const float4 &a); -ccl_device_inline float4 reduce_add(const float4 &a); -#endif /* !__KERNEL_GPU__ */ - -/******************************************************************************* - * Definition. 
- */ - -ccl_device_inline float4 zero_float4() -{ -#ifdef __KERNEL_SSE__ - return float4(_mm_setzero_ps()); -#else - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); -#endif -} - -ccl_device_inline float4 one_float4() -{ - return make_float4(1.0f, 1.0f, 1.0f, 1.0f); -} - -ccl_device_inline float4 operator-(const float4 &a) -{ -#ifdef __KERNEL_SSE__ - __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); - return float4(_mm_xor_ps(a.m128, mask)); -#else - return make_float4(-a.x, -a.y, -a.z, -a.w); -#endif -} - -ccl_device_inline float4 operator*(const float4 &a, const float4 &b) -{ -#ifdef __KERNEL_SSE__ - return float4(_mm_mul_ps(a.m128, b.m128)); -#else - return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); -#endif -} - -ccl_device_inline float4 operator*(const float4 &a, float f) -{ -#if defined(__KERNEL_SSE__) - return a * make_float4(f); -#else - return make_float4(a.x * f, a.y * f, a.z * f, a.w * f); -#endif -} - -ccl_device_inline float4 operator*(float f, const float4 &a) -{ - return a * f; -} - -ccl_device_inline float4 operator/(const float4 &a, float f) -{ - return a * (1.0f / f); -} - -ccl_device_inline float4 operator/(const float4 &a, const float4 &b) -{ -#ifdef __KERNEL_SSE__ - return float4(_mm_div_ps(a.m128, b.m128)); -#else - return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); -#endif -} - -ccl_device_inline float4 operator+(const float4 &a, const float f) -{ - return a + make_float4(f, f, f, f); -} - -ccl_device_inline float4 operator+(const float4 &a, const float4 &b) -{ -#ifdef __KERNEL_SSE__ - return float4(_mm_add_ps(a.m128, b.m128)); -#else - return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); -#endif -} - -ccl_device_inline float4 operator-(const float4 &a, const float f) -{ - return a - make_float4(f, f, f, f); -} - -ccl_device_inline float4 operator-(const float4 &a, const float4 &b) -{ -#ifdef __KERNEL_SSE__ - return float4(_mm_sub_ps(a.m128, b.m128)); -#else - return make_float4(a.x - b.x, a.y - b.y, 
a.z - b.z, a.w - b.w); -#endif -} - -ccl_device_inline float4 operator+=(float4 &a, const float4 &b) -{ - return a = a + b; -} - -ccl_device_inline float4 operator-=(float4 &a, const float4 &b) -{ - return a = a - b; -} - -ccl_device_inline float4 operator*=(float4 &a, const float4 &b) -{ - return a = a * b; -} - -ccl_device_inline float4 operator*=(float4 &a, float f) -{ - return a = a * f; -} - -ccl_device_inline float4 operator/=(float4 &a, float f) -{ - return a = a / f; -} - -ccl_device_inline int4 operator<(const float4 &a, const float4 &b) -{ -#ifdef __KERNEL_SSE__ - return int4(_mm_castps_si128(_mm_cmplt_ps(a.m128, b.m128))); -#else - return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w); -#endif -} - -ccl_device_inline int4 operator>=(const float4 &a, const float4 &b) -{ -#ifdef __KERNEL_SSE__ - return int4(_mm_castps_si128(_mm_cmpge_ps(a.m128, b.m128))); -#else - return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w); -#endif -} - -ccl_device_inline int4 operator<=(const float4 &a, const float4 &b) -{ -#ifdef __KERNEL_SSE__ - return int4(_mm_castps_si128(_mm_cmple_ps(a.m128, b.m128))); -#else - return make_int4(a.x <= b.x, a.y <= b.y, a.z <= b.z, a.w <= b.w); -#endif -} - -ccl_device_inline bool operator==(const float4 &a, const float4 &b) -{ -#ifdef __KERNEL_SSE__ - return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 15) == 15; -#else - return (a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w); -#endif -} - -ccl_device_inline float distance(const float4 &a, const float4 &b) -{ - return len(a - b); -} - -ccl_device_inline float dot(const float4 &a, const float4 &b) -{ -#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) -# if defined(__KERNEL_NEON__) - __m128 t = vmulq_f32(a, b); - return vaddvq_f32(t); -# else - return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF)); -# endif -#else - return (a.x * b.x + a.y * b.y) + (a.z * b.z + a.w * b.w); -#endif -} - -ccl_device_inline float len_squared(const float4 &a) -{ - return dot(a, a); -} - 
-ccl_device_inline float4 rcp(const float4 &a) -{ -#ifdef __KERNEL_SSE__ - /* Don't use _mm_rcp_ps due to poor precision. */ - return float4(_mm_div_ps(_mm_set_ps1(1.0f), a.m128)); -#else - return make_float4(1.0f / a.x, 1.0f / a.y, 1.0f / a.z, 1.0f / a.w); -#endif -} - -ccl_device_inline float4 sqrt(const float4 &a) -{ -#ifdef __KERNEL_SSE__ - return float4(_mm_sqrt_ps(a.m128)); -#else - return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w)); -#endif -} - -ccl_device_inline float4 sqr(const float4 &a) -{ - return a * a; -} - -ccl_device_inline float4 cross(const float4 &a, const float4 &b) -{ -#ifdef __KERNEL_SSE__ - return (shuffle<1, 2, 0, 0>(a) * shuffle<2, 0, 1, 0>(b)) - - (shuffle<2, 0, 1, 0>(a) * shuffle<1, 2, 0, 0>(b)); -#else - return make_float4(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x, 0.0f); -#endif -} - -ccl_device_inline bool is_zero(const float4 &a) -{ -#ifdef __KERNEL_SSE__ - return a == make_float4(0.0f); -#else - return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f); -#endif -} - -ccl_device_inline float4 reduce_add(const float4 &a) -{ -#if defined(__KERNEL_SSE__) -# if defined(__KERNEL_NEON__) - return float4(vdupq_n_f32(vaddvq_f32(a))); -# elif defined(__KERNEL_SSE3__) - float4 h(_mm_hadd_ps(a.m128, a.m128)); - return float4(_mm_hadd_ps(h.m128, h.m128)); -# else - float4 h(shuffle<1, 0, 3, 2>(a) + a); - return shuffle<2, 3, 0, 1>(h) + h; -# endif -#else - float sum = (a.x + a.y) + (a.z + a.w); - return make_float4(sum, sum, sum, sum); -#endif -} - -ccl_device_inline float average(const float4 &a) -{ - return reduce_add(a).x * 0.25f; -} - -ccl_device_inline float len(const float4 &a) -{ - return sqrtf(dot(a, a)); -} - -ccl_device_inline float4 normalize(const float4 &a) -{ - return a / len(a); -} - -ccl_device_inline float4 safe_normalize(const float4 &a) -{ - float t = len(a); - return (t != 0.0f) ? 
a / t : a; -} - -ccl_device_inline float4 min(const float4 &a, const float4 &b) -{ -#ifdef __KERNEL_SSE__ - return float4(_mm_min_ps(a.m128, b.m128)); -#else - return make_float4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); -#endif -} - -ccl_device_inline float4 max(const float4 &a, const float4 &b) -{ -#ifdef __KERNEL_SSE__ - return float4(_mm_max_ps(a.m128, b.m128)); -#else - return make_float4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); -#endif -} - -ccl_device_inline float4 clamp(const float4 &a, const float4 &mn, const float4 &mx) -{ - return min(max(a, mn), mx); -} - -ccl_device_inline float4 fabs(const float4 &a) -{ -#if defined(__KERNEL_SSE__) -# if defined(__KERNEL_NEON__) - return float4(vabsq_f32(a)); -# else - return float4(_mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)))); -# endif -#else - return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w)); -#endif -} - -ccl_device_inline float4 floor(const float4 &a) -{ -#ifdef __KERNEL_SSE__ - return float4(_mm_floor_ps(a)); -#else - return make_float4(floorf(a.x), floorf(a.y), floorf(a.z), floorf(a.w)); -#endif -} - -ccl_device_inline float4 mix(const float4 &a, const float4 &b, float t) -{ - return a + t * (b - a); -} - -#ifdef __KERNEL_SSE__ -template -__forceinline const float4 shuffle(const float4 &b) -{ -# if defined(__KERNEL_NEON__) - return float4(shuffle_neon<__m128, index_0, index_1, index_2, index_3>(b.m128)); -# else - return float4(_mm_castsi128_ps( - _mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(index_3, index_2, index_1, index_0)))); -# endif -} - -template -__forceinline const float4 shuffle(const float4 &a, const float4 &b) -{ -# if defined(__KERNEL_NEON__) - return float4(shuffle_neon<__m128, index_0, index_1, index_2, index_3>(a.m128, b.m128)); -# else - return float4(_mm_shuffle_ps(a.m128, b.m128, _MM_SHUFFLE(index_3, index_2, index_1, index_0))); -# endif -} - -template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const 
float4 &b) -{ - return float4(_mm_castpd_ps(_mm_movedup_pd(_mm_castps_pd(b)))); -} - -template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 &a, const float4 &b) -{ - return float4(_mm_movelh_ps(a.m128, b.m128)); -} - -template<> __forceinline const float4 shuffle<2, 3, 2, 3>(const float4 &a, const float4 &b) -{ - return float4(_mm_movehl_ps(b.m128, a.m128)); -} - -# ifdef __KERNEL_SSE3__ -template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4 &b) -{ - return float4(_mm_moveldup_ps(b)); -} - -template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4 &b) -{ - return float4(_mm_movehdup_ps(b)); -} -# endif /* __KERNEL_SSE3__ */ -#endif /* __KERNEL_SSE__ */ - -#ifndef __KERNEL_GPU__ -ccl_device_inline float4 select(const int4 &mask, const float4 &a, const float4 &b) -{ -# ifdef __KERNEL_SSE__ - return float4(_mm_blendv_ps(b.m128, a.m128, _mm_castsi128_ps(mask.m128))); -# else - return make_float4( - (mask.x) ? a.x : b.x, (mask.y) ? a.y : b.y, (mask.z) ? a.z : b.z, (mask.w) ? a.w : b.w); -# endif -} - -ccl_device_inline float4 mask(const int4 &mask, const float4 &a) -{ - /* Replace elements of x with zero where mask isn't set. 
*/ - return select(mask, a, make_float4(0.0f)); -} - -ccl_device_inline float4 reduce_min(const float4 &a) -{ -# if defined(__KERNEL_SSE__) -# if defined(__KERNEL_NEON__) - return float4(vdupq_n_f32(vminvq_f32(a))); -# else - float4 h = min(shuffle<1, 0, 3, 2>(a), a); - return min(shuffle<2, 3, 0, 1>(h), h); -# endif -# else - return make_float4(min(min(a.x, a.y), min(a.z, a.w))); -# endif -} - -ccl_device_inline float4 reduce_max(const float4 &a) -{ -# if defined(__KERNEL_SSE__) -# if defined(__KERNEL_NEON__) - return float4(vdupq_n_f32(vmaxvq_f32(a))); -# else - float4 h = max(shuffle<1, 0, 3, 2>(a), a); - return max(shuffle<2, 3, 0, 1>(h), h); -# endif -# else - return make_float4(max(max(a.x, a.y), max(a.z, a.w))); -# endif -} - -ccl_device_inline float4 load_float4(ccl_private const float *v) -{ -# ifdef __KERNEL_SSE__ - return float4(_mm_loadu_ps(v)); -# else - return make_float4(v[0], v[1], v[2], v[3]); -# endif -} - -#endif /* !__KERNEL_GPU__ */ - -ccl_device_inline float4 safe_divide_float4_float(const float4 a, const float b) -{ - return (b != 0.0f) ? a / b : zero_float4(); -} - -ccl_device_inline bool isfinite4_safe(float4 v) -{ - return isfinite_safe(v.x) && isfinite_safe(v.y) && isfinite_safe(v.z) && isfinite_safe(v.w); -} - -ccl_device_inline float4 ensure_finite4(float4 v) -{ - if (!isfinite_safe(v.x)) - v.x = 0.0f; - if (!isfinite_safe(v.y)) - v.y = 0.0f; - if (!isfinite_safe(v.z)) - v.z = 0.0f; - if (!isfinite_safe(v.w)) - v.w = 0.0f; - return v; -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_MATH_FLOAT4_H__ */ diff --git a/intern/cycles/util/util_math_int2.h b/intern/cycles/util/util_math_int2.h deleted file mode 100644 index 5782b878801..00000000000 --- a/intern/cycles/util/util_math_int2.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_MATH_INT2_H__ -#define __UTIL_MATH_INT2_H__ - -#ifndef __UTIL_MATH_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -/******************************************************************************* - * Declaration. - */ - -ccl_device_inline bool operator==(const int2 a, const int2 b); -ccl_device_inline int2 operator+(const int2 &a, const int2 &b); -ccl_device_inline int2 operator+=(int2 &a, const int2 &b); -ccl_device_inline int2 operator-(const int2 &a, const int2 &b); -ccl_device_inline int2 operator*(const int2 &a, const int2 &b); -ccl_device_inline int2 operator/(const int2 &a, const int2 &b); - -/******************************************************************************* - * Definition. 
- */ - -ccl_device_inline bool operator==(const int2 a, const int2 b) -{ - return (a.x == b.x && a.y == b.y); -} - -ccl_device_inline int2 operator+(const int2 &a, const int2 &b) -{ - return make_int2(a.x + b.x, a.y + b.y); -} - -ccl_device_inline int2 operator+=(int2 &a, const int2 &b) -{ - return a = a + b; -} - -ccl_device_inline int2 operator-(const int2 &a, const int2 &b) -{ - return make_int2(a.x - b.x, a.y - b.y); -} - -ccl_device_inline int2 operator*(const int2 &a, const int2 &b) -{ - return make_int2(a.x * b.x, a.y * b.y); -} - -ccl_device_inline int2 operator/(const int2 &a, const int2 &b) -{ - return make_int2(a.x / b.x, a.y / b.y); -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_MATH_INT2_H__ */ diff --git a/intern/cycles/util/util_math_int3.h b/intern/cycles/util/util_math_int3.h deleted file mode 100644 index e0dfae7c015..00000000000 --- a/intern/cycles/util/util_math_int3.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_MATH_INT3_H__ -#define __UTIL_MATH_INT3_H__ - -#ifndef __UTIL_MATH_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -/******************************************************************************* - * Declaration. 
- */ - -ccl_device_inline int3 min(int3 a, int3 b); -ccl_device_inline int3 max(int3 a, int3 b); -ccl_device_inline int3 clamp(const int3 &a, int mn, int mx); -ccl_device_inline int3 clamp(const int3 &a, int3 &mn, int mx); - -/******************************************************************************* - * Definition. - */ - -ccl_device_inline int3 min(int3 a, int3 b) -{ -#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) - return int3(_mm_min_epi32(a.m128, b.m128)); -#else - return make_int3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); -#endif -} - -ccl_device_inline int3 max(int3 a, int3 b) -{ -#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) - return int3(_mm_max_epi32(a.m128, b.m128)); -#else - return make_int3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); -#endif -} - -ccl_device_inline int3 clamp(const int3 &a, int mn, int mx) -{ -#ifdef __KERNEL_SSE__ - return min(max(a, make_int3(mn)), make_int3(mx)); -#else - return make_int3(clamp(a.x, mn, mx), clamp(a.y, mn, mx), clamp(a.z, mn, mx)); -#endif -} - -ccl_device_inline int3 clamp(const int3 &a, int3 &mn, int mx) -{ -#ifdef __KERNEL_SSE__ - return min(max(a, mn), make_int3(mx)); -#else - return make_int3(clamp(a.x, mn.x, mx), clamp(a.y, mn.y, mx), clamp(a.z, mn.z, mx)); -#endif -} - -ccl_device_inline bool operator==(const int3 &a, const int3 &b) -{ - return a.x == b.x && a.y == b.y && a.z == b.z; -} - -ccl_device_inline bool operator!=(const int3 &a, const int3 &b) -{ - return !(a == b); -} - -ccl_device_inline bool operator<(const int3 &a, const int3 &b) -{ - return a.x < b.x && a.y < b.y && a.z < b.z; -} - -ccl_device_inline int3 operator+(const int3 &a, const int3 &b) -{ -#ifdef __KERNEL_SSE__ - return int3(_mm_add_epi32(a.m128, b.m128)); -#else - return make_int3(a.x + b.x, a.y + b.y, a.z + b.z); -#endif -} - -ccl_device_inline int3 operator-(const int3 &a, const int3 &b) -{ -#ifdef __KERNEL_SSE__ - return int3(_mm_sub_epi32(a.m128, b.m128)); -#else - return make_int3(a.x - b.x, a.y - 
b.y, a.z - b.z); -#endif -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_MATH_INT3_H__ */ diff --git a/intern/cycles/util/util_math_int4.h b/intern/cycles/util/util_math_int4.h deleted file mode 100644 index 186cc58489b..00000000000 --- a/intern/cycles/util/util_math_int4.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_MATH_INT4_H__ -#define __UTIL_MATH_INT4_H__ - -#ifndef __UTIL_MATH_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -/******************************************************************************* - * Declaration. 
- */ - -#ifndef __KERNEL_GPU__ -ccl_device_inline int4 operator+(const int4 &a, const int4 &b); -ccl_device_inline int4 operator+=(int4 &a, const int4 &b); -ccl_device_inline int4 operator>>(const int4 &a, int i); -ccl_device_inline int4 operator<<(const int4 &a, int i); -ccl_device_inline int4 operator<(const int4 &a, const int4 &b); -ccl_device_inline int4 operator>=(const int4 &a, const int4 &b); -ccl_device_inline int4 operator&(const int4 &a, const int4 &b); -ccl_device_inline int4 min(int4 a, int4 b); -ccl_device_inline int4 max(int4 a, int4 b); -ccl_device_inline int4 clamp(const int4 &a, const int4 &mn, const int4 &mx); -ccl_device_inline int4 select(const int4 &mask, const int4 &a, const int4 &b); -#endif /* __KERNEL_GPU__ */ - -/******************************************************************************* - * Definition. - */ - -#ifndef __KERNEL_GPU__ -ccl_device_inline int4 operator+(const int4 &a, const int4 &b) -{ -# ifdef __KERNEL_SSE__ - return int4(_mm_add_epi32(a.m128, b.m128)); -# else - return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); -# endif -} - -ccl_device_inline int4 operator+=(int4 &a, const int4 &b) -{ - return a = a + b; -} - -ccl_device_inline int4 operator>>(const int4 &a, int i) -{ -# ifdef __KERNEL_SSE__ - return int4(_mm_srai_epi32(a.m128, i)); -# else - return make_int4(a.x >> i, a.y >> i, a.z >> i, a.w >> i); -# endif -} - -ccl_device_inline int4 operator<<(const int4 &a, int i) -{ -# ifdef __KERNEL_SSE__ - return int4(_mm_slli_epi32(a.m128, i)); -# else - return make_int4(a.x << i, a.y << i, a.z << i, a.w << i); -# endif -} - -ccl_device_inline int4 operator<(const int4 &a, const int4 &b) -{ -# ifdef __KERNEL_SSE__ - return int4(_mm_cmplt_epi32(a.m128, b.m128)); -# else - return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w); -# endif -} - -ccl_device_inline int4 operator>=(const int4 &a, const int4 &b) -{ -# ifdef __KERNEL_SSE__ - return int4(_mm_xor_si128(_mm_set1_epi32(0xffffffff), _mm_cmplt_epi32(a.m128, 
b.m128))); -# else - return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w); -# endif -} - -ccl_device_inline int4 operator&(const int4 &a, const int4 &b) -{ -# ifdef __KERNEL_SSE__ - return int4(_mm_and_si128(a.m128, b.m128)); -# else - return make_int4(a.x & b.x, a.y & b.y, a.z & b.z, a.w & b.w); -# endif -} - -ccl_device_inline int4 min(int4 a, int4 b) -{ -# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) - return int4(_mm_min_epi32(a.m128, b.m128)); -# else - return make_int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); -# endif -} - -ccl_device_inline int4 max(int4 a, int4 b) -{ -# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) - return int4(_mm_max_epi32(a.m128, b.m128)); -# else - return make_int4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); -# endif -} - -ccl_device_inline int4 clamp(const int4 &a, const int4 &mn, const int4 &mx) -{ - return min(max(a, mn), mx); -} - -ccl_device_inline int4 select(const int4 &mask, const int4 &a, const int4 &b) -{ -# ifdef __KERNEL_SSE__ - const __m128 m = _mm_cvtepi32_ps(mask); - /* TODO(sergey): avoid cvt. */ - return int4(_mm_castps_si128( - _mm_or_ps(_mm_and_ps(m, _mm_castsi128_ps(a)), _mm_andnot_ps(m, _mm_castsi128_ps(b))))); -# else - return make_int4( - (mask.x) ? a.x : b.x, (mask.y) ? a.y : b.y, (mask.z) ? a.z : b.z, (mask.w) ? 
a.w : b.w); -# endif -} - -ccl_device_inline int4 load_int4(const int *v) -{ -# ifdef __KERNEL_SSE__ - return int4(_mm_loadu_si128((__m128i *)v)); -# else - return make_int4(v[0], v[1], v[2], v[3]); -# endif -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_MATH_INT4_H__ */ diff --git a/intern/cycles/util/util_math_intersect.h b/intern/cycles/util/util_math_intersect.h deleted file mode 100644 index 0c431a36afb..00000000000 --- a/intern/cycles/util/util_math_intersect.h +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_MATH_INTERSECT_H__ -#define __UTIL_MATH_INTERSECT_H__ - -CCL_NAMESPACE_BEGIN - -/* Ray Intersection */ - -ccl_device bool ray_sphere_intersect(float3 ray_P, - float3 ray_D, - float ray_t, - float3 sphere_P, - float sphere_radius, - ccl_private float3 *isect_P, - ccl_private float *isect_t) -{ - const float3 d = sphere_P - ray_P; - const float radiussq = sphere_radius * sphere_radius; - const float tsq = dot(d, d); - - if (tsq > radiussq) { - /* Ray origin outside sphere. */ - const float tp = dot(d, ray_D); - if (tp < 0.0f) { - /* Ray points away from sphere. */ - return false; - } - const float dsq = tsq - tp * tp; /* Pythagoras. */ - if (dsq > radiussq) { - /* Closest point on ray outside sphere. 
*/ - return false; - } - const float t = tp - sqrtf(radiussq - dsq); /* pythagoras */ - if (t < ray_t) { - *isect_t = t; - *isect_P = ray_P + ray_D * t; - return true; - } - } - return false; -} - -ccl_device bool ray_aligned_disk_intersect(float3 ray_P, - float3 ray_D, - float ray_t, - float3 disk_P, - float disk_radius, - ccl_private float3 *isect_P, - ccl_private float *isect_t) -{ - /* Aligned disk normal. */ - float disk_t; - const float3 disk_N = normalize_len(ray_P - disk_P, &disk_t); - const float div = dot(ray_D, disk_N); - if (UNLIKELY(div == 0.0f)) { - return false; - } - /* Compute t to intersection point. */ - const float t = -disk_t / div; - if (t < 0.0f || t > ray_t) { - return false; - } - /* Test if within radius. */ - float3 P = ray_P + ray_D * t; - if (len_squared(P - disk_P) > disk_radius * disk_radius) { - return false; - } - *isect_P = P; - *isect_t = t; - return true; -} - -ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P, - float3 ray_dir, - float ray_t, -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const ssef *ssef_verts, -#else - const float3 tri_a, - const float3 tri_b, - const float3 tri_c, -#endif - ccl_private float *isect_u, - ccl_private float *isect_v, - ccl_private float *isect_t) -{ -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - typedef ssef float3; - const float3 tri_a(ssef_verts[0]); - const float3 tri_b(ssef_verts[1]); - const float3 tri_c(ssef_verts[2]); - const float3 P(ray_P); - const float3 dir(ray_dir); -#else -# define dot3(a, b) dot(a, b) - const float3 P = ray_P; - const float3 dir = ray_dir; -#endif - - /* Calculate vertices relative to ray origin. */ - const float3 v0 = tri_c - P; - const float3 v1 = tri_a - P; - const float3 v2 = tri_b - P; - - /* Calculate triangle edges. */ - const float3 e0 = v2 - v0; - const float3 e1 = v0 - v1; - const float3 e2 = v1 - v2; - - /* Perform edge tests. 
*/ -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const float3 crossU = cross(v2 + v0, e0); - const float3 crossV = cross(v0 + v1, e1); - const float3 crossW = cross(v1 + v2, e2); - - ssef crossX(crossU); - ssef crossY(crossV); - ssef crossZ(crossW); - ssef zero = _mm_setzero_ps(); - _MM_TRANSPOSE4_PS(crossX, crossY, crossZ, zero); - - const ssef dirX(ray_dir.x); - const ssef dirY(ray_dir.y); - const ssef dirZ(ray_dir.z); - - ssef UVWW = madd(crossX, dirX, madd(crossY, dirY, crossZ * dirZ)); -#else /* __KERNEL_SSE2__ */ - const float U = dot(cross(v2 + v0, e0), ray_dir); - const float V = dot(cross(v0 + v1, e1), ray_dir); - const float W = dot(cross(v1 + v2, e2), ray_dir); -#endif /* __KERNEL_SSE2__ */ - -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - int uvw_sign = movemask(UVWW) & 0x7; - if (uvw_sign != 0) { - if (uvw_sign != 0x7) { - return false; - } - } -#else - const float minUVW = min(U, min(V, W)); - const float maxUVW = max(U, max(V, W)); - - if (minUVW < 0.0f && maxUVW > 0.0f) { - return false; - } -#endif - - /* Calculate geometry normal and denominator. */ - const float3 Ng1 = cross(e1, e0); - // const Vec3vfM Ng1 = stable_triangle_normal(e2,e1,e0); - const float3 Ng = Ng1 + Ng1; - const float den = dot3(Ng, dir); - /* Avoid division by 0. */ - if (UNLIKELY(den == 0.0f)) { - return false; - } - - /* Perform depth test. 
*/ - const float T = dot3(v0, Ng); - const int sign_den = (__float_as_int(den) & 0x80000000); - const float sign_T = xor_signmask(T, sign_den); - if ((sign_T < 0.0f) || (sign_T > ray_t * xor_signmask(den, sign_den))) { - return false; - } - - const float inv_den = 1.0f / den; -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - UVWW *= inv_den; - _mm_store_ss(isect_u, UVWW); - _mm_store_ss(isect_v, shuffle<1, 1, 3, 3>(UVWW)); -#else - *isect_u = U * inv_den; - *isect_v = V * inv_den; -#endif - *isect_t = T * inv_den; - return true; - -#undef dot3 -} - -/* Tests for an intersection between a ray and a quad defined by - * its midpoint, normal and sides. - * If ellipse is true, hits outside the ellipse that's enclosed by the - * quad are rejected. - */ -ccl_device bool ray_quad_intersect(float3 ray_P, - float3 ray_D, - float ray_mint, - float ray_maxt, - float3 quad_P, - float3 quad_u, - float3 quad_v, - float3 quad_n, - ccl_private float3 *isect_P, - ccl_private float *isect_t, - ccl_private float *isect_u, - ccl_private float *isect_v, - bool ellipse) -{ - /* Perform intersection test. */ - float t = -(dot(ray_P, quad_n) - dot(quad_P, quad_n)) / dot(ray_D, quad_n); - if (t < ray_mint || t > ray_maxt) { - return false; - } - const float3 hit = ray_P + t * ray_D; - const float3 inplane = hit - quad_P; - const float u = dot(inplane, quad_u) / dot(quad_u, quad_u); - if (u < -0.5f || u > 0.5f) { - return false; - } - const float v = dot(inplane, quad_v) / dot(quad_v, quad_v); - if (v < -0.5f || v > 0.5f) { - return false; - } - if (ellipse && (u * u + v * v > 0.25f)) { - return false; - } - /* Store the result. */ - /* TODO(sergey): Check whether we can avoid some checks here. 
*/ - if (isect_P != NULL) - *isect_P = hit; - if (isect_t != NULL) - *isect_t = t; - if (isect_u != NULL) - *isect_u = u + 0.5f; - if (isect_v != NULL) - *isect_v = v + 0.5f; - return true; -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_MATH_INTERSECT_H__ */ diff --git a/intern/cycles/util/util_math_matrix.h b/intern/cycles/util/util_math_matrix.h deleted file mode 100644 index bff7ddb4cee..00000000000 --- a/intern/cycles/util/util_math_matrix.h +++ /dev/null @@ -1,454 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_MATH_MATRIX_H__ -#define __UTIL_MATH_MATRIX_H__ - -CCL_NAMESPACE_BEGIN - -#define MAT(A, size, row, col) A[(row) * (size) + (col)] - -/* Variants that use a constant stride on GPUS. */ -#ifdef __KERNEL_GPU__ -# define MATS(A, n, r, c, s) A[((r) * (n) + (c)) * (s)] -/* Element access when only the lower-triangular elements are stored. */ -# define MATHS(A, r, c, s) A[((r) * ((r) + 1) / 2 + (c)) * (s)] -# define VECS(V, i, s) V[(i) * (s)] -#else -# define MATS(A, n, r, c, s) MAT(A, n, r, c) -# define MATHS(A, r, c, s) A[(r) * ((r) + 1) / 2 + (c)] -# define VECS(V, i, s) V[i] -#endif - -/* Zeroing helpers. 
*/ - -ccl_device_inline void math_vector_zero(ccl_private float *v, int n) -{ - for (int i = 0; i < n; i++) { - v[i] = 0.0f; - } -} - -ccl_device_inline void math_matrix_zero(ccl_private float *A, int n) -{ - for (int row = 0; row < n; row++) { - for (int col = 0; col <= row; col++) { - MAT(A, n, row, col) = 0.0f; - } - } -} - -/* Elementary vector operations. */ - -ccl_device_inline void math_vector_add(ccl_private float *a, - ccl_private const float *ccl_restrict b, - int n) -{ - for (int i = 0; i < n; i++) { - a[i] += b[i]; - } -} - -ccl_device_inline void math_vector_mul(ccl_private float *a, - ccl_private const float *ccl_restrict b, - int n) -{ - for (int i = 0; i < n; i++) { - a[i] *= b[i]; - } -} - -ccl_device_inline void math_vector_mul_strided(ccl_global float *a, - ccl_private const float *ccl_restrict b, - int astride, - int n) -{ - for (int i = 0; i < n; i++) { - a[i * astride] *= b[i]; - } -} - -ccl_device_inline void math_vector_scale(ccl_private float *a, float b, int n) -{ - for (int i = 0; i < n; i++) { - a[i] *= b; - } -} - -ccl_device_inline void math_vector_max(ccl_private float *a, - ccl_private const float *ccl_restrict b, - int n) -{ - for (int i = 0; i < n; i++) { - a[i] = max(a[i], b[i]); - } -} - -ccl_device_inline void math_vec3_add(ccl_private float3 *v, int n, ccl_private float *x, float3 w) -{ - for (int i = 0; i < n; i++) { - v[i] += w * x[i]; - } -} - -ccl_device_inline void math_vec3_add_strided( - ccl_global float3 *v, int n, ccl_private float *x, float3 w, int stride) -{ - for (int i = 0; i < n; i++) { - ccl_global float *elem = (ccl_global float *)(v + i * stride); - atomic_add_and_fetch_float(elem + 0, w.x * x[i]); - atomic_add_and_fetch_float(elem + 1, w.y * x[i]); - atomic_add_and_fetch_float(elem + 2, w.z * x[i]); - } -} - -/* Elementary matrix operations. - * Note: TriMatrix refers to a square matrix that is symmetric, - * and therefore its upper-triangular part isn't stored. 
*/ - -ccl_device_inline void math_trimatrix_add_diagonal(ccl_global float *A, - int n, - float val, - int stride) -{ - for (int row = 0; row < n; row++) { - MATHS(A, row, row, stride) += val; - } -} - -/* Add Gramian matrix of v to A. - * The Gramian matrix of v is vt*v, so element (i,j) is v[i]*v[j]. */ -ccl_device_inline void math_matrix_add_gramian(ccl_private float *A, - int n, - ccl_private const float *ccl_restrict v, - float weight) -{ - for (int row = 0; row < n; row++) { - for (int col = 0; col <= row; col++) { - MAT(A, n, row, col) += v[row] * v[col] * weight; - } - } -} - -/* Add Gramian matrix of v to A. - * The Gramian matrix of v is vt*v, so element (i,j) is v[i]*v[j]. */ -ccl_device_inline void math_trimatrix_add_gramian_strided( - ccl_global float *A, int n, ccl_private const float *ccl_restrict v, float weight, int stride) -{ - for (int row = 0; row < n; row++) { - for (int col = 0; col <= row; col++) { - atomic_add_and_fetch_float(&MATHS(A, row, col, stride), v[row] * v[col] * weight); - } - } -} - -ccl_device_inline void math_trimatrix_add_gramian(ccl_global float *A, - int n, - ccl_private const float *ccl_restrict v, - float weight) -{ - for (int row = 0; row < n; row++) { - for (int col = 0; col <= row; col++) { - MATHS(A, row, col, 1) += v[row] * v[col] * weight; - } - } -} - -/* Transpose matrix A in place. */ -ccl_device_inline void math_matrix_transpose(ccl_global float *A, int n, int stride) -{ - for (int i = 0; i < n; i++) { - for (int j = 0; j < i; j++) { - float temp = MATS(A, n, i, j, stride); - MATS(A, n, i, j, stride) = MATS(A, n, j, i, stride); - MATS(A, n, j, i, stride) = temp; - } - } -} - -/* Solvers for matrix problems */ - -/* In-place Cholesky-Banachiewicz decomposition of the square, positive-definite matrix A - * into a lower triangular matrix L so that A = L*L^T. A is being overwritten by L. - * Also, only the lower triangular part of A is ever accessed. 
*/ -ccl_device void math_trimatrix_cholesky(ccl_global float *A, int n, int stride) -{ - for (int row = 0; row < n; row++) { - for (int col = 0; col <= row; col++) { - float sum_col = MATHS(A, row, col, stride); - for (int k = 0; k < col; k++) { - sum_col -= MATHS(A, row, k, stride) * MATHS(A, col, k, stride); - } - if (row == col) { - sum_col = sqrtf(max(sum_col, 0.0f)); - } - else { - sum_col /= MATHS(A, col, col, stride); - } - MATHS(A, row, col, stride) = sum_col; - } - } -} - -/* Solve A*S=y for S given A and y, - * where A is symmetrical positive-semi-definite and both inputs are destroyed in the process. - * - * We can apply Cholesky decomposition to find a lower triangular L so that L*Lt = A. - * With that we get (L*Lt)*S = L*(Lt*S) = L*b = y, defining b as Lt*S. - * Since L is lower triangular, finding b is relatively easy since y is known. - * Then, the remaining problem is Lt*S = b, which again can be solved easily. - * - * This is useful for solving the normal equation S=inv(Xt*W*X)*Xt*W*y, since Xt*W*X is - * symmetrical positive-semidefinite by construction, - * so we can just use this function with A=Xt*W*X and y=Xt*W*y. */ -ccl_device_inline void math_trimatrix_vec3_solve(ccl_global float *A, - ccl_global float3 *y, - int n, - int stride) -{ - /* Since the first entry of the design row is always 1, the upper-left element of XtWX is a good - * heuristic for the amount of pixels considered (with weighting), - * therefore the amount of correction is scaled based on it. */ - math_trimatrix_add_diagonal(A, n, 3e-7f * A[0], stride); /* Improve the numerical stability. */ - math_trimatrix_cholesky(A, n, stride); /* Replace A with L so that L*Lt = A. */ - - /* Use forward substitution to solve L*b = y, replacing y by b. 
*/ - for (int row = 0; row < n; row++) { - float3 sum = VECS(y, row, stride); - for (int col = 0; col < row; col++) - sum -= MATHS(A, row, col, stride) * VECS(y, col, stride); - VECS(y, row, stride) = sum / MATHS(A, row, row, stride); - } - - /* Use backward substitution to solve Lt*S = b, replacing b by S. */ - for (int row = n - 1; row >= 0; row--) { - float3 sum = VECS(y, row, stride); - for (int col = row + 1; col < n; col++) - sum -= MATHS(A, col, row, stride) * VECS(y, col, stride); - VECS(y, row, stride) = sum / MATHS(A, row, row, stride); - } -} - -/* Perform the Jacobi Eigenvalue Method on matrix A. - * A is assumed to be a symmetrical matrix, therefore only the lower-triangular part is ever - * accessed. The algorithm overwrites the contents of A. - * - * After returning, A will be overwritten with D, which is (almost) diagonal, - * and V will contain the eigenvectors of the original A in its rows (!), - * so that A = V^T*D*V. Therefore, the diagonal elements of D are the (sorted) eigenvalues of A. - */ -ccl_device void math_matrix_jacobi_eigendecomposition(ccl_private float *A, - ccl_global float *V, - int n, - int v_stride) -{ - const float singular_epsilon = 1e-9f; - - for (int row = 0; row < n; row++) { - for (int col = 0; col < n; col++) { - MATS(V, n, row, col, v_stride) = (col == row) ? 1.0f : 0.0f; - } - } - - for (int sweep = 0; sweep < 8; sweep++) { - float off_diagonal = 0.0f; - for (int row = 1; row < n; row++) { - for (int col = 0; col < row; col++) { - off_diagonal += fabsf(MAT(A, n, row, col)); - } - } - if (off_diagonal < 1e-7f) { - /* The matrix has nearly reached diagonal form. - * Since the eigenvalues are only used to determine truncation, their exact values aren't - * required - a relative error of a few ULPs won't matter at all. */ - break; - } - - /* Set the threshold for the small element rotation skip in the first sweep: - * Skip all elements that are less than a tenth of the average off-diagonal element. 
*/ - float threshold = 0.2f * off_diagonal / (n * n); - - for (int row = 1; row < n; row++) { - for (int col = 0; col < row; col++) { - /* Perform a Jacobi rotation on this element that reduces it to zero. */ - float element = MAT(A, n, row, col); - float abs_element = fabsf(element); - - /* If we're in a later sweep and the element already is very small, - * just set it to zero and skip the rotation. */ - if (sweep > 3 && abs_element <= singular_epsilon * fabsf(MAT(A, n, row, row)) && - abs_element <= singular_epsilon * fabsf(MAT(A, n, col, col))) { - MAT(A, n, row, col) = 0.0f; - continue; - } - - if (element == 0.0f) { - continue; - } - - /* If we're in one of the first sweeps and the element is smaller than the threshold, - * skip it. */ - if (sweep < 3 && (abs_element < threshold)) { - continue; - } - - /* Determine rotation: The rotation is characterized by its angle phi - or, - * in the actual implementation, sin(phi) and cos(phi). - * To find those, we first compute their ratio - that might be unstable if the angle - * approaches 90°, so there's a fallback for that case. - * Then, we compute sin(phi) and cos(phi) themselves. */ - float singular_diff = MAT(A, n, row, row) - MAT(A, n, col, col); - float ratio; - if (abs_element > singular_epsilon * fabsf(singular_diff)) { - float cot_2phi = 0.5f * singular_diff / element; - ratio = 1.0f / (fabsf(cot_2phi) + sqrtf(1.0f + cot_2phi * cot_2phi)); - if (cot_2phi < 0.0f) - ratio = -ratio; /* Copy sign. */ - } - else { - ratio = element / singular_diff; - } - - float c = 1.0f / sqrtf(1.0f + ratio * ratio); - float s = ratio * c; - /* To improve numerical stability by avoiding cancellation, the update equations are - * reformulized to use sin(phi) and tan(phi/2) instead. */ - float tan_phi_2 = s / (1.0f + c); - - /* Update the singular values in the diagonal. 
*/ - float singular_delta = ratio * element; - MAT(A, n, row, row) += singular_delta; - MAT(A, n, col, col) -= singular_delta; - - /* Set the element itself to zero. */ - MAT(A, n, row, col) = 0.0f; - - /* Perform the actual rotations on the matrices. */ -#define ROT(M, r1, c1, r2, c2, stride) \ - { \ - float M1 = MATS(M, n, r1, c1, stride); \ - float M2 = MATS(M, n, r2, c2, stride); \ - MATS(M, n, r1, c1, stride) -= s * (M2 + tan_phi_2 * M1); \ - MATS(M, n, r2, c2, stride) += s * (M1 - tan_phi_2 * M2); \ - } - - /* Split into three parts to ensure correct accesses since we only store the - * lower-triangular part of A. */ - for (int i = 0; i < col; i++) - ROT(A, col, i, row, i, 1); - for (int i = col + 1; i < row; i++) - ROT(A, i, col, row, i, 1); - for (int i = row + 1; i < n; i++) - ROT(A, i, col, i, row, 1); - - for (int i = 0; i < n; i++) - ROT(V, col, i, row, i, v_stride); -#undef ROT - } - } - } - - /* Sort eigenvalues and the associated eigenvectors. */ - for (int i = 0; i < n - 1; i++) { - float v = MAT(A, n, i, i); - int k = i; - for (int j = i; j < n; j++) { - if (MAT(A, n, j, j) >= v) { - v = MAT(A, n, j, j); - k = j; - } - } - if (k != i) { - /* Swap eigenvalues. */ - MAT(A, n, k, k) = MAT(A, n, i, i); - MAT(A, n, i, i) = v; - /* Swap eigenvectors. */ - for (int j = 0; j < n; j++) { - float v = MATS(V, n, i, j, v_stride); - MATS(V, n, i, j, v_stride) = MATS(V, n, k, j, v_stride); - MATS(V, n, k, j, v_stride) = v; - } - } - } -} - -#ifdef __KERNEL_SSE3__ -ccl_device_inline void math_vector_zero_sse(float4 *A, int n) -{ - for (int i = 0; i < n; i++) { - A[i] = make_float4(0.0f); - } -} - -ccl_device_inline void math_matrix_zero_sse(float4 *A, int n) -{ - for (int row = 0; row < n; row++) { - for (int col = 0; col <= row; col++) { - MAT(A, n, row, col) = make_float4(0.0f); - } - } -} - -/* Add Gramian matrix of v to A. - * The Gramian matrix of v is v^T*v, so element (i,j) is v[i]*v[j]. 
*/ -ccl_device_inline void math_matrix_add_gramian_sse(float4 *A, - int n, - const float4 *ccl_restrict v, - float4 weight) -{ - for (int row = 0; row < n; row++) { - for (int col = 0; col <= row; col++) { - MAT(A, n, row, col) = MAT(A, n, row, col) + v[row] * v[col] * weight; - } - } -} - -ccl_device_inline void math_vector_add_sse(float4 *V, int n, const float4 *ccl_restrict a) -{ - for (int i = 0; i < n; i++) { - V[i] += a[i]; - } -} - -ccl_device_inline void math_vector_mul_sse(float4 *V, int n, const float4 *ccl_restrict a) -{ - for (int i = 0; i < n; i++) { - V[i] *= a[i]; - } -} - -ccl_device_inline void math_vector_max_sse(float4 *a, const float4 *ccl_restrict b, int n) -{ - for (int i = 0; i < n; i++) { - a[i] = max(a[i], b[i]); - } -} - -ccl_device_inline void math_matrix_hsum(float *A, int n, const float4 *ccl_restrict B) -{ - for (int row = 0; row < n; row++) { - for (int col = 0; col <= row; col++) { - MAT(A, n, row, col) = reduce_add(MAT(B, n, row, col))[0]; - } - } -} -#endif - -#undef MAT - -CCL_NAMESPACE_END - -#endif /* __UTIL_MATH_MATRIX_H__ */ diff --git a/intern/cycles/util/util_md5.cpp b/intern/cycles/util/util_md5.cpp deleted file mode 100644 index 0df521c2b58..00000000000 --- a/intern/cycles/util/util_md5.cpp +++ /dev/null @@ -1,387 +0,0 @@ -/* - * Copyright (C) 1999, 2002 Aladdin Enterprises. All rights reserved. - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. 
If you use this software - * in a product, an acknowledgment in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - * - * L. Peter Deutsch - * ghost@aladdin.com - */ - -/* Minor modifications done to remove some code and change style. */ - -#include "util_md5.h" -#include "util_path.h" - -#include -#include - -CCL_NAMESPACE_BEGIN - -#define T_MASK ((uint32_t)~0) -#define T1 /* 0xd76aa478 */ (T_MASK ^ 0x28955b87) -#define T2 /* 0xe8c7b756 */ (T_MASK ^ 0x173848a9) -#define T3 0x242070db -#define T4 /* 0xc1bdceee */ (T_MASK ^ 0x3e423111) -#define T5 /* 0xf57c0faf */ (T_MASK ^ 0x0a83f050) -#define T6 0x4787c62a -#define T7 /* 0xa8304613 */ (T_MASK ^ 0x57cfb9ec) -#define T8 /* 0xfd469501 */ (T_MASK ^ 0x02b96afe) -#define T9 0x698098d8 -#define T10 /* 0x8b44f7af */ (T_MASK ^ 0x74bb0850) -#define T11 /* 0xffff5bb1 */ (T_MASK ^ 0x0000a44e) -#define T12 /* 0x895cd7be */ (T_MASK ^ 0x76a32841) -#define T13 0x6b901122 -#define T14 /* 0xfd987193 */ (T_MASK ^ 0x02678e6c) -#define T15 /* 0xa679438e */ (T_MASK ^ 0x5986bc71) -#define T16 0x49b40821 -#define T17 /* 0xf61e2562 */ (T_MASK ^ 0x09e1da9d) -#define T18 /* 0xc040b340 */ (T_MASK ^ 0x3fbf4cbf) -#define T19 0x265e5a51 -#define T20 /* 0xe9b6c7aa */ (T_MASK ^ 0x16493855) -#define T21 /* 0xd62f105d */ (T_MASK ^ 0x29d0efa2) -#define T22 0x02441453 -#define T23 /* 0xd8a1e681 */ (T_MASK ^ 0x275e197e) -#define T24 /* 0xe7d3fbc8 */ (T_MASK ^ 0x182c0437) -#define T25 0x21e1cde6 -#define T26 /* 0xc33707d6 */ (T_MASK ^ 0x3cc8f829) -#define T27 /* 0xf4d50d87 */ (T_MASK ^ 0x0b2af278) -#define T28 0x455a14ed -#define T29 /* 0xa9e3e905 */ (T_MASK ^ 0x561c16fa) -#define T30 /* 0xfcefa3f8 */ (T_MASK ^ 0x03105c07) -#define T31 0x676f02d9 -#define T32 /* 0x8d2a4c8a */ (T_MASK ^ 0x72d5b375) -#define T33 /* 0xfffa3942 
*/ (T_MASK ^ 0x0005c6bd) -#define T34 /* 0x8771f681 */ (T_MASK ^ 0x788e097e) -#define T35 0x6d9d6122 -#define T36 /* 0xfde5380c */ (T_MASK ^ 0x021ac7f3) -#define T37 /* 0xa4beea44 */ (T_MASK ^ 0x5b4115bb) -#define T38 0x4bdecfa9 -#define T39 /* 0xf6bb4b60 */ (T_MASK ^ 0x0944b49f) -#define T40 /* 0xbebfbc70 */ (T_MASK ^ 0x4140438f) -#define T41 0x289b7ec6 -#define T42 /* 0xeaa127fa */ (T_MASK ^ 0x155ed805) -#define T43 /* 0xd4ef3085 */ (T_MASK ^ 0x2b10cf7a) -#define T44 0x04881d05 -#define T45 /* 0xd9d4d039 */ (T_MASK ^ 0x262b2fc6) -#define T46 /* 0xe6db99e5 */ (T_MASK ^ 0x1924661a) -#define T47 0x1fa27cf8 -#define T48 /* 0xc4ac5665 */ (T_MASK ^ 0x3b53a99a) -#define T49 /* 0xf4292244 */ (T_MASK ^ 0x0bd6ddbb) -#define T50 0x432aff97 -#define T51 /* 0xab9423a7 */ (T_MASK ^ 0x546bdc58) -#define T52 /* 0xfc93a039 */ (T_MASK ^ 0x036c5fc6) -#define T53 0x655b59c3 -#define T54 /* 0x8f0ccc92 */ (T_MASK ^ 0x70f3336d) -#define T55 /* 0xffeff47d */ (T_MASK ^ 0x00100b82) -#define T56 /* 0x85845dd1 */ (T_MASK ^ 0x7a7ba22e) -#define T57 0x6fa87e4f -#define T58 /* 0xfe2ce6e0 */ (T_MASK ^ 0x01d3191f) -#define T59 /* 0xa3014314 */ (T_MASK ^ 0x5cfebceb) -#define T60 0x4e0811a1 -#define T61 /* 0xf7537e82 */ (T_MASK ^ 0x08ac817d) -#define T62 /* 0xbd3af235 */ (T_MASK ^ 0x42c50dca) -#define T63 0x2ad7d2bb -#define T64 /* 0xeb86d391 */ (T_MASK ^ 0x14792c6e) - -void MD5Hash::process(const uint8_t *data /*[64]*/) -{ - uint32_t a = abcd[0], b = abcd[1], c = abcd[2], d = abcd[3]; - uint32_t t; - /* Define storage for little-endian or both types of CPUs. */ - uint32_t xbuf[16]; - const uint32_t *X; - - { - /* - * Determine dynamically whether this is a big-endian or - * little-endian machine, since we can use a more efficient - * algorithm on the latter. - */ - static const int w = 1; - - if (*((const uint8_t *)&w)) /* dynamic little-endian */ - { - /* - * On little-endian machines, we can process properly aligned - * data without copying it. 
- */ - if (!((data - (const uint8_t *)0) & 3)) { - /* data are properly aligned */ - X = (const uint32_t *)data; - } - else { - /* not aligned */ - memcpy(xbuf, data, 64); - X = xbuf; - } - } - else { /* dynamic big-endian */ - /* - * On big-endian machines, we must arrange the bytes in the - * right order. - */ - const uint8_t *xp = data; - int i; - - X = xbuf; /* (dynamic only) */ - for (i = 0; i < 16; ++i, xp += 4) - xbuf[i] = xp[0] + (xp[1] << 8) + (xp[2] << 16) + (xp[3] << 24); - } - } - -#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) - - /* Round 1. */ - /* Let [abcd k s i] denote the operation - * a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */ -#define F(x, y, z) (((x) & (y)) | (~(x) & (z))) -#define SET(a, b, c, d, k, s, Ti) \ - t = a + F(b, c, d) + X[k] + Ti; \ - a = ROTATE_LEFT(t, s) + b - /* Do the following 16 operations. */ - SET(a, b, c, d, 0, 7, T1); - SET(d, a, b, c, 1, 12, T2); - SET(c, d, a, b, 2, 17, T3); - SET(b, c, d, a, 3, 22, T4); - SET(a, b, c, d, 4, 7, T5); - SET(d, a, b, c, 5, 12, T6); - SET(c, d, a, b, 6, 17, T7); - SET(b, c, d, a, 7, 22, T8); - SET(a, b, c, d, 8, 7, T9); - SET(d, a, b, c, 9, 12, T10); - SET(c, d, a, b, 10, 17, T11); - SET(b, c, d, a, 11, 22, T12); - SET(a, b, c, d, 12, 7, T13); - SET(d, a, b, c, 13, 12, T14); - SET(c, d, a, b, 14, 17, T15); - SET(b, c, d, a, 15, 22, T16); -#undef SET - - /* Round 2. */ - /* Let [abcd k s i] denote the operation - * a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). */ -#define G(x, y, z) (((x) & (z)) | ((y) & ~(z))) -#define SET(a, b, c, d, k, s, Ti) \ - t = a + G(b, c, d) + X[k] + Ti; \ - a = ROTATE_LEFT(t, s) + b - /* Do the following 16 operations. 
*/ - SET(a, b, c, d, 1, 5, T17); - SET(d, a, b, c, 6, 9, T18); - SET(c, d, a, b, 11, 14, T19); - SET(b, c, d, a, 0, 20, T20); - SET(a, b, c, d, 5, 5, T21); - SET(d, a, b, c, 10, 9, T22); - SET(c, d, a, b, 15, 14, T23); - SET(b, c, d, a, 4, 20, T24); - SET(a, b, c, d, 9, 5, T25); - SET(d, a, b, c, 14, 9, T26); - SET(c, d, a, b, 3, 14, T27); - SET(b, c, d, a, 8, 20, T28); - SET(a, b, c, d, 13, 5, T29); - SET(d, a, b, c, 2, 9, T30); - SET(c, d, a, b, 7, 14, T31); - SET(b, c, d, a, 12, 20, T32); -#undef SET - - /* Round 3. */ - /* Let [abcd k s t] denote the operation - * a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s). */ -#define H(x, y, z) ((x) ^ (y) ^ (z)) -#define SET(a, b, c, d, k, s, Ti) \ - t = a + H(b, c, d) + X[k] + Ti; \ - a = ROTATE_LEFT(t, s) + b - /* Do the following 16 operations. */ - SET(a, b, c, d, 5, 4, T33); - SET(d, a, b, c, 8, 11, T34); - SET(c, d, a, b, 11, 16, T35); - SET(b, c, d, a, 14, 23, T36); - SET(a, b, c, d, 1, 4, T37); - SET(d, a, b, c, 4, 11, T38); - SET(c, d, a, b, 7, 16, T39); - SET(b, c, d, a, 10, 23, T40); - SET(a, b, c, d, 13, 4, T41); - SET(d, a, b, c, 0, 11, T42); - SET(c, d, a, b, 3, 16, T43); - SET(b, c, d, a, 6, 23, T44); - SET(a, b, c, d, 9, 4, T45); - SET(d, a, b, c, 12, 11, T46); - SET(c, d, a, b, 15, 16, T47); - SET(b, c, d, a, 2, 23, T48); -#undef SET - - /* Round 4. */ - /* Let [abcd k s t] denote the operation - * a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). */ -#define I(x, y, z) ((y) ^ ((x) | ~(z))) -#define SET(a, b, c, d, k, s, Ti) \ - t = a + I(b, c, d) + X[k] + Ti; \ - a = ROTATE_LEFT(t, s) + b - /* Do the following 16 operations. 
*/ - SET(a, b, c, d, 0, 6, T49); - SET(d, a, b, c, 7, 10, T50); - SET(c, d, a, b, 14, 15, T51); - SET(b, c, d, a, 5, 21, T52); - SET(a, b, c, d, 12, 6, T53); - SET(d, a, b, c, 3, 10, T54); - SET(c, d, a, b, 10, 15, T55); - SET(b, c, d, a, 1, 21, T56); - SET(a, b, c, d, 8, 6, T57); - SET(d, a, b, c, 15, 10, T58); - SET(c, d, a, b, 6, 15, T59); - SET(b, c, d, a, 13, 21, T60); - SET(a, b, c, d, 4, 6, T61); - SET(d, a, b, c, 11, 10, T62); - SET(c, d, a, b, 2, 15, T63); - SET(b, c, d, a, 9, 21, T64); -#undef SET - - /* Then perform the following additions. (That is increment each - * of the four registers by the value it had before this block - * was started.) */ - abcd[0] += a; - abcd[1] += b; - abcd[2] += c; - abcd[3] += d; -} - -MD5Hash::MD5Hash() -{ - count[0] = count[1] = 0; - abcd[0] = 0x67452301; - abcd[1] = /*0xefcdab89*/ T_MASK ^ 0x10325476; - abcd[2] = /*0x98badcfe*/ T_MASK ^ 0x67452301; - abcd[3] = 0x10325476; -} - -MD5Hash::~MD5Hash() -{ -} - -void MD5Hash::append(const uint8_t *data, int nbytes) -{ - const uint8_t *p = data; - int left = nbytes; - int offset = (count[0] >> 3) & 63; - uint32_t nbits = (uint32_t)(nbytes << 3); - - if (nbytes <= 0) - return; - - /* Update the message length. */ - count[1] += nbytes >> 29; - count[0] += nbits; - if (count[0] < nbits) - count[1]++; - - /* Process an initial partial block. */ - if (offset) { - int copy = (offset + nbytes > 64 ? 64 - offset : nbytes); - - memcpy(buf + offset, p, copy); - if (offset + copy < 64) - return; - p += copy; - left -= copy; - process(buf); - } - - /* Process full blocks. */ - for (; left >= 64; p += 64, left -= 64) - process(p); - - /* Process a final partial block. 
*/ - if (left) - memcpy(buf, p, left); -} - -void MD5Hash::append(const string &str) -{ - if (str.size()) { - append((const uint8_t *)str.c_str(), str.size()); - } -} - -bool MD5Hash::append_file(const string &filepath) -{ - FILE *f = path_fopen(filepath, "rb"); - - if (!f) { - fprintf(stderr, "MD5: failed to open file %s\n", filepath.c_str()); - return false; - } - - const size_t buffer_size = 1024; - uint8_t buffer[buffer_size]; - size_t n; - - do { - n = fread(buffer, 1, buffer_size, f); - append(buffer, n); - } while (n == buffer_size); - - bool success = (ferror(f) == 0); - - fclose(f); - - return success; -} - -void MD5Hash::finish(uint8_t digest[16]) -{ - static const uint8_t pad[64] = {0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - - uint8_t data[8]; - int i; - - /* Save the length before padding. */ - for (i = 0; i < 8; ++i) - data[i] = (uint8_t)(count[i >> 2] >> ((i & 3) << 3)); - - /* Pad to 56 bytes mod 64. */ - append(pad, ((55 - (count[0] >> 3)) & 63) + 1); - /* Append the length. */ - append(data, 8); - - for (i = 0; i < 16; ++i) - digest[i] = (uint8_t)(abcd[i >> 2] >> ((i & 3) << 3)); -} - -string MD5Hash::get_hex() -{ - uint8_t digest[16]; - char buf[16 * 2 + 1]; - - finish(digest); - - for (int i = 0; i < 16; i++) - sprintf(buf + i * 2, "%02X", (unsigned int)digest[i]); - buf[sizeof(buf) - 1] = '\0'; - - return string(buf); -} - -string util_md5_string(const string &str) -{ - MD5Hash md5; - md5.append((uint8_t *)str.c_str(), str.size()); - return md5.get_hex(); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_md5.h b/intern/cycles/util/util_md5.h deleted file mode 100644 index 3102a0f4bad..00000000000 --- a/intern/cycles/util/util_md5.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (C) 1999, 2002 Aladdin Enterprises. All rights reserved. 
- * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgment in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - * - * L. Peter Deutsch - * ghost@aladdin.com - */ - -/* MD5 - * - * Simply MD5 hash computation, used by disk cache. Adapted from external - * code, with minor code modifications done to remove some unused code and - * change code style. */ - -#ifndef __UTIL_MD5_H__ -#define __UTIL_MD5_H__ - -#include "util/util_string.h" -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -class MD5Hash { - public: - MD5Hash(); - ~MD5Hash(); - - void append(const uint8_t *data, int size); - void append(const string &str); - bool append_file(const string &filepath); - string get_hex(); - - protected: - void process(const uint8_t *data); - void finish(uint8_t digest[16]); - - uint32_t count[2]; /* message length in bits, LSW first. 
*/ - uint32_t abcd[4]; /* digest buffer */ - uint8_t buf[64]; /* accumulate block */ -}; - -string util_md5_string(const string &str); - -CCL_NAMESPACE_END - -#endif /* __UTIL_MD5_H__ */ diff --git a/intern/cycles/util/util_murmurhash.cpp b/intern/cycles/util/util_murmurhash.cpp deleted file mode 100644 index 5d728769fe9..00000000000 --- a/intern/cycles/util/util_murmurhash.cpp +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright 2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* This is taken from alShaders/Cryptomatte/MurmurHash3.h: - * - * MurmurHash3 was written by Austin Appleby, and is placed in the public - * domain. The author hereby disclaims copyright to this source code. - */ - -#include -#include - -#include "util/util_algorithm.h" -#include "util/util_murmurhash.h" - -#if defined(_MSC_VER) -# define ROTL32(x, y) _rotl(x, y) -# define ROTL64(x, y) _rotl64(x, y) -# define BIG_CONSTANT(x) (x) -#else -ccl_device_inline uint32_t rotl32(uint32_t x, int8_t r) -{ - return (x << r) | (x >> (32 - r)); -} -# define ROTL32(x, y) rotl32(x, y) -# define BIG_CONSTANT(x) (x##LLU) -#endif - -CCL_NAMESPACE_BEGIN - -/* Block read - if your platform needs to do endian-swapping or can only - * handle aligned reads, do the conversion here. 
*/ -ccl_device_inline uint32_t mm_hash_getblock32(const uint32_t *p, int i) -{ - return p[i]; -} - -/* Finalization mix - force all bits of a hash block to avalanche */ -ccl_device_inline uint32_t mm_hash_fmix32(uint32_t h) -{ - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - h ^= h >> 16; - return h; -} - -uint32_t util_murmur_hash3(const void *key, int len, uint32_t seed) -{ - const uint8_t *data = (const uint8_t *)key; - const int nblocks = len / 4; - - uint32_t h1 = seed; - - const uint32_t c1 = 0xcc9e2d51; - const uint32_t c2 = 0x1b873593; - - const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4); - - for (int i = -nblocks; i; i++) { - uint32_t k1 = mm_hash_getblock32(blocks, i); - - k1 *= c1; - k1 = ROTL32(k1, 15); - k1 *= c2; - - h1 ^= k1; - h1 = ROTL32(h1, 13); - h1 = h1 * 5 + 0xe6546b64; - } - - const uint8_t *tail = (const uint8_t *)(data + nblocks * 4); - - uint32_t k1 = 0; - - switch (len & 3) { - case 3: - k1 ^= tail[2] << 16; - ATTR_FALLTHROUGH; - case 2: - k1 ^= tail[1] << 8; - ATTR_FALLTHROUGH; - case 1: - k1 ^= tail[0]; - k1 *= c1; - k1 = ROTL32(k1, 15); - k1 *= c2; - h1 ^= k1; - } - - h1 ^= len; - h1 = mm_hash_fmix32(h1); - return h1; -} - -/* This is taken from the cryptomatte specification 1.0 */ -float util_hash_to_float(uint32_t hash) -{ - uint32_t mantissa = hash & ((1 << 23) - 1); - uint32_t exponent = (hash >> 23) & ((1 << 8) - 1); - exponent = max(exponent, (uint32_t)1); - exponent = min(exponent, (uint32_t)254); - exponent = exponent << 23; - uint32_t sign = (hash >> 31); - sign = sign << 31; - uint32_t float_bits = sign | exponent | mantissa; - float f; - memcpy(&f, &float_bits, sizeof(uint32_t)); - return f; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_murmurhash.h b/intern/cycles/util/util_murmurhash.h deleted file mode 100644 index 2ec87efd87a..00000000000 --- a/intern/cycles/util/util_murmurhash.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright 2018 Blender Foundation - * - * Licensed 
under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_MURMURHASH_H__ -#define __UTIL_MURMURHASH_H__ - -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -uint32_t util_murmur_hash3(const void *key, int len, uint32_t seed); -float util_hash_to_float(uint32_t hash); - -CCL_NAMESPACE_END - -#endif /* __UTIL_MURMURHASH_H__ */ diff --git a/intern/cycles/util/util_opengl.h b/intern/cycles/util/util_opengl.h deleted file mode 100644 index 7a8d5eec1f9..00000000000 --- a/intern/cycles/util/util_opengl.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_OPENGL_H__ -#define __UTIL_OPENGL_H__ - -/* OpenGL header includes, used everywhere we use OpenGL, to deal with - * platform differences in one central place. 
*/ - -#include - -#endif /* __UTIL_OPENGL_H__ */ diff --git a/intern/cycles/util/util_openimagedenoise.h b/intern/cycles/util/util_openimagedenoise.h deleted file mode 100644 index 898c634141e..00000000000 --- a/intern/cycles/util/util_openimagedenoise.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_OPENIMAGEDENOISE_H__ -#define __UTIL_OPENIMAGEDENOISE_H__ - -#ifdef WITH_OPENIMAGEDENOISE -# include -#endif - -#include "util_system.h" - -CCL_NAMESPACE_BEGIN - -static inline bool openimagedenoise_supported() -{ -#ifdef WITH_OPENIMAGEDENOISE -# ifdef __APPLE__ - /* Always supported through Accelerate framework BNNS. */ - return true; -# else - return system_cpu_support_sse41(); -# endif -#else - return false; -#endif -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_OPENIMAGEDENOISE_H__ */ diff --git a/intern/cycles/util/util_openvdb.h b/intern/cycles/util/util_openvdb.h deleted file mode 100644 index ae5326e3199..00000000000 --- a/intern/cycles/util/util_openvdb.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright 2011-2020 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_OPENVDB_H__ -#define __UTIL_OPENVDB_H__ - -#ifdef WITH_OPENVDB -# include - -namespace openvdb { - -using Vec4fTree = tree::Tree4::Type; -using Vec4fGrid = Grid; - -/* Apply operation to known grid types. */ -template -bool grid_type_operation(const openvdb::GridBase::ConstPtr &grid, OpType &&op) -{ - if (grid->isType()) { - return op.template operator()(grid); - } - else if (grid->isType()) { - return op.template operator()(grid); - } - else if (grid->isType()) { - return op.template operator()(grid); - } - else if (grid->isType()) { - return op.template operator()(grid); - } - else if (grid->isType()) { - return op.template operator()(grid); - } - else if (grid->isType()) { - return op.template operator()(grid); - } - else if (grid->isType()) { - return op.template operator()(grid); - } - else if (grid->isType()) { - return op.template operator()(grid); - } - else if (grid->isType()) { - return op.template operator()(grid); - } - else { - return false; - } -} - -}; // namespace openvdb - -#endif - -#endif /* __UTIL_OPENVDB_H__ */ diff --git a/intern/cycles/util/util_optimization.h b/intern/cycles/util/util_optimization.h deleted file mode 100644 index 7ecd3893cf4..00000000000 --- a/intern/cycles/util/util_optimization.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_OPTIMIZATION_H__ -#define __UTIL_OPTIMIZATION_H__ - -#ifndef __KERNEL_GPU__ - -/* x86 - * - * Compile a regular, SSE2 and SSE3 kernel. */ - -# if defined(i386) || defined(_M_IX86) - -/* We require minimum SSE2 support on x86, so auto enable. */ -# define __KERNEL_SSE2__ -# ifdef WITH_KERNEL_SSE2 -# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 -# endif -# ifdef WITH_KERNEL_SSE3 -# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 -# endif - -/* x86-64 - * - * Compile a regular (includes SSE2), SSE3, SSE 4.1, AVX and AVX2 kernel. */ - -# elif defined(__x86_64__) || defined(_M_X64) - -/* SSE2 is always available on x86-64 CPUs, so auto enable */ -# define __KERNEL_SSE2__ -/* no SSE2 kernel on x86-64, part of regular kernel */ -# ifdef WITH_KERNEL_SSE3 -# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 -# endif -# ifdef WITH_KERNEL_SSE41 -# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 -# endif -# ifdef WITH_KERNEL_AVX -# define WITH_CYCLES_OPTIMIZED_KERNEL_AVX -# endif -# ifdef WITH_KERNEL_AVX2 -# define WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 -# endif - -/* Arm Neon - * - * Compile a SSE4 kernel emulated with Neon. Most code is shared with - * SSE, some specializations for performance and compatibility are made - * made testing for __KERNEL_NEON__. 
*/ - -# elif defined(__ARM_NEON) && defined(WITH_SSE2NEON) - -# define __KERNEL_NEON__ -# define __KERNEL_SSE__ -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSE41__ - -# endif - -#endif - -#endif /* __UTIL_OPTIMIZATION_H__ */ diff --git a/intern/cycles/util/util_param.h b/intern/cycles/util/util_param.h deleted file mode 100644 index 3f8e2d6d700..00000000000 --- a/intern/cycles/util/util_param.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_PARAM_H__ -#define __UTIL_PARAM_H__ - -/* Parameter value lists from OpenImageIO are used to store custom properties - * on various data, which can then later be used in shaders. 
*/ - -#include -#include -#include - -CCL_NAMESPACE_BEGIN - -OIIO_NAMESPACE_USING - -static constexpr TypeDesc TypeFloat2(TypeDesc::FLOAT, TypeDesc::VEC2); -static constexpr TypeDesc TypeRGBA(TypeDesc::FLOAT, TypeDesc::VEC4, TypeDesc::COLOR); -static constexpr TypeDesc TypeFloatArray4(TypeDesc::FLOAT, - TypeDesc::SCALAR, - TypeDesc::NOSEMANTICS, - 4); - -CCL_NAMESPACE_END - -#endif /* __UTIL_PARAM_H__ */ diff --git a/intern/cycles/util/util_path.cpp b/intern/cycles/util/util_path.cpp deleted file mode 100644 index c78f4615013..00000000000 --- a/intern/cycles/util/util_path.cpp +++ /dev/null @@ -1,781 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "util/util_path.h" -#include "util/util_md5.h" -#include "util/util_string.h" - -#include -#include -#include - -OIIO_NAMESPACE_USING - -#include - -#include - -#if defined(_WIN32) -# define DIR_SEP '\\' -# define DIR_SEP_ALT '/' -# include -#else -# define DIR_SEP '/' -# include -# include -# include -# include -#endif - -#ifdef HAVE_SHLWAPI_H -# include -#endif - -#include "util/util_map.h" -#include "util/util_windows.h" - -CCL_NAMESPACE_BEGIN - -#ifdef _WIN32 -# if defined(_MSC_VER) || defined(__MINGW64__) -typedef struct _stat64 path_stat_t; -# elif defined(__MINGW32__) -typedef struct _stati64 path_stat_t; -# else -typedef struct _stat path_stat_t; -# endif -# ifndef S_ISDIR -# define S_ISDIR(x) (((x)&_S_IFDIR) == _S_IFDIR) -# endif -#else -typedef struct stat path_stat_t; -#endif - -static string cached_path = ""; -static string cached_user_path = ""; -static string cached_temp_path = ""; -static string cached_xdg_cache_path = ""; - -namespace { - -#ifdef _WIN32 -class directory_iterator { - public: - class path_info { - public: - path_info(const string &path, const WIN32_FIND_DATAW &find_data) - : path_(path), find_data_(find_data) - { - } - - string path() - { - return path_join(path_, string_from_wstring(find_data_.cFileName)); - } - - protected: - const string &path_; - const WIN32_FIND_DATAW &find_data_; - }; - - directory_iterator() : path_info_("", find_data_), h_find_(INVALID_HANDLE_VALUE) - { - } - - explicit directory_iterator(const string &path) : path_(path), path_info_(path, find_data_) - { - string wildcard = path; - if (wildcard[wildcard.size() - 1] != DIR_SEP) { - wildcard += DIR_SEP; - } - wildcard += "*"; - h_find_ = FindFirstFileW(string_to_wstring(wildcard).c_str(), &find_data_); - if (h_find_ != INVALID_HANDLE_VALUE) { - skip_dots(); - } - } - - ~directory_iterator() - { - if (h_find_ != INVALID_HANDLE_VALUE) { - FindClose(h_find_); - } - } - - directory_iterator &operator++() - { - step(); - return *this; - } - - 
path_info *operator->() - { - return &path_info_; - } - - bool operator!=(const directory_iterator &other) - { - return h_find_ != other.h_find_; - } - - protected: - bool step() - { - if (do_step()) { - return skip_dots(); - } - return false; - } - - bool do_step() - { - if (h_find_ != INVALID_HANDLE_VALUE) { - bool result = FindNextFileW(h_find_, &find_data_) == TRUE; - if (!result) { - FindClose(h_find_); - h_find_ = INVALID_HANDLE_VALUE; - } - return result; - } - return false; - } - - bool skip_dots() - { - while (wcscmp(find_data_.cFileName, L".") == 0 || wcscmp(find_data_.cFileName, L"..") == 0) { - if (!do_step()) { - return false; - } - } - return true; - } - - string path_; - path_info path_info_; - WIN32_FIND_DATAW find_data_; - HANDLE h_find_; -}; -#else /* _WIN32 */ - -class directory_iterator { - public: - class path_info { - public: - explicit path_info(const string &path) : path_(path), entry_(NULL) - { - } - - string path() - { - return path_join(path_, entry_->d_name); - } - - void current_entry_set(const struct dirent *entry) - { - entry_ = entry; - } - - protected: - const string &path_; - const struct dirent *entry_; - }; - - directory_iterator() : path_info_(""), name_list_(NULL), num_entries_(-1), cur_entry_(-1) - { - } - - explicit directory_iterator(const string &path) : path_(path), path_info_(path_), cur_entry_(0) - { - num_entries_ = scandir(path.c_str(), &name_list_, NULL, alphasort); - if (num_entries_ < 0) { - perror("scandir"); - } - else { - skip_dots(); - } - } - - ~directory_iterator() - { - destroy_name_list(); - } - - directory_iterator &operator++() - { - step(); - return *this; - } - - path_info *operator->() - { - path_info_.current_entry_set(name_list_[cur_entry_]); - return &path_info_; - } - - bool operator!=(const directory_iterator &other) - { - return name_list_ != other.name_list_; - } - - protected: - bool step() - { - if (do_step()) { - return skip_dots(); - } - return false; - } - - bool do_step() - { - 
++cur_entry_; - if (cur_entry_ >= num_entries_) { - destroy_name_list(); - return false; - } - return true; - } - - /* Skip . and .. folders. */ - bool skip_dots() - { - while (strcmp(name_list_[cur_entry_]->d_name, ".") == 0 || - strcmp(name_list_[cur_entry_]->d_name, "..") == 0) { - if (!step()) { - return false; - } - } - return true; - } - - void destroy_name_list() - { - if (name_list_ == NULL) { - return; - } - for (int i = 0; i < num_entries_; ++i) { - free(name_list_[i]); - } - free(name_list_); - name_list_ = NULL; - } - - string path_; - path_info path_info_; - struct dirent **name_list_; - int num_entries_, cur_entry_; -}; - -#endif /* _WIN32 */ - -size_t find_last_slash(const string &path) -{ - for (size_t i = 0; i < path.size(); ++i) { - size_t index = path.size() - 1 - i; -#ifdef _WIN32 - if (path[index] == DIR_SEP || path[index] == DIR_SEP_ALT) -#else - if (path[index] == DIR_SEP) -#endif - { - return index; - } - } - return string::npos; -} - -} /* namespace */ - -static char *path_specials(const string &sub) -{ - static bool env_init = false; - static char *env_shader_path; - static char *env_source_path; - if (!env_init) { - env_shader_path = getenv("CYCLES_SHADER_PATH"); - /* NOTE: It is KERNEL in env variable for compatibility reasons. 
*/ - env_source_path = getenv("CYCLES_KERNEL_PATH"); - env_init = true; - } - if (env_shader_path != NULL && sub == "shader") { - return env_shader_path; - } - else if (env_shader_path != NULL && sub == "source") { - return env_source_path; - } - return NULL; -} - -#if defined(__linux__) || defined(__APPLE__) -static string path_xdg_cache_get() -{ - const char *home = getenv("XDG_CACHE_HOME"); - if (home) { - return string(home); - } - else { - home = getenv("HOME"); - if (home == NULL) { - home = getpwuid(getuid())->pw_dir; - } - return path_join(string(home), ".cache"); - } -} -#endif - -void path_init(const string &path, const string &user_path, const string &temp_path) -{ - cached_path = path; - cached_user_path = user_path; - cached_temp_path = temp_path; - -#ifdef _MSC_VER - // workaround for https://svn.boost.org/trac/boost/ticket/6320 - // indirectly init boost codec here since it's not thread safe, and can - // cause crashes when it happens in multithreaded image load - OIIO::Filesystem::exists(path); -#endif -} - -string path_get(const string &sub) -{ - char *special = path_specials(sub); - if (special != NULL) - return special; - - if (cached_path == "") - cached_path = path_dirname(Sysutil::this_program_path()); - - return path_join(cached_path, sub); -} - -string path_user_get(const string &sub) -{ - if (cached_user_path == "") - cached_user_path = path_dirname(Sysutil::this_program_path()); - - return path_join(cached_user_path, sub); -} - -string path_cache_get(const string &sub) -{ -#if defined(__linux__) || defined(__APPLE__) - if (cached_xdg_cache_path == "") { - cached_xdg_cache_path = path_xdg_cache_get(); - } - string result = path_join(cached_xdg_cache_path, "cycles"); - return path_join(result, sub); -#else - /* TODO(sergey): What that should be on Windows? 
*/ - return path_user_get(path_join("cache", sub)); -#endif -} - -string path_temp_get(const string &sub) -{ - if (cached_temp_path == "") { - cached_temp_path = Filesystem::temp_directory_path(); - } - - return path_join(cached_temp_path, sub); -} - -#if defined(__linux__) || defined(__APPLE__) -string path_xdg_home_get(const string &sub = ""); -#endif - -string path_filename(const string &path) -{ - size_t index = find_last_slash(path); - if (index != string::npos) { - /* Corner cases to match boost behavior. */ -#ifndef _WIN32 - if (index == 0 && path.size() == 1) { - return path; - } -#endif - if (index == path.size() - 1) { -#ifdef _WIN32 - if (index == 2) { - return string(1, DIR_SEP); - } -#endif - return "."; - } - return path.substr(index + 1, path.size() - index - 1); - } - return path; -} - -string path_dirname(const string &path) -{ - size_t index = find_last_slash(path); - if (index != string::npos) { -#ifndef _WIN32 - if (index == 0 && path.size() > 1) { - return string(1, DIR_SEP); - } -#endif - return path.substr(0, index); - } - return ""; -} - -string path_join(const string &dir, const string &file) -{ - if (dir.size() == 0) { - return file; - } - if (file.size() == 0) { - return dir; - } - string result = dir; -#ifndef _WIN32 - if (result[result.size() - 1] != DIR_SEP && file[0] != DIR_SEP) -#else - if (result[result.size() - 1] != DIR_SEP && result[result.size() - 1] != DIR_SEP_ALT && - file[0] != DIR_SEP && file[0] != DIR_SEP_ALT) -#endif - { - result += DIR_SEP; - } - result += file; - return result; -} - -string path_escape(const string &path) -{ - string result = path; - string_replace(result, " ", "\\ "); - return result; -} - -bool path_is_relative(const string &path) -{ -#ifdef _WIN32 -# ifdef HAVE_SHLWAPI_H - return PathIsRelative(path.c_str()); -# else /* HAVE_SHLWAPI_H */ - if (path.size() >= 3) { - return !(((path[0] >= 'a' && path[0] <= 'z') || (path[0] >= 'A' && path[0] <= 'Z')) && - path[1] == ':' && path[2] == DIR_SEP); - } - 
return true; -# endif /* HAVE_SHLWAPI_H */ -#else /* _WIN32 */ - if (path.size() == 0) { - return 1; - } - return path[0] != DIR_SEP; -#endif /* _WIN32 */ -} - -#ifdef _WIN32 -/* Add a slash if the UNC path points to a share. */ -static string path_unc_add_slash_to_share(const string &path) -{ - size_t slash_after_server = path.find(DIR_SEP, 2); - if (slash_after_server != string::npos) { - size_t slash_after_share = path.find(DIR_SEP, slash_after_server + 1); - if (slash_after_share == string::npos) { - return path + DIR_SEP; - } - } - return path; -} - -/* Convert: - * \\?\UNC\server\share\folder\... to \\server\share\folder\... - * \\?\C:\ to C:\ and \\?\C:\folder\... to C:\folder\... - */ -static string path_unc_to_short(const string &path) -{ - size_t len = path.size(); - if ((len > 3) && (path[0] == DIR_SEP) && (path[1] == DIR_SEP) && (path[2] == '?') && - ((path[3] == DIR_SEP) || (path[3] == DIR_SEP_ALT))) { - if ((len > 5) && (path[5] == ':')) { - return path.substr(4, len - 4); - } - else if ((len > 7) && (path.substr(4, 3) == "UNC") && - ((path[7] == DIR_SEP) || (path[7] == DIR_SEP_ALT))) { - return "\\\\" + path.substr(8, len - 8); - } - } - return path; -} - -static string path_cleanup_unc(const string &path) -{ - string result = path_unc_to_short(path); - if (path.size() > 2) { - /* It's possible path is now a non-UNC. */ - if (result[0] == DIR_SEP && result[1] == DIR_SEP) { - return path_unc_add_slash_to_share(result); - } - } - return result; -} - -/* Make path compatible for stat() functions. */ -static string path_make_compatible(const string &path) -{ - string result = path; - /* In Windows stat() doesn't recognize dir ending on a slash. */ - if (result.size() > 3 && result[result.size() - 1] == DIR_SEP) { - result.resize(result.size() - 1); - } - /* Clean up UNC path. 
*/ - if ((path.size() >= 3) && (path[0] == DIR_SEP) && (path[1] == DIR_SEP)) { - result = path_cleanup_unc(result); - } - /* Make sure volume-only path ends up wit ha directory separator. */ - if (result.size() == 2 && result[1] == ':') { - result += DIR_SEP; - } - return result; -} - -static int path_wstat(const wstring &path_wc, path_stat_t *st) -{ -# if defined(_MSC_VER) || defined(__MINGW64__) - return _wstat64(path_wc.c_str(), st); -# elif defined(__MINGW32__) - return _wstati64(path_wc.c_str(), st); -# else - return _wstat(path_wc.c_str(), st); -# endif -} - -static int path_stat(const string &path, path_stat_t *st) -{ - wstring path_wc = string_to_wstring(path); - return path_wstat(path_wc, st); -} -#else /* _WIN32 */ -static int path_stat(const string &path, path_stat_t *st) -{ - return stat(path.c_str(), st); -} -#endif /* _WIN32 */ - -size_t path_file_size(const string &path) -{ - path_stat_t st; - if (path_stat(path, &st) != 0) { - return -1; - } - return st.st_size; -} - -bool path_exists(const string &path) -{ -#ifdef _WIN32 - string fixed_path = path_make_compatible(path); - wstring path_wc = string_to_wstring(fixed_path); - path_stat_t st; - if (path_wstat(path_wc, &st) != 0) { - return false; - } - return st.st_mode != 0; -#else /* _WIN32 */ - struct stat st; - if (stat(path.c_str(), &st) != 0) { - return 0; - } - return st.st_mode != 0; -#endif /* _WIN32 */ -} - -bool path_is_directory(const string &path) -{ - path_stat_t st; - if (path_stat(path, &st) != 0) { - return false; - } - return S_ISDIR(st.st_mode); -} - -static void path_files_md5_hash_recursive(MD5Hash &hash, const string &dir) -{ - if (path_exists(dir)) { - directory_iterator it(dir), it_end; - - for (; it != it_end; ++it) { - if (path_is_directory(it->path())) { - path_files_md5_hash_recursive(hash, it->path()); - } - else { - string filepath = it->path(); - - hash.append((const uint8_t *)filepath.c_str(), filepath.size()); - hash.append_file(filepath); - } - } - } -} - -string 
path_files_md5_hash(const string &dir) -{ - /* computes md5 hash of all files in the directory */ - MD5Hash hash; - - path_files_md5_hash_recursive(hash, dir); - - return hash.get_hex(); -} - -static bool create_directories_recursivey(const string &path) -{ - if (path_is_directory(path)) { - /* Directory already exists, nothing to do. */ - return true; - } - if (path_exists(path)) { - /* File exists and it's not a directory. */ - return false; - } - - string parent = path_dirname(path); - if (parent.size() > 0 && parent != path) { - if (!create_directories_recursivey(parent)) { - return false; - } - } - -#ifdef _WIN32 - wstring path_wc = string_to_wstring(path); - return _wmkdir(path_wc.c_str()) == 0; -#else - return mkdir(path.c_str(), 0777) == 0; -#endif -} - -void path_create_directories(const string &filepath) -{ - string path = path_dirname(filepath); - create_directories_recursivey(path); -} - -bool path_write_binary(const string &path, const vector &binary) -{ - path_create_directories(path); - - /* write binary file from memory */ - FILE *f = path_fopen(path, "wb"); - - if (!f) - return false; - - if (binary.size() > 0) - fwrite(&binary[0], sizeof(uint8_t), binary.size(), f); - - fclose(f); - - return true; -} - -bool path_write_text(const string &path, string &text) -{ - vector binary(text.length(), 0); - std::copy(text.begin(), text.end(), binary.begin()); - - return path_write_binary(path, binary); -} - -bool path_read_binary(const string &path, vector &binary) -{ - /* read binary file into memory */ - FILE *f = path_fopen(path, "rb"); - - if (!f) { - binary.resize(0); - return false; - } - - binary.resize(path_file_size(path)); - - if (binary.size() == 0) { - fclose(f); - return false; - } - - if (fread(&binary[0], sizeof(uint8_t), binary.size(), f) != binary.size()) { - fclose(f); - return false; - } - - fclose(f); - - return true; -} - -bool path_read_text(const string &path, string &text) -{ - vector binary; - - if (!path_exists(path) || 
!path_read_binary(path, binary)) - return false; - - const char *str = (const char *)&binary[0]; - size_t size = binary.size(); - text = string(str, size); - - return true; -} - -uint64_t path_modified_time(const string &path) -{ - path_stat_t st; - if (path_stat(path, &st) != 0) { - return 0; - } - return st.st_mtime; -} - -bool path_remove(const string &path) -{ - return remove(path.c_str()) == 0; -} - -FILE *path_fopen(const string &path, const string &mode) -{ -#ifdef _WIN32 - wstring path_wc = string_to_wstring(path); - wstring mode_wc = string_to_wstring(mode); - return _wfopen(path_wc.c_str(), mode_wc.c_str()); -#else - return fopen(path.c_str(), mode.c_str()); -#endif -} - -void path_cache_clear_except(const string &name, const set &except) -{ - string dir = path_user_get("cache"); - - if (path_exists(dir)) { - directory_iterator it(dir), it_end; - - for (; it != it_end; ++it) { - string filename = path_filename(it->path()); - - if (string_startswith(filename, name.c_str())) - if (except.find(filename) == except.end()) - path_remove(it->path()); - } - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_path.h b/intern/cycles/util/util_path.h deleted file mode 100644 index f899bc2e01c..00000000000 --- a/intern/cycles/util/util_path.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __UTIL_PATH_H__ -#define __UTIL_PATH_H__ - -/* Utility functions to get paths to files distributed with the program. For - * the standalone apps, paths are relative to the executable, for dynamically - * linked libraries, the path to the library may be set with path_init, which - * then makes all paths relative to that. */ - -#include - -#include "util/util_set.h" -#include "util/util_string.h" -#include "util/util_types.h" -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -/* program paths */ -void path_init(const string &path = "", const string &user_path = "", const string &tmp_path = ""); -string path_get(const string &sub = ""); -string path_user_get(const string &sub = ""); -string path_temp_get(const string &sub = ""); -string path_cache_get(const string &sub = ""); - -/* path string manipulation */ -string path_filename(const string &path); -string path_dirname(const string &path); -string path_join(const string &dir, const string &file); -string path_escape(const string &path); -bool path_is_relative(const string &path); - -/* file info */ -size_t path_file_size(const string &path); -bool path_exists(const string &path); -bool path_is_directory(const string &path); -string path_files_md5_hash(const string &dir); -uint64_t path_modified_time(const string &path); - -/* directory utility */ -void path_create_directories(const string &path); - -/* file read/write utilities */ -FILE *path_fopen(const string &path, const string &mode); - -bool path_write_binary(const string &path, const vector &binary); -bool path_write_text(const string &path, string &text); -bool path_read_binary(const string &path, vector &binary); -bool path_read_text(const string &path, string &text); - -/* File manipulation. 
*/ -bool path_remove(const string &path); - -/* cache utility */ -void path_cache_clear_except(const string &name, const set &except); - -CCL_NAMESPACE_END - -#endif diff --git a/intern/cycles/util/util_profiling.cpp b/intern/cycles/util/util_profiling.cpp deleted file mode 100644 index 5343f076e22..00000000000 --- a/intern/cycles/util/util_profiling.cpp +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright 2011-2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "util/util_profiling.h" -#include "util/util_algorithm.h" -#include "util/util_foreach.h" -#include "util/util_set.h" - -CCL_NAMESPACE_BEGIN - -Profiler::Profiler() : do_stop_worker(true), worker(NULL) -{ -} - -Profiler::~Profiler() -{ - assert(worker == NULL); -} - -void Profiler::run() -{ - uint64_t updates = 0; - auto start_time = std::chrono::system_clock::now(); - while (!do_stop_worker) { - thread_scoped_lock lock(mutex); - foreach (ProfilingState *state, states) { - uint32_t cur_event = state->event; - int32_t cur_shader = state->shader; - int32_t cur_object = state->object; - - /* The state reads/writes should be atomic, but just to be sure - * check the values for validity anyways. 
*/ - if (cur_event < PROFILING_NUM_EVENTS) { - event_samples[cur_event]++; - } - - if (cur_shader >= 0 && cur_shader < shader_samples.size()) { - shader_samples[cur_shader]++; - } - - if (cur_object >= 0 && cur_object < object_samples.size()) { - object_samples[cur_object]++; - } - } - lock.unlock(); - - /* Relative waits always overshoot a bit, so just waiting 1ms every - * time would cause the sampling to drift over time. - * By keeping track of the absolute time, the wait times correct themselves - - * if one wait overshoots a lot, the next one will be shorter to compensate. */ - updates++; - std::this_thread::sleep_until(start_time + updates * std::chrono::milliseconds(1)); - } -} - -void Profiler::reset(int num_shaders, int num_objects) -{ - bool running = (worker != NULL); - if (running) { - stop(); - } - - /* Resize and clear the accumulation vectors. */ - shader_hits.assign(num_shaders, 0); - object_hits.assign(num_objects, 0); - - event_samples.assign(PROFILING_NUM_EVENTS, 0); - shader_samples.assign(num_shaders, 0); - object_samples.assign(num_objects, 0); - - if (running) { - start(); - } -} - -void Profiler::start() -{ - assert(worker == NULL); - do_stop_worker = false; - worker = new thread(function_bind(&Profiler::run, this)); -} - -void Profiler::stop() -{ - if (worker != NULL) { - do_stop_worker = true; - - worker->join(); - delete worker; - worker = NULL; - } -} - -void Profiler::add_state(ProfilingState *state) -{ - thread_scoped_lock lock(mutex); - - /* Add the ProfilingState from the list of sampled states. */ - assert(std::find(states.begin(), states.end(), state) == states.end()); - states.push_back(state); - - /* Resize thread-local hit counters. */ - state->shader_hits.assign(shader_hits.size(), 0); - state->object_hits.assign(object_hits.size(), 0); - - /* Initialize the state. 
*/ - state->event = PROFILING_UNKNOWN; - state->shader = -1; - state->object = -1; - state->active = true; -} - -void Profiler::remove_state(ProfilingState *state) -{ - thread_scoped_lock lock(mutex); - - /* Remove the ProfilingState from the list of sampled states. */ - states.erase(std::remove(states.begin(), states.end(), state), states.end()); - state->active = false; - - /* Merge thread-local hit counters. */ - assert(shader_hits.size() == state->shader_hits.size()); - for (int i = 0; i < shader_hits.size(); i++) { - shader_hits[i] += state->shader_hits[i]; - } - - assert(object_hits.size() == state->object_hits.size()); - for (int i = 0; i < object_hits.size(); i++) { - object_hits[i] += state->object_hits[i]; - } -} - -uint64_t Profiler::get_event(ProfilingEvent event) -{ - assert(worker == NULL); - return event_samples[event]; -} - -bool Profiler::get_shader(int shader, uint64_t &samples, uint64_t &hits) -{ - assert(worker == NULL); - if (shader_samples[shader] == 0) { - return false; - } - samples = shader_samples[shader]; - hits = shader_hits[shader]; - return true; -} - -bool Profiler::get_object(int object, uint64_t &samples, uint64_t &hits) -{ - assert(worker == NULL); - if (object_samples[object] == 0) { - return false; - } - samples = object_samples[object]; - hits = object_hits[object]; - return true; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_profiling.h b/intern/cycles/util/util_profiling.h deleted file mode 100644 index 96bb682c50e..00000000000 --- a/intern/cycles/util/util_profiling.h +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright 2011-2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_PROFILING_H__ -#define __UTIL_PROFILING_H__ - -#include - -#include "util/util_map.h" -#include "util/util_thread.h" -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -enum ProfilingEvent : uint32_t { - PROFILING_UNKNOWN, - PROFILING_RAY_SETUP, - - PROFILING_INTERSECT_CLOSEST, - PROFILING_INTERSECT_SUBSURFACE, - PROFILING_INTERSECT_SHADOW, - PROFILING_INTERSECT_VOLUME_STACK, - - PROFILING_SHADE_SURFACE_SETUP, - PROFILING_SHADE_SURFACE_EVAL, - PROFILING_SHADE_SURFACE_DIRECT_LIGHT, - PROFILING_SHADE_SURFACE_INDIRECT_LIGHT, - PROFILING_SHADE_SURFACE_AO, - PROFILING_SHADE_SURFACE_PASSES, - - PROFILING_SHADE_VOLUME_SETUP, - PROFILING_SHADE_VOLUME_INTEGRATE, - PROFILING_SHADE_VOLUME_DIRECT_LIGHT, - PROFILING_SHADE_VOLUME_INDIRECT_LIGHT, - - PROFILING_SHADE_SHADOW_SETUP, - PROFILING_SHADE_SHADOW_SURFACE, - PROFILING_SHADE_SHADOW_VOLUME, - - PROFILING_SHADE_LIGHT_SETUP, - PROFILING_SHADE_LIGHT_EVAL, - - PROFILING_NUM_EVENTS, -}; - -/* Contains the current execution state of a worker thread. - * These values are constantly updated by the worker. - * Periodically the profiler thread will wake up, read them - * and update its internal counters based on it. - * - * Atomics aren't needed here since we're only doing direct - * writes and reads to (4-byte-aligned) uint32_t, which is - * guaranteed to be atomic on x86 since the 486. - * Memory ordering is not guaranteed but does not matter. 
- * - * And even on other architectures, the extremely rare corner - * case of reading an intermediate state could at worst result - * in a single incorrect sample. */ -struct ProfilingState { - volatile uint32_t event = PROFILING_UNKNOWN; - volatile int32_t shader = -1; - volatile int32_t object = -1; - volatile bool active = false; - - vector shader_hits; - vector object_hits; -}; - -class Profiler { - public: - Profiler(); - ~Profiler(); - - void reset(int num_shaders, int num_objects); - - void start(); - void stop(); - - void add_state(ProfilingState *state); - void remove_state(ProfilingState *state); - - uint64_t get_event(ProfilingEvent event); - bool get_shader(int shader, uint64_t &samples, uint64_t &hits); - bool get_object(int object, uint64_t &samples, uint64_t &hits); - - protected: - void run(); - - /* Tracks how often the worker was in each ProfilingEvent while sampling, - * so multiplying the values by the sample frequency (currently 1ms) - * gives the approximate time spent in each state. */ - vector event_samples; - vector shader_samples; - vector object_samples; - - /* Tracks the total amounts every object/shader was hit. - * Used to evaluate relative cost, written by the render thread. - * Indexed by the shader and object IDs that the kernel also uses - * to index __object_flag and __shaders. 
*/ - vector shader_hits; - vector object_hits; - - volatile bool do_stop_worker; - thread *worker; - - thread_mutex mutex; - vector states; -}; - -class ProfilingHelper { - public: - ProfilingHelper(ProfilingState *state, ProfilingEvent event) : state(state) - { - previous_event = state->event; - state->event = event; - } - - ~ProfilingHelper() - { - state->event = previous_event; - } - - inline void set_event(ProfilingEvent event) - { - state->event = event; - } - - protected: - ProfilingState *state; - uint32_t previous_event; -}; - -class ProfilingWithShaderHelper : public ProfilingHelper { - public: - ProfilingWithShaderHelper(ProfilingState *state, ProfilingEvent event) - : ProfilingHelper(state, event) - { - } - - ~ProfilingWithShaderHelper() - { - state->object = -1; - state->shader = -1; - } - - inline void set_shader(int object, int shader) - { - if (state->active) { - state->shader = shader; - state->object = object; - - if (shader >= 0) { - assert(shader < state->shader_hits.size()); - state->shader_hits[shader]++; - } - - if (object >= 0) { - assert(object < state->object_hits.size()); - state->object_hits[object]++; - } - } - } -}; - -CCL_NAMESPACE_END - -#endif /* __UTIL_PROFILING_H__ */ diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h deleted file mode 100644 index 176ee11e1e9..00000000000 --- a/intern/cycles/util/util_progress.h +++ /dev/null @@ -1,370 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_PROGRESS_H__ -#define __UTIL_PROGRESS_H__ - -/* Progress - * - * Simple class to communicate progress status messages, timing information, - * update notifications from a job running in another thread. All methods - * except for the constructor/destructor are thread safe. */ - -#include "util/util_function.h" -#include "util/util_string.h" -#include "util/util_thread.h" -#include "util/util_time.h" - -CCL_NAMESPACE_BEGIN - -class Progress { - public: - Progress() - { - pixel_samples = 0; - total_pixel_samples = 0; - current_tile_sample = 0; - rendered_tiles = 0; - denoised_tiles = 0; - start_time = time_dt(); - render_start_time = time_dt(); - end_time = 0.0; - status = "Initializing"; - substatus = ""; - sync_status = ""; - sync_substatus = ""; - update_cb = function_null; - cancel = false; - cancel_message = ""; - error = false; - error_message = ""; - cancel_cb = function_null; - } - - Progress(Progress &progress) - { - *this = progress; - } - - Progress &operator=(Progress &progress) - { - thread_scoped_lock lock(progress.progress_mutex); - - progress.get_status(status, substatus); - - pixel_samples = progress.pixel_samples; - total_pixel_samples = progress.total_pixel_samples; - current_tile_sample = progress.get_current_sample(); - - return *this; - } - - void reset() - { - pixel_samples = 0; - total_pixel_samples = 0; - current_tile_sample = 0; - rendered_tiles = 0; - denoised_tiles = 0; - start_time = time_dt(); - render_start_time = time_dt(); - end_time = 0.0; - status = "Initializing"; - substatus = ""; - sync_status = ""; - sync_substatus = ""; - cancel = false; - cancel_message = ""; - error = false; - error_message = ""; - } - - /* cancel */ - void set_cancel(const string &cancel_message_) - { - thread_scoped_lock lock(progress_mutex); - cancel_message = cancel_message_; - cancel = true; - } - - bool get_cancel() const - { 
- if (!cancel && cancel_cb) - cancel_cb(); - - return cancel; - } - - string get_cancel_message() const - { - thread_scoped_lock lock(progress_mutex); - return cancel_message; - } - - void set_cancel_callback(function function) - { - cancel_cb = function; - } - - /* error */ - void set_error(const string &error_message_) - { - thread_scoped_lock lock(progress_mutex); - error_message = error_message_; - error = true; - /* If error happens we also stop rendering. */ - cancel_message = error_message_; - cancel = true; - } - - bool get_error() const - { - return error; - } - - string get_error_message() const - { - thread_scoped_lock lock(progress_mutex); - return error_message; - } - - /* tile and timing information */ - - void set_start_time() - { - thread_scoped_lock lock(progress_mutex); - - start_time = time_dt(); - end_time = 0.0; - } - - void set_render_start_time() - { - thread_scoped_lock lock(progress_mutex); - - render_start_time = time_dt(); - } - - void add_skip_time(const scoped_timer &start_timer, bool only_render) - { - double skip_time = time_dt() - start_timer.get_start(); - - render_start_time += skip_time; - if (!only_render) { - start_time += skip_time; - } - } - - void get_time(double &total_time_, double &render_time_) const - { - thread_scoped_lock lock(progress_mutex); - - double time = (end_time > 0) ? 
end_time : time_dt(); - - total_time_ = time - start_time; - render_time_ = time - render_start_time; - } - - void set_end_time() - { - end_time = time_dt(); - } - - void reset_sample() - { - thread_scoped_lock lock(progress_mutex); - - pixel_samples = 0; - current_tile_sample = 0; - rendered_tiles = 0; - denoised_tiles = 0; - } - - void set_total_pixel_samples(uint64_t total_pixel_samples_) - { - thread_scoped_lock lock(progress_mutex); - - total_pixel_samples = total_pixel_samples_; - } - - float get_progress() const - { - thread_scoped_lock lock(progress_mutex); - - if (total_pixel_samples > 0) { - return ((float)pixel_samples) / total_pixel_samples; - } - return 0.0f; - } - - void add_samples(uint64_t pixel_samples_, int tile_sample) - { - thread_scoped_lock lock(progress_mutex); - - pixel_samples += pixel_samples_; - current_tile_sample = tile_sample; - } - - void add_samples_update(uint64_t pixel_samples_, int tile_sample) - { - add_samples(pixel_samples_, tile_sample); - set_update(); - } - - void add_finished_tile(bool denoised) - { - thread_scoped_lock lock(progress_mutex); - - if (denoised) { - denoised_tiles++; - } - else { - rendered_tiles++; - } - } - - int get_current_sample() const - { - thread_scoped_lock lock(progress_mutex); - /* Note that the value here always belongs to the last tile that updated, - * so it's only useful if there is only one active tile. 
*/ - return current_tile_sample; - } - - int get_rendered_tiles() const - { - thread_scoped_lock lock(progress_mutex); - return rendered_tiles; - } - - int get_denoised_tiles() const - { - thread_scoped_lock lock(progress_mutex); - return denoised_tiles; - } - - /* status messages */ - - void set_status(const string &status_, const string &substatus_ = "") - { - { - thread_scoped_lock lock(progress_mutex); - status = status_; - substatus = substatus_; - } - - set_update(); - } - - void set_substatus(const string &substatus_) - { - { - thread_scoped_lock lock(progress_mutex); - substatus = substatus_; - } - - set_update(); - } - - void set_sync_status(const string &status_, const string &substatus_ = "") - { - { - thread_scoped_lock lock(progress_mutex); - sync_status = status_; - sync_substatus = substatus_; - } - - set_update(); - } - - void set_sync_substatus(const string &substatus_) - { - { - thread_scoped_lock lock(progress_mutex); - sync_substatus = substatus_; - } - - set_update(); - } - - void get_status(string &status_, string &substatus_) const - { - thread_scoped_lock lock(progress_mutex); - - if (sync_status != "") { - status_ = sync_status; - substatus_ = sync_substatus; - } - else { - status_ = status; - substatus_ = substatus; - } - } - - /* callback */ - - void set_update() - { - if (update_cb) { - thread_scoped_lock lock(update_mutex); - update_cb(); - } - } - - void set_update_callback(function function) - { - update_cb = function; - } - - protected: - mutable thread_mutex progress_mutex; - mutable thread_mutex update_mutex; - function update_cb; - function cancel_cb; - - /* pixel_samples counts how many samples have been rendered over all pixel, not just per pixel. - * This makes the progress estimate more accurate when tiles with different sizes are used. - * - * total_pixel_samples is the total amount of pixel samples that will be rendered. 
*/ - uint64_t pixel_samples, total_pixel_samples; - /* Stores the current sample count of the last tile that called the update function. - * It's used to display the sample count if only one tile is active. */ - int current_tile_sample; - /* Stores the number of tiles that's already finished. - * Used to determine whether all but the last tile are finished rendering, - * in which case the current_tile_sample is displayed. */ - int rendered_tiles, denoised_tiles; - - double start_time, render_start_time; - /* End time written when render is done, so it doesn't keep increasing on redraws. */ - double end_time; - - string status; - string substatus; - - string sync_status; - string sync_substatus; - - volatile bool cancel; - string cancel_message; - - volatile bool error; - string error_message; -}; - -CCL_NAMESPACE_END - -#endif /* __UTIL_PROGRESS_H__ */ diff --git a/intern/cycles/util/util_projection.h b/intern/cycles/util/util_projection.h deleted file mode 100644 index 04b4574d75b..00000000000 --- a/intern/cycles/util/util_projection.h +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Copyright 2011-2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_PROJECTION_H__ -#define __UTIL_PROJECTION_H__ - -#include "util/util_transform.h" - -CCL_NAMESPACE_BEGIN - -/* 4x4 projection matrix, perspective or orthographic. 
*/ - -typedef struct ProjectionTransform { - float4 x, y, z, w; /* rows */ - -#ifndef __KERNEL_GPU__ - ProjectionTransform() - { - } - - explicit ProjectionTransform(const Transform &tfm) - : x(tfm.x), y(tfm.y), z(tfm.z), w(make_float4(0.0f, 0.0f, 0.0f, 1.0f)) - { - } -#endif -} ProjectionTransform; - -typedef struct PerspectiveMotionTransform { - ProjectionTransform pre; - ProjectionTransform post; -} PerspectiveMotionTransform; - -/* Functions */ - -ccl_device_inline float3 transform_perspective(ccl_private const ProjectionTransform *t, - const float3 a) -{ - float4 b = make_float4(a.x, a.y, a.z, 1.0f); - float3 c = make_float3(dot(t->x, b), dot(t->y, b), dot(t->z, b)); - float w = dot(t->w, b); - - return (w != 0.0f) ? c / w : zero_float3(); -} - -ccl_device_inline float3 transform_perspective_direction(ccl_private const ProjectionTransform *t, - const float3 a) -{ - float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z, - a.x * t->y.x + a.y * t->y.y + a.z * t->y.z, - a.x * t->z.x + a.y * t->z.y + a.z * t->z.z); - - return c; -} - -#ifndef __KERNEL_GPU__ - -ccl_device_inline Transform projection_to_transform(const ProjectionTransform &a) -{ - Transform tfm = {a.x, a.y, a.z}; - return tfm; -} - -ccl_device_inline ProjectionTransform projection_transpose(const ProjectionTransform &a) -{ - ProjectionTransform t; - - t.x.x = a.x.x; - t.x.y = a.y.x; - t.x.z = a.z.x; - t.x.w = a.w.x; - t.y.x = a.x.y; - t.y.y = a.y.y; - t.y.z = a.z.y; - t.y.w = a.w.y; - t.z.x = a.x.z; - t.z.y = a.y.z; - t.z.z = a.z.z; - t.z.w = a.w.z; - t.w.x = a.x.w; - t.w.y = a.y.w; - t.w.z = a.z.w; - t.w.w = a.w.w; - - return t; -} - -ProjectionTransform projection_inverse(const ProjectionTransform &a); - -ccl_device_inline ProjectionTransform make_projection(float a, - float b, - float c, - float d, - float e, - float f, - float g, - float h, - float i, - float j, - float k, - float l, - float m, - float n, - float o, - float p) -{ - ProjectionTransform t; - - t.x.x = a; - t.x.y = b; - 
t.x.z = c; - t.x.w = d; - t.y.x = e; - t.y.y = f; - t.y.z = g; - t.y.w = h; - t.z.x = i; - t.z.y = j; - t.z.z = k; - t.z.w = l; - t.w.x = m; - t.w.y = n; - t.w.z = o; - t.w.w = p; - - return t; -} -ccl_device_inline ProjectionTransform projection_identity() -{ - return make_projection(1.0f, - 0.0f, - 0.0f, - 0.0f, - 0.0f, - 1.0f, - 0.0f, - 0.0f, - 0.0f, - 0.0f, - 1.0f, - 0.0f, - 0.0f, - 0.0f, - 0.0f, - 1.0f); -} - -ccl_device_inline ProjectionTransform operator*(const ProjectionTransform &a, - const ProjectionTransform &b) -{ - ProjectionTransform c = projection_transpose(b); - ProjectionTransform t; - - t.x = make_float4(dot(a.x, c.x), dot(a.x, c.y), dot(a.x, c.z), dot(a.x, c.w)); - t.y = make_float4(dot(a.y, c.x), dot(a.y, c.y), dot(a.y, c.z), dot(a.y, c.w)); - t.z = make_float4(dot(a.z, c.x), dot(a.z, c.y), dot(a.z, c.z), dot(a.z, c.w)); - t.w = make_float4(dot(a.w, c.x), dot(a.w, c.y), dot(a.w, c.z), dot(a.w, c.w)); - - return t; -} - -ccl_device_inline ProjectionTransform operator*(const ProjectionTransform &a, const Transform &b) -{ - return a * ProjectionTransform(b); -} - -ccl_device_inline ProjectionTransform operator*(const Transform &a, const ProjectionTransform &b) -{ - return ProjectionTransform(a) * b; -} - -ccl_device_inline void print_projection(const char *label, const ProjectionTransform &t) -{ - print_float4(label, t.x); - print_float4(label, t.y); - print_float4(label, t.z); - print_float4(label, t.w); - printf("\n"); -} - -ccl_device_inline ProjectionTransform projection_perspective(float fov, float n, float f) -{ - ProjectionTransform persp = make_projection( - 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, f / (f - n), -f * n / (f - n), 0, 0, 1, 0); - - float inv_angle = 1.0f / tanf(0.5f * fov); - - Transform scale = transform_scale(inv_angle, inv_angle, 1); - - return scale * persp; -} - -ccl_device_inline ProjectionTransform projection_orthographic(float znear, float zfar) -{ - Transform t = transform_scale(1.0f, 1.0f, 1.0f / (zfar - znear)) * - 
transform_translate(0.0f, 0.0f, -znear); - - return ProjectionTransform(t); -} - -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_PROJECTION_H__ */ diff --git a/intern/cycles/util/util_queue.h b/intern/cycles/util/util_queue.h deleted file mode 100644 index 622f4fe3e47..00000000000 --- a/intern/cycles/util/util_queue.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_QUEUE_H__ -#define __UTIL_QUEUE_H__ - -#include - -CCL_NAMESPACE_BEGIN - -using std::queue; - -CCL_NAMESPACE_END - -#endif /* __UTIL_LIST_H__ */ diff --git a/intern/cycles/util/util_rect.h b/intern/cycles/util/util_rect.h deleted file mode 100644 index 32df9327cbd..00000000000 --- a/intern/cycles/util/util_rect.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright 2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __UTIL_RECT_H__ -#define __UTIL_RECT_H__ - -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -/* Rectangles are represented as a int4 containing the coordinates of the lower-left and - * upper-right corners in the order (x0, y0, x1, y1). */ - -ccl_device_inline int4 rect_from_shape(int x0, int y0, int w, int h) -{ - return make_int4(x0, y0, x0 + w, y0 + h); -} - -ccl_device_inline int4 rect_expand(int4 rect, int d) -{ - return make_int4(rect.x - d, rect.y - d, rect.z + d, rect.w + d); -} - -/* Returns the intersection of two rects. */ -ccl_device_inline int4 rect_clip(int4 a, int4 b) -{ - return make_int4(max(a.x, b.x), max(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); -} - -ccl_device_inline bool rect_is_valid(int4 rect) -{ - return (rect.z > rect.x) && (rect.w > rect.y); -} - -/* Returns the local row-major index of the pixel inside the rect. */ -ccl_device_inline int coord_to_local_index(int4 rect, int x, int y) -{ - int w = rect.z - rect.x; - return (y - rect.y) * w + (x - rect.x); -} - -/* Finds the coordinates of a pixel given by its row-major index in the rect, - * and returns whether the pixel is inside it. */ -ccl_device_inline bool local_index_to_coord(int4 rect, - int idx, - ccl_private int *x, - ccl_private int *y) -{ - int w = rect.z - rect.x; - *x = (idx % w) + rect.x; - *y = (idx / w) + rect.y; - return (*y < rect.w); -} - -ccl_device_inline int rect_size(int4 rect) -{ - return (rect.z - rect.x) * (rect.w - rect.y); -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_RECT_H__ */ diff --git a/intern/cycles/util/util_semaphore.h b/intern/cycles/util/util_semaphore.h deleted file mode 100644 index d995b0732b8..00000000000 --- a/intern/cycles/util/util_semaphore.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright 2011-2020 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_SEMAPHORE_H__ -#define __UTIL_SEMAPHORE_H__ - -#include "util/util_thread.h" - -CCL_NAMESPACE_BEGIN - -/* Counting Semaphore - * - * To restrict concurrent access to a resource to a specified number - * of threads. Similar to std::counting_semaphore from C++20. */ - -class thread_counting_semaphore { - public: - explicit thread_counting_semaphore(const int count) : count(count) - { - } - - thread_counting_semaphore(const thread_counting_semaphore &) = delete; - - void acquire() - { - thread_scoped_lock lock(mutex); - while (count == 0) { - condition.wait(lock); - } - count--; - } - - void release() - { - thread_scoped_lock lock(mutex); - count++; - condition.notify_one(); - } - - protected: - thread_mutex mutex; - thread_condition_variable condition; - int count; -}; - -CCL_NAMESPACE_END - -#endif /* __UTIL_SEMAPHORE_H__ */ diff --git a/intern/cycles/util/util_set.h b/intern/cycles/util/util_set.h deleted file mode 100644 index 298e1f7729a..00000000000 --- a/intern/cycles/util/util_set.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_SET_H__ -#define __UTIL_SET_H__ - -#include -#include - -#if defined(_MSC_VER) && (_MSC_VER >= 1900) -# include -#endif - -CCL_NAMESPACE_BEGIN - -using std::set; -using std::unordered_set; - -CCL_NAMESPACE_END - -#endif /* __UTIL_SET_H__ */ diff --git a/intern/cycles/util/util_simd.cpp b/intern/cycles/util/util_simd.cpp deleted file mode 100644 index 861dcf1fe36..00000000000 --- a/intern/cycles/util/util_simd.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright 2011-2013 Intel Corporation - * Modifications Copyright 2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#if (defined(WITH_KERNEL_SSE2)) || (defined(WITH_KERNEL_NATIVE) && defined(__SSE2__)) - -# define __KERNEL_SSE2__ -# include "util/util_simd.h" - -CCL_NAMESPACE_BEGIN - -const __m128 _mm_lookupmask_ps[16] = {_mm_castsi128_ps(_mm_set_epi32(0, 0, 0, 0)), - _mm_castsi128_ps(_mm_set_epi32(0, 0, 0, -1)), - _mm_castsi128_ps(_mm_set_epi32(0, 0, -1, 0)), - _mm_castsi128_ps(_mm_set_epi32(0, 0, -1, -1)), - _mm_castsi128_ps(_mm_set_epi32(0, -1, 0, 0)), - _mm_castsi128_ps(_mm_set_epi32(0, -1, 0, -1)), - _mm_castsi128_ps(_mm_set_epi32(0, -1, -1, 0)), - _mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1)), - _mm_castsi128_ps(_mm_set_epi32(-1, 0, 0, 0)), - _mm_castsi128_ps(_mm_set_epi32(-1, 0, 0, -1)), - _mm_castsi128_ps(_mm_set_epi32(-1, 0, -1, 0)), - _mm_castsi128_ps(_mm_set_epi32(-1, 0, -1, -1)), - _mm_castsi128_ps(_mm_set_epi32(-1, -1, 0, 0)), - _mm_castsi128_ps(_mm_set_epi32(-1, -1, 0, -1)), - _mm_castsi128_ps(_mm_set_epi32(-1, -1, -1, 0)), - _mm_castsi128_ps(_mm_set_epi32(-1, -1, -1, -1))}; - -CCL_NAMESPACE_END - -#endif // WITH_KERNEL_SSE2 diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h deleted file mode 100644 index b4a153c329f..00000000000 --- a/intern/cycles/util/util_simd.h +++ /dev/null @@ -1,572 +0,0 @@ -/* - * Copyright 2011-2013 Intel Corporation - * Modifications Copyright 2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0(the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __UTIL_SIMD_TYPES_H__ -#define __UTIL_SIMD_TYPES_H__ - -#include -#include - -#include "util/util_defines.h" - -/* SSE Intrinsics includes - * - * We assume __KERNEL_SSEX__ flags to have been defined at this point. - * - * MinGW64 has conflicting declarations for these SSE headers in . - * Since we can't avoid including , better only include that */ -#if defined(FREE_WINDOWS64) -# include "util/util_windows.h" -#elif defined(_MSC_VER) -# include -#elif (defined(__x86_64__) || defined(__i386__)) -# include -#elif defined(__KERNEL_NEON__) -# define SSE2NEON_PRECISE_MINMAX 1 -# include -#endif - -/* Floating Point Control, for Embree. */ -#if defined(__x86_64__) || defined(_M_X64) -# define SIMD_SET_FLUSH_TO_ZERO \ - _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); \ - _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); -#else -# define SIMD_SET_FLUSH_TO_ZERO -#endif - -CCL_NAMESPACE_BEGIN - -/* Data structures used by SSE classes. */ -#ifdef __KERNEL_SSE2__ - -extern const __m128 _mm_lookupmask_ps[16]; - -static struct TrueTy { - __forceinline operator bool() const - { - return true; - } -} True ccl_attr_maybe_unused; - -static struct FalseTy { - __forceinline operator bool() const - { - return false; - } -} False ccl_attr_maybe_unused; - -static struct ZeroTy { - __forceinline operator float() const - { - return 0; - } - __forceinline operator int() const - { - return 0; - } -} zero ccl_attr_maybe_unused; - -static struct OneTy { - __forceinline operator float() const - { - return 1; - } - __forceinline operator int() const - { - return 1; - } -} one ccl_attr_maybe_unused; - -static struct NegInfTy { - __forceinline operator float() const - { - return -std::numeric_limits::infinity(); - } - __forceinline operator int() const - { - return std::numeric_limits::min(); - } -} neg_inf ccl_attr_maybe_unused; - -static struct PosInfTy { - __forceinline operator float() const - { - return std::numeric_limits::infinity(); - } - __forceinline operator int() const - 
{ - return std::numeric_limits::max(); - } -} inf ccl_attr_maybe_unused, pos_inf ccl_attr_maybe_unused; - -static struct StepTy { -} step ccl_attr_maybe_unused; - -#endif - -/* Utilities used by Neon */ -#if defined(__KERNEL_NEON__) -template type shuffle_neon(const type &a) -{ - if (i0 == i1 && i0 == i2 && i0 == i3) { - return type(vdupq_laneq_s32(int32x4_t(a), i0)); - } - static const uint8_t tbl[16] = {(i0 * 4) + 0, - (i0 * 4) + 1, - (i0 * 4) + 2, - (i0 * 4) + 3, - (i1 * 4) + 0, - (i1 * 4) + 1, - (i1 * 4) + 2, - (i1 * 4) + 3, - (i2 * 4) + 0, - (i2 * 4) + 1, - (i2 * 4) + 2, - (i2 * 4) + 3, - (i3 * 4) + 0, - (i3 * 4) + 1, - (i3 * 4) + 2, - (i3 * 4) + 3}; - - return type(vqtbl1q_s8(int8x16_t(a), *(uint8x16_t *)tbl)); -} - -template -type shuffle_neon(const type &a, const type &b) -{ - if (&a == &b) { - static const uint8_t tbl[16] = {(i0 * 4) + 0, - (i0 * 4) + 1, - (i0 * 4) + 2, - (i0 * 4) + 3, - (i1 * 4) + 0, - (i1 * 4) + 1, - (i1 * 4) + 2, - (i1 * 4) + 3, - (i2 * 4) + 0, - (i2 * 4) + 1, - (i2 * 4) + 2, - (i2 * 4) + 3, - (i3 * 4) + 0, - (i3 * 4) + 1, - (i3 * 4) + 2, - (i3 * 4) + 3}; - - return type(vqtbl1q_s8(int8x16_t(b), *(uint8x16_t *)tbl)); - } - else { - - static const uint8_t tbl[16] = {(i0 * 4) + 0, - (i0 * 4) + 1, - (i0 * 4) + 2, - (i0 * 4) + 3, - (i1 * 4) + 0, - (i1 * 4) + 1, - (i1 * 4) + 2, - (i1 * 4) + 3, - (i2 * 4) + 0 + 16, - (i2 * 4) + 1 + 16, - (i2 * 4) + 2 + 16, - (i2 * 4) + 3 + 16, - (i3 * 4) + 0 + 16, - (i3 * 4) + 1 + 16, - (i3 * 4) + 2 + 16, - (i3 * 4) + 3 + 16}; - - return type(vqtbl2q_s8((int8x16x2_t){int8x16_t(a), int8x16_t(b)}, *(uint8x16_t *)tbl)); - } -} -#endif /* __KERNEL_NEON */ - -/* Intrinsics Functions - * - * For fast bit operations. 
*/ - -#if defined(__BMI__) && defined(__GNUC__) -# ifndef _tzcnt_u32 -# define _tzcnt_u32 __tzcnt_u32 -# endif -# ifndef _tzcnt_u64 -# define _tzcnt_u64 __tzcnt_u64 -# endif -#endif - -#if defined(__LZCNT__) -# define _lzcnt_u32 __lzcnt32 -# define _lzcnt_u64 __lzcnt64 -#endif - -#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__clang__) -/* Intrinsic functions on Windows. */ -__forceinline uint32_t __bsf(uint32_t v) -{ -# if defined(__KERNEL_AVX2__) - return _tzcnt_u32(v); -# else - unsigned long r = 0; - _BitScanForward(&r, v); - return r; -# endif -} - -__forceinline uint32_t __bsr(uint32_t v) -{ - unsigned long r = 0; - _BitScanReverse(&r, v); - return r; -} - -__forceinline uint32_t __btc(uint32_t v, uint32_t i) -{ - long r = v; - _bittestandcomplement(&r, i); - return r; -} - -__forceinline uint32_t bitscan(uint32_t v) -{ -# if defined(__KERNEL_AVX2__) - return _tzcnt_u32(v); -# else - return __bsf(v); -# endif -} - -# if defined(__KERNEL_64_BIT__) - -__forceinline uint64_t __bsf(uint64_t v) -{ -# if defined(__KERNEL_AVX2__) - return _tzcnt_u64(v); -# else - unsigned long r = 0; - _BitScanForward64(&r, v); - return r; -# endif -} - -__forceinline uint64_t __bsr(uint64_t v) -{ - unsigned long r = 0; - _BitScanReverse64(&r, v); - return r; -} - -__forceinline uint64_t __btc(uint64_t v, uint64_t i) -{ - uint64_t r = v; - _bittestandcomplement64((__int64 *)&r, i); - return r; -} - -__forceinline uint64_t bitscan(uint64_t v) -{ -# if defined(__KERNEL_AVX2__) -# if defined(__KERNEL_64_BIT__) - return _tzcnt_u64(v); -# else - return _tzcnt_u32(v); -# endif -# else - return __bsf(v); -# endif -} - -# endif /* __KERNEL_64_BIT__ */ - -#elif (defined(__x86_64__) || defined(__i386__)) && defined(__KERNEL_SSE2__) -/* Intrinsic functions with x86 SSE. 
*/ - -__forceinline uint32_t __bsf(const uint32_t v) -{ - uint32_t r = 0; - asm("bsf %1,%0" : "=r"(r) : "r"(v)); - return r; -} - -__forceinline uint32_t __bsr(const uint32_t v) -{ - uint32_t r = 0; - asm("bsr %1,%0" : "=r"(r) : "r"(v)); - return r; -} - -__forceinline uint32_t __btc(const uint32_t v, uint32_t i) -{ - uint32_t r = 0; - asm("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); - return r; -} - -# if (defined(__KERNEL_64_BIT__) || defined(__APPLE__)) && \ - !(defined(__ILP32__) && defined(__x86_64__)) -__forceinline uint64_t __bsf(const uint64_t v) -{ - uint64_t r = 0; - asm("bsf %1,%0" : "=r"(r) : "r"(v)); - return r; -} -# endif - -__forceinline uint64_t __bsr(const uint64_t v) -{ - uint64_t r = 0; - asm("bsr %1,%0" : "=r"(r) : "r"(v)); - return r; -} - -__forceinline uint64_t __btc(const uint64_t v, const uint64_t i) -{ - uint64_t r = 0; - asm("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); - return r; -} - -__forceinline uint32_t bitscan(uint32_t v) -{ -# if defined(__KERNEL_AVX2__) - return _tzcnt_u32(v); -# else - return __bsf(v); -# endif -} - -# if (defined(__KERNEL_64_BIT__) || defined(__APPLE__)) && \ - !(defined(__ILP32__) && defined(__x86_64__)) -__forceinline uint64_t bitscan(uint64_t v) -{ -# if defined(__KERNEL_AVX2__) -# if defined(__KERNEL_64_BIT__) - return _tzcnt_u64(v); -# else - return _tzcnt_u32(v); -# endif -# else - return __bsf(v); -# endif -} -# endif - -#else -/* Intrinsic functions fallback for arbitrary processor. 
*/ -__forceinline uint32_t __bsf(const uint32_t x) -{ - for (int i = 0; i < 32; i++) { - if (x & (1U << i)) - return i; - } - return 32; -} - -__forceinline uint32_t __bsr(const uint32_t x) -{ - for (int i = 0; i < 32; i++) { - if (x & (1U << (31 - i))) - return (31 - i); - } - return 32; -} - -__forceinline uint32_t __btc(const uint32_t x, const uint32_t bit) -{ - uint32_t mask = 1U << bit; - return x & (~mask); -} - -__forceinline uint32_t __bsf(const uint64_t x) -{ - for (int i = 0; i < 64; i++) { - if (x & (1UL << i)) - return i; - } - return 64; -} - -__forceinline uint32_t __bsr(const uint64_t x) -{ - for (int i = 0; i < 64; i++) { - if (x & (1UL << (63 - i))) - return (63 - i); - } - return 64; -} - -__forceinline uint64_t __btc(const uint64_t x, const uint32_t bit) -{ - uint64_t mask = 1UL << bit; - return x & (~mask); -} - -__forceinline uint32_t bitscan(uint32_t value) -{ - assert(value != 0); - uint32_t bit = 0; - while ((value & (1 << bit)) == 0) { - ++bit; - } - return bit; -} - -__forceinline uint64_t bitscan(uint64_t value) -{ - assert(value != 0); - uint64_t bit = 0; - while ((value & (1 << bit)) == 0) { - ++bit; - } - return bit; -} - -#endif /* Intrinsics */ - -/* SSE compatibility. - * - * Various utilities to smooth over differences between SSE versions and - * implementations. */ -#ifdef __KERNEL_SSE2__ - -/* Test __KERNEL_SSE41__ for MSVC which does not define __SSE4_1__, and test - * __SSE4_1__ to avoid OpenImageIO conflicts with our emulation macros on other - * platforms when compiling code outside the kernel. 
*/ -# if !(defined(__KERNEL_SSE41__) || defined(__SSE4_1__) || defined(__SSE4_2__)) - -/* Emulation of SSE4 functions with SSE2 */ - -# define _MM_FROUND_TO_NEAREST_INT 0x00 -# define _MM_FROUND_TO_NEG_INF 0x01 -# define _MM_FROUND_TO_POS_INF 0x02 -# define _MM_FROUND_TO_ZERO 0x03 -# define _MM_FROUND_CUR_DIRECTION 0x04 - -# undef _mm_blendv_ps -# define _mm_blendv_ps _mm_blendv_ps_emu -__forceinline __m128 _mm_blendv_ps_emu(__m128 value, __m128 input, __m128 mask) -{ - __m128i isignmask = _mm_set1_epi32(0x80000000); - __m128 signmask = _mm_castsi128_ps(isignmask); - __m128i iandsign = _mm_castps_si128(_mm_and_ps(mask, signmask)); - __m128i icmpmask = _mm_cmpeq_epi32(iandsign, isignmask); - __m128 cmpmask = _mm_castsi128_ps(icmpmask); - return _mm_or_ps(_mm_and_ps(cmpmask, input), _mm_andnot_ps(cmpmask, value)); -} - -# undef _mm_blend_ps -# define _mm_blend_ps _mm_blend_ps_emu -__forceinline __m128 _mm_blend_ps_emu(__m128 value, __m128 input, const int mask) -{ - assert(mask < 0x10); - return _mm_blendv_ps(value, input, _mm_lookupmask_ps[mask]); -} - -# undef _mm_blendv_epi8 -# define _mm_blendv_epi8 _mm_blendv_epi8_emu -__forceinline __m128i _mm_blendv_epi8_emu(__m128i value, __m128i input, __m128i mask) -{ - return _mm_or_si128(_mm_and_si128(mask, input), _mm_andnot_si128(mask, value)); -} - -# undef _mm_min_epi32 -# define _mm_min_epi32 _mm_min_epi32_emu -__forceinline __m128i _mm_min_epi32_emu(__m128i value, __m128i input) -{ - return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(value, input)); -} - -# undef _mm_max_epi32 -# define _mm_max_epi32 _mm_max_epi32_emu -__forceinline __m128i _mm_max_epi32_emu(__m128i value, __m128i input) -{ - return _mm_blendv_epi8(value, input, _mm_cmplt_epi32(value, input)); -} - -# ifndef __KERNEL_NEON__ -# undef _mm_extract_epi32 -# define _mm_extract_epi32 _mm_extract_epi32_emu -__forceinline int _mm_extract_epi32_emu(__m128i input, const int index) -{ - switch (index) { - case 0: - return _mm_cvtsi128_si32(input); - case 1: 
- return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(1, 1, 1, 1))); - case 2: - return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(2, 2, 2, 2))); - case 3: - return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(3, 3, 3, 3))); - default: - assert(false); - return 0; - } -} -# endif - -# undef _mm_insert_epi32 -# define _mm_insert_epi32 _mm_insert_epi32_emu -__forceinline __m128i _mm_insert_epi32_emu(__m128i value, int input, const int index) -{ - assert(index >= 0 && index < 4); - ((int *)&value)[index] = input; - return value; -} - -# undef _mm_insert_ps -# define _mm_insert_ps _mm_insert_ps_emu -__forceinline __m128 _mm_insert_ps_emu(__m128 value, __m128 input, const int index) -{ - assert(index < 0x100); - ((float *)&value)[(index >> 4) & 0x3] = ((float *)&input)[index >> 6]; - return _mm_andnot_ps(_mm_lookupmask_ps[index & 0xf], value); -} - -# undef _mm_round_ps -# define _mm_round_ps _mm_round_ps_emu -__forceinline __m128 _mm_round_ps_emu(__m128 value, const int flags) -{ - switch (flags) { - case _MM_FROUND_TO_NEAREST_INT: - return _mm_cvtepi32_ps(_mm_cvtps_epi32(value)); - case _MM_FROUND_TO_NEG_INF: - return _mm_cvtepi32_ps(_mm_cvtps_epi32(_mm_add_ps(value, _mm_set1_ps(-0.5f)))); - case _MM_FROUND_TO_POS_INF: - return _mm_cvtepi32_ps(_mm_cvtps_epi32(_mm_add_ps(value, _mm_set1_ps(0.5f)))); - case _MM_FROUND_TO_ZERO: - return _mm_cvtepi32_ps(_mm_cvttps_epi32(value)); - } - return value; -} - -# endif /* !(defined(__KERNEL_SSE41__) || defined(__SSE4_1__) || defined(__SSE4_2__)) */ - -/* Older GCC versions do not have _mm256_cvtss_f32 yet, so define it ourselves. - * _mm256_castps256_ps128 generates no instructions so this is just as efficient. 
*/ -# if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__) -# undef _mm256_cvtss_f32 -# define _mm256_cvtss_f32(a) (_mm_cvtss_f32(_mm256_castps256_ps128(a))) -# endif - -#endif /* __KERNEL_SSE2__ */ - -/* quiet unused define warnings */ -#if defined(__KERNEL_SSE2__) || defined(__KERNEL_SSE3__) || defined(__KERNEL_SSSE3__) || \ - defined(__KERNEL_SSE41__) || defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__) -/* do nothing */ -#endif - -CCL_NAMESPACE_END - -#endif /* __UTIL_SIMD_TYPES_H__ */ diff --git a/intern/cycles/util/util_sseb.h b/intern/cycles/util/util_sseb.h deleted file mode 100644 index 6afce4f8909..00000000000 --- a/intern/cycles/util/util_sseb.h +++ /dev/null @@ -1,358 +0,0 @@ -/* - * Copyright 2011-2013 Intel Corporation - * Modifications Copyright 2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0(the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_SSEB_H__ -#define __UTIL_SSEB_H__ - -CCL_NAMESPACE_BEGIN - -#ifdef __KERNEL_SSE2__ - -struct ssei; -struct ssef; - -/*! 4-wide SSE bool type. 
*/ -struct sseb { - typedef sseb Mask; // mask type - typedef ssei Int; // int type - typedef ssef Float; // float type - - enum { size = 4 }; // number of SIMD elements - union { - __m128 m128; - int32_t v[4]; - }; // data - - //////////////////////////////////////////////////////////////////////////////// - /// Constructors, Assignment & Cast Operators - //////////////////////////////////////////////////////////////////////////////// - - __forceinline sseb() - { - } - __forceinline sseb(const sseb &other) - { - m128 = other.m128; - } - __forceinline sseb &operator=(const sseb &other) - { - m128 = other.m128; - return *this; - } - - __forceinline sseb(const __m128 input) : m128(input) - { - } - __forceinline operator const __m128 &(void) const - { - return m128; - } - __forceinline operator const __m128i(void) const - { - return _mm_castps_si128(m128); - } - __forceinline operator const __m128d(void) const - { - return _mm_castps_pd(m128); - } - - __forceinline sseb(bool a) - : m128(_mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)]) - { - } - __forceinline sseb(bool a, bool b) - : m128(_mm_lookupmask_ps[(size_t(b) << 3) | (size_t(a) << 2) | (size_t(b) << 1) | size_t(a)]) - { - } - __forceinline sseb(bool a, bool b, bool c, bool d) - : m128(_mm_lookupmask_ps[(size_t(d) << 3) | (size_t(c) << 2) | (size_t(b) << 1) | size_t(a)]) - { - } - __forceinline sseb(int mask) - { - assert(mask >= 0 && mask < 16); - m128 = _mm_lookupmask_ps[mask]; - } - - //////////////////////////////////////////////////////////////////////////////// - /// Constants - //////////////////////////////////////////////////////////////////////////////// - - __forceinline sseb(FalseTy) : m128(_mm_setzero_ps()) - { - } - __forceinline sseb(TrueTy) - : m128(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()))) - { - } - - //////////////////////////////////////////////////////////////////////////////// - /// Array Access - 
//////////////////////////////////////////////////////////////////////////////// - - __forceinline bool operator[](const size_t i) const - { - assert(i < 4); - return (_mm_movemask_ps(m128) >> i) & 1; - } - __forceinline int32_t &operator[](const size_t i) - { - assert(i < 4); - return v[i]; - } -}; - -//////////////////////////////////////////////////////////////////////////////// -/// Unary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const sseb operator!(const sseb &a) -{ - return _mm_xor_ps(a, sseb(True)); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Binary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const sseb operator&(const sseb &a, const sseb &b) -{ - return _mm_and_ps(a, b); -} -__forceinline const sseb operator|(const sseb &a, const sseb &b) -{ - return _mm_or_ps(a, b); -} -__forceinline const sseb operator^(const sseb &a, const sseb &b) -{ - return _mm_xor_ps(a, b); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Assignment Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const sseb operator&=(sseb &a, const sseb &b) -{ - return a = a & b; -} -__forceinline const sseb operator|=(sseb &a, const sseb &b) -{ - return a = a | b; -} -__forceinline const sseb operator^=(sseb &a, const sseb &b) -{ - return a = a ^ b; -} - -//////////////////////////////////////////////////////////////////////////////// -/// Comparison Operators + Select -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const sseb operator!=(const sseb &a, const sseb &b) -{ - return _mm_xor_ps(a, b); -} -__forceinline const sseb operator==(const sseb &a, const sseb &b) -{ - return _mm_castsi128_ps(_mm_cmpeq_epi32(a, b)); -} - -__forceinline const sseb select(const sseb &m, 
const sseb &t, const sseb &f) -{ -# if defined(__KERNEL_SSE41__) - return _mm_blendv_ps(f, t, m); -# else - return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); -# endif -} - -//////////////////////////////////////////////////////////////////////////////// -/// Movement/Shifting/Shuffling Functions -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const sseb unpacklo(const sseb &a, const sseb &b) -{ - return _mm_unpacklo_ps(a, b); -} -__forceinline const sseb unpackhi(const sseb &a, const sseb &b) -{ - return _mm_unpackhi_ps(a, b); -} - -template -__forceinline const sseb shuffle(const sseb &a) -{ -# ifdef __KERNEL_NEON__ - return shuffle_neon(a); -# else - return _mm_castsi128_ps(_mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0))); -# endif -} - -# ifndef __KERNEL_NEON__ -template<> __forceinline const sseb shuffle<0, 1, 0, 1>(const sseb &a) -{ - return _mm_movelh_ps(a, a); -} - -template<> __forceinline const sseb shuffle<2, 3, 2, 3>(const sseb &a) -{ - return _mm_movehl_ps(a, a); -} -# endif - -template -__forceinline const sseb shuffle(const sseb &a, const sseb &b) -{ -# ifdef __KERNEL_NEON__ - return shuffle_neon(a, b); -# else - return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); -# endif -} - -# ifndef __KERNEL_NEON__ -template<> __forceinline const sseb shuffle<0, 1, 0, 1>(const sseb &a, const sseb &b) -{ - return _mm_movelh_ps(a, b); -} - -template<> __forceinline const sseb shuffle<2, 3, 2, 3>(const sseb &a, const sseb &b) -{ - return _mm_movehl_ps(b, a); -} -# endif - -# if defined(__KERNEL_SSE3__) && !defined(__KERNEL_NEON__) -template<> __forceinline const sseb shuffle<0, 0, 2, 2>(const sseb &a) -{ - return _mm_moveldup_ps(a); -} -template<> __forceinline const sseb shuffle<1, 1, 3, 3>(const sseb &a) -{ - return _mm_movehdup_ps(a); -} -# endif - -# if defined(__KERNEL_SSE41__) -template -__forceinline const sseb insert(const sseb &a, const sseb &b) -{ -# ifdef __KERNEL_NEON__ - sseb res = a; - 
if (clr) - res[dst] = 0; - else - res[dst] = b[src]; - return res; -# else - return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr); -# endif -} -template __forceinline const sseb insert(const sseb &a, const sseb &b) -{ - return insert(a, b); -} -template __forceinline const sseb insert(const sseb &a, const bool b) -{ - return insert(a, sseb(b)); -} -# endif - -//////////////////////////////////////////////////////////////////////////////// -/// Reduction Operations -//////////////////////////////////////////////////////////////////////////////// - -# if defined(__KERNEL_SSE41__) -__forceinline uint32_t popcnt(const sseb &a) -{ -# if defined(__KERNEL_NEON__) - const int32x4_t mask = {1, 1, 1, 1}; - int32x4_t t = vandq_s32(vreinterpretq_s32_m128(a.m128), mask); - return vaddvq_s32(t); -# else - return _mm_popcnt_u32(_mm_movemask_ps(a)); -# endif -} -# else -__forceinline uint32_t popcnt(const sseb &a) -{ - return bool(a[0]) + bool(a[1]) + bool(a[2]) + bool(a[3]); -} -# endif - -__forceinline bool reduce_and(const sseb &a) -{ -# if defined(__KERNEL_NEON__) - return vaddvq_s32(vreinterpretq_s32_m128(a.m128)) == -4; -# else - return _mm_movemask_ps(a) == 0xf; -# endif -} -__forceinline bool reduce_or(const sseb &a) -{ -# if defined(__KERNEL_NEON__) - return vaddvq_s32(vreinterpretq_s32_m128(a.m128)) != 0x0; -# else - return _mm_movemask_ps(a) != 0x0; -# endif -} -__forceinline bool all(const sseb &b) -{ -# if defined(__KERNEL_NEON__) - return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) == -4; -# else - return _mm_movemask_ps(b) == 0xf; -# endif -} -__forceinline bool any(const sseb &b) -{ -# if defined(__KERNEL_NEON__) - return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) != 0x0; -# else - return _mm_movemask_ps(b) != 0x0; -# endif -} -__forceinline bool none(const sseb &b) -{ -# if defined(__KERNEL_NEON__) - return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) == 0x0; -# else - return _mm_movemask_ps(b) == 0x0; -# endif -} - -__forceinline uint32_t movemask(const sseb 
&a) -{ - return _mm_movemask_ps(a); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Debug Functions -//////////////////////////////////////////////////////////////////////////////// - -ccl_device_inline void print_sseb(const char *label, const sseb &a) -{ - printf("%s: %d %d %d %d\n", label, a[0], a[1], a[2], a[3]); -} - -#endif - -CCL_NAMESPACE_END - -#endif diff --git a/intern/cycles/util/util_ssef.h b/intern/cycles/util/util_ssef.h deleted file mode 100644 index 0c81ed87553..00000000000 --- a/intern/cycles/util/util_ssef.h +++ /dev/null @@ -1,1104 +0,0 @@ -/* - * Copyright 2011-2013 Intel Corporation - * Modifications Copyright 2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0(the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_SSEF_H__ -#define __UTIL_SSEF_H__ - -#include "util_ssei.h" - -CCL_NAMESPACE_BEGIN - -#ifdef __KERNEL_SSE2__ - -struct sseb; -struct ssef; - -/*! 4-wide SSE float type. 
*/ -struct ssef { - typedef sseb Mask; // mask type - typedef ssei Int; // int type - typedef ssef Float; // float type - - enum { size = 4 }; // number of SIMD elements - union { - __m128 m128; - float f[4]; - int i[4]; - }; // data - - //////////////////////////////////////////////////////////////////////////////// - /// Constructors, Assignment & Cast Operators - //////////////////////////////////////////////////////////////////////////////// - - __forceinline ssef() - { - } - __forceinline ssef(const ssef &other) - { - m128 = other.m128; - } - __forceinline ssef &operator=(const ssef &other) - { - m128 = other.m128; - return *this; - } - - __forceinline ssef(const __m128 a) : m128(a) - { - } - __forceinline operator const __m128 &() const - { - return m128; - } - __forceinline operator __m128 &() - { - return m128; - } - - __forceinline ssef(float a) : m128(_mm_set1_ps(a)) - { - } - __forceinline ssef(float a, float b, float c, float d) : m128(_mm_setr_ps(a, b, c, d)) - { - } - - __forceinline explicit ssef(const __m128i a) : m128(_mm_cvtepi32_ps(a)) - { - } - - //////////////////////////////////////////////////////////////////////////////// - /// Loads and Stores - //////////////////////////////////////////////////////////////////////////////// - -# if defined(__KERNEL_AVX__) - static __forceinline ssef broadcast(const void *const a) - { - return _mm_broadcast_ss((float *)a); - } -# else - static __forceinline ssef broadcast(const void *const a) - { - return _mm_set1_ps(*(float *)a); - } -# endif - - //////////////////////////////////////////////////////////////////////////////// - /// Array Access - //////////////////////////////////////////////////////////////////////////////// - - __forceinline const float &operator[](const size_t i) const - { - assert(i < 4); - return f[i]; - } - __forceinline float &operator[](const size_t i) - { - assert(i < 4); - return f[i]; - } -}; - -//////////////////////////////////////////////////////////////////////////////// 
-/// Unary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const ssef cast(const __m128i &a) -{ - return _mm_castsi128_ps(a); -} -__forceinline const ssef operator+(const ssef &a) -{ - return a; -} -__forceinline const ssef operator-(const ssef &a) -{ - return _mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))); -} -__forceinline const ssef abs(const ssef &a) -{ - return _mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff))); -} -# if defined(__KERNEL_SSE41__) -__forceinline const ssef sign(const ssef &a) -{ - return _mm_blendv_ps(ssef(1.0f), -ssef(1.0f), _mm_cmplt_ps(a, ssef(0.0f))); -} -# endif -__forceinline const ssef signmsk(const ssef &a) -{ - return _mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))); -} - -__forceinline const ssef rcp(const ssef &a) -{ - const ssef r = _mm_rcp_ps(a.m128); - return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a)); -} -__forceinline const ssef sqr(const ssef &a) -{ - return _mm_mul_ps(a, a); -} -__forceinline const ssef mm_sqrt(const ssef &a) -{ - return _mm_sqrt_ps(a.m128); -} -__forceinline const ssef rsqrt(const ssef &a) -{ - const ssef r = _mm_rsqrt_ps(a.m128); - return _mm_add_ps( - _mm_mul_ps(_mm_set_ps(1.5f, 1.5f, 1.5f, 1.5f), r), - _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set_ps(-0.5f, -0.5f, -0.5f, -0.5f)), r), - _mm_mul_ps(r, r))); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Binary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const ssef operator+(const ssef &a, const ssef &b) -{ - return _mm_add_ps(a.m128, b.m128); -} -__forceinline const ssef operator+(const ssef &a, const float &b) -{ - return a + ssef(b); -} -__forceinline const ssef operator+(const float &a, const ssef &b) -{ - return ssef(a) + b; -} - -__forceinline const ssef operator-(const ssef &a, const ssef &b) -{ - return 
_mm_sub_ps(a.m128, b.m128); -} -__forceinline const ssef operator-(const ssef &a, const float &b) -{ - return a - ssef(b); -} -__forceinline const ssef operator-(const float &a, const ssef &b) -{ - return ssef(a) - b; -} - -__forceinline const ssef operator*(const ssef &a, const ssef &b) -{ - return _mm_mul_ps(a.m128, b.m128); -} -__forceinline const ssef operator*(const ssef &a, const float &b) -{ - return a * ssef(b); -} -__forceinline const ssef operator*(const float &a, const ssef &b) -{ - return ssef(a) * b; -} - -__forceinline const ssef operator/(const ssef &a, const ssef &b) -{ - return _mm_div_ps(a.m128, b.m128); -} -__forceinline const ssef operator/(const ssef &a, const float &b) -{ - return a / ssef(b); -} -__forceinline const ssef operator/(const float &a, const ssef &b) -{ - return ssef(a) / b; -} - -__forceinline const ssef operator^(const ssef &a, const ssef &b) -{ - return _mm_xor_ps(a.m128, b.m128); -} -__forceinline const ssef operator^(const ssef &a, const ssei &b) -{ - return _mm_xor_ps(a.m128, _mm_castsi128_ps(b.m128)); -} - -__forceinline const ssef operator&(const ssef &a, const ssef &b) -{ - return _mm_and_ps(a.m128, b.m128); -} -__forceinline const ssef operator&(const ssef &a, const ssei &b) -{ - return _mm_and_ps(a.m128, _mm_castsi128_ps(b.m128)); -} - -__forceinline const ssef operator|(const ssef &a, const ssef &b) -{ - return _mm_or_ps(a.m128, b.m128); -} -__forceinline const ssef operator|(const ssef &a, const ssei &b) -{ - return _mm_or_ps(a.m128, _mm_castsi128_ps(b.m128)); -} - -__forceinline const ssef andnot(const ssef &a, const ssef &b) -{ - return _mm_andnot_ps(a.m128, b.m128); -} - -__forceinline const ssef min(const ssef &a, const ssef &b) -{ - return _mm_min_ps(a.m128, b.m128); -} -__forceinline const ssef min(const ssef &a, const float &b) -{ - return _mm_min_ps(a.m128, ssef(b)); -} -__forceinline const ssef min(const float &a, const ssef &b) -{ - return _mm_min_ps(ssef(a), b.m128); -} - -__forceinline const ssef max(const 
ssef &a, const ssef &b) -{ - return _mm_max_ps(a.m128, b.m128); -} -__forceinline const ssef max(const ssef &a, const float &b) -{ - return _mm_max_ps(a.m128, ssef(b)); -} -__forceinline const ssef max(const float &a, const ssef &b) -{ - return _mm_max_ps(ssef(a), b.m128); -} - -# if defined(__KERNEL_SSE41__) -__forceinline ssef mini(const ssef &a, const ssef &b) -{ - const ssei ai = _mm_castps_si128(a); - const ssei bi = _mm_castps_si128(b); - const ssei ci = _mm_min_epi32(ai, bi); - return _mm_castsi128_ps(ci); -} -# endif - -# if defined(__KERNEL_SSE41__) -__forceinline ssef maxi(const ssef &a, const ssef &b) -{ - const ssei ai = _mm_castps_si128(a); - const ssei bi = _mm_castps_si128(b); - const ssei ci = _mm_max_epi32(ai, bi); - return _mm_castsi128_ps(ci); -} -# endif - -//////////////////////////////////////////////////////////////////////////////// -/// Ternary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const ssef madd(const ssef &a, const ssef &b, const ssef &c) -{ -# if defined(__KERNEL_NEON__) - return vfmaq_f32(c, a, b); -# elif defined(__KERNEL_AVX2__) - return _mm_fmadd_ps(a, b, c); -# else - return a * b + c; -# endif -} -__forceinline const ssef msub(const ssef &a, const ssef &b, const ssef &c) -{ -# if defined(__KERNEL_NEON__) - return vfmaq_f32(vnegq_f32(c), a, b); -# elif defined(__KERNEL_AVX2__) - return _mm_fmsub_ps(a, b, c); -# else - return a * b - c; -# endif -} -__forceinline const ssef nmadd(const ssef &a, const ssef &b, const ssef &c) -{ -# if defined(__KERNEL_NEON__) - return vfmsq_f32(c, a, b); -# elif defined(__KERNEL_AVX2__) - return _mm_fnmadd_ps(a, b, c); -# else - return c - a * b; -# endif -} -__forceinline const ssef nmsub(const ssef &a, const ssef &b, const ssef &c) -{ -# if defined(__KERNEL_NEON__) - return vfmsq_f32(vnegq_f32(c), a, b); -# elif defined(__KERNEL_AVX2__) - return _mm_fnmsub_ps(a, b, c); -# else - return -a * b - c; -# endif -} - 
-//////////////////////////////////////////////////////////////////////////////// -/// Assignment Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline ssef &operator+=(ssef &a, const ssef &b) -{ - return a = a + b; -} -__forceinline ssef &operator+=(ssef &a, const float &b) -{ - return a = a + b; -} - -__forceinline ssef &operator-=(ssef &a, const ssef &b) -{ - return a = a - b; -} -__forceinline ssef &operator-=(ssef &a, const float &b) -{ - return a = a - b; -} - -__forceinline ssef &operator*=(ssef &a, const ssef &b) -{ - return a = a * b; -} -__forceinline ssef &operator*=(ssef &a, const float &b) -{ - return a = a * b; -} - -__forceinline ssef &operator/=(ssef &a, const ssef &b) -{ - return a = a / b; -} -__forceinline ssef &operator/=(ssef &a, const float &b) -{ - return a = a / b; -} - -//////////////////////////////////////////////////////////////////////////////// -/// Comparison Operators + Select -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const sseb operator==(const ssef &a, const ssef &b) -{ - return _mm_cmpeq_ps(a.m128, b.m128); -} -__forceinline const sseb operator==(const ssef &a, const float &b) -{ - return a == ssef(b); -} -__forceinline const sseb operator==(const float &a, const ssef &b) -{ - return ssef(a) == b; -} - -__forceinline const sseb operator!=(const ssef &a, const ssef &b) -{ - return _mm_cmpneq_ps(a.m128, b.m128); -} -__forceinline const sseb operator!=(const ssef &a, const float &b) -{ - return a != ssef(b); -} -__forceinline const sseb operator!=(const float &a, const ssef &b) -{ - return ssef(a) != b; -} - -__forceinline const sseb operator<(const ssef &a, const ssef &b) -{ - return _mm_cmplt_ps(a.m128, b.m128); -} -__forceinline const sseb operator<(const ssef &a, const float &b) -{ - return a < ssef(b); -} -__forceinline const sseb operator<(const float &a, const ssef &b) -{ - return ssef(a) < b; -} - -__forceinline 
const sseb operator>=(const ssef &a, const ssef &b) -{ - return _mm_cmpnlt_ps(a.m128, b.m128); -} -__forceinline const sseb operator>=(const ssef &a, const float &b) -{ - return a >= ssef(b); -} -__forceinline const sseb operator>=(const float &a, const ssef &b) -{ - return ssef(a) >= b; -} - -__forceinline const sseb operator>(const ssef &a, const ssef &b) -{ - return _mm_cmpnle_ps(a.m128, b.m128); -} -__forceinline const sseb operator>(const ssef &a, const float &b) -{ - return a > ssef(b); -} -__forceinline const sseb operator>(const float &a, const ssef &b) -{ - return ssef(a) > b; -} - -__forceinline const sseb operator<=(const ssef &a, const ssef &b) -{ - return _mm_cmple_ps(a.m128, b.m128); -} -__forceinline const sseb operator<=(const ssef &a, const float &b) -{ - return a <= ssef(b); -} -__forceinline const sseb operator<=(const float &a, const ssef &b) -{ - return ssef(a) <= b; -} - -__forceinline const ssef select(const sseb &m, const ssef &t, const ssef &f) -{ -# ifdef __KERNEL_SSE41__ - return _mm_blendv_ps(f, t, m); -# else - return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); -# endif -} - -__forceinline const ssef select(const ssef &m, const ssef &t, const ssef &f) -{ -# ifdef __KERNEL_SSE41__ - return _mm_blendv_ps(f, t, m); -# else - return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); -# endif -} - -__forceinline const ssef select(const int mask, const ssef &t, const ssef &f) -{ -# if defined(__KERNEL_SSE41__) && \ - ((!defined(__clang__) && !defined(_MSC_VER)) || defined(__INTEL_COMPILER)) - return _mm_blend_ps(f, t, mask); -# else - return select(sseb(mask), t, f); -# endif -} - -//////////////////////////////////////////////////////////////////////////////// -/// Rounding Functions -//////////////////////////////////////////////////////////////////////////////// - -# if defined(__KERNEL_SSE41__) -__forceinline const ssef round_even(const ssef &a) -{ -# ifdef __KERNEL_NEON__ - return vrndnq_f32(a); -# else - return _mm_round_ps(a, 
_MM_FROUND_TO_NEAREST_INT); -# endif -} -__forceinline const ssef round_down(const ssef &a) -{ -# ifdef __KERNEL_NEON__ - return vrndmq_f32(a); -# else - return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF); -# endif -} -__forceinline const ssef round_up(const ssef &a) -{ -# ifdef __KERNEL_NEON__ - return vrndpq_f32(a); -# else - return _mm_round_ps(a, _MM_FROUND_TO_POS_INF); -# endif -} -__forceinline const ssef round_zero(const ssef &a) -{ -# ifdef __KERNEL_NEON__ - return vrndq_f32(a); -# else - return _mm_round_ps(a, _MM_FROUND_TO_ZERO); -# endif -} -__forceinline const ssef floor(const ssef &a) -{ -# ifdef __KERNEL_NEON__ - return vrndnq_f32(a); -# else - return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF); -# endif -} -__forceinline const ssef ceil(const ssef &a) -{ -# ifdef __KERNEL_NEON__ - return vrndpq_f32(a); -# else - return _mm_round_ps(a, _MM_FROUND_TO_POS_INF); -# endif -} -# endif - -__forceinline ssei truncatei(const ssef &a) -{ - return _mm_cvttps_epi32(a.m128); -} - -/* This is about 25% faster than straightforward floor to integer conversion - * due to better pipelining. - * - * Unsaturated add 0xffffffff (a < 0) is the same as subtract -1. 
- */ -__forceinline ssei floori(const ssef &a) -{ - return truncatei(a) + cast((a < 0.0f).m128); -} - -__forceinline ssef floorfrac(const ssef &x, ssei *i) -{ - *i = floori(x); - return x - ssef(*i); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Common Functions -//////////////////////////////////////////////////////////////////////////////// - -__forceinline ssef mix(const ssef &a, const ssef &b, const ssef &t) -{ - return madd(t, b, (ssef(1.0f) - t) * a); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Movement/Shifting/Shuffling Functions -//////////////////////////////////////////////////////////////////////////////// - -__forceinline ssef unpacklo(const ssef &a, const ssef &b) -{ - return _mm_unpacklo_ps(a.m128, b.m128); -} -__forceinline ssef unpackhi(const ssef &a, const ssef &b) -{ - return _mm_unpackhi_ps(a.m128, b.m128); -} - -template -__forceinline const ssef shuffle(const ssef &b) -{ -# ifdef __KERNEL_NEON__ - return shuffle_neon(b.m128); -# else - return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(i3, i2, i1, i0))); -# endif -} - -template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef &a) -{ - return _mm_movelh_ps(a, a); -} - -template<> __forceinline const ssef shuffle<2, 3, 2, 3>(const ssef &a) -{ - return _mm_movehl_ps(a, a); -} - -template -__forceinline const ssef shuffle(const ssef &a, const ssef &b) -{ -# ifdef __KERNEL_NEON__ - return shuffle_neon(a, b); -# else - return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); -# endif -} - -template __forceinline const ssef shuffle(const ssef &a, const ssef &b) -{ -# ifdef __KERNEL_NEON__ - return shuffle_neon(a, b); -# else - return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i0, i0, i0, i0)); -# endif -} - -# ifndef __KERNEL_NEON__ -template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef &a, const ssef &b) -{ - return _mm_movelh_ps(a, b); -} - -template<> __forceinline 
const ssef shuffle<2, 3, 2, 3>(const ssef &a, const ssef &b) -{ - return _mm_movehl_ps(b, a); -} -# endif - -# if defined(__KERNEL_SSSE3__) -__forceinline const ssef shuffle8(const ssef &a, const ssei &shuf) -{ - return _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(a), shuf)); -} -# endif - -# if defined(__KERNEL_SSE3__) -template<> __forceinline const ssef shuffle<0, 0, 2, 2>(const ssef &b) -{ - return _mm_moveldup_ps(b); -} -template<> __forceinline const ssef shuffle<1, 1, 3, 3>(const ssef &b) -{ - return _mm_movehdup_ps(b); -} -# endif - -template __forceinline const ssef shuffle(const ssef &b) -{ - return shuffle(b); -} - -# if defined(__KERNEL_AVX__) -__forceinline const ssef shuffle(const ssef &a, const ssei &shuf) -{ - return _mm_permutevar_ps(a, shuf); -} -# endif - -template __forceinline float extract(const ssef &a) -{ - return _mm_cvtss_f32(shuffle(a)); -} -template<> __forceinline float extract<0>(const ssef &a) -{ - return _mm_cvtss_f32(a); -} - -# if defined(__KERNEL_SSE41__) -template -__forceinline const ssef insert(const ssef &a, const ssef &b) -{ -# ifdef __KERNEL_NEON__ - ssef res = a; - if (clr) - res[dst] = 0; - else - res[dst] = b[src]; - return res; -# else - return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr); -# endif -} -template __forceinline const ssef insert(const ssef &a, const ssef &b) -{ - return insert(a, b); -} -template __forceinline const ssef insert(const ssef &a, const float b) -{ - return insert(a, _mm_set_ss(b)); -} -# else -template __forceinline const ssef insert(const ssef &a, const float b) -{ - ssef c = a; - c[dst] = b; - return c; -} -# endif - -//////////////////////////////////////////////////////////////////////////////// -/// Transpose -//////////////////////////////////////////////////////////////////////////////// - -__forceinline void transpose(const ssef &r0, - const ssef &r1, - const ssef &r2, - const ssef &r3, - ssef &c0, - ssef &c1, - ssef &c2, - ssef &c3) -{ - ssef l02 = unpacklo(r0, r2); - ssef 
h02 = unpackhi(r0, r2); - ssef l13 = unpacklo(r1, r3); - ssef h13 = unpackhi(r1, r3); - c0 = unpacklo(l02, l13); - c1 = unpackhi(l02, l13); - c2 = unpacklo(h02, h13); - c3 = unpackhi(h02, h13); -} - -__forceinline void transpose( - const ssef &r0, const ssef &r1, const ssef &r2, const ssef &r3, ssef &c0, ssef &c1, ssef &c2) -{ - ssef l02 = unpacklo(r0, r2); - ssef h02 = unpackhi(r0, r2); - ssef l13 = unpacklo(r1, r3); - ssef h13 = unpackhi(r1, r3); - c0 = unpacklo(l02, l13); - c1 = unpackhi(l02, l13); - c2 = unpacklo(h02, h13); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Reductions -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const ssef vreduce_min(const ssef &v) -{ -# ifdef __KERNEL_NEON__ - return vdupq_n_f32(vminvq_f32(v)); -# else - ssef h = min(shuffle<1, 0, 3, 2>(v), v); - return min(shuffle<2, 3, 0, 1>(h), h); -# endif -} -__forceinline const ssef vreduce_max(const ssef &v) -{ -# ifdef __KERNEL_NEON__ - return vdupq_n_f32(vmaxvq_f32(v)); -# else - ssef h = max(shuffle<1, 0, 3, 2>(v), v); - return max(shuffle<2, 3, 0, 1>(h), h); -# endif -} -__forceinline const ssef vreduce_add(const ssef &v) -{ -# ifdef __KERNEL_NEON__ - return vdupq_n_f32(vaddvq_f32(v)); -# else - ssef h = shuffle<1, 0, 3, 2>(v) + v; - return shuffle<2, 3, 0, 1>(h) + h; -# endif -} - -__forceinline float reduce_min(const ssef &v) -{ -# ifdef __KERNEL_NEON__ - return vminvq_f32(v); -# else - return _mm_cvtss_f32(vreduce_min(v)); -# endif -} -__forceinline float reduce_max(const ssef &v) -{ -# ifdef __KERNEL_NEON__ - return vmaxvq_f32(v); -# else - return _mm_cvtss_f32(vreduce_max(v)); -# endif -} -__forceinline float reduce_add(const ssef &v) -{ -# ifdef __KERNEL_NEON__ - return vaddvq_f32(v); -# else - return _mm_cvtss_f32(vreduce_add(v)); -# endif -} - -__forceinline uint32_t select_min(const ssef &v) -{ - return __bsf(movemask(v == vreduce_min(v))); -} -__forceinline uint32_t 
select_max(const ssef &v) -{ - return __bsf(movemask(v == vreduce_max(v))); -} - -__forceinline uint32_t select_min(const sseb &valid, const ssef &v) -{ - const ssef a = select(valid, v, ssef(pos_inf)); - return __bsf(movemask(valid & (a == vreduce_min(a)))); -} -__forceinline uint32_t select_max(const sseb &valid, const ssef &v) -{ - const ssef a = select(valid, v, ssef(neg_inf)); - return __bsf(movemask(valid & (a == vreduce_max(a)))); -} - -__forceinline uint32_t movemask(const ssef &a) -{ - return _mm_movemask_ps(a); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Memory load and store operations -//////////////////////////////////////////////////////////////////////////////// - -__forceinline ssef load4f(const float4 &a) -{ -# ifdef __KERNEL_WITH_SSE_ALIGN__ - return _mm_load_ps(&a.x); -# else - return _mm_loadu_ps(&a.x); -# endif -} - -__forceinline ssef load4f(const float3 &a) -{ -# ifdef __KERNEL_WITH_SSE_ALIGN__ - return _mm_load_ps(&a.x); -# else - return _mm_loadu_ps(&a.x); -# endif -} - -__forceinline ssef load4f(const void *const a) -{ - return _mm_load_ps((float *)a); -} - -__forceinline ssef load1f_first(const float a) -{ - return _mm_set_ss(a); -} - -__forceinline void store4f(void *ptr, const ssef &v) -{ - _mm_store_ps((float *)ptr, v); -} - -__forceinline ssef loadu4f(const void *const a) -{ - return _mm_loadu_ps((float *)a); -} - -__forceinline void storeu4f(void *ptr, const ssef &v) -{ - _mm_storeu_ps((float *)ptr, v); -} - -__forceinline void store4f(const sseb &mask, void *ptr, const ssef &f) -{ -# if defined(__KERNEL_AVX__) - _mm_maskstore_ps((float *)ptr, (__m128i)mask, f); -# else - *(ssef *)ptr = select(mask, f, *(ssef *)ptr); -# endif -} - -__forceinline ssef load4f_nt(void *ptr) -{ -# if defined(__KERNEL_SSE41__) - return _mm_castsi128_ps(_mm_stream_load_si128((__m128i *)ptr)); -# else - return _mm_load_ps((float *)ptr); -# endif -} - -__forceinline void store4f_nt(void *ptr, const ssef &v) -{ 
-# if defined(__KERNEL_SSE41__) - _mm_stream_ps((float *)ptr, v); -# else - _mm_store_ps((float *)ptr, v); -# endif -} - -//////////////////////////////////////////////////////////////////////////////// -/// Euclidian Space Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline float dot(const ssef &a, const ssef &b) -{ - return reduce_add(a * b); -} - -/* calculate shuffled cross product, useful when order of components does not matter */ -__forceinline ssef cross_zxy(const ssef &a, const ssef &b) -{ - const ssef a0 = a; - const ssef b0 = shuffle<1, 2, 0, 3>(b); - const ssef a1 = shuffle<1, 2, 0, 3>(a); - const ssef b1 = b; - return msub(a0, b0, a1 * b1); -} - -__forceinline ssef cross(const ssef &a, const ssef &b) -{ - return shuffle<1, 2, 0, 3>(cross_zxy(a, b)); -} - -ccl_device_inline const ssef dot3_splat(const ssef &a, const ssef &b) -{ -# ifdef __KERNEL_SSE41__ - return _mm_dp_ps(a.m128, b.m128, 0x7f); -# else - ssef t = a * b; - return ssef(((float *)&t)[0] + ((float *)&t)[1] + ((float *)&t)[2]); -# endif -} - -/* squared length taking only specified axes into account */ -template ccl_device_inline float len_squared(const ssef &a) -{ -# ifndef __KERNEL_SSE41__ - float4 &t = (float4 &)a; - return (X ? t.x * t.x : 0.0f) + (Y ? t.y * t.y : 0.0f) + (Z ? t.z * t.z : 0.0f) + - (W ? 
t.w * t.w : 0.0f); -# else - return extract<0>( - ssef(_mm_dp_ps(a.m128, a.m128, (X << 4) | (Y << 5) | (Z << 6) | (W << 7) | 0xf))); -# endif -} - -ccl_device_inline float dot3(const ssef &a, const ssef &b) -{ -# ifdef __KERNEL_SSE41__ - return extract<0>(ssef(_mm_dp_ps(a.m128, b.m128, 0x7f))); -# else - ssef t = a * b; - return ((float *)&t)[0] + ((float *)&t)[1] + ((float *)&t)[2]; -# endif -} - -ccl_device_inline const ssef len3_squared_splat(const ssef &a) -{ - return dot3_splat(a, a); -} - -ccl_device_inline float len3_squared(const ssef &a) -{ - return dot3(a, a); -} - -ccl_device_inline float len3(const ssef &a) -{ - return extract<0>(mm_sqrt(dot3_splat(a, a))); -} - -/* SSE shuffle utility functions */ - -# ifdef __KERNEL_SSSE3__ - -/* faster version for SSSE3 */ -typedef ssei shuffle_swap_t; - -ccl_device_inline shuffle_swap_t shuffle_swap_identity() -{ - return _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); -} - -ccl_device_inline shuffle_swap_t shuffle_swap_swap() -{ - return _mm_set_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); -} - -ccl_device_inline const ssef shuffle_swap(const ssef &a, const shuffle_swap_t &shuf) -{ - return cast(_mm_shuffle_epi8(cast(a), shuf)); -} - -# else - -/* somewhat slower version for SSE2 */ -typedef int shuffle_swap_t; - -ccl_device_inline shuffle_swap_t shuffle_swap_identity() -{ - return 0; -} - -ccl_device_inline shuffle_swap_t shuffle_swap_swap() -{ - return 1; -} - -ccl_device_inline const ssef shuffle_swap(const ssef &a, shuffle_swap_t shuf) -{ - /* shuffle value must be a constant, so we need to branch */ - if (shuf) - return shuffle<1, 0, 3, 2>(a); - else - return shuffle<3, 2, 1, 0>(a); -} - -# endif - -# if defined(__KERNEL_SSE41__) && !defined(__KERNEL_NEON__) - -ccl_device_inline void gen_idirsplat_swap(const ssef &pn, - const shuffle_swap_t &shuf_identity, - const shuffle_swap_t &shuf_swap, - const float3 &idir, - ssef idirsplat[3], - shuffle_swap_t shufflexyz[3]) -{ - const 
__m128 idirsplat_raw[] = {_mm_set_ps1(idir.x), _mm_set_ps1(idir.y), _mm_set_ps1(idir.z)}; - idirsplat[0] = _mm_xor_ps(idirsplat_raw[0], pn); - idirsplat[1] = _mm_xor_ps(idirsplat_raw[1], pn); - idirsplat[2] = _mm_xor_ps(idirsplat_raw[2], pn); - - const ssef signmask = cast(ssei(0x80000000)); - const ssef shuf_identity_f = cast(shuf_identity); - const ssef shuf_swap_f = cast(shuf_swap); - - shufflexyz[0] = _mm_castps_si128( - _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[0], signmask))); - shufflexyz[1] = _mm_castps_si128( - _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[1], signmask))); - shufflexyz[2] = _mm_castps_si128( - _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[2], signmask))); -} - -# else - -ccl_device_inline void gen_idirsplat_swap(const ssef &pn, - const shuffle_swap_t &shuf_identity, - const shuffle_swap_t &shuf_swap, - const float3 &idir, - ssef idirsplat[3], - shuffle_swap_t shufflexyz[3]) -{ - idirsplat[0] = ssef(idir.x) ^ pn; - idirsplat[1] = ssef(idir.y) ^ pn; - idirsplat[2] = ssef(idir.z) ^ pn; - - shufflexyz[0] = (idir.x >= 0) ? shuf_identity : shuf_swap; - shufflexyz[1] = (idir.y >= 0) ? shuf_identity : shuf_swap; - shufflexyz[2] = (idir.z >= 0) ? 
shuf_identity : shuf_swap; -} - -# endif - -ccl_device_inline const ssef uint32_to_float(const ssei &in) -{ - ssei a = _mm_srli_epi32(in, 16); - ssei b = _mm_and_si128(in, _mm_set1_epi32(0x0000ffff)); - ssei c = _mm_or_si128(a, _mm_set1_epi32(0x53000000)); - ssef d = _mm_cvtepi32_ps(b); - ssef e = _mm_sub_ps(_mm_castsi128_ps(c), _mm_castsi128_ps(_mm_set1_epi32(0x53000000))); - return _mm_add_ps(e, d); -} - -template -ccl_device_inline const ssef set_sign_bit(const ssef &a) -{ - return cast(cast(a) ^ ssei(S1 << 31, S2 << 31, S3 << 31, S4 << 31)); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Debug Functions -//////////////////////////////////////////////////////////////////////////////// - -ccl_device_inline void print_ssef(const char *label, const ssef &a) -{ - printf( - "%s: %.8f %.8f %.8f %.8f\n", label, (double)a[0], (double)a[1], (double)a[2], (double)a[3]); -} - -#endif - -CCL_NAMESPACE_END - -#endif diff --git a/intern/cycles/util/util_ssei.h b/intern/cycles/util/util_ssei.h deleted file mode 100644 index 94412fb77e7..00000000000 --- a/intern/cycles/util/util_ssei.h +++ /dev/null @@ -1,646 +0,0 @@ -/* - * Copyright 2011-2013 Intel Corporation - * Modifications Copyright 2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0(the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_SSEI_H__ -#define __UTIL_SSEI_H__ - -CCL_NAMESPACE_BEGIN - -#ifdef __KERNEL_SSE2__ - -struct sseb; -struct ssef; - -/*! 4-wide SSE integer type. 
*/ -struct ssei { - typedef sseb Mask; // mask type - typedef ssei Int; // int type - typedef ssef Float; // float type - - enum { size = 4 }; // number of SIMD elements - union { - __m128i m128; - int32_t i[4]; - }; // data - - //////////////////////////////////////////////////////////////////////////////// - /// Constructors, Assignment & Cast Operators - //////////////////////////////////////////////////////////////////////////////// - - __forceinline ssei() - { - } - __forceinline ssei(const ssei &a) - { - m128 = a.m128; - } - __forceinline ssei &operator=(const ssei &a) - { - m128 = a.m128; - return *this; - } - - __forceinline ssei(const __m128i a) : m128(a) - { - } - __forceinline operator const __m128i &(void) const - { - return m128; - } - __forceinline operator __m128i &(void) - { - return m128; - } - - __forceinline ssei(const int a) : m128(_mm_set1_epi32(a)) - { - } - __forceinline ssei(int a, int b, int c, int d) : m128(_mm_setr_epi32(a, b, c, d)) - { - } - - __forceinline explicit ssei(const __m128 a) : m128(_mm_cvtps_epi32(a)) - { - } - - //////////////////////////////////////////////////////////////////////////////// - /// Array Access - //////////////////////////////////////////////////////////////////////////////// - - __forceinline const int32_t &operator[](const size_t index) const - { - assert(index < 4); - return i[index]; - } - __forceinline int32_t &operator[](const size_t index) - { - assert(index < 4); - return i[index]; - } -}; - -//////////////////////////////////////////////////////////////////////////////// -/// Unary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const ssei cast(const __m128 &a) -{ - return _mm_castps_si128(a); -} -__forceinline const ssei operator+(const ssei &a) -{ - return a; -} -__forceinline const ssei operator-(const ssei &a) -{ - return _mm_sub_epi32(_mm_setzero_si128(), a.m128); -} -# if defined(__KERNEL_SSSE3__) -__forceinline const ssei abs(const 
ssei &a) -{ - return _mm_abs_epi32(a.m128); -} -# endif - -//////////////////////////////////////////////////////////////////////////////// -/// Binary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const ssei operator+(const ssei &a, const ssei &b) -{ - return _mm_add_epi32(a.m128, b.m128); -} -__forceinline const ssei operator+(const ssei &a, const int32_t &b) -{ - return a + ssei(b); -} -__forceinline const ssei operator+(const int32_t &a, const ssei &b) -{ - return ssei(a) + b; -} - -__forceinline const ssei operator-(const ssei &a, const ssei &b) -{ - return _mm_sub_epi32(a.m128, b.m128); -} -__forceinline const ssei operator-(const ssei &a, const int32_t &b) -{ - return a - ssei(b); -} -__forceinline const ssei operator-(const int32_t &a, const ssei &b) -{ - return ssei(a) - b; -} - -# if defined(__KERNEL_SSE41__) -__forceinline const ssei operator*(const ssei &a, const ssei &b) -{ - return _mm_mullo_epi32(a.m128, b.m128); -} -__forceinline const ssei operator*(const ssei &a, const int32_t &b) -{ - return a * ssei(b); -} -__forceinline const ssei operator*(const int32_t &a, const ssei &b) -{ - return ssei(a) * b; -} -# endif - -__forceinline const ssei operator&(const ssei &a, const ssei &b) -{ - return _mm_and_si128(a.m128, b.m128); -} -__forceinline const ssei operator&(const ssei &a, const int32_t &b) -{ - return a & ssei(b); -} -__forceinline const ssei operator&(const int32_t &a, const ssei &b) -{ - return ssei(a) & b; -} - -__forceinline const ssei operator|(const ssei &a, const ssei &b) -{ - return _mm_or_si128(a.m128, b.m128); -} -__forceinline const ssei operator|(const ssei &a, const int32_t &b) -{ - return a | ssei(b); -} -__forceinline const ssei operator|(const int32_t &a, const ssei &b) -{ - return ssei(a) | b; -} - -__forceinline const ssei operator^(const ssei &a, const ssei &b) -{ - return _mm_xor_si128(a.m128, b.m128); -} -__forceinline const ssei operator^(const ssei &a, const 
int32_t &b) -{ - return a ^ ssei(b); -} -__forceinline const ssei operator^(const int32_t &a, const ssei &b) -{ - return ssei(a) ^ b; -} - -__forceinline const ssei operator<<(const ssei &a, const int32_t &n) -{ - return _mm_slli_epi32(a.m128, n); -} -__forceinline const ssei operator>>(const ssei &a, const int32_t &n) -{ - return _mm_srai_epi32(a.m128, n); -} - -__forceinline const ssei andnot(const ssei &a, const ssei &b) -{ - return _mm_andnot_si128(a.m128, b.m128); -} -__forceinline const ssei andnot(const sseb &a, const ssei &b) -{ - return _mm_andnot_si128(cast(a.m128), b.m128); -} -__forceinline const ssei andnot(const ssei &a, const sseb &b) -{ - return _mm_andnot_si128(a.m128, cast(b.m128)); -} - -__forceinline const ssei sra(const ssei &a, const int32_t &b) -{ - return _mm_srai_epi32(a.m128, b); -} -__forceinline const ssei srl(const ssei &a, const int32_t &b) -{ - return _mm_srli_epi32(a.m128, b); -} - -# if defined(__KERNEL_SSE41__) -__forceinline const ssei min(const ssei &a, const ssei &b) -{ - return _mm_min_epi32(a.m128, b.m128); -} -__forceinline const ssei min(const ssei &a, const int32_t &b) -{ - return min(a, ssei(b)); -} -__forceinline const ssei min(const int32_t &a, const ssei &b) -{ - return min(ssei(a), b); -} - -__forceinline const ssei max(const ssei &a, const ssei &b) -{ - return _mm_max_epi32(a.m128, b.m128); -} -__forceinline const ssei max(const ssei &a, const int32_t &b) -{ - return max(a, ssei(b)); -} -__forceinline const ssei max(const int32_t &a, const ssei &b) -{ - return max(ssei(a), b); -} -# endif - -//////////////////////////////////////////////////////////////////////////////// -/// Assignment Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline ssei &operator+=(ssei &a, const ssei &b) -{ - return a = a + b; -} -__forceinline ssei &operator+=(ssei &a, const int32_t &b) -{ - return a = a + b; -} - -__forceinline ssei &operator-=(ssei &a, const ssei &b) -{ - return a = a 
- b; -} -__forceinline ssei &operator-=(ssei &a, const int32_t &b) -{ - return a = a - b; -} - -# if defined(__KERNEL_SSE41__) -__forceinline ssei &operator*=(ssei &a, const ssei &b) -{ - return a = a * b; -} -__forceinline ssei &operator*=(ssei &a, const int32_t &b) -{ - return a = a * b; -} -# endif - -__forceinline ssei &operator&=(ssei &a, const ssei &b) -{ - return a = a & b; -} -__forceinline ssei &operator&=(ssei &a, const int32_t &b) -{ - return a = a & b; -} - -__forceinline ssei &operator|=(ssei &a, const ssei &b) -{ - return a = a | b; -} -__forceinline ssei &operator|=(ssei &a, const int32_t &b) -{ - return a = a | b; -} - -__forceinline ssei &operator^=(ssei &a, const ssei &b) -{ - return a = a ^ b; -} -__forceinline ssei &operator^=(ssei &a, const int32_t &b) -{ - return a = a ^ b; -} - -__forceinline ssei &operator<<=(ssei &a, const int32_t &b) -{ - return a = a << b; -} -__forceinline ssei &operator>>=(ssei &a, const int32_t &b) -{ - return a = a >> b; -} - -//////////////////////////////////////////////////////////////////////////////// -/// Comparison Operators + Select -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const sseb operator==(const ssei &a, const ssei &b) -{ - return _mm_castsi128_ps(_mm_cmpeq_epi32(a.m128, b.m128)); -} -__forceinline const sseb operator==(const ssei &a, const int32_t &b) -{ - return a == ssei(b); -} -__forceinline const sseb operator==(const int32_t &a, const ssei &b) -{ - return ssei(a) == b; -} - -__forceinline const sseb operator!=(const ssei &a, const ssei &b) -{ - return !(a == b); -} -__forceinline const sseb operator!=(const ssei &a, const int32_t &b) -{ - return a != ssei(b); -} -__forceinline const sseb operator!=(const int32_t &a, const ssei &b) -{ - return ssei(a) != b; -} - -__forceinline const sseb operator<(const ssei &a, const ssei &b) -{ - return _mm_castsi128_ps(_mm_cmplt_epi32(a.m128, b.m128)); -} -__forceinline const sseb operator<(const ssei &a, 
const int32_t &b) -{ - return a < ssei(b); -} -__forceinline const sseb operator<(const int32_t &a, const ssei &b) -{ - return ssei(a) < b; -} - -__forceinline const sseb operator>=(const ssei &a, const ssei &b) -{ - return !(a < b); -} -__forceinline const sseb operator>=(const ssei &a, const int32_t &b) -{ - return a >= ssei(b); -} -__forceinline const sseb operator>=(const int32_t &a, const ssei &b) -{ - return ssei(a) >= b; -} - -__forceinline const sseb operator>(const ssei &a, const ssei &b) -{ - return _mm_castsi128_ps(_mm_cmpgt_epi32(a.m128, b.m128)); -} -__forceinline const sseb operator>(const ssei &a, const int32_t &b) -{ - return a > ssei(b); -} -__forceinline const sseb operator>(const int32_t &a, const ssei &b) -{ - return ssei(a) > b; -} - -__forceinline const sseb operator<=(const ssei &a, const ssei &b) -{ - return !(a > b); -} -__forceinline const sseb operator<=(const ssei &a, const int32_t &b) -{ - return a <= ssei(b); -} -__forceinline const sseb operator<=(const int32_t &a, const ssei &b) -{ - return ssei(a) <= b; -} - -__forceinline const ssei select(const sseb &m, const ssei &t, const ssei &f) -{ -# ifdef __KERNEL_SSE41__ - return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), m)); -# else - return _mm_or_si128(_mm_and_si128(m, t), _mm_andnot_si128(m, f)); -# endif -} - -__forceinline const ssei select(const int mask, const ssei &t, const ssei &f) -{ -# if defined(__KERNEL_SSE41__) && \ - ((!defined(__clang__) && !defined(_MSC_VER)) || defined(__INTEL_COMPILER)) - return _mm_castps_si128(_mm_blend_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), mask)); -# else - return select(sseb(mask), t, f); -# endif -} - -//////////////////////////////////////////////////////////////////////////////// -// Movement/Shifting/Shuffling Functions -//////////////////////////////////////////////////////////////////////////////// - -__forceinline ssei unpacklo(const ssei &a, const ssei &b) -{ - return _mm_unpacklo_epi32(a, b); -} 
-__forceinline ssei unpackhi(const ssei &a, const ssei &b) -{ - return _mm_unpackhi_epi32(a, b); -} - -template -__forceinline const ssei shuffle(const ssei &a) -{ -# ifdef __KERNEL_NEON__ - int32x4_t result = shuffle_neon(vreinterpretq_s32_m128i(a)); - return vreinterpretq_m128i_s32(result); -# else - return _mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)); -# endif -} - -template -__forceinline const ssei shuffle(const ssei &a, const ssei &b) -{ -# ifdef __KERNEL_NEON__ - int32x4_t result = shuffle_neon(vreinterpretq_s32_m128i(a), - vreinterpretq_s32_m128i(b)); - return vreinterpretq_m128i_s32(result); -# else - return _mm_castps_si128( - _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0))); -# endif -} - -template __forceinline const ssei shuffle(const ssei &b) -{ - return shuffle(b); -} - -# if defined(__KERNEL_SSE41__) -template __forceinline int extract(const ssei &b) -{ - return _mm_extract_epi32(b, src); -} -template __forceinline const ssei insert(const ssei &a, const int32_t b) -{ - return _mm_insert_epi32(a, b, dst); -} -# else -template __forceinline int extract(const ssei &b) -{ - return b[src]; -} -template __forceinline const ssei insert(const ssei &a, const int32_t b) -{ - ssei c = a; - c[dst] = b; - return c; -} -# endif - -//////////////////////////////////////////////////////////////////////////////// -/// Reductions -//////////////////////////////////////////////////////////////////////////////// - -# if defined(__KERNEL_SSE41__) -__forceinline const ssei vreduce_min(const ssei &v) -{ - ssei h = min(shuffle<1, 0, 3, 2>(v), v); - return min(shuffle<2, 3, 0, 1>(h), h); -} -__forceinline const ssei vreduce_max(const ssei &v) -{ - ssei h = max(shuffle<1, 0, 3, 2>(v), v); - return max(shuffle<2, 3, 0, 1>(h), h); -} -__forceinline const ssei vreduce_add(const ssei &v) -{ - ssei h = shuffle<1, 0, 3, 2>(v) + v; - return shuffle<2, 3, 0, 1>(h) + h; -} - -__forceinline int reduce_min(const ssei &v) -{ -# ifdef 
__KERNEL_NEON__ - return vminvq_s32(vreinterpretq_s32_m128i(v)); -# else - return extract<0>(vreduce_min(v)); -# endif -} -__forceinline int reduce_max(const ssei &v) -{ -# ifdef __KERNEL_NEON__ - return vmaxvq_s32(vreinterpretq_s32_m128i(v)); -# else - return extract<0>(vreduce_max(v)); -# endif -} -__forceinline int reduce_add(const ssei &v) -{ -# ifdef __KERNEL_NEON__ - return vaddvq_s32(vreinterpretq_s32_m128i(v)); -# else - return extract<0>(vreduce_add(v)); -# endif -} - -__forceinline uint32_t select_min(const ssei &v) -{ - return __bsf(movemask(v == vreduce_min(v))); -} -__forceinline uint32_t select_max(const ssei &v) -{ - return __bsf(movemask(v == vreduce_max(v))); -} - -__forceinline uint32_t select_min(const sseb &valid, const ssei &v) -{ - const ssei a = select(valid, v, ssei((int)pos_inf)); - return __bsf(movemask(valid & (a == vreduce_min(a)))); -} -__forceinline uint32_t select_max(const sseb &valid, const ssei &v) -{ - const ssei a = select(valid, v, ssei((int)neg_inf)); - return __bsf(movemask(valid & (a == vreduce_max(a)))); -} - -# else - -__forceinline int ssei_min(int a, int b) -{ - return (a < b) ? a : b; -} -__forceinline int ssei_max(int a, int b) -{ - return (a > b) ? 
a : b; -} -__forceinline int reduce_min(const ssei &v) -{ - return ssei_min(ssei_min(v[0], v[1]), ssei_min(v[2], v[3])); -} -__forceinline int reduce_max(const ssei &v) -{ - return ssei_max(ssei_max(v[0], v[1]), ssei_max(v[2], v[3])); -} -__forceinline int reduce_add(const ssei &v) -{ - return v[0] + v[1] + v[2] + v[3]; -} - -# endif - -//////////////////////////////////////////////////////////////////////////////// -/// Memory load and store operations -//////////////////////////////////////////////////////////////////////////////// - -__forceinline ssei load4i(const void *const a) -{ - return _mm_load_si128((__m128i *)a); -} - -__forceinline void store4i(void *ptr, const ssei &v) -{ - _mm_store_si128((__m128i *)ptr, v); -} - -__forceinline void storeu4i(void *ptr, const ssei &v) -{ - _mm_storeu_si128((__m128i *)ptr, v); -} - -__forceinline void store4i(const sseb &mask, void *ptr, const ssei &i) -{ -# if defined(__KERNEL_AVX__) - _mm_maskstore_ps((float *)ptr, (__m128i)mask, _mm_castsi128_ps(i)); -# else - *(ssei *)ptr = select(mask, i, *(ssei *)ptr); -# endif -} - -__forceinline ssei load4i_nt(void *ptr) -{ -# if defined(__KERNEL_SSE41__) - return _mm_stream_load_si128((__m128i *)ptr); -# else - return _mm_load_si128((__m128i *)ptr); -# endif -} - -__forceinline void store4i_nt(void *ptr, const ssei &v) -{ -# if defined(__KERNEL_SSE41__) - _mm_stream_ps((float *)ptr, _mm_castsi128_ps(v)); -# else - _mm_store_si128((__m128i *)ptr, v); -# endif -} - -//////////////////////////////////////////////////////////////////////////////// -/// Debug Functions -//////////////////////////////////////////////////////////////////////////////// - -ccl_device_inline void print_ssei(const char *label, const ssei &a) -{ - printf("%s: %df %df %df %d\n", label, a[0], a[1], a[2], a[3]); -} - -#endif - -CCL_NAMESPACE_END - -#endif diff --git a/intern/cycles/util/util_stack_allocator.h b/intern/cycles/util/util_stack_allocator.h deleted file mode 100644 index ef31c0fe5e2..00000000000 
--- a/intern/cycles/util/util_stack_allocator.h +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_STACK_ALLOCATOR_H__ -#define __UTIL_STACK_ALLOCATOR_H__ - -#include -#include - -CCL_NAMESPACE_BEGIN - -/* Stack allocator for the use with STL. */ -template class ccl_try_align(16) StackAllocator -{ - public: - typedef size_t size_type; - typedef ptrdiff_t difference_type; - typedef T *pointer; - typedef const T *const_pointer; - typedef T &reference; - typedef const T &const_reference; - typedef T value_type; - - /* Allocator construction/destruction. */ - - StackAllocator() : pointer_(0), use_stack_(true) - { - } - - StackAllocator(const StackAllocator &) : pointer_(0), use_stack_(true) - { - } - - template - StackAllocator(const StackAllocator &) : pointer_(0), use_stack_(false) - { - } - - /* Memory allocation/deallocation. 
*/ - - T *allocate(size_t n, const void *hint = 0) - { - (void)hint; - if (n == 0) { - return NULL; - } - if (pointer_ + n >= SIZE || use_stack_ == false) { - size_t size = n * sizeof(T); - util_guarded_mem_alloc(size); - T *mem; -#ifdef WITH_BLENDER_GUARDEDALLOC - mem = (T *)MEM_mallocN_aligned(size, 16, "Cycles Alloc"); -#else - mem = (T *)malloc(size); -#endif - if (mem == NULL) { - throw std::bad_alloc(); - } - return mem; - } - T *mem = &data_[pointer_]; - pointer_ += n; - return mem; - } - - void deallocate(T * p, size_t n) - { - if (p == NULL) { - return; - } - if (p < data_ || p >= data_ + SIZE) { - util_guarded_mem_free(n * sizeof(T)); -#ifdef WITH_BLENDER_GUARDEDALLOC - MEM_freeN(p); -#else - free(p); -#endif - return; - } - /* We don't support memory free for the stack allocator. */ - } - - /* Address of an reference. */ - - T *address(T & x) const - { - return &x; - } - - const T *address(const T &x) const - { - return &x; - } - - /* Object construction/destruction. */ - - void construct(T * p, const T &val) - { - if (p != NULL) { - new ((T *)p) T(val); - } - } - - void destroy(T * p) - { - p->~T(); - } - - /* Maximum allocation size. */ - - size_t max_size() const - { - return size_t(-1); - } - - /* Rebind to other type of allocator. 
*/ - - template struct rebind { - typedef StackAllocator other; - }; - - /* Operators */ - - template inline StackAllocator &operator=(const StackAllocator &) - { - return *this; - } - - StackAllocator &operator=(const StackAllocator &) - { - return *this; - } - - inline bool operator==(StackAllocator const & /*other*/) const - { - return true; - } - - inline bool operator!=(StackAllocator const &other) const - { - return !operator==(other); - } - - private: - int pointer_; - bool use_stack_; - T data_[SIZE]; -}; - -CCL_NAMESPACE_END - -#endif /* __UTIL_STACK_ALLOCATOR_H__ */ diff --git a/intern/cycles/util/util_static_assert.h b/intern/cycles/util/util_static_assert.h deleted file mode 100644 index 7df52d462b7..00000000000 --- a/intern/cycles/util/util_static_assert.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* clang-format off */ - -/* #define static_assert triggers a bug in some clang-format versions, disable - * format for entire file to keep results consistent. 
*/ - -#ifndef __UTIL_STATIC_ASSERT_H__ -#define __UTIL_STATIC_ASSERT_H__ - -CCL_NAMESPACE_BEGIN - -#if defined(CYCLES_CUBIN_CC) -# define static_assert(statement, message) -#endif - -#define static_assert_align(st, align) \ - static_assert((sizeof(st) % (align) == 0), "Structure must be strictly aligned") // NOLINT - -CCL_NAMESPACE_END - -#endif /* __UTIL_STATIC_ASSERT_H__ */ diff --git a/intern/cycles/util/util_stats.h b/intern/cycles/util/util_stats.h deleted file mode 100644 index 15cf836de3c..00000000000 --- a/intern/cycles/util/util_stats.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __UTIL_STATS_H__ -#define __UTIL_STATS_H__ - -#include "util/util_atomic.h" -#include "util/util_profiling.h" - -CCL_NAMESPACE_BEGIN - -class Stats { - public: - enum static_init_t { static_init = 0 }; - - Stats() : mem_used(0), mem_peak(0) - { - } - explicit Stats(static_init_t) - { - } - - void mem_alloc(size_t size) - { - atomic_add_and_fetch_z(&mem_used, size); - atomic_fetch_and_update_max_z(&mem_peak, mem_used); - } - - void mem_free(size_t size) - { - assert(mem_used >= size); - atomic_sub_and_fetch_z(&mem_used, size); - } - - size_t mem_used; - size_t mem_peak; -}; - -CCL_NAMESPACE_END - -#endif /* __UTIL_STATS_H__ */ diff --git a/intern/cycles/util/util_string.cpp b/intern/cycles/util/util_string.cpp deleted file mode 100644 index 0fc9cb4ae77..00000000000 --- a/intern/cycles/util/util_string.cpp +++ /dev/null @@ -1,268 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -#include -#include - -#include "util/util_foreach.h" -#include "util/util_string.h" -#include "util/util_windows.h" - -#ifdef _WIN32 -# ifndef vsnprintf -# define vsnprintf _vsnprintf -# endif -#endif /* _WIN32 */ - -CCL_NAMESPACE_BEGIN - -string string_printf(const char *format, ...) 
-{ - vector str(128, 0); - - while (1) { - va_list args; - int result; - - va_start(args, format); - result = vsnprintf(&str[0], str.size(), format, args); - va_end(args); - - if (result == -1) { - /* not enough space or formatting error */ - if (str.size() > 65536) { - assert(0); - return string(""); - } - - str.resize(str.size() * 2, 0); - continue; - } - else if (result >= (int)str.size()) { - /* not enough space */ - str.resize(result + 1, 0); - continue; - } - - return string(&str[0]); - } -} - -bool string_iequals(const string &a, const string &b) -{ - if (a.size() == b.size()) { - for (size_t i = 0; i < a.size(); i++) - if (toupper(a[i]) != toupper(b[i])) - return false; - - return true; - } - - return false; -} - -void string_split(vector &tokens, - const string &str, - const string &separators, - bool skip_empty_tokens) -{ - size_t token_start = 0, token_length = 0; - for (size_t i = 0; i < str.size(); ++i) { - const char ch = str[i]; - if (separators.find(ch) == string::npos) { - /* Current character is not a separator, - * append it to token by increasing token length. - */ - ++token_length; - } - else { - /* Current character is a separator, - * append current token to the list. - */ - if (!skip_empty_tokens || token_length > 0) { - string token = str.substr(token_start, token_length); - tokens.push_back(token); - } - token_start = i + 1; - token_length = 0; - } - } - /* Append token from the tail of the string if exists. 
*/ - if (token_length) { - string token = str.substr(token_start, token_length); - tokens.push_back(token); - } -} - -bool string_startswith(const string_view s, const string_view start) -{ - const size_t len = start.size(); - - if (len > s.size()) { - return false; - } - - return strncmp(s.c_str(), start.data(), len) == 0; -} - -bool string_endswith(const string_view s, const string_view end) -{ - const size_t len = end.size(); - - if (len > s.size()) { - return false; - } - - return strncmp(s.c_str() + s.size() - len, end.data(), len) == 0; -} - -string string_strip(const string &s) -{ - string result = s; - result.erase(0, result.find_first_not_of(' ')); - result.erase(result.find_last_not_of(' ') + 1); - return result; -} - -void string_replace(string &haystack, const string &needle, const string &other) -{ - size_t i = 0, index; - while ((index = haystack.find(needle, i)) != string::npos) { - haystack.replace(index, needle.size(), other); - i = index + other.size(); - } -} - -string string_remove_trademark(const string &s) -{ - string result = s; - - /* Special case, so we don't leave sequential spaces behind. */ - /* TODO(sergey): Consider using regex perhaps? */ - string_replace(result, " (TM)", ""); - string_replace(result, " (R)", ""); - - string_replace(result, "(TM)", ""); - string_replace(result, "(R)", ""); - - return string_strip(result); -} - -string string_from_bool(bool var) -{ - if (var) - return "True"; - else - return "False"; -} - -string to_string(const char *str) -{ - return string(str); -} - -string string_to_lower(const string &s) -{ - string r = s; - std::transform(r.begin(), r.end(), r.begin(), [](char c) { return std::tolower(c); }); - return r; -} - -/* Wide char strings helpers for Windows. 
*/ - -#ifdef _WIN32 - -wstring string_to_wstring(const string &str) -{ - const int length_wc = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), NULL, 0); - wstring str_wc(length_wc, 0); - MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), &str_wc[0], length_wc); - return str_wc; -} - -string string_from_wstring(const wstring &str) -{ - int length_mb = WideCharToMultiByte(CP_UTF8, 0, str.c_str(), str.size(), NULL, 0, NULL, NULL); - string str_mb(length_mb, 0); - WideCharToMultiByte(CP_UTF8, 0, str.c_str(), str.size(), &str_mb[0], length_mb, NULL, NULL); - return str_mb; -} - -string string_to_ansi(const string &str) -{ - const int length_wc = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), NULL, 0); - wstring str_wc(length_wc, 0); - MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), &str_wc[0], length_wc); - - int length_mb = WideCharToMultiByte( - CP_ACP, 0, str_wc.c_str(), str_wc.size(), NULL, 0, NULL, NULL); - - string str_mb(length_mb, 0); - WideCharToMultiByte(CP_ACP, 0, str_wc.c_str(), str_wc.size(), &str_mb[0], length_mb, NULL, NULL); - - return str_mb; -} - -#endif /* _WIN32 */ - -string string_human_readable_size(size_t size) -{ - static const char suffixes[] = "BKMGTPEZY"; - - const char *suffix = suffixes; - size_t r = 0; - - while (size >= 1024) { - r = size % 1024; - size /= 1024; - suffix++; - } - - if (*suffix != 'B') - return string_printf("%.2f%c", double(size * 1024 + r) / 1024.0, *suffix); - else - return string_printf("%zu", size); -} - -string string_human_readable_number(size_t num) -{ - if (num == 0) { - return "0"; - } - - /* Add thousands separators. 
*/ - char buf[32]; - - char *p = buf + 31; - *p = '\0'; - - int i = -1; - while (num) { - if (++i && i % 3 == 0) - *(--p) = ','; - - *(--p) = '0' + (num % 10); - - num /= 10; - } - - return p; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_string.h b/intern/cycles/util/util_string.h deleted file mode 100644 index 55462cfd8b8..00000000000 --- a/intern/cycles/util/util_string.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_STRING_H__ -#define __UTIL_STRING_H__ - -#include -#include -#include - -/* Use string view implementation from OIIO. - * Ideally, need to switch to `std::string_view`, but this first requires getting rid of using - * namespace OIIO as it causes symbol collision. */ -#include - -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -using std::istringstream; -using std::ostringstream; -using std::string; -using std::stringstream; -using std::to_string; - -using OIIO::string_view; - -#ifdef __GNUC__ -# define PRINTF_ATTRIBUTE __attribute__((format(printf, 1, 2))) -#else -# define PRINTF_ATTRIBUTE -#endif - -string string_printf(const char *format, ...) 
PRINTF_ATTRIBUTE; - -bool string_iequals(const string &a, const string &b); -void string_split(vector &tokens, - const string &str, - const string &separators = "\t ", - bool skip_empty_tokens = true); -void string_replace(string &haystack, const string &needle, const string &other); -bool string_startswith(string_view s, string_view start); -bool string_endswith(string_view s, string_view end); -string string_strip(const string &s); -string string_remove_trademark(const string &s); -string string_from_bool(const bool var); -string to_string(const char *str); -string string_to_lower(const string &s); - -/* Wide char strings are only used on Windows to deal with non-ASCII - * characters in file names and such. No reason to use such strings - * for something else at this moment. - * - * Please note that strings are expected to be in UTF-8 codepage, and - * if ANSI is needed then explicit conversion required. - */ -#ifdef _WIN32 -using std::wstring; -wstring string_to_wstring(const string &path); -string string_from_wstring(const wstring &path); -string string_to_ansi(const string &str); -#endif - -/* Make a string from a size in bytes in human readable form. */ -string string_human_readable_size(size_t size); -/* Make a string from a unit-less quantity in human readable form. */ -string string_human_readable_number(size_t num); - -CCL_NAMESPACE_END - -#endif /* __UTIL_STRING_H__ */ diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp deleted file mode 100644 index be8c2fb505a..00000000000 --- a/intern/cycles/util/util_system.cpp +++ /dev/null @@ -1,415 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "util/util_system.h" - -#include "util/util_logging.h" -#include "util/util_string.h" -#include "util/util_types.h" - -#include - -#include -OIIO_NAMESPACE_USING - -#ifdef _WIN32 -# if (!defined(FREE_WINDOWS)) -# include -# endif -# include "util_windows.h" -#elif defined(__APPLE__) -# include -# include -# include -#else -# include -# include -#endif - -CCL_NAMESPACE_BEGIN - -bool system_cpu_ensure_initialized() -{ - static bool is_initialized = false; - static bool result = false; - if (is_initialized) { - return result; - } - is_initialized = true; - const NUMAAPI_Result numa_result = numaAPI_Initialize(); - result = (numa_result == NUMAAPI_SUCCESS); - return result; -} - -/* Fallback solution, which doesn't use NUMA/CPU groups. */ -static int system_cpu_thread_count_fallback() -{ -#ifdef _WIN32 - SYSTEM_INFO info; - GetSystemInfo(&info); - return info.dwNumberOfProcessors; -#elif defined(__APPLE__) - int count; - size_t len = sizeof(count); - int mib[2] = {CTL_HW, HW_NCPU}; - sysctl(mib, 2, &count, &len, NULL, 0); - return count; -#else - return sysconf(_SC_NPROCESSORS_ONLN); -#endif -} - -int system_cpu_thread_count() -{ - const int num_nodes = system_cpu_num_numa_nodes(); - int num_threads = 0; - for (int node = 0; node < num_nodes; ++node) { - if (!system_cpu_is_numa_node_available(node)) { - continue; - } - num_threads += system_cpu_num_numa_node_processors(node); - } - return num_threads; -} - -int system_cpu_num_numa_nodes() -{ - if (!system_cpu_ensure_initialized()) { - /* Fallback to a single node with all the threads. 
*/ - return 1; - } - return numaAPI_GetNumNodes(); -} - -bool system_cpu_is_numa_node_available(int node) -{ - if (!system_cpu_ensure_initialized()) { - return true; - } - return numaAPI_IsNodeAvailable(node); -} - -int system_cpu_num_numa_node_processors(int node) -{ - if (!system_cpu_ensure_initialized()) { - return system_cpu_thread_count_fallback(); - } - return numaAPI_GetNumNodeProcessors(node); -} - -bool system_cpu_run_thread_on_node(int node) -{ - if (!system_cpu_ensure_initialized()) { - return true; - } - return numaAPI_RunThreadOnNode(node); -} - -int system_console_width() -{ - int columns = 0; - -#ifdef _WIN32 - CONSOLE_SCREEN_BUFFER_INFO csbi; - if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi)) { - columns = csbi.dwSize.X; - } -#else - struct winsize w; - if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == 0) { - columns = w.ws_col; - } -#endif - - return (columns > 0) ? columns : 80; -} - -int system_cpu_num_active_group_processors() -{ - if (!system_cpu_ensure_initialized()) { - return system_cpu_thread_count_fallback(); - } - return numaAPI_GetNumCurrentNodesProcessors(); -} - -/* Equivalent of Windows __cpuid for x86 processors on other platforms. */ -#if (!defined(_WIN32) || defined(FREE_WINDOWS)) && (defined(__x86_64__) || defined(__i386__)) -static void __cpuid(int data[4], int selector) -{ -# if defined(__x86_64__) - asm("cpuid" : "=a"(data[0]), "=b"(data[1]), "=c"(data[2]), "=d"(data[3]) : "a"(selector)); -# elif defined(__i386__) - asm("pushl %%ebx \n\t" - "cpuid \n\t" - "movl %%ebx, %1 \n\t" - "popl %%ebx \n\t" - : "=a"(data[0]), "=r"(data[1]), "=c"(data[2]), "=d"(data[3]) - : "a"(selector) - : "ebx"); -# else - data[0] = data[1] = data[2] = data[3] = 0; -# endif -} -#endif - -string system_cpu_brand_string() -{ -#if defined(__APPLE__) - /* Get from system on macOS. 
*/ - char modelname[512] = ""; - size_t bufferlen = 512; - if (sysctlbyname("machdep.cpu.brand_string", &modelname, &bufferlen, NULL, 0) == 0) { - return modelname; - } -#elif defined(WIN32) || defined(__x86_64__) || defined(__i386__) - /* Get from intrinsics on Windows and x86. */ - char buf[49] = {0}; - int result[4] = {0}; - - __cpuid(result, 0x80000000); - - if (result[0] != 0 && result[0] >= (int)0x80000004) { - __cpuid((int *)(buf + 0), 0x80000002); - __cpuid((int *)(buf + 16), 0x80000003); - __cpuid((int *)(buf + 32), 0x80000004); - - string brand = buf; - - /* Make it a bit more presentable. */ - brand = string_remove_trademark(brand); - - return brand; - } -#else - /* Get from /proc/cpuinfo on Unix systems. */ - FILE *cpuinfo = fopen("/proc/cpuinfo", "r"); - if (cpuinfo != nullptr) { - char cpuinfo_buf[513] = ""; - fread(cpuinfo_buf, sizeof(cpuinfo_buf) - 1, 1, cpuinfo); - fclose(cpuinfo); - - char *modelname = strstr(cpuinfo_buf, "model name"); - if (modelname != nullptr) { - modelname = strchr(modelname, ':'); - if (modelname != nullptr) { - modelname += 2; - char *modelname_end = strchr(modelname, '\n'); - if (modelname_end != nullptr) { - *modelname_end = '\0'; - return modelname; - } - } - } - } -#endif - return "Unknown CPU"; -} - -int system_cpu_bits() -{ - return (sizeof(void *) * 8); -} - -#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) - -struct CPUCapabilities { - bool x64; - bool mmx; - bool sse; - bool sse2; - bool sse3; - bool ssse3; - bool sse41; - bool sse42; - bool sse4a; - bool avx; - bool f16c; - bool avx2; - bool xop; - bool fma3; - bool fma4; - bool bmi1; - bool bmi2; -}; - -static CPUCapabilities &system_cpu_capabilities() -{ - static CPUCapabilities caps; - static bool caps_init = false; - - if (!caps_init) { - int result[4], num; - - memset(&caps, 0, sizeof(caps)); - - __cpuid(result, 0); - num = result[0]; - - if (num >= 1) { - __cpuid(result, 0x00000001); - caps.mmx = (result[3] & ((int)1 << 
23)) != 0; - caps.sse = (result[3] & ((int)1 << 25)) != 0; - caps.sse2 = (result[3] & ((int)1 << 26)) != 0; - caps.sse3 = (result[2] & ((int)1 << 0)) != 0; - - caps.ssse3 = (result[2] & ((int)1 << 9)) != 0; - caps.sse41 = (result[2] & ((int)1 << 19)) != 0; - caps.sse42 = (result[2] & ((int)1 << 20)) != 0; - - caps.fma3 = (result[2] & ((int)1 << 12)) != 0; - caps.avx = false; - bool os_uses_xsave_xrestore = (result[2] & ((int)1 << 27)) != 0; - bool cpu_avx_support = (result[2] & ((int)1 << 28)) != 0; - - if (os_uses_xsave_xrestore && cpu_avx_support) { - // Check if the OS will save the YMM registers - uint32_t xcr_feature_mask; -# if defined(__GNUC__) - int edx; /* not used */ - /* actual opcode for xgetbv */ - __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr_feature_mask), "=d"(edx) : "c"(0)); -# elif defined(_MSC_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) - /* Minimum VS2010 SP1 compiler is required. */ - xcr_feature_mask = (uint32_t)_xgetbv(_XCR_XFEATURE_ENABLED_MASK); -# else - xcr_feature_mask = 0; -# endif - caps.avx = (xcr_feature_mask & 0x6) == 0x6; - } - - caps.f16c = (result[2] & ((int)1 << 29)) != 0; - - __cpuid(result, 0x00000007); - caps.bmi1 = (result[1] & ((int)1 << 3)) != 0; - caps.bmi2 = (result[1] & ((int)1 << 8)) != 0; - caps.avx2 = (result[1] & ((int)1 << 5)) != 0; - } - - caps_init = true; - } - - return caps; -} - -bool system_cpu_support_sse2() -{ - CPUCapabilities &caps = system_cpu_capabilities(); - return caps.sse && caps.sse2; -} - -bool system_cpu_support_sse3() -{ - CPUCapabilities &caps = system_cpu_capabilities(); - return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3; -} - -bool system_cpu_support_sse41() -{ - CPUCapabilities &caps = system_cpu_capabilities(); - return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41; -} - -bool system_cpu_support_avx() -{ - CPUCapabilities &caps = system_cpu_capabilities(); - return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx; -} - -bool 
system_cpu_support_avx2() -{ - CPUCapabilities &caps = system_cpu_capabilities(); - return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx && caps.f16c && - caps.avx2 && caps.fma3 && caps.bmi1 && caps.bmi2; -} -#else - -bool system_cpu_support_sse2() -{ - return false; -} - -bool system_cpu_support_sse3() -{ - return false; -} - -bool system_cpu_support_sse41() -{ - return false; -} - -bool system_cpu_support_avx() -{ - return false; -} -bool system_cpu_support_avx2() -{ - return false; -} - -#endif - -bool system_call_self(const vector &args) -{ - /* Escape program and arguments in case they contain spaces. */ - string cmd = "\"" + Sysutil::this_program_path() + "\""; - - for (int i = 0; i < args.size(); i++) { - cmd += " \"" + args[i] + "\""; - } - -#ifdef _WIN32 - /* Use cmd /S to avoid issues with spaces in arguments. */ - cmd = "cmd /S /C \"" + cmd + " > nul \""; -#else - /* Quiet output. */ - cmd += " > /dev/null"; -#endif - - return (system(cmd.c_str()) == 0); -} - -size_t system_physical_ram() -{ -#ifdef _WIN32 - MEMORYSTATUSEX ram; - ram.dwLength = sizeof(ram); - GlobalMemoryStatusEx(&ram); - return ram.ullTotalPhys; -#elif defined(__APPLE__) - uint64_t ram = 0; - size_t len = sizeof(ram); - if (sysctlbyname("hw.memsize", &ram, &len, NULL, 0) == 0) { - return ram; - } - return 0; -#else - size_t ps = sysconf(_SC_PAGESIZE); - size_t pn = sysconf(_SC_PHYS_PAGES); - return ps * pn; -#endif -} - -uint64_t system_self_process_id() -{ -#ifdef _WIN32 - return GetCurrentProcessId(); -#else - return getpid(); -#endif -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_system.h b/intern/cycles/util/util_system.h deleted file mode 100644 index a1797e6ca44..00000000000 --- a/intern/cycles/util/util_system.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_SYSTEM_H__ -#define __UTIL_SYSTEM_H__ - -#include "util/util_string.h" -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -/* Make sure CPU groups / NUMA API is initialized. */ -bool system_cpu_ensure_initialized(); - -/* Get total number of threads in all NUMA nodes / CPU groups. */ -int system_cpu_thread_count(); - -/* Get width in characters of the current console output. */ -int system_console_width(); - -/* Get number of available nodes. - * - * This is in fact an index of last node plus one and it's not guaranteed - * that all nodes up to this one are available. */ -int system_cpu_num_numa_nodes(); - -/* Returns truth if the given node is available for compute. */ -bool system_cpu_is_numa_node_available(int node); - -/* Get number of available processors on a given node. */ -int system_cpu_num_numa_node_processors(int node); - -/* Runs the current thread and its children on a specific node. - * - * Returns truth if affinity has successfully changed. */ -bool system_cpu_run_thread_on_node(int node); - -/* Number of processors within the current CPU group (or within active thread - * thread affinity). */ -int system_cpu_num_active_group_processors(); - -string system_cpu_brand_string(); -int system_cpu_bits(); -bool system_cpu_support_sse2(); -bool system_cpu_support_sse3(); -bool system_cpu_support_sse41(); -bool system_cpu_support_avx(); -bool system_cpu_support_avx2(); - -size_t system_physical_ram(); - -/* Start a new process of the current application with the given arguments. 
*/ -bool system_call_self(const vector &args); - -/* Get identifier of the currently running process. */ -uint64_t system_self_process_id(); - -CCL_NAMESPACE_END - -#endif /* __UTIL_SYSTEM_H__ */ diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp deleted file mode 100644 index 949ba0a7b4d..00000000000 --- a/intern/cycles/util/util_task.cpp +++ /dev/null @@ -1,251 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "util/util_task.h" -#include "util/util_foreach.h" -#include "util/util_logging.h" -#include "util/util_system.h" -#include "util/util_time.h" - -CCL_NAMESPACE_BEGIN - -/* Task Pool */ - -TaskPool::TaskPool() : start_time(time_dt()), num_tasks_pushed(0) -{ -} - -TaskPool::~TaskPool() -{ - cancel(); -} - -void TaskPool::push(TaskRunFunction &&task) -{ - tbb_group.run(std::move(task)); - num_tasks_pushed++; -} - -void TaskPool::wait_work(Summary *stats) -{ - tbb_group.wait(); - - if (stats != NULL) { - stats->time_total = time_dt() - start_time; - stats->num_tasks_handled = num_tasks_pushed; - } - - num_tasks_pushed = 0; -} - -void TaskPool::cancel() -{ - if (num_tasks_pushed > 0) { - tbb_group.cancel(); - tbb_group.wait(); - num_tasks_pushed = 0; - } -} - -bool TaskPool::canceled() -{ - return tbb::is_current_task_group_canceling(); -} - -/* Task Scheduler */ - -thread_mutex TaskScheduler::mutex; -int TaskScheduler::users = 0; -int TaskScheduler::active_num_threads = 0; -tbb::global_control *TaskScheduler::global_control = nullptr; - -void TaskScheduler::init(int num_threads) -{ - thread_scoped_lock lock(mutex); - /* Multiple cycles instances can use this task scheduler, sharing the same - * threads, so we keep track of the number of users. */ - ++users; - if (users != 1) { - return; - } - if (num_threads > 0) { - /* Automatic number of threads. 
*/ - VLOG(1) << "Overriding number of TBB threads to " << num_threads << "."; - global_control = new tbb::global_control(tbb::global_control::max_allowed_parallelism, - num_threads); - active_num_threads = num_threads; - } - else { - active_num_threads = system_cpu_thread_count(); - } -} - -void TaskScheduler::exit() -{ - thread_scoped_lock lock(mutex); - users--; - if (users == 0) { - delete global_control; - global_control = nullptr; - active_num_threads = 0; - } -} - -void TaskScheduler::free_memory() -{ - assert(users == 0); -} - -int TaskScheduler::num_threads() -{ - return active_num_threads; -} - -/* Dedicated Task Pool */ - -DedicatedTaskPool::DedicatedTaskPool() -{ - do_cancel = false; - do_exit = false; - num = 0; - - worker_thread = new thread(function_bind(&DedicatedTaskPool::thread_run, this)); -} - -DedicatedTaskPool::~DedicatedTaskPool() -{ - wait(); - - do_exit = true; - queue_cond.notify_all(); - - worker_thread->join(); - delete worker_thread; -} - -void DedicatedTaskPool::push(TaskRunFunction &&task, bool front) -{ - num_increase(); - - /* add task to queue */ - queue_mutex.lock(); - if (front) - queue.emplace_front(std::move(task)); - else - queue.emplace_back(std::move(task)); - - queue_cond.notify_one(); - queue_mutex.unlock(); -} - -void DedicatedTaskPool::wait() -{ - thread_scoped_lock num_lock(num_mutex); - - while (num) - num_cond.wait(num_lock); -} - -void DedicatedTaskPool::cancel() -{ - do_cancel = true; - - clear(); - wait(); - - do_cancel = false; -} - -bool DedicatedTaskPool::canceled() -{ - return do_cancel; -} - -void DedicatedTaskPool::num_decrease(int done) -{ - thread_scoped_lock num_lock(num_mutex); - num -= done; - - assert(num >= 0); - if (num == 0) - num_cond.notify_all(); -} - -void DedicatedTaskPool::num_increase() -{ - thread_scoped_lock num_lock(num_mutex); - num++; - num_cond.notify_all(); -} - -bool DedicatedTaskPool::thread_wait_pop(TaskRunFunction &task) -{ - thread_scoped_lock queue_lock(queue_mutex); - - while 
(queue.empty() && !do_exit) - queue_cond.wait(queue_lock); - - if (queue.empty()) { - assert(do_exit); - return false; - } - - task = queue.front(); - queue.pop_front(); - - return true; -} - -void DedicatedTaskPool::thread_run() -{ - TaskRunFunction task; - - /* keep popping off tasks */ - while (thread_wait_pop(task)) { - /* run task */ - task(); - - /* delete task */ - task = nullptr; - - /* notify task was done */ - num_decrease(1); - } -} - -void DedicatedTaskPool::clear() -{ - thread_scoped_lock queue_lock(queue_mutex); - - /* erase all tasks from the queue */ - int done = queue.size(); - queue.clear(); - - queue_lock.unlock(); - - /* notify done */ - num_decrease(done); -} - -string TaskPool::Summary::full_report() const -{ - string report = ""; - report += string_printf("Total time: %f\n", time_total); - report += string_printf("Tasks handled: %d\n", num_tasks_handled); - return report; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_task.h b/intern/cycles/util/util_task.h deleted file mode 100644 index ec45dfa8040..00000000000 --- a/intern/cycles/util/util_task.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __UTIL_TASK_H__ -#define __UTIL_TASK_H__ - -#include "util/util_list.h" -#include "util/util_string.h" -#include "util/util_tbb.h" -#include "util/util_thread.h" -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -class TaskPool; -class TaskScheduler; - -typedef function TaskRunFunction; - -/* Task Pool - * - * Pool of tasks that will be executed by the central TaskScheduler. For each - * pool, we can wait for all tasks to be done, or cancel them before they are - * done. - * - * TaskRunFunction may be created with std::bind or lambda expressions. */ - -class TaskPool { - public: - struct Summary { - /* Time spent to handle all tasks. */ - double time_total; - - /* Number of all tasks handled by this pool. */ - int num_tasks_handled; - - /* A full multi-line description of the state of the pool after - * all work is done. - */ - string full_report() const; - }; - - TaskPool(); - ~TaskPool(); - - void push(TaskRunFunction &&task); - - void wait_work(Summary *stats = NULL); /* work and wait until all tasks are done */ - void cancel(); /* cancel all tasks and wait until they are no longer executing */ - - static bool canceled(); /* For worker threads, test if current task pool canceled. */ - - protected: - tbb::task_group tbb_group; - - /* ** Statistics ** */ - - /* Time stamp of first task pushed. */ - double start_time; - - /* Number of all tasks pushed to the pool. Cleared after wait_work() and cancel(). */ - int num_tasks_pushed; -}; - -/* Task Scheduler - * - * Central scheduler that holds running threads ready to execute tasks. A single - * queue holds the task from all pools. */ - -class TaskScheduler { - public: - static void init(int num_threads = 0); - static void exit(); - static void free_memory(); - - /* Approximate number of threads that will work on task, which may be lower - * or higher than the actual number of threads. Use as little as possible and - * leave splitting up tasks to the scheduler. 
*/ - static int num_threads(); - - protected: - static thread_mutex mutex; - static int users; - static int active_num_threads; - -#ifdef WITH_TBB_GLOBAL_CONTROL - static tbb::global_control *global_control; -#endif -}; - -/* Dedicated Task Pool - * - * Like a TaskPool, but will launch one dedicated thread to execute all tasks. - * - * The run callback that actually executes the task may be created like this: - * function_bind(&MyClass::task_execute, this, _1, _2) */ - -class DedicatedTaskPool { - public: - DedicatedTaskPool(); - ~DedicatedTaskPool(); - - void push(TaskRunFunction &&run, bool front = false); - - void wait(); /* wait until all tasks are done */ - void cancel(); /* cancel all tasks, keep worker thread running */ - - bool canceled(); /* for worker thread, test if canceled */ - - protected: - void num_decrease(int done); - void num_increase(); - - void thread_run(); - bool thread_wait_pop(TaskRunFunction &task); - - void clear(); - - thread_mutex num_mutex; - thread_condition_variable num_cond; - - list queue; - thread_mutex queue_mutex; - thread_condition_variable queue_cond; - - int num; - bool do_cancel; - bool do_exit; - - thread *worker_thread; -}; - -CCL_NAMESPACE_END - -#endif diff --git a/intern/cycles/util/util_tbb.h b/intern/cycles/util/util_tbb.h deleted file mode 100644 index 8f84377ac8c..00000000000 --- a/intern/cycles/util/util_tbb.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright 2011-2020 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TBB_H__ -#define __UTIL_TBB_H__ - -/* TBB includes , do it ourselves first so we are sure - * WIN32_LEAN_AND_MEAN and similar are defined beforehand. */ -#include "util_windows.h" - -#include -#include -#include -#include -#include - -#if TBB_INTERFACE_VERSION_MAJOR >= 10 -# define WITH_TBB_GLOBAL_CONTROL -# include -#endif - -CCL_NAMESPACE_BEGIN - -using tbb::blocked_range; -using tbb::enumerable_thread_specific; -using tbb::parallel_for; - -static inline void parallel_for_cancel() -{ -#if TBB_INTERFACE_VERSION_MAJOR >= 12 - tbb::task_group_context *ctx = tbb::task::current_context(); - if (ctx) { - ctx->cancel_group_execution(); - } -#else - tbb::task::self().cancel_group_execution(); -#endif -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_TBB_H__ */ diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h deleted file mode 100644 index 4de66bf5f46..00000000000 --- a/intern/cycles/util/util_texture.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TEXTURE_H__ -#define __UTIL_TEXTURE_H__ - -#include "util_transform.h" - -CCL_NAMESPACE_BEGIN - -/* Color to use when textures are not found. 
*/ -#define TEX_IMAGE_MISSING_R 1 -#define TEX_IMAGE_MISSING_G 0 -#define TEX_IMAGE_MISSING_B 1 -#define TEX_IMAGE_MISSING_A 1 - -/* Interpolation types for textures - * cuda also use texture space to store other objects */ -typedef enum InterpolationType { - INTERPOLATION_NONE = -1, - INTERPOLATION_LINEAR = 0, - INTERPOLATION_CLOSEST = 1, - INTERPOLATION_CUBIC = 2, - INTERPOLATION_SMART = 3, - - INTERPOLATION_NUM_TYPES, -} InterpolationType; - -typedef enum ImageDataType { - IMAGE_DATA_TYPE_FLOAT4 = 0, - IMAGE_DATA_TYPE_BYTE4 = 1, - IMAGE_DATA_TYPE_HALF4 = 2, - IMAGE_DATA_TYPE_FLOAT = 3, - IMAGE_DATA_TYPE_BYTE = 4, - IMAGE_DATA_TYPE_HALF = 5, - IMAGE_DATA_TYPE_USHORT4 = 6, - IMAGE_DATA_TYPE_USHORT = 7, - IMAGE_DATA_TYPE_NANOVDB_FLOAT = 8, - IMAGE_DATA_TYPE_NANOVDB_FLOAT3 = 9, - - IMAGE_DATA_NUM_TYPES -} ImageDataType; - -/* Alpha types - * How to treat alpha in images. */ -typedef enum ImageAlphaType { - IMAGE_ALPHA_UNASSOCIATED = 0, - IMAGE_ALPHA_ASSOCIATED = 1, - IMAGE_ALPHA_CHANNEL_PACKED = 2, - IMAGE_ALPHA_IGNORE = 3, - IMAGE_ALPHA_AUTO = 4, - - IMAGE_ALPHA_NUM_TYPES, -} ImageAlphaType; - -/* Extension types for textures. - * - * Defines how the image is extrapolated past its original bounds. */ -typedef enum ExtensionType { - /* Cause the image to repeat horizontally and vertically. */ - EXTENSION_REPEAT = 0, - /* Extend by repeating edge pixels of the image. */ - EXTENSION_EXTEND = 1, - /* Clip to image size and set exterior pixels as transparent. */ - EXTENSION_CLIP = 2, - - EXTENSION_NUM_TYPES, -} ExtensionType; - -typedef struct TextureInfo { - /* Pointer, offset or texture depending on device. */ - uint64_t data; - /* Data Type */ - uint data_type; - /* Interpolation and extension type. */ - uint interpolation, extension; - /* Dimensions. */ - uint width, height, depth; - /* Transform for 3D textures. 
*/ - uint use_transform_3d; - Transform transform_3d; -} TextureInfo; - -CCL_NAMESPACE_END - -#endif /* __UTIL_TEXTURE_H__ */ diff --git a/intern/cycles/util/util_thread.cpp b/intern/cycles/util/util_thread.cpp deleted file mode 100644 index cccde5ae7d5..00000000000 --- a/intern/cycles/util/util_thread.cpp +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "util/util_thread.h" - -#include "util/util_system.h" -#include "util/util_windows.h" - -CCL_NAMESPACE_BEGIN - -thread::thread(function run_cb, int node) : run_cb_(run_cb), joined_(false), node_(node) -{ -#ifdef __APPLE__ - /* Set the stack size to 2MB to match Linux. The default 512KB on macOS is - * too small for Embree, and consistent stack size also makes things more - * predictable in general. 
*/ - pthread_attr_t attribute; - pthread_attr_init(&attribute); - pthread_attr_setstacksize(&attribute, 1024 * 1024 * 2); - pthread_create(&pthread_id, &attribute, run, (void *)this); -#else - std_thread = std::thread(&thread::run, this); -#endif -} - -thread::~thread() -{ - if (!joined_) { - join(); - } -} - -void *thread::run(void *arg) -{ - thread *self = (thread *)(arg); - if (self->node_ != -1) { - system_cpu_run_thread_on_node(self->node_); - } - self->run_cb_(); - return NULL; -} - -bool thread::join() -{ - joined_ = true; -#ifdef __APPLE__ - return pthread_join(pthread_id, NULL) == 0; -#else - try { - std_thread.join(); - return true; - } - catch (const std::system_error &) { - return false; - } -#endif -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_thread.h b/intern/cycles/util/util_thread.h deleted file mode 100644 index 29f9becbefe..00000000000 --- a/intern/cycles/util/util_thread.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_THREAD_H__ -#define __UTIL_THREAD_H__ - -#include -#include -#include -#include -#include - -#ifdef _WIN32 -# include "util_windows.h" -#else -# include -#endif - -/* NOTE: Use tbb/spin_mutex.h instead of util_tbb.h because some of the TBB - * functionality requires RTTI, which is disabled for OSL kernel. 
*/ -#include - -#include "util/util_function.h" - -CCL_NAMESPACE_BEGIN - -typedef std::mutex thread_mutex; -typedef std::unique_lock thread_scoped_lock; -typedef std::condition_variable thread_condition_variable; - -/* Own thread implementation similar to std::thread, so we can set a - * custom stack size on macOS. */ - -class thread { - public: - /* NOTE: Node index of -1 means that affinity will be inherited from the - * parent thread and no override on top of that will happen. */ - thread(function run_cb, int node = -1); - ~thread(); - - static void *run(void *arg); - bool join(); - - protected: - function run_cb_; -#ifdef __APPLE__ - pthread_t pthread_id; -#else - std::thread std_thread; -#endif - bool joined_; - int node_; -}; - -using thread_spin_lock = tbb::spin_mutex; - -class thread_scoped_spin_lock { - public: - explicit thread_scoped_spin_lock(thread_spin_lock &lock) : lock_(lock) - { - lock_.lock(); - } - - ~thread_scoped_spin_lock() - { - lock_.unlock(); - } - - /* TODO(sergey): Implement manual control over lock/unlock. */ - - protected: - thread_spin_lock &lock_; -}; - -CCL_NAMESPACE_END - -#endif /* __UTIL_THREAD_H__ */ diff --git a/intern/cycles/util/util_time.cpp b/intern/cycles/util/util_time.cpp deleted file mode 100644 index 1641395d07e..00000000000 --- a/intern/cycles/util/util_time.cpp +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "util/util_time.h" - -#include - -#if !defined(_WIN32) -# include -# include -#endif - -#include "util/util_math.h" -#include "util/util_string.h" -#include "util/util_windows.h" - -CCL_NAMESPACE_BEGIN - -#ifdef _WIN32 -double time_dt() -{ - __int64 frequency, counter; - - QueryPerformanceFrequency((LARGE_INTEGER *)&frequency); - QueryPerformanceCounter((LARGE_INTEGER *)&counter); - - return (double)counter / (double)frequency; -} - -void time_sleep(double t) -{ - Sleep((int)(t * 1000)); -} -#else -double time_dt() -{ - struct timeval now; - gettimeofday(&now, NULL); - - return now.tv_sec + now.tv_usec * 1e-6; -} - -/* sleep t seconds */ -void time_sleep(double t) -{ - /* get whole seconds */ - int s = (int)t; - - if (s >= 1) { - sleep(s); - - /* adjust parameter to remove whole seconds */ - t -= s; - } - - /* get microseconds */ - int us = (int)(t * 1e6); - if (us > 0) - usleep(us); -} -#endif - -/* Time in format "hours:minutes:seconds.hundreds" */ - -string time_human_readable_from_seconds(const double seconds) -{ - const int h = (((int)seconds) / (60 * 60)); - const int m = (((int)seconds) / 60) % 60; - const int s = (((int)seconds) % 60); - const int r = (((int)(seconds * 100)) % 100); - - if (h > 0) { - return string_printf("%.2d:%.2d:%.2d.%.2d", h, m, s, r); - } - else { - return string_printf("%.2d:%.2d.%.2d", m, s, r); - } -} - -double time_human_readable_to_seconds(const string &time_string) -{ - /* Those are multiplies of a corresponding token surrounded by : in the - * time string, which denotes how to convert value to seconds. - * Effectively: seconds, minutes, hours, days in seconds. */ - const int multipliers[] = {1, 60, 60 * 60, 24 * 60 * 60}; - const int num_multiplies = sizeof(multipliers) / sizeof(*multipliers); - if (time_string.empty()) { - return 0.0; - } - double result = 0.0; - /* Split fractions of a second from the encoded time. 
*/ - vector fraction_tokens; - string_split(fraction_tokens, time_string, ".", false); - const int num_fraction_tokens = fraction_tokens.size(); - if (num_fraction_tokens == 0) { - /* Time string is malformed. */ - return 0.0; - } - else if (fraction_tokens.size() == 1) { - /* There is no fraction of a second specified, the rest of the code - * handles this normally. */ - } - else if (fraction_tokens.size() == 2) { - result = atof(fraction_tokens[1].c_str()); - result *= pow(0.1, fraction_tokens[1].length()); - } - else { - /* This is not a valid string, the result can not be reliable. */ - return 0.0; - } - /* Split hours, minutes and seconds. - * Hours part is optional. */ - vector tokens; - string_split(tokens, fraction_tokens[0], ":", false); - const int num_tokens = tokens.size(); - if (num_tokens > num_multiplies) { - /* Can not reliably represent the value. */ - return 0.0; - } - for (int i = 0; i < num_tokens; ++i) { - result += atoi(tokens[num_tokens - i - 1].c_str()) * multipliers[i]; - } - return result; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_time.h b/intern/cycles/util/util_time.h deleted file mode 100644 index a82d400a0d7..00000000000 --- a/intern/cycles/util/util_time.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __UTIL_TIME_H__ -#define __UTIL_TIME_H__ - -#include "util/util_function.h" -#include "util/util_string.h" - -CCL_NAMESPACE_BEGIN - -/* Give current time in seconds in double precision, with good accuracy. */ - -double time_dt(); - -/* Sleep for the specified number of seconds. */ - -void time_sleep(double t); - -/* Scoped timer. */ - -class scoped_timer { - public: - explicit scoped_timer(double *value = NULL) : value_(value) - { - time_start_ = time_dt(); - } - - ~scoped_timer() - { - if (value_ != NULL) { - *value_ = get_time(); - } - } - - double get_start() const - { - return time_start_; - } - - double get_time() const - { - return time_dt() - time_start_; - } - - protected: - double *value_; - double time_start_; -}; - -class scoped_callback_timer { - public: - using callback_type = function; - - explicit scoped_callback_timer(callback_type cb) : cb(cb) - { - } - - ~scoped_callback_timer() - { - if (cb) { - cb(timer.get_time()); - } - } - - protected: - scoped_timer timer; - callback_type cb; -}; - -/* Make human readable string from time, compatible with Blender metadata. */ - -string time_human_readable_from_seconds(const double seconds); -double time_human_readable_to_seconds(const string &str); - -CCL_NAMESPACE_END - -#endif diff --git a/intern/cycles/util/util_transform.cpp b/intern/cycles/util/util_transform.cpp deleted file mode 100644 index e8233b7fe6d..00000000000 --- a/intern/cycles/util/util_transform.cpp +++ /dev/null @@ -1,345 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Adapted from code with license: - * - * Copyright (c) 2002, Industrial Light & Magic, a division of Lucas - * Digital Ltd. LLC. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Industrial Light & Magic nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "util/util_transform.h" -#include "util/util_projection.h" - -#include "util/util_boundbox.h" -#include "util/util_math.h" - -CCL_NAMESPACE_BEGIN - -/* Transform Inverse */ - -static bool transform_matrix4_gj_inverse(float R[][4], float M[][4]) -{ - /* forward elimination */ - for (int i = 0; i < 4; i++) { - int pivot = i; - float pivotsize = M[i][i]; - - if (pivotsize < 0) - pivotsize = -pivotsize; - - for (int j = i + 1; j < 4; j++) { - float tmp = M[j][i]; - - if (tmp < 0) - tmp = -tmp; - - if (tmp > pivotsize) { - pivot = j; - pivotsize = tmp; - } - } - - if (UNLIKELY(pivotsize == 0.0f)) - return false; - - if (pivot != i) { - for (int j = 0; j < 4; j++) { - float tmp; - - tmp = M[i][j]; - M[i][j] = M[pivot][j]; - M[pivot][j] = tmp; - - tmp = R[i][j]; - R[i][j] = R[pivot][j]; - R[pivot][j] = tmp; - } - } - - for (int j = i + 1; j < 4; j++) { - float f = M[j][i] / M[i][i]; - - for (int k = 0; k < 4; k++) { - M[j][k] -= f * M[i][k]; - R[j][k] -= f * R[i][k]; - } - } - } - - /* backward substitution */ - for (int i = 3; i >= 0; --i) { - float f; - - if (UNLIKELY((f = M[i][i]) == 0.0f)) - return false; - - for (int j = 0; j < 4; j++) { - M[i][j] /= f; - R[i][j] /= f; - } - - for (int j = 0; j < i; j++) { - f = M[j][i]; - - for (int k = 0; k < 4; k++) { - M[j][k] -= f * M[i][k]; - R[j][k] -= f * R[i][k]; - } - } - } - - return true; -} - -ProjectionTransform projection_inverse(const ProjectionTransform &tfm) -{ - 
ProjectionTransform tfmR = projection_identity(); - float M[4][4], R[4][4]; - - memcpy(R, &tfmR, sizeof(R)); - memcpy(M, &tfm, sizeof(M)); - - if (UNLIKELY(!transform_matrix4_gj_inverse(R, M))) { - /* matrix is degenerate (e.g. 0 scale on some axis), ideally we should - * never be in this situation, but try to invert it anyway with tweak */ - M[0][0] += 1e-8f; - M[1][1] += 1e-8f; - M[2][2] += 1e-8f; - - if (UNLIKELY(!transform_matrix4_gj_inverse(R, M))) { - return projection_identity(); - } - } - - memcpy(&tfmR, R, sizeof(R)); - - return tfmR; -} - -Transform transform_inverse(const Transform &tfm) -{ - ProjectionTransform projection(tfm); - return projection_to_transform(projection_inverse(projection)); -} - -Transform transform_transposed_inverse(const Transform &tfm) -{ - ProjectionTransform projection(tfm); - ProjectionTransform iprojection = projection_inverse(projection); - return projection_to_transform(projection_transpose(iprojection)); -} - -/* Motion Transform */ - -float4 transform_to_quat(const Transform &tfm) -{ - double trace = (double)(tfm[0][0] + tfm[1][1] + tfm[2][2]); - float4 qt; - - if (trace > 0.0) { - double s = sqrt(trace + 1.0); - - qt.w = (float)(s / 2.0); - s = 0.5 / s; - - qt.x = (float)((double)(tfm[2][1] - tfm[1][2]) * s); - qt.y = (float)((double)(tfm[0][2] - tfm[2][0]) * s); - qt.z = (float)((double)(tfm[1][0] - tfm[0][1]) * s); - } - else { - int i = 0; - - if (tfm[1][1] > tfm[i][i]) - i = 1; - if (tfm[2][2] > tfm[i][i]) - i = 2; - - int j = (i + 1) % 3; - int k = (j + 1) % 3; - - double s = sqrt((double)(tfm[i][i] - (tfm[j][j] + tfm[k][k])) + 1.0); - - double q[3]; - q[i] = s * 0.5; - if (s != 0.0) - s = 0.5 / s; - - double w = (double)(tfm[k][j] - tfm[j][k]) * s; - q[j] = (double)(tfm[j][i] + tfm[i][j]) * s; - q[k] = (double)(tfm[k][i] + tfm[i][k]) * s; - - qt.x = (float)q[0]; - qt.y = (float)q[1]; - qt.z = (float)q[2]; - qt.w = (float)w; - } - - return qt; -} - -static void transform_decompose(DecomposedTransform *decomp, const 
Transform *tfm) -{ - /* extract translation */ - decomp->y = make_float4(tfm->x.w, tfm->y.w, tfm->z.w, 0.0f); - - /* extract rotation */ - Transform M = *tfm; - M.x.w = 0.0f; - M.y.w = 0.0f; - M.z.w = 0.0f; - -#if 0 - Transform R = M; - float norm; - int iteration = 0; - - do { - Transform Rnext; - Transform Rit = transform_transposed_inverse(R); - - for (int i = 0; i < 3; i++) - for (int j = 0; j < 4; j++) - Rnext[i][j] = 0.5f * (R[i][j] + Rit[i][j]); - - norm = 0.0f; - for (int i = 0; i < 3; i++) { - norm = max(norm, - fabsf(R[i][0] - Rnext[i][0]) + fabsf(R[i][1] - Rnext[i][1]) + - fabsf(R[i][2] - Rnext[i][2])); - } - - R = Rnext; - iteration++; - } while (iteration < 100 && norm > 1e-4f); - - if (transform_negative_scale(R)) - R = R * transform_scale(-1.0f, -1.0f, -1.0f); - - decomp->x = transform_to_quat(R); - - /* extract scale and pack it */ - Transform scale = transform_inverse(R) * M; - decomp->y.w = scale.x.x; - decomp->z = make_float4(scale.x.y, scale.x.z, scale.y.x, scale.y.y); - decomp->w = make_float4(scale.y.z, scale.z.x, scale.z.y, scale.z.z); -#else - float3 colx = transform_get_column(&M, 0); - float3 coly = transform_get_column(&M, 1); - float3 colz = transform_get_column(&M, 2); - - /* extract scale and shear first */ - float3 scale, shear; - scale.x = len(colx); - colx = safe_divide_float3_float(colx, scale.x); - shear.z = dot(colx, coly); - coly -= shear.z * colx; - scale.y = len(coly); - coly = safe_divide_float3_float(coly, scale.y); - shear.y = dot(colx, colz); - colz -= shear.y * colx; - shear.x = dot(coly, colz); - colz -= shear.x * coly; - scale.z = len(colz); - colz = safe_divide_float3_float(colz, scale.z); - - transform_set_column(&M, 0, colx); - transform_set_column(&M, 1, coly); - transform_set_column(&M, 2, colz); - - if (transform_negative_scale(M)) { - scale *= -1.0f; - M = M * transform_scale(-1.0f, -1.0f, -1.0f); - } - - decomp->x = transform_to_quat(M); - - decomp->y.w = scale.x; - decomp->z = make_float4(shear.z, shear.y, 
0.0f, scale.y); - decomp->w = make_float4(shear.x, 0.0f, 0.0f, scale.z); -#endif -} - -void transform_motion_decompose(DecomposedTransform *decomp, const Transform *motion, size_t size) -{ - /* Decompose and correct rotation. */ - for (size_t i = 0; i < size; i++) { - transform_decompose(decomp + i, motion + i); - - if (i > 0) { - /* Ensure rotation around shortest angle, negated quaternions are the same - * but this means we don't have to do the check in quat_interpolate */ - if (dot(decomp[i - 1].x, decomp[i].x) < 0.0f) - decomp[i].x = -decomp[i].x; - } - } - - /* Copy rotation to decomposed transform where scale is degenerate. This avoids weird object - * rotation interpolation when the scale goes to 0 for a time step. - * - * Note that this is very simple and naive implementation, which only deals with degenerated - * scale happening only on one frame. It is possible to improve it further by interpolating - * rotation into s degenerated range using rotation from time-steps from adjacent non-degenerated - * time steps. 
*/ - for (size_t i = 0; i < size; i++) { - const float3 scale = make_float3(decomp[i].y.w, decomp[i].z.w, decomp[i].w.w); - if (!is_zero(scale)) { - continue; - } - - if (i > 0) { - decomp[i].x = decomp[i - 1].x; - } - else if (i < size - 1) { - decomp[i].x = decomp[i + 1].x; - } - } -} - -Transform transform_from_viewplane(BoundBox2D &viewplane) -{ - return transform_scale(1.0f / (viewplane.right - viewplane.left), - 1.0f / (viewplane.top - viewplane.bottom), - 1.0f) * - transform_translate(-viewplane.left, -viewplane.bottom, 0.0f); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h deleted file mode 100644 index fc04f9aab46..00000000000 --- a/intern/cycles/util/util_transform.h +++ /dev/null @@ -1,512 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TRANSFORM_H__ -#define __UTIL_TRANSFORM_H__ - -#ifndef __KERNEL_GPU__ -# include -#endif - -#include "util/util_math.h" -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -/* Affine transformation, stored as 4x3 matrix. */ - -typedef struct Transform { - float4 x, y, z; - -#ifndef __KERNEL_GPU__ - float4 operator[](int i) const - { - return *(&x + i); - } - float4 &operator[](int i) - { - return *(&x + i); - } -#endif -} Transform; - -/* Transform decomposed in rotation/translation/scale. 
we use the same data - * structure as Transform, and tightly pack decomposition into it. first the - * rotation (4), then translation (3), then 3x3 scale matrix (9). */ - -typedef struct DecomposedTransform { - float4 x, y, z, w; -} DecomposedTransform; - -/* Functions */ - -ccl_device_inline float3 transform_point(ccl_private const Transform *t, const float3 a) -{ - /* TODO(sergey): Disabled for now, causes crashes in certain cases. */ -#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) - ssef x, y, z, w, aa; - aa = a.m128; - - x = _mm_loadu_ps(&t->x.x); - y = _mm_loadu_ps(&t->y.x); - z = _mm_loadu_ps(&t->z.x); - w = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f); - - _MM_TRANSPOSE4_PS(x, y, z, w); - - ssef tmp = shuffle<0>(aa) * x; - tmp = madd(shuffle<1>(aa), y, tmp); - tmp = madd(shuffle<2>(aa), z, tmp); - tmp += w; - - return float3(tmp.m128); -#else - float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z + t->x.w, - a.x * t->y.x + a.y * t->y.y + a.z * t->y.z + t->y.w, - a.x * t->z.x + a.y * t->z.y + a.z * t->z.z + t->z.w); - - return c; -#endif -} - -ccl_device_inline float3 transform_direction(ccl_private const Transform *t, const float3 a) -{ -#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) - ssef x, y, z, w, aa; - aa = a.m128; - x = _mm_loadu_ps(&t->x.x); - y = _mm_loadu_ps(&t->y.x); - z = _mm_loadu_ps(&t->z.x); - w = _mm_setzero_ps(); - - _MM_TRANSPOSE4_PS(x, y, z, w); - - ssef tmp = shuffle<0>(aa) * x; - tmp = madd(shuffle<1>(aa), y, tmp); - tmp = madd(shuffle<2>(aa), z, tmp); - - return float3(tmp.m128); -#else - float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z, - a.x * t->y.x + a.y * t->y.y + a.z * t->y.z, - a.x * t->z.x + a.y * t->z.y + a.z * t->z.z); - - return c; -#endif -} - -ccl_device_inline float3 transform_direction_transposed(ccl_private const Transform *t, - const float3 a) -{ - float3 x = make_float3(t->x.x, t->y.x, t->z.x); - float3 y = make_float3(t->x.y, t->y.y, t->z.y); - float3 z = make_float3(t->x.z, 
t->y.z, t->z.z); - - return make_float3(dot(x, a), dot(y, a), dot(z, a)); -} - -ccl_device_inline Transform make_transform(float a, - float b, - float c, - float d, - float e, - float f, - float g, - float h, - float i, - float j, - float k, - float l) -{ - Transform t; - - t.x.x = a; - t.x.y = b; - t.x.z = c; - t.x.w = d; - t.y.x = e; - t.y.y = f; - t.y.z = g; - t.y.w = h; - t.z.x = i; - t.z.y = j; - t.z.z = k; - t.z.w = l; - - return t; -} - -ccl_device_inline Transform euler_to_transform(const float3 euler) -{ - float cx = cosf(euler.x); - float cy = cosf(euler.y); - float cz = cosf(euler.z); - float sx = sinf(euler.x); - float sy = sinf(euler.y); - float sz = sinf(euler.z); - - Transform t; - t.x.x = cy * cz; - t.y.x = cy * sz; - t.z.x = -sy; - - t.x.y = sy * sx * cz - cx * sz; - t.y.y = sy * sx * sz + cx * cz; - t.z.y = cy * sx; - - t.x.z = sy * cx * cz + sx * sz; - t.y.z = sy * cx * sz - sx * cz; - t.z.z = cy * cx; - - t.x.w = t.y.w = t.z.w = 0.0f; - return t; -} - -/* Constructs a coordinate frame from a normalized normal. */ -ccl_device_inline Transform make_transform_frame(float3 N) -{ - const float3 dx0 = cross(make_float3(1.0f, 0.0f, 0.0f), N); - const float3 dx1 = cross(make_float3(0.0f, 1.0f, 0.0f), N); - const float3 dx = normalize((dot(dx0, dx0) > dot(dx1, dx1)) ? 
dx0 : dx1); - const float3 dy = normalize(cross(N, dx)); - return make_transform(dx.x, dx.y, dx.z, 0.0f, dy.x, dy.y, dy.z, 0.0f, N.x, N.y, N.z, 0.0f); -} - -#ifndef __KERNEL_GPU__ - -ccl_device_inline Transform transform_zero() -{ - Transform zero = {zero_float4(), zero_float4(), zero_float4()}; - return zero; -} - -ccl_device_inline Transform operator*(const Transform a, const Transform b) -{ - float4 c_x = make_float4(b.x.x, b.y.x, b.z.x, 0.0f); - float4 c_y = make_float4(b.x.y, b.y.y, b.z.y, 0.0f); - float4 c_z = make_float4(b.x.z, b.y.z, b.z.z, 0.0f); - float4 c_w = make_float4(b.x.w, b.y.w, b.z.w, 1.0f); - - Transform t; - t.x = make_float4(dot(a.x, c_x), dot(a.x, c_y), dot(a.x, c_z), dot(a.x, c_w)); - t.y = make_float4(dot(a.y, c_x), dot(a.y, c_y), dot(a.y, c_z), dot(a.y, c_w)); - t.z = make_float4(dot(a.z, c_x), dot(a.z, c_y), dot(a.z, c_z), dot(a.z, c_w)); - - return t; -} - -ccl_device_inline void print_transform(const char *label, const Transform &t) -{ - print_float4(label, t.x); - print_float4(label, t.y); - print_float4(label, t.z); - printf("\n"); -} - -ccl_device_inline Transform transform_translate(float3 t) -{ - return make_transform(1, 0, 0, t.x, 0, 1, 0, t.y, 0, 0, 1, t.z); -} - -ccl_device_inline Transform transform_translate(float x, float y, float z) -{ - return transform_translate(make_float3(x, y, z)); -} - -ccl_device_inline Transform transform_scale(float3 s) -{ - return make_transform(s.x, 0, 0, 0, 0, s.y, 0, 0, 0, 0, s.z, 0); -} - -ccl_device_inline Transform transform_scale(float x, float y, float z) -{ - return transform_scale(make_float3(x, y, z)); -} - -ccl_device_inline Transform transform_rotate(float angle, float3 axis) -{ - float s = sinf(angle); - float c = cosf(angle); - float t = 1.0f - c; - - axis = normalize(axis); - - return make_transform(axis.x * axis.x * t + c, - axis.x * axis.y * t - s * axis.z, - axis.x * axis.z * t + s * axis.y, - 0.0f, - - axis.y * axis.x * t + s * axis.z, - axis.y * axis.y * t + c, - axis.y * axis.z 
* t - s * axis.x, - 0.0f, - - axis.z * axis.x * t - s * axis.y, - axis.z * axis.y * t + s * axis.x, - axis.z * axis.z * t + c, - 0.0f); -} - -/* Euler is assumed to be in XYZ order. */ -ccl_device_inline Transform transform_euler(float3 euler) -{ - return transform_rotate(euler.z, make_float3(0.0f, 0.0f, 1.0f)) * - transform_rotate(euler.y, make_float3(0.0f, 1.0f, 0.0f)) * - transform_rotate(euler.x, make_float3(1.0f, 0.0f, 0.0f)); -} - -ccl_device_inline Transform transform_identity() -{ - return transform_scale(1.0f, 1.0f, 1.0f); -} - -ccl_device_inline bool operator==(const Transform &A, const Transform &B) -{ - return memcmp(&A, &B, sizeof(Transform)) == 0; -} - -ccl_device_inline bool operator!=(const Transform &A, const Transform &B) -{ - return !(A == B); -} - -ccl_device_inline float3 transform_get_column(const Transform *t, int column) -{ - return make_float3(t->x[column], t->y[column], t->z[column]); -} - -ccl_device_inline void transform_set_column(Transform *t, int column, float3 value) -{ - t->x[column] = value.x; - t->y[column] = value.y; - t->z[column] = value.z; -} - -Transform transform_inverse(const Transform &a); -Transform transform_transposed_inverse(const Transform &a); - -ccl_device_inline bool transform_uniform_scale(const Transform &tfm, float &scale) -{ - /* the epsilon here is quite arbitrary, but this function is only used for - * surface area and bump, where we expect it to not be so sensitive */ - float eps = 1e-6f; - - float sx = len_squared(float4_to_float3(tfm.x)); - float sy = len_squared(float4_to_float3(tfm.y)); - float sz = len_squared(float4_to_float3(tfm.z)); - float stx = len_squared(transform_get_column(&tfm, 0)); - float sty = len_squared(transform_get_column(&tfm, 1)); - float stz = len_squared(transform_get_column(&tfm, 2)); - - if (fabsf(sx - sy) < eps && fabsf(sx - sz) < eps && fabsf(sx - stx) < eps && - fabsf(sx - sty) < eps && fabsf(sx - stz) < eps) { - scale = sx; - return true; - } - - return false; -} - 
-ccl_device_inline bool transform_negative_scale(const Transform &tfm) -{ - float3 c0 = transform_get_column(&tfm, 0); - float3 c1 = transform_get_column(&tfm, 1); - float3 c2 = transform_get_column(&tfm, 2); - - return (dot(cross(c0, c1), c2) < 0.0f); -} - -ccl_device_inline Transform transform_clear_scale(const Transform &tfm) -{ - Transform ntfm = tfm; - - transform_set_column(&ntfm, 0, normalize(transform_get_column(&ntfm, 0))); - transform_set_column(&ntfm, 1, normalize(transform_get_column(&ntfm, 1))); - transform_set_column(&ntfm, 2, normalize(transform_get_column(&ntfm, 2))); - - return ntfm; -} - -ccl_device_inline Transform transform_empty() -{ - return make_transform(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); -} - -#endif - -/* Motion Transform */ - -ccl_device_inline float4 quat_interpolate(float4 q1, float4 q2, float t) -{ - /* Optix is using lerp to interpolate motion transformations. */ -#ifdef __KERNEL_OPTIX__ - return normalize((1.0f - t) * q1 + t * q2); -#else /* __KERNEL_OPTIX__ */ - /* note: this does not ensure rotation around shortest angle, q1 and q2 - * are assumed to be matched already in transform_motion_decompose */ - float costheta = dot(q1, q2); - - /* possible optimization: it might be possible to precompute theta/qperp */ - - if (costheta > 0.9995f) { - /* linear interpolation in degenerate case */ - return normalize((1.0f - t) * q1 + t * q2); - } - else { - /* slerp */ - float theta = acosf(clamp(costheta, -1.0f, 1.0f)); - float4 qperp = normalize(q2 - q1 * costheta); - float thetap = theta * t; - return q1 * cosf(thetap) + qperp * sinf(thetap); - } -#endif /* __KERNEL_OPTIX__ */ -} - -ccl_device_inline Transform transform_quick_inverse(Transform M) -{ - /* possible optimization: can we avoid doing this altogether and construct - * the inverse matrix directly from negated translation, transposed rotation, - * scale can be inverted but what about shearing? 
*/ - Transform R; - float det = M.x.x * (M.z.z * M.y.y - M.z.y * M.y.z) - M.y.x * (M.z.z * M.x.y - M.z.y * M.x.z) + - M.z.x * (M.y.z * M.x.y - M.y.y * M.x.z); - if (det == 0.0f) { - M.x.x += 1e-8f; - M.y.y += 1e-8f; - M.z.z += 1e-8f; - det = M.x.x * (M.z.z * M.y.y - M.z.y * M.y.z) - M.y.x * (M.z.z * M.x.y - M.z.y * M.x.z) + - M.z.x * (M.y.z * M.x.y - M.y.y * M.x.z); - } - det = (det != 0.0f) ? 1.0f / det : 0.0f; - - float3 Rx = det * make_float3(M.z.z * M.y.y - M.z.y * M.y.z, - M.z.y * M.x.z - M.z.z * M.x.y, - M.y.z * M.x.y - M.y.y * M.x.z); - float3 Ry = det * make_float3(M.z.x * M.y.z - M.z.z * M.y.x, - M.z.z * M.x.x - M.z.x * M.x.z, - M.y.x * M.x.z - M.y.z * M.x.x); - float3 Rz = det * make_float3(M.z.y * M.y.x - M.z.x * M.y.y, - M.z.x * M.x.y - M.z.y * M.x.x, - M.y.y * M.x.x - M.y.x * M.x.y); - float3 T = -make_float3(M.x.w, M.y.w, M.z.w); - - R.x = make_float4(Rx.x, Rx.y, Rx.z, dot(Rx, T)); - R.y = make_float4(Ry.x, Ry.y, Ry.z, dot(Ry, T)); - R.z = make_float4(Rz.x, Rz.y, Rz.z, dot(Rz, T)); - - return R; -} - -ccl_device_inline void transform_compose(ccl_private Transform *tfm, - ccl_private const DecomposedTransform *decomp) -{ - /* rotation */ - float q0, q1, q2, q3, qda, qdb, qdc, qaa, qab, qac, qbb, qbc, qcc; - - q0 = M_SQRT2_F * decomp->x.w; - q1 = M_SQRT2_F * decomp->x.x; - q2 = M_SQRT2_F * decomp->x.y; - q3 = M_SQRT2_F * decomp->x.z; - - qda = q0 * q1; - qdb = q0 * q2; - qdc = q0 * q3; - qaa = q1 * q1; - qab = q1 * q2; - qac = q1 * q3; - qbb = q2 * q2; - qbc = q2 * q3; - qcc = q3 * q3; - - float3 rotation_x = make_float3(1.0f - qbb - qcc, -qdc + qab, qdb + qac); - float3 rotation_y = make_float3(qdc + qab, 1.0f - qaa - qcc, -qda + qbc); - float3 rotation_z = make_float3(-qdb + qac, qda + qbc, 1.0f - qaa - qbb); - - /* scale */ - float3 scale_x = make_float3(decomp->y.w, decomp->z.z, decomp->w.y); - float3 scale_y = make_float3(decomp->z.x, decomp->z.w, decomp->w.z); - float3 scale_z = make_float3(decomp->z.y, decomp->w.x, decomp->w.w); - - /* compose 
with translation */ - tfm->x = make_float4( - dot(rotation_x, scale_x), dot(rotation_x, scale_y), dot(rotation_x, scale_z), decomp->y.x); - tfm->y = make_float4( - dot(rotation_y, scale_x), dot(rotation_y, scale_y), dot(rotation_y, scale_z), decomp->y.y); - tfm->z = make_float4( - dot(rotation_z, scale_x), dot(rotation_z, scale_y), dot(rotation_z, scale_z), decomp->y.z); -} - -/* Interpolate from array of decomposed transforms. */ -ccl_device void transform_motion_array_interpolate(Transform *tfm, - const DecomposedTransform *motion, - uint numsteps, - float time) -{ - /* Figure out which steps we need to interpolate. */ - int maxstep = numsteps - 1; - int step = min((int)(time * maxstep), maxstep - 1); - float t = time * maxstep - step; - - const DecomposedTransform *a = motion + step; - const DecomposedTransform *b = motion + step + 1; - - /* Interpolate rotation, translation and scale. */ - DecomposedTransform decomp; - decomp.x = quat_interpolate(a->x, b->x, t); - decomp.y = (1.0f - t) * a->y + t * b->y; - decomp.z = (1.0f - t) * a->z + t * b->z; - decomp.w = (1.0f - t) * a->w + t * b->w; - - /* Compose rotation, translation, scale into matrix. 
*/ - transform_compose(tfm, &decomp); -} - -ccl_device_inline bool transform_isfinite_safe(ccl_private Transform *tfm) -{ - return isfinite4_safe(tfm->x) && isfinite4_safe(tfm->y) && isfinite4_safe(tfm->z); -} - -ccl_device_inline bool transform_decomposed_isfinite_safe(ccl_private DecomposedTransform *decomp) -{ - return isfinite4_safe(decomp->x) && isfinite4_safe(decomp->y) && isfinite4_safe(decomp->z) && - isfinite4_safe(decomp->w); -} - -#ifndef __KERNEL_GPU__ - -class BoundBox2D; - -ccl_device_inline bool operator==(const DecomposedTransform &A, const DecomposedTransform &B) -{ - return memcmp(&A, &B, sizeof(DecomposedTransform)) == 0; -} - -float4 transform_to_quat(const Transform &tfm); -void transform_motion_decompose(DecomposedTransform *decomp, const Transform *motion, size_t size); -Transform transform_from_viewplane(BoundBox2D &viewplane); - -#endif - -/* TODO: This can be removed when we know if no devices will require explicit - * address space qualifiers for this case. */ - -#define transform_point_auto transform_point -#define transform_direction_auto transform_direction -#define transform_direction_transposed_auto transform_direction_transposed - -CCL_NAMESPACE_END - -#endif /* __UTIL_TRANSFORM_H__ */ diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h deleted file mode 100644 index 442c32b3a3d..00000000000 --- a/intern/cycles/util/util_types.h +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_H__ -#define __UTIL_TYPES_H__ - -#include - -/* Standard Integer Types */ - -#if !defined(__KERNEL_GPU__) -# include -#endif - -#include "util/util_defines.h" - -#ifndef __KERNEL_GPU__ -# include "util/util_optimization.h" -# include "util/util_simd.h" -#endif - -CCL_NAMESPACE_BEGIN - -/* Types - * - * Define simpler unsigned type names, and integer with defined number of bits. - * Also vector types, named to be compatible with OpenCL builtin types, while - * working for CUDA and C++ too. */ - -/* Shorter Unsigned Names */ - -typedef unsigned char uchar; -typedef unsigned int uint; -typedef unsigned short ushort; - -/* Fixed Bits Types */ - -#ifndef __KERNEL_GPU__ -/* Generic Memory Pointer */ - -typedef uint64_t device_ptr; -#endif /* __KERNEL_GPU__ */ - -ccl_device_inline size_t align_up(size_t offset, size_t alignment) -{ - return (offset + alignment - 1) & ~(alignment - 1); -} - -ccl_device_inline size_t divide_up(size_t x, size_t y) -{ - return (x + y - 1) / y; -} - -ccl_device_inline size_t round_up(size_t x, size_t multiple) -{ - return ((x + multiple - 1) / multiple) * multiple; -} - -ccl_device_inline size_t round_down(size_t x, size_t multiple) -{ - return (x / multiple) * multiple; -} - -ccl_device_inline bool is_power_of_two(size_t x) -{ - return (x & (x - 1)) == 0; -} - -CCL_NAMESPACE_END - -/* Vectorized types declaration. 
*/ -#include "util/util_types_uchar2.h" -#include "util/util_types_uchar3.h" -#include "util/util_types_uchar4.h" - -#include "util/util_types_int2.h" -#include "util/util_types_int3.h" -#include "util/util_types_int4.h" - -#include "util/util_types_uint2.h" -#include "util/util_types_uint3.h" -#include "util/util_types_uint4.h" - -#include "util/util_types_ushort4.h" - -#include "util/util_types_float2.h" -#include "util/util_types_float3.h" -#include "util/util_types_float4.h" -#include "util/util_types_float8.h" - -#include "util/util_types_vector3.h" - -/* Vectorized types implementation. */ -#include "util/util_types_uchar2_impl.h" -#include "util/util_types_uchar3_impl.h" -#include "util/util_types_uchar4_impl.h" - -#include "util/util_types_int2_impl.h" -#include "util/util_types_int3_impl.h" -#include "util/util_types_int4_impl.h" - -#include "util/util_types_uint2_impl.h" -#include "util/util_types_uint3_impl.h" -#include "util/util_types_uint4_impl.h" - -#include "util/util_types_float2_impl.h" -#include "util/util_types_float3_impl.h" -#include "util/util_types_float4_impl.h" -#include "util/util_types_float8_impl.h" - -#include "util/util_types_vector3_impl.h" - -/* SSE types. */ -#ifndef __KERNEL_GPU__ -# include "util/util_sseb.h" -# include "util/util_ssef.h" -# include "util/util_ssei.h" -# if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__) -# include "util/util_avxb.h" -# include "util/util_avxf.h" -# include "util/util_avxi.h" -# endif -#endif - -#endif /* __UTIL_TYPES_H__ */ diff --git a/intern/cycles/util/util_types_float2.h b/intern/cycles/util/util_types_float2.h deleted file mode 100644 index 3760bf579b6..00000000000 --- a/intern/cycles/util/util_types_float2.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_FLOAT2_H__ -#define __UTIL_TYPES_FLOAT2_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -struct float2 { - float x, y; - - __forceinline float operator[](int i) const; - __forceinline float &operator[](int i); -}; - -ccl_device_inline float2 make_float2(float x, float y); -ccl_device_inline void print_float2(const char *label, const float2 &a); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_FLOAT2_H__ */ diff --git a/intern/cycles/util/util_types_float2_impl.h b/intern/cycles/util/util_types_float2_impl.h deleted file mode 100644 index 7810d2a8781..00000000000 --- a/intern/cycles/util/util_types_float2_impl.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __UTIL_TYPES_FLOAT2_IMPL_H__ -#define __UTIL_TYPES_FLOAT2_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -#ifndef __KERNEL_GPU__ -# include -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -__forceinline float float2::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); -} - -__forceinline float &float2::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); -} - -ccl_device_inline float2 make_float2(float x, float y) -{ - float2 a = {x, y}; - return a; -} - -ccl_device_inline void print_float2(const char *label, const float2 &a) -{ - printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y); -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_FLOAT2_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_float3.h b/intern/cycles/util/util_types_float3.h deleted file mode 100644 index 694a600bf5c..00000000000 --- a/intern/cycles/util/util_types_float3.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_FLOAT3_H__ -#define __UTIL_TYPES_FLOAT3_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -struct ccl_try_align(16) float3 -{ -# ifdef __KERNEL_SSE__ - union { - __m128 m128; - struct { - float x, y, z, w; - }; - }; - - __forceinline float3(); - __forceinline float3(const float3 &a); - __forceinline explicit float3(const __m128 &a); - - __forceinline operator const __m128 &() const; - __forceinline operator __m128 &(); - - __forceinline float3 &operator=(const float3 &a); -# else /* __KERNEL_SSE__ */ - float x, y, z, w; -# endif /* __KERNEL_SSE__ */ - - __forceinline float operator[](int i) const; - __forceinline float &operator[](int i); -}; - -ccl_device_inline float3 make_float3(float f); -ccl_device_inline float3 make_float3(float x, float y, float z); -ccl_device_inline void print_float3(const char *label, const float3 &a); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_FLOAT3_H__ */ diff --git a/intern/cycles/util/util_types_float3_impl.h b/intern/cycles/util/util_types_float3_impl.h deleted file mode 100644 index ab25fb4c975..00000000000 --- a/intern/cycles/util/util_types_float3_impl.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_FLOAT3_IMPL_H__ -#define __UTIL_TYPES_FLOAT3_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -#ifndef __KERNEL_GPU__ -# include <cstdio> -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -# ifdef __KERNEL_SSE__ -__forceinline float3::float3() -{ -} - -__forceinline float3::float3(const float3 &a) : m128(a.m128) -{ -} - -__forceinline float3::float3(const __m128 &a) : m128(a) -{ -} - -__forceinline float3::operator const __m128 &() const -{ - return m128; -} - -__forceinline float3::operator __m128 &() -{ - return m128; -} - -__forceinline float3 &float3::operator=(const float3 &a) -{ - m128 = a.m128; - return *this; -} -# endif /* __KERNEL_SSE__ */ - -__forceinline float float3::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); -} - -__forceinline float &float3::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); -} - -ccl_device_inline float3 make_float3(float f) -{ -# ifdef __KERNEL_SSE__ - float3 a(_mm_set1_ps(f)); -# else - float3 a = {f, f, f, f}; -# endif - return a; -} - -ccl_device_inline float3 make_float3(float x, float y, float z) -{ -# ifdef __KERNEL_SSE__ - float3 a(_mm_set_ps(0.0f, z, y, x)); -# else - float3 a = {x, y, z, 0.0f}; -# endif - return a; -} - -ccl_device_inline void print_float3(const char *label, const float3 &a) -{ - printf("%s: %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z); -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_FLOAT3_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_float4.h b/intern/cycles/util/util_types_float4.h deleted file mode 100644 index c29e6e15bc3..00000000000 --- a/intern/cycles/util/util_types_float4.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_FLOAT4_H__ -#define __UTIL_TYPES_FLOAT4_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -struct int4; - -struct ccl_try_align(16) float4 -{ -# ifdef __KERNEL_SSE__ - union { - __m128 m128; - struct { - float x, y, z, w; - }; - }; - - __forceinline float4(); - __forceinline explicit float4(const __m128 &a); - - __forceinline operator const __m128 &() const; - __forceinline operator __m128 &(); - - __forceinline float4 &operator=(const float4 &a); - -# else /* __KERNEL_SSE__ */ - float x, y, z, w; -# endif /* __KERNEL_SSE__ */ - - __forceinline float operator[](int i) const; - __forceinline float &operator[](int i); -}; - -ccl_device_inline float4 make_float4(float f); -ccl_device_inline float4 make_float4(float x, float y, float z, float w); -ccl_device_inline float4 make_float4(const int4 &i); -ccl_device_inline void print_float4(const char *label, const float4 &a); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_FLOAT4_H__ */ diff --git a/intern/cycles/util/util_types_float4_impl.h b/intern/cycles/util/util_types_float4_impl.h deleted file mode 100644 index 05a1feee5b2..00000000000 --- a/intern/cycles/util/util_types_float4_impl.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_FLOAT4_IMPL_H__ -#define __UTIL_TYPES_FLOAT4_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -#ifndef __KERNEL_GPU__ -# include <cstdio> -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -# ifdef __KERNEL_SSE__ -__forceinline float4::float4() -{ -} - -__forceinline float4::float4(const __m128 &a) : m128(a) -{ -} - -__forceinline float4::operator const __m128 &() const -{ - return m128; -} - -__forceinline float4::operator __m128 &() -{ - return m128; -} - -__forceinline float4 &float4::operator=(const float4 &a) -{ - m128 = a.m128; - return *this; -} -# endif /* __KERNEL_SSE__ */ - -__forceinline float float4::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 4); - return *(&x + i); -} - -__forceinline float &float4::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 4); - return *(&x + i); -} - -ccl_device_inline float4 make_float4(float f) -{ -# ifdef __KERNEL_SSE__ - float4 a(_mm_set1_ps(f)); -# else - float4 a = {f, f, f, f}; -# endif - return a; -} - -ccl_device_inline float4 make_float4(float x, float y, float z, float w) -{ -# ifdef __KERNEL_SSE__ - float4 a(_mm_set_ps(w, z, y, x)); -# else - float4 a = {x, y, z, w}; -# endif - return a; -} - -ccl_device_inline float4 make_float4(const int4 &i) -{ -# ifdef __KERNEL_SSE__ - float4 a(_mm_cvtepi32_ps(i.m128)); -# else - float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w}; -# endif - return a; -} - -ccl_device_inline void print_float4(const char *label, const 
float4 &a) -{ - printf("%s: %.8f %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z, (double)a.w); -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_FLOAT4_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_float8.h b/intern/cycles/util/util_types_float8.h deleted file mode 100644 index 27da120a4ba..00000000000 --- a/intern/cycles/util/util_types_float8.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Original code Copyright 2017, Intel Corporation - * Modifications Copyright 2018, Blender Foundation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __UTIL_TYPES_FLOAT8_H__ -#define __UTIL_TYPES_FLOAT8_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ - -struct ccl_try_align(32) float8 -{ -# ifdef __KERNEL_AVX2__ - union { - __m256 m256; - struct { - float a, b, c, d, e, f, g, h; - }; - }; - - __forceinline float8(); - __forceinline float8(const float8 &a); - __forceinline explicit float8(const __m256 &a); - - __forceinline operator const __m256 &() const; - __forceinline operator __m256 &(); - - __forceinline float8 &operator=(const float8 &a); - -# else /* __KERNEL_AVX2__ */ - float a, b, c, d, e, f, g, h; -# endif /* __KERNEL_AVX2__ */ - - __forceinline float operator[](int i) const; - __forceinline float &operator[](int i); -}; - -ccl_device_inline float8 make_float8(float f); -ccl_device_inline float8 -make_float8(float a, float b, float c, float d, float e, float f, float g, float h); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_FLOAT8_H__ */ diff --git a/intern/cycles/util/util_types_float8_impl.h b/intern/cycles/util/util_types_float8_impl.h deleted file mode 100644 index 4e4ea28c6a4..00000000000 --- a/intern/cycles/util/util_types_float8_impl.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Original code Copyright 2017, Intel Corporation - * Modifications Copyright 2018, Blender Foundation. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __UTIL_TYPES_FLOAT8_IMPL_H__ -#define __UTIL_TYPES_FLOAT8_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -#ifndef __KERNEL_GPU__ -# include <cstdio> -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -# ifdef __KERNEL_AVX2__ -__forceinline float8::float8() -{ -} - -__forceinline float8::float8(const float8 &f) : m256(f.m256) -{ -} - -__forceinline float8::float8(const __m256 &f) : m256(f) -{ -} - -__forceinline float8::operator const __m256 &() const -{ - return m256; -} - -__forceinline float8::operator __m256 &() -{ - return m256; -} - -__forceinline float8 &float8::operator=(const float8 &f) -{ - m256 = f.m256; - return *this; -} -# endif /* __KERNEL_AVX2__ */ - -__forceinline float float8::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 8); - return *(&a + i); -} - -__forceinline float &float8::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 8); - return *(&a + i); -} - -ccl_device_inline float8 make_float8(float f) -{ -# ifdef __KERNEL_AVX2__ - float8 r(_mm256_set1_ps(f)); -# else - float8 r = {f, f, f, f, f, f, f, f}; -# endif - return r; -} - -ccl_device_inline float8 -make_float8(float a, float b, float c, float d, float e, float f, float g, float h) -{ -# ifdef __KERNEL_AVX2__ - float8 r(_mm256_set_ps(a, b, c, d, e, f, g, h)); -# else - float8 r = {a, b, c, d, e, f, g, h}; -# endif - return r; -} - -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_FLOAT8_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_int2.h b/intern/cycles/util/util_types_int2.h deleted file mode 100644 index 8811e5ec7c2..00000000000 --- a/intern/cycles/util/util_types_int2.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_INT2_H__ -#define __UTIL_TYPES_INT2_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -struct int2 { - int x, y; - - __forceinline int operator[](int i) const; - __forceinline int &operator[](int i); -}; - -ccl_device_inline int2 make_int2(int x, int y); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_INT2_H__ */ diff --git a/intern/cycles/util/util_types_int2_impl.h b/intern/cycles/util/util_types_int2_impl.h deleted file mode 100644 index ce95d4f14e5..00000000000 --- a/intern/cycles/util/util_types_int2_impl.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_INT2_IMPL_H__ -#define __UTIL_TYPES_INT2_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -int int2::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); -} - -int &int2::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); -} - -ccl_device_inline int2 make_int2(int x, int y) -{ - int2 a = {x, y}; - return a; -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_INT2_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_int3.h b/intern/cycles/util/util_types_int3.h deleted file mode 100644 index 09edc09dff3..00000000000 --- a/intern/cycles/util/util_types_int3.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_INT3_H__ -#define __UTIL_TYPES_INT3_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -struct ccl_try_align(16) int3 -{ -# ifdef __KERNEL_SSE__ - union { - __m128i m128; - struct { - int x, y, z, w; - }; - }; - - __forceinline int3(); - __forceinline int3(const int3 &a); - __forceinline explicit int3(const __m128i &a); - - __forceinline operator const __m128i &() const; - __forceinline operator __m128i &(); - - __forceinline int3 &operator=(const int3 &a); -# else /* __KERNEL_SSE__ */ - int x, y, z, w; -# endif /* __KERNEL_SSE__ */ - - __forceinline int operator[](int i) const; - __forceinline int &operator[](int i); -}; - -ccl_device_inline int3 make_int3(int i); -ccl_device_inline int3 make_int3(int x, int y, int z); -ccl_device_inline void print_int3(const char *label, const int3 &a); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_INT3_H__ */ diff --git a/intern/cycles/util/util_types_int3_impl.h b/intern/cycles/util/util_types_int3_impl.h deleted file mode 100644 index 080c892640b..00000000000 --- a/intern/cycles/util/util_types_int3_impl.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_INT3_IMPL_H__ -#define __UTIL_TYPES_INT3_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -#ifndef __KERNEL_GPU__ -# include <cstdio> -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -# ifdef __KERNEL_SSE__ -__forceinline int3::int3() -{ -} - -__forceinline int3::int3(const __m128i &a) : m128(a) -{ -} - -__forceinline int3::int3(const int3 &a) : m128(a.m128) -{ -} - -__forceinline int3::operator const __m128i &() const -{ - return m128; -} - -__forceinline int3::operator __m128i &() -{ - return m128; -} - -__forceinline int3 &int3::operator=(const int3 &a) -{ - m128 = a.m128; - return *this; -} -# endif /* __KERNEL_SSE__ */ - -__forceinline int int3::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); -} - -__forceinline int &int3::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); -} - -ccl_device_inline int3 make_int3(int i) -{ -# ifdef __KERNEL_SSE__ - int3 a(_mm_set1_epi32(i)); -# else - int3 a = {i, i, i, i}; -# endif - return a; -} - -ccl_device_inline int3 make_int3(int x, int y, int z) -{ -# ifdef __KERNEL_SSE__ - int3 a(_mm_set_epi32(0, z, y, x)); -# else - int3 a = {x, y, z, 0}; -# endif - - return a; -} - -ccl_device_inline void print_int3(const char *label, const int3 &a) -{ - printf("%s: %d %d %d\n", label, a.x, a.y, a.z); -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_INT3_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_int4.h b/intern/cycles/util/util_types_int4.h deleted file mode 100644 index 5c7917cf5d6..00000000000 --- a/intern/cycles/util/util_types_int4.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_INT4_H__ -#define __UTIL_TYPES_INT4_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ - -struct float3; -struct float4; - -struct ccl_try_align(16) int4 -{ -# ifdef __KERNEL_SSE__ - union { - __m128i m128; - struct { - int x, y, z, w; - }; - }; - - __forceinline int4(); - __forceinline int4(const int4 &a); - __forceinline explicit int4(const __m128i &a); - - __forceinline operator const __m128i &() const; - __forceinline operator __m128i &(); - - __forceinline int4 &operator=(const int4 &a); -# else /* __KERNEL_SSE__ */ - int x, y, z, w; -# endif /* __KERNEL_SSE__ */ - - __forceinline int operator[](int i) const; - __forceinline int &operator[](int i); -}; - -ccl_device_inline int4 make_int4(int i); -ccl_device_inline int4 make_int4(int x, int y, int z, int w); -ccl_device_inline int4 make_int4(const float3 &f); -ccl_device_inline int4 make_int4(const float4 &f); -ccl_device_inline void print_int4(const char *label, const int4 &a); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_INT4_H__ */ diff --git a/intern/cycles/util/util_types_int4_impl.h b/intern/cycles/util/util_types_int4_impl.h deleted file mode 100644 index c6f6ff23a17..00000000000 --- a/intern/cycles/util/util_types_int4_impl.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in 
 compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_INT4_IMPL_H__ -#define __UTIL_TYPES_INT4_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -#ifndef __KERNEL_GPU__ -# include <cstdio> -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -# ifdef __KERNEL_SSE__ -__forceinline int4::int4() -{ -} - -__forceinline int4::int4(const int4 &a) : m128(a.m128) -{ -} - -__forceinline int4::int4(const __m128i &a) : m128(a) -{ -} - -__forceinline int4::operator const __m128i &() const -{ - return m128; -} - -__forceinline int4::operator __m128i &() -{ - return m128; -} - -__forceinline int4 &int4::operator=(const int4 &a) -{ - m128 = a.m128; - return *this; -} -# endif /* __KERNEL_SSE__ */ - -__forceinline int int4::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 4); - return *(&x + i); -} - -__forceinline int &int4::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 4); - return *(&x + i); -} - -ccl_device_inline int4 make_int4(int i) -{ -# ifdef __KERNEL_SSE__ - int4 a(_mm_set1_epi32(i)); -# else - int4 a = {i, i, i, i}; -# endif - return a; -} - -ccl_device_inline int4 make_int4(int x, int y, int z, int w) -{ -# ifdef __KERNEL_SSE__ - int4 a(_mm_set_epi32(w, z, y, x)); -# else - int4 a = {x, y, z, w}; -# endif - return a; -} - -ccl_device_inline int4 make_int4(const float3 &f) -{ -# ifdef __KERNEL_SSE__ - int4 a(_mm_cvtps_epi32(f.m128)); -# else - int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; -# endif - return a; -} - -ccl_device_inline int4 
make_int4(const float4 &f) -{ -# ifdef __KERNEL_SSE__ - int4 a(_mm_cvtps_epi32(f.m128)); -# else - int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; -# endif - return a; -} - -ccl_device_inline void print_int4(const char *label, const int4 &a) -{ - printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w); -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_INT4_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_uchar2.h b/intern/cycles/util/util_types_uchar2.h deleted file mode 100644 index 8cc486e3e48..00000000000 --- a/intern/cycles/util/util_types_uchar2.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_UCHAR2_H__ -#define __UTIL_TYPES_UCHAR2_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -struct uchar2 { - uchar x, y; - - __forceinline uchar operator[](int i) const; - __forceinline uchar &operator[](int i); -}; - -ccl_device_inline uchar2 make_uchar2(uchar x, uchar y); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_UCHAR2_H__ */ diff --git a/intern/cycles/util/util_types_uchar2_impl.h b/intern/cycles/util/util_types_uchar2_impl.h deleted file mode 100644 index 16968c32dd9..00000000000 --- a/intern/cycles/util/util_types_uchar2_impl.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_UCHAR2_IMPL_H__ -#define __UTIL_TYPES_UCHAR2_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -uchar uchar2::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); -} - -uchar &uchar2::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); -} - -ccl_device_inline uchar2 make_uchar2(uchar x, uchar y) -{ - uchar2 a = {x, y}; - return a; -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_UCHAR2_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_uchar3.h b/intern/cycles/util/util_types_uchar3.h deleted file mode 100644 index 5838c437c70..00000000000 --- a/intern/cycles/util/util_types_uchar3.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_UCHAR3_H__ -#define __UTIL_TYPES_UCHAR3_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -struct uchar3 { - uchar x, y, z; - - __forceinline uchar operator[](int i) const; - __forceinline uchar &operator[](int i); -}; - -ccl_device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_UCHAR3_H__ */ diff --git a/intern/cycles/util/util_types_uchar3_impl.h b/intern/cycles/util/util_types_uchar3_impl.h deleted file mode 100644 index aa31b725731..00000000000 --- a/intern/cycles/util/util_types_uchar3_impl.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_UCHAR3_IMPL_H__ -#define __UTIL_TYPES_UCHAR3_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -uchar uchar3::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); -} - -uchar &uchar3::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); -} - -ccl_device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z) -{ - uchar3 a = {x, y, z}; - return a; -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_UCHAR3_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_uchar4.h b/intern/cycles/util/util_types_uchar4.h deleted file mode 100644 index 22b6a1ac705..00000000000 --- a/intern/cycles/util/util_types_uchar4.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_UCHAR4_H__ -#define __UTIL_TYPES_UCHAR4_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -struct uchar4 { - uchar x, y, z, w; - - __forceinline uchar operator[](int i) const; - __forceinline uchar &operator[](int i); -}; - -ccl_device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_UCHAR4_H__ */ diff --git a/intern/cycles/util/util_types_uchar4_impl.h b/intern/cycles/util/util_types_uchar4_impl.h deleted file mode 100644 index 79879f176a6..00000000000 --- a/intern/cycles/util/util_types_uchar4_impl.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_UCHAR4_IMPL_H__ -#define __UTIL_TYPES_UCHAR4_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -uchar uchar4::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 4); - return *(&x + i); -} - -uchar &uchar4::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 4); - return *(&x + i); -} - -ccl_device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w) -{ - uchar4 a = {x, y, z, w}; - return a; -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_UCHAR4_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_uint2.h b/intern/cycles/util/util_types_uint2.h deleted file mode 100644 index abcb8ee5346..00000000000 --- a/intern/cycles/util/util_types_uint2.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_UINT2_H__ -#define __UTIL_TYPES_UINT2_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -struct uint2 { - uint x, y; - - __forceinline uint operator[](uint i) const; - __forceinline uint &operator[](uint i); -}; - -ccl_device_inline uint2 make_uint2(uint x, uint y); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_UINT2_H__ */ diff --git a/intern/cycles/util/util_types_uint2_impl.h b/intern/cycles/util/util_types_uint2_impl.h deleted file mode 100644 index db62bd99b89..00000000000 --- a/intern/cycles/util/util_types_uint2_impl.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_UINT2_IMPL_H__ -#define __UTIL_TYPES_UINT2_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -__forceinline uint uint2::operator[](uint i) const -{ - util_assert(i < 2); - return *(&x + i); -} - -__forceinline uint &uint2::operator[](uint i) -{ - util_assert(i < 2); - return *(&x + i); -} - -ccl_device_inline uint2 make_uint2(uint x, uint y) -{ - uint2 a = {x, y}; - return a; -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_UINT2_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_uint3.h b/intern/cycles/util/util_types_uint3.h deleted file mode 100644 index 436d870b621..00000000000 --- a/intern/cycles/util/util_types_uint3.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_UINT3_H__ -#define __UTIL_TYPES_UINT3_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -struct uint3 { - uint x, y, z; - - __forceinline uint operator[](uint i) const; - __forceinline uint &operator[](uint i); -}; - -ccl_device_inline uint3 make_uint3(uint x, uint y, uint z); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_UINT3_H__ */ diff --git a/intern/cycles/util/util_types_uint3_impl.h b/intern/cycles/util/util_types_uint3_impl.h deleted file mode 100644 index d188fa06e2a..00000000000 --- a/intern/cycles/util/util_types_uint3_impl.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_UINT3_IMPL_H__ -#define __UTIL_TYPES_UINT3_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -__forceinline uint uint3::operator[](uint i) const -{ - util_assert(i < 3); - return *(&x + i); -} - -__forceinline uint &uint3::operator[](uint i) -{ - util_assert(i < 3); - return *(&x + i); -} - -ccl_device_inline uint3 make_uint3(uint x, uint y, uint z) -{ - uint3 a = {x, y, z}; - return a; -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_UINT3_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_uint4.h b/intern/cycles/util/util_types_uint4.h deleted file mode 100644 index 57f2859fedf..00000000000 --- a/intern/cycles/util/util_types_uint4.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_UINT4_H__ -#define __UTIL_TYPES_UINT4_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -struct uint4 { - uint x, y, z, w; - - __forceinline uint operator[](uint i) const; - __forceinline uint &operator[](uint i); -}; - -ccl_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_UINT4_H__ */ diff --git a/intern/cycles/util/util_types_uint4_impl.h b/intern/cycles/util/util_types_uint4_impl.h deleted file mode 100644 index bac8d23030d..00000000000 --- a/intern/cycles/util/util_types_uint4_impl.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_UINT4_IMPL_H__ -#define __UTIL_TYPES_UINT4_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -__forceinline uint uint4::operator[](uint i) const -{ - util_assert(i < 3); - return *(&x + i); -} - -__forceinline uint &uint4::operator[](uint i) -{ - util_assert(i < 3); - return *(&x + i); -} - -ccl_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w) -{ - uint4 a = {x, y, z, w}; - return a; -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_UINT4_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_ushort4.h b/intern/cycles/util/util_types_ushort4.h deleted file mode 100644 index 476ceec622c..00000000000 --- a/intern/cycles/util/util_types_ushort4.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_USHORT4_H__ -#define __UTIL_TYPES_USHORT4_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ - -struct ushort4 { - uint16_t x, y, z, w; -}; - -#endif - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_USHORT4_H__ */ diff --git a/intern/cycles/util/util_types_vector3.h b/intern/cycles/util/util_types_vector3.h deleted file mode 100644 index 728c7ca62a1..00000000000 --- a/intern/cycles/util/util_types_vector3.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_VECTOR3_H__ -#define __UTIL_TYPES_VECTOR3_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -template class vector3 { - public: - T x, y, z; - - __forceinline vector3(); - __forceinline vector3(const T &a); - __forceinline vector3(const T &x, const T &y, const T &z); -}; -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_VECTOR3_H__ */ diff --git a/intern/cycles/util/util_types_vector3_impl.h b/intern/cycles/util/util_types_vector3_impl.h deleted file mode 100644 index 33ba53e20b2..00000000000 --- a/intern/cycles/util/util_types_vector3_impl.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_VECTOR3_IMPL_H__ -#define __UTIL_TYPES_VECTOR3_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -template ccl_always_inline vector3::vector3() -{ -} - -template ccl_always_inline vector3::vector3(const T &a) : x(a), y(a), z(a) -{ -} - -template -ccl_always_inline vector3::vector3(const T &x, const T &y, const T &z) : x(x), y(y), z(z) -{ -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_VECTOR3_IMPL_H__ */ diff --git a/intern/cycles/util/util_unique_ptr.h b/intern/cycles/util/util_unique_ptr.h deleted file mode 100644 index 3181eafd43d..00000000000 --- a/intern/cycles/util/util_unique_ptr.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __UTIL_UNIQUE_PTR_H__ -#define __UTIL_UNIQUE_PTR_H__ - -#include - -CCL_NAMESPACE_BEGIN - -using std::make_unique; -using std::unique_ptr; - -CCL_NAMESPACE_END - -#endif /* __UTIL_UNIQUE_PTR_H__ */ diff --git a/intern/cycles/util/util_vector.h b/intern/cycles/util/util_vector.h deleted file mode 100644 index 87cd4de8438..00000000000 --- a/intern/cycles/util/util_vector.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_VECTOR_H__ -#define __UTIL_VECTOR_H__ - -#include -#include -#include - -#include "util/util_aligned_malloc.h" -#include "util/util_guarded_allocator.h" -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -/* Own subclass-ed version of std::vector. Subclass is needed because: - * - * - Use own allocator which keeps track of used/peak memory. - * - Have method to ensure capacity is re-set to 0. - */ -template> -class vector : public std::vector { - public: - typedef std::vector BaseClass; - - /* Inherit all constructors from base class. */ - using BaseClass::vector; - - /* Try as hard as possible to use zero memory. */ - void free_memory() - { - vector empty; - BaseClass::swap(empty); - } - - /* Some external API might demand working with std::vector. 
*/ - operator std::vector() - { - return std::vector(this->begin(), this->end()); - } -}; - -CCL_NAMESPACE_END - -#endif /* __UTIL_VECTOR_H__ */ diff --git a/intern/cycles/util/util_version.h b/intern/cycles/util/util_version.h deleted file mode 100644 index 8bce5ff85aa..00000000000 --- a/intern/cycles/util/util_version.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_VERSION_H__ -#define __UTIL_VERSION_H__ - -/* Cycles version number */ - -CCL_NAMESPACE_BEGIN - -#define CYCLES_VERSION_MAJOR 1 -#define CYCLES_VERSION_MINOR 13 -#define CYCLES_VERSION_PATCH 0 - -#define CYCLES_MAKE_VERSION_STRING2(a, b, c) #a "." #b "." #c -#define CYCLES_MAKE_VERSION_STRING(a, b, c) CYCLES_MAKE_VERSION_STRING2(a, b, c) -#define CYCLES_VERSION_STRING \ - CYCLES_MAKE_VERSION_STRING(CYCLES_VERSION_MAJOR, CYCLES_VERSION_MINOR, CYCLES_VERSION_PATCH) - -CCL_NAMESPACE_END - -#endif /* __UTIL_VERSION_H__ */ diff --git a/intern/cycles/util/util_view.cpp b/intern/cycles/util/util_view.cpp deleted file mode 100644 index 9d9ff451b3b..00000000000 --- a/intern/cycles/util/util_view.cpp +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -#include "util/util_opengl.h" -#include "util/util_string.h" -#include "util/util_time.h" -#include "util/util_version.h" -#include "util/util_view.h" - -#ifdef __APPLE__ -# include -#else -# include -#endif - -CCL_NAMESPACE_BEGIN - -/* structs */ - -struct View { - ViewInitFunc initf; - ViewExitFunc exitf; - ViewResizeFunc resize; - ViewDisplayFunc display; - ViewKeyboardFunc keyboard; - ViewMotionFunc motion; - - bool first_display; - bool redraw; - - int mouseX, mouseY; - int mouseBut0, mouseBut2; - - int width, height; -} V; - -/* public */ - -static void view_display_text(int x, int y, const char *text) -{ - const char *c; - - glRasterPos3f(x, y, 0); - - for (c = text; *c != '\0'; c++) - glutBitmapCharacter(GLUT_BITMAP_HELVETICA_10, *c); -} - -void view_display_info(const char *info) -{ - const int height = 20; - - glEnable(GL_BLEND); - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - glColor4f(0.1f, 0.1f, 0.1f, 0.8f); - glRectf(0.0f, V.height - height, V.width, V.height); - glDisable(GL_BLEND); - - glColor3f(0.5f, 0.5f, 0.5f); - - view_display_text(10, 7 + V.height - height, info); - - glColor3f(1.0f, 1.0f, 1.0f); -} - -void view_display_help() -{ - const int w = (int)((float)V.width / 1.15f); - const int h = (int)((float)V.height / 1.15f); - - const int x1 = (V.width - w) / 2; - const int x2 = x1 + w; - - const int y1 = (V.height - h) / 2; - const int y2 = y1 + h; - - glEnable(GL_BLEND); - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - glColor4f(0.5f, 0.5f, 0.5f, 0.8f); - glRectf(x1, y1, x2, y2); - 
glDisable(GL_BLEND); - - glColor3f(0.8f, 0.8f, 0.8f); - - string info = string("Cycles Renderer ") + CYCLES_VERSION_STRING; - - view_display_text(x1 + 20, y2 - 20, info.c_str()); - view_display_text(x1 + 20, y2 - 40, "(C) 2011-2016 Blender Foundation"); - view_display_text(x1 + 20, y2 - 80, "Controls:"); - view_display_text(x1 + 20, y2 - 100, "h: Info/Help"); - view_display_text(x1 + 20, y2 - 120, "r: Reset"); - view_display_text(x1 + 20, y2 - 140, "p: Pause"); - view_display_text(x1 + 20, y2 - 160, "esc: Cancel"); - view_display_text(x1 + 20, y2 - 180, "q: Quit program"); - - view_display_text(x1 + 20, y2 - 210, "i: Interactive mode"); - view_display_text(x1 + 20, y2 - 230, "Left mouse: Move camera"); - view_display_text(x1 + 20, y2 - 250, "Right mouse: Rotate camera"); - view_display_text(x1 + 20, y2 - 270, "W/A/S/D: Move camera"); - view_display_text(x1 + 20, y2 - 290, "0/1/2/3: Set max bounces"); - - glColor3f(1.0f, 1.0f, 1.0f); -} - -static void view_display() -{ - if (V.first_display) { - if (V.initf) - V.initf(); - if (V.exitf) - atexit(V.exitf); - - V.first_display = false; - } - - glClearColor(0.05f, 0.05f, 0.05f, 0.0f); - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); - - glMatrixMode(GL_PROJECTION); - glLoadIdentity(); - glOrtho(0, V.width, 0, V.height, -1, 1); - - glMatrixMode(GL_MODELVIEW); - glLoadIdentity(); - - glRasterPos3f(0, 0, 0); - - if (V.display) - V.display(); - - glutSwapBuffers(); -} - -static void view_reshape(int width, int height) -{ - if (width <= 0 || height <= 0) - return; - - V.width = width; - V.height = height; - - glViewport(0, 0, width, height); - - glMatrixMode(GL_PROJECTION); - glLoadIdentity(); - - glMatrixMode(GL_MODELVIEW); - glLoadIdentity(); - - if (V.resize) - V.resize(width, height); -} - -static void view_keyboard(unsigned char key, int x, int y) -{ - if (V.keyboard) - V.keyboard(key); - - if (key == 'm') - printf("mouse %d %d\n", x, y); - if (key == 'q') { - if (V.exitf) - V.exitf(); - exit(0); - } -} - -static 
void view_mouse(int button, int state, int x, int y) -{ - if (button == 0) { - if (state == GLUT_DOWN) { - V.mouseX = x; - V.mouseY = y; - V.mouseBut0 = 1; - } - else if (state == GLUT_UP) { - V.mouseBut0 = 0; - } - } - else if (button == 2) { - if (state == GLUT_DOWN) { - V.mouseX = x; - V.mouseY = y; - V.mouseBut2 = 1; - } - else if (state == GLUT_UP) { - V.mouseBut2 = 0; - } - } -} - -static void view_motion(int x, int y) -{ - const int but = V.mouseBut0 ? 0 : 2; - const int distX = x - V.mouseX; - const int distY = y - V.mouseY; - - if (V.motion) - V.motion(distX, distY, but); - - V.mouseX = x; - V.mouseY = y; -} - -static void view_idle() -{ - if (V.redraw) { - V.redraw = false; - glutPostRedisplay(); - } - - time_sleep(0.1); -} - -void view_main_loop(const char *title, - int width, - int height, - ViewInitFunc initf, - ViewExitFunc exitf, - ViewResizeFunc resize, - ViewDisplayFunc display, - ViewKeyboardFunc keyboard, - ViewMotionFunc motion) -{ - const char *name = "app"; - char *argv = (char *)name; - int argc = 1; - - memset(&V, 0, sizeof(V)); - V.width = width; - V.height = height; - V.first_display = true; - V.redraw = false; - V.initf = initf; - V.exitf = exitf; - V.resize = resize; - V.display = display; - V.keyboard = keyboard; - V.motion = motion; - - glutInit(&argc, &argv); - glutInitWindowSize(width, height); - glutInitWindowPosition(0, 0); - glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH); - glutCreateWindow(title); - - glewInit(); - - view_reshape(width, height); - - glutDisplayFunc(view_display); - glutIdleFunc(view_idle); - glutReshapeFunc(view_reshape); - glutKeyboardFunc(view_keyboard); - glutMouseFunc(view_mouse); - glutMotionFunc(view_motion); - - glutMainLoop(); -} - -void view_redraw() -{ - V.redraw = true; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_view.h b/intern/cycles/util/util_view.h deleted file mode 100644 index ad5c53ee5d5..00000000000 --- a/intern/cycles/util/util_view.h +++ /dev/null @@ -1,48 +0,0 @@ 
-/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_VIEW_H__ -#define __UTIL_VIEW_H__ - -/* Functions to display a simple OpenGL window using GLUT, simplified to the - * bare minimum we need to reduce boilerplate code in tests apps. */ - -CCL_NAMESPACE_BEGIN - -typedef void (*ViewInitFunc)(); -typedef void (*ViewExitFunc)(); -typedef void (*ViewResizeFunc)(int width, int height); -typedef void (*ViewDisplayFunc)(); -typedef void (*ViewKeyboardFunc)(unsigned char key); -typedef void (*ViewMotionFunc)(int x, int y, int button); - -void view_main_loop(const char *title, - int width, - int height, - ViewInitFunc initf, - ViewExitFunc exitf, - ViewResizeFunc resize, - ViewDisplayFunc display, - ViewKeyboardFunc keyboard, - ViewMotionFunc motion); - -void view_display_info(const char *info); -void view_display_help(); -void view_redraw(); - -CCL_NAMESPACE_END - -#endif /*__UTIL_VIEW_H__*/ diff --git a/intern/cycles/util/util_windows.cpp b/intern/cycles/util/util_windows.cpp deleted file mode 100644 index 807a5adc84a..00000000000 --- a/intern/cycles/util/util_windows.cpp +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2019-2019 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifdef _WIN32 -# include -#endif - -#include "util_windows.h" - -CCL_NAMESPACE_BEGIN - -bool system_windows_version_at_least(int major, int build) -{ -#ifdef _WIN32 - HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll"); - if (hMod == 0) { - return false; - } - - typedef NTSTATUS(WINAPI * RtlGetVersionPtr)(PRTL_OSVERSIONINFOW); - RtlGetVersionPtr rtl_get_version = (RtlGetVersionPtr)::GetProcAddress(hMod, "RtlGetVersion"); - if (rtl_get_version == NULL) { - return false; - } - - RTL_OSVERSIONINFOW rovi = {0}; - rovi.dwOSVersionInfoSize = sizeof(rovi); - if (rtl_get_version(&rovi) != 0) { - return false; - } - - return (rovi.dwMajorVersion > major || - (rovi.dwMajorVersion == major && rovi.dwBuildNumber >= build)); -#else - (void)major; - (void)build; - return false; -#endif -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_windows.h b/intern/cycles/util/util_windows.h deleted file mode 100644 index 9cbf91a23a7..00000000000 --- a/intern/cycles/util/util_windows.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_WINDOWS_H__ -#define __UTIL_WINDOWS_H__ - -#ifdef _WIN32 - -# ifndef NOGDI -# define NOGDI -# endif -# ifndef NOMINMAX -# define NOMINMAX -# endif -# ifndef WIN32_LEAN_AND_MEAN -# define WIN32_LEAN_AND_MEAN -# endif - -# include - -#endif /* _WIN32 */ - -CCL_NAMESPACE_BEGIN - -bool system_windows_version_at_least(int major, int build); - -CCL_NAMESPACE_END - -#endif /* __UTIL_WINDOWS_H__ */ diff --git a/intern/cycles/util/util_xml.h b/intern/cycles/util/util_xml.h deleted file mode 100644 index 6f06f17937b..00000000000 --- a/intern/cycles/util/util_xml.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_XML_H__ -#define __UTIL_XML_H__ - -/* PugiXML is used for XML parsing. 
*/ - -#include - -CCL_NAMESPACE_BEGIN - -OIIO_NAMESPACE_USING - -#ifdef WITH_SYSTEM_PUGIXML -# define PUGIXML_NAMESPACE pugi -#else -# define PUGIXML_NAMESPACE OIIO_NAMESPACE::pugi -#endif - -using PUGIXML_NAMESPACE::xml_attribute; -using PUGIXML_NAMESPACE::xml_document; -using PUGIXML_NAMESPACE::xml_node; -using PUGIXML_NAMESPACE::xml_parse_result; - -CCL_NAMESPACE_END - -#endif /* __UTIL_XML_H__ */ diff --git a/intern/cycles/util/vector.h b/intern/cycles/util/vector.h new file mode 100644 index 00000000000..db35f198dc1 --- /dev/null +++ b/intern/cycles/util/vector.h @@ -0,0 +1,59 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_VECTOR_H__ +#define __UTIL_VECTOR_H__ + +#include +#include +#include + +#include "util/aligned_malloc.h" +#include "util/guarded_allocator.h" +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* Own subclass-ed version of std::vector. Subclass is needed because: + * + * - Use own allocator which keeps track of used/peak memory. + * - Have method to ensure capacity is re-set to 0. + */ +template> +class vector : public std::vector { + public: + typedef std::vector BaseClass; + + /* Inherit all constructors from base class. */ + using BaseClass::vector; + + /* Try as hard as possible to use zero memory. */ + void free_memory() + { + vector empty; + BaseClass::swap(empty); + } + + /* Some external API might demand working with std::vector. 
*/ + operator std::vector() + { + return std::vector(this->begin(), this->end()); + } +}; + +CCL_NAMESPACE_END + +#endif /* __UTIL_VECTOR_H__ */ diff --git a/intern/cycles/util/version.h b/intern/cycles/util/version.h new file mode 100644 index 00000000000..8bce5ff85aa --- /dev/null +++ b/intern/cycles/util/version.h @@ -0,0 +1,35 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_VERSION_H__ +#define __UTIL_VERSION_H__ + +/* Cycles version number */ + +CCL_NAMESPACE_BEGIN + +#define CYCLES_VERSION_MAJOR 1 +#define CYCLES_VERSION_MINOR 13 +#define CYCLES_VERSION_PATCH 0 + +#define CYCLES_MAKE_VERSION_STRING2(a, b, c) #a "." #b "." #c +#define CYCLES_MAKE_VERSION_STRING(a, b, c) CYCLES_MAKE_VERSION_STRING2(a, b, c) +#define CYCLES_VERSION_STRING \ + CYCLES_MAKE_VERSION_STRING(CYCLES_VERSION_MAJOR, CYCLES_VERSION_MINOR, CYCLES_VERSION_PATCH) + +CCL_NAMESPACE_END + +#endif /* __UTIL_VERSION_H__ */ diff --git a/intern/cycles/util/view.cpp b/intern/cycles/util/view.cpp new file mode 100644 index 00000000000..1c70cea1a8b --- /dev/null +++ b/intern/cycles/util/view.cpp @@ -0,0 +1,282 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "util/opengl.h" +#include "util/string.h" +#include "util/time.h" +#include "util/version.h" +#include "util/view.h" + +#ifdef __APPLE__ +# include +#else +# include +#endif + +CCL_NAMESPACE_BEGIN + +/* structs */ + +struct View { + ViewInitFunc initf; + ViewExitFunc exitf; + ViewResizeFunc resize; + ViewDisplayFunc display; + ViewKeyboardFunc keyboard; + ViewMotionFunc motion; + + bool first_display; + bool redraw; + + int mouseX, mouseY; + int mouseBut0, mouseBut2; + + int width, height; +} V; + +/* public */ + +static void view_display_text(int x, int y, const char *text) +{ + const char *c; + + glRasterPos3f(x, y, 0); + + for (c = text; *c != '\0'; c++) + glutBitmapCharacter(GLUT_BITMAP_HELVETICA_10, *c); +} + +void view_display_info(const char *info) +{ + const int height = 20; + + glEnable(GL_BLEND); + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + glColor4f(0.1f, 0.1f, 0.1f, 0.8f); + glRectf(0.0f, V.height - height, V.width, V.height); + glDisable(GL_BLEND); + + glColor3f(0.5f, 0.5f, 0.5f); + + view_display_text(10, 7 + V.height - height, info); + + glColor3f(1.0f, 1.0f, 1.0f); +} + +void view_display_help() +{ + const int w = (int)((float)V.width / 1.15f); + const int h = (int)((float)V.height / 1.15f); + + const int x1 = (V.width - w) / 2; + const int x2 = x1 + w; + + const int y1 = (V.height - h) / 2; + const int y2 = y1 + h; + + glEnable(GL_BLEND); + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + glColor4f(0.5f, 0.5f, 0.5f, 0.8f); + glRectf(x1, y1, x2, y2); + glDisable(GL_BLEND); + + 
glColor3f(0.8f, 0.8f, 0.8f); + + string info = string("Cycles Renderer ") + CYCLES_VERSION_STRING; + + view_display_text(x1 + 20, y2 - 20, info.c_str()); + view_display_text(x1 + 20, y2 - 40, "(C) 2011-2016 Blender Foundation"); + view_display_text(x1 + 20, y2 - 80, "Controls:"); + view_display_text(x1 + 20, y2 - 100, "h: Info/Help"); + view_display_text(x1 + 20, y2 - 120, "r: Reset"); + view_display_text(x1 + 20, y2 - 140, "p: Pause"); + view_display_text(x1 + 20, y2 - 160, "esc: Cancel"); + view_display_text(x1 + 20, y2 - 180, "q: Quit program"); + + view_display_text(x1 + 20, y2 - 210, "i: Interactive mode"); + view_display_text(x1 + 20, y2 - 230, "Left mouse: Move camera"); + view_display_text(x1 + 20, y2 - 250, "Right mouse: Rotate camera"); + view_display_text(x1 + 20, y2 - 270, "W/A/S/D: Move camera"); + view_display_text(x1 + 20, y2 - 290, "0/1/2/3: Set max bounces"); + + glColor3f(1.0f, 1.0f, 1.0f); +} + +static void view_display() +{ + if (V.first_display) { + if (V.initf) + V.initf(); + if (V.exitf) + atexit(V.exitf); + + V.first_display = false; + } + + glClearColor(0.05f, 0.05f, 0.05f, 0.0f); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + glOrtho(0, V.width, 0, V.height, -1, 1); + + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + + glRasterPos3f(0, 0, 0); + + if (V.display) + V.display(); + + glutSwapBuffers(); +} + +static void view_reshape(int width, int height) +{ + if (width <= 0 || height <= 0) + return; + + V.width = width; + V.height = height; + + glViewport(0, 0, width, height); + + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + + if (V.resize) + V.resize(width, height); +} + +static void view_keyboard(unsigned char key, int x, int y) +{ + if (V.keyboard) + V.keyboard(key); + + if (key == 'm') + printf("mouse %d %d\n", x, y); + if (key == 'q') { + if (V.exitf) + V.exitf(); + exit(0); + } +} + +static void view_mouse(int 
button, int state, int x, int y) +{ + if (button == 0) { + if (state == GLUT_DOWN) { + V.mouseX = x; + V.mouseY = y; + V.mouseBut0 = 1; + } + else if (state == GLUT_UP) { + V.mouseBut0 = 0; + } + } + else if (button == 2) { + if (state == GLUT_DOWN) { + V.mouseX = x; + V.mouseY = y; + V.mouseBut2 = 1; + } + else if (state == GLUT_UP) { + V.mouseBut2 = 0; + } + } +} + +static void view_motion(int x, int y) +{ + const int but = V.mouseBut0 ? 0 : 2; + const int distX = x - V.mouseX; + const int distY = y - V.mouseY; + + if (V.motion) + V.motion(distX, distY, but); + + V.mouseX = x; + V.mouseY = y; +} + +static void view_idle() +{ + if (V.redraw) { + V.redraw = false; + glutPostRedisplay(); + } + + time_sleep(0.1); +} + +void view_main_loop(const char *title, + int width, + int height, + ViewInitFunc initf, + ViewExitFunc exitf, + ViewResizeFunc resize, + ViewDisplayFunc display, + ViewKeyboardFunc keyboard, + ViewMotionFunc motion) +{ + const char *name = "app"; + char *argv = (char *)name; + int argc = 1; + + memset(&V, 0, sizeof(V)); + V.width = width; + V.height = height; + V.first_display = true; + V.redraw = false; + V.initf = initf; + V.exitf = exitf; + V.resize = resize; + V.display = display; + V.keyboard = keyboard; + V.motion = motion; + + glutInit(&argc, &argv); + glutInitWindowSize(width, height); + glutInitWindowPosition(0, 0); + glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH); + glutCreateWindow(title); + + glewInit(); + + view_reshape(width, height); + + glutDisplayFunc(view_display); + glutIdleFunc(view_idle); + glutReshapeFunc(view_reshape); + glutKeyboardFunc(view_keyboard); + glutMouseFunc(view_mouse); + glutMotionFunc(view_motion); + + glutMainLoop(); +} + +void view_redraw() +{ + V.redraw = true; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/view.h b/intern/cycles/util/view.h new file mode 100644 index 00000000000..ad5c53ee5d5 --- /dev/null +++ b/intern/cycles/util/view.h @@ -0,0 +1,48 @@ +/* + * Copyright 2011-2013 Blender 
Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_VIEW_H__ +#define __UTIL_VIEW_H__ + +/* Functions to display a simple OpenGL window using GLUT, simplified to the + * bare minimum we need to reduce boilerplate code in tests apps. */ + +CCL_NAMESPACE_BEGIN + +typedef void (*ViewInitFunc)(); +typedef void (*ViewExitFunc)(); +typedef void (*ViewResizeFunc)(int width, int height); +typedef void (*ViewDisplayFunc)(); +typedef void (*ViewKeyboardFunc)(unsigned char key); +typedef void (*ViewMotionFunc)(int x, int y, int button); + +void view_main_loop(const char *title, + int width, + int height, + ViewInitFunc initf, + ViewExitFunc exitf, + ViewResizeFunc resize, + ViewDisplayFunc display, + ViewKeyboardFunc keyboard, + ViewMotionFunc motion); + +void view_display_info(const char *info); +void view_display_help(); +void view_redraw(); + +CCL_NAMESPACE_END + +#endif /*__UTIL_VIEW_H__*/ diff --git a/intern/cycles/util/windows.cpp b/intern/cycles/util/windows.cpp new file mode 100644 index 00000000000..96944d07390 --- /dev/null +++ b/intern/cycles/util/windows.cpp @@ -0,0 +1,54 @@ +/* + * Copyright 2019-2019 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef _WIN32 +# include +#endif + +#include "util/windows.h" + +CCL_NAMESPACE_BEGIN + +bool system_windows_version_at_least(int major, int build) +{ +#ifdef _WIN32 + HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll"); + if (hMod == 0) { + return false; + } + + typedef NTSTATUS(WINAPI * RtlGetVersionPtr)(PRTL_OSVERSIONINFOW); + RtlGetVersionPtr rtl_get_version = (RtlGetVersionPtr)::GetProcAddress(hMod, "RtlGetVersion"); + if (rtl_get_version == NULL) { + return false; + } + + RTL_OSVERSIONINFOW rovi = {0}; + rovi.dwOSVersionInfoSize = sizeof(rovi); + if (rtl_get_version(&rovi) != 0) { + return false; + } + + return (rovi.dwMajorVersion > major || + (rovi.dwMajorVersion == major && rovi.dwBuildNumber >= build)); +#else + (void)major; + (void)build; + return false; +#endif +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/windows.h b/intern/cycles/util/windows.h new file mode 100644 index 00000000000..9cbf91a23a7 --- /dev/null +++ b/intern/cycles/util/windows.h @@ -0,0 +1,42 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_WINDOWS_H__ +#define __UTIL_WINDOWS_H__ + +#ifdef _WIN32 + +# ifndef NOGDI +# define NOGDI +# endif +# ifndef NOMINMAX +# define NOMINMAX +# endif +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# endif + +# include + +#endif /* _WIN32 */ + +CCL_NAMESPACE_BEGIN + +bool system_windows_version_at_least(int major, int build); + +CCL_NAMESPACE_END + +#endif /* __UTIL_WINDOWS_H__ */ diff --git a/intern/cycles/util/xml.h b/intern/cycles/util/xml.h new file mode 100644 index 00000000000..6f06f17937b --- /dev/null +++ b/intern/cycles/util/xml.h @@ -0,0 +1,41 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_XML_H__ +#define __UTIL_XML_H__ + +/* PugiXML is used for XML parsing. */ + +#include + +CCL_NAMESPACE_BEGIN + +OIIO_NAMESPACE_USING + +#ifdef WITH_SYSTEM_PUGIXML +# define PUGIXML_NAMESPACE pugi +#else +# define PUGIXML_NAMESPACE OIIO_NAMESPACE::pugi +#endif + +using PUGIXML_NAMESPACE::xml_attribute; +using PUGIXML_NAMESPACE::xml_document; +using PUGIXML_NAMESPACE::xml_node; +using PUGIXML_NAMESPACE::xml_parse_result; + +CCL_NAMESPACE_END + +#endif /* __UTIL_XML_H__ */ -- cgit v1.2.3