Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAntonio Vazquez <blendergit@gmail.com>2022-11-08 18:29:56 +0300
committerAntonio Vazquez <blendergit@gmail.com>2022-11-08 18:29:56 +0300
commitaa9b976e9f9b8baff194f5bfadcf9e7694cf8d15 (patch)
treeb30ce7abaa65be85c147222cb074571c056b59df
parent410b87ca781d6b6b061bab0440005ac1ab82688f (diff)
parentbbb1d3e5e7eb4059a0324ae786e1e793852963a9 (diff)
Merge branch 'master' into temp-gpencil-automasktemp-gpencil-automask
-rw-r--r--intern/cycles/kernel/CMakeLists.txt3
-rw-r--r--intern/cycles/kernel/device/cpu/kernel.cpp9
-rw-r--r--intern/cycles/kernel/svm/noise.h268
-rw-r--r--intern/cycles/test/CMakeLists.txt15
-rw-r--r--intern/cycles/test/util_avxf_test.h211
-rw-r--r--intern/cycles/test/util_float8_avx2_test.cpp (renamed from intern/cycles/test/util_avxf_avx2_test.cpp)4
-rw-r--r--intern/cycles/test/util_float8_avx_test.cpp (renamed from intern/cycles/test/util_avxf_avx_test.cpp)3
-rw-r--r--intern/cycles/test/util_float8_sse2_test.cpp12
-rw-r--r--intern/cycles/test/util_float8_test.h103
-rw-r--r--intern/cycles/util/CMakeLists.txt9
-rw-r--r--intern/cycles/util/avxb.h230
-rw-r--r--intern/cycles/util/avxf.h379
-rw-r--r--intern/cycles/util/avxi.h732
-rw-r--r--intern/cycles/util/color.h48
-rw-r--r--intern/cycles/util/half.h16
-rw-r--r--intern/cycles/util/hash.h52
-rw-r--r--intern/cycles/util/math.h4
-rw-r--r--intern/cycles/util/math_float2.h133
-rw-r--r--intern/cycles/util/math_float3.h218
-rw-r--r--intern/cycles/util/math_float4.h475
-rw-r--r--intern/cycles/util/math_float8.h483
-rw-r--r--intern/cycles/util/math_int2.h17
-rw-r--r--intern/cycles/util/math_int3.h29
-rw-r--r--intern/cycles/util/math_int4.h216
-rw-r--r--intern/cycles/util/math_int8.h355
-rw-r--r--intern/cycles/util/math_intersect.h11
-rw-r--r--intern/cycles/util/sseb.h345
-rw-r--r--intern/cycles/util/ssef.h1090
-rw-r--r--intern/cycles/util/ssei.h633
-rw-r--r--intern/cycles/util/transform.cpp2
-rw-r--r--intern/cycles/util/transform.h31
-rw-r--r--intern/cycles/util/transform_inverse.h27
-rw-r--r--intern/cycles/util/types.h14
-rw-r--r--intern/cycles/util/types_float8.h29
-rw-r--r--intern/cycles/util/types_float8_impl.h63
-rw-r--r--intern/cycles/util/types_int8.h51
-rw-r--r--intern/cycles/util/types_int8_impl.h95
-rw-r--r--intern/ffmpeg/CMakeLists.txt1
-rw-r--r--intern/ffmpeg/ffmpeg_compat.h8
-rw-r--r--intern/ffmpeg/tests/ffmpeg_codecs.cc6
-rw-r--r--release/scripts/modules/rna_info.py6
-rw-r--r--source/blender/blenkernel/BKE_curves.hh25
-rw-r--r--source/blender/blenkernel/intern/curves_geometry.cc120
-rw-r--r--source/blender/blenkernel/intern/customdata.cc20
-rw-r--r--source/blender/blenkernel/intern/displist.cc3
-rw-r--r--source/blender/blenkernel/intern/freestyle.c2
-rw-r--r--source/blender/blenkernel/intern/image.cc4
-rw-r--r--source/blender/blenkernel/intern/node.cc6
-rw-r--r--source/blender/blenkernel/intern/pointcache.c6
-rw-r--r--source/blender/blenkernel/intern/writeffmpeg.c50
-rw-r--r--source/blender/blenlib/BLI_cache_mutex.hh106
-rw-r--r--source/blender/blenlib/BLI_string.h7
-rw-r--r--source/blender/blenlib/CMakeLists.txt2
-rw-r--r--source/blender/blenlib/intern/cache_mutex.cc25
-rw-r--r--source/blender/blenlib/intern/path_util.c4
-rw-r--r--source/blender/blenlib/intern/string.c15
-rw-r--r--source/blender/blenlib/intern/uuid.cc27
-rw-r--r--source/blender/blenlib/intern/winstuff.c14
-rw-r--r--source/blender/blenloader/intern/writefile.cc13
-rw-r--r--source/blender/compositor/intern/COM_Debug.cc2
-rw-r--r--source/blender/draw/intern/draw_manager_profiling.c48
-rw-r--r--source/blender/draw/intern/draw_pbvh.cc2
-rw-r--r--source/blender/editors/include/ED_image.h1
-rw-r--r--source/blender/editors/interface/interface_region_menu_popup.cc4
-rw-r--r--source/blender/editors/interface/interface_templates.c27
-rw-r--r--source/blender/editors/io/io_collada.c2
-rw-r--r--source/blender/editors/object/object_bake_api.c2
-rw-r--r--source/blender/editors/object/object_constraint.c4
-rw-r--r--source/blender/editors/object/object_remesh.cc2
-rw-r--r--source/blender/editors/render/render_internal.cc25
-rw-r--r--source/blender/editors/render/render_preview.cc8
-rw-r--r--source/blender/editors/sculpt_paint/paint_cursor.c29
-rw-r--r--source/blender/editors/sculpt_paint/paint_image.cc15
-rw-r--r--source/blender/editors/sculpt_paint/paint_image_proj.c2
-rw-r--r--source/blender/editors/sculpt_paint/paint_stroke.c2
-rw-r--r--source/blender/editors/space_graph/graph_select.c6
-rw-r--r--source/blender/editors/space_image/image_edit.c24
-rw-r--r--source/blender/editors/space_node/drawnode.cc17
-rw-r--r--source/blender/editors/space_node/node_edit.cc22
-rw-r--r--source/blender/editors/space_node/space_node.cc2
-rw-r--r--source/blender/editors/space_outliner/tree/tree_element_rna.cc7
-rw-r--r--source/blender/editors/space_view3d/view3d_draw.cc14
-rw-r--r--source/blender/freestyle/intern/python/BPy_ContextFunctions.cpp12
-rw-r--r--source/blender/freestyle/intern/python/BPy_Freestyle.cpp12
-rw-r--r--source/blender/freestyle/intern/python/BPy_IntegrationType.cpp12
-rw-r--r--source/blender/freestyle/intern/python/Interface1D/BPy_FEdge.cpp2
-rw-r--r--source/blender/freestyle/intern/python/Interface1D/BPy_Stroke.cpp2
-rw-r--r--source/blender/imbuf/intern/jp2.c3
-rw-r--r--source/blender/io/avi/intern/avi_codecs.c6
-rw-r--r--source/blender/io/collada/AnimationExporter.cpp3
-rw-r--r--source/blender/io/collada/GeometryExporter.cpp2
-rw-r--r--source/blender/io/gpencil/intern/gpencil_io_export_svg.cc7
-rw-r--r--source/blender/makesrna/intern/rna_color.c9
-rw-r--r--source/blender/makesrna/intern/rna_material.c21
-rw-r--r--source/blender/makesrna/intern/rna_nodetree.c9
-rw-r--r--source/blender/makesrna/intern/rna_particle.c4
-rw-r--r--source/blender/makesrna/intern/rna_sculpt_paint.c17
-rw-r--r--source/blender/modifiers/intern/MOD_ui_common.c3
-rw-r--r--source/blender/nodes/texture/nodes/node_texture_output.c2
-rw-r--r--source/blender/python/bmesh/bmesh_py_api.c16
-rw-r--r--source/blender/python/bmesh/bmesh_py_geometry.c16
-rw-r--r--source/blender/python/bmesh/bmesh_py_ops.c16
-rw-r--r--source/blender/python/bmesh/bmesh_py_types.c27
-rw-r--r--source/blender/python/bmesh/bmesh_py_types_customdata.c3
-rw-r--r--source/blender/python/bmesh/bmesh_py_types_meshdata.c2
-rw-r--r--source/blender/python/bmesh/bmesh_py_types_select.c2
-rw-r--r--source/blender/python/bmesh/bmesh_py_utils.c16
-rw-r--r--source/blender/python/generic/bgl.c44
-rw-r--r--source/blender/python/generic/bl_math_py_api.c16
-rw-r--r--source/blender/python/generic/blf_py_api.c16
-rw-r--r--source/blender/python/generic/idprop_py_api.c38
-rw-r--r--source/blender/python/generic/imbuf_py_api.c32
-rw-r--r--source/blender/python/gpu/gpu_py_api.c10
-rw-r--r--source/blender/python/gpu/gpu_py_buffer.c12
-rw-r--r--source/blender/python/gpu/gpu_py_capabilities.c11
-rw-r--r--source/blender/python/gpu/gpu_py_matrix.c11
-rw-r--r--source/blender/python/gpu/gpu_py_platform.c11
-rw-r--r--source/blender/python/gpu/gpu_py_select.c11
-rw-r--r--source/blender/python/gpu/gpu_py_shader.c11
-rw-r--r--source/blender/python/gpu/gpu_py_state.c11
-rw-r--r--source/blender/python/gpu/gpu_py_texture.c11
-rw-r--r--source/blender/python/gpu/gpu_py_types.c9
-rw-r--r--source/blender/python/intern/bpy_app_icons.c16
-rw-r--r--source/blender/python/intern/bpy_app_timers.c16
-rw-r--r--source/blender/python/intern/bpy_interface.c16
-rw-r--r--source/blender/python/intern/bpy_msgbus.c10
-rw-r--r--source/blender/python/intern/bpy_operator.c16
-rw-r--r--source/blender/python/intern/bpy_path.c16
-rw-r--r--source/blender/python/intern/bpy_props.c24
-rw-r--r--source/blender/python/intern/bpy_rna.c34
-rw-r--r--source/blender/python/intern/bpy_utils_previews.c16
-rw-r--r--source/blender/python/intern/bpy_utils_units.c16
-rw-r--r--source/blender/python/mathutils/mathutils.c16
-rw-r--r--source/blender/python/mathutils/mathutils_Color.c6
-rw-r--r--source/blender/python/mathutils/mathutils_Euler.c6
-rw-r--r--source/blender/python/mathutils/mathutils_Matrix.c12
-rw-r--r--source/blender/python/mathutils/mathutils_Quaternion.c6
-rw-r--r--source/blender/python/mathutils/mathutils_Vector.c6
-rw-r--r--source/blender/python/mathutils/mathutils_bvhtree.c16
-rw-r--r--source/blender/python/mathutils/mathutils_geometry.c16
-rw-r--r--source/blender/python/mathutils/mathutils_interpolate.c16
-rw-r--r--source/blender/python/mathutils/mathutils_kdtree.c16
-rw-r--r--source/blender/python/mathutils/mathutils_noise.c16
-rw-r--r--source/blender/sequencer/intern/disk_cache.c20
144 files changed, 2657 insertions, 5305 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 3779fdc697a..3fbb346e94f 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -328,6 +328,7 @@ set(SRC_UTIL_HEADERS
../util/math_int2.h
../util/math_int3.h
../util/math_int4.h
+ ../util/math_int8.h
../util/math_matrix.h
../util/projection.h
../util/rect.h
@@ -350,6 +351,8 @@ set(SRC_UTIL_HEADERS
../util/types_int3_impl.h
../util/types_int4.h
../util/types_int4_impl.h
+ ../util/types_int8.h
+ ../util/types_int8_impl.h
../util/types_spectrum.h
../util/types_uchar2.h
../util/types_uchar2_impl.h
diff --git a/intern/cycles/kernel/device/cpu/kernel.cpp b/intern/cycles/kernel/device/cpu/kernel.cpp
index 01087c96dd6..558431961ab 100644
--- a/intern/cycles/kernel/device/cpu/kernel.cpp
+++ b/intern/cycles/kernel/device/cpu/kernel.cpp
@@ -7,6 +7,7 @@
* one with SSE2 intrinsics.
*/
#if defined(__x86_64__) || defined(_M_X64)
+# define __KERNEL_SSE__
# define __KERNEL_SSE2__
#endif
@@ -29,11 +30,15 @@
# define __KERNEL_SSE41__
# endif
# ifdef __AVX__
-# define __KERNEL_SSE__
+# ifndef __KERNEL_SSE__
+# define __KERNEL_SSE__
+# endif
# define __KERNEL_AVX__
# endif
# ifdef __AVX2__
-# define __KERNEL_SSE__
+# ifndef __KERNEL_SSE__
+# define __KERNEL_SSE__
+# endif
# define __KERNEL_AVX2__
# endif
#endif
diff --git a/intern/cycles/kernel/svm/noise.h b/intern/cycles/kernel/svm/noise.h
index 31e77d87413..209195a03f1 100644
--- a/intern/cycles/kernel/svm/noise.h
+++ b/intern/cycles/kernel/svm/noise.h
@@ -39,11 +39,11 @@ ccl_device_noinline_cpu float perlin_1d(float x)
}
/* 2D, 3D, and 4D noise can be accelerated using SSE, so we first check if
- * SSE is supported, that is, if __KERNEL_SSE2__ is defined. If it is not
+ * SSE is supported, that is, if __KERNEL_SSE__ is defined. If it is not
* supported, we do a standard implementation, but if it is supported, we
* do an implementation using SSE intrinsics.
*/
-#if !defined(__KERNEL_SSE2__)
+#if !defined(__KERNEL_SSE__)
/* ** Standard Implementation ** */
@@ -250,18 +250,18 @@ ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w)
/* SSE Bilinear Interpolation:
*
- * The function takes two ssef inputs:
+ * The function takes two float4 inputs:
* - p : Contains the values at the points (v0, v1, v2, v3).
* - f : Contains the values (x, y, _, _). The third and fourth values are unused.
*
* The interpolation is done in two steps:
* 1. Interpolate (v0, v1) and (v2, v3) along the x axis to get g (g0, g1).
* (v2, v3) is generated by moving v2 and v3 to the first and second
- * places of the ssef using the shuffle mask <2, 3, 2, 3>. The third and
+ * places of the float4 using the shuffle mask <2, 3, 2, 3>. The third and
* fourth values are unused.
* 2. Interpolate g0 and g1 along the y axis to get the final value.
- * g1 is generated by populating an ssef with the second value of g.
- * Only the first value is important in the final ssef.
+ * g1 is generated by populating a float4 with the second value of g.
+ * Only the first value is important in the final float4.
*
* v1 v3 g1
* @ + + + + @ @ y
@@ -272,27 +272,27 @@ ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w)
* v0 v2 g0
*
*/
-ccl_device_inline ssef bi_mix(ssef p, ssef f)
+ccl_device_inline float4 bi_mix(float4 p, float4 f)
{
- ssef g = mix(p, shuffle<2, 3, 2, 3>(p), shuffle<0>(f));
+ float4 g = mix(p, shuffle<2, 3, 2, 3>(p), shuffle<0>(f));
return mix(g, shuffle<1>(g), shuffle<1>(f));
}
-ccl_device_inline ssef fade(const ssef &t)
+ccl_device_inline float4 fade(const float4 t)
{
- ssef a = madd(t, 6.0f, -15.0f);
- ssef b = madd(t, a, 10.0f);
+ float4 a = madd(t, make_float4(6.0f), make_float4(-15.0f));
+ float4 b = madd(t, a, make_float4(10.0f));
return (t * t) * (t * b);
}
/* Negate val if the nth bit of h is 1. */
# define negate_if_nth_bit(val, h, n) ((val) ^ cast(((h) & (1 << (n))) << (31 - (n))))
-ccl_device_inline ssef grad(const ssei &hash, const ssef &x, const ssef &y)
+ccl_device_inline float4 grad(const int4 hash, const float4 x, const float4 y)
{
- ssei h = hash & 7;
- ssef u = select(h < 4, x, y);
- ssef v = 2.0f * select(h < 4, y, x);
+ int4 h = hash & 7;
+ float4 u = select(h < 4, x, y);
+ float4 v = 2.0f * select(h < 4, y, x);
return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1);
}
@@ -310,28 +310,28 @@ ccl_device_inline ssef grad(const ssei &hash, const ssef &x, const ssef &y)
*/
ccl_device_noinline_cpu float perlin_2d(float x, float y)
{
- ssei XY;
- ssef fxy = floorfrac(ssef(x, y, 0.0f, 0.0f), &XY);
- ssef uv = fade(fxy);
+ int4 XY;
+ float4 fxy = floorfrac(make_float4(x, y, 0.0f, 0.0f), &XY);
+ float4 uv = fade(fxy);
- ssei XY1 = XY + 1;
- ssei X = shuffle<0, 0, 0, 0>(XY, XY1);
- ssei Y = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(XY, XY1));
+ int4 XY1 = XY + make_int4(1);
+ int4 X = shuffle<0, 0, 0, 0>(XY, XY1);
+ int4 Y = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(XY, XY1));
- ssei h = hash_ssei2(X, Y);
+ int4 h = hash_int4_2(X, Y);
- ssef fxy1 = fxy - 1.0f;
- ssef fx = shuffle<0, 0, 0, 0>(fxy, fxy1);
- ssef fy = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(fxy, fxy1));
+ float4 fxy1 = fxy - make_float4(1.0f);
+ float4 fx = shuffle<0, 0, 0, 0>(fxy, fxy1);
+ float4 fy = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(fxy, fxy1));
- ssef g = grad(h, fx, fy);
+ float4 g = grad(h, fx, fy);
return extract<0>(bi_mix(g, uv));
}
/* SSE Trilinear Interpolation:
*
- * The function takes three ssef inputs:
+ * The function takes three float4 inputs:
* - p : Contains the values at the points (v0, v1, v2, v3).
* - q : Contains the values at the points (v4, v5, v6, v7).
* - f : Contains the values (x, y, z, _). The fourth value is unused.
@@ -340,11 +340,11 @@ ccl_device_noinline_cpu float perlin_2d(float x, float y)
* 1. Interpolate p and q along the x axis to get s (s0, s1, s2, s3).
* 2. Interpolate (s0, s1) and (s2, s3) along the y axis to get g (g0, g1).
* (s2, s3) is generated by moving v2 and v3 to the first and second
- * places of the ssef using the shuffle mask <2, 3, 2, 3>. The third and
+ * places of the float4 using the shuffle mask <2, 3, 2, 3>. The third and
* fourth values are unused.
* 3. Interpolate g0 and g1 along the z axis to get the final value.
- * g1 is generated by populating an ssef with the second value of g.
- * Only the first value is important in the final ssef.
+ * g1 is generated by populating a float4 with the second value of g.
+ * Only the first value is important in the final float4.
*
* v3 v7
* @ + + + + + + @ s3 @
@@ -362,10 +362,10 @@ ccl_device_noinline_cpu float perlin_2d(float x, float y)
* @ + + + + + + @ @
* v0 v4 s0
*/
-ccl_device_inline ssef tri_mix(ssef p, ssef q, ssef f)
+ccl_device_inline float4 tri_mix(float4 p, float4 q, float4 f)
{
- ssef s = mix(p, q, shuffle<0>(f));
- ssef g = mix(s, shuffle<2, 3, 2, 3>(s), shuffle<1>(f));
+ float4 s = mix(p, q, shuffle<0>(f));
+ float4 g = mix(s, shuffle<2, 3, 2, 3>(s), shuffle<1>(f));
return mix(g, shuffle<1>(g), shuffle<2>(f));
}
@@ -374,24 +374,24 @@ ccl_device_inline ssef tri_mix(ssef p, ssef q, ssef f)
* supported, we do an SSE implementation, but if it is supported,
* we do an implementation using AVX intrinsics.
*/
-# if !defined(__KERNEL_AVX__)
+# if !defined(__KERNEL_AVX2__)
-ccl_device_inline ssef grad(const ssei &hash, const ssef &x, const ssef &y, const ssef &z)
+ccl_device_inline float4 grad(const int4 hash, const float4 x, const float4 y, const float4 z)
{
- ssei h = hash & 15;
- ssef u = select(h < 8, x, y);
- ssef vt = select((h == 12) | (h == 14), x, z);
- ssef v = select(h < 4, y, vt);
+ int4 h = hash & 15;
+ float4 u = select(h < 8, x, y);
+ float4 vt = select((h == 12) | (h == 14), x, z);
+ float4 v = select(h < 4, y, vt);
return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1);
}
-ccl_device_inline ssef
-grad(const ssei &hash, const ssef &x, const ssef &y, const ssef &z, const ssef &w)
+ccl_device_inline float4
+grad(const int4 hash, const float4 x, const float4 y, const float4 z, const float4 w)
{
- ssei h = hash & 31;
- ssef u = select(h < 24, x, y);
- ssef v = select(h < 16, y, z);
- ssef s = select(h < 8, z, w);
+ int4 h = hash & 31;
+ float4 u = select(h < 24, x, y);
+ float4 v = select(h < 16, y, z);
+ float4 s = select(h < 8, z, w);
return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1) + negate_if_nth_bit(s, h, 2);
}
@@ -401,7 +401,7 @@ grad(const ssei &hash, const ssef &x, const ssef &y, const ssef &z, const ssef &
* between two trilinear interpolations.
*
*/
-ccl_device_inline ssef quad_mix(ssef p, ssef q, ssef r, ssef s, ssef f)
+ccl_device_inline float4 quad_mix(float4 p, float4 q, float4 r, float4 s, float4 f)
{
return mix(tri_mix(p, q, f), tri_mix(r, s, f), shuffle<3>(f));
}
@@ -427,23 +427,23 @@ ccl_device_inline ssef quad_mix(ssef p, ssef q, ssef r, ssef s, ssef f)
*/
ccl_device_noinline_cpu float perlin_3d(float x, float y, float z)
{
- ssei XYZ;
- ssef fxyz = floorfrac(ssef(x, y, z, 0.0f), &XYZ);
- ssef uvw = fade(fxyz);
+ int4 XYZ;
+ float4 fxyz = floorfrac(make_float4(x, y, z, 0.0f), &XYZ);
+ float4 uvw = fade(fxyz);
- ssei XYZ1 = XYZ + 1;
- ssei Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1);
- ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1));
+ int4 XYZ1 = XYZ + make_int4(1);
+ int4 Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1);
+ int4 Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1));
- ssei h1 = hash_ssei3(shuffle<0>(XYZ), Y, Z);
- ssei h2 = hash_ssei3(shuffle<0>(XYZ1), Y, Z);
+ int4 h1 = hash_int4_3(shuffle<0>(XYZ), Y, Z);
+ int4 h2 = hash_int4_3(shuffle<0>(XYZ1), Y, Z);
- ssef fxyz1 = fxyz - 1.0f;
- ssef fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1);
- ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1));
+ float4 fxyz1 = fxyz - make_float4(1.0f);
+ float4 fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1);
+ float4 fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1));
- ssef g1 = grad(h1, shuffle<0>(fxyz), fy, fz);
- ssef g2 = grad(h2, shuffle<0>(fxyz1), fy, fz);
+ float4 g1 = grad(h1, shuffle<0>(fxyz), fy, fz);
+ float4 g2 = grad(h2, shuffle<0>(fxyz1), fy, fz);
return extract<0>(tri_mix(g1, g2, uvw));
}
@@ -481,29 +481,29 @@ ccl_device_noinline_cpu float perlin_3d(float x, float y, float z)
*/
ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w)
{
- ssei XYZW;
- ssef fxyzw = floorfrac(ssef(x, y, z, w), &XYZW);
- ssef uvws = fade(fxyzw);
+ int4 XYZW;
+ float4 fxyzw = floorfrac(make_float4(x, y, z, w), &XYZW);
+ float4 uvws = fade(fxyzw);
- ssei XYZW1 = XYZW + 1;
- ssei Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1);
- ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1));
+ int4 XYZW1 = XYZW + make_int4(1);
+ int4 Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1);
+ int4 Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1));
- ssei h1 = hash_ssei4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW));
- ssei h2 = hash_ssei4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW));
+ int4 h1 = hash_int4_4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW));
+ int4 h2 = hash_int4_4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW));
- ssei h3 = hash_ssei4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW1));
- ssei h4 = hash_ssei4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW1));
+ int4 h3 = hash_int4_4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW1));
+ int4 h4 = hash_int4_4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW1));
- ssef fxyzw1 = fxyzw - 1.0f;
- ssef fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1);
- ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1));
+ float4 fxyzw1 = fxyzw - make_float4(1.0f);
+ float4 fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1);
+ float4 fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1));
- ssef g1 = grad(h1, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw));
- ssef g2 = grad(h2, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw));
+ float4 g1 = grad(h1, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw));
+ float4 g2 = grad(h2, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw));
- ssef g3 = grad(h3, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw1));
- ssef g4 = grad(h4, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw1));
+ float4 g3 = grad(h3, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw1));
+ float4 g4 = grad(h4, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw1));
return extract<0>(quad_mix(g1, g2, g3, g4, uvws));
}
@@ -512,22 +512,22 @@ ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w)
/* AVX Implementation */
-ccl_device_inline avxf grad(const avxi &hash, const avxf &x, const avxf &y, const avxf &z)
+ccl_device_inline vfloat8 grad(const vint8 hash, const vfloat8 x, const vfloat8 y, const vfloat8 z)
{
- avxi h = hash & 15;
- avxf u = select(h < 8, x, y);
- avxf vt = select((h == 12) | (h == 14), x, z);
- avxf v = select(h < 4, y, vt);
+ vint8 h = hash & 15;
+ vfloat8 u = select(h < 8, x, y);
+ vfloat8 vt = select((h == 12) | (h == 14), x, z);
+ vfloat8 v = select(h < 4, y, vt);
return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1);
}
-ccl_device_inline avxf
-grad(const avxi &hash, const avxf &x, const avxf &y, const avxf &z, const avxf &w)
+ccl_device_inline vfloat8
+grad(const vint8 hash, const vfloat8 x, const vfloat8 y, const vfloat8 z, const vfloat8 w)
{
- avxi h = hash & 31;
- avxf u = select(h < 24, x, y);
- avxf v = select(h < 16, y, z);
- avxf s = select(h < 8, z, w);
+ vint8 h = hash & 31;
+ vfloat8 u = select(h < 24, x, y);
+ vfloat8 v = select(h < 16, y, z);
+ vfloat8 s = select(h < 8, z, w);
return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1) + negate_if_nth_bit(s, h, 2);
}
@@ -537,13 +537,13 @@ grad(const avxi &hash, const avxf &x, const avxf &y, const avxf &z, const avxf &
* 1. Interpolate p and q along the w axis to get s.
* 2. Trilinearly interpolate (s0, s1, s2, s3) and (s4, s5, s6, s7) to get the final
* value. (s0, s1, s2, s3) and (s4, s5, s6, s7) are generated by extracting the
- * low and high ssef from s.
+ * low and high float4 from s.
*
*/
-ccl_device_inline ssef quad_mix(avxf p, avxf q, ssef f)
+ccl_device_inline float4 quad_mix(vfloat8 p, vfloat8 q, float4 f)
{
- ssef fv = shuffle<3>(f);
- avxf s = mix(p, q, avxf(fv, fv));
+ float4 fv = shuffle<3>(f);
+ vfloat8 s = mix(p, q, make_vfloat8(fv, fv));
return tri_mix(low(s), high(s), f);
}
@@ -565,25 +565,25 @@ ccl_device_inline ssef quad_mix(avxf p, avxf q, ssef f)
*/
ccl_device_noinline_cpu float perlin_3d(float x, float y, float z)
{
- ssei XYZ;
- ssef fxyz = floorfrac(ssef(x, y, z, 0.0f), &XYZ);
- ssef uvw = fade(fxyz);
+ int4 XYZ;
+ float4 fxyz = floorfrac(make_float4(x, y, z, 0.0f), &XYZ);
+ float4 uvw = fade(fxyz);
- ssei XYZ1 = XYZ + 1;
- ssei X = shuffle<0>(XYZ);
- ssei X1 = shuffle<0>(XYZ1);
- ssei Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1);
- ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1));
+ int4 XYZ1 = XYZ + make_int4(1);
+ int4 X = shuffle<0>(XYZ);
+ int4 X1 = shuffle<0>(XYZ1);
+ int4 Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1);
+ int4 Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1));
- avxi h = hash_avxi3(avxi(X, X1), avxi(Y, Y), avxi(Z, Z));
+ vint8 h = hash_int8_3(make_vint8(X, X1), make_vint8(Y, Y), make_vint8(Z, Z));
- ssef fxyz1 = fxyz - 1.0f;
- ssef fx = shuffle<0>(fxyz);
- ssef fx1 = shuffle<0>(fxyz1);
- ssef fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1);
- ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1));
+ float4 fxyz1 = fxyz - make_float4(1.0f);
+ float4 fx = shuffle<0>(fxyz);
+ float4 fx1 = shuffle<0>(fxyz1);
+ float4 fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1);
+ float4 fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1));
- avxf g = grad(h, avxf(fx, fx1), avxf(fy, fy), avxf(fz, fz));
+ vfloat8 g = grad(h, make_vfloat8(fx, fx1), make_vfloat8(fy, fy), make_vfloat8(fz, fz));
return extract<0>(tri_mix(low(g), high(g), uvw));
}
@@ -617,31 +617,37 @@ ccl_device_noinline_cpu float perlin_3d(float x, float y, float z)
*/
ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w)
{
- ssei XYZW;
- ssef fxyzw = floorfrac(ssef(x, y, z, w), &XYZW);
- ssef uvws = fade(fxyzw);
-
- ssei XYZW1 = XYZW + 1;
- ssei X = shuffle<0>(XYZW);
- ssei X1 = shuffle<0>(XYZW1);
- ssei Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1);
- ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1));
- ssei W = shuffle<3>(XYZW);
- ssei W1 = shuffle<3>(XYZW1);
-
- avxi h1 = hash_avxi4(avxi(X, X1), avxi(Y, Y), avxi(Z, Z), avxi(W, W));
- avxi h2 = hash_avxi4(avxi(X, X1), avxi(Y, Y), avxi(Z, Z), avxi(W1, W1));
-
- ssef fxyzw1 = fxyzw - 1.0f;
- ssef fx = shuffle<0>(fxyzw);
- ssef fx1 = shuffle<0>(fxyzw1);
- ssef fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1);
- ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1));
- ssef fw = shuffle<3>(fxyzw);
- ssef fw1 = shuffle<3>(fxyzw1);
-
- avxf g1 = grad(h1, avxf(fx, fx1), avxf(fy, fy), avxf(fz, fz), avxf(fw, fw));
- avxf g2 = grad(h2, avxf(fx, fx1), avxf(fy, fy), avxf(fz, fz), avxf(fw1, fw1));
+ int4 XYZW;
+ float4 fxyzw = floorfrac(make_float4(x, y, z, w), &XYZW);
+ float4 uvws = fade(fxyzw);
+
+ int4 XYZW1 = XYZW + make_int4(1);
+ int4 X = shuffle<0>(XYZW);
+ int4 X1 = shuffle<0>(XYZW1);
+ int4 Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1);
+ int4 Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1));
+ int4 W = shuffle<3>(XYZW);
+ int4 W1 = shuffle<3>(XYZW1);
+
+ vint8 h1 = hash_int8_4(make_vint8(X, X1), make_vint8(Y, Y), make_vint8(Z, Z), make_vint8(W, W));
+ vint8 h2 = hash_int8_4(
+ make_vint8(X, X1), make_vint8(Y, Y), make_vint8(Z, Z), make_vint8(W1, W1));
+
+ float4 fxyzw1 = fxyzw - make_float4(1.0f);
+ float4 fx = shuffle<0>(fxyzw);
+ float4 fx1 = shuffle<0>(fxyzw1);
+ float4 fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1);
+ float4 fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1));
+ float4 fw = shuffle<3>(fxyzw);
+ float4 fw1 = shuffle<3>(fxyzw1);
+
+ vfloat8 g1 = grad(
+ h1, make_vfloat8(fx, fx1), make_vfloat8(fy, fy), make_vfloat8(fz, fz), make_vfloat8(fw, fw));
+ vfloat8 g2 = grad(h2,
+ make_vfloat8(fx, fx1),
+ make_vfloat8(fy, fy),
+ make_vfloat8(fz, fz),
+ make_vfloat8(fw1, fw1));
return extract<0>(quad_mix(g1, g2, uvws));
}
diff --git a/intern/cycles/test/CMakeLists.txt b/intern/cycles/test/CMakeLists.txt
index c3ae81ed1db..34e5a4770ea 100644
--- a/intern/cycles/test/CMakeLists.txt
+++ b/intern/cycles/test/CMakeLists.txt
@@ -45,17 +45,24 @@ set(SRC
# Disable AVX tests on macOS. Rosetta has problems running them, and other
# platforms should be enough to verify AVX operations are implemented correctly.
if(NOT APPLE)
+ if(CXX_HAS_SSE) # SSE2 float8 test: apply the SSE2 kernel flags to the source appended below.
+ list(APPEND SRC
+ util_float8_sse2_test.cpp
+ )
+ set_source_files_properties(util_float8_sse2_test.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
+ endif()
+
if(CXX_HAS_AVX)
list(APPEND SRC
- util_avxf_avx_test.cpp
+ util_float8_avx_test.cpp
)
- set_source_files_properties(util_avxf_avx_test.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
+ set_source_files_properties(util_float8_avx_test.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
endif()
if(CXX_HAS_AVX2)
list(APPEND SRC
- util_avxf_avx2_test.cpp
+ util_float8_avx2_test.cpp
)
- set_source_files_properties(util_avxf_avx2_test.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
+ set_source_files_properties(util_float8_avx2_test.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
endif()
endif()
diff --git a/intern/cycles/test/util_avxf_test.h b/intern/cycles/test/util_avxf_test.h
deleted file mode 100644
index 34d966cc1a4..00000000000
--- a/intern/cycles/test/util_avxf_test.h
+++ /dev/null
@@ -1,211 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2011-2022 Blender Foundation */
-
-#include "testing/testing.h"
-#include "util/system.h"
-#include "util/types.h"
-
-CCL_NAMESPACE_BEGIN
-
-static bool validate_cpu_capabilities()
-{
-
-#ifdef __KERNEL_AVX2__
- return system_cpu_support_avx2();
-#else
-# ifdef __KERNEL_AVX__
- return system_cpu_support_avx();
-# endif
-#endif
-}
-
-#define INIT_AVX_TEST \
- if (!validate_cpu_capabilities()) \
- return; \
-\
- const avxf avxf_a(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); \
- const avxf avxf_b(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); \
- const avxf avxf_c(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f);
-
-#define compare_vector_scalar(a, b) \
- for (size_t index = 0; index < a.size; index++) \
- EXPECT_FLOAT_EQ(a[index], b);
-
-#define compare_vector_vector(a, b) \
- for (size_t index = 0; index < a.size; index++) \
- EXPECT_FLOAT_EQ(a[index], b[index]);
-
-#define compare_vector_vector_near(a, b, abserror) \
- for (size_t index = 0; index < a.size; index++) \
- EXPECT_NEAR(a[index], b[index], abserror);
-
-#define basic_test_vv(a, b, op) \
- INIT_AVX_TEST \
- avxf c = a op b; \
- for (size_t i = 0; i < a.size; i++) \
- EXPECT_FLOAT_EQ(c[i], a[i] op b[i]);
-
-/* vector op float tests */
-#define basic_test_vf(a, b, op) \
- INIT_AVX_TEST \
- avxf c = a op b; \
- for (size_t i = 0; i < a.size; i++) \
- EXPECT_FLOAT_EQ(c[i], a[i] op b);
-
-static const float float_b = 1.5f;
-
-TEST(TEST_CATEGORY_NAME, avxf_add_vv){basic_test_vv(avxf_a, avxf_b, +)} TEST(TEST_CATEGORY_NAME,
- avxf_sub_vv){
- basic_test_vv(avxf_a, avxf_b, -)} TEST(TEST_CATEGORY_NAME, avxf_mul_vv){
- basic_test_vv(avxf_a, avxf_b, *)} TEST(TEST_CATEGORY_NAME, avxf_div_vv){
- basic_test_vv(avxf_a, avxf_b, /)} TEST(TEST_CATEGORY_NAME, avxf_add_vf){
- basic_test_vf(avxf_a, float_b, +)} TEST(TEST_CATEGORY_NAME, avxf_sub_vf){
- basic_test_vf(avxf_a, float_b, -)} TEST(TEST_CATEGORY_NAME, avxf_mul_vf){
- basic_test_vf(avxf_a, float_b, *)} TEST(TEST_CATEGORY_NAME,
- avxf_div_vf){basic_test_vf(avxf_a, float_b, /)}
-
-TEST(TEST_CATEGORY_NAME, avxf_ctor)
-{
- INIT_AVX_TEST
- compare_vector_scalar(avxf(7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f),
- static_cast<float>(index));
- compare_vector_scalar(avxf(1.0f), 1.0f);
- compare_vector_vector(avxf(1.0f, 2.0f), avxf(1.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 2.0f));
- compare_vector_vector(avxf(1.0f, 2.0f, 3.0f, 4.0f),
- avxf(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f));
- compare_vector_vector(avxf(make_float3(1.0f, 2.0f, 3.0f)),
- avxf(0.0f, 3.0f, 2.0f, 1.0f, 0.0f, 3.0f, 2.0f, 1.0f));
-}
-
-TEST(TEST_CATEGORY_NAME, avxf_sqrt)
-{
- INIT_AVX_TEST
- compare_vector_vector(mm256_sqrt(avxf(1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f)),
- avxf(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f));
-}
-
-TEST(TEST_CATEGORY_NAME, avxf_min_max)
-{
- INIT_AVX_TEST
- compare_vector_vector(min(avxf_a, avxf_b), avxf_a);
- compare_vector_vector(max(avxf_a, avxf_b), avxf_b);
-}
-
-TEST(TEST_CATEGORY_NAME, avxf_set_sign)
-{
- INIT_AVX_TEST
- avxf res = set_sign_bit<1, 0, 0, 0, 0, 0, 0, 0>(avxf_a);
- compare_vector_vector(res, avxf(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, -0.8f));
-}
-
-TEST(TEST_CATEGORY_NAME, avxf_msub)
-{
- INIT_AVX_TEST
- avxf res = msub(avxf_a, avxf_b, avxf_c);
- avxf exp = avxf((avxf_a[7] * avxf_b[7]) - avxf_c[7],
- (avxf_a[6] * avxf_b[6]) - avxf_c[6],
- (avxf_a[5] * avxf_b[5]) - avxf_c[5],
- (avxf_a[4] * avxf_b[4]) - avxf_c[4],
- (avxf_a[3] * avxf_b[3]) - avxf_c[3],
- (avxf_a[2] * avxf_b[2]) - avxf_c[2],
- (avxf_a[1] * avxf_b[1]) - avxf_c[1],
- (avxf_a[0] * avxf_b[0]) - avxf_c[0]);
- compare_vector_vector(res, exp);
-}
-
-TEST(TEST_CATEGORY_NAME, avxf_madd)
-{
- INIT_AVX_TEST
- avxf res = madd(avxf_a, avxf_b, avxf_c);
- avxf exp = avxf((avxf_a[7] * avxf_b[7]) + avxf_c[7],
- (avxf_a[6] * avxf_b[6]) + avxf_c[6],
- (avxf_a[5] * avxf_b[5]) + avxf_c[5],
- (avxf_a[4] * avxf_b[4]) + avxf_c[4],
- (avxf_a[3] * avxf_b[3]) + avxf_c[3],
- (avxf_a[2] * avxf_b[2]) + avxf_c[2],
- (avxf_a[1] * avxf_b[1]) + avxf_c[1],
- (avxf_a[0] * avxf_b[0]) + avxf_c[0]);
- compare_vector_vector(res, exp);
-}
-
-TEST(TEST_CATEGORY_NAME, avxf_nmadd)
-{
- INIT_AVX_TEST
- avxf res = nmadd(avxf_a, avxf_b, avxf_c);
- avxf exp = avxf(avxf_c[7] - (avxf_a[7] * avxf_b[7]),
- avxf_c[6] - (avxf_a[6] * avxf_b[6]),
- avxf_c[5] - (avxf_a[5] * avxf_b[5]),
- avxf_c[4] - (avxf_a[4] * avxf_b[4]),
- avxf_c[3] - (avxf_a[3] * avxf_b[3]),
- avxf_c[2] - (avxf_a[2] * avxf_b[2]),
- avxf_c[1] - (avxf_a[1] * avxf_b[1]),
- avxf_c[0] - (avxf_a[0] * avxf_b[0]));
- compare_vector_vector(res, exp);
-}
-
-TEST(TEST_CATEGORY_NAME, avxf_compare)
-{
- INIT_AVX_TEST
- avxf a(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f);
- avxf b(7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f);
- avxb res = a <= b;
- int exp[8] = {
- a[0] <= b[0] ? -1 : 0,
- a[1] <= b[1] ? -1 : 0,
- a[2] <= b[2] ? -1 : 0,
- a[3] <= b[3] ? -1 : 0,
- a[4] <= b[4] ? -1 : 0,
- a[5] <= b[5] ? -1 : 0,
- a[6] <= b[6] ? -1 : 0,
- a[7] <= b[7] ? -1 : 0,
- };
- compare_vector_vector(res, exp);
-}
-
-TEST(TEST_CATEGORY_NAME, avxf_permute)
-{
- INIT_AVX_TEST
- avxf res = permute<3, 0, 1, 7, 6, 5, 2, 4>(avxf_b);
- compare_vector_vector(res, avxf(4.0f, 6.0f, 3.0f, 2.0f, 1.0f, 7.0f, 8.0f, 5.0f));
-}
-
-TEST(TEST_CATEGORY_NAME, avxf_blend)
-{
- INIT_AVX_TEST
- avxf res = blend<0, 0, 1, 0, 1, 0, 1, 0>(avxf_a, avxf_b);
- compare_vector_vector(res, avxf(0.1f, 0.2f, 3.0f, 0.4f, 5.0f, 0.6f, 7.0f, 0.8f));
-}
-
-TEST(TEST_CATEGORY_NAME, avxf_shuffle)
-{
- INIT_AVX_TEST
- avxf res = shuffle<0, 1, 2, 3, 1, 3, 2, 0>(avxf_a);
- compare_vector_vector(res, avxf(0.4f, 0.2f, 0.1f, 0.3f, 0.5f, 0.6f, 0.7f, 0.8f));
-}
-
-TEST(TEST_CATEGORY_NAME, avxf_cross)
-{
- INIT_AVX_TEST
- avxf res = cross(avxf_b, avxf_c);
- compare_vector_vector_near(res,
- avxf(0.0f,
- -9.5367432e-07f,
- 0.0f,
- 4.7683716e-07f,
- 0.0f,
- -3.8146973e-06f,
- 3.8146973e-06f,
- 3.8146973e-06f),
- 0.000002000f);
-}
-
-TEST(TEST_CATEGORY_NAME, avxf_dot3)
-{
- INIT_AVX_TEST
- float den, den2;
- dot3(avxf_a, avxf_b, den, den2);
- EXPECT_FLOAT_EQ(den, 14.9f);
- EXPECT_FLOAT_EQ(den2, 2.9f);
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/test/util_avxf_avx2_test.cpp b/intern/cycles/test/util_float8_avx2_test.cpp
index 992c4d9a913..4682dce5b23 100644
--- a/intern/cycles/test/util_avxf_avx2_test.cpp
+++ b/intern/cycles/test/util_float8_avx2_test.cpp
@@ -1,11 +1,13 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
+#define __KERNEL_SSE__
+#define __KERNEL_AVX__
#define __KERNEL_AVX2__
#define TEST_CATEGORY_NAME util_avx2
#if (defined(i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)) && \
defined(__AVX2__)
-# include "util_avxf_test.h"
+# include "util_float8_test.h"
#endif
diff --git a/intern/cycles/test/util_avxf_avx_test.cpp b/intern/cycles/test/util_float8_avx_test.cpp
index abb98cdfb38..34fe750e766 100644
--- a/intern/cycles/test/util_avxf_avx_test.cpp
+++ b/intern/cycles/test/util_float8_avx_test.cpp
@@ -1,11 +1,12 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
+#define __KERNEL_SSE__
#define __KERNEL_AVX__
#define TEST_CATEGORY_NAME util_avx
#if (defined(i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)) && \
defined(__AVX__)
-# include "util_avxf_test.h"
+# include "util_float8_test.h"
#endif
diff --git a/intern/cycles/test/util_float8_sse2_test.cpp b/intern/cycles/test/util_float8_sse2_test.cpp
new file mode 100644
index 00000000000..ba8952a2b08
--- /dev/null
+++ b/intern/cycles/test/util_float8_sse2_test.cpp
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#define __KERNEL_SSE__
+#define __KERNEL_SSE2__
+
+#define TEST_CATEGORY_NAME util_sse2
+
+#if (defined(i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)) && \
+ defined(__SSE2__)
+# include "util_float8_test.h"
+#endif
diff --git a/intern/cycles/test/util_float8_test.h b/intern/cycles/test/util_float8_test.h
new file mode 100644
index 00000000000..54701afaf8b
--- /dev/null
+++ b/intern/cycles/test/util_float8_test.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#include "testing/testing.h"
+#include "util/math.h"
+#include "util/system.h"
+#include "util/types.h"
+
+CCL_NAMESPACE_BEGIN
+
+static bool validate_cpu_capabilities()
+{
+
+#if defined(__KERNEL_AVX2__)
+ return system_cpu_support_avx2();
+#elif defined(__KERNEL_AVX__)
+ return system_cpu_support_avx();
+#elif defined(__KERNEL_SSE2__)
+ return system_cpu_support_sse2();
+#else
+ return false;
+#endif
+}
+
+#define INIT_FLOAT8_TEST \
+ if (!validate_cpu_capabilities()) \
+ return; \
+\
+ const vfloat8 float8_a = make_vfloat8(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); \
+ const vfloat8 float8_b = make_vfloat8(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); \
+ const vfloat8 float8_c = make_vfloat8(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f);
+
+#define compare_vector_scalar(a, b) \
+ for (size_t index = 0; index < 8; index++) \
+ EXPECT_FLOAT_EQ(a[index], b);
+
+#define compare_vector_vector(a, b) \
+ for (size_t index = 0; index < 8; index++) \
+ EXPECT_FLOAT_EQ(a[index], b[index]);
+
+#define compare_vector_vector_near(a, b, abserror) \
+ for (size_t index = 0; index < 8; index++) \
+ EXPECT_NEAR(a[index], b[index], abserror);
+
+#define basic_test_vv(a, b, op) \
+ INIT_FLOAT8_TEST \
+ vfloat8 c = a op b; \
+ for (size_t i = 0; i < 8; i++) \
+ EXPECT_FLOAT_EQ(c[i], a[i] op b[i]);
+
+/* vector op float tests */
+#define basic_test_vf(a, b, op) \
+ INIT_FLOAT8_TEST \
+ vfloat8 c = a op b; \
+ for (size_t i = 0; i < 8; i++) \
+ EXPECT_FLOAT_EQ(c[i], a[i] op b);
+
+static const float float_b = 1.5f;
+
+TEST(TEST_CATEGORY_NAME,
+ float8_add_vv){basic_test_vv(float8_a, float8_b, +)} TEST(TEST_CATEGORY_NAME, float8_sub_vv){
+ basic_test_vv(float8_a, float8_b, -)} TEST(TEST_CATEGORY_NAME, float8_mul_vv){
+ basic_test_vv(float8_a, float8_b, *)} TEST(TEST_CATEGORY_NAME, float8_div_vv){
+ basic_test_vv(float8_a, float8_b, /)} TEST(TEST_CATEGORY_NAME, float8_add_vf){
+ basic_test_vf(float8_a, float_b, +)} TEST(TEST_CATEGORY_NAME, float8_sub_vf){
+ basic_test_vf(float8_a, float_b, -)} TEST(TEST_CATEGORY_NAME, float8_mul_vf){
+ basic_test_vf(float8_a, float_b, *)} TEST(TEST_CATEGORY_NAME,
+ float8_div_vf){basic_test_vf(float8_a, float_b, /)}
+
+TEST(TEST_CATEGORY_NAME, float8_ctor)
+{
+ INIT_FLOAT8_TEST
+ compare_vector_scalar(make_vfloat8(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f),
+ static_cast<float>(index));
+ compare_vector_scalar(make_vfloat8(1.0f), 1.0f);
+}
+
+TEST(TEST_CATEGORY_NAME, float8_sqrt)
+{
+ INIT_FLOAT8_TEST
+ compare_vector_vector(sqrt(make_vfloat8(1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f)),
+ make_vfloat8(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f));
+}
+
+TEST(TEST_CATEGORY_NAME, float8_min_max)
+{
+ INIT_FLOAT8_TEST
+ compare_vector_vector(min(float8_a, float8_b), float8_a);
+ compare_vector_vector(max(float8_a, float8_b), float8_b);
+}
+
+TEST(TEST_CATEGORY_NAME, float8_shuffle)
+{
+ INIT_FLOAT8_TEST
+ vfloat8 res0 = shuffle<0, 1, 2, 3, 1, 3, 2, 0>(float8_a);
+ compare_vector_vector(res0, make_vfloat8(0.1f, 0.2f, 0.3f, 0.4f, 0.6f, 0.8f, 0.7f, 0.5f));
+ vfloat8 res1 = shuffle<3>(float8_a);
+ compare_vector_vector(res1, make_vfloat8(0.4f, 0.4f, 0.4f, 0.4f, 0.8f, 0.8f, 0.8f, 0.8f));
+ vfloat8 res2 = shuffle<3, 2, 1, 0>(float8_a, float8_b);
+ compare_vector_vector(res2, make_vfloat8(0.4f, 0.3f, 2.0f, 1.0f, 0.8f, 0.7f, 6.0f, 5.0f));
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt
index 57628f99e35..7f8f4a5ce76 100644
--- a/intern/cycles/util/CMakeLists.txt
+++ b/intern/cycles/util/CMakeLists.txt
@@ -69,6 +69,7 @@ set(SRC_HEADERS
math_int2.h
math_int3.h
math_int4.h
+ math_int8.h
math_matrix.h
md5.h
murmurhash.h
@@ -85,13 +86,7 @@ set(SRC_HEADERS
rect.h
set.h
simd.h
- avxf.h
- avxb.h
- avxi.h
semaphore.h
- sseb.h
- ssef.h
- ssei.h
stack_allocator.h
static_assert.h
stats.h
@@ -118,6 +113,8 @@ set(SRC_HEADERS
types_int3_impl.h
types_int4.h
types_int4_impl.h
+ types_int8.h
+ types_int8_impl.h
types_spectrum.h
types_uchar2.h
types_uchar2_impl.h
diff --git a/intern/cycles/util/avxb.h b/intern/cycles/util/avxb.h
deleted file mode 100644
index fa3cb565309..00000000000
--- a/intern/cycles/util/avxb.h
+++ /dev/null
@@ -1,230 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2011-2013 Intel Corporation
- * Modifications Copyright 2014-2022 Blender Foundation. */
-
-#ifndef __UTIL_AVXB_H__
-#define __UTIL_AVXB_H__
-
-CCL_NAMESPACE_BEGIN
-
-struct avxf;
-
-/*! 4-wide SSE bool type. */
-struct avxb {
- typedef avxb Mask; // mask type
- typedef avxf Float; // float type
-
- enum { size = 8 }; // number of SIMD elements
- union {
- __m256 m256;
- int32_t v[8];
- }; // data
-
- ////////////////////////////////////////////////////////////////////////////////
- /// Constructors, Assignment & Cast Operators
- ////////////////////////////////////////////////////////////////////////////////
-
- __forceinline avxb()
- {
- }
- __forceinline avxb(const avxb &other)
- {
- m256 = other.m256;
- }
- __forceinline avxb &operator=(const avxb &other)
- {
- m256 = other.m256;
- return *this;
- }
-
- __forceinline avxb(const __m256 input) : m256(input)
- {
- }
- __forceinline avxb(const __m128 &a, const __m128 &b)
- : m256(_mm256_insertf128_ps(_mm256_castps128_ps256(a), b, 1))
- {
- }
- __forceinline operator const __m256 &(void) const
- {
- return m256;
- }
- __forceinline operator const __m256i(void) const
- {
- return _mm256_castps_si256(m256);
- }
- __forceinline operator const __m256d(void) const
- {
- return _mm256_castps_pd(m256);
- }
-
- ////////////////////////////////////////////////////////////////////////////////
- /// Constants
- ////////////////////////////////////////////////////////////////////////////////
-
- __forceinline avxb(FalseTy) : m256(_mm256_setzero_ps())
- {
- }
- __forceinline avxb(TrueTy) : m256(_mm256_castsi256_ps(_mm256_set1_epi32(-1)))
- {
- }
-
- ////////////////////////////////////////////////////////////////////////////////
- /// Array Access
- ////////////////////////////////////////////////////////////////////////////////
-
- __forceinline bool operator[](const size_t i) const
- {
- assert(i < 8);
- return (_mm256_movemask_ps(m256) >> i) & 1;
- }
- __forceinline int32_t &operator[](const size_t i)
- {
- assert(i < 8);
- return v[i];
- }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-/// Unary Operators
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const avxb operator!(const avxb &a)
-{
- return _mm256_xor_ps(a, avxb(True));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Binary Operators
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const avxb operator&(const avxb &a, const avxb &b)
-{
- return _mm256_and_ps(a, b);
-}
-__forceinline const avxb operator|(const avxb &a, const avxb &b)
-{
- return _mm256_or_ps(a, b);
-}
-__forceinline const avxb operator^(const avxb &a, const avxb &b)
-{
- return _mm256_xor_ps(a, b);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Assignment Operators
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const avxb operator&=(avxb &a, const avxb &b)
-{
- return a = a & b;
-}
-__forceinline const avxb operator|=(avxb &a, const avxb &b)
-{
- return a = a | b;
-}
-__forceinline const avxb operator^=(avxb &a, const avxb &b)
-{
- return a = a ^ b;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Comparison Operators + Select
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const avxb operator!=(const avxb &a, const avxb &b)
-{
- return _mm256_xor_ps(a, b);
-}
-__forceinline const avxb operator==(const avxb &a, const avxb &b)
-{
-#ifdef __KERNEL_AVX2__
- return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b));
-#else
- __m128i a_lo = _mm_castps_si128(_mm256_extractf128_ps(a, 0));
- __m128i a_hi = _mm_castps_si128(_mm256_extractf128_ps(a, 1));
- __m128i b_lo = _mm_castps_si128(_mm256_extractf128_ps(b, 0));
- __m128i b_hi = _mm_castps_si128(_mm256_extractf128_ps(b, 1));
- __m128i c_lo = _mm_cmpeq_epi32(a_lo, b_lo);
- __m128i c_hi = _mm_cmpeq_epi32(a_hi, b_hi);
- __m256i result = _mm256_insertf128_si256(_mm256_castsi128_si256(c_lo), c_hi, 1);
- return _mm256_castsi256_ps(result);
-#endif
-}
-
-__forceinline const avxb select(const avxb &m, const avxb &t, const avxb &f)
-{
-#if defined(__KERNEL_SSE41__)
- return _mm256_blendv_ps(f, t, m);
-#else
- return _mm256_or_ps(_mm256_and_ps(m, t), _mm256_andnot_ps(m, f));
-#endif
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Movement/Shifting/Shuffling Functions
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const avxb unpacklo(const avxb &a, const avxb &b)
-{
- return _mm256_unpacklo_ps(a, b);
-}
-__forceinline const avxb unpackhi(const avxb &a, const avxb &b)
-{
- return _mm256_unpackhi_ps(a, b);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Reduction Operations
-////////////////////////////////////////////////////////////////////////////////
-
-#if defined(__KERNEL_SSE41__)
-__forceinline uint32_t popcnt(const avxb &a)
-{
- return _mm_popcnt_u32(_mm256_movemask_ps(a));
-}
-#else
-__forceinline uint32_t popcnt(const avxb &a)
-{
- return bool(a[0]) + bool(a[1]) + bool(a[2]) + bool(a[3]) + bool(a[4]) + bool(a[5]) + bool(a[6]) +
- bool(a[7]);
-}
-#endif
-
-__forceinline bool reduce_and(const avxb &a)
-{
- return _mm256_movemask_ps(a) == 0xf;
-}
-__forceinline bool reduce_or(const avxb &a)
-{
- return _mm256_movemask_ps(a) != 0x0;
-}
-__forceinline bool all(const avxb &b)
-{
- return _mm256_movemask_ps(b) == 0xf;
-}
-__forceinline bool any(const avxb &b)
-{
- return _mm256_movemask_ps(b) != 0x0;
-}
-__forceinline bool none(const avxb &b)
-{
- return _mm256_movemask_ps(b) == 0x0;
-}
-
-__forceinline uint32_t movemask(const avxb &a)
-{
- return _mm256_movemask_ps(a);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Debug Functions
-////////////////////////////////////////////////////////////////////////////////
-
-ccl_device_inline void print_avxb(const char *label, const avxb &a)
-{
- printf("%s: %d %d %d %d %d %d %d %d\n", label, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]);
-}
-
-CCL_NAMESPACE_END
-
-#endif
diff --git a/intern/cycles/util/avxf.h b/intern/cycles/util/avxf.h
deleted file mode 100644
index 03a13f30490..00000000000
--- a/intern/cycles/util/avxf.h
+++ /dev/null
@@ -1,379 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2016 Intel Corporation */
-
-#ifndef __UTIL_AVXF_H__
-#define __UTIL_AVXF_H__
-
-CCL_NAMESPACE_BEGIN
-
-struct avxb;
-
-struct avxf {
- typedef avxf Float;
-
- enum { size = 8 }; /* Number of SIMD elements. */
-
- union {
- __m256 m256;
- float f[8];
- int i[8];
- };
-
- __forceinline avxf()
- {
- }
- __forceinline avxf(const avxf &other)
- {
- m256 = other.m256;
- }
- __forceinline avxf &operator=(const avxf &other)
- {
- m256 = other.m256;
- return *this;
- }
-
- __forceinline avxf(const __m256 a) : m256(a)
- {
- }
- __forceinline avxf(const __m256i a) : m256(_mm256_castsi256_ps(a))
- {
- }
-
- __forceinline operator const __m256 &() const
- {
- return m256;
- }
- __forceinline operator __m256 &()
- {
- return m256;
- }
-
- __forceinline avxf(float a) : m256(_mm256_set1_ps(a))
- {
- }
-
- __forceinline avxf(float high32x4, float low32x4)
- : m256(_mm256_set_ps(
- high32x4, high32x4, high32x4, high32x4, low32x4, low32x4, low32x4, low32x4))
- {
- }
-
- __forceinline avxf(float a3, float a2, float a1, float a0)
- : m256(_mm256_set_ps(a3, a2, a1, a0, a3, a2, a1, a0))
- {
- }
-
- __forceinline avxf(
- float a7, float a6, float a5, float a4, float a3, float a2, float a1, float a0)
- : m256(_mm256_set_ps(a7, a6, a5, a4, a3, a2, a1, a0))
- {
- }
-
- __forceinline avxf(float3 a) : m256(_mm256_set_ps(a.w, a.z, a.y, a.x, a.w, a.z, a.y, a.x))
- {
- }
-
- __forceinline avxf(int a3, int a2, int a1, int a0)
- {
- const __m256i foo = _mm256_set_epi32(a3, a2, a1, a0, a3, a2, a1, a0);
- m256 = _mm256_castsi256_ps(foo);
- }
-
- __forceinline avxf(int a7, int a6, int a5, int a4, int a3, int a2, int a1, int a0)
- {
- const __m256i foo = _mm256_set_epi32(a7, a6, a5, a4, a3, a2, a1, a0);
- m256 = _mm256_castsi256_ps(foo);
- }
-
- __forceinline avxf(__m128 a, __m128 b)
- {
- const __m256 foo = _mm256_castps128_ps256(a);
- m256 = _mm256_insertf128_ps(foo, b, 1);
- }
-
- __forceinline const float &operator[](const size_t i) const
- {
- assert(i < 8);
- return f[i];
- }
- __forceinline float &operator[](const size_t i)
- {
- assert(i < 8);
- return f[i];
- }
-};
-
-__forceinline avxf cross(const avxf &a, const avxf &b)
-{
- avxf r(0.0,
- a[4] * b[5] - a[5] * b[4],
- a[6] * b[4] - a[4] * b[6],
- a[5] * b[6] - a[6] * b[5],
- 0.0,
- a[0] * b[1] - a[1] * b[0],
- a[2] * b[0] - a[0] * b[2],
- a[1] * b[2] - a[2] * b[1]);
- return r;
-}
-
-__forceinline void dot3(const avxf &a, const avxf &b, float &den, float &den2)
-{
- const avxf t = _mm256_mul_ps(a.m256, b.m256);
- den = ((float *)&t)[0] + ((float *)&t)[1] + ((float *)&t)[2];
- den2 = ((float *)&t)[4] + ((float *)&t)[5] + ((float *)&t)[6];
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Unary Operators
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const avxf cast(const __m256i &a)
-{
- return _mm256_castsi256_ps(a);
-}
-
-__forceinline const avxf mm256_sqrt(const avxf &a)
-{
- return _mm256_sqrt_ps(a.m256);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Binary Operators
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const avxf operator+(const avxf &a, const avxf &b)
-{
- return _mm256_add_ps(a.m256, b.m256);
-}
-__forceinline const avxf operator+(const avxf &a, const float &b)
-{
- return a + avxf(b);
-}
-__forceinline const avxf operator+(const float &a, const avxf &b)
-{
- return avxf(a) + b;
-}
-
-__forceinline const avxf operator-(const avxf &a, const avxf &b)
-{
- return _mm256_sub_ps(a.m256, b.m256);
-}
-__forceinline const avxf operator-(const avxf &a, const float &b)
-{
- return a - avxf(b);
-}
-__forceinline const avxf operator-(const float &a, const avxf &b)
-{
- return avxf(a) - b;
-}
-
-__forceinline const avxf operator*(const avxf &a, const avxf &b)
-{
- return _mm256_mul_ps(a.m256, b.m256);
-}
-__forceinline const avxf operator*(const avxf &a, const float &b)
-{
- return a * avxf(b);
-}
-__forceinline const avxf operator*(const float &a, const avxf &b)
-{
- return avxf(a) * b;
-}
-
-__forceinline const avxf operator/(const avxf &a, const avxf &b)
-{
- return _mm256_div_ps(a.m256, b.m256);
-}
-__forceinline const avxf operator/(const avxf &a, const float &b)
-{
- return a / avxf(b);
-}
-__forceinline const avxf operator/(const float &a, const avxf &b)
-{
- return avxf(a) / b;
-}
-
-__forceinline const avxf operator|(const avxf &a, const avxf &b)
-{
- return _mm256_or_ps(a.m256, b.m256);
-}
-
-__forceinline const avxf operator^(const avxf &a, const avxf &b)
-{
- return _mm256_xor_ps(a.m256, b.m256);
-}
-
-__forceinline const avxf operator&(const avxf &a, const avxf &b)
-{
- return _mm256_and_ps(a.m256, b.m256);
-}
-
-__forceinline const avxf max(const avxf &a, const avxf &b)
-{
- return _mm256_max_ps(a.m256, b.m256);
-}
-__forceinline const avxf min(const avxf &a, const avxf &b)
-{
- return _mm256_min_ps(a.m256, b.m256);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Movement/Shifting/Shuffling Functions
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const avxf shuffle(const avxf &a, const __m256i &shuf)
-{
- return _mm256_permutevar_ps(a, shuf);
-}
-
-template<int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7>
-__forceinline const avxf shuffle(const avxf &a)
-{
- return _mm256_permutevar_ps(a, _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0));
-}
-
-template<size_t i0, size_t i1, size_t i2, size_t i3>
-__forceinline const avxf shuffle(const avxf &a, const avxf &b)
-{
- return _mm256_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0));
-}
-template<size_t i0, size_t i1, size_t i2, size_t i3>
-__forceinline const avxf shuffle(const avxf &a)
-{
- return shuffle<i0, i1, i2, i3>(a, a);
-}
-template<size_t i0> __forceinline const avxf shuffle(const avxf &a, const avxf &b)
-{
- return shuffle<i0, i0, i0, i0>(a, b);
-}
-template<size_t i0> __forceinline const avxf shuffle(const avxf &a)
-{
- return shuffle<i0>(a, a);
-}
-
-template<size_t i> __forceinline float extract(const avxf &a)
-{
- __m256 b = shuffle<i, i, i, i>(a).m256;
- return _mm256_cvtss_f32(b);
-}
-template<> __forceinline float extract<0>(const avxf &a)
-{
- return _mm256_cvtss_f32(a.m256);
-}
-
-__forceinline ssef low(const avxf &a)
-{
- return _mm256_extractf128_ps(a.m256, 0);
-}
-__forceinline ssef high(const avxf &a)
-{
- return _mm256_extractf128_ps(a.m256, 1);
-}
-
-template<int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7>
-__forceinline const avxf permute(const avxf &a)
-{
-#ifdef __KERNEL_AVX2__
- return _mm256_permutevar8x32_ps(a, _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0));
-#else
- float temp[8];
- _mm256_storeu_ps((float *)&temp, a);
- return avxf(temp[i7], temp[i6], temp[i5], temp[i4], temp[i3], temp[i2], temp[i1], temp[i0]);
-#endif
-}
-
-template<int S0, int S1, int S2, int S3, int S4, int S5, int S6, int S7>
-ccl_device_inline const avxf set_sign_bit(const avxf &a)
-{
- return a ^ avxf(S7 << 31, S6 << 31, S5 << 31, S4 << 31, S3 << 31, S2 << 31, S1 << 31, S0 << 31);
-}
-
-template<size_t S0, size_t S1, size_t S2, size_t S3, size_t S4, size_t S5, size_t S6, size_t S7>
-ccl_device_inline const avxf blend(const avxf &a, const avxf &b)
-{
- return _mm256_blend_ps(
- a, b, S7 << 0 | S6 << 1 | S5 << 2 | S4 << 3 | S3 << 4 | S2 << 5 | S1 << 6 | S0 << 7);
-}
-
-template<size_t S0, size_t S1, size_t S2, size_t S3>
-ccl_device_inline const avxf blend(const avxf &a, const avxf &b)
-{
- return blend<S0, S1, S2, S3, S0, S1, S2, S3>(a, b);
-}
-
-//#if defined(__KERNEL_SSE41__)
-__forceinline avxf maxi(const avxf &a, const avxf &b)
-{
- const avxf ci = _mm256_max_ps(a, b);
- return ci;
-}
-
-__forceinline avxf mini(const avxf &a, const avxf &b)
-{
- const avxf ci = _mm256_min_ps(a, b);
- return ci;
-}
-//#endif
-
-////////////////////////////////////////////////////////////////////////////////
-/// Ternary Operators
-////////////////////////////////////////////////////////////////////////////////
-__forceinline const avxf madd(const avxf &a, const avxf &b, const avxf &c)
-{
-#ifdef __KERNEL_AVX2__
- return _mm256_fmadd_ps(a, b, c);
-#else
- return c + (a * b);
-#endif
-}
-
-__forceinline const avxf nmadd(const avxf &a, const avxf &b, const avxf &c)
-{
-#ifdef __KERNEL_AVX2__
- return _mm256_fnmadd_ps(a, b, c);
-#else
- return c - (a * b);
-#endif
-}
-__forceinline const avxf msub(const avxf &a, const avxf &b, const avxf &c)
-{
-#ifdef __KERNEL_AVX2__
- return _mm256_fmsub_ps(a, b, c);
-#else
- return (a * b) - c;
-#endif
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Comparison Operators + Select
-////////////////////////////////////////////////////////////////////////////////
-__forceinline const avxb operator<=(const avxf &a, const avxf &b)
-{
- return _mm256_cmp_ps(a.m256, b.m256, _CMP_LE_OS);
-}
-
-__forceinline const avxf select(const avxb &m, const avxf &t, const avxf &f)
-{
- return _mm256_blendv_ps(f, t, m);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Common Functions
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline avxf mix(const avxf &a, const avxf &b, const avxf &t)
-{
- return madd(t, b, (avxf(1.0f) - t) * a);
-}
-
-#ifndef _mm256_set_m128
-# define _mm256_set_m128(/* __m128 */ hi, /* __m128 */ lo) \
- _mm256_insertf128_ps(_mm256_castps128_ps256(lo), (hi), 0x1)
-#endif
-
-#define _mm256_loadu2_m128(/* float const* */ hiaddr, /* float const* */ loaddr) \
- _mm256_set_m128(_mm_loadu_ps(hiaddr), _mm_loadu_ps(loaddr))
-
-CCL_NAMESPACE_END
-
-#endif
diff --git a/intern/cycles/util/avxi.h b/intern/cycles/util/avxi.h
deleted file mode 100644
index 966a04a6b97..00000000000
--- a/intern/cycles/util/avxi.h
+++ /dev/null
@@ -1,732 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2009-2013 Intel Corporation */
-
-#ifndef __UTIL_AVXI_H__
-#define __UTIL_AVXI_H__
-
-CCL_NAMESPACE_BEGIN
-
-struct avxb;
-
-struct avxi {
- typedef avxb Mask; // mask type for us
- enum { size = 8 }; // number of SIMD elements
- union { // data
- __m256i m256;
-#if !defined(__KERNEL_AVX2__)
- struct {
- __m128i l, h;
- };
-#endif
- int32_t v[8];
- };
-
- ////////////////////////////////////////////////////////////////////////////////
- /// Constructors, Assignment & Cast Operators
- ////////////////////////////////////////////////////////////////////////////////
-
- __forceinline avxi()
- {
- }
- __forceinline avxi(const avxi &a)
- {
- m256 = a.m256;
- }
- __forceinline avxi &operator=(const avxi &a)
- {
- m256 = a.m256;
- return *this;
- }
-
- __forceinline avxi(const __m256i a) : m256(a)
- {
- }
- __forceinline operator const __m256i &(void) const
- {
- return m256;
- }
- __forceinline operator __m256i &(void)
- {
- return m256;
- }
-
- __forceinline explicit avxi(const ssei &a)
- : m256(_mm256_insertf128_si256(_mm256_castsi128_si256(a), a, 1))
- {
- }
- __forceinline avxi(const ssei &a, const ssei &b)
- : m256(_mm256_insertf128_si256(_mm256_castsi128_si256(a), b, 1))
- {
- }
-#if defined(__KERNEL_AVX2__)
- __forceinline avxi(const __m128i &a, const __m128i &b)
- : m256(_mm256_insertf128_si256(_mm256_castsi128_si256(a), b, 1))
- {
- }
-#else
- __forceinline avxi(const __m128i &a, const __m128i &b) : l(a), h(b)
- {
- }
-#endif
- __forceinline explicit avxi(const int32_t *const a)
- : m256(_mm256_castps_si256(_mm256_loadu_ps((const float *)a)))
- {
- }
- __forceinline avxi(int32_t a) : m256(_mm256_set1_epi32(a))
- {
- }
- __forceinline avxi(int32_t a, int32_t b) : m256(_mm256_set_epi32(b, a, b, a, b, a, b, a))
- {
- }
- __forceinline avxi(int32_t a, int32_t b, int32_t c, int32_t d)
- : m256(_mm256_set_epi32(d, c, b, a, d, c, b, a))
- {
- }
- __forceinline avxi(
- int32_t a, int32_t b, int32_t c, int32_t d, int32_t e, int32_t f, int32_t g, int32_t h)
- : m256(_mm256_set_epi32(h, g, f, e, d, c, b, a))
- {
- }
-
- __forceinline explicit avxi(const __m256 a) : m256(_mm256_cvtps_epi32(a))
- {
- }
-
- ////////////////////////////////////////////////////////////////////////////////
- /// Constants
- ////////////////////////////////////////////////////////////////////////////////
-
- __forceinline avxi(ZeroTy) : m256(_mm256_setzero_si256())
- {
- }
-#if defined(__KERNEL_AVX2__)
- __forceinline avxi(OneTy) : m256(_mm256_set1_epi32(1))
- {
- }
- __forceinline avxi(PosInfTy) : m256(_mm256_set1_epi32(pos_inf))
- {
- }
- __forceinline avxi(NegInfTy) : m256(_mm256_set1_epi32(neg_inf))
- {
- }
-#else
- __forceinline avxi(OneTy) : m256(_mm256_set_epi32(1, 1, 1, 1, 1, 1, 1, 1))
- {
- }
- __forceinline avxi(PosInfTy)
- : m256(_mm256_set_epi32(
- pos_inf, pos_inf, pos_inf, pos_inf, pos_inf, pos_inf, pos_inf, pos_inf))
- {
- }
- __forceinline avxi(NegInfTy)
- : m256(_mm256_set_epi32(
- neg_inf, neg_inf, neg_inf, neg_inf, neg_inf, neg_inf, neg_inf, neg_inf))
- {
- }
-#endif
- __forceinline avxi(StepTy) : m256(_mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0))
- {
- }
-
- ////////////////////////////////////////////////////////////////////////////////
- /// Array Access
- ////////////////////////////////////////////////////////////////////////////////
-
- __forceinline const int32_t &operator[](const size_t i) const
- {
- assert(i < 8);
- return v[i];
- }
- __forceinline int32_t &operator[](const size_t i)
- {
- assert(i < 8);
- return v[i];
- }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-/// Unary Operators
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const avxi cast(const __m256 &a)
-{
- return _mm256_castps_si256(a);
-}
-__forceinline const avxi operator+(const avxi &a)
-{
- return a;
-}
-#if defined(__KERNEL_AVX2__)
-__forceinline const avxi operator-(const avxi &a)
-{
- return _mm256_sub_epi32(_mm256_setzero_si256(), a.m256);
-}
-__forceinline const avxi abs(const avxi &a)
-{
- return _mm256_abs_epi32(a.m256);
-}
-#else
-__forceinline const avxi operator-(const avxi &a)
-{
- return avxi(_mm_sub_epi32(_mm_setzero_si128(), a.l), _mm_sub_epi32(_mm_setzero_si128(), a.h));
-}
-__forceinline const avxi abs(const avxi &a)
-{
- return avxi(_mm_abs_epi32(a.l), _mm_abs_epi32(a.h));
-}
-#endif
-
-////////////////////////////////////////////////////////////////////////////////
-/// Binary Operators
-////////////////////////////////////////////////////////////////////////////////
-
-#if defined(__KERNEL_AVX2__)
-__forceinline const avxi operator+(const avxi &a, const avxi &b)
-{
- return _mm256_add_epi32(a.m256, b.m256);
-}
-#else
-__forceinline const avxi operator+(const avxi &a, const avxi &b)
-{
- return avxi(_mm_add_epi32(a.l, b.l), _mm_add_epi32(a.h, b.h));
-}
-#endif
-__forceinline const avxi operator+(const avxi &a, const int32_t b)
-{
- return a + avxi(b);
-}
-__forceinline const avxi operator+(const int32_t a, const avxi &b)
-{
- return avxi(a) + b;
-}
-
-#if defined(__KERNEL_AVX2__)
-__forceinline const avxi operator-(const avxi &a, const avxi &b)
-{
- return _mm256_sub_epi32(a.m256, b.m256);
-}
-#else
-__forceinline const avxi operator-(const avxi &a, const avxi &b)
-{
- return avxi(_mm_sub_epi32(a.l, b.l), _mm_sub_epi32(a.h, b.h));
-}
-#endif
-__forceinline const avxi operator-(const avxi &a, const int32_t b)
-{
- return a - avxi(b);
-}
-__forceinline const avxi operator-(const int32_t a, const avxi &b)
-{
- return avxi(a) - b;
-}
-
-#if defined(__KERNEL_AVX2__)
-__forceinline const avxi operator*(const avxi &a, const avxi &b)
-{
- return _mm256_mullo_epi32(a.m256, b.m256);
-}
-#else
-__forceinline const avxi operator*(const avxi &a, const avxi &b)
-{
- return avxi(_mm_mullo_epi32(a.l, b.l), _mm_mullo_epi32(a.h, b.h));
-}
-#endif
-__forceinline const avxi operator*(const avxi &a, const int32_t b)
-{
- return a * avxi(b);
-}
-__forceinline const avxi operator*(const int32_t a, const avxi &b)
-{
- return avxi(a) * b;
-}
-
-#if defined(__KERNEL_AVX2__)
-__forceinline const avxi operator&(const avxi &a, const avxi &b)
-{
- return _mm256_and_si256(a.m256, b.m256);
-}
-#else
-__forceinline const avxi operator&(const avxi &a, const avxi &b)
-{
- return _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b)));
-}
-#endif
-__forceinline const avxi operator&(const avxi &a, const int32_t b)
-{
- return a & avxi(b);
-}
-__forceinline const avxi operator&(const int32_t a, const avxi &b)
-{
- return avxi(a) & b;
-}
-
-#if defined(__KERNEL_AVX2__)
-__forceinline const avxi operator|(const avxi &a, const avxi &b)
-{
- return _mm256_or_si256(a.m256, b.m256);
-}
-#else
-__forceinline const avxi operator|(const avxi &a, const avxi &b)
-{
- return _mm256_castps_si256(_mm256_or_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b)));
-}
-#endif
-__forceinline const avxi operator|(const avxi &a, const int32_t b)
-{
- return a | avxi(b);
-}
-__forceinline const avxi operator|(const int32_t a, const avxi &b)
-{
- return avxi(a) | b;
-}
-
-#if defined(__KERNEL_AVX2__)
-__forceinline const avxi operator^(const avxi &a, const avxi &b)
-{
- return _mm256_xor_si256(a.m256, b.m256);
-}
-#else
-__forceinline const avxi operator^(const avxi &a, const avxi &b)
-{
- return _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b)));
-}
-#endif
-__forceinline const avxi operator^(const avxi &a, const int32_t b)
-{
- return a ^ avxi(b);
-}
-__forceinline const avxi operator^(const int32_t a, const avxi &b)
-{
- return avxi(a) ^ b;
-}
-
-#if defined(__KERNEL_AVX2__)
-__forceinline const avxi operator<<(const avxi &a, const int32_t n)
-{
- return _mm256_slli_epi32(a.m256, n);
-}
-__forceinline const avxi operator>>(const avxi &a, const int32_t n)
-{
- return _mm256_srai_epi32(a.m256, n);
-}
-
-__forceinline const avxi sra(const avxi &a, const int32_t b)
-{
- return _mm256_srai_epi32(a.m256, b);
-}
-__forceinline const avxi srl(const avxi &a, const int32_t b)
-{
- return _mm256_srli_epi32(a.m256, b);
-}
-#else
-__forceinline const avxi operator<<(const avxi &a, const int32_t n)
-{
- return avxi(_mm_slli_epi32(a.l, n), _mm_slli_epi32(a.h, n));
-}
-__forceinline const avxi operator>>(const avxi &a, const int32_t n)
-{
- return avxi(_mm_srai_epi32(a.l, n), _mm_srai_epi32(a.h, n));
-}
-
-__forceinline const avxi sra(const avxi &a, const int32_t b)
-{
- return avxi(_mm_srai_epi32(a.l, b), _mm_srai_epi32(a.h, b));
-}
-__forceinline const avxi srl(const avxi &a, const int32_t b)
-{
- return avxi(_mm_srli_epi32(a.l, b), _mm_srli_epi32(a.h, b));
-}
-#endif
-
-#if defined(__KERNEL_AVX2__)
-__forceinline const avxi min(const avxi &a, const avxi &b)
-{
- return _mm256_min_epi32(a.m256, b.m256);
-}
-#else
-__forceinline const avxi min(const avxi &a, const avxi &b)
-{
- return avxi(_mm_min_epi32(a.l, b.l), _mm_min_epi32(a.h, b.h));
-}
-#endif
-__forceinline const avxi min(const avxi &a, const int32_t b)
-{
- return min(a, avxi(b));
-}
-__forceinline const avxi min(const int32_t a, const avxi &b)
-{
- return min(avxi(a), b);
-}
-
-#if defined(__KERNEL_AVX2__)
-__forceinline const avxi max(const avxi &a, const avxi &b)
-{
- return _mm256_max_epi32(a.m256, b.m256);
-}
-#else
-__forceinline const avxi max(const avxi &a, const avxi &b)
-{
- return avxi(_mm_max_epi32(a.l, b.l), _mm_max_epi32(a.h, b.h));
-}
-#endif
-__forceinline const avxi max(const avxi &a, const int32_t b)
-{
- return max(a, avxi(b));
-}
-__forceinline const avxi max(const int32_t a, const avxi &b)
-{
- return max(avxi(a), b);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Assignment Operators
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline avxi &operator+=(avxi &a, const avxi &b)
-{
- return a = a + b;
-}
-__forceinline avxi &operator+=(avxi &a, const int32_t b)
-{
- return a = a + b;
-}
-
-__forceinline avxi &operator-=(avxi &a, const avxi &b)
-{
- return a = a - b;
-}
-__forceinline avxi &operator-=(avxi &a, const int32_t b)
-{
- return a = a - b;
-}
-
-__forceinline avxi &operator*=(avxi &a, const avxi &b)
-{
- return a = a * b;
-}
-__forceinline avxi &operator*=(avxi &a, const int32_t b)
-{
- return a = a * b;
-}
-
-__forceinline avxi &operator&=(avxi &a, const avxi &b)
-{
- return a = a & b;
-}
-__forceinline avxi &operator&=(avxi &a, const int32_t b)
-{
- return a = a & b;
-}
-
-__forceinline avxi &operator|=(avxi &a, const avxi &b)
-{
- return a = a | b;
-}
-__forceinline avxi &operator|=(avxi &a, const int32_t b)
-{
- return a = a | b;
-}
-
-__forceinline avxi &operator^=(avxi &a, const avxi &b)
-{
- return a = a ^ b;
-}
-__forceinline avxi &operator^=(avxi &a, const int32_t b)
-{
- return a = a ^ b;
-}
-
-__forceinline avxi &operator<<=(avxi &a, const int32_t b)
-{
- return a = a << b;
-}
-__forceinline avxi &operator>>=(avxi &a, const int32_t b)
-{
- return a = a >> b;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Comparison Operators + Select
-////////////////////////////////////////////////////////////////////////////////
-
-#if defined(__KERNEL_AVX2__)
-__forceinline const avxb operator==(const avxi &a, const avxi &b)
-{
- return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a.m256, b.m256));
-}
-#else
-__forceinline const avxb operator==(const avxi &a, const avxi &b)
-{
- return avxb(_mm_castsi128_ps(_mm_cmpeq_epi32(a.l, b.l)),
- _mm_castsi128_ps(_mm_cmpeq_epi32(a.h, b.h)));
-}
-#endif
-__forceinline const avxb operator==(const avxi &a, const int32_t b)
-{
- return a == avxi(b);
-}
-__forceinline const avxb operator==(const int32_t a, const avxi &b)
-{
- return avxi(a) == b;
-}
-
-__forceinline const avxb operator!=(const avxi &a, const avxi &b)
-{
- return !(a == b);
-}
-__forceinline const avxb operator!=(const avxi &a, const int32_t b)
-{
- return a != avxi(b);
-}
-__forceinline const avxb operator!=(const int32_t a, const avxi &b)
-{
- return avxi(a) != b;
-}
-
-#if defined(__KERNEL_AVX2__)
-__forceinline const avxb operator<(const avxi &a, const avxi &b)
-{
- return _mm256_castsi256_ps(_mm256_cmpgt_epi32(b.m256, a.m256));
-}
-#else
-__forceinline const avxb operator<(const avxi &a, const avxi &b)
-{
- return avxb(_mm_castsi128_ps(_mm_cmplt_epi32(a.l, b.l)),
- _mm_castsi128_ps(_mm_cmplt_epi32(a.h, b.h)));
-}
-#endif
-__forceinline const avxb operator<(const avxi &a, const int32_t b)
-{
- return a < avxi(b);
-}
-__forceinline const avxb operator<(const int32_t a, const avxi &b)
-{
- return avxi(a) < b;
-}
-
-__forceinline const avxb operator>=(const avxi &a, const avxi &b)
-{
- return !(a < b);
-}
-__forceinline const avxb operator>=(const avxi &a, const int32_t b)
-{
- return a >= avxi(b);
-}
-__forceinline const avxb operator>=(const int32_t a, const avxi &b)
-{
- return avxi(a) >= b;
-}
-
-#if defined(__KERNEL_AVX2__)
-__forceinline const avxb operator>(const avxi &a, const avxi &b)
-{
- return _mm256_castsi256_ps(_mm256_cmpgt_epi32(a.m256, b.m256));
-}
-#else
-__forceinline const avxb operator>(const avxi &a, const avxi &b)
-{
- return avxb(_mm_castsi128_ps(_mm_cmpgt_epi32(a.l, b.l)),
- _mm_castsi128_ps(_mm_cmpgt_epi32(a.h, b.h)));
-}
-#endif
-__forceinline const avxb operator>(const avxi &a, const int32_t b)
-{
- return a > avxi(b);
-}
-__forceinline const avxb operator>(const int32_t a, const avxi &b)
-{
- return avxi(a) > b;
-}
-
-__forceinline const avxb operator<=(const avxi &a, const avxi &b)
-{
- return !(a > b);
-}
-__forceinline const avxb operator<=(const avxi &a, const int32_t b)
-{
- return a <= avxi(b);
-}
-__forceinline const avxb operator<=(const int32_t a, const avxi &b)
-{
- return avxi(a) <= b;
-}
-
-__forceinline const avxi select(const avxb &m, const avxi &t, const avxi &f)
-{
- return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(f), _mm256_castsi256_ps(t), m));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Movement/Shifting/Shuffling Functions
-////////////////////////////////////////////////////////////////////////////////
-
-#if defined(__KERNEL_AVX2__)
-__forceinline avxi unpacklo(const avxi &a, const avxi &b)
-{
- return _mm256_unpacklo_epi32(a.m256, b.m256);
-}
-__forceinline avxi unpackhi(const avxi &a, const avxi &b)
-{
- return _mm256_unpackhi_epi32(a.m256, b.m256);
-}
-#else
-__forceinline avxi unpacklo(const avxi &a, const avxi &b)
-{
- return _mm256_castps_si256(_mm256_unpacklo_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b)));
-}
-__forceinline avxi unpackhi(const avxi &a, const avxi &b)
-{
- return _mm256_castps_si256(_mm256_unpackhi_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b)));
-}
-#endif
-
-template<size_t i> __forceinline const avxi shuffle(const avxi &a)
-{
- return _mm256_castps_si256(_mm256_permute_ps(_mm256_castsi256_ps(a), _MM_SHUFFLE(i, i, i, i)));
-}
-
-template<size_t i0, size_t i1> __forceinline const avxi shuffle(const avxi &a)
-{
- return _mm256_permute2f128_si256(a, a, (i1 << 4) | (i0 << 0));
-}
-
-template<size_t i0, size_t i1> __forceinline const avxi shuffle(const avxi &a, const avxi &b)
-{
- return _mm256_permute2f128_si256(a, b, (i1 << 4) | (i0 << 0));
-}
-
-template<size_t i0, size_t i1, size_t i2, size_t i3>
-__forceinline const avxi shuffle(const avxi &a)
-{
- return _mm256_castps_si256(
- _mm256_permute_ps(_mm256_castsi256_ps(a), _MM_SHUFFLE(i3, i2, i1, i0)));
-}
-
-template<size_t i0, size_t i1, size_t i2, size_t i3>
-__forceinline const avxi shuffle(const avxi &a, const avxi &b)
-{
- return _mm256_castps_si256(_mm256_shuffle_ps(
- _mm256_castsi256_ps(a), _mm256_castsi256_ps(b), _MM_SHUFFLE(i3, i2, i1, i0)));
-}
-
-template<> __forceinline const avxi shuffle<0, 0, 2, 2>(const avxi &b)
-{
- return _mm256_castps_si256(_mm256_moveldup_ps(_mm256_castsi256_ps(b)));
-}
-template<> __forceinline const avxi shuffle<1, 1, 3, 3>(const avxi &b)
-{
- return _mm256_castps_si256(_mm256_movehdup_ps(_mm256_castsi256_ps(b)));
-}
-template<> __forceinline const avxi shuffle<0, 1, 0, 1>(const avxi &b)
-{
- return _mm256_castps_si256(
- _mm256_castpd_ps(_mm256_movedup_pd(_mm256_castps_pd(_mm256_castsi256_ps(b)))));
-}
-
-__forceinline const avxi broadcast(const int *ptr)
-{
- return _mm256_castps_si256(_mm256_broadcast_ss((const float *)ptr));
-}
-template<size_t i> __forceinline const avxi insert(const avxi &a, const ssei &b)
-{
- return _mm256_insertf128_si256(a, b, i);
-}
-template<size_t i> __forceinline const ssei extract(const avxi &a)
-{
- return _mm256_extractf128_si256(a, i);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Reductions
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const avxi vreduce_min2(const avxi &v)
-{
- return min(v, shuffle<1, 0, 3, 2>(v));
-}
-__forceinline const avxi vreduce_min4(const avxi &v)
-{
- avxi v1 = vreduce_min2(v);
- return min(v1, shuffle<2, 3, 0, 1>(v1));
-}
-__forceinline const avxi vreduce_min(const avxi &v)
-{
- avxi v1 = vreduce_min4(v);
- return min(v1, shuffle<1, 0>(v1));
-}
-
-__forceinline const avxi vreduce_max2(const avxi &v)
-{
- return max(v, shuffle<1, 0, 3, 2>(v));
-}
-__forceinline const avxi vreduce_max4(const avxi &v)
-{
- avxi v1 = vreduce_max2(v);
- return max(v1, shuffle<2, 3, 0, 1>(v1));
-}
-__forceinline const avxi vreduce_max(const avxi &v)
-{
- avxi v1 = vreduce_max4(v);
- return max(v1, shuffle<1, 0>(v1));
-}
-
-__forceinline const avxi vreduce_add2(const avxi &v)
-{
- return v + shuffle<1, 0, 3, 2>(v);
-}
-__forceinline const avxi vreduce_add4(const avxi &v)
-{
- avxi v1 = vreduce_add2(v);
- return v1 + shuffle<2, 3, 0, 1>(v1);
-}
-__forceinline const avxi vreduce_add(const avxi &v)
-{
- avxi v1 = vreduce_add4(v);
- return v1 + shuffle<1, 0>(v1);
-}
-
-__forceinline int reduce_min(const avxi &v)
-{
- return extract<0>(extract<0>(vreduce_min(v)));
-}
-__forceinline int reduce_max(const avxi &v)
-{
- return extract<0>(extract<0>(vreduce_max(v)));
-}
-__forceinline int reduce_add(const avxi &v)
-{
- return extract<0>(extract<0>(vreduce_add(v)));
-}
-
-__forceinline uint32_t select_min(const avxi &v)
-{
- return __bsf(movemask(v == vreduce_min(v)));
-}
-__forceinline uint32_t select_max(const avxi &v)
-{
- return __bsf(movemask(v == vreduce_max(v)));
-}
-
-__forceinline uint32_t select_min(const avxb &valid, const avxi &v)
-{
- const avxi a = select(valid, v, avxi(pos_inf));
- return __bsf(movemask(valid & (a == vreduce_min(a))));
-}
-__forceinline uint32_t select_max(const avxb &valid, const avxi &v)
-{
- const avxi a = select(valid, v, avxi(neg_inf));
- return __bsf(movemask(valid & (a == vreduce_max(a))));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Output Operators
-////////////////////////////////////////////////////////////////////////////////
-
-ccl_device_inline void print_avxi(const char *label, const avxi &a)
-{
- printf("%s: %d %d %d %d %d %d %d %d\n", label, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]);
-}
-
-CCL_NAMESPACE_END
-
-#endif
diff --git a/intern/cycles/util/color.h b/intern/cycles/util/color.h
index 537f8ab6771..93e984120f2 100644
--- a/intern/cycles/util/color.h
+++ b/intern/cycles/util/color.h
@@ -228,28 +228,27 @@ ccl_device float3 xyY_to_xyz(float x, float y, float Y)
* exp = exponent, encoded as uint32_t
* e2coeff = 2^(127/exponent - 127) * bias_coeff^(1/exponent), encoded as uint32_t
*/
-template<unsigned exp, unsigned e2coeff> ccl_device_inline ssef fastpow(const ssef &arg)
+template<unsigned exp, unsigned e2coeff> ccl_device_inline float4 fastpow(const float4 &arg)
{
- ssef ret;
- ret = arg * cast(ssei(e2coeff));
- ret = ssef(cast(ret));
- ret = ret * cast(ssei(exp));
- ret = cast(ssei(ret));
+ float4 ret = arg * cast(make_int4(e2coeff));
+ ret = make_float4(cast(ret));
+ ret = ret * cast(make_int4(exp));
+ ret = cast(make_int4(ret));
return ret;
}
/* Improve x ^ 1.0f/5.0f solution with Newton-Raphson method */
-ccl_device_inline ssef improve_5throot_solution(const ssef &old_result, const ssef &x)
+ccl_device_inline float4 improve_5throot_solution(const float4 &old_result, const float4 &x)
{
- ssef approx2 = old_result * old_result;
- ssef approx4 = approx2 * approx2;
- ssef t = x / approx4;
- ssef summ = madd(ssef(4.0f), old_result, t);
- return summ * ssef(1.0f / 5.0f);
+ float4 approx2 = old_result * old_result;
+ float4 approx4 = approx2 * approx2;
+ float4 t = x / approx4;
+ float4 summ = madd(make_float4(4.0f), old_result, t);
+ return summ * make_float4(1.0f / 5.0f);
}
/* Calculate powf(x, 2.4). Working domain: 1e-10 < x < 1e+10 */
-ccl_device_inline ssef fastpow24(const ssef &arg)
+ccl_device_inline float4 fastpow24(const float4 &arg)
{
/* max, avg and |avg| errors were calculated in gcc without FMA instructions
* The final precision should be better than powf in glibc */
@@ -257,9 +256,10 @@ ccl_device_inline ssef fastpow24(const ssef &arg)
/* Calculate x^4/5, coefficient 0.994 was constructed manually to minimize avg error */
/* 0x3F4CCCCD = 4/5 */
/* 0x4F55A7FB = 2^(127/(4/5) - 127) * 0.994^(1/(4/5)) */
- ssef x = fastpow<0x3F4CCCCD, 0x4F55A7FB>(arg); // error max = 0.17 avg = 0.0018 |avg| = 0.05
- ssef arg2 = arg * arg;
- ssef arg4 = arg2 * arg2;
+ float4 x = fastpow<0x3F4CCCCD, 0x4F55A7FB>(
+ arg); // error max = 0.17 avg = 0.0018 |avg| = 0.05
+ float4 arg2 = arg * arg;
+ float4 arg4 = arg2 * arg2;
/* error max = 0.018 avg = 0.0031 |avg| = 0.0031 */
x = improve_5throot_solution(x, arg4);
@@ -271,12 +271,12 @@ ccl_device_inline ssef fastpow24(const ssef &arg)
return x * (x * x);
}
-ccl_device ssef color_srgb_to_linear(const ssef &c)
+ccl_device float4 color_srgb_to_linear(const float4 &c)
{
- sseb cmp = c < ssef(0.04045f);
- ssef lt = max(c * ssef(1.0f / 12.92f), ssef(0.0f));
- ssef gtebase = (c + ssef(0.055f)) * ssef(1.0f / 1.055f); /* fma */
- ssef gte = fastpow24(gtebase);
+ int4 cmp = c < make_float4(0.04045f);
+ float4 lt = max(c * make_float4(1.0f / 12.92f), make_float4(0.0f));
+ float4 gtebase = (c + make_float4(0.055f)) * make_float4(1.0f / 1.055f); /* fma */
+ float4 gte = fastpow24(gtebase);
return select(cmp, lt, gte);
}
#endif /* __KERNEL_SSE2__ */
@@ -302,10 +302,8 @@ ccl_device float4 color_linear_to_srgb_v4(float4 c)
ccl_device float4 color_srgb_to_linear_v4(float4 c)
{
#ifdef __KERNEL_SSE2__
- ssef r_ssef;
- float4 &r = (float4 &)r_ssef;
- r = c;
- r_ssef = color_srgb_to_linear(r_ssef);
+ float4 r = c;
+ r = color_srgb_to_linear(r);
r.w = c.w;
return r;
#else
diff --git a/intern/cycles/util/half.h b/intern/cycles/util/half.h
index c668638eb02..5665dd4c075 100644
--- a/intern/cycles/util/half.h
+++ b/intern/cycles/util/half.h
@@ -154,17 +154,17 @@ ccl_device_inline half float_to_half_display(const float f)
ccl_device_inline half4 float4_to_half4_display(const float4 f)
{
-#ifdef __KERNEL_SSE2__
+#ifdef __KERNEL_SSE__
/* CPU: SSE and AVX. */
- ssef x = min(max(load4f(f), 0.0f), 65504.0f);
+ float4 x = min(max(f, make_float4(0.0f)), make_float4(65504.0f));
# ifdef __KERNEL_AVX2__
- ssei rpack = _mm_cvtps_ph(x, 0);
+ int4 rpack = int4(_mm_cvtps_ph(x, 0));
# else
- ssei absolute = cast(x) & 0x7FFFFFFF;
- ssei Z = absolute + 0xC8000000;
- ssei result = andnot(absolute < 0x38800000, Z);
- ssei rshift = (result >> 13) & 0x7FFF;
- ssei rpack = _mm_packs_epi32(rshift, rshift);
+ int4 absolute = cast(x) & make_int4(0x7FFFFFFF);
+ int4 Z = absolute + make_int4(0xC8000000);
+ int4 result = andnot(absolute < make_int4(0x38800000), Z);
+ int4 rshift = (result >> 13) & make_int4(0x7FFF);
+ int4 rpack = int4(_mm_packs_epi32(rshift, rshift));
# endif
half4 h;
_mm_storel_pi((__m64 *)&h, _mm_castsi128_ps(rpack));
diff --git a/intern/cycles/util/hash.h b/intern/cycles/util/hash.h
index 4f83f331229..74210ff020e 100644
--- a/intern/cycles/util/hash.h
+++ b/intern/cycles/util/hash.h
@@ -222,7 +222,7 @@ ccl_device_inline float3 hash_float4_to_float3(float4 k)
/* SSE Versions Of Jenkins Lookup3 Hash Functions */
-#ifdef __KERNEL_SSE2__
+#ifdef __KERNEL_SSE__
# define rot(x, k) (((x) << (k)) | (srl(x, 32 - (k))))
# define mix(a, b, c) \
@@ -265,10 +265,10 @@ ccl_device_inline float3 hash_float4_to_float3(float4 k)
c -= rot(b, 24); \
}
-ccl_device_inline ssei hash_ssei(ssei kx)
+ccl_device_inline int4 hash_int4(int4 kx)
{
- ssei a, b, c;
- a = b = c = ssei(0xdeadbeef + (1 << 2) + 13);
+ int4 a, b, c;
+ a = b = c = make_int4(0xdeadbeef + (1 << 2) + 13);
a += kx;
final(a, b, c);
@@ -276,10 +276,10 @@ ccl_device_inline ssei hash_ssei(ssei kx)
return c;
}
-ccl_device_inline ssei hash_ssei2(ssei kx, ssei ky)
+ccl_device_inline int4 hash_int4_2(int4 kx, int4 ky)
{
- ssei a, b, c;
- a = b = c = ssei(0xdeadbeef + (2 << 2) + 13);
+ int4 a, b, c;
+ a = b = c = make_int4(0xdeadbeef + (2 << 2) + 13);
b += ky;
a += kx;
@@ -288,10 +288,10 @@ ccl_device_inline ssei hash_ssei2(ssei kx, ssei ky)
return c;
}
-ccl_device_inline ssei hash_ssei3(ssei kx, ssei ky, ssei kz)
+ccl_device_inline int4 hash_int4_3(int4 kx, int4 ky, int4 kz)
{
- ssei a, b, c;
- a = b = c = ssei(0xdeadbeef + (3 << 2) + 13);
+ int4 a, b, c;
+ a = b = c = make_int4(0xdeadbeef + (3 << 2) + 13);
c += kz;
b += ky;
@@ -301,10 +301,10 @@ ccl_device_inline ssei hash_ssei3(ssei kx, ssei ky, ssei kz)
return c;
}
-ccl_device_inline ssei hash_ssei4(ssei kx, ssei ky, ssei kz, ssei kw)
+ccl_device_inline int4 hash_int4_4(int4 kx, int4 ky, int4 kz, int4 kw)
{
- ssei a, b, c;
- a = b = c = ssei(0xdeadbeef + (4 << 2) + 13);
+ int4 a, b, c;
+ a = b = c = make_int4(0xdeadbeef + (4 << 2) + 13);
a += kx;
b += ky;
@@ -317,11 +317,11 @@ ccl_device_inline ssei hash_ssei4(ssei kx, ssei ky, ssei kz, ssei kw)
return c;
}
-# if defined(__KERNEL_AVX__)
-ccl_device_inline avxi hash_avxi(avxi kx)
+# if defined(__KERNEL_AVX2__)
+ccl_device_inline vint8 hash_int8(vint8 kx)
{
- avxi a, b, c;
- a = b = c = avxi(0xdeadbeef + (1 << 2) + 13);
+ vint8 a, b, c;
+ a = b = c = make_vint8(0xdeadbeef + (1 << 2) + 13);
a += kx;
final(a, b, c);
@@ -329,10 +329,10 @@ ccl_device_inline avxi hash_avxi(avxi kx)
return c;
}
-ccl_device_inline avxi hash_avxi2(avxi kx, avxi ky)
+ccl_device_inline vint8 hash_int8_2(vint8 kx, vint8 ky)
{
- avxi a, b, c;
- a = b = c = avxi(0xdeadbeef + (2 << 2) + 13);
+ vint8 a, b, c;
+ a = b = c = make_vint8(0xdeadbeef + (2 << 2) + 13);
b += ky;
a += kx;
@@ -341,10 +341,10 @@ ccl_device_inline avxi hash_avxi2(avxi kx, avxi ky)
return c;
}
-ccl_device_inline avxi hash_avxi3(avxi kx, avxi ky, avxi kz)
+ccl_device_inline vint8 hash_int8_3(vint8 kx, vint8 ky, vint8 kz)
{
- avxi a, b, c;
- a = b = c = avxi(0xdeadbeef + (3 << 2) + 13);
+ vint8 a, b, c;
+ a = b = c = make_vint8(0xdeadbeef + (3 << 2) + 13);
c += kz;
b += ky;
@@ -354,10 +354,10 @@ ccl_device_inline avxi hash_avxi3(avxi kx, avxi ky, avxi kz)
return c;
}
-ccl_device_inline avxi hash_avxi4(avxi kx, avxi ky, avxi kz, avxi kw)
+ccl_device_inline vint8 hash_int8_4(vint8 kx, vint8 ky, vint8 kz, vint8 kw)
{
- avxi a, b, c;
- a = b = c = avxi(0xdeadbeef + (4 << 2) + 13);
+ vint8 a, b, c;
+ a = b = c = make_vint8(0xdeadbeef + (4 << 2) + 13);
a += kx;
b += ky;
diff --git a/intern/cycles/util/math.h b/intern/cycles/util/math.h
index 3a2e0e074a2..0fbe7a67a4f 100644
--- a/intern/cycles/util/math.h
+++ b/intern/cycles/util/math.h
@@ -532,12 +532,14 @@ CCL_NAMESPACE_END
#include "util/math_int2.h"
#include "util/math_int3.h"
#include "util/math_int4.h"
+#include "util/math_int8.h"
#include "util/math_float2.h"
-#include "util/math_float3.h"
#include "util/math_float4.h"
#include "util/math_float8.h"
+#include "util/math_float3.h"
+
#include "util/rect.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/util/math_float2.h b/intern/cycles/util/math_float2.h
index 542dad93467..ad806d0f08a 100644
--- a/intern/cycles/util/math_float2.h
+++ b/intern/cycles/util/math_float2.h
@@ -10,55 +10,6 @@
CCL_NAMESPACE_BEGIN
-/*******************************************************************************
- * Declaration.
- */
-
-#if !defined(__KERNEL_METAL__)
-ccl_device_inline float2 operator-(const float2 &a);
-ccl_device_inline float2 operator*(const float2 &a, const float2 &b);
-ccl_device_inline float2 operator*(const float2 &a, float f);
-ccl_device_inline float2 operator*(float f, const float2 &a);
-ccl_device_inline float2 operator/(float f, const float2 &a);
-ccl_device_inline float2 operator/(const float2 &a, float f);
-ccl_device_inline float2 operator/(const float2 &a, const float2 &b);
-ccl_device_inline float2 operator+(const float2 &a, const float f);
-ccl_device_inline float2 operator+(const float2 &a, const float2 &b);
-ccl_device_inline float2 operator-(const float2 &a, const float f);
-ccl_device_inline float2 operator-(const float2 &a, const float2 &b);
-ccl_device_inline float2 operator+=(float2 &a, const float2 &b);
-ccl_device_inline float2 operator*=(float2 &a, const float2 &b);
-ccl_device_inline float2 operator*=(float2 &a, float f);
-ccl_device_inline float2 operator/=(float2 &a, const float2 &b);
-ccl_device_inline float2 operator/=(float2 &a, float f);
-
-ccl_device_inline bool operator==(const float2 &a, const float2 &b);
-ccl_device_inline bool operator!=(const float2 &a, const float2 &b);
-
-ccl_device_inline bool is_zero(const float2 &a);
-ccl_device_inline float average(const float2 &a);
-ccl_device_inline float distance(const float2 &a, const float2 &b);
-ccl_device_inline float dot(const float2 &a, const float2 &b);
-ccl_device_inline float cross(const float2 &a, const float2 &b);
-ccl_device_inline float len(const float2 a);
-ccl_device_inline float2 normalize(const float2 &a);
-ccl_device_inline float2 normalize_len(const float2 &a, float *t);
-ccl_device_inline float2 safe_normalize(const float2 &a);
-ccl_device_inline float2 min(const float2 &a, const float2 &b);
-ccl_device_inline float2 max(const float2 &a, const float2 &b);
-ccl_device_inline float2 clamp(const float2 &a, const float2 &mn, const float2 &mx);
-ccl_device_inline float2 fabs(const float2 &a);
-ccl_device_inline float2 as_float2(const float4 &a);
-ccl_device_inline float2 interp(const float2 &a, const float2 &b, float t);
-ccl_device_inline float2 floor(const float2 &a);
-#endif /* !__KERNEL_METAL__ */
-
-ccl_device_inline float2 safe_divide_float2_float(const float2 a, const float b);
-
-/*******************************************************************************
- * Definition.
- */
-
ccl_device_inline float2 zero_float2()
{
return make_float2(0.0f, 0.0f);
@@ -75,63 +26,63 @@ ccl_device_inline float2 operator-(const float2 &a)
return make_float2(-a.x, -a.y);
}
-ccl_device_inline float2 operator*(const float2 &a, const float2 &b)
+ccl_device_inline float2 operator*(const float2 a, const float2 b)
{
return make_float2(a.x * b.x, a.y * b.y);
}
-ccl_device_inline float2 operator*(const float2 &a, float f)
+ccl_device_inline float2 operator*(const float2 a, float f)
{
return make_float2(a.x * f, a.y * f);
}
-ccl_device_inline float2 operator*(float f, const float2 &a)
+ccl_device_inline float2 operator*(float f, const float2 a)
{
return make_float2(a.x * f, a.y * f);
}
-ccl_device_inline float2 operator/(float f, const float2 &a)
+ccl_device_inline float2 operator/(float f, const float2 a)
{
return make_float2(f / a.x, f / a.y);
}
-ccl_device_inline float2 operator/(const float2 &a, float f)
+ccl_device_inline float2 operator/(const float2 a, float f)
{
float invf = 1.0f / f;
return make_float2(a.x * invf, a.y * invf);
}
-ccl_device_inline float2 operator/(const float2 &a, const float2 &b)
+ccl_device_inline float2 operator/(const float2 a, const float2 b)
{
return make_float2(a.x / b.x, a.y / b.y);
}
-ccl_device_inline float2 operator+(const float2 &a, const float f)
+ccl_device_inline float2 operator+(const float2 a, const float2 b)
{
- return a + make_float2(f, f);
+ return make_float2(a.x + b.x, a.y + b.y);
}
-ccl_device_inline float2 operator+(const float2 &a, const float2 &b)
+ccl_device_inline float2 operator+(const float2 a, const float f)
{
- return make_float2(a.x + b.x, a.y + b.y);
+ return a + make_float2(f, f);
}
-ccl_device_inline float2 operator-(const float2 &a, const float f)
+ccl_device_inline float2 operator-(const float2 a, const float2 b)
{
- return a - make_float2(f, f);
+ return make_float2(a.x - b.x, a.y - b.y);
}
-ccl_device_inline float2 operator-(const float2 &a, const float2 &b)
+ccl_device_inline float2 operator-(const float2 a, const float f)
{
- return make_float2(a.x - b.x, a.y - b.y);
+ return a - make_float2(f, f);
}
-ccl_device_inline float2 operator+=(float2 &a, const float2 &b)
+ccl_device_inline float2 operator+=(float2 &a, const float2 b)
{
return a = a + b;
}
-ccl_device_inline float2 operator*=(float2 &a, const float2 &b)
+ccl_device_inline float2 operator*=(float2 &a, const float2 b)
{
return a = a * b;
}
@@ -141,7 +92,7 @@ ccl_device_inline float2 operator*=(float2 &a, float f)
return a = a * f;
}
-ccl_device_inline float2 operator/=(float2 &a, const float2 &b)
+ccl_device_inline float2 operator/=(float2 &a, const float2 b)
{
return a = a / b;
}
@@ -152,74 +103,81 @@ ccl_device_inline float2 operator/=(float2 &a, float f)
return a = a * invf;
}
-ccl_device_inline bool operator==(const float2 &a, const float2 &b)
+ccl_device_inline bool operator==(const float2 a, const float2 b)
{
return (a.x == b.x && a.y == b.y);
}
-ccl_device_inline bool operator!=(const float2 &a, const float2 &b)
+ccl_device_inline bool operator!=(const float2 a, const float2 b)
{
return !(a == b);
}
-ccl_device_inline bool is_zero(const float2 &a)
+ccl_device_inline bool is_zero(const float2 a)
{
return (a.x == 0.0f && a.y == 0.0f);
}
-ccl_device_inline float average(const float2 &a)
+ccl_device_inline float average(const float2 a)
{
return (a.x + a.y) * (1.0f / 2.0f);
}
-ccl_device_inline float distance(const float2 &a, const float2 &b)
+ccl_device_inline float dot(const float2 a, const float2 b)
{
- return len(a - b);
+ return a.x * b.x + a.y * b.y;
}
+#endif
-ccl_device_inline float dot(const float2 &a, const float2 &b)
+ccl_device_inline float len(const float2 a)
{
- return a.x * b.x + a.y * b.y;
+ return sqrtf(dot(a, a));
}
-ccl_device_inline float cross(const float2 &a, const float2 &b)
+#if !defined(__KERNEL_METAL__)
+ccl_device_inline float distance(const float2 a, const float2 b)
+{
+ return len(a - b);
+}
+
+ccl_device_inline float cross(const float2 a, const float2 b)
{
return (a.x * b.y - a.y * b.x);
}
-ccl_device_inline float2 normalize(const float2 &a)
+ccl_device_inline float2 normalize(const float2 a)
{
return a / len(a);
}
-ccl_device_inline float2 normalize_len(const float2 &a, ccl_private float *t)
+ccl_device_inline float2 normalize_len(const float2 a, ccl_private float *t)
{
*t = len(a);
return a / (*t);
}
-ccl_device_inline float2 safe_normalize(const float2 &a)
+ccl_device_inline float2 safe_normalize(const float2 a)
{
float t = len(a);
return (t != 0.0f) ? a / t : a;
}
-ccl_device_inline float2 min(const float2 &a, const float2 &b)
+ccl_device_inline float2 min(const float2 a, const float2 b)
{
return make_float2(min(a.x, b.x), min(a.y, b.y));
}
-ccl_device_inline float2 max(const float2 &a, const float2 &b)
+ccl_device_inline float2 max(const float2 a, const float2 b)
{
return make_float2(max(a.x, b.x), max(a.y, b.y));
}
-ccl_device_inline float2 clamp(const float2 &a, const float2 &mn, const float2 &mx)
+ccl_device_inline float2 clamp(const float2 a, const float2 mn, const float2 mx)
{
return min(max(a, mn), mx);
}
-ccl_device_inline float2 fabs(const float2 &a)
+ccl_device_inline float2 fabs(const float2 a)
{
return make_float2(fabsf(a.x), fabsf(a.y));
}
@@ -229,28 +187,23 @@ ccl_device_inline float2 as_float2(const float4 &a)
return make_float2(a.x, a.y);
}
-ccl_device_inline float2 interp(const float2 &a, const float2 &b, float t)
+ccl_device_inline float2 interp(const float2 a, const float2 b, float t)
{
return a + t * (b - a);
}
-ccl_device_inline float2 mix(const float2 &a, const float2 &b, float t)
+ccl_device_inline float2 mix(const float2 a, const float2 b, float t)
{
return a + t * (b - a);
}
-ccl_device_inline float2 floor(const float2 &a)
+ccl_device_inline float2 floor(const float2 a)
{
return make_float2(floorf(a.x), floorf(a.y));
}
#endif /* !__KERNEL_METAL__ */
-ccl_device_inline float len(const float2 a)
-{
- return sqrtf(dot(a, a));
-}
-
ccl_device_inline float2 safe_divide_float2_float(const float2 a, const float b)
{
return (b != 0.0f) ? a / b : zero_float2();
diff --git a/intern/cycles/util/math_float3.h b/intern/cycles/util/math_float3.h
index eec7122b9dc..79ee86d9c82 100644
--- a/intern/cycles/util/math_float3.h
+++ b/intern/cycles/util/math_float3.h
@@ -1,4 +1,5 @@
/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2013 Intel Corporation
* Copyright 2011-2022 Blender Foundation */
#ifndef __UTIL_MATH_FLOAT3_H__
@@ -10,73 +11,6 @@
CCL_NAMESPACE_BEGIN
-/*******************************************************************************
- * Declaration.
- */
-
-#if !defined(__KERNEL_METAL__)
-ccl_device_inline float3 operator-(const float3 &a);
-ccl_device_inline float3 operator*(const float3 &a, const float3 &b);
-ccl_device_inline float3 operator*(const float3 &a, const float f);
-ccl_device_inline float3 operator*(const float f, const float3 &a);
-ccl_device_inline float3 operator/(const float f, const float3 &a);
-ccl_device_inline float3 operator/(const float3 &a, const float f);
-ccl_device_inline float3 operator/(const float3 &a, const float3 &b);
-ccl_device_inline float3 operator+(const float3 &a, const float f);
-ccl_device_inline float3 operator+(const float3 &a, const float3 &b);
-ccl_device_inline float3 operator-(const float3 &a, const float f);
-ccl_device_inline float3 operator-(const float3 &a, const float3 &b);
-ccl_device_inline float3 operator+=(float3 &a, const float3 &b);
-ccl_device_inline float3 operator-=(float3 &a, const float3 &b);
-ccl_device_inline float3 operator*=(float3 &a, const float3 &b);
-ccl_device_inline float3 operator*=(float3 &a, float f);
-ccl_device_inline float3 operator/=(float3 &a, const float3 &b);
-ccl_device_inline float3 operator/=(float3 &a, float f);
-
-ccl_device_inline bool operator==(const float3 &a, const float3 &b);
-ccl_device_inline bool operator!=(const float3 &a, const float3 &b);
-
-ccl_device_inline float distance(const float3 &a, const float3 &b);
-ccl_device_inline float dot(const float3 &a, const float3 &b);
-ccl_device_inline float dot_xy(const float3 &a, const float3 &b);
-ccl_device_inline float3 cross(const float3 &a, const float3 &b);
-ccl_device_inline float3 normalize(const float3 &a);
-ccl_device_inline float3 min(const float3 &a, const float3 &b);
-ccl_device_inline float3 max(const float3 &a, const float3 &b);
-ccl_device_inline float3 clamp(const float3 &a, const float3 &mn, const float3 &mx);
-ccl_device_inline float3 fabs(const float3 &a);
-ccl_device_inline float3 mix(const float3 &a, const float3 &b, float t);
-ccl_device_inline float3 rcp(const float3 &a);
-ccl_device_inline float3 sqrt(const float3 &a);
-ccl_device_inline float3 floor(const float3 &a);
-ccl_device_inline float3 ceil(const float3 &a);
-ccl_device_inline float3 reflect(const float3 incident, const float3 normal);
-#endif /* !defined(__KERNEL_METAL__) */
-
-ccl_device_inline float reduce_min(float3 a);
-ccl_device_inline float reduce_max(float3 a);
-ccl_device_inline float len(const float3 a);
-ccl_device_inline float len_squared(const float3 a);
-
-ccl_device_inline float3 project(const float3 v, const float3 v_proj);
-
-ccl_device_inline float3 safe_normalize(const float3 a);
-ccl_device_inline float3 normalize_len(const float3 a, ccl_private float *t);
-ccl_device_inline float3 safe_normalize_len(const float3 a, ccl_private float *t);
-ccl_device_inline float3 safe_divide(const float3 a, const float3 b);
-ccl_device_inline float3 safe_divide(const float3 a, const float b);
-ccl_device_inline float3 interp(float3 a, float3 b, float t);
-ccl_device_inline float3 sqr(float3 a);
-
-ccl_device_inline bool is_zero(const float3 a);
-ccl_device_inline float reduce_add(const float3 a);
-ccl_device_inline float average(const float3 a);
-ccl_device_inline bool isequal(const float3 a, const float3 b);
-
-/*******************************************************************************
- * Definition.
- */
-
ccl_device_inline float3 zero_float3()
{
#ifdef __KERNEL_SSE__
@@ -109,7 +43,7 @@ ccl_device_inline float3 operator-(const float3 &a)
# endif
}
-ccl_device_inline float3 operator*(const float3 &a, const float3 &b)
+ccl_device_inline float3 operator*(const float3 a, const float3 b)
{
# ifdef __KERNEL_SSE__
return float3(_mm_mul_ps(a.m128, b.m128));
@@ -118,7 +52,7 @@ ccl_device_inline float3 operator*(const float3 &a, const float3 &b)
# endif
}
-ccl_device_inline float3 operator*(const float3 &a, const float f)
+ccl_device_inline float3 operator*(const float3 a, const float f)
{
# ifdef __KERNEL_SSE__
return float3(_mm_mul_ps(a.m128, _mm_set1_ps(f)));
@@ -127,7 +61,7 @@ ccl_device_inline float3 operator*(const float3 &a, const float f)
# endif
}
-ccl_device_inline float3 operator*(const float f, const float3 &a)
+ccl_device_inline float3 operator*(const float f, const float3 a)
{
# if defined(__KERNEL_SSE__)
return float3(_mm_mul_ps(_mm_set1_ps(f), a.m128));
@@ -136,7 +70,7 @@ ccl_device_inline float3 operator*(const float f, const float3 &a)
# endif
}
-ccl_device_inline float3 operator/(const float f, const float3 &a)
+ccl_device_inline float3 operator/(const float f, const float3 a)
{
# if defined(__KERNEL_SSE__)
return float3(_mm_div_ps(_mm_set1_ps(f), a.m128));
@@ -145,7 +79,7 @@ ccl_device_inline float3 operator/(const float f, const float3 &a)
# endif
}
-ccl_device_inline float3 operator/(const float3 &a, const float f)
+ccl_device_inline float3 operator/(const float3 a, const float f)
{
# if defined(__KERNEL_SSE__)
return float3(_mm_div_ps(a.m128, _mm_set1_ps(f)));
@@ -154,7 +88,7 @@ ccl_device_inline float3 operator/(const float3 &a, const float f)
# endif
}
-ccl_device_inline float3 operator/(const float3 &a, const float3 &b)
+ccl_device_inline float3 operator/(const float3 a, const float3 b)
{
# if defined(__KERNEL_SSE__)
return float3(_mm_div_ps(a.m128, b.m128));
@@ -163,12 +97,7 @@ ccl_device_inline float3 operator/(const float3 &a, const float3 &b)
# endif
}
-ccl_device_inline float3 operator+(const float3 &a, const float f)
-{
- return a + make_float3(f, f, f);
-}
-
-ccl_device_inline float3 operator+(const float3 &a, const float3 &b)
+ccl_device_inline float3 operator+(const float3 a, const float3 b)
{
# ifdef __KERNEL_SSE__
return float3(_mm_add_ps(a.m128, b.m128));
@@ -177,12 +106,12 @@ ccl_device_inline float3 operator+(const float3 &a, const float3 &b)
# endif
}
-ccl_device_inline float3 operator-(const float3 &a, const float f)
+ccl_device_inline float3 operator+(const float3 a, const float f)
{
- return a - make_float3(f, f, f);
+ return a + make_float3(f, f, f);
}
-ccl_device_inline float3 operator-(const float3 &a, const float3 &b)
+ccl_device_inline float3 operator-(const float3 a, const float3 b)
{
# ifdef __KERNEL_SSE__
return float3(_mm_sub_ps(a.m128, b.m128));
@@ -191,17 +120,22 @@ ccl_device_inline float3 operator-(const float3 &a, const float3 &b)
# endif
}
-ccl_device_inline float3 operator+=(float3 &a, const float3 &b)
+ccl_device_inline float3 operator-(const float3 a, const float f)
+{
+ return a - make_float3(f, f, f);
+}
+
+ccl_device_inline float3 operator+=(float3 &a, const float3 b)
{
return a = a + b;
}
-ccl_device_inline float3 operator-=(float3 &a, const float3 &b)
+ccl_device_inline float3 operator-=(float3 &a, const float3 b)
{
return a = a - b;
}
-ccl_device_inline float3 operator*=(float3 &a, const float3 &b)
+ccl_device_inline float3 operator*=(float3 &a, const float3 b)
{
return a = a * b;
}
@@ -211,7 +145,7 @@ ccl_device_inline float3 operator*=(float3 &a, float f)
return a = a * f;
}
-ccl_device_inline float3 operator/=(float3 &a, const float3 &b)
+ccl_device_inline float3 operator/=(float3 &a, const float3 b)
{
return a = a / b;
}
@@ -223,7 +157,7 @@ ccl_device_inline float3 operator/=(float3 &a, float f)
}
# if !(defined(__KERNEL_METAL__) || defined(__KERNEL_CUDA__))
-ccl_device_inline packed_float3 operator*=(packed_float3 &a, const float3 &b)
+ccl_device_inline packed_float3 operator*=(packed_float3 &a, const float3 b)
{
a = float3(a) * b;
return a;
@@ -235,7 +169,7 @@ ccl_device_inline packed_float3 operator*=(packed_float3 &a, float f)
return a;
}
-ccl_device_inline packed_float3 operator/=(packed_float3 &a, const float3 &b)
+ccl_device_inline packed_float3 operator/=(packed_float3 &a, const float3 b)
{
a = float3(a) / b;
return a;
@@ -248,7 +182,7 @@ ccl_device_inline packed_float3 operator/=(packed_float3 &a, float f)
}
# endif
-ccl_device_inline bool operator==(const float3 &a, const float3 &b)
+ccl_device_inline bool operator==(const float3 a, const float3 b)
{
# ifdef __KERNEL_SSE__
return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 7) == 7;
@@ -257,17 +191,12 @@ ccl_device_inline bool operator==(const float3 &a, const float3 &b)
# endif
}
-ccl_device_inline bool operator!=(const float3 &a, const float3 &b)
+ccl_device_inline bool operator!=(const float3 a, const float3 b)
{
return !(a == b);
}
-ccl_device_inline float distance(const float3 &a, const float3 &b)
-{
- return len(a - b);
-}
-
-ccl_device_inline float dot(const float3 &a, const float3 &b)
+ccl_device_inline float dot(const float3 a, const float3 b)
{
# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F));
@@ -276,26 +205,62 @@ ccl_device_inline float dot(const float3 &a, const float3 &b)
# endif
}
-ccl_device_inline float dot_xy(const float3 &a, const float3 &b)
+#endif
+
+ccl_device_inline float dot_xy(const float3 a, const float3 b)
{
-# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
return _mm_cvtss_f32(_mm_hadd_ps(_mm_mul_ps(a, b), b));
-# else
+#else
return a.x * b.x + a.y * b.y;
-# endif
+#endif
+}
+
+ccl_device_inline float len(const float3 a)
+{
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+ return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(a.m128, a.m128, 0x7F)));
+#else
+ return sqrtf(dot(a, a));
+#endif
+}
+
+ccl_device_inline float reduce_min(float3 a)
+{
+ return min(min(a.x, a.y), a.z);
+}
+
+ccl_device_inline float reduce_max(float3 a)
+{
+ return max(max(a.x, a.y), a.z);
+}
+
+ccl_device_inline float len_squared(const float3 a)
+{
+ return dot(a, a);
+}
+
+#ifndef __KERNEL_METAL__
+
+ccl_device_inline float distance(const float3 a, const float3 b)
+{
+ return len(a - b);
}
-ccl_device_inline float3 cross(const float3 &a, const float3 &b)
+ccl_device_inline float3 cross(const float3 a, const float3 b)
{
# ifdef __KERNEL_SSE__
- return float3(shuffle<1, 2, 0, 3>(
- msub(ssef(a), shuffle<1, 2, 0, 3>(ssef(b)), shuffle<1, 2, 0, 3>(ssef(a)) * ssef(b))));
+ const float4 x = float4(a.m128);
+ const float4 y = shuffle<1, 2, 0, 3>(float4(b.m128));
+ const float4 z = float4(_mm_mul_ps(shuffle<1, 2, 0, 3>(float4(a.m128)), float4(b.m128)));
+
+ return float3(shuffle<1, 2, 0, 3>(msub(x, y, z)).m128);
# else
return make_float3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x);
# endif
}
-ccl_device_inline float3 normalize(const float3 &a)
+ccl_device_inline float3 normalize(const float3 a)
{
# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
__m128 norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F));
@@ -305,7 +270,7 @@ ccl_device_inline float3 normalize(const float3 &a)
# endif
}
-ccl_device_inline float3 min(const float3 &a, const float3 &b)
+ccl_device_inline float3 min(const float3 a, const float3 b)
{
# ifdef __KERNEL_SSE__
return float3(_mm_min_ps(a.m128, b.m128));
@@ -314,7 +279,7 @@ ccl_device_inline float3 min(const float3 &a, const float3 &b)
# endif
}
-ccl_device_inline float3 max(const float3 &a, const float3 &b)
+ccl_device_inline float3 max(const float3 a, const float3 b)
{
# ifdef __KERNEL_SSE__
return float3(_mm_max_ps(a.m128, b.m128));
@@ -323,12 +288,12 @@ ccl_device_inline float3 max(const float3 &a, const float3 &b)
# endif
}
-ccl_device_inline float3 clamp(const float3 &a, const float3 &mn, const float3 &mx)
+ccl_device_inline float3 clamp(const float3 a, const float3 mn, const float3 mx)
{
return min(max(a, mn), mx);
}
-ccl_device_inline float3 fabs(const float3 &a)
+ccl_device_inline float3 fabs(const float3 a)
{
# ifdef __KERNEL_SSE__
# ifdef __KERNEL_NEON__
@@ -342,7 +307,7 @@ ccl_device_inline float3 fabs(const float3 &a)
# endif
}
-ccl_device_inline float3 sqrt(const float3 &a)
+ccl_device_inline float3 sqrt(const float3 a)
{
# ifdef __KERNEL_SSE__
return float3(_mm_sqrt_ps(a));
@@ -351,7 +316,7 @@ ccl_device_inline float3 sqrt(const float3 &a)
# endif
}
-ccl_device_inline float3 floor(const float3 &a)
+ccl_device_inline float3 floor(const float3 a)
{
# ifdef __KERNEL_SSE__
return float3(_mm_floor_ps(a));
@@ -360,7 +325,7 @@ ccl_device_inline float3 floor(const float3 &a)
# endif
}
-ccl_device_inline float3 ceil(const float3 &a)
+ccl_device_inline float3 ceil(const float3 a)
{
# ifdef __KERNEL_SSE__
return float3(_mm_ceil_ps(a));
@@ -369,12 +334,12 @@ ccl_device_inline float3 ceil(const float3 &a)
# endif
}
-ccl_device_inline float3 mix(const float3 &a, const float3 &b, float t)
+ccl_device_inline float3 mix(const float3 a, const float3 b, float t)
{
return a + t * (b - a);
}
-ccl_device_inline float3 rcp(const float3 &a)
+ccl_device_inline float3 rcp(const float3 a)
{
# ifdef __KERNEL_SSE__
/* Don't use _mm_rcp_ps due to poor precision. */
@@ -399,33 +364,6 @@ ccl_device_inline float3 log(float3 v)
return make_float3(logf(v.x), logf(v.y), logf(v.z));
}
-#endif /* !__KERNEL_METAL__ */
-
-ccl_device_inline float reduce_min(float3 a)
-{
- return min(min(a.x, a.y), a.z);
-}
-
-ccl_device_inline float reduce_max(float3 a)
-{
- return max(max(a.x, a.y), a.z);
-}
-
-ccl_device_inline float len(const float3 a)
-{
-#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
- return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(a.m128, a.m128, 0x7F)));
-#else
- return sqrtf(dot(a, a));
-#endif
-}
-
-ccl_device_inline float len_squared(const float3 a)
-{
- return dot(a, a);
-}
-
-#if !defined(__KERNEL_METAL__)
ccl_device_inline float3 reflect(const float3 incident, const float3 normal)
{
float3 unit_normal = normalize(normal);
diff --git a/intern/cycles/util/math_float4.h b/intern/cycles/util/math_float4.h
index c2721873037..301d2d789c0 100644
--- a/intern/cycles/util/math_float4.h
+++ b/intern/cycles/util/math_float4.h
@@ -1,4 +1,5 @@
/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2013 Intel Corporation
* Copyright 2011-2022 Blender Foundation */
#ifndef __UTIL_MATH_FLOAT4_H__
@@ -10,85 +11,6 @@
CCL_NAMESPACE_BEGIN
-/*******************************************************************************
- * Declaration.
- */
-
-#if !defined(__KERNEL_METAL__)
-ccl_device_inline float4 operator-(const float4 &a);
-ccl_device_inline float4 operator*(const float4 &a, const float4 &b);
-ccl_device_inline float4 operator*(const float4 &a, float f);
-ccl_device_inline float4 operator*(float f, const float4 &a);
-ccl_device_inline float4 operator/(const float4 &a, float f);
-ccl_device_inline float4 operator/(const float4 &a, const float4 &b);
-ccl_device_inline float4 operator+(const float4 &a, const float f);
-ccl_device_inline float4 operator+(const float4 &a, const float4 &b);
-ccl_device_inline float4 operator-(const float4 &a, const float f);
-ccl_device_inline float4 operator-(const float4 &a, const float4 &b);
-ccl_device_inline float4 operator+=(float4 &a, const float4 &b);
-ccl_device_inline float4 operator*=(float4 &a, const float4 &b);
-ccl_device_inline float4 operator*=(float4 &a, float f);
-ccl_device_inline float4 operator/=(float4 &a, float f);
-
-ccl_device_inline int4 operator<(const float4 &a, const float4 &b);
-ccl_device_inline int4 operator>=(const float4 &a, const float4 &b);
-ccl_device_inline int4 operator<=(const float4 &a, const float4 &b);
-ccl_device_inline bool operator==(const float4 &a, const float4 &b);
-
-ccl_device_inline float distance(const float4 &a, const float4 &b);
-ccl_device_inline float dot(const float4 &a, const float4 &b);
-ccl_device_inline float len_squared(const float4 &a);
-ccl_device_inline float4 rcp(const float4 &a);
-ccl_device_inline float4 sqrt(const float4 &a);
-ccl_device_inline float4 sqr(const float4 &a);
-ccl_device_inline float4 cross(const float4 &a, const float4 &b);
-ccl_device_inline bool is_zero(const float4 &a);
-ccl_device_inline float average(const float4 &a);
-ccl_device_inline float len(const float4 &a);
-ccl_device_inline float4 normalize(const float4 &a);
-ccl_device_inline float4 safe_normalize(const float4 &a);
-ccl_device_inline float4 min(const float4 &a, const float4 &b);
-ccl_device_inline float4 max(const float4 &a, const float4 &b);
-ccl_device_inline float4 clamp(const float4 &a, const float4 &mn, const float4 &mx);
-ccl_device_inline float4 fabs(const float4 &a);
-ccl_device_inline float4 floor(const float4 &a);
-ccl_device_inline float4 mix(const float4 &a, const float4 &b, float t);
-#endif /* !__KERNEL_METAL__*/
-
-ccl_device_inline float4 safe_divide(const float4 a, const float4 b);
-ccl_device_inline float4 safe_divide(const float4 a, const float b);
-
-#ifdef __KERNEL_SSE__
-template<size_t index_0, size_t index_1, size_t index_2, size_t index_3>
-__forceinline const float4 shuffle(const float4 &b);
-template<size_t index_0, size_t index_1, size_t index_2, size_t index_3>
-__forceinline const float4 shuffle(const float4 &a, const float4 &b);
-
-template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 &b);
-
-template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 &a, const float4 &b);
-template<> __forceinline const float4 shuffle<2, 3, 2, 3>(const float4 &a, const float4 &b);
-
-# ifdef __KERNEL_SSE3__
-template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4 &b);
-template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4 &b);
-# endif
-#endif /* __KERNEL_SSE__ */
-
-ccl_device_inline float reduce_min(const float4 a);
-ccl_device_inline float reduce_max(const float4 a);
-ccl_device_inline float reduce_add(const float4 a);
-
-ccl_device_inline bool isequal(const float4 a, const float4 b);
-
-#ifndef __KERNEL_GPU__
-ccl_device_inline float4 select(const int4 &mask, const float4 &a, const float4 &b);
-#endif /* !__KERNEL_GPU__ */
-
-/*******************************************************************************
- * Definition.
- */
-
ccl_device_inline float4 zero_float4()
{
#ifdef __KERNEL_SSE__
@@ -103,6 +25,16 @@ ccl_device_inline float4 one_float4()
return make_float4(1.0f, 1.0f, 1.0f, 1.0f);
}
+ccl_device_inline int4 cast(const float4 a)
+{
+#ifdef __KERNEL_SSE__
+ return int4(_mm_castps_si128(a));
+#else
+ return make_int4(
+ __float_as_int(a.x), __float_as_int(a.y), __float_as_int(a.z), __float_as_int(a.w));
+#endif
+}
+
#if !defined(__KERNEL_METAL__)
ccl_device_inline float4 operator-(const float4 &a)
{
@@ -114,7 +46,7 @@ ccl_device_inline float4 operator-(const float4 &a)
# endif
}
-ccl_device_inline float4 operator*(const float4 &a, const float4 &b)
+ccl_device_inline float4 operator*(const float4 a, const float4 b)
{
# ifdef __KERNEL_SSE__
return float4(_mm_mul_ps(a.m128, b.m128));
@@ -123,7 +55,7 @@ ccl_device_inline float4 operator*(const float4 &a, const float4 &b)
# endif
}
-ccl_device_inline float4 operator*(const float4 &a, float f)
+ccl_device_inline float4 operator*(const float4 a, float f)
{
# if defined(__KERNEL_SSE__)
return a * make_float4(f);
@@ -132,17 +64,17 @@ ccl_device_inline float4 operator*(const float4 &a, float f)
# endif
}
-ccl_device_inline float4 operator*(float f, const float4 &a)
+ccl_device_inline float4 operator*(float f, const float4 a)
{
return a * f;
}
-ccl_device_inline float4 operator/(const float4 &a, float f)
+ccl_device_inline float4 operator/(const float4 a, float f)
{
return a * (1.0f / f);
}
-ccl_device_inline float4 operator/(const float4 &a, const float4 &b)
+ccl_device_inline float4 operator/(const float4 a, const float4 b)
{
# ifdef __KERNEL_SSE__
return float4(_mm_div_ps(a.m128, b.m128));
@@ -151,12 +83,7 @@ ccl_device_inline float4 operator/(const float4 &a, const float4 &b)
# endif
}
-ccl_device_inline float4 operator+(const float4 &a, const float f)
-{
- return a + make_float4(f, f, f, f);
-}
-
-ccl_device_inline float4 operator+(const float4 &a, const float4 &b)
+ccl_device_inline float4 operator+(const float4 a, const float4 b)
{
# ifdef __KERNEL_SSE__
return float4(_mm_add_ps(a.m128, b.m128));
@@ -165,12 +92,12 @@ ccl_device_inline float4 operator+(const float4 &a, const float4 &b)
# endif
}
-ccl_device_inline float4 operator-(const float4 &a, const float f)
+ccl_device_inline float4 operator+(const float4 a, const float f)
{
- return a - make_float4(f, f, f, f);
+ return a + make_float4(f);
}
-ccl_device_inline float4 operator-(const float4 &a, const float4 &b)
+ccl_device_inline float4 operator-(const float4 a, const float4 b)
{
# ifdef __KERNEL_SSE__
return float4(_mm_sub_ps(a.m128, b.m128));
@@ -179,17 +106,22 @@ ccl_device_inline float4 operator-(const float4 &a, const float4 &b)
# endif
}
-ccl_device_inline float4 operator+=(float4 &a, const float4 &b)
+ccl_device_inline float4 operator-(const float4 a, const float f)
+{
+ return a - make_float4(f);
+}
+
+ccl_device_inline float4 operator+=(float4 &a, const float4 b)
{
return a = a + b;
}
-ccl_device_inline float4 operator-=(float4 &a, const float4 &b)
+ccl_device_inline float4 operator-=(float4 &a, const float4 b)
{
return a = a - b;
}
-ccl_device_inline float4 operator*=(float4 &a, const float4 &b)
+ccl_device_inline float4 operator*=(float4 &a, const float4 b)
{
return a = a * b;
}
@@ -204,7 +136,7 @@ ccl_device_inline float4 operator/=(float4 &a, float f)
return a = a / f;
}
-ccl_device_inline int4 operator<(const float4 &a, const float4 &b)
+ccl_device_inline int4 operator<(const float4 a, const float4 b)
{
# ifdef __KERNEL_SSE__
return int4(_mm_castps_si128(_mm_cmplt_ps(a.m128, b.m128)));
@@ -213,7 +145,7 @@ ccl_device_inline int4 operator<(const float4 &a, const float4 &b)
# endif
}
-ccl_device_inline int4 operator>=(const float4 &a, const float4 &b)
+ccl_device_inline int4 operator>=(const float4 a, const float4 b)
{
# ifdef __KERNEL_SSE__
return int4(_mm_castps_si128(_mm_cmpge_ps(a.m128, b.m128)));
@@ -222,7 +154,7 @@ ccl_device_inline int4 operator>=(const float4 &a, const float4 &b)
# endif
}
-ccl_device_inline int4 operator<=(const float4 &a, const float4 &b)
+ccl_device_inline int4 operator<=(const float4 a, const float4 b)
{
# ifdef __KERNEL_SSE__
return int4(_mm_castps_si128(_mm_cmple_ps(a.m128, b.m128)));
@@ -231,7 +163,7 @@ ccl_device_inline int4 operator<=(const float4 &a, const float4 &b)
# endif
}
-ccl_device_inline bool operator==(const float4 &a, const float4 &b)
+ccl_device_inline bool operator==(const float4 a, const float4 b)
{
# ifdef __KERNEL_SSE__
return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 15) == 15;
@@ -240,160 +172,148 @@ ccl_device_inline bool operator==(const float4 &a, const float4 &b)
# endif
}
-ccl_device_inline float distance(const float4 &a, const float4 &b)
-{
- return len(a - b);
-}
-
-ccl_device_inline float dot(const float4 &a, const float4 &b)
+ccl_device_inline const float4 operator^(const float4 a, const float4 b)
{
-# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
-# if defined(__KERNEL_NEON__)
- __m128 t = vmulq_f32(a, b);
- return vaddvq_f32(t);
-# else
- return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF));
-# endif
+# ifdef __KERNEL_SSE__
+ return float4(_mm_xor_ps(a.m128, b.m128));
# else
- return (a.x * b.x + a.y * b.y) + (a.z * b.z + a.w * b.w);
+ return make_float4(__uint_as_float(__float_as_uint(a.x) ^ __float_as_uint(b.x)),
+ __uint_as_float(__float_as_uint(a.y) ^ __float_as_uint(b.y)),
+ __uint_as_float(__float_as_uint(a.z) ^ __float_as_uint(b.z)),
+ __uint_as_float(__float_as_uint(a.w) ^ __float_as_uint(b.w)));
# endif
}
-ccl_device_inline float len_squared(const float4 &a)
-{
- return dot(a, a);
-}
-
-ccl_device_inline float4 rcp(const float4 &a)
+ccl_device_inline float4 min(const float4 a, const float4 b)
{
# ifdef __KERNEL_SSE__
- /* Don't use _mm_rcp_ps due to poor precision. */
- return float4(_mm_div_ps(_mm_set_ps1(1.0f), a.m128));
+ return float4(_mm_min_ps(a.m128, b.m128));
# else
- return make_float4(1.0f / a.x, 1.0f / a.y, 1.0f / a.z, 1.0f / a.w);
+ return make_float4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w));
# endif
}
-ccl_device_inline float4 sqrt(const float4 &a)
+ccl_device_inline float4 max(const float4 a, const float4 b)
{
# ifdef __KERNEL_SSE__
- return float4(_mm_sqrt_ps(a.m128));
+ return float4(_mm_max_ps(a.m128, b.m128));
# else
- return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w));
+ return make_float4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w));
# endif
}
-ccl_device_inline float4 sqr(const float4 &a)
+ccl_device_inline float4 clamp(const float4 a, const float4 mn, const float4 mx)
{
- return a * a;
+ return min(max(a, mn), mx);
}
+#endif /* !__KERNEL_METAL__*/
-ccl_device_inline float4 cross(const float4 &a, const float4 &b)
+ccl_device_inline const float4 madd(const float4 a, const float4 b, const float4 c)
{
-# ifdef __KERNEL_SSE__
- return (shuffle<1, 2, 0, 0>(a) * shuffle<2, 0, 1, 0>(b)) -
- (shuffle<2, 0, 1, 0>(a) * shuffle<1, 2, 0, 0>(b));
+#ifdef __KERNEL_SSE__
+# ifdef __KERNEL_NEON__
+ return float4(vfmaq_f32(c, a, b));
+# elif defined(__KERNEL_AVX2__)
+ return float4(_mm_fmadd_ps(a, b, c));
# else
- return make_float4(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x, 0.0f);
+ return a * b + c;
# endif
+#else
+ return a * b + c;
+#endif
}
-ccl_device_inline bool is_zero(const float4 &a)
+ccl_device_inline float4 msub(const float4 a, const float4 b, const float4 c)
{
-# ifdef __KERNEL_SSE__
- return a == zero_float4();
+#ifdef __KERNEL_SSE__
+# ifdef __KERNEL_NEON__
+ return float4(vfmaq_f32(vnegq_f32(c), a, b));
+# elif defined(__KERNEL_AVX2__)
+ return float4(_mm_fmsub_ps(a, b, c));
# else
- return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f);
+ return a * b - c;
# endif
+#else
+ return a * b - c;
+#endif
}
-ccl_device_inline float average(const float4 &a)
+#ifdef __KERNEL_SSE__
+template<size_t i0, size_t i1, size_t i2, size_t i3>
+__forceinline const float4 shuffle(const float4 b)
{
- return reduce_add(a) * 0.25f;
+# ifdef __KERNEL_NEON__
+ return float4(shuffle_neon<float32x4_t, i0, i1, i2, i3>(b.m128));
+# else
+ return float4(
+ _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(i3, i2, i1, i0))));
+# endif
}
-ccl_device_inline float len(const float4 &a)
+template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 a)
{
- return sqrtf(dot(a, a));
+ return float4(_mm_movelh_ps(a, a));
}
-ccl_device_inline float4 normalize(const float4 &a)
+template<> __forceinline const float4 shuffle<2, 3, 2, 3>(const float4 a)
{
- return a / len(a);
+ return float4(_mm_movehl_ps(a, a));
}
-ccl_device_inline float4 safe_normalize(const float4 &a)
+# ifdef __KERNEL_SSE3__
+template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4 b)
{
- float t = len(a);
- return (t != 0.0f) ? a / t : a;
+ return float4(_mm_moveldup_ps(b));
}
-ccl_device_inline float4 min(const float4 &a, const float4 &b)
+template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4 b)
{
-# ifdef __KERNEL_SSE__
- return float4(_mm_min_ps(a.m128, b.m128));
-# else
- return make_float4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w));
-# endif
+ return float4(_mm_movehdup_ps(b));
}
+# endif /* __KERNEL_SSE3__ */
-ccl_device_inline float4 max(const float4 &a, const float4 &b)
+template<size_t i0, size_t i1, size_t i2, size_t i3>
+__forceinline const float4 shuffle(const float4 a, const float4 b)
{
-# ifdef __KERNEL_SSE__
- return float4(_mm_max_ps(a.m128, b.m128));
+# ifdef __KERNEL_NEON__
+ return float4(shuffle_neon<float32x4_t, i0, i1, i2, i3>(a, b));
# else
- return make_float4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w));
+ return float4(_mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)));
# endif
}
-ccl_device_inline float4 clamp(const float4 &a, const float4 &mn, const float4 &mx)
+template<size_t i0> __forceinline const float4 shuffle(const float4 b)
{
- return min(max(a, mn), mx);
+ return shuffle<i0, i0, i0, i0>(b);
}
-
-ccl_device_inline float4 fabs(const float4 &a)
+template<size_t i0> __forceinline const float4 shuffle(const float4 a, const float4 b)
{
-# if defined(__KERNEL_SSE__)
-# if defined(__KERNEL_NEON__)
- return float4(vabsq_f32(a));
-# else
- return float4(_mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff))));
-# endif
-# else
- return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
-# endif
-}
-
-ccl_device_inline float4 floor(const float4 &a)
-{
-# ifdef __KERNEL_SSE__
- return float4(_mm_floor_ps(a));
+# ifdef __KERNEL_NEON__
+ return float4(shuffle_neon<float32x4_t, i0, i0, i0, i0>(a, b));
# else
- return make_float4(floorf(a.x), floorf(a.y), floorf(a.z), floorf(a.w));
+ return float4(_mm_shuffle_ps(a, b, _MM_SHUFFLE(i0, i0, i0, i0)));
# endif
}
-ccl_device_inline float4 mix(const float4 &a, const float4 &b, float t)
+template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 a, const float4 b)
{
- return a + t * (b - a);
+ return float4(_mm_movelh_ps(a, b));
}
-ccl_device_inline float4 saturate(const float4 &a)
+template<> __forceinline const float4 shuffle<2, 3, 2, 3>(const float4 a, const float4 b)
{
- return make_float4(saturatef(a.x), saturatef(a.y), saturatef(a.z), saturatef(a.w));
+ return float4(_mm_movehl_ps(b, a));
}
-ccl_device_inline float4 exp(float4 v)
+template<size_t i> __forceinline float extract(const float4 a)
{
- return make_float4(expf(v.x), expf(v.y), expf(v.z), expf(v.z));
+ return _mm_cvtss_f32(shuffle<i, i, i, i>(a));
}
-
-ccl_device_inline float4 log(float4 v)
+template<> __forceinline float extract<0>(const float4 a)
{
- return make_float4(logf(v.x), logf(v.y), logf(v.z), logf(v.z));
+ return _mm_cvtss_f32(a);
}
-
-#endif /* !__KERNEL_METAL__*/
+#endif
ccl_device_inline float reduce_add(const float4 a)
{
@@ -440,77 +360,192 @@ ccl_device_inline float reduce_max(const float4 a)
#endif
}
-ccl_device_inline bool isequal(const float4 a, const float4 b)
+#if !defined(__KERNEL_METAL__)
+ccl_device_inline float dot(const float4 a, const float4 b)
{
-#if defined(__KERNEL_METAL__)
- return all(a == b);
-#else
- return a == b;
-#endif
+# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+# if defined(__KERNEL_NEON__)
+ __m128 t = vmulq_f32(a, b);
+ return vaddvq_f32(t);
+# else
+ return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF));
+# endif
+# else
+ return (a.x * b.x + a.y * b.y) + (a.z * b.z + a.w * b.w);
+# endif
}
+#endif /* !defined(__KERNEL_METAL__) */
-#ifdef __KERNEL_SSE__
-template<size_t index_0, size_t index_1, size_t index_2, size_t index_3>
-__forceinline const float4 shuffle(const float4 &b)
+ccl_device_inline float len(const float4 a)
{
-# if defined(__KERNEL_NEON__)
- return float4(shuffle_neon<__m128, index_0, index_1, index_2, index_3>(b.m128));
+ return sqrtf(dot(a, a));
+}
+
+ccl_device_inline float len_squared(const float4 a)
+{
+ return dot(a, a);
+}
+
+#if !defined(__KERNEL_METAL__)
+ccl_device_inline float distance(const float4 a, const float4 b)
+{
+ return len(a - b);
+}
+
+ccl_device_inline float4 rcp(const float4 a)
+{
+# ifdef __KERNEL_SSE__
+ /* Don't use _mm_rcp_ps due to poor precision. */
+ return float4(_mm_div_ps(_mm_set_ps1(1.0f), a.m128));
# else
- return float4(_mm_castsi128_ps(
- _mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(index_3, index_2, index_1, index_0))));
+ return make_float4(1.0f / a.x, 1.0f / a.y, 1.0f / a.z, 1.0f / a.w);
# endif
}
-template<size_t index_0, size_t index_1, size_t index_2, size_t index_3>
-__forceinline const float4 shuffle(const float4 &a, const float4 &b)
+ccl_device_inline float4 sqrt(const float4 a)
{
-# if defined(__KERNEL_NEON__)
- return float4(shuffle_neon<__m128, index_0, index_1, index_2, index_3>(a.m128, b.m128));
+# ifdef __KERNEL_SSE__
+ return float4(_mm_sqrt_ps(a.m128));
# else
- return float4(_mm_shuffle_ps(a.m128, b.m128, _MM_SHUFFLE(index_3, index_2, index_1, index_0)));
+ return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w));
# endif
}
-template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 &b)
+ccl_device_inline float4 sqr(const float4 a)
{
- return float4(_mm_castpd_ps(_mm_movedup_pd(_mm_castps_pd(b))));
+ return a * a;
}
-template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 &a, const float4 &b)
+ccl_device_inline float4 cross(const float4 a, const float4 b)
{
- return float4(_mm_movelh_ps(a.m128, b.m128));
+# ifdef __KERNEL_SSE__
+ return (shuffle<1, 2, 0, 0>(a) * shuffle<2, 0, 1, 0>(b)) -
+ (shuffle<2, 0, 1, 0>(a) * shuffle<1, 2, 0, 0>(b));
+# else
+ return make_float4(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x, 0.0f);
+# endif
}
-template<> __forceinline const float4 shuffle<2, 3, 2, 3>(const float4 &a, const float4 &b)
+ccl_device_inline bool is_zero(const float4 a)
{
- return float4(_mm_movehl_ps(b.m128, a.m128));
+# ifdef __KERNEL_SSE__
+ return a == zero_float4();
+# else
+ return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f);
+# endif
}
-# ifdef __KERNEL_SSE3__
-template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4 &b)
+ccl_device_inline float average(const float4 a)
{
- return float4(_mm_moveldup_ps(b));
+ return reduce_add(a) * 0.25f;
}
-template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4 &b)
+ccl_device_inline float4 normalize(const float4 a)
{
- return float4(_mm_movehdup_ps(b));
+ return a / len(a);
+}
+
+ccl_device_inline float4 safe_normalize(const float4 a)
+{
+ float t = len(a);
+ return (t != 0.0f) ? a / t : a;
+}
+
+ccl_device_inline float4 fabs(const float4 a)
+{
+# if defined(__KERNEL_SSE__)
+# if defined(__KERNEL_NEON__)
+ return float4(vabsq_f32(a));
+# else
+ return float4(_mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff))));
+# endif
+# else
+ return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
+# endif
+}
+
+ccl_device_inline float4 floor(const float4 a)
+{
+# ifdef __KERNEL_SSE__
+# if defined(__KERNEL_NEON__)
+ return float4(vrndmq_f32(a));
+# else
+ return float4(_mm_floor_ps(a));
+# endif
+# else
+ return make_float4(floorf(a.x), floorf(a.y), floorf(a.z), floorf(a.w));
+# endif
+}
+
+ccl_device_inline float4 floorfrac(const float4 x, ccl_private int4 *i)
+{
+# ifdef __KERNEL_SSE__
+ const float4 f = floor(x);
+ *i = int4(_mm_cvttps_epi32(f.m128));
+ return x - f;
+# else
+ float4 r;
+ r.x = floorfrac(x.x, &i->x);
+ r.y = floorfrac(x.y, &i->y);
+ r.z = floorfrac(x.z, &i->z);
+ r.w = floorfrac(x.w, &i->w);
+ return r;
+# endif
+}
+
+ccl_device_inline float4 mix(const float4 a, const float4 b, float t)
+{
+ return a + t * (b - a);
+}
+
+ccl_device_inline float4 mix(const float4 a, const float4 b, const float4 t)
+{
+ return a + t * (b - a);
+}
+
+ccl_device_inline float4 saturate(const float4 a)
+{
+ return make_float4(saturatef(a.x), saturatef(a.y), saturatef(a.z), saturatef(a.w));
+}
+
+ccl_device_inline float4 exp(float4 v) /* Component-wise expf() of x, y, z and w. */
+{
+ return make_float4(expf(v.x), expf(v.y), expf(v.z), expf(v.w));
+}
+
+ccl_device_inline float4 log(float4 v) /* Component-wise logf() of x, y, z and w. */
+{
+ return make_float4(logf(v.x), logf(v.y), logf(v.z), logf(v.w));
+}
+
+#endif /* !__KERNEL_METAL__*/
+
+ccl_device_inline bool isequal(const float4 a, const float4 b)
+{
+#if defined(__KERNEL_METAL__)
+ return all(a == b);
+#else
+ return a == b;
+#endif
}
-# endif /* __KERNEL_SSE3__ */
-#endif /* __KERNEL_SSE__ */
#ifndef __KERNEL_GPU__
-ccl_device_inline float4 select(const int4 &mask, const float4 &a, const float4 &b)
+ccl_device_inline float4 select(const int4 mask, const float4 a, const float4 b)
{
# ifdef __KERNEL_SSE__
+# ifdef __KERNEL_SSE41__
return float4(_mm_blendv_ps(b.m128, a.m128, _mm_castsi128_ps(mask.m128)));
+# else
+ return float4(
+ _mm_or_ps(_mm_and_ps(_mm_castsi128_ps(mask), a), _mm_andnot_ps(_mm_castsi128_ps(mask), b)));
+# endif
# else
return make_float4(
(mask.x) ? a.x : b.x, (mask.y) ? a.y : b.y, (mask.z) ? a.z : b.z, (mask.w) ? a.w : b.w);
# endif
}
-ccl_device_inline float4 mask(const int4 &mask, const float4 &a)
+ccl_device_inline float4 mask(const int4 mask, const float4 a)
{
/* Replace elements of x with zero where mask isn't set. */
return select(mask, a, zero_float4());
diff --git a/intern/cycles/util/math_float8.h b/intern/cycles/util/math_float8.h
index b538cfbe70b..755a720a10b 100644
--- a/intern/cycles/util/math_float8.h
+++ b/intern/cycles/util/math_float8.h
@@ -1,4 +1,5 @@
/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2013 Intel Corporation
* Copyright 2022 Blender Foundation */
#ifndef __UTIL_MATH_FLOAT8_H__
@@ -10,193 +11,138 @@
CCL_NAMESPACE_BEGIN
-/*******************************************************************************
- * Declaration.
- */
-
-ccl_device_inline float8_t operator+(const float8_t a, const float8_t b);
-ccl_device_inline float8_t operator+(const float8_t a, const float f);
-ccl_device_inline float8_t operator+(const float f, const float8_t a);
-
-ccl_device_inline float8_t operator-(const float8_t a);
-ccl_device_inline float8_t operator-(const float8_t a, const float8_t b);
-ccl_device_inline float8_t operator-(const float8_t a, const float f);
-ccl_device_inline float8_t operator-(const float f, const float8_t a);
-
-ccl_device_inline float8_t operator*(const float8_t a, const float8_t b);
-ccl_device_inline float8_t operator*(const float8_t a, const float f);
-ccl_device_inline float8_t operator*(const float f, const float8_t a);
-
-ccl_device_inline float8_t operator/(const float8_t a, const float8_t b);
-ccl_device_inline float8_t operator/(const float8_t a, float f);
-ccl_device_inline float8_t operator/(const float f, const float8_t a);
-
-ccl_device_inline float8_t operator+=(float8_t a, const float8_t b);
-
-ccl_device_inline float8_t operator*=(float8_t a, const float8_t b);
-ccl_device_inline float8_t operator*=(float8_t a, float f);
-
-ccl_device_inline float8_t operator/=(float8_t a, float f);
-
-ccl_device_inline bool operator==(const float8_t a, const float8_t b);
-
-ccl_device_inline float8_t rcp(const float8_t a);
-ccl_device_inline float8_t sqrt(const float8_t a);
-ccl_device_inline float8_t sqr(const float8_t a);
-ccl_device_inline bool is_zero(const float8_t a);
-ccl_device_inline float average(const float8_t a);
-ccl_device_inline float8_t min(const float8_t a, const float8_t b);
-ccl_device_inline float8_t max(const float8_t a, const float8_t b);
-ccl_device_inline float8_t clamp(const float8_t a, const float8_t mn, const float8_t mx);
-ccl_device_inline float8_t fabs(const float8_t a);
-ccl_device_inline float8_t mix(const float8_t a, const float8_t b, float t);
-ccl_device_inline float8_t saturate(const float8_t a);
-
-ccl_device_inline float8_t safe_divide(const float8_t a, const float b);
-ccl_device_inline float8_t safe_divide(const float8_t a, const float8_t b);
-
-ccl_device_inline float reduce_min(const float8_t a);
-ccl_device_inline float reduce_max(const float8_t a);
-ccl_device_inline float reduce_add(const float8_t a);
-
-ccl_device_inline bool isequal(const float8_t a, const float8_t b);
-
-/*******************************************************************************
- * Definition.
- */
-
-ccl_device_inline float8_t zero_float8_t()
+ccl_device_inline vfloat8 zero_vfloat8()
{
-#ifdef __KERNEL_AVX2__
- return float8_t(_mm256_setzero_ps());
+#ifdef __KERNEL_AVX__
+ return vfloat8(_mm256_setzero_ps());
#else
- return make_float8_t(0.0f);
+ return make_vfloat8(0.0f);
#endif
}
-ccl_device_inline float8_t one_float8_t()
+ccl_device_inline vfloat8 one_vfloat8()
{
- return make_float8_t(1.0f);
+ return make_vfloat8(1.0f);
}
-ccl_device_inline float8_t operator+(const float8_t a, const float8_t b)
+ccl_device_inline vfloat8 operator+(const vfloat8 a, const vfloat8 b)
{
-#ifdef __KERNEL_AVX2__
- return float8_t(_mm256_add_ps(a.m256, b.m256));
+#ifdef __KERNEL_AVX__
+ return vfloat8(_mm256_add_ps(a.m256, b.m256));
#else
- return make_float8_t(
+ return make_vfloat8(
a.a + b.a, a.b + b.b, a.c + b.c, a.d + b.d, a.e + b.e, a.f + b.f, a.g + b.g, a.h + b.h);
#endif
}
-ccl_device_inline float8_t operator+(const float8_t a, const float f)
+ccl_device_inline vfloat8 operator+(const vfloat8 a, const float f)
{
- return a + make_float8_t(f);
+ return a + make_vfloat8(f);
}
-ccl_device_inline float8_t operator+(const float f, const float8_t a)
+ccl_device_inline vfloat8 operator+(const float f, const vfloat8 a)
{
- return make_float8_t(f) + a;
+ return make_vfloat8(f) + a;
}
-ccl_device_inline float8_t operator-(const float8_t a)
+ccl_device_inline vfloat8 operator-(const vfloat8 a)
{
-#ifdef __KERNEL_AVX2__
+#ifdef __KERNEL_AVX__
__m256 mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x80000000));
- return float8_t(_mm256_xor_ps(a.m256, mask));
+ return vfloat8(_mm256_xor_ps(a.m256, mask));
#else
- return make_float8_t(-a.a, -a.b, -a.c, -a.d, -a.e, -a.f, -a.g, -a.h);
+ return make_vfloat8(-a.a, -a.b, -a.c, -a.d, -a.e, -a.f, -a.g, -a.h);
#endif
}
-ccl_device_inline float8_t operator-(const float8_t a, const float8_t b)
+ccl_device_inline vfloat8 operator-(const vfloat8 a, const vfloat8 b)
{
-#ifdef __KERNEL_AVX2__
- return float8_t(_mm256_sub_ps(a.m256, b.m256));
+#ifdef __KERNEL_AVX__
+ return vfloat8(_mm256_sub_ps(a.m256, b.m256));
#else
- return make_float8_t(
+ return make_vfloat8(
a.a - b.a, a.b - b.b, a.c - b.c, a.d - b.d, a.e - b.e, a.f - b.f, a.g - b.g, a.h - b.h);
#endif
}
-ccl_device_inline float8_t operator-(const float8_t a, const float f)
+ccl_device_inline vfloat8 operator-(const vfloat8 a, const float f)
{
- return a - make_float8_t(f);
+ return a - make_vfloat8(f);
}
-ccl_device_inline float8_t operator-(const float f, const float8_t a)
+ccl_device_inline vfloat8 operator-(const float f, const vfloat8 a)
{
- return make_float8_t(f) - a;
+ return make_vfloat8(f) - a;
}
-ccl_device_inline float8_t operator*(const float8_t a, const float8_t b)
+ccl_device_inline vfloat8 operator*(const vfloat8 a, const vfloat8 b)
{
-#ifdef __KERNEL_AVX2__
- return float8_t(_mm256_mul_ps(a.m256, b.m256));
+#ifdef __KERNEL_AVX__
+ return vfloat8(_mm256_mul_ps(a.m256, b.m256));
#else
- return make_float8_t(
+ return make_vfloat8(
a.a * b.a, a.b * b.b, a.c * b.c, a.d * b.d, a.e * b.e, a.f * b.f, a.g * b.g, a.h * b.h);
#endif
}
-ccl_device_inline float8_t operator*(const float8_t a, const float f)
+ccl_device_inline vfloat8 operator*(const vfloat8 a, const float f)
{
- return a * make_float8_t(f);
+ return a * make_vfloat8(f);
}
-ccl_device_inline float8_t operator*(const float f, const float8_t a)
+ccl_device_inline vfloat8 operator*(const float f, const vfloat8 a)
{
- return make_float8_t(f) * a;
+ return make_vfloat8(f) * a;
}
-ccl_device_inline float8_t operator/(const float8_t a, const float8_t b)
+ccl_device_inline vfloat8 operator/(const vfloat8 a, const vfloat8 b)
{
-#ifdef __KERNEL_AVX2__
- return float8_t(_mm256_div_ps(a.m256, b.m256));
+#ifdef __KERNEL_AVX__
+ return vfloat8(_mm256_div_ps(a.m256, b.m256));
#else
- return make_float8_t(
+ return make_vfloat8(
a.a / b.a, a.b / b.b, a.c / b.c, a.d / b.d, a.e / b.e, a.f / b.f, a.g / b.g, a.h / b.h);
#endif
}
-ccl_device_inline float8_t operator/(const float8_t a, const float f)
+ccl_device_inline vfloat8 operator/(const vfloat8 a, const float f)
{
- return a / make_float8_t(f);
+ return a / make_vfloat8(f);
}
-ccl_device_inline float8_t operator/(const float f, const float8_t a)
+ccl_device_inline vfloat8 operator/(const float f, const vfloat8 a)
{
- return make_float8_t(f) / a;
+ return make_vfloat8(f) / a;
}
-ccl_device_inline float8_t operator+=(float8_t a, const float8_t b)
+/* Compound addition. The left operand is taken by reference so that `a += b` really updates
+ * the caller's vector (by value only a local copy was modified); this matches the int4 and
+ * vint8 compound operators. Returns the updated value. */
+ccl_device_inline vfloat8 operator+=(vfloat8 &a, const vfloat8 b)
{
return a = a + b;
}
-ccl_device_inline float8_t operator-=(float8_t a, const float8_t b)
+/* Compound subtraction. The left operand is taken by reference so that `a -= b` really updates
+ * the caller's vector (by value only a local copy was modified); this matches the int4 and
+ * vint8 compound operators. Returns the updated value. */
+ccl_device_inline vfloat8 operator-=(vfloat8 &a, const vfloat8 b)
{
return a = a - b;
}
-ccl_device_inline float8_t operator*=(float8_t a, const float8_t b)
+/* Compound component-wise multiplication. The left operand is taken by reference so that
+ * `a *= b` really updates the caller's vector (by value only a local copy was modified).
+ * Returns the updated value. */
+ccl_device_inline vfloat8 operator*=(vfloat8 &a, const vfloat8 b)
{
return a = a * b;
}
-ccl_device_inline float8_t operator*=(float8_t a, float f)
+/* Compound multiplication by a scalar. The left operand is taken by reference so that
+ * `a *= f` really updates the caller's vector (by value only a local copy was modified).
+ * Returns the updated value. */
+ccl_device_inline vfloat8 operator*=(vfloat8 &a, float f)
{
return a = a * f;
}
-ccl_device_inline float8_t operator/=(float8_t a, float f)
+/* Compound division by a scalar. The left operand is taken by reference so that `a /= f`
+ * really updates the caller's vector (by value only a local copy was modified).
+ * Returns the updated value. */
+ccl_device_inline vfloat8 operator/=(vfloat8 &a, float f)
{
return a = a / f;
}
-ccl_device_inline bool operator==(const float8_t a, const float8_t b)
+ccl_device_inline bool operator==(const vfloat8 a, const vfloat8 b)
{
-#ifdef __KERNEL_AVX2__
+#ifdef __KERNEL_AVX__
return (_mm256_movemask_ps(_mm256_castsi256_ps(
_mm256_cmpeq_epi32(_mm256_castps_si256(a.m256), _mm256_castps_si256(b.m256)))) &
0b11111111) == 0b11111111;
@@ -206,132 +152,180 @@ ccl_device_inline bool operator==(const float8_t a, const float8_t b)
#endif
}
-ccl_device_inline float8_t rcp(const float8_t a)
+ccl_device_inline const vfloat8 operator^(const vfloat8 a, const vfloat8 b)
{
-#ifdef __KERNEL_AVX2__
- return float8_t(_mm256_rcp_ps(a.m256));
+#ifdef __KERNEL_AVX__
+ return vfloat8(_mm256_xor_ps(a.m256, b.m256));
#else
- return make_float8_t(1.0f / a.a,
- 1.0f / a.b,
- 1.0f / a.c,
- 1.0f / a.d,
- 1.0f / a.e,
- 1.0f / a.f,
- 1.0f / a.g,
- 1.0f / a.h);
+ return make_vfloat8(__uint_as_float(__float_as_uint(a.a) ^ __float_as_uint(b.a)),
+ __uint_as_float(__float_as_uint(a.b) ^ __float_as_uint(b.b)),
+ __uint_as_float(__float_as_uint(a.c) ^ __float_as_uint(b.c)),
+ __uint_as_float(__float_as_uint(a.d) ^ __float_as_uint(b.d)),
+ __uint_as_float(__float_as_uint(a.e) ^ __float_as_uint(b.e)),
+ __uint_as_float(__float_as_uint(a.f) ^ __float_as_uint(b.f)),
+ __uint_as_float(__float_as_uint(a.g) ^ __float_as_uint(b.g)),
+ __uint_as_float(__float_as_uint(a.h) ^ __float_as_uint(b.h)));
#endif
}
-ccl_device_inline float8_t sqrt(const float8_t a)
+ccl_device_inline vfloat8 rcp(const vfloat8 a)
{
-#ifdef __KERNEL_AVX2__
- return float8_t(_mm256_sqrt_ps(a.m256));
+#ifdef __KERNEL_AVX__
+ return vfloat8(_mm256_rcp_ps(a.m256));
#else
- return make_float8_t(sqrtf(a.a),
- sqrtf(a.b),
- sqrtf(a.c),
- sqrtf(a.d),
- sqrtf(a.e),
- sqrtf(a.f),
- sqrtf(a.g),
- sqrtf(a.h));
+ return make_vfloat8(1.0f / a.a,
+ 1.0f / a.b,
+ 1.0f / a.c,
+ 1.0f / a.d,
+ 1.0f / a.e,
+ 1.0f / a.f,
+ 1.0f / a.g,
+ 1.0f / a.h);
#endif
}
-ccl_device_inline float8_t sqr(const float8_t a)
+/* Component-wise square root of all eight lanes. */
+ccl_device_inline vfloat8 sqrt(const vfloat8 a)
+{
+#ifndef __KERNEL_AVX__
+  /* Scalar fallback: one sqrtf() per lane. */
+  const float ra = sqrtf(a.a);
+  const float rb = sqrtf(a.b);
+  const float rc = sqrtf(a.c);
+  const float rd = sqrtf(a.d);
+  const float re = sqrtf(a.e);
+  const float rf = sqrtf(a.f);
+  const float rg = sqrtf(a.g);
+  const float rh = sqrtf(a.h);
+  return make_vfloat8(ra, rb, rc, rd, re, rf, rg, rh);
+#else
+  return vfloat8(_mm256_sqrt_ps(a.m256));
+#endif
+}
+
+ccl_device_inline vfloat8 sqr(const vfloat8 a)
{
return a * a;
}
-ccl_device_inline bool is_zero(const float8_t a)
+ccl_device_inline bool is_zero(const vfloat8 a)
{
- return a == make_float8_t(0.0f);
+ return a == make_vfloat8(0.0f);
}
-ccl_device_inline float average(const float8_t a)
+/* Horizontal sum: add all eight lanes of `a` into one float. */
+ccl_device_inline float reduce_add(const vfloat8 a)
+{
+#ifdef __KERNEL_AVX__
+  /* Two rounds of horizontal adds, then elements 0 and 4 of the result are added together. */
+  const vfloat8 pair_sums(_mm256_hadd_ps(a.m256, a.m256));
+  const vfloat8 quad_sums(_mm256_hadd_ps(pair_sums.m256, pair_sums.m256));
+  return quad_sums[0] + quad_sums[4];
+#else
+  /* Scalar fallback: accumulate the lanes from `a` to `h` in order. */
+  float total = a.a + a.b;
+  total = total + a.c;
+  total = total + a.d;
+  total = total + a.e;
+  total = total + a.f;
+  total = total + a.g;
+  total = total + a.h;
+  return total;
+#endif
+}
+
+ccl_device_inline float average(const vfloat8 a)
{
return reduce_add(a) / 8.0f;
}
-ccl_device_inline float8_t min(const float8_t a, const float8_t b)
+ccl_device_inline vfloat8 min(const vfloat8 a, const vfloat8 b)
{
-#ifdef __KERNEL_AVX2__
- return float8_t(_mm256_min_ps(a.m256, b.m256));
+#ifdef __KERNEL_AVX__
+ return vfloat8(_mm256_min_ps(a.m256, b.m256));
#else
- return make_float8_t(min(a.a, b.a),
- min(a.b, b.b),
- min(a.c, b.c),
- min(a.d, b.d),
- min(a.e, b.e),
- min(a.f, b.f),
- min(a.g, b.g),
- min(a.h, b.h));
+ return make_vfloat8(min(a.a, b.a),
+ min(a.b, b.b),
+ min(a.c, b.c),
+ min(a.d, b.d),
+ min(a.e, b.e),
+ min(a.f, b.f),
+ min(a.g, b.g),
+ min(a.h, b.h));
#endif
}
-ccl_device_inline float8_t max(const float8_t a, const float8_t b)
+ccl_device_inline vfloat8 max(const vfloat8 a, const vfloat8 b)
{
-#ifdef __KERNEL_AVX2__
- return float8_t(_mm256_max_ps(a.m256, b.m256));
+#ifdef __KERNEL_AVX__
+ return vfloat8(_mm256_max_ps(a.m256, b.m256));
#else
- return make_float8_t(max(a.a, b.a),
- max(a.b, b.b),
- max(a.c, b.c),
- max(a.d, b.d),
- max(a.e, b.e),
- max(a.f, b.f),
- max(a.g, b.g),
- max(a.h, b.h));
+ return make_vfloat8(max(a.a, b.a),
+ max(a.b, b.b),
+ max(a.c, b.c),
+ max(a.d, b.d),
+ max(a.e, b.e),
+ max(a.f, b.f),
+ max(a.g, b.g),
+ max(a.h, b.h));
#endif
}
-ccl_device_inline float8_t clamp(const float8_t a, const float8_t mn, const float8_t mx)
+ccl_device_inline vfloat8 clamp(const vfloat8 a, const vfloat8 mn, const vfloat8 mx)
{
return min(max(a, mn), mx);
}
-ccl_device_inline float8_t fabs(const float8_t a)
+ccl_device_inline vfloat8 select(const vint8 mask, const vfloat8 a, const vfloat8 b)
{
-#ifdef __KERNEL_AVX2__
- return float8_t(_mm256_and_ps(a.m256, _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff))));
+#ifdef __KERNEL_AVX__
+ return vfloat8(_mm256_blendv_ps(b, a, _mm256_castsi256_ps(mask)));
#else
- return make_float8_t(fabsf(a.a),
- fabsf(a.b),
- fabsf(a.c),
- fabsf(a.d),
- fabsf(a.e),
- fabsf(a.f),
- fabsf(a.g),
- fabsf(a.h));
+ return make_vfloat8((mask.a) ? a.a : b.a,
+ (mask.b) ? a.b : b.b,
+ (mask.c) ? a.c : b.c,
+ (mask.d) ? a.d : b.d,
+ (mask.e) ? a.e : b.e,
+ (mask.f) ? a.f : b.f,
+ (mask.g) ? a.g : b.g,
+ (mask.h) ? a.h : b.h);
#endif
}
-ccl_device_inline float8_t mix(const float8_t a, const float8_t b, float t)
+/* Component-wise absolute value. */
+ccl_device_inline vfloat8 fabs(const vfloat8 a)
+{
+#ifdef __KERNEL_AVX__
+  /* 0x7fffffff keeps every bit of a 32-bit lane except the top one, so the AND clears it. */
+  const __m256 keep_mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff));
+  return vfloat8(_mm256_and_ps(a.m256, keep_mask));
+#else
+  const float ra = fabsf(a.a);
+  const float rb = fabsf(a.b);
+  const float rc = fabsf(a.c);
+  const float rd = fabsf(a.d);
+  const float re = fabsf(a.e);
+  const float rf = fabsf(a.f);
+  const float rg = fabsf(a.g);
+  const float rh = fabsf(a.h);
+  return make_vfloat8(ra, rb, rc, rd, re, rf, rg, rh);
+#endif
+}
+
+/* Linearly interpolate from `a` towards `b`, using the same scalar factor `t` for all lanes. */
+ccl_device_inline vfloat8 mix(const vfloat8 a, const vfloat8 b, float t)
+{
+  const vfloat8 delta = b - a;
+  return a + t * delta;
+}
+
+ccl_device_inline vfloat8 mix(const vfloat8 a, const vfloat8 b, vfloat8 t)
{
return a + t * (b - a);
}
-ccl_device_inline float8_t saturate(const float8_t a)
+ccl_device_inline vfloat8 saturate(const vfloat8 a)
{
- return clamp(a, make_float8_t(0.0f), make_float8_t(1.0f));
+ return clamp(a, make_vfloat8(0.0f), make_vfloat8(1.0f));
}
-ccl_device_inline float8_t exp(float8_t v)
+ccl_device_inline vfloat8 exp(vfloat8 v)
{
- return make_float8_t(
+ return make_vfloat8(
expf(v.a), expf(v.b), expf(v.c), expf(v.d), expf(v.e), expf(v.f), expf(v.g), expf(v.h));
}
-ccl_device_inline float8_t log(float8_t v)
+ccl_device_inline vfloat8 log(vfloat8 v)
{
- return make_float8_t(
+ return make_vfloat8(
logf(v.a), logf(v.b), logf(v.c), logf(v.d), logf(v.e), logf(v.f), logf(v.g), logf(v.h));
}
-ccl_device_inline float dot(const float8_t a, const float8_t b)
+ccl_device_inline float dot(const vfloat8 a, const vfloat8 b)
{
-#ifdef __KERNEL_AVX2__
- float8_t t(_mm256_dp_ps(a.m256, b.m256, 0xFF));
+#ifdef __KERNEL_AVX__
+ vfloat8 t(_mm256_dp_ps(a.m256, b.m256, 0xFF));
return t[0] + t[4];
#else
return (a.a * b.a) + (a.b * b.b) + (a.c * b.c) + (a.d * b.d) + (a.e * b.e) + (a.f * b.f) +
@@ -339,62 +333,51 @@ ccl_device_inline float dot(const float8_t a, const float8_t b)
#endif
}
-ccl_device_inline float8_t pow(float8_t v, float e)
+ccl_device_inline vfloat8 pow(vfloat8 v, float e)
{
- return make_float8_t(powf(v.a, e),
- powf(v.b, e),
- powf(v.c, e),
- powf(v.d, e),
- powf(v.e, e),
- powf(v.f, e),
- powf(v.g, e),
- powf(v.h, e));
+ return make_vfloat8(powf(v.a, e),
+ powf(v.b, e),
+ powf(v.c, e),
+ powf(v.d, e),
+ powf(v.e, e),
+ powf(v.f, e),
+ powf(v.g, e),
+ powf(v.h, e));
}
-ccl_device_inline float reduce_min(const float8_t a)
+ccl_device_inline float reduce_min(const vfloat8 a)
{
return min(min(min(a.a, a.b), min(a.c, a.d)), min(min(a.e, a.f), min(a.g, a.h)));
}
-ccl_device_inline float reduce_max(const float8_t a)
+ccl_device_inline float reduce_max(const vfloat8 a)
{
return max(max(max(a.a, a.b), max(a.c, a.d)), max(max(a.e, a.f), max(a.g, a.h)));
}
-ccl_device_inline float reduce_add(const float8_t a)
-{
-#ifdef __KERNEL_AVX2__
- float8_t b(_mm256_hadd_ps(a.m256, a.m256));
- float8_t h(_mm256_hadd_ps(b.m256, b.m256));
- return h[0] + h[4];
-#else
- return a.a + a.b + a.c + a.d + a.e + a.f + a.g + a.h;
-#endif
-}
-
-ccl_device_inline bool isequal(const float8_t a, const float8_t b)
+ccl_device_inline bool isequal(const vfloat8 a, const vfloat8 b)
{
return a == b;
}
-ccl_device_inline float8_t safe_divide(const float8_t a, const float b)
+ccl_device_inline vfloat8 safe_divide(const vfloat8 a, const float b)
{
- return (b != 0.0f) ? a / b : make_float8_t(0.0f);
+ return (b != 0.0f) ? a / b : make_vfloat8(0.0f);
}
-ccl_device_inline float8_t safe_divide(const float8_t a, const float8_t b)
+ccl_device_inline vfloat8 safe_divide(const vfloat8 a, const vfloat8 b)
{
- return make_float8_t((b.a != 0.0f) ? a.a / b.a : 0.0f,
- (b.b != 0.0f) ? a.b / b.b : 0.0f,
- (b.c != 0.0f) ? a.c / b.c : 0.0f,
- (b.d != 0.0f) ? a.d / b.d : 0.0f,
- (b.e != 0.0f) ? a.e / b.e : 0.0f,
- (b.f != 0.0f) ? a.f / b.f : 0.0f,
- (b.g != 0.0f) ? a.g / b.g : 0.0f,
- (b.h != 0.0f) ? a.h / b.h : 0.0f);
+ return make_vfloat8((b.a != 0.0f) ? a.a / b.a : 0.0f,
+ (b.b != 0.0f) ? a.b / b.b : 0.0f,
+ (b.c != 0.0f) ? a.c / b.c : 0.0f,
+ (b.d != 0.0f) ? a.d / b.d : 0.0f,
+ (b.e != 0.0f) ? a.e / b.e : 0.0f,
+ (b.f != 0.0f) ? a.f / b.f : 0.0f,
+ (b.g != 0.0f) ? a.g / b.g : 0.0f,
+ (b.h != 0.0f) ? a.h / b.h : 0.0f);
}
-ccl_device_inline float8_t ensure_finite(float8_t v)
+ccl_device_inline vfloat8 ensure_finite(vfloat8 v)
{
v.a = ensure_finite(v.a);
v.b = ensure_finite(v.b);
@@ -408,12 +391,92 @@ ccl_device_inline float8_t ensure_finite(float8_t v)
return v;
}
-ccl_device_inline bool isfinite_safe(float8_t v)
+ccl_device_inline bool isfinite_safe(vfloat8 v)
{
return isfinite_safe(v.a) && isfinite_safe(v.b) && isfinite_safe(v.c) && isfinite_safe(v.d) &&
isfinite_safe(v.e) && isfinite_safe(v.f) && isfinite_safe(v.g) && isfinite_safe(v.h);
}
+/* Reinterpret the bits of every float lane as an int lane. This is a bit cast, not a numeric
+ * float-to-int conversion. */
+ccl_device_inline vint8 cast(const vfloat8 a)
+{
+#ifndef __KERNEL_AVX__
+  const int ia = __float_as_int(a.a);
+  const int ib = __float_as_int(a.b);
+  const int ic = __float_as_int(a.c);
+  const int id = __float_as_int(a.d);
+  const int ie = __float_as_int(a.e);
+  const int i_f = __float_as_int(a.f);
+  const int ig = __float_as_int(a.g);
+  const int ih = __float_as_int(a.h);
+  return make_vint8(ia, ib, ic, id, ie, i_f, ig, ih);
+#else
+  return vint8(_mm256_castps_si256(a));
+#endif
+}
+
+#ifdef __KERNEL_SSE__
+/* Return one four-float half of `a`: the AVX build extracts 128-bit lane 0, the fallback
+ * returns the components e, f, g, h.
+ * NOTE(review): presumably both paths are meant to select the same half -- confirm against the
+ * vfloat8 memory layout. */
+ccl_device_forceinline float4 low(const vfloat8 a)
+{
+#  ifndef __KERNEL_AVX__
+  return make_float4(a.e, a.f, a.g, a.h);
+#  else
+  const __m128 half = _mm256_extractf128_ps(a.m256, 0);
+  return float4(half);
+#  endif
+}
+/* Return the other four-float half of `a`: the AVX build extracts 128-bit lane 1, the fallback
+ * returns the components a, b, c, d.
+ * NOTE(review): presumably both paths are meant to select the same half -- confirm against the
+ * vfloat8 memory layout. */
+ccl_device_forceinline float4 high(const vfloat8 a)
+{
+#  ifndef __KERNEL_AVX__
+  return make_float4(a.a, a.b, a.c, a.d);
+#  else
+  const __m128 half = _mm256_extractf128_ps(a.m256, 1);
+  return float4(half);
+#  endif
+}
+
+/* Permute the elements of `a` using eight compile-time indices.
+ * As the fallback spells out, i0..i3 index elements 0..3 while i4..i7 are offset by 4 and so
+ * index elements 4..7: every index addresses its own four-element half. */
+template<int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7>
+ccl_device_forceinline const vfloat8 shuffle(const vfloat8 a)
+{
+#  ifdef __KERNEL_AVX__
+  const __m256i lane_indices = _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0);
+  return vfloat8(_mm256_permutevar_ps(a, lane_indices));
+#  else
+  const float r0 = a[i0];
+  const float r1 = a[i1];
+  const float r2 = a[i2];
+  const float r3 = a[i3];
+  const float r4 = a[i4 + 4];
+  const float r5 = a[i5 + 4];
+  const float r6 = a[i6 + 4];
+  const float r7 = a[i7 + 4];
+  return make_vfloat8(r0, r1, r2, r3, r4, r5, r6, r7);
+#  endif
+}
+
+/* Two-source shuffle with four compile-time indices.
+ * The fallback applies the float4 shuffle to the high() halves and to the low() halves of `a`
+ * and `b` separately and joins the two results. */
+template<size_t i0, size_t i1, size_t i2, size_t i3>
+ccl_device_forceinline const vfloat8 shuffle(const vfloat8 a, const vfloat8 b)
+{
+#  ifndef __KERNEL_AVX__
+  const float4 from_high = shuffle<i0, i1, i2, i3>(high(a), high(b));
+  const float4 from_low = shuffle<i0, i1, i2, i3>(low(a), low(b));
+  return make_vfloat8(from_high, from_low);
+#  else
+  return vfloat8(_mm256_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)));
+#  endif
+}
+
+/* Single-source form of the four-index shuffle: `a` is used as both inputs. */
+template<size_t i0, size_t i1, size_t i2, size_t i3>
+ccl_device_forceinline const vfloat8 shuffle(const vfloat8 a)
+{
+  const vfloat8 shuffled = shuffle<i0, i1, i2, i3>(a, a);
+  return shuffled;
+}
+/* Two-source shuffle that uses the same index `i0` for all four selector positions. */
+template<size_t i0>
+ccl_device_forceinline const vfloat8 shuffle(const vfloat8 a, const vfloat8 b)
+{
+  const vfloat8 shuffled = shuffle<i0, i0, i0, i0>(a, b);
+  return shuffled;
+}
+/* Single-source, single-index shuffle: `a` is used as both inputs and `i0` as every selector. */
+template<size_t i0>
+ccl_device_forceinline const vfloat8 shuffle(const vfloat8 a)
+{
+  return shuffle<i0, i0, i0, i0>(a, a);
+}
+
+/* Read element `i` of `a` as a scalar float.
+ * The AVX path shuffles the selected element into position 0 and reads it with
+ * _mm256_cvtss_f32(); the fallback simply indexes the vector.
+ * NOTE(review): the AVX path goes through the four-index shuffle, so it presumably only agrees
+ * with `a[i]` for i < 4 -- confirm before using larger indices. */
+template<size_t i>
+ccl_device_forceinline float extract(const vfloat8 a)
+{
+#  ifndef __KERNEL_AVX__
+  return a[i];
+#  else
+  return _mm256_cvtss_f32(shuffle<i, i, i, i>(a).m256);
+#  endif
+}
+#endif
+
CCL_NAMESPACE_END
#endif /* __UTIL_MATH_FLOAT8_H__ */
diff --git a/intern/cycles/util/math_int2.h b/intern/cycles/util/math_int2.h
index f4d8a71221a..2df2ec5505b 100644
--- a/intern/cycles/util/math_int2.h
+++ b/intern/cycles/util/math_int2.h
@@ -10,23 +10,6 @@
CCL_NAMESPACE_BEGIN
-/*******************************************************************************
- * Declaration.
- */
-
-#if !defined(__KERNEL_METAL__)
-ccl_device_inline bool operator==(const int2 a, const int2 b);
-ccl_device_inline int2 operator+(const int2 &a, const int2 &b);
-ccl_device_inline int2 operator+=(int2 &a, const int2 &b);
-ccl_device_inline int2 operator-(const int2 &a, const int2 &b);
-ccl_device_inline int2 operator*(const int2 &a, const int2 &b);
-ccl_device_inline int2 operator/(const int2 &a, const int2 &b);
-#endif /* !__KERNEL_METAL__ */
-
-/*******************************************************************************
- * Definition.
- */
-
#if !defined(__KERNEL_METAL__)
ccl_device_inline bool operator==(const int2 a, const int2 b)
{
diff --git a/intern/cycles/util/math_int3.h b/intern/cycles/util/math_int3.h
index 48bffeaf553..b5b972ddfb5 100644
--- a/intern/cycles/util/math_int3.h
+++ b/intern/cycles/util/math_int3.h
@@ -10,21 +10,6 @@
CCL_NAMESPACE_BEGIN
-/*******************************************************************************
- * Declaration.
- */
-
-#if !defined(__KERNEL_METAL__)
-ccl_device_inline int3 min(int3 a, int3 b);
-ccl_device_inline int3 max(int3 a, int3 b);
-ccl_device_inline int3 clamp(const int3 &a, int mn, int mx);
-ccl_device_inline int3 clamp(const int3 &a, int3 &mn, int mx);
-#endif /* !defined(__KERNEL_METAL__) */
-
-/*******************************************************************************
- * Definition.
- */
-
#if !defined(__KERNEL_METAL__)
ccl_device_inline int3 min(int3 a, int3 b)
{
@@ -44,7 +29,7 @@ ccl_device_inline int3 max(int3 a, int3 b)
# endif
}
-ccl_device_inline int3 clamp(const int3 &a, int mn, int mx)
+ccl_device_inline int3 clamp(const int3 a, int mn, int mx)
{
# ifdef __KERNEL_SSE__
return min(max(a, make_int3(mn)), make_int3(mx));
@@ -53,7 +38,7 @@ ccl_device_inline int3 clamp(const int3 &a, int mn, int mx)
# endif
}
-ccl_device_inline int3 clamp(const int3 &a, int3 &mn, int mx)
+ccl_device_inline int3 clamp(const int3 a, int3 &mn, int mx)
{
# ifdef __KERNEL_SSE__
return min(max(a, mn), make_int3(mx));
@@ -62,22 +47,22 @@ ccl_device_inline int3 clamp(const int3 &a, int3 &mn, int mx)
# endif
}
-ccl_device_inline bool operator==(const int3 &a, const int3 &b)
+ccl_device_inline bool operator==(const int3 a, const int3 b)
{
return a.x == b.x && a.y == b.y && a.z == b.z;
}
-ccl_device_inline bool operator!=(const int3 &a, const int3 &b)
+ccl_device_inline bool operator!=(const int3 a, const int3 b)
{
return !(a == b);
}
-ccl_device_inline bool operator<(const int3 &a, const int3 &b)
+ccl_device_inline bool operator<(const int3 a, const int3 b)
{
return a.x < b.x && a.y < b.y && a.z < b.z;
}
-ccl_device_inline int3 operator+(const int3 &a, const int3 &b)
+ccl_device_inline int3 operator+(const int3 a, const int3 b)
{
# ifdef __KERNEL_SSE__
return int3(_mm_add_epi32(a.m128, b.m128));
@@ -86,7 +71,7 @@ ccl_device_inline int3 operator+(const int3 &a, const int3 &b)
# endif
}
-ccl_device_inline int3 operator-(const int3 &a, const int3 &b)
+ccl_device_inline int3 operator-(const int3 a, const int3 b)
{
# ifdef __KERNEL_SSE__
return int3(_mm_sub_epi32(a.m128, b.m128));
diff --git a/intern/cycles/util/math_int4.h b/intern/cycles/util/math_int4.h
index fbdada223cb..c6d767d7587 100644
--- a/intern/cycles/util/math_int4.h
+++ b/intern/cycles/util/math_int4.h
@@ -1,4 +1,5 @@
/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2013 Intel Corporation
* Copyright 2011-2022 Blender Foundation */
#ifndef __UTIL_MATH_INT4_H__
@@ -10,30 +11,8 @@
CCL_NAMESPACE_BEGIN
-/*******************************************************************************
- * Declaration.
- */
-
#ifndef __KERNEL_GPU__
-ccl_device_inline int4 operator+(const int4 &a, const int4 &b);
-ccl_device_inline int4 operator+=(int4 &a, const int4 &b);
-ccl_device_inline int4 operator>>(const int4 &a, int i);
-ccl_device_inline int4 operator<<(const int4 &a, int i);
-ccl_device_inline int4 operator<(const int4 &a, const int4 &b);
-ccl_device_inline int4 operator>=(const int4 &a, const int4 &b);
-ccl_device_inline int4 operator&(const int4 &a, const int4 &b);
-ccl_device_inline int4 min(int4 a, int4 b);
-ccl_device_inline int4 max(int4 a, int4 b);
-ccl_device_inline int4 clamp(const int4 &a, const int4 &mn, const int4 &mx);
-ccl_device_inline int4 select(const int4 &mask, const int4 &a, const int4 &b);
-#endif /* __KERNEL_GPU__ */
-
-/*******************************************************************************
- * Definition.
- */
-
-#ifndef __KERNEL_GPU__
-ccl_device_inline int4 operator+(const int4 &a, const int4 &b)
+ccl_device_inline int4 operator+(const int4 a, const int4 b)
{
# ifdef __KERNEL_SSE__
return int4(_mm_add_epi32(a.m128, b.m128));
@@ -42,12 +21,26 @@ ccl_device_inline int4 operator+(const int4 &a, const int4 &b)
# endif
}
-ccl_device_inline int4 operator+=(int4 &a, const int4 &b)
+ccl_device_inline int4 operator+=(int4 &a, const int4 b)
{
return a = a + b;
}
-ccl_device_inline int4 operator>>(const int4 &a, int i)
+/* Component-wise integer subtraction. */
+ccl_device_inline int4 operator-(const int4 a, const int4 b)
+{
+#  ifndef __KERNEL_SSE__
+  return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
+#  else
+  return int4(_mm_sub_epi32(a.m128, b.m128));
+#  endif
+}
+
+/* Subtract `b` from `a` in place and return a copy of the updated value. */
+ccl_device_inline int4 operator-=(int4 &a, const int4 b)
+{
+  a = a - b;
+  return a;
+}
+
+ccl_device_inline int4 operator>>(const int4 a, int i)
{
# ifdef __KERNEL_SSE__
return int4(_mm_srai_epi32(a.m128, i));
@@ -56,7 +49,7 @@ ccl_device_inline int4 operator>>(const int4 &a, int i)
# endif
}
-ccl_device_inline int4 operator<<(const int4 &a, int i)
+ccl_device_inline int4 operator<<(const int4 a, int i)
{
# ifdef __KERNEL_SSE__
return int4(_mm_slli_epi32(a.m128, i));
@@ -65,7 +58,7 @@ ccl_device_inline int4 operator<<(const int4 &a, int i)
# endif
}
-ccl_device_inline int4 operator<(const int4 &a, const int4 &b)
+ccl_device_inline int4 operator<(const int4 a, const int4 b)
{
# ifdef __KERNEL_SSE__
return int4(_mm_cmplt_epi32(a.m128, b.m128));
@@ -74,7 +67,26 @@ ccl_device_inline int4 operator<(const int4 &a, const int4 &b)
# endif
}
-ccl_device_inline int4 operator>=(const int4 &a, const int4 &b)
+/* Compare every component of `a` against the scalar `b`, which is first broadcast to an int4. */
+ccl_device_inline int4 operator<(const int4 a, const int b)
+{
+  const int4 rhs = make_int4(b);
+  return a < rhs;
+}
+
+/* Component-wise equality that returns a per-component int4 result rather than a single bool.
+ * The SSE path returns the output of _mm_cmpeq_epi32(); the fallback stores the result of each
+ * scalar `==` comparison. */
+ccl_device_inline int4 operator==(const int4 a, const int4 b)
+{
+#  ifndef __KERNEL_SSE__
+  return make_int4(a.x == b.x, a.y == b.y, a.z == b.z, a.w == b.w);
+#  else
+  return int4(_mm_cmpeq_epi32(a.m128, b.m128));
+#  endif
+}
+
+/* Component-wise equality against the scalar `b`, which is first broadcast to an int4. */
+ccl_device_inline int4 operator==(const int4 a, const int b)
+{
+  const int4 rhs = make_int4(b);
+  return a == rhs;
+}
+
+ccl_device_inline int4 operator>=(const int4 a, const int4 b)
{
# ifdef __KERNEL_SSE__
return int4(_mm_xor_si128(_mm_set1_epi32(0xffffffff), _mm_cmplt_epi32(a.m128, b.m128)));
@@ -83,7 +95,12 @@ ccl_device_inline int4 operator>=(const int4 &a, const int4 &b)
# endif
}
-ccl_device_inline int4 operator&(const int4 &a, const int4 &b)
+/* Component-wise `>=` against the scalar `b`, which is first broadcast to an int4. */
+ccl_device_inline int4 operator>=(const int4 a, const int b)
+{
+  const int4 rhs = make_int4(b);
+  return a >= rhs;
+}
+
+ccl_device_inline int4 operator&(const int4 a, const int4 b)
{
# ifdef __KERNEL_SSE__
return int4(_mm_and_si128(a.m128, b.m128));
@@ -92,6 +109,97 @@ ccl_device_inline int4 operator&(const int4 &a, const int4 &b)
# endif
}
+/* Component-wise bitwise OR. */
+ccl_device_inline int4 operator|(const int4 a, const int4 b)
+{
+#  ifndef __KERNEL_SSE__
+  return make_int4(a.x | b.x, a.y | b.y, a.z | b.z, a.w | b.w);
+#  else
+  return int4(_mm_or_si128(a.m128, b.m128));
+#  endif
+}
+
+/* Component-wise bitwise XOR. */
+ccl_device_inline int4 operator^(const int4 a, const int4 b)
+{
+#  ifndef __KERNEL_SSE__
+  return make_int4(a.x ^ b.x, a.y ^ b.y, a.z ^ b.z, a.w ^ b.w);
+#  else
+  return int4(_mm_xor_si128(a.m128, b.m128));
+#  endif
+}
+
+/* Mixed scalar/vector bitwise operators: the int32_t operand is broadcast to an int4 with
+ * make_int4() and the vector/vector operator is applied. */
+
+ccl_device_inline int4 operator&(const int32_t a, const int4 b)
+{
+  const int4 lhs = make_int4(a);
+  return lhs & b;
+}
+
+ccl_device_inline int4 operator&(const int4 a, const int32_t b)
+{
+  const int4 rhs = make_int4(b);
+  return a & rhs;
+}
+
+ccl_device_inline int4 operator|(const int32_t a, const int4 b)
+{
+  const int4 lhs = make_int4(a);
+  return lhs | b;
+}
+
+ccl_device_inline int4 operator|(const int4 a, const int32_t b)
+{
+  const int4 rhs = make_int4(b);
+  return a | rhs;
+}
+
+ccl_device_inline int4 operator^(const int32_t a, const int4 b)
+{
+  const int4 lhs = make_int4(a);
+  return lhs ^ b;
+}
+
+ccl_device_inline int4 operator^(const int4 a, const int32_t b)
+{
+  const int4 rhs = make_int4(b);
+  return a ^ rhs;
+}
+
+/* Compound bitwise and shift assignments: each one applies the matching binary operator, stores
+ * the result back into `a` and returns a reference to `a`. */
+
+ccl_device_inline int4 &operator&=(int4 &a, const int4 b)
+{
+  a = a & b;
+  return a;
+}
+ccl_device_inline int4 &operator&=(int4 &a, const int32_t b)
+{
+  a = a & b;
+  return a;
+}
+
+ccl_device_inline int4 &operator|=(int4 &a, const int4 b)
+{
+  a = a | b;
+  return a;
+}
+ccl_device_inline int4 &operator|=(int4 &a, const int32_t b)
+{
+  a = a | b;
+  return a;
+}
+
+ccl_device_inline int4 &operator^=(int4 &a, const int4 b)
+{
+  a = a ^ b;
+  return a;
+}
+ccl_device_inline int4 &operator^=(int4 &a, const int32_t b)
+{
+  a = a ^ b;
+  return a;
+}
+
+ccl_device_inline int4 &operator<<=(int4 &a, const int32_t b)
+{
+  a = a << b;
+  return a;
+}
+ccl_device_inline int4 &operator>>=(int4 &a, const int32_t b)
+{
+  a = a >> b;
+  return a;
+}
+
+# ifdef __KERNEL_SSE__
+/* Shift every 32-bit component of `a` right by `b` bits using _mm_srli_epi32(), i.e. a logical
+ * shift (contrast with operator>>, which uses the arithmetic _mm_srai_epi32()). */
+ccl_device_forceinline const int4 srl(const int4 a, const int32_t b)
+{
+  const __m128i shifted = _mm_srli_epi32(a.m128, b);
+  return int4(shifted);
+}
+# endif
+
ccl_device_inline int4 min(int4 a, int4 b)
{
# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
@@ -110,12 +218,12 @@ ccl_device_inline int4 max(int4 a, int4 b)
# endif
}
-ccl_device_inline int4 clamp(const int4 &a, const int4 &mn, const int4 &mx)
+ccl_device_inline int4 clamp(const int4 a, const int4 mn, const int4 mx)
{
return min(max(a, mn), mx);
}
-ccl_device_inline int4 select(const int4 &mask, const int4 &a, const int4 &b)
+ccl_device_inline int4 select(const int4 mask, const int4 a, const int4 b)
{
# ifdef __KERNEL_SSE__
return int4(_mm_or_si128(_mm_and_si128(mask, a), _mm_andnot_si128(mask, b)));
@@ -135,6 +243,52 @@ ccl_device_inline int4 load_int4(const int *v)
}
#endif /* __KERNEL_GPU__ */
+/* Reinterpret the bits of every int component as a float component. This is a bit cast, not a
+ * numeric int-to-float conversion. */
+ccl_device_inline float4 cast(const int4 a)
+{
+#ifndef __KERNEL_SSE__
+  const float x = __int_as_float(a.x);
+  const float y = __int_as_float(a.y);
+  const float z = __int_as_float(a.z);
+  const float w = __int_as_float(a.w);
+  return make_float4(x, y, z, w);
+#else
+  return float4(_mm_castsi128_ps(a));
+#endif
+}
+
+#ifdef __KERNEL_SSE__
+/* Bitwise AND-NOT via _mm_andnot_si128(): the first operand `a` is the one that is inverted
+ * before being ANDed with `b`. */
+ccl_device_forceinline int4 andnot(const int4 a, const int4 b)
+{
+  const __m128i bits = _mm_andnot_si128(a.m128, b.m128);
+  return int4(bits);
+}
+
+/* Reorder the four components of `a` according to the compile-time indices i0..i3. */
+template<size_t i0, size_t i1, size_t i2, size_t i3>
+ccl_device_forceinline int4 shuffle(const int4 a)
+{
+#  ifndef __KERNEL_NEON__
+  return int4(_mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)));
+#  else
+  /* NEON build: shuffle on the native int32x4_t representation and convert back. */
+  const int32x4_t shuffled = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a));
+  return int4(vreinterpretq_m128i_s32(shuffled));
+#  endif
+}
+
+/* Two-source shuffle with compile-time indices i0..i3. The SSE path reinterprets both inputs
+ * as float vectors so it can use _mm_shuffle_ps(), then reinterprets the result back. */
+template<size_t i0, size_t i1, size_t i2, size_t i3>
+ccl_device_forceinline int4 shuffle(const int4 a, const int4 b)
+{
+#  ifndef __KERNEL_NEON__
+  const __m128 a_bits = _mm_castsi128_ps(a);
+  const __m128 b_bits = _mm_castsi128_ps(b);
+  return int4(_mm_castps_si128(_mm_shuffle_ps(a_bits, b_bits, _MM_SHUFFLE(i3, i2, i1, i0))));
+#  else
+  const int32x4_t shuffled = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a),
+                                                                     vreinterpretq_s32_m128i(b));
+  return int4(vreinterpretq_m128i_s32(shuffled));
+#  endif
+}
+
+/* Single-index shuffle: `i0` is used for all four selector positions. */
+template<size_t i0>
+ccl_device_forceinline int4 shuffle(const int4 b)
+{
+  const int4 shuffled = shuffle<i0, i0, i0, i0>(b);
+  return shuffled;
+}
+#endif
+
CCL_NAMESPACE_END
#endif /* __UTIL_MATH_INT4_H__ */
diff --git a/intern/cycles/util/math_int8.h b/intern/cycles/util/math_int8.h
new file mode 100644
index 00000000000..d150b0b74ec
--- /dev/null
+++ b/intern/cycles/util/math_int8.h
@@ -0,0 +1,355 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2013 Intel Corporation
+ * Copyright 2011-2022 Blender Foundation */
+
+#ifndef __UTIL_MATH_INT8_H__
+#define __UTIL_MATH_INT8_H__
+
+#ifndef __UTIL_MATH_H__
+# error "Do not include this file directly, include util/types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+/* Lane-wise sum of two 8-wide integer vectors. */
+ccl_device_inline vint8 operator+(const vint8 a, const vint8 b)
+{
+#  ifndef __KERNEL_AVX__
+  /* Scalar fallback: add the eight components one by one. */
+  return make_vint8(
+      a.a + b.a, a.b + b.b, a.c + b.c, a.d + b.d, a.e + b.e, a.f + b.f, a.g + b.g, a.h + b.h);
+#  else
+  const __m256i sum = _mm256_add_epi32(a.m256, b.m256);
+  return vint8(sum);
+#  endif
+}
+
+/* In-place lane-wise addition: stores a + b into `a` and returns the updated value
+ * by copy. */
+ccl_device_inline vint8 operator+=(vint8 &a, const vint8 b)
+{
+  a = a + b;
+  return a;
+}
+
+/* Lane-wise difference (a - b) of two 8-wide integer vectors. */
+ccl_device_inline vint8 operator-(const vint8 a, const vint8 b)
+{
+#  ifndef __KERNEL_AVX__
+  /* Scalar fallback: subtract the eight components one by one. */
+  return make_vint8(
+      a.a - b.a, a.b - b.b, a.c - b.c, a.d - b.d, a.e - b.e, a.f - b.f, a.g - b.g, a.h - b.h);
+#  else
+  const __m256i difference = _mm256_sub_epi32(a.m256, b.m256);
+  return vint8(difference);
+#  endif
+}
+
+/* In-place lane-wise subtraction: stores a - b into `a` and returns the updated value
+ * by copy. */
+ccl_device_inline vint8 operator-=(vint8 &a, const vint8 b)
+{
+  a = a - b;
+  return a;
+}
+
+/* Shift every lane right by `i` bits. The AVX path uses the arithmetic
+ * (sign-propagating) shift intrinsic; the fallback applies the built-in `>>` to each
+ * component. */
+ccl_device_inline vint8 operator>>(const vint8 a, int i)
+{
+#  ifndef __KERNEL_AVX__
+  return make_vint8(
+      a.a >> i, a.b >> i, a.c >> i, a.d >> i, a.e >> i, a.f >> i, a.g >> i, a.h >> i);
+#  else
+  const __m256i shifted = _mm256_srai_epi32(a.m256, i);
+  return vint8(shifted);
+#  endif
+}
+
+/* Shift every lane left by `i` bits. */
+ccl_device_inline vint8 operator<<(const vint8 a, int i)
+{
+#  ifndef __KERNEL_AVX__
+  return make_vint8(
+      a.a << i, a.b << i, a.c << i, a.d << i, a.e << i, a.f << i, a.g << i, a.h << i);
+#  else
+  const __m256i shifted = _mm256_slli_epi32(a.m256, i);
+  return vint8(shifted);
+#  endif
+}
+
+/* Lane-wise signed `a < b`.
+ * The AVX path evaluates it as `b > a` (only a greater-than compare intrinsic is used)
+ * and produces all-ones for true lanes; the fallback stores the plain C++ comparison
+ * result (1 or 0) in each lane.
+ * NOTE(review): the two paths therefore encode "true" differently. */
+ccl_device_inline vint8 operator<(const vint8 a, const vint8 b)
+{
+#  ifndef __KERNEL_AVX__
+  return make_vint8(
+      a.a < b.a, a.b < b.b, a.c < b.c, a.d < b.d, a.e < b.e, a.f < b.f, a.g < b.g, a.h < b.h);
+#  else
+  const __m256i b_greater = _mm256_cmpgt_epi32(b.m256, a.m256);
+  return vint8(b_greater);
+#  endif
+}
+
+/* Compare every lane of `a` against a single scalar by forwarding to the vector
+ * overload with make_vint8(b). */
+ccl_device_inline vint8 operator<(const vint8 a, const int b)
+{
+  const vint8 rhs = make_vint8(b);
+  return a < rhs;
+}
+
+/* Lane-wise equality test.
+ * The AVX path yields all-ones for equal lanes and zero otherwise; the fallback stores
+ * the plain C++ comparison result (1 or 0) in each lane. */
+ccl_device_inline vint8 operator==(const vint8 a, const vint8 b)
+{
+#  ifndef __KERNEL_AVX__
+  return make_vint8(
+      a.a == b.a, a.b == b.b, a.c == b.c, a.d == b.d, a.e == b.e, a.f == b.f, a.g == b.g,
+      a.h == b.h);
+#  else
+  const __m256i equal_mask = _mm256_cmpeq_epi32(a.m256, b.m256);
+  return vint8(equal_mask);
+#  endif
+}
+
+/* Equality test of every lane against a single scalar, forwarded to the vector
+ * overload with make_vint8(b). */
+ccl_device_inline vint8 operator==(const vint8 a, const int b)
+{
+  const vint8 rhs = make_vint8(b);
+  return a == rhs;
+}
+
+/* Lane-wise signed `a >= b`.
+ * The AVX path computes it as NOT(b > a): the greater-than mask is inverted by
+ * XOR-ing with an all-ones vector. The fallback stores the plain C++ comparison
+ * result (1 or 0) in each lane. */
+ccl_device_inline vint8 operator>=(const vint8 a, const vint8 b)
+{
+#  ifndef __KERNEL_AVX__
+  return make_vint8(
+      a.a >= b.a, a.b >= b.b, a.c >= b.c, a.d >= b.d, a.e >= b.e, a.f >= b.f, a.g >= b.g,
+      a.h >= b.h);
+#  else
+  const __m256i all_ones = _mm256_set1_epi32(0xffffffff);
+  const __m256i b_greater = _mm256_cmpgt_epi32(b.m256, a.m256);
+  return vint8(_mm256_xor_si256(all_ones, b_greater));
+#  endif
+}
+
+/* `>=` test of every lane against a single scalar, forwarded to the vector overload
+ * with make_vint8(b). */
+ccl_device_inline vint8 operator>=(const vint8 a, const int b)
+{
+  const vint8 rhs = make_vint8(b);
+  return a >= rhs;
+}
+
+/* Lane-wise bitwise AND. */
+ccl_device_inline vint8 operator&(const vint8 a, const vint8 b)
+{
+#  ifndef __KERNEL_AVX__
+  return make_vint8(
+      a.a & b.a, a.b & b.b, a.c & b.c, a.d & b.d, a.e & b.e, a.f & b.f, a.g & b.g, a.h & b.h);
+#  else
+  const __m256i bits = _mm256_and_si256(a.m256, b.m256);
+  return vint8(bits);
+#  endif
+}
+
+/* Lane-wise bitwise OR. */
+ccl_device_inline vint8 operator|(const vint8 a, const vint8 b)
+{
+#  ifndef __KERNEL_AVX__
+  return make_vint8(
+      a.a | b.a, a.b | b.b, a.c | b.c, a.d | b.d, a.e | b.e, a.f | b.f, a.g | b.g, a.h | b.h);
+#  else
+  const __m256i bits = _mm256_or_si256(a.m256, b.m256);
+  return vint8(bits);
+#  endif
+}
+
+/* Lane-wise bitwise XOR. */
+ccl_device_inline vint8 operator^(const vint8 a, const vint8 b)
+{
+#  ifndef __KERNEL_AVX__
+  return make_vint8(
+      a.a ^ b.a, a.b ^ b.b, a.c ^ b.c, a.d ^ b.d, a.e ^ b.e, a.f ^ b.f, a.g ^ b.g, a.h ^ b.h);
+#  else
+  const __m256i bits = _mm256_xor_si256(a.m256, b.m256);
+  return vint8(bits);
+#  endif
+}
+
+/* Mixed scalar/vector bitwise operators. Each one wraps the int32_t operand with
+ * make_vint8() (defined elsewhere; presumably a broadcast to all lanes) and then
+ * defers to the vint8/vint8 operator of the same kind. */
+ccl_device_inline vint8 operator&(const int32_t a, const vint8 b)
+{
+  const vint8 lhs = make_vint8(a);
+  return lhs & b;
+}
+
+ccl_device_inline vint8 operator&(const vint8 a, const int32_t b)
+{
+  const vint8 rhs = make_vint8(b);
+  return a & rhs;
+}
+
+ccl_device_inline vint8 operator|(const int32_t a, const vint8 b)
+{
+  const vint8 lhs = make_vint8(a);
+  return lhs | b;
+}
+
+ccl_device_inline vint8 operator|(const vint8 a, const int32_t b)
+{
+  const vint8 rhs = make_vint8(b);
+  return a | rhs;
+}
+
+ccl_device_inline vint8 operator^(const int32_t a, const vint8 b)
+{
+  const vint8 lhs = make_vint8(a);
+  return lhs ^ b;
+}
+
+ccl_device_inline vint8 operator^(const vint8 a, const int32_t b)
+{
+  const vint8 rhs = make_vint8(b);
+  return a ^ rhs;
+}
+
+/* Compound assignment operators. Each applies the matching binary operator, stores
+ * the result back into `a`, and returns a reference to `a`. */
+ccl_device_inline vint8 &operator&=(vint8 &a, const vint8 b)
+{
+  a = a & b;
+  return a;
+}
+ccl_device_inline vint8 &operator&=(vint8 &a, const int32_t b)
+{
+  a = a & b;
+  return a;
+}
+
+ccl_device_inline vint8 &operator|=(vint8 &a, const vint8 b)
+{
+  a = a | b;
+  return a;
+}
+ccl_device_inline vint8 &operator|=(vint8 &a, const int32_t b)
+{
+  a = a | b;
+  return a;
+}
+
+ccl_device_inline vint8 &operator^=(vint8 &a, const vint8 b)
+{
+  a = a ^ b;
+  return a;
+}
+ccl_device_inline vint8 &operator^=(vint8 &a, const int32_t b)
+{
+  a = a ^ b;
+  return a;
+}
+
+/* Shift-assign variants take the bit count as a scalar. */
+ccl_device_inline vint8 &operator<<=(vint8 &a, const int32_t b)
+{
+  a = a << b;
+  return a;
+}
+ccl_device_inline vint8 &operator>>=(vint8 &a, const int32_t b)
+{
+  a = a >> b;
+  return a;
+}
+
+# ifdef __KERNEL_AVX__
+/* Logical (zero-filling) right shift of every lane by `b` bits; the arithmetic
+ * counterpart is operator>>. Only provided for AVX builds. */
+ccl_device_forceinline const vint8 srl(const vint8 a, const int32_t b)
+{
+  const __m256i shifted = _mm256_srli_epi32(a.m256, b);
+  return vint8(shifted);
+}
+# endif
+
+/* Lane-wise signed minimum of two 8-wide integer vectors.
+ *
+ * `_mm256_min_epi32` is an AVX2 instruction, so the vector path is selected on
+ * `__KERNEL_AVX2__`. The guard previously tested `__KERNEL_AVX41__`, which does not
+ * match the SSE41 / AVX / AVX2 feature-macro names used elsewhere and would keep this
+ * path permanently disabled if that macro is never defined.
+ * NOTE(review): confirm `__KERNEL_AVX2__` is what the AVX2 kernel build defines. */
+ccl_device_inline vint8 min(vint8 a, vint8 b)
+{
+#  if defined(__KERNEL_AVX__) && defined(__KERNEL_AVX2__)
+  return vint8(_mm256_min_epi32(a.m256, b.m256));
+#  else
+  /* Scalar fallback: per-component minimum via the scalar min() overload. */
+  return make_vint8(min(a.a, b.a),
+                    min(a.b, b.b),
+                    min(a.c, b.c),
+                    min(a.d, b.d),
+                    min(a.e, b.e),
+                    min(a.f, b.f),
+                    min(a.g, b.g),
+                    min(a.h, b.h));
+#  endif
+}
+
+/* Lane-wise signed maximum of two 8-wide integer vectors.
+ *
+ * `_mm256_max_epi32` is an AVX2 instruction, so the vector path is selected on
+ * `__KERNEL_AVX2__`. The guard previously tested `__KERNEL_AVX41__`, which does not
+ * match the SSE41 / AVX / AVX2 feature-macro names used elsewhere and would keep this
+ * path permanently disabled if that macro is never defined.
+ * NOTE(review): confirm `__KERNEL_AVX2__` is what the AVX2 kernel build defines. */
+ccl_device_inline vint8 max(vint8 a, vint8 b)
+{
+#  if defined(__KERNEL_AVX__) && defined(__KERNEL_AVX2__)
+  return vint8(_mm256_max_epi32(a.m256, b.m256));
+#  else
+  /* Scalar fallback: per-component maximum via the scalar max() overload. */
+  return make_vint8(max(a.a, b.a),
+                    max(a.b, b.b),
+                    max(a.c, b.c),
+                    max(a.d, b.d),
+                    max(a.e, b.e),
+                    max(a.f, b.f),
+                    max(a.g, b.g),
+                    max(a.h, b.h));
+#  endif
+}
+
+/* Clamp each lane of `a` into [mn, mx]: the lower bound is applied first, then the
+ * upper bound. */
+ccl_device_inline vint8 clamp(const vint8 a, const vint8 mn, const vint8 mx)
+{
+  const vint8 raised = max(a, mn);
+  return min(raised, mx);
+}
+
+/* Per-lane choice between `a` and `b` driven by `mask`.
+ * AVX path: the blend intrinsic takes `a` for lanes whose mask sign bit is set and `b`
+ * otherwise. Fallback: takes `a` for lanes whose mask value is non-zero.
+ * NOTE(review): the two paths agree for masks that are all-ones or zero per lane; a
+ * lane value such as 1 selects `a` only in the fallback. */
+ccl_device_inline vint8 select(const vint8 mask, const vint8 a, const vint8 b)
+{
+#  ifndef __KERNEL_AVX__
+  return make_vint8(mask.a ? a.a : b.a,
+                    mask.b ? a.b : b.b,
+                    mask.c ? a.c : b.c,
+                    mask.d ? a.d : b.d,
+                    mask.e ? a.e : b.e,
+                    mask.f ? a.f : b.f,
+                    mask.g ? a.g : b.g,
+                    mask.h ? a.h : b.h);
+#  else
+  /* The variable blend is a float-domain instruction; the casts are bit-preserving. */
+  const __m256 if_false = _mm256_castsi256_ps(b);
+  const __m256 if_true = _mm256_castsi256_ps(a);
+  const __m256 selector = _mm256_castsi256_ps(mask);
+  return vint8(_mm256_castps_si256(_mm256_blendv_ps(if_false, if_true, selector)));
+#  endif
+}
+
+/* Load eight consecutive ints starting at `v` into a vint8.
+ * The AVX path uses the unaligned load intrinsic, so `v` does not have to be 32-byte
+ * aligned. */
+ccl_device_inline vint8 load_vint8(const int *v)
+{
+#  ifdef __KERNEL_AVX__
+  /* Keep the pointee const: the load intrinsic takes a pointer-to-const, so there is
+   * no need to cast the qualifier away. */
+  return vint8(_mm256_loadu_si256((const __m256i *)v));
+#  else
+  return make_vint8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
+#  endif
+}
+#endif /* __KERNEL_GPU__ */
+
+/* Reinterpret the bit pattern of every 32-bit integer lane as a float.
+ * No numeric int-to-float conversion takes place. */
+ccl_device_inline vfloat8 cast(const vint8 a)
+{
+#ifndef __KERNEL_AVX__
+  /* Scalar path: bit-cast the eight components one at a time. */
+  return make_vfloat8(__int_as_float(a.a),
+                      __int_as_float(a.b),
+                      __int_as_float(a.c),
+                      __int_as_float(a.d),
+                      __int_as_float(a.e),
+                      __int_as_float(a.f),
+                      __int_as_float(a.g),
+                      __int_as_float(a.h));
+#else
+  /* The AVX cast intrinsic only relabels the register type. */
+  return vfloat8(_mm256_castsi256_ps(a));
+#endif
+}
+
+#ifdef __KERNEL_AVX__
+/* Replicate element `i` inside each 128-bit half: `_mm256_permute_ps` permutes the two
+ * halves independently, so each half is filled with its own element `i`. */
+template<size_t i> ccl_device_forceinline const vint8 shuffle(const vint8 a)
+{
+  /* The permute is a float-domain instruction; the casts are bit-preserving. */
+  const __m256 lanes = _mm256_castsi256_ps(a);
+  return vint8(_mm256_castps_si256(_mm256_permute_ps(lanes, _MM_SHUFFLE(i, i, i, i))));
+}
+
+/* Rearrange whole 128-bit halves of `a`: i0 selects the half placed in the low
+ * 128 bits of the result and i1 the half placed in the high 128 bits. */
+template<size_t i0, size_t i1> ccl_device_forceinline const vint8 shuffle(const vint8 a)
+{
+  const __m256i halves = _mm256_permute2f128_si256(a, a, (i1 << 4) | (i0 << 0));
+  return vint8(halves);
+}
+
+/* Build a vector from 128-bit halves of `a` and `b`: i0 selects the half placed in the
+ * low 128 bits of the result and i1 the half placed in the high 128 bits, using the
+ * selector encoding of `_mm256_permute2f128_si256`. */
+template<size_t i0, size_t i1>
+ccl_device_forceinline const vint8 shuffle(const vint8 a, const vint8 b)
+{
+  const __m256i halves = _mm256_permute2f128_si256(a, b, (i1 << 4) | (i0 << 0));
+  return vint8(halves);
+}
+
+/* Permute the four elements inside each 128-bit half of `a` with the same index
+ * pattern: result element k of a half is element i_k of that half. */
+template<size_t i0, size_t i1, size_t i2, size_t i3>
+ccl_device_forceinline const vint8 shuffle(const vint8 a)
+{
+  /* The permute is a float-domain instruction; the casts are bit-preserving. */
+  const __m256 lanes = _mm256_castsi256_ps(a);
+  return vint8(_mm256_castps_si256(_mm256_permute_ps(lanes, _MM_SHUFFLE(i3, i2, i1, i0))));
+}
+
+/* Two-operand shuffle applied to each 128-bit half independently: within a half the
+ * low two result elements come from `a` (indices i0, i1) and the high two from `b`
+ * (indices i2, i3), as defined by `_mm256_shuffle_ps`. */
+template<size_t i0, size_t i1, size_t i2, size_t i3>
+ccl_device_forceinline const vint8 shuffle(const vint8 a, const vint8 b)
+{
+  const __m256 first = _mm256_castsi256_ps(a);
+  const __m256 second = _mm256_castsi256_ps(b);
+  return vint8(
+      _mm256_castps_si256(_mm256_shuffle_ps(first, second, _MM_SHUFFLE(i3, i2, i1, i0))));
+}
+
+/* Specializations of the one-operand, four-index shuffle for patterns that have a
+ * dedicated duplicate instruction. */
+
+/* <0, 0, 2, 2>: duplicate the even-indexed elements. */
+template<> __forceinline const vint8 shuffle<0, 0, 2, 2>(const vint8 b)
+{
+  const __m256 duplicated = _mm256_moveldup_ps(_mm256_castsi256_ps(b));
+  return vint8(_mm256_castps_si256(duplicated));
+}
+/* <1, 1, 3, 3>: duplicate the odd-indexed elements. */
+template<> __forceinline const vint8 shuffle<1, 1, 3, 3>(const vint8 b)
+{
+  const __m256 duplicated = _mm256_movehdup_ps(_mm256_castsi256_ps(b));
+  return vint8(_mm256_castps_si256(duplicated));
+}
+/* <0, 1, 0, 1>: duplicate the low 64-bit pair of each 128-bit half, done in the
+ * double-precision domain where one element spans two 32-bit lanes. */
+template<> __forceinline const vint8 shuffle<0, 1, 0, 1>(const vint8 b)
+{
+  const __m256d pairs = _mm256_castps_pd(_mm256_castsi256_ps(b));
+  const __m256d duplicated = _mm256_movedup_pd(pairs);
+  return vint8(_mm256_castps_si256(_mm256_castpd_ps(duplicated)));
+}
+#endif
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_MATH_INT8_H__ */
diff --git a/intern/cycles/util/math_intersect.h b/intern/cycles/util/math_intersect.h
index aa28682f8c1..0727debf775 100644
--- a/intern/cycles/util/math_intersect.h
+++ b/intern/cycles/util/math_intersect.h
@@ -133,7 +133,9 @@ ccl_device_forceinline float ray_triangle_rcp(const float x)
ccl_device_inline float ray_triangle_dot(const float3 a, const float3 b)
{
#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
- return madd(ssef(a.x), ssef(b.x), madd(ssef(a.y), ssef(b.y), ssef(a.z) * ssef(b.z)))[0];
+ return madd(make_float4(a.x),
+ make_float4(b.x),
+ madd(make_float4(a.y), make_float4(b.y), make_float4(a.z) * make_float4(b.z)))[0];
#else
return a.x * b.x + a.y * b.y + a.z * b.z;
#endif
@@ -142,9 +144,10 @@ ccl_device_inline float ray_triangle_dot(const float3 a, const float3 b)
ccl_device_inline float3 ray_triangle_cross(const float3 a, const float3 b)
{
#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
- return make_float3(msub(ssef(a.y), ssef(b.z), ssef(a.z) * ssef(b.y))[0],
- msub(ssef(a.z), ssef(b.x), ssef(a.x) * ssef(b.z))[0],
- msub(ssef(a.x), ssef(b.y), ssef(a.y) * ssef(b.x))[0]);
+ return make_float3(
+ msub(make_float4(a.y), make_float4(b.z), make_float4(a.z) * make_float4(b.y))[0],
+ msub(make_float4(a.z), make_float4(b.x), make_float4(a.x) * make_float4(b.z))[0],
+ msub(make_float4(a.x), make_float4(b.y), make_float4(a.y) * make_float4(b.x))[0]);
#else
return make_float3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x);
#endif
diff --git a/intern/cycles/util/sseb.h b/intern/cycles/util/sseb.h
deleted file mode 100644
index 6f78299711e..00000000000
--- a/intern/cycles/util/sseb.h
+++ /dev/null
@@ -1,345 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2011-2013 Intel Corporation
- * Modifications Copyright 2014-2022 Blender Foundation. */
-
-#ifndef __UTIL_SSEB_H__
-#define __UTIL_SSEB_H__
-
-CCL_NAMESPACE_BEGIN
-
-#ifdef __KERNEL_SSE2__
-
-struct ssei;
-struct ssef;
-
-/*! 4-wide SSE bool type. */
-struct sseb {
- typedef sseb Mask; // mask type
- typedef ssei Int; // int type
- typedef ssef Float; // float type
-
- enum { size = 4 }; // number of SIMD elements
- union {
- __m128 m128;
- int32_t v[4];
- }; // data
-
- ////////////////////////////////////////////////////////////////////////////////
- /// Constructors, Assignment & Cast Operators
- ////////////////////////////////////////////////////////////////////////////////
-
- __forceinline sseb()
- {
- }
- __forceinline sseb(const sseb &other)
- {
- m128 = other.m128;
- }
- __forceinline sseb &operator=(const sseb &other)
- {
- m128 = other.m128;
- return *this;
- }
-
- __forceinline sseb(const __m128 input) : m128(input)
- {
- }
- __forceinline operator const __m128 &(void) const
- {
- return m128;
- }
- __forceinline operator const __m128i(void) const
- {
- return _mm_castps_si128(m128);
- }
- __forceinline operator const __m128d(void) const
- {
- return _mm_castps_pd(m128);
- }
-
- __forceinline sseb(bool a)
- : m128(_mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)])
- {
- }
- __forceinline sseb(bool a, bool b)
- : m128(_mm_lookupmask_ps[(size_t(b) << 3) | (size_t(a) << 2) | (size_t(b) << 1) | size_t(a)])
- {
- }
- __forceinline sseb(bool a, bool b, bool c, bool d)
- : m128(_mm_lookupmask_ps[(size_t(d) << 3) | (size_t(c) << 2) | (size_t(b) << 1) | size_t(a)])
- {
- }
- __forceinline sseb(int mask)
- {
- assert(mask >= 0 && mask < 16);
- m128 = _mm_lookupmask_ps[mask];
- }
-
- ////////////////////////////////////////////////////////////////////////////////
- /// Constants
- ////////////////////////////////////////////////////////////////////////////////
-
- __forceinline sseb(FalseTy) : m128(_mm_setzero_ps())
- {
- }
- __forceinline sseb(TrueTy)
- : m128(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())))
- {
- }
-
- ////////////////////////////////////////////////////////////////////////////////
- /// Array Access
- ////////////////////////////////////////////////////////////////////////////////
-
- __forceinline bool operator[](const size_t i) const
- {
- assert(i < 4);
- return (_mm_movemask_ps(m128) >> i) & 1;
- }
- __forceinline int32_t &operator[](const size_t i)
- {
- assert(i < 4);
- return v[i];
- }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-/// Unary Operators
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const sseb operator!(const sseb &a)
-{
- return _mm_xor_ps(a, sseb(True));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Binary Operators
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const sseb operator&(const sseb &a, const sseb &b)
-{
- return _mm_and_ps(a, b);
-}
-__forceinline const sseb operator|(const sseb &a, const sseb &b)
-{
- return _mm_or_ps(a, b);
-}
-__forceinline const sseb operator^(const sseb &a, const sseb &b)
-{
- return _mm_xor_ps(a, b);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Assignment Operators
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const sseb operator&=(sseb &a, const sseb &b)
-{
- return a = a & b;
-}
-__forceinline const sseb operator|=(sseb &a, const sseb &b)
-{
- return a = a | b;
-}
-__forceinline const sseb operator^=(sseb &a, const sseb &b)
-{
- return a = a ^ b;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Comparison Operators + Select
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const sseb operator!=(const sseb &a, const sseb &b)
-{
- return _mm_xor_ps(a, b);
-}
-__forceinline const sseb operator==(const sseb &a, const sseb &b)
-{
- return _mm_castsi128_ps(_mm_cmpeq_epi32(a, b));
-}
-
-__forceinline const sseb select(const sseb &m, const sseb &t, const sseb &f)
-{
-# if defined(__KERNEL_SSE41__)
- return _mm_blendv_ps(f, t, m);
-# else
- return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f));
-# endif
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Movement/Shifting/Shuffling Functions
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const sseb unpacklo(const sseb &a, const sseb &b)
-{
- return _mm_unpacklo_ps(a, b);
-}
-__forceinline const sseb unpackhi(const sseb &a, const sseb &b)
-{
- return _mm_unpackhi_ps(a, b);
-}
-
-template<size_t i0, size_t i1, size_t i2, size_t i3>
-__forceinline const sseb shuffle(const sseb &a)
-{
-# ifdef __KERNEL_NEON__
- return shuffle_neon<int32x4_t, i0, i1, i2, i3>(a);
-# else
- return _mm_castsi128_ps(_mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)));
-# endif
-}
-
-# ifndef __KERNEL_NEON__
-template<> __forceinline const sseb shuffle<0, 1, 0, 1>(const sseb &a)
-{
- return _mm_movelh_ps(a, a);
-}
-
-template<> __forceinline const sseb shuffle<2, 3, 2, 3>(const sseb &a)
-{
- return _mm_movehl_ps(a, a);
-}
-# endif
-
-template<size_t i0, size_t i1, size_t i2, size_t i3>
-__forceinline const sseb shuffle(const sseb &a, const sseb &b)
-{
-# ifdef __KERNEL_NEON__
- return shuffle_neon<int32x4_t, i0, i1, i2, i3>(a, b);
-# else
- return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0));
-# endif
-}
-
-# ifndef __KERNEL_NEON__
-template<> __forceinline const sseb shuffle<0, 1, 0, 1>(const sseb &a, const sseb &b)
-{
- return _mm_movelh_ps(a, b);
-}
-
-template<> __forceinline const sseb shuffle<2, 3, 2, 3>(const sseb &a, const sseb &b)
-{
- return _mm_movehl_ps(b, a);
-}
-# endif
-
-# if defined(__KERNEL_SSE3__) && !defined(__KERNEL_NEON__)
-template<> __forceinline const sseb shuffle<0, 0, 2, 2>(const sseb &a)
-{
- return _mm_moveldup_ps(a);
-}
-template<> __forceinline const sseb shuffle<1, 1, 3, 3>(const sseb &a)
-{
- return _mm_movehdup_ps(a);
-}
-# endif
-
-# if defined(__KERNEL_SSE41__)
-template<size_t dst, size_t src, size_t clr>
-__forceinline const sseb insert(const sseb &a, const sseb &b)
-{
-# ifdef __KERNEL_NEON__
- sseb res = a;
- if (clr)
- res[dst] = 0;
- else
- res[dst] = b[src];
- return res;
-# else
- return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr);
-# endif
-}
-template<size_t dst, size_t src> __forceinline const sseb insert(const sseb &a, const sseb &b)
-{
- return insert<dst, src, 0>(a, b);
-}
-template<size_t dst> __forceinline const sseb insert(const sseb &a, const bool b)
-{
- return insert<dst, 0>(a, sseb(b));
-}
-# endif
-
-////////////////////////////////////////////////////////////////////////////////
-/// Reduction Operations
-////////////////////////////////////////////////////////////////////////////////
-
-# if defined(__KERNEL_SSE41__)
-__forceinline uint32_t popcnt(const sseb &a)
-{
-# if defined(__KERNEL_NEON__)
- const int32x4_t mask = {1, 1, 1, 1};
- int32x4_t t = vandq_s32(vreinterpretq_s32_m128(a.m128), mask);
- return vaddvq_s32(t);
-# else
- return _mm_popcnt_u32(_mm_movemask_ps(a));
-# endif
-}
-# else
-__forceinline uint32_t popcnt(const sseb &a)
-{
- return bool(a[0]) + bool(a[1]) + bool(a[2]) + bool(a[3]);
-}
-# endif
-
-__forceinline bool reduce_and(const sseb &a)
-{
-# if defined(__KERNEL_NEON__)
- return vaddvq_s32(vreinterpretq_s32_m128(a.m128)) == -4;
-# else
- return _mm_movemask_ps(a) == 0xf;
-# endif
-}
-__forceinline bool reduce_or(const sseb &a)
-{
-# if defined(__KERNEL_NEON__)
- return vaddvq_s32(vreinterpretq_s32_m128(a.m128)) != 0x0;
-# else
- return _mm_movemask_ps(a) != 0x0;
-# endif
-}
-__forceinline bool all(const sseb &b)
-{
-# if defined(__KERNEL_NEON__)
- return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) == -4;
-# else
- return _mm_movemask_ps(b) == 0xf;
-# endif
-}
-__forceinline bool any(const sseb &b)
-{
-# if defined(__KERNEL_NEON__)
- return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) != 0x0;
-# else
- return _mm_movemask_ps(b) != 0x0;
-# endif
-}
-__forceinline bool none(const sseb &b)
-{
-# if defined(__KERNEL_NEON__)
- return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) == 0x0;
-# else
- return _mm_movemask_ps(b) == 0x0;
-# endif
-}
-
-__forceinline uint32_t movemask(const sseb &a)
-{
- return _mm_movemask_ps(a);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Debug Functions
-////////////////////////////////////////////////////////////////////////////////
-
-ccl_device_inline void print_sseb(const char *label, const sseb &a)
-{
- printf("%s: %d %d %d %d\n", label, a[0], a[1], a[2], a[3]);
-}
-
-#endif
-
-CCL_NAMESPACE_END
-
-#endif
diff --git a/intern/cycles/util/ssef.h b/intern/cycles/util/ssef.h
deleted file mode 100644
index 1e2bfa90354..00000000000
--- a/intern/cycles/util/ssef.h
+++ /dev/null
@@ -1,1090 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2011-2013 Intel Corporation
- * Modifications Copyright 2014-2022 Blender Foundation. */
-
-#ifndef __UTIL_SSEF_H__
-#define __UTIL_SSEF_H__
-
-#include <math.h>
-
-#include "util/ssei.h"
-
-CCL_NAMESPACE_BEGIN
-
-#ifdef __KERNEL_SSE2__
-
-struct sseb;
-struct ssef;
-
-/*! 4-wide SSE float type. */
-struct ssef {
- typedef sseb Mask; // mask type
- typedef ssei Int; // int type
- typedef ssef Float; // float type
-
- enum { size = 4 }; // number of SIMD elements
- union {
- __m128 m128;
- float f[4];
- int i[4];
- }; // data
-
- ////////////////////////////////////////////////////////////////////////////////
- /// Constructors, Assignment & Cast Operators
- ////////////////////////////////////////////////////////////////////////////////
-
- __forceinline ssef()
- {
- }
- __forceinline ssef(const ssef &other)
- {
- m128 = other.m128;
- }
- __forceinline ssef &operator=(const ssef &other)
- {
- m128 = other.m128;
- return *this;
- }
-
- __forceinline ssef(const __m128 a) : m128(a)
- {
- }
- __forceinline operator const __m128 &() const
- {
- return m128;
- }
- __forceinline operator __m128 &()
- {
- return m128;
- }
-
- __forceinline ssef(float a) : m128(_mm_set1_ps(a))
- {
- }
- __forceinline ssef(float a, float b, float c, float d) : m128(_mm_setr_ps(a, b, c, d))
- {
- }
-
- __forceinline explicit ssef(const __m128i a) : m128(_mm_cvtepi32_ps(a))
- {
- }
-
- ////////////////////////////////////////////////////////////////////////////////
- /// Loads and Stores
- ////////////////////////////////////////////////////////////////////////////////
-
-# if defined(__KERNEL_AVX__)
- static __forceinline ssef broadcast(const void *const a)
- {
- return _mm_broadcast_ss((float *)a);
- }
-# else
- static __forceinline ssef broadcast(const void *const a)
- {
- return _mm_set1_ps(*(float *)a);
- }
-# endif
-
- ////////////////////////////////////////////////////////////////////////////////
- /// Array Access
- ////////////////////////////////////////////////////////////////////////////////
-
- __forceinline const float &operator[](const size_t i) const
- {
- assert(i < 4);
- return f[i];
- }
- __forceinline float &operator[](const size_t i)
- {
- assert(i < 4);
- return f[i];
- }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-/// Unary Operators
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const ssef cast(const __m128i &a)
-{
- return _mm_castsi128_ps(a);
-}
-__forceinline const ssef operator+(const ssef &a)
-{
- return a;
-}
-__forceinline const ssef operator-(const ssef &a)
-{
- return _mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000)));
-}
-__forceinline const ssef abs(const ssef &a)
-{
- return _mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)));
-}
-# if defined(__KERNEL_SSE41__)
-__forceinline const ssef sign(const ssef &a)
-{
- return _mm_blendv_ps(ssef(1.0f), -ssef(1.0f), _mm_cmplt_ps(a, ssef(0.0f)));
-}
-# endif
-__forceinline const ssef signmsk(const ssef &a)
-{
- return _mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000)));
-}
-
-__forceinline const ssef rcp(const ssef &a)
-{
- const ssef r = _mm_rcp_ps(a.m128);
- return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a));
-}
-__forceinline const ssef sqr(const ssef &a)
-{
- return _mm_mul_ps(a, a);
-}
-__forceinline const ssef mm_sqrt(const ssef &a)
-{
- return _mm_sqrt_ps(a.m128);
-}
-__forceinline const ssef rsqrt(const ssef &a)
-{
- const ssef r = _mm_rsqrt_ps(a.m128);
- return _mm_add_ps(
- _mm_mul_ps(_mm_set_ps(1.5f, 1.5f, 1.5f, 1.5f), r),
- _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set_ps(-0.5f, -0.5f, -0.5f, -0.5f)), r),
- _mm_mul_ps(r, r)));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Binary Operators
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const ssef operator+(const ssef &a, const ssef &b)
-{
- return _mm_add_ps(a.m128, b.m128);
-}
-__forceinline const ssef operator+(const ssef &a, const float &b)
-{
- return a + ssef(b);
-}
-__forceinline const ssef operator+(const float &a, const ssef &b)
-{
- return ssef(a) + b;
-}
-
-__forceinline const ssef operator-(const ssef &a, const ssef &b)
-{
- return _mm_sub_ps(a.m128, b.m128);
-}
-__forceinline const ssef operator-(const ssef &a, const float &b)
-{
- return a - ssef(b);
-}
-__forceinline const ssef operator-(const float &a, const ssef &b)
-{
- return ssef(a) - b;
-}
-
-__forceinline const ssef operator*(const ssef &a, const ssef &b)
-{
- return _mm_mul_ps(a.m128, b.m128);
-}
-__forceinline const ssef operator*(const ssef &a, const float &b)
-{
- return a * ssef(b);
-}
-__forceinline const ssef operator*(const float &a, const ssef &b)
-{
- return ssef(a) * b;
-}
-
-__forceinline const ssef operator/(const ssef &a, const ssef &b)
-{
- return _mm_div_ps(a.m128, b.m128);
-}
-__forceinline const ssef operator/(const ssef &a, const float &b)
-{
- return a / ssef(b);
-}
-__forceinline const ssef operator/(const float &a, const ssef &b)
-{
- return ssef(a) / b;
-}
-
-__forceinline const ssef operator^(const ssef &a, const ssef &b)
-{
- return _mm_xor_ps(a.m128, b.m128);
-}
-__forceinline const ssef operator^(const ssef &a, const ssei &b)
-{
- return _mm_xor_ps(a.m128, _mm_castsi128_ps(b.m128));
-}
-
-__forceinline const ssef operator&(const ssef &a, const ssef &b)
-{
- return _mm_and_ps(a.m128, b.m128);
-}
-__forceinline const ssef operator&(const ssef &a, const ssei &b)
-{
- return _mm_and_ps(a.m128, _mm_castsi128_ps(b.m128));
-}
-
-__forceinline const ssef operator|(const ssef &a, const ssef &b)
-{
- return _mm_or_ps(a.m128, b.m128);
-}
-__forceinline const ssef operator|(const ssef &a, const ssei &b)
-{
- return _mm_or_ps(a.m128, _mm_castsi128_ps(b.m128));
-}
-
-__forceinline const ssef andnot(const ssef &a, const ssef &b)
-{
- return _mm_andnot_ps(a.m128, b.m128);
-}
-
-__forceinline const ssef min(const ssef &a, const ssef &b)
-{
- return _mm_min_ps(a.m128, b.m128);
-}
-__forceinline const ssef min(const ssef &a, const float &b)
-{
- return _mm_min_ps(a.m128, ssef(b));
-}
-__forceinline const ssef min(const float &a, const ssef &b)
-{
- return _mm_min_ps(ssef(a), b.m128);
-}
-
-__forceinline const ssef max(const ssef &a, const ssef &b)
-{
- return _mm_max_ps(a.m128, b.m128);
-}
-__forceinline const ssef max(const ssef &a, const float &b)
-{
- return _mm_max_ps(a.m128, ssef(b));
-}
-__forceinline const ssef max(const float &a, const ssef &b)
-{
- return _mm_max_ps(ssef(a), b.m128);
-}
-
-# if defined(__KERNEL_SSE41__)
-__forceinline ssef mini(const ssef &a, const ssef &b)
-{
- const ssei ai = _mm_castps_si128(a);
- const ssei bi = _mm_castps_si128(b);
- const ssei ci = _mm_min_epi32(ai, bi);
- return _mm_castsi128_ps(ci);
-}
-# endif
-
-# if defined(__KERNEL_SSE41__)
-__forceinline ssef maxi(const ssef &a, const ssef &b)
-{
- const ssei ai = _mm_castps_si128(a);
- const ssei bi = _mm_castps_si128(b);
- const ssei ci = _mm_max_epi32(ai, bi);
- return _mm_castsi128_ps(ci);
-}
-# endif
-
-////////////////////////////////////////////////////////////////////////////////
-/// Ternary Operators
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const ssef madd(const ssef &a, const ssef &b, const ssef &c)
-{
-# if defined(__KERNEL_NEON__)
- return vfmaq_f32(c, a, b);
-# elif defined(__KERNEL_AVX2__)
- return _mm_fmadd_ps(a, b, c);
-# else
- return a * b + c;
-# endif
-}
-__forceinline const ssef msub(const ssef &a, const ssef &b, const ssef &c)
-{
-# if defined(__KERNEL_NEON__)
- return vfmaq_f32(vnegq_f32(c), a, b);
-# elif defined(__KERNEL_AVX2__)
- return _mm_fmsub_ps(a, b, c);
-# else
- return a * b - c;
-# endif
-}
-__forceinline const ssef nmadd(const ssef &a, const ssef &b, const ssef &c)
-{
-# if defined(__KERNEL_NEON__)
- return vfmsq_f32(c, a, b);
-# elif defined(__KERNEL_AVX2__)
- return _mm_fnmadd_ps(a, b, c);
-# else
- return c - a * b;
-# endif
-}
-__forceinline const ssef nmsub(const ssef &a, const ssef &b, const ssef &c)
-{
-# if defined(__KERNEL_NEON__)
- return vfmsq_f32(vnegq_f32(c), a, b);
-# elif defined(__KERNEL_AVX2__)
- return _mm_fnmsub_ps(a, b, c);
-# else
- return -a * b - c;
-# endif
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Assignment Operators
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline ssef &operator+=(ssef &a, const ssef &b)
-{
- return a = a + b;
-}
-__forceinline ssef &operator+=(ssef &a, const float &b)
-{
- return a = a + b;
-}
-
-__forceinline ssef &operator-=(ssef &a, const ssef &b)
-{
- return a = a - b;
-}
-__forceinline ssef &operator-=(ssef &a, const float &b)
-{
- return a = a - b;
-}
-
-__forceinline ssef &operator*=(ssef &a, const ssef &b)
-{
- return a = a * b;
-}
-__forceinline ssef &operator*=(ssef &a, const float &b)
-{
- return a = a * b;
-}
-
-__forceinline ssef &operator/=(ssef &a, const ssef &b)
-{
- return a = a / b;
-}
-__forceinline ssef &operator/=(ssef &a, const float &b)
-{
- return a = a / b;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Comparison Operators + Select
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const sseb operator==(const ssef &a, const ssef &b)
-{
- return _mm_cmpeq_ps(a.m128, b.m128);
-}
-__forceinline const sseb operator==(const ssef &a, const float &b)
-{
- return a == ssef(b);
-}
-__forceinline const sseb operator==(const float &a, const ssef &b)
-{
- return ssef(a) == b;
-}
-
-__forceinline const sseb operator!=(const ssef &a, const ssef &b)
-{
- return _mm_cmpneq_ps(a.m128, b.m128);
-}
-__forceinline const sseb operator!=(const ssef &a, const float &b)
-{
- return a != ssef(b);
-}
-__forceinline const sseb operator!=(const float &a, const ssef &b)
-{
- return ssef(a) != b;
-}
-
-__forceinline const sseb operator<(const ssef &a, const ssef &b)
-{
- return _mm_cmplt_ps(a.m128, b.m128);
-}
-__forceinline const sseb operator<(const ssef &a, const float &b)
-{
- return a < ssef(b);
-}
-__forceinline const sseb operator<(const float &a, const ssef &b)
-{
- return ssef(a) < b;
-}
-
-__forceinline const sseb operator>=(const ssef &a, const ssef &b)
-{
- return _mm_cmpnlt_ps(a.m128, b.m128);
-}
-__forceinline const sseb operator>=(const ssef &a, const float &b)
-{
- return a >= ssef(b);
-}
-__forceinline const sseb operator>=(const float &a, const ssef &b)
-{
- return ssef(a) >= b;
-}
-
-__forceinline const sseb operator>(const ssef &a, const ssef &b)
-{
- return _mm_cmpnle_ps(a.m128, b.m128);
-}
-__forceinline const sseb operator>(const ssef &a, const float &b)
-{
- return a > ssef(b);
-}
-__forceinline const sseb operator>(const float &a, const ssef &b)
-{
- return ssef(a) > b;
-}
-
-__forceinline const sseb operator<=(const ssef &a, const ssef &b)
-{
- return _mm_cmple_ps(a.m128, b.m128);
-}
-__forceinline const sseb operator<=(const ssef &a, const float &b)
-{
- return a <= ssef(b);
-}
-__forceinline const sseb operator<=(const float &a, const ssef &b)
-{
- return ssef(a) <= b;
-}
-
-__forceinline const ssef select(const sseb &m, const ssef &t, const ssef &f)
-{
-# ifdef __KERNEL_SSE41__
- return _mm_blendv_ps(f, t, m);
-# else
- return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f));
-# endif
-}
-
-__forceinline const ssef select(const ssef &m, const ssef &t, const ssef &f)
-{
-# ifdef __KERNEL_SSE41__
- return _mm_blendv_ps(f, t, m);
-# else
- return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f));
-# endif
-}
-
-__forceinline const ssef select(const int mask, const ssef &t, const ssef &f)
-{
-# if defined(__KERNEL_SSE41__) && \
- ((!defined(__clang__) && !defined(_MSC_VER)) || defined(__INTEL_COMPILER))
- return _mm_blend_ps(f, t, mask);
-# else
- return select(sseb(mask), t, f);
-# endif
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Rounding Functions
-////////////////////////////////////////////////////////////////////////////////
-
-# if defined(__KERNEL_SSE41__)
-__forceinline const ssef round_even(const ssef &a)
-{
-# ifdef __KERNEL_NEON__
- return vrndnq_f32(a);
-# else
- return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT);
-# endif
-}
-__forceinline const ssef round_down(const ssef &a)
-{
-# ifdef __KERNEL_NEON__
- return vrndmq_f32(a);
-# else
- return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF);
-# endif
-}
-__forceinline const ssef round_up(const ssef &a)
-{
-# ifdef __KERNEL_NEON__
- return vrndpq_f32(a);
-# else
- return _mm_round_ps(a, _MM_FROUND_TO_POS_INF);
-# endif
-}
-__forceinline const ssef round_zero(const ssef &a)
-{
-# ifdef __KERNEL_NEON__
- return vrndq_f32(a);
-# else
- return _mm_round_ps(a, _MM_FROUND_TO_ZERO);
-# endif
-}
-__forceinline const ssef floor(const ssef &a)
-{
-# ifdef __KERNEL_NEON__
- return vrndmq_f32(a);
-# else
- return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF);
-# endif
-}
-__forceinline const ssef ceil(const ssef &a)
-{
-# ifdef __KERNEL_NEON__
- return vrndpq_f32(a);
-# else
- return _mm_round_ps(a, _MM_FROUND_TO_POS_INF);
-# endif
-}
-# else
-/* Non-SSE4.1 fallback, needed for floorfrac. */
-__forceinline const ssef floor(const ssef &a)
-{
- return _mm_set_ps(floorf(a.f[3]), floorf(a.f[2]), floorf(a.f[1]), floorf(a.f[0]));
-}
-# endif
-
-__forceinline ssei truncatei(const ssef &a)
-{
- return _mm_cvttps_epi32(a.m128);
-}
-
-__forceinline ssef floorfrac(const ssef &x, ssei *i)
-{
- ssef f = floor(x);
- *i = truncatei(f);
- return x - f;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Common Functions
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline ssef mix(const ssef &a, const ssef &b, const ssef &t)
-{
- return madd(t, b, (ssef(1.0f) - t) * a);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Movement/Shifting/Shuffling Functions
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline ssef unpacklo(const ssef &a, const ssef &b)
-{
- return _mm_unpacklo_ps(a.m128, b.m128);
-}
-__forceinline ssef unpackhi(const ssef &a, const ssef &b)
-{
- return _mm_unpackhi_ps(a.m128, b.m128);
-}
-
-template<size_t i0, size_t i1, size_t i2, size_t i3>
-__forceinline const ssef shuffle(const ssef &b)
-{
-# ifdef __KERNEL_NEON__
- return shuffle_neon<float32x4_t, i0, i1, i2, i3>(b.m128);
-# else
- return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(i3, i2, i1, i0)));
-# endif
-}
-
-template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef &a)
-{
- return _mm_movelh_ps(a, a);
-}
-
-template<> __forceinline const ssef shuffle<2, 3, 2, 3>(const ssef &a)
-{
- return _mm_movehl_ps(a, a);
-}
-
-template<size_t i0, size_t i1, size_t i2, size_t i3>
-__forceinline const ssef shuffle(const ssef &a, const ssef &b)
-{
-# ifdef __KERNEL_NEON__
- return shuffle_neon<float32x4_t, i0, i1, i2, i3>(a, b);
-# else
- return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0));
-# endif
-}
-
-template<size_t i0> __forceinline const ssef shuffle(const ssef &a, const ssef &b)
-{
-# ifdef __KERNEL_NEON__
- return shuffle_neon<float32x4_t, i0, i0, i0, i0>(a, b);
-# else
- return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i0, i0, i0, i0));
-# endif
-}
-
-# ifndef __KERNEL_NEON__
-template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef &a, const ssef &b)
-{
- return _mm_movelh_ps(a, b);
-}
-
-template<> __forceinline const ssef shuffle<2, 3, 2, 3>(const ssef &a, const ssef &b)
-{
- return _mm_movehl_ps(b, a);
-}
-# endif
-
-# if defined(__KERNEL_SSSE3__)
-__forceinline const ssef shuffle8(const ssef &a, const ssei &shuf)
-{
- return _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(a), shuf));
-}
-# endif
-
-# if defined(__KERNEL_SSE3__)
-template<> __forceinline const ssef shuffle<0, 0, 2, 2>(const ssef &b)
-{
- return _mm_moveldup_ps(b);
-}
-template<> __forceinline const ssef shuffle<1, 1, 3, 3>(const ssef &b)
-{
- return _mm_movehdup_ps(b);
-}
-# endif
-
-template<size_t i0> __forceinline const ssef shuffle(const ssef &b)
-{
- return shuffle<i0, i0, i0, i0>(b);
-}
-
-# if defined(__KERNEL_AVX__)
-__forceinline const ssef shuffle(const ssef &a, const ssei &shuf)
-{
- return _mm_permutevar_ps(a, shuf);
-}
-# endif
-
-template<size_t i> __forceinline float extract(const ssef &a)
-{
- return _mm_cvtss_f32(shuffle<i, i, i, i>(a));
-}
-template<> __forceinline float extract<0>(const ssef &a)
-{
- return _mm_cvtss_f32(a);
-}
-
-# if defined(__KERNEL_SSE41__)
-template<size_t dst, size_t src, size_t clr>
-__forceinline const ssef insert(const ssef &a, const ssef &b)
-{
-# ifdef __KERNEL_NEON__
- ssef res = a;
- if (clr)
- res[dst] = 0;
- else
- res[dst] = b[src];
- return res;
-# else
- return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr);
-# endif
-}
-template<size_t dst, size_t src> __forceinline const ssef insert(const ssef &a, const ssef &b)
-{
- return insert<dst, src, 0>(a, b);
-}
-template<size_t dst> __forceinline const ssef insert(const ssef &a, const float b)
-{
- return insert<dst, 0>(a, _mm_set_ss(b));
-}
-# else
-template<size_t dst> __forceinline const ssef insert(const ssef &a, const float b)
-{
- ssef c = a;
- c[dst] = b;
- return c;
-}
-# endif
-
-////////////////////////////////////////////////////////////////////////////////
-/// Transpose
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline void transpose(const ssef &r0,
- const ssef &r1,
- const ssef &r2,
- const ssef &r3,
- ssef &c0,
- ssef &c1,
- ssef &c2,
- ssef &c3)
-{
- ssef l02 = unpacklo(r0, r2);
- ssef h02 = unpackhi(r0, r2);
- ssef l13 = unpacklo(r1, r3);
- ssef h13 = unpackhi(r1, r3);
- c0 = unpacklo(l02, l13);
- c1 = unpackhi(l02, l13);
- c2 = unpacklo(h02, h13);
- c3 = unpackhi(h02, h13);
-}
-
-__forceinline void transpose(
- const ssef &r0, const ssef &r1, const ssef &r2, const ssef &r3, ssef &c0, ssef &c1, ssef &c2)
-{
- ssef l02 = unpacklo(r0, r2);
- ssef h02 = unpackhi(r0, r2);
- ssef l13 = unpacklo(r1, r3);
- ssef h13 = unpackhi(r1, r3);
- c0 = unpacklo(l02, l13);
- c1 = unpackhi(l02, l13);
- c2 = unpacklo(h02, h13);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Reductions
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const ssef vreduce_min(const ssef &v)
-{
-# ifdef __KERNEL_NEON__
- return vdupq_n_f32(vminvq_f32(v));
-# else
- ssef h = min(shuffle<1, 0, 3, 2>(v), v);
- return min(shuffle<2, 3, 0, 1>(h), h);
-# endif
-}
-__forceinline const ssef vreduce_max(const ssef &v)
-{
-# ifdef __KERNEL_NEON__
- return vdupq_n_f32(vmaxvq_f32(v));
-# else
- ssef h = max(shuffle<1, 0, 3, 2>(v), v);
- return max(shuffle<2, 3, 0, 1>(h), h);
-# endif
-}
-__forceinline const ssef vreduce_add(const ssef &v)
-{
-# ifdef __KERNEL_NEON__
- return vdupq_n_f32(vaddvq_f32(v));
-# else
- ssef h = shuffle<1, 0, 3, 2>(v) + v;
- return shuffle<2, 3, 0, 1>(h) + h;
-# endif
-}
-
-__forceinline float reduce_min(const ssef &v)
-{
-# ifdef __KERNEL_NEON__
- return vminvq_f32(v);
-# else
- return _mm_cvtss_f32(vreduce_min(v));
-# endif
-}
-__forceinline float reduce_max(const ssef &v)
-{
-# ifdef __KERNEL_NEON__
- return vmaxvq_f32(v);
-# else
- return _mm_cvtss_f32(vreduce_max(v));
-# endif
-}
-__forceinline float reduce_add(const ssef &v)
-{
-# ifdef __KERNEL_NEON__
- return vaddvq_f32(v);
-# else
- return _mm_cvtss_f32(vreduce_add(v));
-# endif
-}
-
-__forceinline uint32_t select_min(const ssef &v)
-{
- return __bsf(movemask(v == vreduce_min(v)));
-}
-__forceinline uint32_t select_max(const ssef &v)
-{
- return __bsf(movemask(v == vreduce_max(v)));
-}
-
-__forceinline uint32_t select_min(const sseb &valid, const ssef &v)
-{
- const ssef a = select(valid, v, ssef(pos_inf));
- return __bsf(movemask(valid & (a == vreduce_min(a))));
-}
-__forceinline uint32_t select_max(const sseb &valid, const ssef &v)
-{
- const ssef a = select(valid, v, ssef(neg_inf));
- return __bsf(movemask(valid & (a == vreduce_max(a))));
-}
-
-__forceinline uint32_t movemask(const ssef &a)
-{
- return _mm_movemask_ps(a);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Memory load and store operations
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline ssef load4f(const float4 &a)
-{
-# ifdef __KERNEL_WITH_SSE_ALIGN__
- return _mm_load_ps(&a.x);
-# else
- return _mm_loadu_ps(&a.x);
-# endif
-}
-
-__forceinline ssef load4f(const float3 &a)
-{
-# ifdef __KERNEL_WITH_SSE_ALIGN__
- return _mm_load_ps(&a.x);
-# else
- return _mm_loadu_ps(&a.x);
-# endif
-}
-
-__forceinline ssef load4f(const void *const a)
-{
- return _mm_load_ps((float *)a);
-}
-
-__forceinline ssef load1f_first(const float a)
-{
- return _mm_set_ss(a);
-}
-
-__forceinline void store4f(void *ptr, const ssef &v)
-{
- _mm_store_ps((float *)ptr, v);
-}
-
-__forceinline ssef loadu4f(const void *const a)
-{
- return _mm_loadu_ps((float *)a);
-}
-
-__forceinline void storeu4f(void *ptr, const ssef &v)
-{
- _mm_storeu_ps((float *)ptr, v);
-}
-
-__forceinline void store4f(const sseb &mask, void *ptr, const ssef &f)
-{
-# if defined(__KERNEL_AVX__)
- _mm_maskstore_ps((float *)ptr, (__m128i)mask, f);
-# else
- *(ssef *)ptr = select(mask, f, *(ssef *)ptr);
-# endif
-}
-
-__forceinline ssef load4f_nt(void *ptr)
-{
-# if defined(__KERNEL_SSE41__)
- return _mm_castsi128_ps(_mm_stream_load_si128((__m128i *)ptr));
-# else
- return _mm_load_ps((float *)ptr);
-# endif
-}
-
-__forceinline void store4f_nt(void *ptr, const ssef &v)
-{
-# if defined(__KERNEL_SSE41__)
- _mm_stream_ps((float *)ptr, v);
-# else
- _mm_store_ps((float *)ptr, v);
-# endif
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Euclidean Space Operators
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline float dot(const ssef &a, const ssef &b)
-{
- return reduce_add(a * b);
-}
-
-/* calculate shuffled cross product, useful when order of components does not matter */
-__forceinline ssef cross_zxy(const ssef &a, const ssef &b)
-{
- const ssef a0 = a;
- const ssef b0 = shuffle<1, 2, 0, 3>(b);
- const ssef a1 = shuffle<1, 2, 0, 3>(a);
- const ssef b1 = b;
- return msub(a0, b0, a1 * b1);
-}
-
-__forceinline ssef cross(const ssef &a, const ssef &b)
-{
- return shuffle<1, 2, 0, 3>(cross_zxy(a, b));
-}
-
-ccl_device_inline const ssef dot3_splat(const ssef &a, const ssef &b)
-{
-# ifdef __KERNEL_SSE41__
- return _mm_dp_ps(a.m128, b.m128, 0x7f);
-# else
- ssef t = a * b;
- return ssef(((float *)&t)[0] + ((float *)&t)[1] + ((float *)&t)[2]);
-# endif
-}
-
-/* squared length taking only specified axes into account */
-template<size_t X, size_t Y, size_t Z, size_t W> ccl_device_inline float len_squared(const ssef &a)
-{
-# ifndef __KERNEL_SSE41__
- float4 &t = (float4 &)a;
- return (X ? t.x * t.x : 0.0f) + (Y ? t.y * t.y : 0.0f) + (Z ? t.z * t.z : 0.0f) +
- (W ? t.w * t.w : 0.0f);
-# else
- return extract<0>(
- ssef(_mm_dp_ps(a.m128, a.m128, (X << 4) | (Y << 5) | (Z << 6) | (W << 7) | 0xf)));
-# endif
-}
-
-ccl_device_inline float dot3(const ssef &a, const ssef &b)
-{
-# ifdef __KERNEL_SSE41__
- return extract<0>(ssef(_mm_dp_ps(a.m128, b.m128, 0x7f)));
-# else
- ssef t = a * b;
- return ((float *)&t)[0] + ((float *)&t)[1] + ((float *)&t)[2];
-# endif
-}
-
-ccl_device_inline const ssef len3_squared_splat(const ssef &a)
-{
- return dot3_splat(a, a);
-}
-
-ccl_device_inline float len3_squared(const ssef &a)
-{
- return dot3(a, a);
-}
-
-ccl_device_inline float len3(const ssef &a)
-{
- return extract<0>(mm_sqrt(dot3_splat(a, a)));
-}
-
-/* SSE shuffle utility functions */
-
-# ifdef __KERNEL_SSSE3__
-
-/* faster version for SSSE3 */
-typedef ssei shuffle_swap_t;
-
-ccl_device_inline shuffle_swap_t shuffle_swap_identity()
-{
- return _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-}
-
-ccl_device_inline shuffle_swap_t shuffle_swap_swap()
-{
- return _mm_set_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
-}
-
-ccl_device_inline const ssef shuffle_swap(const ssef &a, const shuffle_swap_t &shuf)
-{
- return cast(_mm_shuffle_epi8(cast(a), shuf));
-}
-
-# else
-
-/* somewhat slower version for SSE2 */
-typedef int shuffle_swap_t;
-
-ccl_device_inline shuffle_swap_t shuffle_swap_identity()
-{
- return 0;
-}
-
-ccl_device_inline shuffle_swap_t shuffle_swap_swap()
-{
- return 1;
-}
-
-ccl_device_inline const ssef shuffle_swap(const ssef &a, shuffle_swap_t shuf)
-{
- /* shuffle value must be a constant, so we need to branch */
- if (shuf)
- return shuffle<1, 0, 3, 2>(a);
- else
- return shuffle<3, 2, 1, 0>(a);
-}
-
-# endif
-
-# if defined(__KERNEL_SSE41__) && !defined(__KERNEL_NEON__)
-
-ccl_device_inline void gen_idirsplat_swap(const ssef &pn,
- const shuffle_swap_t &shuf_identity,
- const shuffle_swap_t &shuf_swap,
- const float3 &idir,
- ssef idirsplat[3],
- shuffle_swap_t shufflexyz[3])
-{
- const __m128 idirsplat_raw[] = {_mm_set_ps1(idir.x), _mm_set_ps1(idir.y), _mm_set_ps1(idir.z)};
- idirsplat[0] = _mm_xor_ps(idirsplat_raw[0], pn);
- idirsplat[1] = _mm_xor_ps(idirsplat_raw[1], pn);
- idirsplat[2] = _mm_xor_ps(idirsplat_raw[2], pn);
-
- const ssef signmask = cast(ssei(0x80000000));
- const ssef shuf_identity_f = cast(shuf_identity);
- const ssef shuf_swap_f = cast(shuf_swap);
-
- shufflexyz[0] = _mm_castps_si128(
- _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[0], signmask)));
- shufflexyz[1] = _mm_castps_si128(
- _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[1], signmask)));
- shufflexyz[2] = _mm_castps_si128(
- _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[2], signmask)));
-}
-
-# else
-
-ccl_device_inline void gen_idirsplat_swap(const ssef &pn,
- const shuffle_swap_t &shuf_identity,
- const shuffle_swap_t &shuf_swap,
- const float3 &idir,
- ssef idirsplat[3],
- shuffle_swap_t shufflexyz[3])
-{
- idirsplat[0] = ssef(idir.x) ^ pn;
- idirsplat[1] = ssef(idir.y) ^ pn;
- idirsplat[2] = ssef(idir.z) ^ pn;
-
- shufflexyz[0] = (idir.x >= 0) ? shuf_identity : shuf_swap;
- shufflexyz[1] = (idir.y >= 0) ? shuf_identity : shuf_swap;
- shufflexyz[2] = (idir.z >= 0) ? shuf_identity : shuf_swap;
-}
-
-# endif
-
-ccl_device_inline const ssef uint32_to_float(const ssei &in)
-{
- ssei a = _mm_srli_epi32(in, 16);
- ssei b = _mm_and_si128(in, _mm_set1_epi32(0x0000ffff));
- ssei c = _mm_or_si128(a, _mm_set1_epi32(0x53000000));
- ssef d = _mm_cvtepi32_ps(b);
- ssef e = _mm_sub_ps(_mm_castsi128_ps(c), _mm_castsi128_ps(_mm_set1_epi32(0x53000000)));
- return _mm_add_ps(e, d);
-}
-
-template<size_t S1, size_t S2, size_t S3, size_t S4>
-ccl_device_inline const ssef set_sign_bit(const ssef &a)
-{
- return cast(cast(a) ^ ssei(S1 << 31, S2 << 31, S3 << 31, S4 << 31));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Debug Functions
-////////////////////////////////////////////////////////////////////////////////
-
-ccl_device_inline void print_ssef(const char *label, const ssef &a)
-{
- printf(
- "%s: %.8f %.8f %.8f %.8f\n", label, (double)a[0], (double)a[1], (double)a[2], (double)a[3]);
-}
-
-#endif
-
-CCL_NAMESPACE_END
-
-#endif
diff --git a/intern/cycles/util/ssei.h b/intern/cycles/util/ssei.h
deleted file mode 100644
index 5caf44c967f..00000000000
--- a/intern/cycles/util/ssei.h
+++ /dev/null
@@ -1,633 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2011-2013 Intel Corporation
- * Modifications Copyright 2014-2022 Blender Foundation. */
-
-#ifndef __UTIL_SSEI_H__
-#define __UTIL_SSEI_H__
-
-CCL_NAMESPACE_BEGIN
-
-#ifdef __KERNEL_SSE2__
-
-struct sseb;
-struct ssef;
-
-/*! 4-wide SSE integer type. */
-struct ssei {
- typedef sseb Mask; // mask type
- typedef ssei Int; // int type
- typedef ssef Float; // float type
-
- enum { size = 4 }; // number of SIMD elements
- union {
- __m128i m128;
- int32_t i[4];
- }; // data
-
- ////////////////////////////////////////////////////////////////////////////////
- /// Constructors, Assignment & Cast Operators
- ////////////////////////////////////////////////////////////////////////////////
-
- __forceinline ssei()
- {
- }
- __forceinline ssei(const ssei &a)
- {
- m128 = a.m128;
- }
- __forceinline ssei &operator=(const ssei &a)
- {
- m128 = a.m128;
- return *this;
- }
-
- __forceinline ssei(const __m128i a) : m128(a)
- {
- }
- __forceinline operator const __m128i &(void) const
- {
- return m128;
- }
- __forceinline operator __m128i &(void)
- {
- return m128;
- }
-
- __forceinline ssei(const int a) : m128(_mm_set1_epi32(a))
- {
- }
- __forceinline ssei(int a, int b, int c, int d) : m128(_mm_setr_epi32(a, b, c, d))
- {
- }
-
- __forceinline explicit ssei(const __m128 a) : m128(_mm_cvtps_epi32(a))
- {
- }
-
- ////////////////////////////////////////////////////////////////////////////////
- /// Array Access
- ////////////////////////////////////////////////////////////////////////////////
-
- __forceinline const int32_t &operator[](const size_t index) const
- {
- assert(index < 4);
- return i[index];
- }
- __forceinline int32_t &operator[](const size_t index)
- {
- assert(index < 4);
- return i[index];
- }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-/// Unary Operators
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const ssei cast(const __m128 &a)
-{
- return _mm_castps_si128(a);
-}
-__forceinline const ssei operator+(const ssei &a)
-{
- return a;
-}
-__forceinline const ssei operator-(const ssei &a)
-{
- return _mm_sub_epi32(_mm_setzero_si128(), a.m128);
-}
-# if defined(__KERNEL_SSSE3__)
-__forceinline const ssei abs(const ssei &a)
-{
- return _mm_abs_epi32(a.m128);
-}
-# endif
-
-////////////////////////////////////////////////////////////////////////////////
-/// Binary Operators
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const ssei operator+(const ssei &a, const ssei &b)
-{
- return _mm_add_epi32(a.m128, b.m128);
-}
-__forceinline const ssei operator+(const ssei &a, const int32_t &b)
-{
- return a + ssei(b);
-}
-__forceinline const ssei operator+(const int32_t &a, const ssei &b)
-{
- return ssei(a) + b;
-}
-
-__forceinline const ssei operator-(const ssei &a, const ssei &b)
-{
- return _mm_sub_epi32(a.m128, b.m128);
-}
-__forceinline const ssei operator-(const ssei &a, const int32_t &b)
-{
- return a - ssei(b);
-}
-__forceinline const ssei operator-(const int32_t &a, const ssei &b)
-{
- return ssei(a) - b;
-}
-
-# if defined(__KERNEL_SSE41__)
-__forceinline const ssei operator*(const ssei &a, const ssei &b)
-{
- return _mm_mullo_epi32(a.m128, b.m128);
-}
-__forceinline const ssei operator*(const ssei &a, const int32_t &b)
-{
- return a * ssei(b);
-}
-__forceinline const ssei operator*(const int32_t &a, const ssei &b)
-{
- return ssei(a) * b;
-}
-# endif
-
-__forceinline const ssei operator&(const ssei &a, const ssei &b)
-{
- return _mm_and_si128(a.m128, b.m128);
-}
-__forceinline const ssei operator&(const ssei &a, const int32_t &b)
-{
- return a & ssei(b);
-}
-__forceinline const ssei operator&(const int32_t &a, const ssei &b)
-{
- return ssei(a) & b;
-}
-
-__forceinline const ssei operator|(const ssei &a, const ssei &b)
-{
- return _mm_or_si128(a.m128, b.m128);
-}
-__forceinline const ssei operator|(const ssei &a, const int32_t &b)
-{
- return a | ssei(b);
-}
-__forceinline const ssei operator|(const int32_t &a, const ssei &b)
-{
- return ssei(a) | b;
-}
-
-__forceinline const ssei operator^(const ssei &a, const ssei &b)
-{
- return _mm_xor_si128(a.m128, b.m128);
-}
-__forceinline const ssei operator^(const ssei &a, const int32_t &b)
-{
- return a ^ ssei(b);
-}
-__forceinline const ssei operator^(const int32_t &a, const ssei &b)
-{
- return ssei(a) ^ b;
-}
-
-__forceinline const ssei operator<<(const ssei &a, const int32_t &n)
-{
- return _mm_slli_epi32(a.m128, n);
-}
-__forceinline const ssei operator>>(const ssei &a, const int32_t &n)
-{
- return _mm_srai_epi32(a.m128, n);
-}
-
-__forceinline const ssei andnot(const ssei &a, const ssei &b)
-{
- return _mm_andnot_si128(a.m128, b.m128);
-}
-__forceinline const ssei andnot(const sseb &a, const ssei &b)
-{
- return _mm_andnot_si128(cast(a.m128), b.m128);
-}
-__forceinline const ssei andnot(const ssei &a, const sseb &b)
-{
- return _mm_andnot_si128(a.m128, cast(b.m128));
-}
-
-__forceinline const ssei sra(const ssei &a, const int32_t &b)
-{
- return _mm_srai_epi32(a.m128, b);
-}
-__forceinline const ssei srl(const ssei &a, const int32_t &b)
-{
- return _mm_srli_epi32(a.m128, b);
-}
-
-# if defined(__KERNEL_SSE41__)
-__forceinline const ssei min(const ssei &a, const ssei &b)
-{
- return _mm_min_epi32(a.m128, b.m128);
-}
-__forceinline const ssei min(const ssei &a, const int32_t &b)
-{
- return min(a, ssei(b));
-}
-__forceinline const ssei min(const int32_t &a, const ssei &b)
-{
- return min(ssei(a), b);
-}
-
-__forceinline const ssei max(const ssei &a, const ssei &b)
-{
- return _mm_max_epi32(a.m128, b.m128);
-}
-__forceinline const ssei max(const ssei &a, const int32_t &b)
-{
- return max(a, ssei(b));
-}
-__forceinline const ssei max(const int32_t &a, const ssei &b)
-{
- return max(ssei(a), b);
-}
-# endif
-
-////////////////////////////////////////////////////////////////////////////////
-/// Assignment Operators
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline ssei &operator+=(ssei &a, const ssei &b)
-{
- return a = a + b;
-}
-__forceinline ssei &operator+=(ssei &a, const int32_t &b)
-{
- return a = a + b;
-}
-
-__forceinline ssei &operator-=(ssei &a, const ssei &b)
-{
- return a = a - b;
-}
-__forceinline ssei &operator-=(ssei &a, const int32_t &b)
-{
- return a = a - b;
-}
-
-# if defined(__KERNEL_SSE41__)
-__forceinline ssei &operator*=(ssei &a, const ssei &b)
-{
- return a = a * b;
-}
-__forceinline ssei &operator*=(ssei &a, const int32_t &b)
-{
- return a = a * b;
-}
-# endif
-
-__forceinline ssei &operator&=(ssei &a, const ssei &b)
-{
- return a = a & b;
-}
-__forceinline ssei &operator&=(ssei &a, const int32_t &b)
-{
- return a = a & b;
-}
-
-__forceinline ssei &operator|=(ssei &a, const ssei &b)
-{
- return a = a | b;
-}
-__forceinline ssei &operator|=(ssei &a, const int32_t &b)
-{
- return a = a | b;
-}
-
-__forceinline ssei &operator^=(ssei &a, const ssei &b)
-{
- return a = a ^ b;
-}
-__forceinline ssei &operator^=(ssei &a, const int32_t &b)
-{
- return a = a ^ b;
-}
-
-__forceinline ssei &operator<<=(ssei &a, const int32_t &b)
-{
- return a = a << b;
-}
-__forceinline ssei &operator>>=(ssei &a, const int32_t &b)
-{
- return a = a >> b;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Comparison Operators + Select
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline const sseb operator==(const ssei &a, const ssei &b)
-{
- return _mm_castsi128_ps(_mm_cmpeq_epi32(a.m128, b.m128));
-}
-__forceinline const sseb operator==(const ssei &a, const int32_t &b)
-{
- return a == ssei(b);
-}
-__forceinline const sseb operator==(const int32_t &a, const ssei &b)
-{
- return ssei(a) == b;
-}
-
-__forceinline const sseb operator!=(const ssei &a, const ssei &b)
-{
- return !(a == b);
-}
-__forceinline const sseb operator!=(const ssei &a, const int32_t &b)
-{
- return a != ssei(b);
-}
-__forceinline const sseb operator!=(const int32_t &a, const ssei &b)
-{
- return ssei(a) != b;
-}
-
-__forceinline const sseb operator<(const ssei &a, const ssei &b)
-{
- return _mm_castsi128_ps(_mm_cmplt_epi32(a.m128, b.m128));
-}
-__forceinline const sseb operator<(const ssei &a, const int32_t &b)
-{
- return a < ssei(b);
-}
-__forceinline const sseb operator<(const int32_t &a, const ssei &b)
-{
- return ssei(a) < b;
-}
-
-__forceinline const sseb operator>=(const ssei &a, const ssei &b)
-{
- return !(a < b);
-}
-__forceinline const sseb operator>=(const ssei &a, const int32_t &b)
-{
- return a >= ssei(b);
-}
-__forceinline const sseb operator>=(const int32_t &a, const ssei &b)
-{
- return ssei(a) >= b;
-}
-
-__forceinline const sseb operator>(const ssei &a, const ssei &b)
-{
- return _mm_castsi128_ps(_mm_cmpgt_epi32(a.m128, b.m128));
-}
-__forceinline const sseb operator>(const ssei &a, const int32_t &b)
-{
- return a > ssei(b);
-}
-__forceinline const sseb operator>(const int32_t &a, const ssei &b)
-{
- return ssei(a) > b;
-}
-
-__forceinline const sseb operator<=(const ssei &a, const ssei &b)
-{
- return !(a > b);
-}
-__forceinline const sseb operator<=(const ssei &a, const int32_t &b)
-{
- return a <= ssei(b);
-}
-__forceinline const sseb operator<=(const int32_t &a, const ssei &b)
-{
- return ssei(a) <= b;
-}
-
-__forceinline const ssei select(const sseb &m, const ssei &t, const ssei &f)
-{
-# ifdef __KERNEL_SSE41__
- return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), m));
-# else
- return _mm_or_si128(_mm_and_si128(m, t), _mm_andnot_si128(m, f));
-# endif
-}
-
-__forceinline const ssei select(const int mask, const ssei &t, const ssei &f)
-{
-# if defined(__KERNEL_SSE41__) && \
- ((!defined(__clang__) && !defined(_MSC_VER)) || defined(__INTEL_COMPILER))
- return _mm_castps_si128(_mm_blend_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), mask));
-# else
- return select(sseb(mask), t, f);
-# endif
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// Movement/Shifting/Shuffling Functions
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline ssei unpacklo(const ssei &a, const ssei &b)
-{
- return _mm_unpacklo_epi32(a, b);
-}
-__forceinline ssei unpackhi(const ssei &a, const ssei &b)
-{
- return _mm_unpackhi_epi32(a, b);
-}
-
-template<size_t i0, size_t i1, size_t i2, size_t i3>
-__forceinline const ssei shuffle(const ssei &a)
-{
-# ifdef __KERNEL_NEON__
- int32x4_t result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a));
- return vreinterpretq_m128i_s32(result);
-# else
- return _mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0));
-# endif
-}
-
-template<size_t i0, size_t i1, size_t i2, size_t i3>
-__forceinline const ssei shuffle(const ssei &a, const ssei &b)
-{
-# ifdef __KERNEL_NEON__
- int32x4_t result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a),
- vreinterpretq_s32_m128i(b));
- return vreinterpretq_m128i_s32(result);
-# else
- return _mm_castps_si128(
- _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0)));
-# endif
-}
-
-template<size_t i0> __forceinline const ssei shuffle(const ssei &b)
-{
- return shuffle<i0, i0, i0, i0>(b);
-}
-
-# if defined(__KERNEL_SSE41__)
-template<size_t src> __forceinline int extract(const ssei &b)
-{
- return _mm_extract_epi32(b, src);
-}
-template<size_t dst> __forceinline const ssei insert(const ssei &a, const int32_t b)
-{
- return _mm_insert_epi32(a, b, dst);
-}
-# else
-template<size_t src> __forceinline int extract(const ssei &b)
-{
- return b[src];
-}
-template<size_t dst> __forceinline const ssei insert(const ssei &a, const int32_t b)
-{
- ssei c = a;
- c[dst] = b;
- return c;
-}
-# endif
-
-////////////////////////////////////////////////////////////////////////////////
-/// Reductions
-////////////////////////////////////////////////////////////////////////////////
-
-# if defined(__KERNEL_SSE41__)
-__forceinline const ssei vreduce_min(const ssei &v)
-{
- ssei h = min(shuffle<1, 0, 3, 2>(v), v);
- return min(shuffle<2, 3, 0, 1>(h), h);
-}
-__forceinline const ssei vreduce_max(const ssei &v)
-{
- ssei h = max(shuffle<1, 0, 3, 2>(v), v);
- return max(shuffle<2, 3, 0, 1>(h), h);
-}
-__forceinline const ssei vreduce_add(const ssei &v)
-{
- ssei h = shuffle<1, 0, 3, 2>(v) + v;
- return shuffle<2, 3, 0, 1>(h) + h;
-}
-
-__forceinline int reduce_min(const ssei &v)
-{
-# ifdef __KERNEL_NEON__
- return vminvq_s32(vreinterpretq_s32_m128i(v));
-# else
- return extract<0>(vreduce_min(v));
-# endif
-}
-__forceinline int reduce_max(const ssei &v)
-{
-# ifdef __KERNEL_NEON__
- return vmaxvq_s32(vreinterpretq_s32_m128i(v));
-# else
- return extract<0>(vreduce_max(v));
-# endif
-}
-__forceinline int reduce_add(const ssei &v)
-{
-# ifdef __KERNEL_NEON__
- return vaddvq_s32(vreinterpretq_s32_m128i(v));
-# else
- return extract<0>(vreduce_add(v));
-# endif
-}
-
-__forceinline uint32_t select_min(const ssei &v)
-{
- return __bsf(movemask(v == vreduce_min(v)));
-}
-__forceinline uint32_t select_max(const ssei &v)
-{
- return __bsf(movemask(v == vreduce_max(v)));
-}
-
-__forceinline uint32_t select_min(const sseb &valid, const ssei &v)
-{
- const ssei a = select(valid, v, ssei((int)pos_inf));
- return __bsf(movemask(valid & (a == vreduce_min(a))));
-}
-__forceinline uint32_t select_max(const sseb &valid, const ssei &v)
-{
- const ssei a = select(valid, v, ssei((int)neg_inf));
- return __bsf(movemask(valid & (a == vreduce_max(a))));
-}
-
-# else
-
-__forceinline int ssei_min(int a, int b)
-{
- return (a < b) ? a : b;
-}
-__forceinline int ssei_max(int a, int b)
-{
- return (a > b) ? a : b;
-}
-__forceinline int reduce_min(const ssei &v)
-{
- return ssei_min(ssei_min(v[0], v[1]), ssei_min(v[2], v[3]));
-}
-__forceinline int reduce_max(const ssei &v)
-{
- return ssei_max(ssei_max(v[0], v[1]), ssei_max(v[2], v[3]));
-}
-__forceinline int reduce_add(const ssei &v)
-{
- return v[0] + v[1] + v[2] + v[3];
-}
-
-# endif
-
-////////////////////////////////////////////////////////////////////////////////
-/// Memory load and store operations
-////////////////////////////////////////////////////////////////////////////////
-
-__forceinline ssei load4i(const void *const a)
-{
- return _mm_load_si128((__m128i *)a);
-}
-
-__forceinline void store4i(void *ptr, const ssei &v)
-{
- _mm_store_si128((__m128i *)ptr, v);
-}
-
-__forceinline void storeu4i(void *ptr, const ssei &v)
-{
- _mm_storeu_si128((__m128i *)ptr, v);
-}
-
-__forceinline void store4i(const sseb &mask, void *ptr, const ssei &i)
-{
-# if defined(__KERNEL_AVX__)
- _mm_maskstore_ps((float *)ptr, (__m128i)mask, _mm_castsi128_ps(i));
-# else
- *(ssei *)ptr = select(mask, i, *(ssei *)ptr);
-# endif
-}
-
-__forceinline ssei load4i_nt(void *ptr)
-{
-# if defined(__KERNEL_SSE41__)
- return _mm_stream_load_si128((__m128i *)ptr);
-# else
- return _mm_load_si128((__m128i *)ptr);
-# endif
-}
-
-__forceinline void store4i_nt(void *ptr, const ssei &v)
-{
-# if defined(__KERNEL_SSE41__)
- _mm_stream_ps((float *)ptr, _mm_castsi128_ps(v));
-# else
- _mm_store_si128((__m128i *)ptr, v);
-# endif
-}
-
-////////////////////////////////////////////////////////////////////////////////
-/// Debug Functions
-////////////////////////////////////////////////////////////////////////////////
-
-ccl_device_inline void print_ssei(const char *label, const ssei &a)
-{
- printf("%s: %df %df %df %d\n", label, a[0], a[1], a[2], a[3]);
-}
-
-#endif
-
-CCL_NAMESPACE_END
-
-#endif
diff --git a/intern/cycles/util/transform.cpp b/intern/cycles/util/transform.cpp
index cb985c65dd8..84116262437 100644
--- a/intern/cycles/util/transform.cpp
+++ b/intern/cycles/util/transform.cpp
@@ -102,7 +102,7 @@ ProjectionTransform projection_inverse(const ProjectionTransform &tfm)
return projection_identity();
}
- memcpy(&tfmR, R, sizeof(R));
+ memcpy(&tfmR.x[0], R, sizeof(R));
return tfmR;
}
diff --git a/intern/cycles/util/transform.h b/intern/cycles/util/transform.h
index 24184dc7074..d7f95b7f296 100644
--- a/intern/cycles/util/transform.h
+++ b/intern/cycles/util/transform.h
@@ -63,17 +63,16 @@ ccl_device_inline float3 transform_point(ccl_private const Transform *t, const f
{
/* TODO(sergey): Disabled for now, causes crashes in certain cases. */
#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
- ssef x, y, z, w, aa;
- aa = a.m128;
+ const float4 aa(a.m128);
- x = _mm_loadu_ps(&t->x.x);
- y = _mm_loadu_ps(&t->y.x);
- z = _mm_loadu_ps(&t->z.x);
- w = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f);
+ float4 x(_mm_loadu_ps(&t->x.x));
+ float4 y(_mm_loadu_ps(&t->y.x));
+ float4 z(_mm_loadu_ps(&t->z.x));
+ float4 w(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f));
- _MM_TRANSPOSE4_PS(x, y, z, w);
+ _MM_TRANSPOSE4_PS(x.m128, y.m128, z.m128, w.m128);
- ssef tmp = w;
+ float4 tmp = w;
tmp = madd(shuffle<2>(aa), z, tmp);
tmp = madd(shuffle<1>(aa), y, tmp);
tmp = madd(shuffle<0>(aa), x, tmp);
@@ -94,16 +93,16 @@ ccl_device_inline float3 transform_point(ccl_private const Transform *t, const f
ccl_device_inline float3 transform_direction(ccl_private const Transform *t, const float3 a)
{
#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
- ssef x, y, z, w, aa;
- aa = a.m128;
- x = _mm_loadu_ps(&t->x.x);
- y = _mm_loadu_ps(&t->y.x);
- z = _mm_loadu_ps(&t->z.x);
- w = _mm_setzero_ps();
+ const float4 aa(a.m128);
- _MM_TRANSPOSE4_PS(x, y, z, w);
+ float4 x(_mm_loadu_ps(&t->x.x));
+ float4 y(_mm_loadu_ps(&t->y.x));
+ float4 z(_mm_loadu_ps(&t->z.x));
+ float4 w(_mm_setzero_ps());
- ssef tmp = shuffle<2>(aa) * z;
+ _MM_TRANSPOSE4_PS(x.m128, y.m128, z.m128, w.m128);
+
+ float4 tmp = shuffle<2>(aa) * z;
tmp = madd(shuffle<1>(aa), y, tmp);
tmp = madd(shuffle<0>(aa), x, tmp);
diff --git a/intern/cycles/util/transform_inverse.h b/intern/cycles/util/transform_inverse.h
index bb410a6daef..2faac576d82 100644
--- a/intern/cycles/util/transform_inverse.h
+++ b/intern/cycles/util/transform_inverse.h
@@ -9,26 +9,33 @@ CCL_NAMESPACE_BEGIN
* Normally we don't use SSE41/AVX outside the kernel, but for this it's
* important to match exactly for ray tracing precision. */
-ccl_device_forceinline float3 transform_inverse_cross(const float3 a, const float3 b)
+ccl_device_forceinline float3 transform_inverse_cross(const float3 a_, const float3 b_)
{
#if defined(__AVX2__) && defined(__KERNEL_SSE2__)
- const ssef sse_a = (const __m128 &)a;
- const ssef sse_b = (const __m128 &)b;
- const ssef r = shuffle<1, 2, 0, 3>(
- ssef(_mm_fmsub_ps(sse_a, shuffle<1, 2, 0, 3>(sse_b), shuffle<1, 2, 0, 3>(sse_a) * sse_b)));
+ const __m128 a = (const __m128 &)a_;
+ const __m128 b = (const __m128 &)b_;
+ const __m128 a_shuffle = _mm_castsi128_ps(
+ _mm_shuffle_epi32(_mm_castps_si128(a), _MM_SHUFFLE(3, 0, 2, 1)));
+ const __m128 b_shuffle = _mm_castsi128_ps(
+ _mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(3, 0, 2, 1)));
+ const __m128 r = _mm_castsi128_ps(
+ _mm_shuffle_epi32(_mm_castps_si128(_mm_fmsub_ps(a, b_shuffle, _mm_mul_ps(a_shuffle, b))),
+ _MM_SHUFFLE(3, 0, 2, 1)));
return (const float3 &)r;
#endif
- return cross(a, b);
+ return cross(a_, b_);
}
-ccl_device_forceinline float transform_inverse_dot(const float3 a, const float3 b)
+ccl_device_forceinline float transform_inverse_dot(const float3 a_, const float3 b_)
{
-#ifdef __SSE4_1__
- return _mm_cvtss_f32(_mm_dp_ps((const __m128 &)a, (const __m128 &)b, 0x7F));
+#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
+ const __m128 a = (const __m128 &)a_;
+ const __m128 b = (const __m128 &)b_;
+ return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F));
#endif
- return dot(a, b);
+ return dot(a_, b_);
}
ccl_device_forceinline Transform transform_inverse_impl(const Transform tfm)
diff --git a/intern/cycles/util/types.h b/intern/cycles/util/types.h
index 1ab6f76f9bc..cf7f35c4116 100644
--- a/intern/cycles/util/types.h
+++ b/intern/cycles/util/types.h
@@ -97,6 +97,7 @@ ccl_device_inline void print_float(ccl_private const char *label, const float a)
#include "util/types_int2.h"
#include "util/types_int3.h"
#include "util/types_int4.h"
+#include "util/types_int8.h"
#include "util/types_uint2.h"
#include "util/types_uint3.h"
@@ -119,6 +120,7 @@ ccl_device_inline void print_float(ccl_private const char *label, const float a)
#include "util/types_int2_impl.h"
#include "util/types_int3_impl.h"
#include "util/types_int4_impl.h"
+#include "util/types_int8_impl.h"
#include "util/types_uint2_impl.h"
#include "util/types_uint3_impl.h"
@@ -129,16 +131,4 @@ ccl_device_inline void print_float(ccl_private const char *label, const float a)
#include "util/types_float4_impl.h"
#include "util/types_float8_impl.h"
-/* SSE types. */
-#ifndef __KERNEL_GPU__
-# include "util/sseb.h"
-# include "util/ssef.h"
-# include "util/ssei.h"
-# if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
-# include "util/avxb.h"
-# include "util/avxf.h"
-# include "util/avxi.h"
-# endif
-#endif
-
#endif /* __UTIL_TYPES_H__ */
diff --git a/intern/cycles/util/types_float8.h b/intern/cycles/util/types_float8.h
index 29fd632f08e..121141ddfd9 100644
--- a/intern/cycles/util/types_float8.h
+++ b/intern/cycles/util/types_float8.h
@@ -11,15 +11,15 @@
CCL_NAMESPACE_BEGIN
/* float8 is a reserved type in Metal that has not been implemented. For
- * that reason this is named float8_t and not using native vector types. */
+ * that reason this is named vfloat8 and does not use native vector types. */
#ifdef __KERNEL_GPU__
-struct float8_t
+struct vfloat8
#else
-struct ccl_try_align(32) float8_t
+struct ccl_try_align(32) vfloat8
#endif
{
-#ifdef __KERNEL_AVX2__
+#ifdef __KERNEL_AVX__
union {
__m256 m256;
struct {
@@ -27,18 +27,18 @@ struct ccl_try_align(32) float8_t
};
};
- __forceinline float8_t();
- __forceinline float8_t(const float8_t &a);
- __forceinline explicit float8_t(const __m256 &a);
+ __forceinline vfloat8();
+ __forceinline vfloat8(const vfloat8 &a);
+ __forceinline explicit vfloat8(const __m256 &a);
__forceinline operator const __m256 &() const;
__forceinline operator __m256 &();
- __forceinline float8_t &operator=(const float8_t &a);
+ __forceinline vfloat8 &operator=(const vfloat8 &a);
-#else /* __KERNEL_AVX2__ */
+#else /* __KERNEL_AVX__ */
float a, b, c, d, e, f, g, h;
-#endif /* __KERNEL_AVX2__ */
+#endif /* __KERNEL_AVX__ */
#ifndef __KERNEL_GPU__
__forceinline float operator[](int i) const;
@@ -46,8 +46,11 @@ struct ccl_try_align(32) float8_t
#endif
};
-ccl_device_inline float8_t make_float8_t(float f);
-ccl_device_inline float8_t
-make_float8_t(float a, float b, float c, float d, float e, float f, float g, float h);
+ccl_device_inline vfloat8 make_vfloat8(float f);
+ccl_device_inline vfloat8
+make_vfloat8(float a, float b, float c, float d, float e, float f, float g, float h);
+ccl_device_inline vfloat8 make_vfloat8(const float4 a, const float4 b);
+
+ccl_device_inline void print_vfloat8(ccl_private const char *label, const vfloat8 a);
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_float8_impl.h b/intern/cycles/util/types_float8_impl.h
index e8576cdaf70..9f42e0f663c 100644
--- a/intern/cycles/util/types_float8_impl.h
+++ b/intern/cycles/util/types_float8_impl.h
@@ -10,45 +10,45 @@
CCL_NAMESPACE_BEGIN
-#ifdef __KERNEL_AVX2__
-__forceinline float8_t::float8_t()
+#ifdef __KERNEL_AVX__
+__forceinline vfloat8::vfloat8()
{
}
-__forceinline float8_t::float8_t(const float8_t &f) : m256(f.m256)
+__forceinline vfloat8::vfloat8(const vfloat8 &f) : m256(f.m256)
{
}
-__forceinline float8_t::float8_t(const __m256 &f) : m256(f)
+__forceinline vfloat8::vfloat8(const __m256 &f) : m256(f)
{
}
-__forceinline float8_t::operator const __m256 &() const
+__forceinline vfloat8::operator const __m256 &() const
{
return m256;
}
-__forceinline float8_t::operator __m256 &()
+__forceinline vfloat8::operator __m256 &()
{
return m256;
}
-__forceinline float8_t &float8_t::operator=(const float8_t &f)
+__forceinline vfloat8 &vfloat8::operator=(const vfloat8 &f)
{
m256 = f.m256;
return *this;
}
-#endif /* __KERNEL_AVX2__ */
+#endif /* __KERNEL_AVX__ */
#ifndef __KERNEL_GPU__
-__forceinline float float8_t::operator[](int i) const
+__forceinline float vfloat8::operator[](int i) const
{
util_assert(i >= 0);
util_assert(i < 8);
return *(&a + i);
}
-__forceinline float &float8_t::operator[](int i)
+__forceinline float &vfloat8::operator[](int i)
{
util_assert(i >= 0);
util_assert(i < 8);
@@ -56,25 +56,50 @@ __forceinline float &float8_t::operator[](int i)
}
#endif
-ccl_device_inline float8_t make_float8_t(float f)
+ccl_device_inline vfloat8 make_vfloat8(float f)
{
-#ifdef __KERNEL_AVX2__
- float8_t r(_mm256_set1_ps(f));
+#ifdef __KERNEL_AVX__
+ vfloat8 r(_mm256_set1_ps(f));
#else
- float8_t r = {f, f, f, f, f, f, f, f};
+ vfloat8 r = {f, f, f, f, f, f, f, f};
#endif
return r;
}
-ccl_device_inline float8_t
-make_float8_t(float a, float b, float c, float d, float e, float f, float g, float h)
+ccl_device_inline vfloat8
+make_vfloat8(float a, float b, float c, float d, float e, float f, float g, float h)
{
-#ifdef __KERNEL_AVX2__
- float8_t r(_mm256_setr_ps(a, b, c, d, e, f, g, h));
+#ifdef __KERNEL_AVX__
+ vfloat8 r(_mm256_setr_ps(a, b, c, d, e, f, g, h));
#else
- float8_t r = {a, b, c, d, e, f, g, h};
+ vfloat8 r = {a, b, c, d, e, f, g, h};
#endif
return r;
}
+ccl_device_inline vfloat8 make_vfloat8(const float4 a, const float4 b)
+{
+#ifdef __KERNEL_AVX__
+ return vfloat8(_mm256_insertf128_ps(_mm256_castps128_ps256(a), b, 1));
+#else
+ return make_vfloat8(a.x, a.y, a.z, a.w, b.x, b.y, b.z, b.w);
+#endif
+}
+
+ccl_device_inline void print_vfloat8(ccl_private const char *label, const vfloat8 a)
+{
+#ifdef __KERNEL_PRINTF__
+ printf("%s: %.8f %.8f %.8f %.8f %.8f %.8f %.8f %.8f\n",
+ label,
+ (double)a.a,
+ (double)a.b,
+ (double)a.c,
+ (double)a.d,
+ (double)a.e,
+ (double)a.f,
+ (double)a.g,
+ (double)a.h);
+#endif
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_int8.h b/intern/cycles/util/types_int8.h
new file mode 100644
index 00000000000..8643ebe96ad
--- /dev/null
+++ b/intern/cycles/util/types_int8.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#pragma once
+
+#ifndef __UTIL_TYPES_H__
+# error "Do not include this file directly, include util/types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+struct vfloat8;
+
+#ifdef __KERNEL_GPU__
+struct vint8
+#else
+struct ccl_try_align(32) vint8
+#endif
+{
+#ifdef __KERNEL_AVX__
+ union {
+ __m256i m256;
+ struct {
+ int a, b, c, d, e, f, g, h;
+ };
+ };
+
+ __forceinline vint8();
+ __forceinline vint8(const vint8 &a);
+ __forceinline explicit vint8(const __m256i &a);
+
+ __forceinline operator const __m256i &() const;
+ __forceinline operator __m256i &();
+
+ __forceinline vint8 &operator=(const vint8 &a);
+#else /* __KERNEL_AVX__ */
+ int a, b, c, d, e, f, g, h;
+#endif /* __KERNEL_AVX__ */
+
+#ifndef __KERNEL_GPU__
+ __forceinline int operator[](int i) const;
+ __forceinline int &operator[](int i);
+#endif
+};
+
+ccl_device_inline vint8 make_vint8(int a, int b, int c, int d, int e, int f, int g, int h);
+ccl_device_inline vint8 make_vint8(int i);
+ccl_device_inline vint8 make_vint8(const vfloat8 f);
+ccl_device_inline vint8 make_vint8(const int4 a, const int4 b);
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_int8_impl.h b/intern/cycles/util/types_int8_impl.h
new file mode 100644
index 00000000000..080bcaa6a2b
--- /dev/null
+++ b/intern/cycles/util/types_int8_impl.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#pragma once
+
+#ifndef __UTIL_TYPES_H__
+# error "Do not include this file directly, include util/types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifdef __KERNEL_AVX__
+__forceinline vint8::vint8()
+{
+}
+
+__forceinline vint8::vint8(const vint8 &a) : m256(a.m256)
+{
+}
+
+__forceinline vint8::vint8(const __m256i &a) : m256(a)
+{
+}
+
+__forceinline vint8::operator const __m256i &() const
+{
+ return m256;
+}
+
+__forceinline vint8::operator __m256i &()
+{
+ return m256;
+}
+
+__forceinline vint8 &vint8::operator=(const vint8 &a)
+{
+ m256 = a.m256;
+ return *this;
+}
+#endif /* __KERNEL_AVX__ */
+
+#ifndef __KERNEL_GPU__
+__forceinline int vint8::operator[](int i) const
+{
+ util_assert(i >= 0);
+ util_assert(i < 8);
+ return *(&a + i);
+}
+
+__forceinline int &vint8::operator[](int i)
+{
+ util_assert(i >= 0);
+ util_assert(i < 8);
+ return *(&a + i);
+}
+#endif
+
+ccl_device_inline vint8 make_vint8(int a, int b, int c, int d, int e, int f, int g, int h)
+{
+#ifdef __KERNEL_AVX__
+ return vint8(_mm256_set_epi32(h, g, f, e, d, c, b, a));
+#else
+ return {a, b, c, d, e, f, g, h};
+#endif
+}
+
+ccl_device_inline vint8 make_vint8(int i)
+{
+#ifdef __KERNEL_AVX__
+ return vint8(_mm256_set1_epi32(i));
+#else
+ return make_vint8(i, i, i, i, i, i, i, i);
+#endif
+}
+
+ccl_device_inline vint8 make_vint8(const vfloat8 f)
+{
+#ifdef __KERNEL_AVX__
+ return vint8(_mm256_cvtps_epi32(f.m256));
+#else
+ return make_vint8(
+ (int)f.a, (int)f.b, (int)f.c, (int)f.d, (int)f.e, (int)f.f, (int)f.g, (int)f.h);
+#endif
+}
+
+ccl_device_inline vint8 make_vint8(const int4 a, const int4 b)
+{
+#ifdef __KERNEL_AVX__
+ return vint8(_mm256_insertf128_si256(_mm256_castsi128_si256(a.m128), b.m128, 1));
+#else
+ return make_vint8(a.x, a.y, a.z, a.w, b.x, b.y, b.z, b.w);
+#endif
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/ffmpeg/CMakeLists.txt b/intern/ffmpeg/CMakeLists.txt
index 0de8496f3f3..4fb5df9d4cd 100644
--- a/intern/ffmpeg/CMakeLists.txt
+++ b/intern/ffmpeg/CMakeLists.txt
@@ -6,6 +6,7 @@ if(WITH_GTESTS)
tests/ffmpeg_codecs.cc
)
set(TEST_INC
+ .
)
set(TEST_INC_SYS
${FFMPEG_INCLUDE_DIRS}
diff --git a/intern/ffmpeg/ffmpeg_compat.h b/intern/ffmpeg/ffmpeg_compat.h
index f311e04d8e0..f7d87af8bca 100644
--- a/intern/ffmpeg/ffmpeg_compat.h
+++ b/intern/ffmpeg/ffmpeg_compat.h
@@ -36,6 +36,14 @@
# define FFMPEG_INLINE static inline
#endif
+#if (LIBAVFORMAT_VERSION_MAJOR < 59)
+/* For versions older than ffmpeg 5.0, use the old channel layout variables.
+ * We intend to only keep this workaround for around two releases (3.5, 3.6).
+ * If it sticks around any longer, then we should consider refactoring this.
+ */
+# define FFMPEG_USE_OLD_CHANNEL_VARS
+#endif
+
#if (LIBAVFORMAT_VERSION_MAJOR < 58) || \
((LIBAVFORMAT_VERSION_MAJOR == 58) && (LIBAVFORMAT_VERSION_MINOR < 76))
# define FFMPEG_USE_DURATION_WORKAROUND 1
diff --git a/intern/ffmpeg/tests/ffmpeg_codecs.cc b/intern/ffmpeg/tests/ffmpeg_codecs.cc
index 10cbe4b938b..cd06917f59b 100644
--- a/intern/ffmpeg/tests/ffmpeg_codecs.cc
+++ b/intern/ffmpeg/tests/ffmpeg_codecs.cc
@@ -3,6 +3,8 @@
#include "testing/testing.h"
extern "C" {
+#include "ffmpeg_compat.h"
+
#include <libavcodec/avcodec.h>
#include <libavutil/channel_layout.h>
#include <libavutil/log.h>
@@ -40,7 +42,11 @@ bool test_acodec(const AVCodec *codec, AVSampleFormat fmt)
if (ctx) {
ctx->sample_fmt = fmt;
ctx->sample_rate = 48000;
+#ifdef FFMPEG_USE_OLD_CHANNEL_VARS
+ ctx->channel_layout = AV_CH_LAYOUT_MONO;
+#else
av_channel_layout_from_mask(&ctx->ch_layout, AV_CH_LAYOUT_MONO);
+#endif
ctx->bit_rate = 128000;
int open = avcodec_open2(ctx, codec, NULL);
if (open >= 0) {
diff --git a/release/scripts/modules/rna_info.py b/release/scripts/modules/rna_info.py
index e2bbc4077a1..07daf7c55eb 100644
--- a/release/scripts/modules/rna_info.py
+++ b/release/scripts/modules/rna_info.py
@@ -286,7 +286,10 @@ class InfoPropertyRNA:
self.enum_pointer = 0
if self.type == "enum":
- items = tuple(rna_prop.enum_items)
+ # WARNING: don't convert to a tuple, as this leads to reads of freed memory for dynamically allocated enums:
+ # freeing the iterator may free the memory used to store the internal `EnumPropertyItem` array.
+ # To support this properly RNA would have to support owning the dynamically allocated memory.
+ items = rna_prop.enum_items
items_static = tuple(rna_prop.enum_items_static)
self.enum_items[:] = [(item.identifier, item.name, item.description) for item in items]
self.is_enum_flag = rna_prop.is_enum_flag
@@ -295,6 +298,7 @@ class InfoPropertyRNA:
item = (items_static or items)
if item:
self.enum_pointer = item[0].as_pointer()
+ del items, items_static, item
else:
self.is_enum_flag = False
diff --git a/source/blender/blenkernel/BKE_curves.hh b/source/blender/blenkernel/BKE_curves.hh
index 4c7ff8c1813..a479dcb574d 100644
--- a/source/blender/blenkernel/BKE_curves.hh
+++ b/source/blender/blenkernel/BKE_curves.hh
@@ -11,6 +11,7 @@
#include <mutex>
+#include "BLI_cache_mutex.hh"
#include "BLI_float3x3.hh"
#include "BLI_float4x4.hh"
#include "BLI_generic_virtual_array.hh"
@@ -80,17 +81,14 @@ class CurvesGeometryRuntime {
*/
mutable Vector<int> evaluated_offsets_cache;
mutable Vector<int> bezier_evaluated_offsets;
- mutable std::mutex offsets_cache_mutex;
- mutable bool offsets_cache_dirty = true;
+ mutable CacheMutex offsets_cache_mutex;
mutable Vector<curves::nurbs::BasisCache> nurbs_basis_cache;
- mutable std::mutex nurbs_basis_cache_mutex;
- mutable bool nurbs_basis_cache_dirty = true;
+ mutable CacheMutex nurbs_basis_cache_mutex;
/** Cache of evaluated positions. */
mutable Vector<float3> evaluated_position_cache;
- mutable std::mutex position_cache_mutex;
- mutable bool position_cache_dirty = true;
+ mutable CacheMutex position_cache_mutex;
/**
* The evaluated positions result, using a separate span in case all curves are poly curves,
* in which case a separate array of evaluated positions is unnecessary.
@@ -103,18 +101,15 @@ class CurvesGeometryRuntime {
* make slicing this array for a curve fast, an extra float is stored for every curve.
*/
mutable Vector<float> evaluated_length_cache;
- mutable std::mutex length_cache_mutex;
- mutable bool length_cache_dirty = true;
+ mutable CacheMutex length_cache_mutex;
/** Direction of the curve at each evaluated point. */
mutable Vector<float3> evaluated_tangent_cache;
- mutable std::mutex tangent_cache_mutex;
- mutable bool tangent_cache_dirty = true;
+ mutable CacheMutex tangent_cache_mutex;
/** Normal direction vectors for each evaluated point. */
mutable Vector<float3> evaluated_normal_cache;
- mutable std::mutex normal_cache_mutex;
- mutable bool normal_cache_dirty = true;
+ mutable CacheMutex normal_cache_mutex;
};
/**
@@ -909,13 +904,13 @@ inline int CurvesGeometry::evaluated_points_num() const
inline IndexRange CurvesGeometry::evaluated_points_for_curve(int index) const
{
- BLI_assert(!this->runtime->offsets_cache_dirty);
+ BLI_assert(this->runtime->offsets_cache_mutex.is_cached());
return offsets_to_range(this->runtime->evaluated_offsets_cache.as_span(), index);
}
inline IndexRange CurvesGeometry::evaluated_points_for_curves(const IndexRange curves) const
{
- BLI_assert(!this->runtime->offsets_cache_dirty);
+ BLI_assert(this->runtime->offsets_cache_mutex.is_cached());
BLI_assert(this->curve_num > 0);
const int offset = this->runtime->evaluated_offsets_cache[curves.start()];
const int offset_next = this->runtime->evaluated_offsets_cache[curves.one_after_last()];
@@ -940,7 +935,7 @@ inline IndexRange CurvesGeometry::lengths_range_for_curve(const int curve_index,
inline Span<float> CurvesGeometry::evaluated_lengths_for_curve(const int curve_index,
const bool cyclic) const
{
- BLI_assert(!this->runtime->length_cache_dirty);
+ BLI_assert(this->runtime->length_cache_mutex.is_cached());
const IndexRange range = this->lengths_range_for_curve(curve_index, cyclic);
return this->runtime->evaluated_length_cache.as_span().slice(range);
}
diff --git a/source/blender/blenkernel/intern/curves_geometry.cc b/source/blender/blenkernel/intern/curves_geometry.cc
index 7c338480c71..43bdb8e7b8c 100644
--- a/source/blender/blenkernel/intern/curves_geometry.cc
+++ b/source/blender/blenkernel/intern/curves_geometry.cc
@@ -511,17 +511,7 @@ static void calculate_evaluated_offsets(const CurvesGeometry &curves,
void CurvesGeometry::ensure_evaluated_offsets() const
{
- if (!this->runtime->offsets_cache_dirty) {
- return;
- }
-
- /* A double checked lock. */
- std::scoped_lock lock{this->runtime->offsets_cache_mutex};
- if (!this->runtime->offsets_cache_dirty) {
- return;
- }
-
- threading::isolate_task([&]() {
+ this->runtime->offsets_cache_mutex.ensure([&]() {
this->runtime->evaluated_offsets_cache.resize(this->curves_num() + 1);
if (this->has_curve_with_type(CURVE_TYPE_BEZIER)) {
@@ -534,8 +524,6 @@ void CurvesGeometry::ensure_evaluated_offsets() const
calculate_evaluated_offsets(
*this, this->runtime->evaluated_offsets_cache, this->runtime->bezier_evaluated_offsets);
});
-
- this->runtime->offsets_cache_dirty = false;
}
Span<int> CurvesGeometry::evaluated_offsets() const
@@ -569,17 +557,7 @@ Array<int> CurvesGeometry::point_to_curve_map() const
void CurvesGeometry::ensure_nurbs_basis_cache() const
{
- if (!this->runtime->nurbs_basis_cache_dirty) {
- return;
- }
-
- /* A double checked lock. */
- std::scoped_lock lock{this->runtime->nurbs_basis_cache_mutex};
- if (!this->runtime->nurbs_basis_cache_dirty) {
- return;
- }
-
- threading::isolate_task([&]() {
+ this->runtime->nurbs_basis_cache_mutex.ensure([&]() {
Vector<int64_t> nurbs_indices;
const IndexMask nurbs_mask = this->indices_for_curve_type(CURVE_TYPE_NURBS, nurbs_indices);
if (nurbs_mask.is_empty()) {
@@ -619,23 +597,11 @@ void CurvesGeometry::ensure_nurbs_basis_cache() const
}
});
});
-
- this->runtime->nurbs_basis_cache_dirty = false;
}
Span<float3> CurvesGeometry::evaluated_positions() const
{
- if (!this->runtime->position_cache_dirty) {
- return this->runtime->evaluated_positions_span;
- }
-
- /* A double checked lock. */
- std::scoped_lock lock{this->runtime->position_cache_mutex};
- if (!this->runtime->position_cache_dirty) {
- return this->runtime->evaluated_positions_span;
- }
-
- threading::isolate_task([&]() {
+ this->runtime->position_cache_mutex.ensure([&]() {
if (this->is_single_type(CURVE_TYPE_POLY)) {
this->runtime->evaluated_positions_span = this->positions();
this->runtime->evaluated_position_cache.clear_and_make_inline();
@@ -699,24 +665,12 @@ Span<float3> CurvesGeometry::evaluated_positions() const
}
});
});
-
- this->runtime->position_cache_dirty = false;
return this->runtime->evaluated_positions_span;
}
Span<float3> CurvesGeometry::evaluated_tangents() const
{
- if (!this->runtime->tangent_cache_dirty) {
- return this->runtime->evaluated_tangent_cache;
- }
-
- /* A double checked lock. */
- std::scoped_lock lock{this->runtime->tangent_cache_mutex};
- if (!this->runtime->tangent_cache_dirty) {
- return this->runtime->evaluated_tangent_cache;
- }
-
- threading::isolate_task([&]() {
+ this->runtime->tangent_cache_mutex.ensure([&]() {
const Span<float3> evaluated_positions = this->evaluated_positions();
const VArray<bool> cyclic = this->cyclic();
@@ -732,9 +686,9 @@ Span<float3> CurvesGeometry::evaluated_tangents() const
}
});
- /* Correct the first and last tangents of non-cyclic Bezier curves so that they align with the
- * inner handles. This is a separate loop to avoid the cost when Bezier type curves are not
- * used. */
+ /* Correct the first and last tangents of non-cyclic Bezier curves so that they align with
+ * the inner handles. This is a separate loop to avoid the cost when Bezier type curves are
+ * not used. */
Vector<int64_t> bezier_indices;
const IndexMask bezier_mask = this->indices_for_curve_type(CURVE_TYPE_BEZIER, bezier_indices);
if (!bezier_mask.is_empty()) {
@@ -765,8 +719,6 @@ Span<float3> CurvesGeometry::evaluated_tangents() const
});
}
});
-
- this->runtime->tangent_cache_dirty = false;
return this->runtime->evaluated_tangent_cache;
}
@@ -781,17 +733,7 @@ static void rotate_directions_around_axes(MutableSpan<float3> directions,
Span<float3> CurvesGeometry::evaluated_normals() const
{
- if (!this->runtime->normal_cache_dirty) {
- return this->runtime->evaluated_normal_cache;
- }
-
- /* A double checked lock. */
- std::scoped_lock lock{this->runtime->normal_cache_mutex};
- if (!this->runtime->normal_cache_dirty) {
- return this->runtime->evaluated_normal_cache;
- }
-
- threading::isolate_task([&]() {
+ this->runtime->normal_cache_mutex.ensure([&]() {
const Span<float3> evaluated_tangents = this->evaluated_tangents();
const VArray<bool> cyclic = this->cyclic();
const VArray<int8_t> normal_mode = this->normal_mode();
@@ -842,8 +784,6 @@ Span<float3> CurvesGeometry::evaluated_normals() const
}
});
});
-
- this->runtime->normal_cache_dirty = false;
return this->runtime->evaluated_normal_cache;
}
@@ -851,8 +791,8 @@ void CurvesGeometry::interpolate_to_evaluated(const int curve_index,
const GSpan src,
GMutableSpan dst) const
{
- BLI_assert(!this->runtime->offsets_cache_dirty);
- BLI_assert(!this->runtime->nurbs_basis_cache_dirty);
+ BLI_assert(this->runtime->offsets_cache_mutex.is_cached());
+ BLI_assert(this->runtime->nurbs_basis_cache_mutex.is_cached());
const IndexRange points = this->points_for_curve(curve_index);
BLI_assert(src.size() == points.size());
BLI_assert(dst.size() == this->evaluated_points_for_curve(curve_index).size());
@@ -881,8 +821,8 @@ void CurvesGeometry::interpolate_to_evaluated(const int curve_index,
void CurvesGeometry::interpolate_to_evaluated(const GSpan src, GMutableSpan dst) const
{
- BLI_assert(!this->runtime->offsets_cache_dirty);
- BLI_assert(!this->runtime->nurbs_basis_cache_dirty);
+ BLI_assert(this->runtime->offsets_cache_mutex.is_cached());
+ BLI_assert(this->runtime->nurbs_basis_cache_mutex.is_cached());
const VArray<int8_t> types = this->curve_types();
const VArray<int> resolution = this->resolution();
const VArray<bool> cyclic = this->cyclic();
@@ -923,17 +863,7 @@ void CurvesGeometry::interpolate_to_evaluated(const GSpan src, GMutableSpan dst)
void CurvesGeometry::ensure_evaluated_lengths() const
{
- if (!this->runtime->length_cache_dirty) {
- return;
- }
-
- /* A double checked lock. */
- std::scoped_lock lock{this->runtime->length_cache_mutex};
- if (!this->runtime->length_cache_dirty) {
- return;
- }
-
- threading::isolate_task([&]() {
+ this->runtime->length_cache_mutex.ensure([&]() {
/* Use an extra length value for the final cyclic segment for a consistent size
* (see comment on #evaluated_length_cache). */
const int total_num = this->evaluated_points_num() + this->curves_num();
@@ -954,8 +884,6 @@ void CurvesGeometry::ensure_evaluated_lengths() const
}
});
});
-
- this->runtime->length_cache_dirty = false;
}
void CurvesGeometry::ensure_can_interpolate_to_evaluated() const
@@ -986,23 +914,23 @@ void CurvesGeometry::resize(const int points_num, const int curves_num)
void CurvesGeometry::tag_positions_changed()
{
- this->runtime->position_cache_dirty = true;
- this->runtime->tangent_cache_dirty = true;
- this->runtime->normal_cache_dirty = true;
- this->runtime->length_cache_dirty = true;
+ this->runtime->position_cache_mutex.tag_dirty();
+ this->runtime->tangent_cache_mutex.tag_dirty();
+ this->runtime->normal_cache_mutex.tag_dirty();
+ this->runtime->length_cache_mutex.tag_dirty();
}
void CurvesGeometry::tag_topology_changed()
{
- this->runtime->position_cache_dirty = true;
- this->runtime->tangent_cache_dirty = true;
- this->runtime->normal_cache_dirty = true;
- this->runtime->offsets_cache_dirty = true;
- this->runtime->nurbs_basis_cache_dirty = true;
- this->runtime->length_cache_dirty = true;
+ this->runtime->position_cache_mutex.tag_dirty();
+ this->runtime->tangent_cache_mutex.tag_dirty();
+ this->runtime->normal_cache_mutex.tag_dirty();
+ this->runtime->offsets_cache_mutex.tag_dirty();
+ this->runtime->nurbs_basis_cache_mutex.tag_dirty();
+ this->runtime->length_cache_mutex.tag_dirty();
}
void CurvesGeometry::tag_normals_changed()
{
- this->runtime->normal_cache_dirty = true;
+ this->runtime->normal_cache_mutex.tag_dirty();
}
static void translate_positions(MutableSpan<float3> positions, const float3 &translation)
diff --git a/source/blender/blenkernel/intern/customdata.cc b/source/blender/blenkernel/intern/customdata.cc
index 8b791eb4a00..84aa2207400 100644
--- a/source/blender/blenkernel/intern/customdata.cc
+++ b/source/blender/blenkernel/intern/customdata.cc
@@ -2799,11 +2799,6 @@ static CustomDataLayer *customData_add_layer__internal(CustomData *data,
const LayerTypeInfo *typeInfo = layerType_getInfo(type);
int flag = 0;
- if (!typeInfo->defaultname && CustomData_has_layer(data, type)) {
- MEM_SAFE_FREE(layerdata);
- return &data->layers[CustomData_get_layer_index(data, type)];
- }
-
void *newlayerdata = nullptr;
switch (alloctype) {
case CD_SET_DEFAULT:
@@ -2856,6 +2851,21 @@ static CustomDataLayer *customData_add_layer__internal(CustomData *data,
break;
}
+ /* Some layer types only support a single layer. */
+ const bool reuse_existing_layer = !typeInfo->defaultname && CustomData_has_layer(data, type);
+ if (reuse_existing_layer) {
+ CustomDataLayer &layer = data->layers[CustomData_get_layer_index(data, type)];
+ if (layer.data != nullptr) {
+ if (typeInfo->free) {
+ typeInfo->free(layer.data, totelem, typeInfo->size);
+ }
+ MEM_SAFE_FREE(layer.data);
+ }
+ layer.data = newlayerdata;
+ layer.flag = flag;
+ return &layer;
+ }
+
int index = data->totlayer;
if (index >= data->maxlayer) {
if (!customData_resize(data, CUSTOMDATA_GROW)) {
diff --git a/source/blender/blenkernel/intern/displist.cc b/source/blender/blenkernel/intern/displist.cc
index f8117a89198..2e285170b93 100644
--- a/source/blender/blenkernel/intern/displist.cc
+++ b/source/blender/blenkernel/intern/displist.cc
@@ -533,7 +533,8 @@ static ModifierData *curve_get_tessellate_point(const Scene *scene,
}
if (md->type == eModifierType_Smooth) {
- /* Smooth modifier works with mesh edges explicitly (so needs tesselation, thus cannnot work on control points). */
+ /* Smooth modifier works with mesh edges explicitly
+ * (so needs tessellation, thus cannot work on control points). */
md->mode &= ~eModifierMode_ApplyOnSpline;
return pretessellatePoint;
}
diff --git a/source/blender/blenkernel/intern/freestyle.c b/source/blender/blenkernel/intern/freestyle.c
index a0649930dfc..28d0d1719d7 100644
--- a/source/blender/blenkernel/intern/freestyle.c
+++ b/source/blender/blenkernel/intern/freestyle.c
@@ -183,7 +183,7 @@ FreestyleLineSet *BKE_freestyle_lineset_add(struct Main *bmain,
BLI_strncpy(lineset->name, name, sizeof(lineset->name));
}
else if (lineset_index > 0) {
- sprintf(lineset->name, "LineSet %i", lineset_index + 1);
+ BLI_snprintf(lineset->name, sizeof(lineset->name), "LineSet %i", lineset_index + 1);
}
else {
strcpy(lineset->name, "LineSet");
diff --git a/source/blender/blenkernel/intern/image.cc b/source/blender/blenkernel/intern/image.cc
index eae8b454189..75e3e22afa7 100644
--- a/source/blender/blenkernel/intern/image.cc
+++ b/source/blender/blenkernel/intern/image.cc
@@ -3610,12 +3610,12 @@ void BKE_image_set_filepath_from_tile_number(char *filepath,
}
if (tile_format == UDIM_TILE_FORMAT_UDIM) {
- sprintf(filepath, pattern, tile_number);
+ BLI_sprintf(filepath, pattern, tile_number);
}
else if (tile_format == UDIM_TILE_FORMAT_UVTILE) {
int u = ((tile_number - 1001) % 10);
int v = ((tile_number - 1001) / 10);
- sprintf(filepath, pattern, u + 1, v + 1);
+ BLI_sprintf(filepath, pattern, u + 1, v + 1);
}
}
diff --git a/source/blender/blenkernel/intern/node.cc b/source/blender/blenkernel/intern/node.cc
index aa6ca37f48c..31fc8afea84 100644
--- a/source/blender/blenkernel/intern/node.cc
+++ b/source/blender/blenkernel/intern/node.cc
@@ -3436,7 +3436,7 @@ void ntreeRemoveSocketInterface(bNodeTree *ntree, bNodeSocket *sock)
static void ntree_interface_identifier_base(bNodeTree *ntree, char *base)
{
/* generate a valid RNA identifier */
- sprintf(base, "NodeTreeInterface_%s", ntree->id.name + 2);
+ BLI_sprintf(base, "NodeTreeInterface_%s", ntree->id.name + 2);
RNA_identifier_sanitize(base, false);
}
@@ -3462,8 +3462,8 @@ static void ntree_interface_identifier(bNodeTree *ntree,
BLI_uniquename_cb(
ntree_interface_unique_identifier_check, nullptr, base, '_', identifier, maxlen);
- sprintf(name, "Node Tree %s Interface", ntree->id.name + 2);
- sprintf(description, "Interface properties of node group %s", ntree->id.name + 2);
+ BLI_sprintf(name, "Node Tree %s Interface", ntree->id.name + 2);
+ BLI_sprintf(description, "Interface properties of node group %s", ntree->id.name + 2);
}
static void ntree_interface_type_create(bNodeTree *ntree)
diff --git a/source/blender/blenkernel/intern/pointcache.c b/source/blender/blenkernel/intern/pointcache.c
index 868cdde6d01..5622530ea41 100644
--- a/source/blender/blenkernel/intern/pointcache.c
+++ b/source/blender/blenkernel/intern/pointcache.c
@@ -3132,15 +3132,15 @@ static void ptcache_dt_to_str(char *str, double dtime)
{
if (dtime > 60.0) {
if (dtime > 3600.0) {
- sprintf(
+ BLI_sprintf(
str, "%ih %im %is", (int)(dtime / 3600), ((int)(dtime / 60)) % 60, ((int)dtime) % 60);
}
else {
- sprintf(str, "%im %is", ((int)(dtime / 60)) % 60, ((int)dtime) % 60);
+ BLI_sprintf(str, "%im %is", ((int)(dtime / 60)) % 60, ((int)dtime) % 60);
}
}
else {
- sprintf(str, "%is", ((int)dtime) % 60);
+ BLI_sprintf(str, "%is", ((int)dtime) % 60);
}
}
diff --git a/source/blender/blenkernel/intern/writeffmpeg.c b/source/blender/blenkernel/intern/writeffmpeg.c
index d71db8f71a5..8d6dba440fd 100644
--- a/source/blender/blenkernel/intern/writeffmpeg.c
+++ b/source/blender/blenkernel/intern/writeffmpeg.c
@@ -141,18 +141,25 @@ static int write_audio_frame(FFMpegContext *context)
frame->pts = context->audio_time / av_q2d(c->time_base);
frame->nb_samples = context->audio_input_samples;
frame->format = c->sample_fmt;
+# ifdef FFMPEG_USE_OLD_CHANNEL_VARS
+ frame->channels = c->channels;
+ frame->channel_layout = c->channel_layout;
+ const int num_channels = c->channels;
+# else
av_channel_layout_copy(&frame->ch_layout, &c->ch_layout);
+ const int num_channels = c->ch_layout.nb_channels;
+# endif
if (context->audio_deinterleave) {
int channel, i;
uint8_t *temp;
- for (channel = 0; channel < c->ch_layout.nb_channels; channel++) {
+ for (channel = 0; channel < num_channels; channel++) {
for (i = 0; i < frame->nb_samples; i++) {
memcpy(context->audio_deinterleave_buffer +
(i + channel * frame->nb_samples) * context->audio_sample_size,
context->audio_input_buffer +
- (c->ch_layout.nb_channels * i + channel) * context->audio_sample_size,
+ (num_channels * i + channel) * context->audio_sample_size,
context->audio_sample_size);
}
}
@@ -163,10 +170,10 @@ static int write_audio_frame(FFMpegContext *context)
}
avcodec_fill_audio_frame(frame,
- c->ch_layout.nb_channels,
+ num_channels,
c->sample_fmt,
context->audio_input_buffer,
- context->audio_input_samples * c->ch_layout.nb_channels *
+ context->audio_input_samples * num_channels *
context->audio_sample_size,
1);
@@ -944,25 +951,34 @@ static AVStream *alloc_audio_stream(FFMpegContext *context,
c->sample_rate = rd->ffcodecdata.audio_mixrate;
c->bit_rate = context->ffmpeg_audio_bitrate * 1000;
c->sample_fmt = AV_SAMPLE_FMT_S16;
- c->ch_layout.nb_channels = rd->ffcodecdata.audio_channels;
+ const int num_channels = rd->ffcodecdata.audio_channels;
+ int channel_layout_mask = 0;
switch (rd->ffcodecdata.audio_channels) {
case FFM_CHANNELS_MONO:
- av_channel_layout_from_mask(&c->ch_layout, AV_CH_LAYOUT_MONO);
+ channel_layout_mask = AV_CH_LAYOUT_MONO;
break;
case FFM_CHANNELS_STEREO:
- av_channel_layout_from_mask(&c->ch_layout, AV_CH_LAYOUT_STEREO);
+ channel_layout_mask = AV_CH_LAYOUT_STEREO;
break;
case FFM_CHANNELS_SURROUND4:
- av_channel_layout_from_mask(&c->ch_layout, AV_CH_LAYOUT_QUAD);
+ channel_layout_mask = AV_CH_LAYOUT_QUAD;
break;
case FFM_CHANNELS_SURROUND51:
- av_channel_layout_from_mask(&c->ch_layout, AV_CH_LAYOUT_5POINT1_BACK);
+ channel_layout_mask = AV_CH_LAYOUT_5POINT1_BACK;
break;
case FFM_CHANNELS_SURROUND71:
- av_channel_layout_from_mask(&c->ch_layout, AV_CH_LAYOUT_7POINT1);
+ channel_layout_mask = AV_CH_LAYOUT_7POINT1;
break;
}
+ BLI_assert(channel_layout_mask != 0);
+
+# ifdef FFMPEG_USE_OLD_CHANNEL_VARS
+ c->channels = num_channels;
+ c->channel_layout = channel_layout_mask;
+# else
+ av_channel_layout_from_mask(&c->ch_layout, channel_layout_mask);
+# endif
if (request_float_audio_buffer(codec_id)) {
/* mainly for AAC codec which is experimental */
@@ -1027,7 +1043,7 @@ static AVStream *alloc_audio_stream(FFMpegContext *context,
* not sure if that is needed anymore, so let's try out if there are any
* complaints regarding some FFmpeg versions users might have. */
context->audio_input_samples = AV_INPUT_BUFFER_MIN_SIZE * 8 / c->bits_per_coded_sample /
- c->ch_layout.nb_channels;
+ num_channels;
}
else {
context->audio_input_samples = c->frame_size;
@@ -1037,11 +1053,11 @@ static AVStream *alloc_audio_stream(FFMpegContext *context,
context->audio_sample_size = av_get_bytes_per_sample(c->sample_fmt);
- context->audio_input_buffer = (uint8_t *)av_malloc(
- context->audio_input_samples * c->ch_layout.nb_channels * context->audio_sample_size);
+ context->audio_input_buffer = (uint8_t *)av_malloc(context->audio_input_samples * num_channels *
+ context->audio_sample_size);
if (context->audio_deinterleave) {
context->audio_deinterleave_buffer = (uint8_t *)av_malloc(
- context->audio_input_samples * c->ch_layout.nb_channels * context->audio_sample_size);
+ context->audio_input_samples * num_channels * context->audio_sample_size);
}
context->audio_time = 0.0f;
@@ -1370,7 +1386,7 @@ static void ffmpeg_filepath_get(
if ((rd->ffcodecdata.flags & FFMPEG_AUTOSPLIT_OUTPUT) != 0) {
if (context) {
- sprintf(autosplit, "_%03d", context->ffmpeg_autosplit_count);
+ BLI_snprintf(autosplit, sizeof(autosplit), "_%03d", context->ffmpeg_autosplit_count);
}
}
@@ -1432,7 +1448,11 @@ int BKE_ffmpeg_start(void *context_v,
AVCodecContext *c = context->audio_codec;
AUD_DeviceSpecs specs;
+# ifdef FFMPEG_USE_OLD_CHANNEL_VARS
+ specs.channels = c->channels;
+# else
specs.channels = c->ch_layout.nb_channels;
+# endif
switch (av_get_packed_sample_fmt(c->sample_fmt)) {
case AV_SAMPLE_FMT_U8:
diff --git a/source/blender/blenlib/BLI_cache_mutex.hh b/source/blender/blenlib/BLI_cache_mutex.hh
new file mode 100644
index 00000000000..8e2a0d1b1a5
--- /dev/null
+++ b/source/blender/blenlib/BLI_cache_mutex.hh
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+/**
+ * A #CacheMutex is used to protect a lazily computed cache from being computed more than once.
+ * Using #CacheMutex instead of a "raw mutex" to protect a cache has some benefits:
+ * - Avoid common pitfalls like forgetting to use task isolation or a double checked lock.
+ * - Cleaner and less redundant code because the same locking patterns don't have to be repeated
+ * everywhere.
+ * - One can benefit from potential future improvements to #CacheMutex of which there are a few
+ * mentioned below.
+ *
+ * The data protected by #CacheMutex is not part of #CacheMutex. Instead, the #CacheMutex and its
+ * protected data should generally be placed next to each other.
+ *
+ * Each #CacheMutex protects exactly one cache, so multiple cache mutexes have to be used when a
+ * class has multiple caches. That is contrary to a "custom" solution using `std::mutex` where one
+ * mutex could protect multiple caches at the cost of higher lock contention.
+ *
+ * To make sure the cache is up to date, call `CacheMutex::ensure` and pass in the function that
+ * computes the cache.
+ *
+ * To tell the #CacheMutex that the cache is invalidated and to be re-evaluated upon next access
+ * use `CacheMutex::tag_dirty`.
+ *
+ * This example shows how one could implement a lazily computed average vertex position in an
+ * imaginary `Mesh` data structure:
+ *
+ * \code{.cpp}
+ * class Mesh {
+ * private:
+ * mutable CacheMutex average_position_cache_mutex_;
+ * mutable float3 average_position_cache_;
+ *
+ * public:
+ * const float3 &average_position() const
+ * {
+ * average_position_cache_mutex_.ensure([&]() {
+ * average_position_cache_ = actually_compute_average_position();
+ * });
+ * return average_position_cache_;
+ * }
+ *
+ * void tag_positions_changed()
+ * {
+ * average_position_cache_mutex_.tag_dirty();
+ * }
+ * };
+ * \endcode
+ *
+ * Possible future improvements:
+ * - Avoid task isolation when we know that the cache computation does not use threading.
+ * - Try to use a smaller mutex. The mutex does not have to be fair for this use case.
+ * - Try to join the cache computation instead of blocking if another thread is computing the cache
+ * already.
+ */
+
+#include <atomic>
+#include <mutex>
+
+#include "BLI_function_ref.hh"
+
+namespace blender {
+
+class CacheMutex {
+ private:
+ std::mutex mutex_;
+ std::atomic<bool> cache_valid_ = false;
+
+ public:
+ /**
+ * Make sure the cache exists and is up to date. This calls `compute_cache` once to update the
+ * cache (which is stored outside of this class) if it is dirty, otherwise it does nothing.
+ *
+ * This function is thread-safe under the assumption that the same parameters are passed from
+ * every thread.
+ */
+ void ensure(FunctionRef<void()> compute_cache);
+
+ /**
+ * Reset the cache. The next time #ensure is called, it will call `compute_cache` again.
+ */
+ void tag_dirty()
+ {
+ cache_valid_.store(false);
+ }
+
+ /**
+ * Return true if the cache currently does not exist or has been invalidated.
+ */
+ bool is_dirty() const
+ {
+ return !this->is_cached();
+ }
+
+ /**
+ * Return true if the cache exists and is valid.
+ */
+ bool is_cached() const
+ {
+ return cache_valid_.load(std::memory_order_relaxed);
+ }
+};
+
+} // namespace blender
diff --git a/source/blender/blenlib/BLI_string.h b/source/blender/blenlib/BLI_string.h
index 17abcf52ecc..fb02ea5fb17 100644
--- a/source/blender/blenlib/BLI_string.h
+++ b/source/blender/blenlib/BLI_string.h
@@ -206,6 +206,13 @@ char *BLI_sprintfN(const char *__restrict format, ...) ATTR_WARN_UNUSED_RESULT
ATTR_NONNULL(1) ATTR_MALLOC ATTR_PRINTF_FORMAT(1, 2);
/**
+ * A wrapper around ::sprintf() which does not generate security warnings.
+ *
+ * \note No bounds checking is performed; prefer BLI_snprintf whenever the buffer size is known.
+ */
+int BLI_sprintf(char *__restrict str, const char *__restrict format, ...);
+
+/**
* This roughly matches C and Python's string escaping with double quotes - `"`.
*
* Since every character may need escaping,
diff --git a/source/blender/blenlib/CMakeLists.txt b/source/blender/blenlib/CMakeLists.txt
index 2ac77f000e9..693a4d98675 100644
--- a/source/blender/blenlib/CMakeLists.txt
+++ b/source/blender/blenlib/CMakeLists.txt
@@ -54,6 +54,7 @@ set(SRC
intern/bitmap_draw_2d.c
intern/boxpack_2d.c
intern/buffer.c
+ intern/cache_mutex.cc
intern/compute_context.cc
intern/convexhull_2d.c
intern/cpp_type.cc
@@ -178,6 +179,7 @@ set(SRC
BLI_bounds.hh
BLI_boxpack_2d.h
BLI_buffer.h
+ BLI_cache_mutex.hh
BLI_color.hh
BLI_color_mix.hh
BLI_compiler_attrs.h
diff --git a/source/blender/blenlib/intern/cache_mutex.cc b/source/blender/blenlib/intern/cache_mutex.cc
new file mode 100644
index 00000000000..db474b1ef87
--- /dev/null
+++ b/source/blender/blenlib/intern/cache_mutex.cc
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "BLI_cache_mutex.hh"
+#include "BLI_task.hh"
+
+namespace blender {
+
+void CacheMutex::ensure(const FunctionRef<void()> compute_cache)
+{
+ if (cache_valid_.load(std::memory_order_acquire)) {
+ return;
+ }
+ std::scoped_lock lock{mutex_};
+ /* Double checked lock. */
+ if (cache_valid_.load(std::memory_order_relaxed)) {
+ return;
+ }
+ /* Use task isolation because a mutex is locked and the cache computation might use
+ * multi-threading. */
+ threading::isolate_task(compute_cache);
+
+ cache_valid_.store(true, std::memory_order_release);
+}
+
+} // namespace blender
diff --git a/source/blender/blenlib/intern/path_util.c b/source/blender/blenlib/intern/path_util.c
index d13f3fe5ced..2376bd82b69 100644
--- a/source/blender/blenlib/intern/path_util.c
+++ b/source/blender/blenlib/intern/path_util.c
@@ -123,7 +123,7 @@ int BLI_path_sequence_decode(const char *string, char *head, char *tail, ushort
void BLI_path_sequence_encode(
char *string, const char *head, const char *tail, ushort numlen, int pic)
{
- sprintf(string, "%s%.*d%s", head, numlen, MAX2(0, pic), tail);
+ BLI_sprintf(string, "%s%.*d%s", head, numlen, MAX2(0, pic), tail);
}
static int BLI_path_unc_prefix_len(const char *path); /* defined below in same file */
@@ -620,7 +620,7 @@ bool BLI_path_suffix(char *string, size_t maxlen, const char *suffix, const char
}
BLI_strncpy(extension, string + a, sizeof(extension));
- sprintf(string + a, "%s%s%s", sep, suffix, extension);
+ BLI_sprintf(string + a, "%s%s%s", sep, suffix, extension);
return true;
}
diff --git a/source/blender/blenlib/intern/string.c b/source/blender/blenlib/intern/string.c
index 755d2dbd55d..3c3dcaf90f4 100644
--- a/source/blender/blenlib/intern/string.c
+++ b/source/blender/blenlib/intern/string.c
@@ -241,6 +241,17 @@ char *BLI_sprintfN(const char *__restrict format, ...)
return n;
}
+int BLI_sprintf(char *__restrict str, const char *__restrict format, ...)
+{
+ va_list arg;
+
+ va_start(arg, format);
+ const int result = vsprintf(str, format, arg);
+ va_end(arg);
+
+ return result;
+}
+
/** \} */
/* -------------------------------------------------------------------- */
@@ -1114,7 +1125,7 @@ static size_t BLI_str_format_int_grouped_ex(char src[16], char dst[16], int num_
size_t BLI_str_format_int_grouped(char dst[16], int num)
{
char src[16];
- int num_len = sprintf(src, "%d", num);
+ const int num_len = BLI_snprintf(src, sizeof(src), "%d", num);
return BLI_str_format_int_grouped_ex(src, dst, num_len);
}
@@ -1124,7 +1135,7 @@ size_t BLI_str_format_uint64_grouped(char dst[16], uint64_t num)
/* NOTE: Buffer to hold maximum `uint64`, which is 1.8e+19, but
* we also need space for commas and null-terminator. */
char src[27];
- int num_len = sprintf(src, "%" PRIu64 "", num);
+ const int num_len = BLI_snprintf(src, sizeof(src), "%" PRIu64 "", num);
return BLI_str_format_int_grouped_ex(src, dst, num_len);
}
diff --git a/source/blender/blenlib/intern/uuid.cc b/source/blender/blenlib/intern/uuid.cc
index 023dd1ec409..b845208f0da 100644
--- a/source/blender/blenlib/intern/uuid.cc
+++ b/source/blender/blenlib/intern/uuid.cc
@@ -5,6 +5,7 @@
*/
#include "BLI_assert.h"
+#include "BLI_string.h"
#include "BLI_uuid.h"
#include <cstdio>
@@ -85,19 +86,19 @@ bool BLI_uuid_equal(const bUUID uuid1, const bUUID uuid2)
void BLI_uuid_format(char *buffer, const bUUID uuid)
{
- std::sprintf(buffer,
- "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
- uuid.time_low,
- uuid.time_mid,
- uuid.time_hi_and_version,
- uuid.clock_seq_hi_and_reserved,
- uuid.clock_seq_low,
- uuid.node[0],
- uuid.node[1],
- uuid.node[2],
- uuid.node[3],
- uuid.node[4],
- uuid.node[5]);
+ BLI_sprintf(buffer,
+ "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+ uuid.time_low,
+ uuid.time_mid,
+ uuid.time_hi_and_version,
+ uuid.clock_seq_hi_and_reserved,
+ uuid.clock_seq_low,
+ uuid.node[0],
+ uuid.node[1],
+ uuid.node[2],
+ uuid.node[3],
+ uuid.node[4],
+ uuid.node[5]);
}
bool BLI_uuid_parse_string(bUUID *uuid, const char *buffer)
diff --git a/source/blender/blenlib/intern/winstuff.c b/source/blender/blenlib/intern/winstuff.c
index 7e2c5e8f1dd..3a574b60ae2 100644
--- a/source/blender/blenlib/intern/winstuff.c
+++ b/source/blender/blenlib/intern/winstuff.c
@@ -110,7 +110,7 @@ bool BLI_windows_register_blend_extension(const bool background)
&hkey,
&dwd);
if (lresult == ERROR_SUCCESS) {
- sprintf(buffer, "\"%s\" \"%%1\"", BlPath);
+ BLI_snprintf(buffer, sizeof(buffer), "\"%s\" \"%%1\"", BlPath);
lresult = RegSetValueEx(hkey, NULL, 0, REG_SZ, (BYTE *)buffer, strlen(buffer) + 1);
RegCloseKey(hkey);
}
@@ -129,7 +129,7 @@ bool BLI_windows_register_blend_extension(const bool background)
&hkey,
&dwd);
if (lresult == ERROR_SUCCESS) {
- sprintf(buffer, "\"%s\", 1", BlPath);
+ BLI_snprintf(buffer, sizeof(buffer), "\"%s\", 1", BlPath);
lresult = RegSetValueEx(hkey, NULL, 0, REG_SZ, (BYTE *)buffer, strlen(buffer) + 1);
RegCloseKey(hkey);
}
@@ -167,10 +167,12 @@ bool BLI_windows_register_blend_extension(const bool background)
RegCloseKey(root);
printf("success (%s)\n", usr_mode ? "user" : "system");
if (!background) {
- sprintf(MBox,
- "File extension registered for %s.",
- usr_mode ? "the current user. To register for all users, run as an administrator" :
- "all users");
+ BLI_snprintf(MBox,
+ sizeof(MBox),
+ "File extension registered for %s.",
+ usr_mode ?
+ "the current user. To register for all users, run as an administrator" :
+ "all users");
MessageBox(0, MBox, "Blender", MB_OK | MB_ICONINFORMATION);
}
return true;
diff --git a/source/blender/blenloader/intern/writefile.cc b/source/blender/blenloader/intern/writefile.cc
index 42bc884098e..6e48b65eb25 100644
--- a/source/blender/blenloader/intern/writefile.cc
+++ b/source/blender/blenloader/intern/writefile.cc
@@ -1048,7 +1048,7 @@ static void write_global(WriteData *wd, int fileflags, Main *mainvar)
if (fileflags & G_FILE_RECOVER_WRITE) {
STRNCPY(fg.filepath, mainvar->filepath);
}
- sprintf(subvstr, "%4d", BLENDER_FILE_SUBVERSION);
+ BLI_snprintf(subvstr, sizeof(subvstr), "%4d", BLENDER_FILE_SUBVERSION);
memcpy(fg.subvstr, subvstr, 4);
fg.subversion = BLENDER_FILE_SUBVERSION;
@@ -1102,11 +1102,12 @@ static bool write_file_handle(Main *mainvar,
wd = mywrite_begin(ww, compare, current);
BlendWriter writer = {wd};
- sprintf(buf,
- "BLENDER%c%c%.3d",
- (sizeof(void *) == 8) ? '-' : '_',
- (ENDIAN_ORDER == B_ENDIAN) ? 'V' : 'v',
- BLENDER_FILE_VERSION);
+ BLI_snprintf(buf,
+ sizeof(buf),
+ "BLENDER%c%c%.3d",
+ (sizeof(void *) == 8) ? '-' : '_',
+ (ENDIAN_ORDER == B_ENDIAN) ? 'V' : 'v',
+ BLENDER_FILE_VERSION);
mywrite(wd, buf, 12);
diff --git a/source/blender/compositor/intern/COM_Debug.cc b/source/blender/compositor/intern/COM_Debug.cc
index d184e5540ea..a670af5eaca 100644
--- a/source/blender/compositor/intern/COM_Debug.cc
+++ b/source/blender/compositor/intern/COM_Debug.cc
@@ -305,7 +305,7 @@ bool DebugInfo::graphviz_system(const ExecutionSystem *system, char *str, int ma
for (NodeOperation *operation : group->operations_) {
- sprintf(strbuf, "_%p", group);
+ BLI_snprintf(strbuf, sizeof(strbuf), "_%p", group);
op_groups[operation].push_back(std::string(strbuf));
len += graphviz_operation(
diff --git a/source/blender/draw/intern/draw_manager_profiling.c b/source/blender/draw/intern/draw_manager_profiling.c
index d14f5c7f125..92cb3e008b9 100644
--- a/source/blender/draw/intern/draw_manager_profiling.c
+++ b/source/blender/draw/intern/draw_manager_profiling.c
@@ -225,15 +225,15 @@ void DRW_stats_draw(const rcti *rect)
/* ------------------------------------------ */
/* Label row */
char col_label[32];
- sprintf(col_label, "Engine");
+ BLI_snprintf(col_label, sizeof(col_label), "Engine");
draw_stat_5row(rect, u++, v, col_label, sizeof(col_label));
- sprintf(col_label, "Init");
+ BLI_snprintf(col_label, sizeof(col_label), "Init");
draw_stat_5row(rect, u++, v, col_label, sizeof(col_label));
- sprintf(col_label, "Background");
+ BLI_snprintf(col_label, sizeof(col_label), "Background");
draw_stat_5row(rect, u++, v, col_label, sizeof(col_label));
- sprintf(col_label, "Render");
+ BLI_snprintf(col_label, sizeof(col_label), "Render");
draw_stat_5row(rect, u++, v, col_label, sizeof(col_label));
- sprintf(col_label, "Total (w/o cache)");
+ BLI_snprintf(col_label, sizeof(col_label), "Total (w/o cache)");
draw_stat_5row(rect, u++, v, col_label, sizeof(col_label));
v++;
@@ -245,42 +245,45 @@ void DRW_stats_draw(const rcti *rect)
draw_stat_5row(rect, u++, v, engine->idname, sizeof(engine->idname));
init_tot_time += data->init_time;
- sprintf(time_to_txt, "%.2fms", data->init_time);
+ BLI_snprintf(time_to_txt, sizeof(time_to_txt), "%.2fms", data->init_time);
draw_stat_5row(rect, u++, v, time_to_txt, sizeof(time_to_txt));
background_tot_time += data->background_time;
- sprintf(time_to_txt, "%.2fms", data->background_time);
+ BLI_snprintf(time_to_txt, sizeof(time_to_txt), "%.2fms", data->background_time);
draw_stat_5row(rect, u++, v, time_to_txt, sizeof(time_to_txt));
render_tot_time += data->render_time;
- sprintf(time_to_txt, "%.2fms", data->render_time);
+ BLI_snprintf(time_to_txt, sizeof(time_to_txt), "%.2fms", data->render_time);
draw_stat_5row(rect, u++, v, time_to_txt, sizeof(time_to_txt));
tot_time += data->init_time + data->background_time + data->render_time;
- sprintf(time_to_txt, "%.2fms", data->init_time + data->background_time + data->render_time);
+ BLI_snprintf(time_to_txt,
+ sizeof(time_to_txt),
+ "%.2fms",
+ data->init_time + data->background_time + data->render_time);
draw_stat_5row(rect, u++, v, time_to_txt, sizeof(time_to_txt));
v++;
}
/* Totals row */
u = 0;
- sprintf(col_label, "Sub Total");
+ BLI_snprintf(col_label, sizeof(col_label), "Sub Total");
draw_stat_5row(rect, u++, v, col_label, sizeof(col_label));
- sprintf(time_to_txt, "%.2fms", init_tot_time);
+ BLI_snprintf(time_to_txt, sizeof(time_to_txt), "%.2fms", init_tot_time);
draw_stat_5row(rect, u++, v, time_to_txt, sizeof(time_to_txt));
- sprintf(time_to_txt, "%.2fms", background_tot_time);
+ BLI_snprintf(time_to_txt, sizeof(time_to_txt), "%.2fms", background_tot_time);
draw_stat_5row(rect, u++, v, time_to_txt, sizeof(time_to_txt));
- sprintf(time_to_txt, "%.2fms", render_tot_time);
+ BLI_snprintf(time_to_txt, sizeof(time_to_txt), "%.2fms", render_tot_time);
draw_stat_5row(rect, u++, v, time_to_txt, sizeof(time_to_txt));
- sprintf(time_to_txt, "%.2fms", tot_time);
+ BLI_snprintf(time_to_txt, sizeof(time_to_txt), "%.2fms", tot_time);
draw_stat_5row(rect, u++, v, time_to_txt, sizeof(time_to_txt));
v += 2;
u = 0;
double *cache_time = DRW_view_data_cache_time_get(DST.view_data_active);
- sprintf(col_label, "Cache Time");
+ BLI_snprintf(col_label, sizeof(col_label), "Cache Time");
draw_stat_5row(rect, u++, v, col_label, sizeof(col_label));
- sprintf(time_to_txt, "%.2fms", *cache_time);
+ BLI_snprintf(time_to_txt, sizeof(time_to_txt), "%.2fms", *cache_time);
draw_stat_5row(rect, u++, v, time_to_txt, sizeof(time_to_txt));
v += 2;
@@ -292,17 +295,18 @@ void DRW_stats_draw(const rcti *rect)
uint tex_mem = GPU_texture_memory_usage_get();
uint vbo_mem = GPU_vertbuf_get_memory_usage();
- sprintf(stat_string, "GPU Memory");
+ BLI_snprintf(stat_string, sizeof(stat_string), "GPU Memory");
draw_stat(rect, 0, v, stat_string, sizeof(stat_string));
- sprintf(stat_string, "%.2fMB", (double)(tex_mem + vbo_mem) / 1000000.0);
+ BLI_snprintf(
+ stat_string, sizeof(stat_string), "%.2fMB", (double)(tex_mem + vbo_mem) / 1000000.0);
draw_stat_5row(rect, 1, v++, stat_string, sizeof(stat_string));
- sprintf(stat_string, "Textures");
+ BLI_snprintf(stat_string, sizeof(stat_string), "Textures");
draw_stat(rect, 1, v, stat_string, sizeof(stat_string));
- sprintf(stat_string, "%.2fMB", (double)tex_mem / 1000000.0);
+ BLI_snprintf(stat_string, sizeof(stat_string), "%.2fMB", (double)tex_mem / 1000000.0);
draw_stat_5row(rect, 1, v++, stat_string, sizeof(stat_string));
- sprintf(stat_string, "Meshes");
+ BLI_snprintf(stat_string, sizeof(stat_string), "Meshes");
draw_stat(rect, 1, v, stat_string, sizeof(stat_string));
- sprintf(stat_string, "%.2fMB", (double)vbo_mem / 1000000.0);
+ BLI_snprintf(stat_string, sizeof(stat_string), "%.2fMB", (double)vbo_mem / 1000000.0);
draw_stat_5row(rect, 1, v++, stat_string, sizeof(stat_string));
v += 1;
diff --git a/source/blender/draw/intern/draw_pbvh.cc b/source/blender/draw/intern/draw_pbvh.cc
index b25bb42a8a5..6c504e63511 100644
--- a/source/blender/draw/intern/draw_pbvh.cc
+++ b/source/blender/draw/intern/draw_pbvh.cc
@@ -86,7 +86,7 @@ struct PBVHVbo {
{
char buf[512];
- sprintf(buf, "%d:%d:%s", int(type), int(domain), name.c_str());
+ BLI_snprintf(buf, sizeof(buf), "%d:%d:%s", int(type), int(domain), name.c_str());
key = string(buf);
return key;
diff --git a/source/blender/editors/include/ED_image.h b/source/blender/editors/include/ED_image.h
index da303f3552b..20e62ac8626 100644
--- a/source/blender/editors/include/ED_image.h
+++ b/source/blender/editors/include/ED_image.h
@@ -51,6 +51,7 @@ void ED_space_image_set(struct Main *bmain,
struct SpaceImage *sima,
struct Image *ima,
bool automatic);
+void ED_space_image_sync(struct Main *bmain, Image *image, bool ignore_render_viewer);
void ED_space_image_auto_set(const struct bContext *C, struct SpaceImage *sima);
struct Mask *ED_space_image_get_mask(const struct SpaceImage *sima);
void ED_space_image_set_mask(struct bContext *C, struct SpaceImage *sima, struct Mask *mask);
diff --git a/source/blender/editors/interface/interface_region_menu_popup.cc b/source/blender/editors/interface/interface_region_menu_popup.cc
index 871fd5c6166..8fd6731d2ec 100644
--- a/source/blender/editors/interface/interface_region_menu_popup.cc
+++ b/source/blender/editors/interface/interface_region_menu_popup.cc
@@ -286,13 +286,13 @@ static uiBlock *ui_block_func_POPUP(bContext *C, uiPopupBlockHandle *handle, voi
int width, height;
UI_block_layout_resolve(block, &width, &height);
- UI_block_flag_enable(block, UI_BLOCK_MOVEMOUSE_QUIT);
+ UI_block_flag_enable(block, UI_BLOCK_MOVEMOUSE_QUIT | UI_BLOCK_NUMSELECT);
if (pup->popup) {
int offset[2] = {0, 0};
uiBut *but_activate = nullptr;
- UI_block_flag_enable(block, UI_BLOCK_LOOP | UI_BLOCK_NUMSELECT);
+ UI_block_flag_enable(block, UI_BLOCK_LOOP);
UI_block_theme_style_set(block, UI_BLOCK_THEME_STYLE_POPUP);
UI_block_direction_set(block, direction);
diff --git a/source/blender/editors/interface/interface_templates.c b/source/blender/editors/interface/interface_templates.c
index a3259831c9f..b32aa82ad9e 100644
--- a/source/blender/editors/interface/interface_templates.c
+++ b/source/blender/editors/interface/interface_templates.c
@@ -3673,13 +3673,9 @@ static void colorband_buttons_layout(uiLayout *layout,
row = uiLayoutRow(split, false);
uiItemR(row, &ptr, "position", 0, IFACE_("Pos"), ICON_NONE);
- bt = block->buttons.last;
- UI_but_func_set(bt, colorband_update_cb, bt, coba);
row = uiLayoutRow(layout, false);
uiItemR(row, &ptr, "color", 0, "", ICON_NONE);
- bt = block->buttons.last;
- UI_but_funcN_set(bt, rna_update_cb, MEM_dupallocN(cb), NULL);
}
else {
split = uiLayoutSplit(layout, 0.5f, false);
@@ -3704,13 +3700,28 @@ static void colorband_buttons_layout(uiLayout *layout,
row = uiLayoutRow(subsplit, false);
uiItemR(row, &ptr, "position", UI_ITEM_R_SLIDER, IFACE_("Pos"), ICON_NONE);
- bt = block->buttons.last;
- UI_but_func_set(bt, colorband_update_cb, bt, coba);
row = uiLayoutRow(split, false);
uiItemR(row, &ptr, "color", 0, "", ICON_NONE);
- bt = block->buttons.last;
- UI_but_funcN_set(bt, rna_update_cb, MEM_dupallocN(cb), NULL);
+ }
+
+ /* Some special (rather awkward) treatment to update UI state on certain property changes. */
+ LISTBASE_FOREACH_BACKWARD (uiBut *, but, &block->buttons) {
+ if (but->rnapoin.data != ptr.data) {
+ continue;
+ }
+ if (!but->rnaprop) {
+ continue;
+ }
+
+ const char *prop_identifier = RNA_property_identifier(but->rnaprop);
+ if (STREQ(prop_identifier, "position")) {
+ UI_but_func_set(but, colorband_update_cb, but, coba);
+ }
+
+ if (STREQ(prop_identifier, "color")) {
+ UI_but_funcN_set(but, rna_update_cb, MEM_dupallocN(cb), NULL);
+ }
}
}
}
diff --git a/source/blender/editors/io/io_collada.c b/source/blender/editors/io/io_collada.c
index a630f150e0e..7bc28a0ba89 100644
--- a/source/blender/editors/io/io_collada.c
+++ b/source/blender/editors/io/io_collada.c
@@ -219,7 +219,7 @@ static int wm_collada_export_exec(bContext *C, wmOperator *op)
}
char buff[100];
- sprintf(buff, "Exported %d Objects", export_count);
+ BLI_snprintf(buff, sizeof(buff), "Exported %d Objects", export_count);
BKE_report(op->reports, RPT_INFO, buff);
return OPERATOR_FINISHED;
}
diff --git a/source/blender/editors/object/object_bake_api.c b/source/blender/editors/object/object_bake_api.c
index d647578dc50..e69ccf5a50d 100644
--- a/source/blender/editors/object/object_bake_api.c
+++ b/source/blender/editors/object/object_bake_api.c
@@ -893,7 +893,7 @@ static bool bake_targets_output_external(const BakeAPIRender *bkr,
else {
/* if everything else fails, use the material index */
char tmp[5];
- sprintf(tmp, "%d", i % 1000);
+ BLI_snprintf(tmp, sizeof(tmp), "%d", i % 1000);
BLI_path_suffix(name, FILE_MAX, tmp, "_");
}
}
diff --git a/source/blender/editors/object/object_constraint.c b/source/blender/editors/object/object_constraint.c
index 1386da5dbde..cbed01442ee 100644
--- a/source/blender/editors/object/object_constraint.c
+++ b/source/blender/editors/object/object_constraint.c
@@ -182,7 +182,7 @@ static char *buildmenu_pyconstraints(Main *bmain, Text *con_text, int *pyconinde
int i;
/* add title first */
- sprintf(buf, "Scripts: %%t|[None]%%x0|");
+ BLI_snprintf(buf, sizeof(buf), "Scripts: %%t|[None]%%x0|");
BLI_dynstr_append(pupds, buf);
/* init active-index first */
@@ -201,7 +201,7 @@ static char *buildmenu_pyconstraints(Main *bmain, Text *con_text, int *pyconinde
if (BPY_is_pyconstraint(text)) {
BLI_dynstr_append(pupds, text->id.name + 2);
- sprintf(buf, "%%x%d", i);
+ BLI_snprintf(buf, sizeof(buf), "%%x%d", i);
BLI_dynstr_append(pupds, buf);
if (text->id.next) {
diff --git a/source/blender/editors/object/object_remesh.cc b/source/blender/editors/object/object_remesh.cc
index a50fb28805e..b9acf5ae27b 100644
--- a/source/blender/editors/object/object_remesh.cc
+++ b/source/blender/editors/object/object_remesh.cc
@@ -286,7 +286,7 @@ static void voxel_size_parallel_lines_draw(uint pos3d,
immEnd();
}
-static void voxel_size_edit_draw(const bContext *C, ARegion * /*ar*/, void *arg)
+static void voxel_size_edit_draw(const bContext *C, ARegion * /*region*/, void *arg)
{
VoxelSizeEditCustomData *cd = static_cast<VoxelSizeEditCustomData *>(arg);
diff --git a/source/blender/editors/render/render_internal.cc b/source/blender/editors/render/render_internal.cc
index b05f430d198..5abf5867f92 100644
--- a/source/blender/editors/render/render_internal.cc
+++ b/source/blender/editors/render/render_internal.cc
@@ -405,56 +405,57 @@ static void make_renderinfo_string(const RenderStats *rs,
/* local view */
if (rs->localview) {
- spos += sprintf(spos, "%s | ", TIP_("3D Local View"));
+ spos += BLI_sprintf(spos, "%s | ", TIP_("3D Local View"));
}
else if (v3d_override) {
- spos += sprintf(spos, "%s | ", TIP_("3D View"));
+ spos += BLI_sprintf(spos, "%s | ", TIP_("3D View"));
}
/* frame number */
- spos += sprintf(spos, TIP_("Frame:%d "), (scene->r.cfra));
+ spos += BLI_sprintf(spos, TIP_("Frame:%d "), (scene->r.cfra));
/* previous and elapsed time */
BLI_timecode_string_from_time_simple(info_time_str, sizeof(info_time_str), rs->lastframetime);
if (rs->infostr && rs->infostr[0]) {
if (rs->lastframetime != 0.0) {
- spos += sprintf(spos, TIP_("| Last:%s "), info_time_str);
+ spos += BLI_sprintf(spos, TIP_("| Last:%s "), info_time_str);
}
else {
- spos += sprintf(spos, "| ");
+ spos += BLI_sprintf(spos, "| ");
}
BLI_timecode_string_from_time_simple(
info_time_str, sizeof(info_time_str), PIL_check_seconds_timer() - rs->starttime);
}
else {
- spos += sprintf(spos, "| ");
+ spos += BLI_sprintf(spos, "| ");
}
- spos += sprintf(spos, TIP_("Time:%s "), info_time_str);
+ spos += BLI_sprintf(spos, TIP_("Time:%s "), info_time_str);
/* statistics */
if (rs->statstr) {
if (rs->statstr[0]) {
- spos += sprintf(spos, "| %s ", rs->statstr);
+ spos += BLI_sprintf(spos, "| %s ", rs->statstr);
}
}
else {
if (rs->mem_peak == 0.0f) {
- spos += sprintf(spos, TIP_("| Mem:%.2fM (Peak %.2fM) "), megs_used_memory, megs_peak_memory);
+ spos += BLI_sprintf(
+ spos, TIP_("| Mem:%.2fM (Peak %.2fM) "), megs_used_memory, megs_peak_memory);
}
else {
- spos += sprintf(spos, TIP_("| Mem:%.2fM, Peak: %.2fM "), rs->mem_used, rs->mem_peak);
+ spos += BLI_sprintf(spos, TIP_("| Mem:%.2fM, Peak: %.2fM "), rs->mem_used, rs->mem_peak);
}
}
/* extra info */
if (rs->infostr && rs->infostr[0]) {
- spos += sprintf(spos, "| %s ", rs->infostr);
+ spos += BLI_sprintf(spos, "| %s ", rs->infostr);
}
else if (error && error[0]) {
- spos += sprintf(spos, "| %s ", error);
+ spos += BLI_sprintf(spos, "| %s ", error);
}
/* very weak... but 512 characters is quite safe */
diff --git a/source/blender/editors/render/render_preview.cc b/source/blender/editors/render/render_preview.cc
index 8e50e8c04d1..09394ea33be 100644
--- a/source/blender/editors/render/render_preview.cc
+++ b/source/blender/editors/render/render_preview.cc
@@ -630,10 +630,10 @@ static bool ed_preview_draw_rect(ScrArea *area, int split, int first, rcti *rect
bool ok = false;
if (!split || first) {
- sprintf(name, "Preview %p", (void *)area);
+ BLI_snprintf(name, sizeof(name), "Preview %p", (void *)area);
}
else {
- sprintf(name, "SecondPreview %p", (void *)area);
+ BLI_snprintf(name, sizeof(name), "SecondPreview %p", (void *)area);
}
if (split) {
@@ -1152,10 +1152,10 @@ static void shader_preview_render(ShaderPreview *sp, ID *id, int split, int firs
}
if (!split || first) {
- sprintf(name, "Preview %p", sp->owner);
+ BLI_snprintf(name, sizeof(name), "Preview %p", sp->owner);
}
else {
- sprintf(name, "SecondPreview %p", sp->owner);
+ BLI_snprintf(name, sizeof(name), "SecondPreview %p", sp->owner);
}
re = RE_GetRender(name);
diff --git a/source/blender/editors/sculpt_paint/paint_cursor.c b/source/blender/editors/sculpt_paint/paint_cursor.c
index b85b878af7d..b6e83187c86 100644
--- a/source/blender/editors/sculpt_paint/paint_cursor.c
+++ b/source/blender/editors/sculpt_paint/paint_cursor.c
@@ -562,31 +562,36 @@ static bool paint_draw_tex_overlay(UnifiedPaintSettings *ups,
if (mtex->brush_map_mode == MTEX_MAP_MODE_VIEW) {
GPU_matrix_push();
+ float center[2] = {
+ ups->draw_anchored ? ups->anchored_initial_mouse[0] : x,
+ ups->draw_anchored ? ups->anchored_initial_mouse[1] : y,
+ };
+
/* Brush rotation. */
- GPU_matrix_translate_2f(x, y);
+ GPU_matrix_translate_2fv(center);
GPU_matrix_rotate_2d(-RAD2DEGF(primary ? ups->brush_rotation : ups->brush_rotation_sec));
- GPU_matrix_translate_2f(-x, -y);
+ GPU_matrix_translate_2f(-center[0], -center[1]);
/* Scale based on tablet pressure. */
if (primary && ups->stroke_active && BKE_brush_use_size_pressure(brush)) {
const float scale = ups->size_pressure_value;
- GPU_matrix_translate_2f(x, y);
+ GPU_matrix_translate_2fv(center);
GPU_matrix_scale_2f(scale, scale);
- GPU_matrix_translate_2f(-x, -y);
+ GPU_matrix_translate_2f(-center[0], -center[1]);
}
if (ups->draw_anchored) {
- quad.xmin = ups->anchored_initial_mouse[0] - ups->anchored_size;
- quad.ymin = ups->anchored_initial_mouse[1] - ups->anchored_size;
- quad.xmax = ups->anchored_initial_mouse[0] + ups->anchored_size;
- quad.ymax = ups->anchored_initial_mouse[1] + ups->anchored_size;
+ quad.xmin = center[0] - ups->anchored_size;
+ quad.ymin = center[1] - ups->anchored_size;
+ quad.xmax = center[0] + ups->anchored_size;
+ quad.ymax = center[1] + ups->anchored_size;
}
else {
const int radius = BKE_brush_size_get(vc->scene, brush) * zoom;
- quad.xmin = x - radius;
- quad.ymin = y - radius;
- quad.xmax = x + radius;
- quad.ymax = y + radius;
+ quad.xmin = center[0] - radius;
+ quad.ymin = center[1] - radius;
+ quad.xmax = center[0] + radius;
+ quad.ymax = center[1] + radius;
}
}
else if (mtex->brush_map_mode == MTEX_MAP_MODE_TILED) {
diff --git a/source/blender/editors/sculpt_paint/paint_image.cc b/source/blender/editors/sculpt_paint/paint_image.cc
index 8c6358520ca..f334b2eb8f8 100644
--- a/source/blender/editors/sculpt_paint/paint_image.cc
+++ b/source/blender/editors/sculpt_paint/paint_image.cc
@@ -780,20 +780,7 @@ void ED_object_texture_paint_mode_enter_ex(Main *bmain, Scene *scene, Object *ob
}
if (ima) {
- wmWindowManager *wm = static_cast<wmWindowManager *>(bmain->wm.first);
- LISTBASE_FOREACH (wmWindow *, win, &wm->windows) {
- const bScreen *screen = WM_window_get_active_screen(win);
- LISTBASE_FOREACH (ScrArea *, area, &screen->areabase) {
- SpaceLink *sl = static_cast<SpaceLink *>(area->spacedata.first);
- if (sl->spacetype == SPACE_IMAGE) {
- SpaceImage *sima = (SpaceImage *)sl;
-
- if (!sima->pin) {
- ED_space_image_set(bmain, sima, ima, true);
- }
- }
- }
- }
+ ED_space_image_sync(bmain, ima, false);
}
ob->mode |= OB_MODE_TEXTURE_PAINT;
diff --git a/source/blender/editors/sculpt_paint/paint_image_proj.c b/source/blender/editors/sculpt_paint/paint_image_proj.c
index ea33449f0dd..c85044bf915 100644
--- a/source/blender/editors/sculpt_paint/paint_image_proj.c
+++ b/source/blender/editors/sculpt_paint/paint_image_proj.c
@@ -74,6 +74,7 @@
#include "DEG_depsgraph.h"
#include "DEG_depsgraph_query.h"
+#include "ED_image.h"
#include "ED_node.h"
#include "ED_object.h"
#include "ED_paint.h"
@@ -6704,6 +6705,7 @@ static bool proj_paint_add_slot(bContext *C, wmOperator *op)
BKE_texpaint_slot_refresh_cache(scene, ma, ob);
BKE_image_signal(bmain, ima, NULL, IMA_SIGNAL_USER_NEW_IMAGE);
WM_event_add_notifier(C, NC_IMAGE | NA_ADDED, ima);
+ ED_space_image_sync(bmain, ima, false);
}
if (layer) {
BKE_texpaint_slot_refresh_cache(scene, ma, ob);
diff --git a/source/blender/editors/sculpt_paint/paint_stroke.c b/source/blender/editors/sculpt_paint/paint_stroke.c
index 97f5bd77d47..f1f864fdf82 100644
--- a/source/blender/editors/sculpt_paint/paint_stroke.c
+++ b/source/blender/editors/sculpt_paint/paint_stroke.c
@@ -249,7 +249,7 @@ static bool paint_stroke_use_scene_spacing(Brush *brush, ePaintMode mode)
static bool paint_tool_raycast_original(Brush *brush, ePaintMode UNUSED(mode))
{
- return brush->flag & BRUSH_ANCHORED;
+ return brush->flag & (BRUSH_ANCHORED | BRUSH_DRAG_DOT);
}
static bool paint_tool_require_inbetween_mouse_events(Brush *brush, ePaintMode mode)
diff --git a/source/blender/editors/space_graph/graph_select.c b/source/blender/editors/space_graph/graph_select.c
index 932ed417f21..3265dcbf0d1 100644
--- a/source/blender/editors/space_graph/graph_select.c
+++ b/source/blender/editors/space_graph/graph_select.c
@@ -1061,6 +1061,12 @@ static int graph_circle_select_exec(bContext *C, wmOperator *op)
/* Apply box_select action. */
const bool any_key_selection_changed = box_select_graphkeys(
&ac, &rect_fl, BEZT_OK_REGION_CIRCLE, selectmode, incl_handles, &data);
+ if (any_key_selection_changed) {
+ /* If any key was selected at any time during this process, the entire-curve selection should
+ * be disabled. Otherwise, sliding over any keyless part of the curve will immediately cause
+ * the entire curve to be selected. */
+ RNA_boolean_set(op->ptr, "use_curve_selection", false);
+ }
const bool use_curve_selection = RNA_boolean_get(op->ptr, "use_curve_selection");
if (use_curve_selection && !any_key_selection_changed) {
box_select_graphcurves(&ac, &rect_fl, BEZT_OK_REGION_CIRCLE, selectmode, incl_handles, &data);
diff --git a/source/blender/editors/space_image/image_edit.c b/source/blender/editors/space_image/image_edit.c
index 0122e509c3b..68b1a9e4466 100644
--- a/source/blender/editors/space_image/image_edit.c
+++ b/source/blender/editors/space_image/image_edit.c
@@ -67,6 +67,30 @@ void ED_space_image_set(Main *bmain, SpaceImage *sima, Image *ima, bool automati
WM_main_add_notifier(NC_SPACE | ND_SPACE_IMAGE, NULL);
}
+void ED_space_image_sync(struct Main *bmain, struct Image *image, bool ignore_render_viewer)
+{
+ wmWindowManager *wm = (wmWindowManager *)bmain->wm.first;
+ LISTBASE_FOREACH (wmWindow *, win, &wm->windows) {
+ const bScreen *screen = WM_window_get_active_screen(win);
+ LISTBASE_FOREACH (ScrArea *, area, &screen->areabase) {
+ LISTBASE_FOREACH (SpaceLink *, sl, &area->spacedata) {
+ if (sl->spacetype != SPACE_IMAGE) {
+ continue;
+ }
+ SpaceImage *sima = (SpaceImage *)sl;
+ if (sima->pin) {
+ continue;
+ }
+ if (ignore_render_viewer && sima->image &&
+ ELEM(sima->image->type, IMA_TYPE_R_RESULT, IMA_TYPE_COMPOSITE)) {
+ continue;
+ }
+ ED_space_image_set(bmain, sima, image, true);
+ }
+ }
+ }
+}
+
void ED_space_image_auto_set(const bContext *C, SpaceImage *sima)
{
if (sima->mode != SI_MODE_UV || sima->pin) {
diff --git a/source/blender/editors/space_node/drawnode.cc b/source/blender/editors/space_node/drawnode.cc
index df31a0342cb..c66b8ad4ff0 100644
--- a/source/blender/editors/space_node/drawnode.cc
+++ b/source/blender/editors/space_node/drawnode.cc
@@ -1581,10 +1581,12 @@ void draw_nodespace_back_pix(const bContext &C,
GPU_matrix_pop();
}
-static float2 socket_link_connection_location(const bNodeSocket &socket, const bNodeLink &link)
+static float2 socket_link_connection_location(const bNode &node,
+ const bNodeSocket &socket,
+ const bNodeLink &link)
{
const float2 socket_location(socket.locx, socket.locy);
- if (socket.is_multi_input() && socket.is_input() && !(socket.owner_node().flag & NODE_HIDDEN)) {
+ if (socket.is_multi_input() && socket.is_input() && !(node.flag & NODE_HIDDEN)) {
return node_link_calculate_multi_input_position(
socket_location, link.multi_input_socket_index, socket.total_inputs);
}
@@ -1620,8 +1622,8 @@ static void calculate_inner_link_bezier_points(std::array<float2, 4> &points)
static std::array<float2, 4> node_link_bezier_points(const bNodeLink &link)
{
std::array<float2, 4> points;
- points[0] = socket_link_connection_location(*link.fromsock, link);
- points[3] = socket_link_connection_location(*link.tosock, link);
+ points[0] = socket_link_connection_location(*link.fromnode, *link.fromsock, link);
+ points[3] = socket_link_connection_location(*link.tonode, *link.tosock, link);
calculate_inner_link_bezier_points(points);
return points;
}
@@ -2212,8 +2214,11 @@ static std::array<float2, 4> node_link_bezier_points_dragged(const SpaceNode &sn
{
const float2 cursor = snode.runtime->cursor * UI_DPI_FAC;
std::array<float2, 4> points;
- points[0] = link.fromsock ? socket_link_connection_location(*link.fromsock, link) : cursor;
- points[3] = link.tosock ? socket_link_connection_location(*link.tosock, link) : cursor;
+ points[0] = link.fromsock ?
+ socket_link_connection_location(*link.fromnode, *link.fromsock, link) :
+ cursor;
+ points[3] = link.tosock ? socket_link_connection_location(*link.tonode, *link.tosock, link) :
+ cursor;
calculate_inner_link_bezier_points(points);
return points;
}
diff --git a/source/blender/editors/space_node/node_edit.cc b/source/blender/editors/space_node/node_edit.cc
index e6eba45d45e..7982b47f363 100644
--- a/source/blender/editors/space_node/node_edit.cc
+++ b/source/blender/editors/space_node/node_edit.cc
@@ -502,6 +502,7 @@ void ED_node_shader_default(const bContext *C, ID *id)
}
ma->nodetree = ntreeCopyTree(bmain, ma_default->nodetree);
+ ma->nodetree->owner_id = &ma->id;
BKE_ntree_update_main_tree(bmain, ma->nodetree, nullptr);
}
else if (ELEM(GS(id->name), ID_WO, ID_LA)) {
@@ -736,26 +737,7 @@ void ED_node_set_active(
* - current image is not a Render Result or ViewerNode (want to keep looking at these) */
if (node->id != nullptr && GS(node->id->name) == ID_IM) {
Image *image = (Image *)node->id;
- wmWindowManager *wm = (wmWindowManager *)bmain->wm.first;
- LISTBASE_FOREACH (wmWindow *, win, &wm->windows) {
- const bScreen *screen = WM_window_get_active_screen(win);
- LISTBASE_FOREACH (ScrArea *, area, &screen->areabase) {
- LISTBASE_FOREACH (SpaceLink *, sl, &area->spacedata) {
- if (sl->spacetype != SPACE_IMAGE) {
- continue;
- }
- SpaceImage *sima = (SpaceImage *)sl;
- if (sima->pin) {
- continue;
- }
- if (sima->image &&
- ELEM(sima->image->type, IMA_TYPE_R_RESULT, IMA_TYPE_COMPOSITE)) {
- continue;
- }
- ED_space_image_set(bmain, sima, image, true);
- }
- }
- }
+ ED_space_image_sync(bmain, image, true);
}
if (r_active_texture_changed) {
diff --git a/source/blender/editors/space_node/space_node.cc b/source/blender/editors/space_node/space_node.cc
index 5754e77399f..ce0273eec81 100644
--- a/source/blender/editors/space_node/space_node.cc
+++ b/source/blender/editors/space_node/space_node.cc
@@ -181,7 +181,7 @@ void ED_node_tree_path_get(SpaceNode *snode, char *value)
value += strlen(path->display_name);
}
else {
- sprintf(value, "/%s", path->display_name);
+ BLI_sprintf(value, "/%s", path->display_name);
value += strlen(path->display_name) + 1;
}
}
diff --git a/source/blender/editors/space_outliner/tree/tree_element_rna.cc b/source/blender/editors/space_outliner/tree/tree_element_rna.cc
index 6e92fa087fb..275b4391d24 100644
--- a/source/blender/editors/space_outliner/tree/tree_element_rna.cc
+++ b/source/blender/editors/space_outliner/tree/tree_element_rna.cc
@@ -232,12 +232,13 @@ TreeElementRNAArrayElement::TreeElementRNAArrayElement(TreeElement &legacy_te,
char c = RNA_property_array_item_char(TreeElementRNAArrayElement::getPropertyRNA(), index);
- legacy_te_.name = static_cast<char *>(MEM_callocN(sizeof(char[20]), "OutlinerRNAArrayName"));
+ const size_t name_size = sizeof(char[20]);
+ legacy_te_.name = static_cast<char *>(MEM_callocN(name_size, "OutlinerRNAArrayName"));
if (c) {
- sprintf((char *)legacy_te_.name, " %c", c);
+ BLI_snprintf((char *)legacy_te_.name, name_size, " %c", c);
}
else {
- sprintf((char *)legacy_te_.name, " %d", index + 1);
+ BLI_snprintf((char *)legacy_te_.name, name_size, " %d", index + 1);
}
legacy_te_.flag |= TE_FREE_NAME;
}
diff --git a/source/blender/editors/space_view3d/view3d_draw.cc b/source/blender/editors/space_view3d/view3d_draw.cc
index 31b26ba4cda..5d4ed032bb4 100644
--- a/source/blender/editors/space_view3d/view3d_draw.cc
+++ b/source/blender/editors/space_view3d/view3d_draw.cc
@@ -1306,15 +1306,15 @@ static void draw_selected_name(
char info[300];
char *s = info;
- s += sprintf(s, "(%d)", cfra);
+ s += BLI_sprintf(s, "(%d)", cfra);
if ((ob == nullptr) || (ob->mode == OB_MODE_OBJECT)) {
BKE_view_layer_synced_ensure(scene, view_layer);
LayerCollection *layer_collection = BKE_view_layer_active_collection_get(view_layer);
- s += sprintf(s,
- " %s%s",
- BKE_collection_ui_name_get(layer_collection->collection),
- (ob == nullptr) ? "" : " |");
+ s += BLI_sprintf(s,
+ " %s%s",
+ BKE_collection_ui_name_get(layer_collection->collection),
+ (ob == nullptr) ? "" : " |");
}
/* Info can contain:
@@ -1407,12 +1407,12 @@ static void draw_selected_name(
}
if (markern) {
- s += sprintf(s, " <%s>", markern);
+ s += BLI_sprintf(s, " <%s>", markern);
}
if (v3d->flag2 & V3D_SHOW_VIEWER) {
if (!BLI_listbase_is_empty(&v3d->viewer_path.path)) {
- s += sprintf(s, "%s", IFACE_(" (Viewer)"));
+ s += BLI_sprintf(s, "%s", IFACE_(" (Viewer)"));
}
}
diff --git a/source/blender/freestyle/intern/python/BPy_ContextFunctions.cpp b/source/blender/freestyle/intern/python/BPy_ContextFunctions.cpp
index d79c2f4d9b0..a4b83b840f3 100644
--- a/source/blender/freestyle/intern/python/BPy_ContextFunctions.cpp
+++ b/source/blender/freestyle/intern/python/BPy_ContextFunctions.cpp
@@ -280,10 +280,14 @@ static PyMethodDef module_functions[] = {
static PyModuleDef module_definition = {
PyModuleDef_HEAD_INIT,
- "Freestyle.ContextFunctions",
- module_docstring,
- -1,
- module_functions,
+ /*m_name*/ "Freestyle.ContextFunctions",
+ /*m_doc*/ module_docstring,
+ /*m_size*/ -1,
+ /*m_methods*/ module_functions,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
//------------------- MODULE INITIALIZATION --------------------------------
diff --git a/source/blender/freestyle/intern/python/BPy_Freestyle.cpp b/source/blender/freestyle/intern/python/BPy_Freestyle.cpp
index 237f1802026..f99e66c822d 100644
--- a/source/blender/freestyle/intern/python/BPy_Freestyle.cpp
+++ b/source/blender/freestyle/intern/python/BPy_Freestyle.cpp
@@ -509,10 +509,14 @@ static PyMethodDef module_functions[] = {
static PyModuleDef module_definition = {
PyModuleDef_HEAD_INIT,
- "_freestyle",
- module_docstring,
- -1,
- module_functions,
+ /*m_name*/ "_freestyle",
+ /*m_doc*/ module_docstring,
+ /*m_size*/ -1,
+ /*m_methods*/ module_functions,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
//-------------------MODULE INITIALIZATION--------------------------------
diff --git a/source/blender/freestyle/intern/python/BPy_IntegrationType.cpp b/source/blender/freestyle/intern/python/BPy_IntegrationType.cpp
index dfebfd3ff73..c862b226271 100644
--- a/source/blender/freestyle/intern/python/BPy_IntegrationType.cpp
+++ b/source/blender/freestyle/intern/python/BPy_IntegrationType.cpp
@@ -112,10 +112,14 @@ static PyMethodDef module_functions[] = {
static PyModuleDef module_definition = {
PyModuleDef_HEAD_INIT,
- "Freestyle.Integrator",
- module_docstring,
- -1,
- module_functions,
+ /*m_name*/ "Freestyle.Integrator",
+ /*m_doc*/ module_docstring,
+ /*m_size*/ -1,
+ /*m_methods*/ module_functions,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
/*-----------------------BPy_IntegrationType type definition ------------------------------*/
diff --git a/source/blender/freestyle/intern/python/Interface1D/BPy_FEdge.cpp b/source/blender/freestyle/intern/python/Interface1D/BPy_FEdge.cpp
index c3884e73ebd..9684f96d586 100644
--- a/source/blender/freestyle/intern/python/Interface1D/BPy_FEdge.cpp
+++ b/source/blender/freestyle/intern/python/Interface1D/BPy_FEdge.cpp
@@ -89,7 +89,7 @@ static Py_ssize_t FEdge_sq_length(BPy_FEdge * /*self*/)
return 2;
}
-static PyObject *FEdge_sq_item(BPy_FEdge *self, int keynum)
+static PyObject *FEdge_sq_item(BPy_FEdge *self, Py_ssize_t keynum)
{
if (keynum < 0) {
keynum += FEdge_sq_length(self);
diff --git a/source/blender/freestyle/intern/python/Interface1D/BPy_Stroke.cpp b/source/blender/freestyle/intern/python/Interface1D/BPy_Stroke.cpp
index 2156a91b1f2..8acbfefa995 100644
--- a/source/blender/freestyle/intern/python/Interface1D/BPy_Stroke.cpp
+++ b/source/blender/freestyle/intern/python/Interface1D/BPy_Stroke.cpp
@@ -74,7 +74,7 @@ static Py_ssize_t Stroke_sq_length(BPy_Stroke *self)
return self->s->strokeVerticesSize();
}
-static PyObject *Stroke_sq_item(BPy_Stroke *self, int keynum)
+static PyObject *Stroke_sq_item(BPy_Stroke *self, Py_ssize_t keynum)
{
if (keynum < 0) {
keynum += Stroke_sq_length(self);
diff --git a/source/blender/imbuf/intern/jp2.c b/source/blender/imbuf/intern/jp2.c
index d2b94355f85..f3d6d19cb8d 100644
--- a/source/blender/imbuf/intern/jp2.c
+++ b/source/blender/imbuf/intern/jp2.c
@@ -885,7 +885,10 @@ static opj_image_t *ibuftoimage(ImBuf *ibuf, opj_cparameters_t *parameters)
memset(&cmptparm, 0, sizeof(opj_image_cmptparm_t[4]));
for (i = 0; i < numcomps; i++) {
cmptparm[i].prec = prec;
+ /* Deprecated in openjpeg 2.5. */
+#if (OPJ_VERSION_MAJOR < 2) || (OPJ_VERSION_MAJOR == 2 && OPJ_VERSION_MINOR < 5)
cmptparm[i].bpp = prec;
+#endif
cmptparm[i].sgnd = 0;
cmptparm[i].dx = subsampling_dx;
cmptparm[i].dy = subsampling_dy;
diff --git a/source/blender/io/avi/intern/avi_codecs.c b/source/blender/io/avi/intern/avi_codecs.c
index cbb96e12930..ba897ef808a 100644
--- a/source/blender/io/avi/intern/avi_codecs.c
+++ b/source/blender/io/avi/intern/avi_codecs.c
@@ -14,6 +14,8 @@
#include "avi_rgb.h"
#include "avi_rgb32.h"
+#include "BLI_string.h"
+
void *avi_format_convert(
AviMovie *movie, int stream, void *buffer, AviFormat from, AviFormat to, size_t *size)
{
@@ -68,10 +70,10 @@ int avi_get_data_id(AviFormat format, int stream)
char fcc[5];
if (avi_get_format_type(format) == FCC("vids")) {
- sprintf(fcc, "%2.2ddc", stream);
+ BLI_snprintf(fcc, sizeof(fcc), "%2.2ddc", stream);
}
else if (avi_get_format_type(format) == FCC("auds")) {
- sprintf(fcc, "%2.2ddc", stream);
+ BLI_snprintf(fcc, sizeof(fcc), "%2.2ddc", stream);
}
else {
return 0;
diff --git a/source/blender/io/collada/AnimationExporter.cpp b/source/blender/io/collada/AnimationExporter.cpp
index 85e8ccf9f2a..24d2072f840 100644
--- a/source/blender/io/collada/AnimationExporter.cpp
+++ b/source/blender/io/collada/AnimationExporter.cpp
@@ -43,7 +43,8 @@ bool AnimationExporter::open_animation_container(bool has_container, Object *ob)
{
if (!has_container) {
char anim_id[200];
- sprintf(anim_id, "action_container-%s", translate_id(id_name(ob)).c_str());
+ BLI_snprintf(
+ anim_id, sizeof(anim_id), "action_container-%s", translate_id(id_name(ob)).c_str());
openAnimation(anim_id, encode_xml(id_name(ob)));
}
return true;
diff --git a/source/blender/io/collada/GeometryExporter.cpp b/source/blender/io/collada/GeometryExporter.cpp
index a069c32026b..f6f9026481c 100644
--- a/source/blender/io/collada/GeometryExporter.cpp
+++ b/source/blender/io/collada/GeometryExporter.cpp
@@ -522,7 +522,7 @@ std::string GeometryExporter::makeTexcoordSourceId(std::string &geom_id,
suffix[0] = '\0';
}
else {
- sprintf(suffix, "-%d", layer_index);
+ BLI_snprintf(suffix, sizeof(suffix), "-%d", layer_index);
}
return getIdBySemantics(geom_id, COLLADASW::InputSemantic::TEXCOORD) + suffix;
}
diff --git a/source/blender/io/gpencil/intern/gpencil_io_export_svg.cc b/source/blender/io/gpencil/intern/gpencil_io_export_svg.cc
index 8acb87f8e58..b85fd33e116 100644
--- a/source/blender/io/gpencil/intern/gpencil_io_export_svg.cc
+++ b/source/blender/io/gpencil/intern/gpencil_io_export_svg.cc
@@ -90,7 +90,8 @@ void GpencilExporterSVG::create_document_header()
pugi::xml_node comment = main_doc_.append_child(pugi::node_comment);
char txt[128];
- sprintf(txt, " Generator: Blender, %s - %s ", SVG_EXPORTER_NAME, SVG_EXPORTER_VERSION);
+ BLI_snprintf(
+ txt, sizeof(txt), " Generator: Blender, %s - %s ", SVG_EXPORTER_NAME, SVG_EXPORTER_VERSION);
comment.set_value(txt);
pugi::xml_node doctype = main_doc_.append_child(pugi::node_doctype);
@@ -147,7 +148,7 @@ void GpencilExporterSVG::export_gpencil_layers()
pugi::xml_node ob_node = frame_node_.append_child("g");
char obtxt[96];
- sprintf(obtxt, "blender_object_%s", ob->id.name + 2);
+ BLI_snprintf(obtxt, sizeof(obtxt), "blender_object_%s", ob->id.name + 2);
ob_node.append_attribute("id").set_value(obtxt);
/* Use evaluated version to get strokes with modifiers. */
@@ -402,7 +403,7 @@ std::string GpencilExporterSVG::rgb_to_hexstr(const float color[3])
uint8_t g = color[1] * 255.0f;
uint8_t b = color[2] * 255.0f;
char hex_string[20];
- sprintf(hex_string, "#%02X%02X%02X", r, g, b);
+ BLI_snprintf(hex_string, sizeof(hex_string), "#%02X%02X%02X", r, g, b);
std::string hexstr = hex_string;
diff --git a/source/blender/makesrna/intern/rna_color.c b/source/blender/makesrna/intern/rna_color.c
index fe4d51bafb9..2ba82da09bb 100644
--- a/source/blender/makesrna/intern/rna_color.c
+++ b/source/blender/makesrna/intern/rna_color.c
@@ -577,15 +577,8 @@ static void rna_ColorManagedColorspaceSettings_colorspace_set(struct PointerRNA
}
static const EnumPropertyItem *rna_ColorManagedColorspaceSettings_colorspace_itemf(
- bContext *C, PointerRNA *UNUSED(ptr), PropertyRNA *UNUSED(prop), bool *r_free)
+ bContext *UNUSED(C), PointerRNA *UNUSED(ptr), PropertyRNA *UNUSED(prop), bool *r_free)
{
-# if 0 /* FIXME: Causes blank drop-down, see T102316. */
- if (C == NULL) {
- return rna_enum_color_space_convert_default_items;
- }
-# else
- UNUSED_VARS(C);
-# endif
EnumPropertyItem *items = NULL;
int totitem = 0;
diff --git a/source/blender/makesrna/intern/rna_material.c b/source/blender/makesrna/intern/rna_material.c
index 252d2e657b5..1616684cb6a 100644
--- a/source/blender/makesrna/intern/rna_material.c
+++ b/source/blender/makesrna/intern/rna_material.c
@@ -142,7 +142,6 @@ static void rna_Material_texpaint_begin(CollectionPropertyIterator *iter, Pointe
static void rna_Material_active_paint_texture_index_update(bContext *C, PointerRNA *ptr)
{
Main *bmain = CTX_data_main(C);
- bScreen *screen;
Material *ma = (Material *)ptr->owner_id;
if (ma->use_nodes && ma->nodetree) {
@@ -157,25 +156,7 @@ static void rna_Material_active_paint_texture_index_update(bContext *C, PointerR
TexPaintSlot *slot = &ma->texpaintslot[ma->paint_active_slot];
Image *image = slot->ima;
if (image) {
- for (screen = bmain->screens.first; screen; screen = screen->id.next) {
- wmWindow *win = ED_screen_window_find(screen, bmain->wm.first);
- if (win == NULL) {
- continue;
- }
-
- ScrArea *area;
- for (area = screen->areabase.first; area; area = area->next) {
- SpaceLink *sl;
- for (sl = area->spacedata.first; sl; sl = sl->next) {
- if (sl->spacetype == SPACE_IMAGE) {
- SpaceImage *sima = (SpaceImage *)sl;
- if (!sima->pin) {
- ED_space_image_set(bmain, sima, image, true);
- }
- }
- }
- }
- }
+ ED_space_image_sync(bmain, image, false);
}
/* For compatibility reasons with vertex paint we activate the color attribute. */
diff --git a/source/blender/makesrna/intern/rna_nodetree.c b/source/blender/makesrna/intern/rna_nodetree.c
index 9dc940059dd..cfec020c739 100644
--- a/source/blender/makesrna/intern/rna_nodetree.c
+++ b/source/blender/makesrna/intern/rna_nodetree.c
@@ -4512,15 +4512,8 @@ static void rna_NodeConvertColorSpace_to_color_space_set(struct PointerRNA *ptr,
}
static const EnumPropertyItem *rna_NodeConvertColorSpace_color_space_itemf(
- bContext *C, PointerRNA *UNUSED(ptr), PropertyRNA *UNUSED(prop), bool *r_free)
+ bContext *UNUSED(C), PointerRNA *UNUSED(ptr), PropertyRNA *UNUSED(prop), bool *r_free)
{
-# if 0 /* FIXME: Causes blank drop-down, see T102316. */
- if (C == NULL) {
- return rna_enum_color_space_convert_default_items;
- }
-# else
- UNUSED_VARS(C);
-# endif
EnumPropertyItem *items = NULL;
int totitem = 0;
diff --git a/source/blender/makesrna/intern/rna_particle.c b/source/blender/makesrna/intern/rna_particle.c
index 02105c7b425..a56e7d28ef7 100644
--- a/source/blender/makesrna/intern/rna_particle.c
+++ b/source/blender/makesrna/intern/rna_particle.c
@@ -1188,7 +1188,7 @@ static void rna_ParticleTarget_name_get(PointerRNA *ptr, char *str)
if (psys) {
if (pt->ob) {
- sprintf(str, "%s: %s", pt->ob->id.name + 2, psys->name);
+ BLI_sprintf(str, "%s: %s", pt->ob->id.name + 2, psys->name);
}
else {
strcpy(str, psys->name);
@@ -1315,7 +1315,7 @@ static void rna_ParticleDupliWeight_name_get(PointerRNA *ptr, char *str)
ParticleDupliWeight *dw = ptr->data;
if (dw->ob) {
- sprintf(str, "%s: %i", dw->ob->id.name + 2, dw->count);
+ BLI_sprintf(str, "%s: %i", dw->ob->id.name + 2, dw->count);
}
else {
strcpy(str, "No object");
diff --git a/source/blender/makesrna/intern/rna_sculpt_paint.c b/source/blender/makesrna/intern/rna_sculpt_paint.c
index 396a3c77829..c5e7c6a6e91 100644
--- a/source/blender/makesrna/intern/rna_sculpt_paint.c
+++ b/source/blender/makesrna/intern/rna_sculpt_paint.c
@@ -537,24 +537,9 @@ static void rna_ImaPaint_canvas_update(bContext *C, PointerRNA *UNUSED(ptr))
ViewLayer *view_layer = CTX_data_view_layer(C);
BKE_view_layer_synced_ensure(scene, view_layer);
Object *ob = BKE_view_layer_active_object_get(view_layer);
- bScreen *screen;
Image *ima = scene->toolsettings->imapaint.canvas;
- for (screen = bmain->screens.first; screen; screen = screen->id.next) {
- ScrArea *area;
- for (area = screen->areabase.first; area; area = area->next) {
- SpaceLink *slink;
- for (slink = area->spacedata.first; slink; slink = slink->next) {
- if (slink->spacetype == SPACE_IMAGE) {
- SpaceImage *sima = (SpaceImage *)slink;
-
- if (!sima->pin) {
- ED_space_image_set(bmain, sima, ima, true);
- }
- }
- }
- }
- }
+ ED_space_image_sync(bmain, ima, false);
if (ob && ob->type == OB_MESH) {
ED_paint_proj_mesh_data_check(scene, ob, NULL, NULL, NULL, NULL);
diff --git a/source/blender/modifiers/intern/MOD_ui_common.c b/source/blender/modifiers/intern/MOD_ui_common.c
index e882435077a..0f6c40610a0 100644
--- a/source/blender/modifiers/intern/MOD_ui_common.c
+++ b/source/blender/modifiers/intern/MOD_ui_common.c
@@ -352,7 +352,8 @@ static void modifier_panel_header(const bContext *C, Panel *panel)
buttons_number++;
}
/* Some modifiers can work with pre-tessellated curves only. */
- else if (ELEM(md->type, eModifierType_Hook, eModifierType_Softbody, eModifierType_MeshDeform)) {
+ else if (ELEM(
+ md->type, eModifierType_Hook, eModifierType_Softbody, eModifierType_MeshDeform)) {
/* Add button (appearing to be ON) and add tip why this can't be changed. */
sub = uiLayoutRow(row, true);
uiBlock *block = uiLayoutGetBlock(sub);
diff --git a/source/blender/nodes/texture/nodes/node_texture_output.c b/source/blender/nodes/texture/nodes/node_texture_output.c
index 37e39dae22c..b640880ba9b 100644
--- a/source/blender/nodes/texture/nodes/node_texture_output.c
+++ b/source/blender/nodes/texture/nodes/node_texture_output.c
@@ -83,7 +83,7 @@ static void unique_name(bNode *node)
BLI_strncpy(new_name, name, sizeof(tno->name));
name = new_name;
}
- sprintf(new_name + new_len - 4, ".%03d", ++suffix);
+ BLI_sprintf(new_name + new_len - 4, ".%03d", ++suffix);
}
if (new_name[0] != '\0') {
diff --git a/source/blender/python/bmesh/bmesh_py_api.c b/source/blender/python/bmesh/bmesh_py_api.c
index 2e6d1698da9..cc413ad7924 100644
--- a/source/blender/python/bmesh/bmesh_py_api.c
+++ b/source/blender/python/bmesh/bmesh_py_api.c
@@ -156,14 +156,14 @@ PyDoc_STRVAR(BPy_BM_doc,
".. include:: include__bmesh.rst\n");
static struct PyModuleDef BPy_BM_module_def = {
PyModuleDef_HEAD_INIT,
- "bmesh", /* m_name */
- BPy_BM_doc, /* m_doc */
- 0, /* m_size */
- BPy_BM_methods, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- NULL, /* m_free */
+ /*m_name*/ "bmesh",
+ /*m_doc*/ BPy_BM_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ BPy_BM_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *BPyInit_bmesh(void)
diff --git a/source/blender/python/bmesh/bmesh_py_geometry.c b/source/blender/python/bmesh/bmesh_py_geometry.c
index f2af8599807..aed197bc353 100644
--- a/source/blender/python/bmesh/bmesh_py_geometry.c
+++ b/source/blender/python/bmesh/bmesh_py_geometry.c
@@ -62,14 +62,14 @@ PyDoc_STRVAR(BPy_BM_utils_doc,
"This module provides access to bmesh geometry evaluation functions.");
static struct PyModuleDef BPy_BM_geometry_module_def = {
PyModuleDef_HEAD_INIT,
- "bmesh.geometry", /* m_name */
- BPy_BM_utils_doc, /* m_doc */
- 0, /* m_size */
- BPy_BM_geometry_methods, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- NULL, /* m_free */
+ /*m_name*/ "bmesh.geometry",
+ /*m_doc*/ BPy_BM_utils_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ BPy_BM_geometry_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *BPyInit_bmesh_geometry(void)
diff --git a/source/blender/python/bmesh/bmesh_py_ops.c b/source/blender/python/bmesh/bmesh_py_ops.c
index 35f87172d78..e5ca170599f 100644
--- a/source/blender/python/bmesh/bmesh_py_ops.c
+++ b/source/blender/python/bmesh/bmesh_py_ops.c
@@ -235,14 +235,14 @@ static struct PyMethodDef BPy_BM_ops_methods[] = {
PyDoc_STRVAR(BPy_BM_ops_doc, "Access to BMesh operators");
static struct PyModuleDef BPy_BM_ops_module_def = {
PyModuleDef_HEAD_INIT,
- "bmesh.ops", /* m_name */
- BPy_BM_ops_doc, /* m_doc */
- 0, /* m_size */
- BPy_BM_ops_methods, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- NULL, /* m_free */
+ /*m_name*/ "bmesh.ops",
+ /*m_doc*/ BPy_BM_ops_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ BPy_BM_ops_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *BPyInit_bmesh_ops(void)
diff --git a/source/blender/python/bmesh/bmesh_py_types.c b/source/blender/python/bmesh/bmesh_py_types.c
index cf20e1349ee..12267ef5b13 100644
--- a/source/blender/python/bmesh/bmesh_py_types.c
+++ b/source/blender/python/bmesh/bmesh_py_types.c
@@ -7,6 +7,7 @@
#include "BLI_math.h"
#include "BLI_sort.h"
+#include "BLI_string.h"
#include "DNA_material_types.h"
#include "DNA_mesh_types.h"
@@ -3118,7 +3119,7 @@ static Py_ssize_t bpy_bmelemseq_length(BPy_BMElemSeq *self)
}
}
-static PyObject *bpy_bmelemseq_subscript_int(BPy_BMElemSeq *self, int keynum)
+static PyObject *bpy_bmelemseq_subscript_int(BPy_BMElemSeq *self, Py_ssize_t keynum)
{
BPY_BM_CHECK_OBJ(self);
@@ -3735,14 +3736,14 @@ void BPy_BM_init_types(void)
static struct PyModuleDef BPy_BM_types_module_def = {
PyModuleDef_HEAD_INIT,
- "bmesh.types", /* m_name */
- NULL, /* m_doc */
- 0, /* m_size */
- NULL, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- NULL, /* m_free */
+ /*m_name*/ "bmesh.types",
+ /*m_doc*/ NULL,
+ /*m_size*/ 0,
+ /*m_methods*/ NULL,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *BPyInit_bmesh_types(void)
@@ -4246,16 +4247,16 @@ char *BPy_BMElem_StringFromHType_ex(const char htype, char ret[32])
/* zero to ensure string is always NULL terminated */
char *ret_ptr = ret;
if (htype & BM_VERT) {
- ret_ptr += sprintf(ret_ptr, "/%s", BPy_BMVert_Type.tp_name);
+ ret_ptr += BLI_sprintf(ret_ptr, "/%s", BPy_BMVert_Type.tp_name);
}
if (htype & BM_EDGE) {
- ret_ptr += sprintf(ret_ptr, "/%s", BPy_BMEdge_Type.tp_name);
+ ret_ptr += BLI_sprintf(ret_ptr, "/%s", BPy_BMEdge_Type.tp_name);
}
if (htype & BM_FACE) {
- ret_ptr += sprintf(ret_ptr, "/%s", BPy_BMFace_Type.tp_name);
+ ret_ptr += BLI_sprintf(ret_ptr, "/%s", BPy_BMFace_Type.tp_name);
}
if (htype & BM_LOOP) {
- ret_ptr += sprintf(ret_ptr, "/%s", BPy_BMLoop_Type.tp_name);
+ ret_ptr += BLI_sprintf(ret_ptr, "/%s", BPy_BMLoop_Type.tp_name);
}
ret[0] = '(';
*ret_ptr++ = ')';
diff --git a/source/blender/python/bmesh/bmesh_py_types_customdata.c b/source/blender/python/bmesh/bmesh_py_types_customdata.c
index 2d0e31b9e3f..1288083f8e7 100644
--- a/source/blender/python/bmesh/bmesh_py_types_customdata.c
+++ b/source/blender/python/bmesh/bmesh_py_types_customdata.c
@@ -740,7 +740,8 @@ static PyObject *bpy_bmlayercollection_subscript_str(BPy_BMLayerCollection *self
return NULL;
}
-static PyObject *bpy_bmlayercollection_subscript_int(BPy_BMLayerCollection *self, int keynum)
+static PyObject *bpy_bmlayercollection_subscript_int(BPy_BMLayerCollection *self,
+ Py_ssize_t keynum)
{
Py_ssize_t len;
BPY_BM_CHECK_OBJ(self);
diff --git a/source/blender/python/bmesh/bmesh_py_types_meshdata.c b/source/blender/python/bmesh/bmesh_py_types_meshdata.c
index c58c9caf0a8..9bd98f2df53 100644
--- a/source/blender/python/bmesh/bmesh_py_types_meshdata.c
+++ b/source/blender/python/bmesh/bmesh_py_types_meshdata.c
@@ -391,7 +391,7 @@ typedef struct BPy_BMDeformVert {
/* Mapping Protocols
* ================= */
-static int bpy_bmdeformvert_len(BPy_BMDeformVert *self)
+static Py_ssize_t bpy_bmdeformvert_len(BPy_BMDeformVert *self)
{
return self->data->totweight;
}
diff --git a/source/blender/python/bmesh/bmesh_py_types_select.c b/source/blender/python/bmesh/bmesh_py_types_select.c
index 20bf06e8016..ed25caee74d 100644
--- a/source/blender/python/bmesh/bmesh_py_types_select.c
+++ b/source/blender/python/bmesh/bmesh_py_types_select.c
@@ -163,7 +163,7 @@ static Py_ssize_t bpy_bmeditselseq_length(BPy_BMEditSelSeq *self)
return BLI_listbase_count(&self->bm->selected);
}
-static PyObject *bpy_bmeditselseq_subscript_int(BPy_BMEditSelSeq *self, int keynum)
+static PyObject *bpy_bmeditselseq_subscript_int(BPy_BMEditSelSeq *self, Py_ssize_t keynum)
{
BMEditSelection *ese;
diff --git a/source/blender/python/bmesh/bmesh_py_utils.c b/source/blender/python/bmesh/bmesh_py_utils.c
index 6630eb4924e..5eca6f854f6 100644
--- a/source/blender/python/bmesh/bmesh_py_utils.c
+++ b/source/blender/python/bmesh/bmesh_py_utils.c
@@ -818,14 +818,14 @@ static struct PyMethodDef BPy_BM_utils_methods[] = {
PyDoc_STRVAR(BPy_BM_utils_doc, "This module provides access to blenders bmesh data structures.");
static struct PyModuleDef BPy_BM_utils_module_def = {
PyModuleDef_HEAD_INIT,
- "bmesh.utils", /* m_name */
- BPy_BM_utils_doc, /* m_doc */
- 0, /* m_size */
- BPy_BM_utils_methods, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- NULL, /* m_free */
+ /*m_name*/ "bmesh.utils",
+ /*m_doc*/ BPy_BM_utils_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ BPy_BM_utils_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *BPyInit_bmesh_utils(void)
diff --git a/source/blender/python/generic/bgl.c b/source/blender/python/generic/bgl.c
index a03e0ab31eb..fe64b247d43 100644
--- a/source/blender/python/generic/bgl.c
+++ b/source/blender/python/generic/bgl.c
@@ -417,11 +417,11 @@ static PyObject *Method_ShaderSource(PyObject *self, PyObject *args);
/* Buffer sequence methods */
-static int Buffer_len(Buffer *self);
-static PyObject *Buffer_item(Buffer *self, int i);
-static PyObject *Buffer_slice(Buffer *self, int begin, int end);
-static int Buffer_ass_item(Buffer *self, int i, PyObject *v);
-static int Buffer_ass_slice(Buffer *self, int begin, int end, PyObject *seq);
+static Py_ssize_t Buffer_len(Buffer *self);
+static PyObject *Buffer_item(Buffer *self, Py_ssize_t i);
+static PyObject *Buffer_slice(Buffer *self, Py_ssize_t begin, Py_ssize_t end);
+static int Buffer_ass_item(Buffer *self, Py_ssize_t i, PyObject *v);
+static int Buffer_ass_slice(Buffer *self, Py_ssize_t begin, Py_ssize_t end, PyObject *seq);
static PyObject *Buffer_subscript(Buffer *self, PyObject *item);
static int Buffer_ass_subscript(Buffer *self, PyObject *item, PyObject *value);
@@ -794,12 +794,12 @@ static PyObject *Buffer_new(PyTypeObject *UNUSED(type), PyObject *args, PyObject
/* Buffer sequence methods */
-static int Buffer_len(Buffer *self)
+static Py_ssize_t Buffer_len(Buffer *self)
{
return self->dimensions[0];
}
-static PyObject *Buffer_item(Buffer *self, int i)
+static PyObject *Buffer_item(Buffer *self, Py_ssize_t i)
{
if (i >= self->dimensions[0] || i < 0) {
PyErr_SetString(PyExc_IndexError, "array index out of range");
@@ -837,10 +837,9 @@ static PyObject *Buffer_item(Buffer *self, int i)
return NULL;
}
-static PyObject *Buffer_slice(Buffer *self, int begin, int end)
+static PyObject *Buffer_slice(Buffer *self, Py_ssize_t begin, Py_ssize_t end)
{
PyObject *list;
- int count;
if (begin < 0) {
begin = 0;
@@ -854,13 +853,13 @@ static PyObject *Buffer_slice(Buffer *self, int begin, int end)
list = PyList_New(end - begin);
- for (count = begin; count < end; count++) {
+ for (Py_ssize_t count = begin; count < end; count++) {
PyList_SET_ITEM(list, count - begin, Buffer_item(self, count));
}
return list;
}
-static int Buffer_ass_item(Buffer *self, int i, PyObject *v)
+static int Buffer_ass_item(Buffer *self, Py_ssize_t i, PyObject *v)
{
if (i >= self->dimensions[0] || i < 0) {
PyErr_SetString(PyExc_IndexError, "array assignment index out of range");
@@ -895,10 +894,11 @@ static int Buffer_ass_item(Buffer *self, int i, PyObject *v)
}
}
-static int Buffer_ass_slice(Buffer *self, int begin, int end, PyObject *seq)
+static int Buffer_ass_slice(Buffer *self, Py_ssize_t begin, Py_ssize_t end, PyObject *seq)
{
PyObject *item;
- int count, err = 0;
+ int err = 0;
+ Py_ssize_t count;
if (begin < 0) {
begin = 0;
@@ -918,7 +918,7 @@ static int Buffer_ass_slice(Buffer *self, int begin, int end, PyObject *seq)
return -1;
}
- /* re-use count var */
+ /* Re-use count variable. */
if ((count = PySequence_Size(seq)) != (end - begin)) {
PyErr_Format(PyExc_TypeError,
"buffer[:] = value, size mismatch in assignment. "
@@ -1376,14 +1376,14 @@ BGL_Wrap(TexImage3DMultisample,
static struct PyModuleDef BGL_module_def = {
PyModuleDef_HEAD_INIT,
- "bgl", /* m_name */
- NULL, /* m_doc */
- 0, /* m_size */
- NULL, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- NULL, /* m_free */
+ /*m_name*/ "bgl",
+ /*m_doc*/ NULL,
+ /*m_size*/ 0,
+ /*m_methods*/ NULL,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
static void py_module_dict_add_int(PyObject *dict, const char *name, int value)
diff --git a/source/blender/python/generic/bl_math_py_api.c b/source/blender/python/generic/bl_math_py_api.c
index 19958a99df9..47a1687d20b 100644
--- a/source/blender/python/generic/bl_math_py_api.c
+++ b/source/blender/python/generic/bl_math_py_api.c
@@ -129,14 +129,14 @@ static PyMethodDef M_bl_math_methods[] = {
static struct PyModuleDef M_bl_math_module_def = {
PyModuleDef_HEAD_INIT,
- "bl_math", /* m_name */
- M_bl_math_doc, /* m_doc */
- 0, /* m_size */
- M_bl_math_methods, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- NULL, /* m_free */
+ /*m_name*/ "bl_math",
+ /*m_doc*/ M_bl_math_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ M_bl_math_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyMODINIT_FUNC BPyInit_bl_math(void)
diff --git a/source/blender/python/generic/blf_py_api.c b/source/blender/python/generic/blf_py_api.c
index 979a581463e..2b7c5ed7e55 100644
--- a/source/blender/python/generic/blf_py_api.c
+++ b/source/blender/python/generic/blf_py_api.c
@@ -466,14 +466,14 @@ static PyMethodDef BLF_methods[] = {
PyDoc_STRVAR(BLF_doc, "This module provides access to Blender's text drawing functions.");
static struct PyModuleDef BLF_module_def = {
PyModuleDef_HEAD_INIT,
- "blf", /* m_name */
- BLF_doc, /* m_doc */
- 0, /* m_size */
- BLF_methods, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- NULL, /* m_free */
+ /*m_name*/ "blf",
+ /*m_doc*/ BLF_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ BLF_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *BPyInit_blf(void)
diff --git a/source/blender/python/generic/idprop_py_api.c b/source/blender/python/generic/idprop_py_api.c
index a5599a3622e..9a6568355af 100644
--- a/source/blender/python/generic/idprop_py_api.c
+++ b/source/blender/python/generic/idprop_py_api.c
@@ -1695,12 +1695,12 @@ static PyMethodDef BPy_IDArray_methods[] = {
{NULL, NULL, 0, NULL},
};
-static int BPy_IDArray_Len(BPy_IDArray *self)
+static Py_ssize_t BPy_IDArray_Len(BPy_IDArray *self)
{
return self->prop->len;
}
-static PyObject *BPy_IDArray_GetItem(BPy_IDArray *self, int index)
+static PyObject *BPy_IDArray_GetItem(BPy_IDArray *self, Py_ssize_t index)
{
if (index < 0 || index >= self->prop->len) {
PyErr_SetString(PyExc_IndexError, "index out of range!");
@@ -1722,7 +1722,7 @@ static PyObject *BPy_IDArray_GetItem(BPy_IDArray *self, int index)
return NULL;
}
-static int BPy_IDArray_SetItem(BPy_IDArray *self, int index, PyObject *value)
+static int BPy_IDArray_SetItem(BPy_IDArray *self, Py_ssize_t index, PyObject *value)
{
if (index < 0 || index >= self->prop->len) {
PyErr_SetString(PyExc_RuntimeError, "index out of range!");
@@ -2092,14 +2092,14 @@ static PyObject *BPy_IDGroup_ViewItems_CreatePyObject(BPy_IDProperty *group)
static struct PyModuleDef IDProp_types_module_def = {
PyModuleDef_HEAD_INIT,
- "idprop.types", /* m_name */
- NULL, /* m_doc */
- 0, /* m_size */
- NULL, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- NULL, /* m_free */
+ /*m_name*/ "idprop.types",
+ /*m_doc*/ NULL,
+ /*m_size*/ 0,
+ /*m_methods*/ NULL,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
static PyObject *BPyInit_idprop_types(void)
@@ -2141,14 +2141,14 @@ PyDoc_STRVAR(IDProp_module_doc,
"This module provides access id property types (currently mainly for docs).");
static struct PyModuleDef IDProp_module_def = {
PyModuleDef_HEAD_INIT,
- "idprop", /* m_name */
- IDProp_module_doc, /* m_doc */
- 0, /* m_size */
- IDProp_methods, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- NULL, /* m_free */
+ /*m_name*/ "idprop",
+ /*m_doc*/ IDProp_module_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ IDProp_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *BPyInit_idprop(void)
diff --git a/source/blender/python/generic/imbuf_py_api.c b/source/blender/python/generic/imbuf_py_api.c
index c6a817fa01c..77d450d496e 100644
--- a/source/blender/python/generic/imbuf_py_api.c
+++ b/source/blender/python/generic/imbuf_py_api.c
@@ -557,14 +557,14 @@ PyDoc_STRVAR(IMB_doc,
":class:`bpy.types.Image` data-block context.\n");
static struct PyModuleDef IMB_module_def = {
PyModuleDef_HEAD_INIT,
- "imbuf", /* m_name */
- IMB_doc, /* m_doc */
- 0, /* m_size */
- IMB_methods, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- NULL, /* m_free */
+ /*m_name*/ "imbuf",
+ /*m_doc*/ IMB_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ IMB_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *BPyInit_imbuf(void)
@@ -601,14 +601,14 @@ PyDoc_STRVAR(IMB_types_doc,
static struct PyModuleDef IMB_types_module_def = {
PyModuleDef_HEAD_INIT,
- "imbuf.types", /* m_name */
- IMB_types_doc, /* m_doc */
- 0, /* m_size */
- NULL, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- NULL, /* m_free */
+ /*m_name*/ "imbuf.types",
+ /*m_doc*/ IMB_types_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ NULL,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *BPyInit_imbuf_types(void)
diff --git a/source/blender/python/gpu/gpu_py_api.c b/source/blender/python/gpu/gpu_py_api.c
index a2075566f31..8f5897be649 100644
--- a/source/blender/python/gpu/gpu_py_api.c
+++ b/source/blender/python/gpu/gpu_py_api.c
@@ -32,8 +32,14 @@ PyDoc_STRVAR(pygpu_doc,
"Some higher level functions can be found in the `gpu_extras` module.");
static struct PyModuleDef pygpu_module_def = {
PyModuleDef_HEAD_INIT,
- .m_name = "gpu",
- .m_doc = pygpu_doc,
+ /*m_name*/ "gpu",
+ /*m_doc*/ pygpu_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ NULL,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *BPyInit_gpu(void)
diff --git a/source/blender/python/gpu/gpu_py_buffer.c b/source/blender/python/gpu/gpu_py_buffer.c
index 3dc141f400d..51bda9b4941 100644
--- a/source/blender/python/gpu/gpu_py_buffer.c
+++ b/source/blender/python/gpu/gpu_py_buffer.c
@@ -159,7 +159,7 @@ static BPyGPUBuffer *pygpu_buffer_make_from_data(PyObject *parent,
return buffer;
}
-static PyObject *pygpu_buffer__sq_item(BPyGPUBuffer *self, int i)
+static PyObject *pygpu_buffer__sq_item(BPyGPUBuffer *self, Py_ssize_t i)
{
if (i >= self->shape[0] || i < 0) {
PyErr_SetString(PyExc_IndexError, "array index out of range");
@@ -200,10 +200,10 @@ static PyObject *pygpu_buffer__sq_item(BPyGPUBuffer *self, int i)
static PyObject *pygpu_buffer_to_list(BPyGPUBuffer *self)
{
- int i, len = self->shape[0];
+ const Py_ssize_t len = self->shape[0];
PyObject *list = PyList_New(len);
- for (i = 0; i < len; i++) {
+ for (Py_ssize_t i = 0; i < len; i++) {
PyList_SET_ITEM(list, i, pygpu_buffer__sq_item(self, i));
}
@@ -313,7 +313,7 @@ static PyObject *pygpu_buffer__tp_repr(BPyGPUBuffer *self)
return repr;
}
-static int pygpu_buffer__sq_ass_item(BPyGPUBuffer *self, int i, PyObject *v);
+static int pygpu_buffer__sq_ass_item(BPyGPUBuffer *self, Py_ssize_t i, PyObject *v);
static int pygpu_buffer_ass_slice(BPyGPUBuffer *self,
Py_ssize_t begin,
@@ -430,7 +430,7 @@ static int pygpu_buffer__tp_is_gc(BPyGPUBuffer *self)
/* BPyGPUBuffer sequence methods */
-static int pygpu_buffer__sq_length(BPyGPUBuffer *self)
+static Py_ssize_t pygpu_buffer__sq_length(BPyGPUBuffer *self)
{
return self->shape[0];
}
@@ -458,7 +458,7 @@ static PyObject *pygpu_buffer_slice(BPyGPUBuffer *self, Py_ssize_t begin, Py_ssi
return list;
}
-static int pygpu_buffer__sq_ass_item(BPyGPUBuffer *self, int i, PyObject *v)
+static int pygpu_buffer__sq_ass_item(BPyGPUBuffer *self, Py_ssize_t i, PyObject *v)
{
if (i >= self->shape[0] || i < 0) {
PyErr_SetString(PyExc_IndexError, "array assignment index out of range");
diff --git a/source/blender/python/gpu/gpu_py_capabilities.c b/source/blender/python/gpu/gpu_py_capabilities.c
index dea057bf8e3..2345982eb42 100644
--- a/source/blender/python/gpu/gpu_py_capabilities.c
+++ b/source/blender/python/gpu/gpu_py_capabilities.c
@@ -301,9 +301,14 @@ static struct PyMethodDef pygpu_capabilities__tp_methods[] = {
PyDoc_STRVAR(pygpu_capabilities__tp_doc, "This module provides access to the GPU capabilities.");
static PyModuleDef pygpu_capabilities_module_def = {
PyModuleDef_HEAD_INIT,
- .m_name = "gpu.capabilities",
- .m_doc = pygpu_capabilities__tp_doc,
- .m_methods = pygpu_capabilities__tp_methods,
+ /*m_name*/ "gpu.capabilities",
+ /*m_doc*/ pygpu_capabilities__tp_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ pygpu_capabilities__tp_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *bpygpu_capabilities_init(void)
diff --git a/source/blender/python/gpu/gpu_py_matrix.c b/source/blender/python/gpu/gpu_py_matrix.c
index 18925a101da..7d80f763162 100644
--- a/source/blender/python/gpu/gpu_py_matrix.c
+++ b/source/blender/python/gpu/gpu_py_matrix.c
@@ -528,9 +528,14 @@ static struct PyMethodDef pygpu_matrix__tp_methods[] = {
PyDoc_STRVAR(pygpu_matrix__tp_doc, "This module provides access to the matrix stack.");
static PyModuleDef pygpu_matrix_module_def = {
PyModuleDef_HEAD_INIT,
- .m_name = "gpu.matrix",
- .m_doc = pygpu_matrix__tp_doc,
- .m_methods = pygpu_matrix__tp_methods,
+ /*m_name*/ "gpu.matrix",
+ /*m_doc*/ pygpu_matrix__tp_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ pygpu_matrix__tp_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *bpygpu_matrix_init(void)
diff --git a/source/blender/python/gpu/gpu_py_platform.c b/source/blender/python/gpu/gpu_py_platform.c
index c6e964405fa..5f9f653a3c4 100644
--- a/source/blender/python/gpu/gpu_py_platform.c
+++ b/source/blender/python/gpu/gpu_py_platform.c
@@ -141,9 +141,14 @@ static struct PyMethodDef pygpu_platform__tp_methods[] = {
PyDoc_STRVAR(pygpu_platform__tp_doc, "This module provides access to GPU Platform definitions.");
static PyModuleDef pygpu_platform_module_def = {
PyModuleDef_HEAD_INIT,
- .m_name = "gpu.platform",
- .m_doc = pygpu_platform__tp_doc,
- .m_methods = pygpu_platform__tp_methods,
+ /*m_name*/ "gpu.platform",
+ /*m_doc*/ pygpu_platform__tp_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ pygpu_platform__tp_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *bpygpu_platform_init(void)
diff --git a/source/blender/python/gpu/gpu_py_select.c b/source/blender/python/gpu/gpu_py_select.c
index 6d11e94433e..02e06e0ffd5 100644
--- a/source/blender/python/gpu/gpu_py_select.c
+++ b/source/blender/python/gpu/gpu_py_select.c
@@ -58,9 +58,14 @@ static struct PyMethodDef pygpu_select__tp_methods[] = {
PyDoc_STRVAR(pygpu_select__tp_doc, "This module provides access to selection.");
static PyModuleDef pygpu_select_module_def = {
PyModuleDef_HEAD_INIT,
- .m_name = "gpu.select",
- .m_doc = pygpu_select__tp_doc,
- .m_methods = pygpu_select__tp_methods,
+ /*m_name*/ "gpu.select",
+ /*m_doc*/ pygpu_select__tp_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ pygpu_select__tp_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *bpygpu_select_init(void)
diff --git a/source/blender/python/gpu/gpu_py_shader.c b/source/blender/python/gpu/gpu_py_shader.c
index a57b00e671e..7222cb1696a 100644
--- a/source/blender/python/gpu/gpu_py_shader.c
+++ b/source/blender/python/gpu/gpu_py_shader.c
@@ -923,9 +923,14 @@ PyDoc_STRVAR(pygpu_shader_module__tp_doc,
"\n" PYDOC_BUILTIN_SHADER_DESCRIPTION);
static PyModuleDef pygpu_shader_module_def = {
PyModuleDef_HEAD_INIT,
- .m_name = "gpu.shader",
- .m_doc = pygpu_shader_module__tp_doc,
- .m_methods = pygpu_shader_module__tp_methods,
+ /*m_name*/ "gpu.shader",
+ /*m_doc*/ pygpu_shader_module__tp_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ pygpu_shader_module__tp_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
/** \} */
diff --git a/source/blender/python/gpu/gpu_py_state.c b/source/blender/python/gpu/gpu_py_state.c
index 35ecc2aff40..f17a3fecfdd 100644
--- a/source/blender/python/gpu/gpu_py_state.c
+++ b/source/blender/python/gpu/gpu_py_state.c
@@ -431,9 +431,14 @@ static struct PyMethodDef pygpu_state__tp_methods[] = {
PyDoc_STRVAR(pygpu_state__tp_doc, "This module provides access to the gpu state.");
static PyModuleDef pygpu_state_module_def = {
PyModuleDef_HEAD_INIT,
- .m_name = "gpu.state",
- .m_doc = pygpu_state__tp_doc,
- .m_methods = pygpu_state__tp_methods,
+ /*m_name*/ "gpu.state",
+ /*m_doc*/ pygpu_state__tp_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ pygpu_state__tp_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *bpygpu_state_init(void)
diff --git a/source/blender/python/gpu/gpu_py_texture.c b/source/blender/python/gpu/gpu_py_texture.c
index ae004341304..11b44a2a35e 100644
--- a/source/blender/python/gpu/gpu_py_texture.c
+++ b/source/blender/python/gpu/gpu_py_texture.c
@@ -565,9 +565,14 @@ static struct PyMethodDef pygpu_texture__m_methods[] = {
PyDoc_STRVAR(pygpu_texture__m_doc, "This module provides utils for textures.");
static PyModuleDef pygpu_texture_module_def = {
PyModuleDef_HEAD_INIT,
- .m_name = "gpu.texture",
- .m_doc = pygpu_texture__m_doc,
- .m_methods = pygpu_texture__m_methods,
+ /*m_name*/ "gpu.texture",
+ /*m_doc*/ pygpu_texture__m_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ pygpu_texture__m_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
/** \} */
diff --git a/source/blender/python/gpu/gpu_py_types.c b/source/blender/python/gpu/gpu_py_types.c
index eccbebbd8dd..b5cabd93b42 100644
--- a/source/blender/python/gpu/gpu_py_types.c
+++ b/source/blender/python/gpu/gpu_py_types.c
@@ -19,7 +19,14 @@
static struct PyModuleDef pygpu_types_module_def = {
PyModuleDef_HEAD_INIT,
- .m_name = "gpu.types",
+ /*m_name*/ "gpu.types",
+ /*m_doc*/ NULL,
+ /*m_size*/ 0,
+ /*m_methods*/ NULL,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *bpygpu_types_init(void)
diff --git a/source/blender/python/intern/bpy_app_icons.c b/source/blender/python/intern/bpy_app_icons.c
index 918d96d9f44..65edbb597ca 100644
--- a/source/blender/python/intern/bpy_app_icons.c
+++ b/source/blender/python/intern/bpy_app_icons.c
@@ -162,14 +162,14 @@ static struct PyMethodDef M_AppIcons_methods[] = {
static struct PyModuleDef M_AppIcons_module_def = {
PyModuleDef_HEAD_INIT,
- "bpy.app.icons", /* m_name */
- NULL, /* m_doc */
- 0, /* m_size */
- M_AppIcons_methods, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- NULL, /* m_free */
+ /*m_name*/ "bpy.app.icons",
+ /*m_doc*/ NULL,
+ /*m_size*/ 0,
+ /*m_methods*/ M_AppIcons_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *BPY_app_icons_module(void)
diff --git a/source/blender/python/intern/bpy_app_timers.c b/source/blender/python/intern/bpy_app_timers.c
index 4adc200357b..e5de5121e9e 100644
--- a/source/blender/python/intern/bpy_app_timers.c
+++ b/source/blender/python/intern/bpy_app_timers.c
@@ -164,14 +164,14 @@ static struct PyMethodDef M_AppTimers_methods[] = {
static struct PyModuleDef M_AppTimers_module_def = {
PyModuleDef_HEAD_INIT,
- "bpy.app.timers", /* m_name */
- NULL, /* m_doc */
- 0, /* m_size */
- M_AppTimers_methods, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- NULL, /* m_free */
+ /*m_name*/ "bpy.app.timers",
+ /*m_doc*/ NULL,
+ /*m_size*/ 0,
+ /*m_methods*/ M_AppTimers_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *BPY_app_timers_module(void)
diff --git a/source/blender/python/intern/bpy_interface.c b/source/blender/python/intern/bpy_interface.c
index 3a095f4b9f3..a83dc464e43 100644
--- a/source/blender/python/intern/bpy_interface.c
+++ b/source/blender/python/intern/bpy_interface.c
@@ -768,14 +768,14 @@ extern void main_python_exit(void);
static struct PyModuleDef bpy_proxy_def = {
PyModuleDef_HEAD_INIT,
- "bpy", /* m_name */
- NULL, /* m_doc */
- 0, /* m_size */
- NULL, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- bpy_module_free, /* m_free */
+ /*m_name*/ "bpy",
+ /*m_doc*/ NULL,
+ /*m_size*/ 0,
+ /*m_methods*/ NULL,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ bpy_module_free,
};
typedef struct {
diff --git a/source/blender/python/intern/bpy_msgbus.c b/source/blender/python/intern/bpy_msgbus.c
index 4e41c44cab4..835ef8f7f1f 100644
--- a/source/blender/python/intern/bpy_msgbus.c
+++ b/source/blender/python/intern/bpy_msgbus.c
@@ -393,8 +393,14 @@ static struct PyMethodDef BPy_msgbus_methods[] = {
static struct PyModuleDef _bpy_msgbus_def = {
PyModuleDef_HEAD_INIT,
- .m_name = "msgbus",
- .m_methods = BPy_msgbus_methods,
+ /*m_name*/ "msgbus",
+ /*m_doc*/ NULL,
+ /*m_size*/ 0,
+ /*m_methods*/ BPy_msgbus_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *BPY_msgbus_module(void)
diff --git a/source/blender/python/intern/bpy_operator.c b/source/blender/python/intern/bpy_operator.c
index 2db8c08cfd4..546e67a521f 100644
--- a/source/blender/python/intern/bpy_operator.c
+++ b/source/blender/python/intern/bpy_operator.c
@@ -493,14 +493,14 @@ static struct PyMethodDef bpy_ops_methods[] = {
static struct PyModuleDef bpy_ops_module = {
PyModuleDef_HEAD_INIT,
- "_bpy.ops",
- NULL,
- -1, /* multiple "initialization" just copies the module dict. */
- bpy_ops_methods,
- NULL,
- NULL,
- NULL,
- NULL,
+ /*m_name*/ "_bpy.ops",
+ /*m_doc*/ NULL,
+ /*m_size*/ -1, /* multiple "initialization" just copies the module dict. */
+ /*m_methods*/ bpy_ops_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *BPY_operator_module(void)
diff --git a/source/blender/python/intern/bpy_path.c b/source/blender/python/intern/bpy_path.c
index f3a1a7cb1df..6b9a09b636b 100644
--- a/source/blender/python/intern/bpy_path.c
+++ b/source/blender/python/intern/bpy_path.c
@@ -22,14 +22,14 @@ extern const char *imb_ext_audio[];
/*----------------------------MODULE INIT-------------------------*/
static struct PyModuleDef _bpy_path_module_def = {
PyModuleDef_HEAD_INIT,
- "_bpy_path", /* m_name */
- NULL, /* m_doc */
- 0, /* m_size */
- NULL, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- NULL, /* m_free */
+ /*m_name*/ "_bpy_path",
+ /*m_doc*/ NULL,
+ /*m_size*/ 0,
+ /*m_methods*/ NULL,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *BPyInit__bpy_path(void)
diff --git a/source/blender/python/intern/bpy_props.c b/source/blender/python/intern/bpy_props.c
index b6c75f7a793..dec4c65e48d 100644
--- a/source/blender/python/intern/bpy_props.c
+++ b/source/blender/python/intern/bpy_props.c
@@ -4621,21 +4621,25 @@ static int props_clear(PyObject *UNUSED(self))
return 0;
}
-static struct PyModuleDef props_module = {
- PyModuleDef_HEAD_INIT,
- "bpy.props",
+PyDoc_STRVAR(
+ props_module_doc,
"This module defines properties to extend Blender's internal data. The result of these "
"functions"
" is used to assign properties to classes registered with Blender and can't be used "
"directly.\n"
"\n"
- ".. note:: All parameters to these functions must be passed as keywords.\n",
- -1, /* multiple "initialization" just copies the module dict. */
- props_methods,
- NULL,
- props_visit,
- props_clear,
- NULL,
+ ".. note:: All parameters to these functions must be passed as keywords.\n");
+
+static struct PyModuleDef props_module = {
+ PyModuleDef_HEAD_INIT,
+ /*m_name*/ "bpy.props",
+ /*m_doc*/ props_module_doc,
+ /*m_size*/ -1, /* multiple "initialization" just copies the module dict. */
+ /*m_methods*/ props_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ props_visit,
+ /*m_clear*/ props_clear,
+ /*m_free*/ NULL,
};
PyObject *BPY_rna_props(void)
diff --git a/source/blender/python/intern/bpy_rna.c b/source/blender/python/intern/bpy_rna.c
index 908c93388a9..c61cab5aa6f 100644
--- a/source/blender/python/intern/bpy_rna.c
+++ b/source/blender/python/intern/bpy_rna.c
@@ -993,7 +993,7 @@ static PyObject *pyrna_prop_str(BPy_PropertyRNA *self)
}
if (len != -1) {
- sprintf(--c, "[%d]", len);
+ BLI_sprintf(--c, "[%d]", len);
}
/* If a pointer, try to print name of pointer target too. */
@@ -2328,7 +2328,7 @@ static int pyrna_prop_collection_ass_subscript_int(BPy_PropertyRNA *self,
return 0;
}
-static PyObject *pyrna_prop_array_subscript_int(BPy_PropertyArrayRNA *self, int keynum)
+static PyObject *pyrna_prop_array_subscript_int(BPy_PropertyArrayRNA *self, Py_ssize_t keynum)
{
int len;
@@ -2883,7 +2883,7 @@ static PyObject *pyrna_prop_array_subscript(BPy_PropertyArrayRNA *self, PyObject
if (key_slice->start == Py_None && key_slice->stop == Py_None) {
/* NOTE: no significant advantage with optimizing [:] slice as with collections,
* but include here for consistency with collection slice func */
- const Py_ssize_t len = (Py_ssize_t)pyrna_prop_array_length(self);
+ const Py_ssize_t len = pyrna_prop_array_length(self);
return pyrna_prop_array_subscript_slice(self, &self->ptr, self->prop, 0, len, len);
}
@@ -3497,8 +3497,10 @@ PyDoc_STRVAR(pyrna_struct_keys_doc,
" :return: custom property keys.\n"
" :rtype: :class:`idprop.type.IDPropertyGroupViewKeys`\n"
"\n" BPY_DOC_ID_PROP_TYPE_NOTE);
-static PyObject *pyrna_struct_keys(BPy_PropertyRNA *self)
+static PyObject *pyrna_struct_keys(BPy_StructRNA *self)
{
+ PYRNA_STRUCT_CHECK_OBJ(self);
+
if (RNA_struct_idprops_check(self->ptr.type) == 0) {
PyErr_SetString(PyExc_TypeError, "bpy_struct.keys(): this type doesn't support IDProperties");
return NULL;
@@ -3518,8 +3520,10 @@ PyDoc_STRVAR(pyrna_struct_items_doc,
" :return: custom property key, value pairs.\n"
" :rtype: :class:`idprop.type.IDPropertyGroupViewItems`\n"
"\n" BPY_DOC_ID_PROP_TYPE_NOTE);
-static PyObject *pyrna_struct_items(BPy_PropertyRNA *self)
+static PyObject *pyrna_struct_items(BPy_StructRNA *self)
{
+ PYRNA_STRUCT_CHECK_OBJ(self);
+
if (RNA_struct_idprops_check(self->ptr.type) == 0) {
PyErr_SetString(PyExc_TypeError, "bpy_struct.items(): this type doesn't support IDProperties");
return NULL;
@@ -3539,8 +3543,10 @@ PyDoc_STRVAR(pyrna_struct_values_doc,
" :return: custom property values.\n"
" :rtype: :class:`idprop.type.IDPropertyGroupViewValues`\n"
"\n" BPY_DOC_ID_PROP_TYPE_NOTE);
-static PyObject *pyrna_struct_values(BPy_PropertyRNA *self)
+static PyObject *pyrna_struct_values(BPy_StructRNA *self)
{
+ PYRNA_STRUCT_CHECK_OBJ(self);
+
if (RNA_struct_idprops_check(self->ptr.type) == 0) {
PyErr_SetString(PyExc_TypeError,
"bpy_struct.values(): this type doesn't support IDProperties");
@@ -7672,14 +7678,14 @@ static struct PyMethodDef bpy_types_module_methods[] = {
PyDoc_STRVAR(bpy_types_module_doc, "Access to internal Blender types");
static struct PyModuleDef bpy_types_module_def = {
PyModuleDef_HEAD_INIT,
- "bpy.types", /* m_name */
- bpy_types_module_doc, /* m_doc */
- sizeof(struct BPy_TypesModule_State), /* m_size */
- bpy_types_module_methods, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- NULL, /* m_free */
+ /*m_name*/ "bpy.types",
+ /*m_doc*/ bpy_types_module_doc,
+ /*m_size*/ sizeof(struct BPy_TypesModule_State),
+ /*m_methods*/ bpy_types_module_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *BPY_rna_types(void)
diff --git a/source/blender/python/intern/bpy_utils_previews.c b/source/blender/python/intern/bpy_utils_previews.c
index aa79ac56347..83e74816020 100644
--- a/source/blender/python/intern/bpy_utils_previews.c
+++ b/source/blender/python/intern/bpy_utils_previews.c
@@ -163,14 +163,14 @@ PyDoc_STRVAR(
"(low-level API, not exposed to final users).");
static struct PyModuleDef bpy_utils_previews_module = {
PyModuleDef_HEAD_INIT,
- "bpy._utils_previews",
- bpy_utils_previews_doc,
- 0,
- bpy_utils_previews_methods,
- NULL,
- NULL,
- NULL,
- NULL,
+ /*m_name*/ "bpy._utils_previews",
+ /*m_doc*/ bpy_utils_previews_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ bpy_utils_previews_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *BPY_utils_previews_module(void)
diff --git a/source/blender/python/intern/bpy_utils_units.c b/source/blender/python/intern/bpy_utils_units.c
index 075a68f31f9..95d7f4f2eb7 100644
--- a/source/blender/python/intern/bpy_utils_units.c
+++ b/source/blender/python/intern/bpy_utils_units.c
@@ -336,14 +336,14 @@ PyDoc_STRVAR(bpyunits_doc, "This module contains some data/methods regarding uni
static struct PyModuleDef bpyunits_module = {
PyModuleDef_HEAD_INIT,
- "bpy.utils.units",
- bpyunits_doc,
- -1, /* multiple "initialization" just copies the module dict. */
- bpyunits_methods,
- NULL,
- NULL,
- NULL,
- NULL,
+ /*m_name*/ "bpy.utils.units",
+ /*m_doc*/ bpyunits_doc,
+ /*m_size*/ -1, /* multiple "initialization" just copies the module dict. */
+ /*m_methods*/ bpyunits_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyObject *BPY_utils_units(void)
diff --git a/source/blender/python/mathutils/mathutils.c b/source/blender/python/mathutils/mathutils.c
index c4c0659100b..ef5339c3aa3 100644
--- a/source/blender/python/mathutils/mathutils.c
+++ b/source/blender/python/mathutils/mathutils.c
@@ -766,14 +766,14 @@ static struct PyMethodDef M_Mathutils_methods[] = {
static struct PyModuleDef M_Mathutils_module_def = {
PyModuleDef_HEAD_INIT,
- "mathutils", /* m_name */
- M_Mathutils_doc, /* m_doc */
- 0, /* m_size */
- M_Mathutils_methods, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- NULL, /* m_free */
+ /*m_name*/ "mathutils",
+ /*m_doc*/ M_Mathutils_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ M_Mathutils_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
/* submodules only */
diff --git a/source/blender/python/mathutils/mathutils_Color.c b/source/blender/python/mathutils/mathutils_Color.c
index e4f1ea87563..1a1ea1a2535 100644
--- a/source/blender/python/mathutils/mathutils_Color.c
+++ b/source/blender/python/mathutils/mathutils_Color.c
@@ -351,13 +351,13 @@ static Py_hash_t Color_hash(ColorObject *self)
* \{ */
/** Sequence length: `len(object)`. */
-static int Color_len(ColorObject *UNUSED(self))
+static Py_ssize_t Color_len(ColorObject *UNUSED(self))
{
return COLOR_SIZE;
}
/** Sequence accessor (get): `x = object[i]`. */
-static PyObject *Color_item(ColorObject *self, int i)
+static PyObject *Color_item(ColorObject *self, Py_ssize_t i)
{
if (i < 0) {
i = COLOR_SIZE - i;
@@ -378,7 +378,7 @@ static PyObject *Color_item(ColorObject *self, int i)
}
/** Sequence accessor (set): `object[i] = x`. */
-static int Color_ass_item(ColorObject *self, int i, PyObject *value)
+static int Color_ass_item(ColorObject *self, Py_ssize_t i, PyObject *value)
{
float f;
diff --git a/source/blender/python/mathutils/mathutils_Euler.c b/source/blender/python/mathutils/mathutils_Euler.c
index d72ec5bbc77..551e5d6706d 100644
--- a/source/blender/python/mathutils/mathutils_Euler.c
+++ b/source/blender/python/mathutils/mathutils_Euler.c
@@ -434,13 +434,13 @@ static Py_hash_t Euler_hash(EulerObject *self)
* \{ */
/** Sequence length: `len(object)`. */
-static int Euler_len(EulerObject *UNUSED(self))
+static Py_ssize_t Euler_len(EulerObject *UNUSED(self))
{
return EULER_SIZE;
}
/** Sequence accessor (get): `x = object[i]`. */
-static PyObject *Euler_item(EulerObject *self, int i)
+static PyObject *Euler_item(EulerObject *self, Py_ssize_t i)
{
if (i < 0) {
i = EULER_SIZE - i;
@@ -461,7 +461,7 @@ static PyObject *Euler_item(EulerObject *self, int i)
}
/** Sequence accessor (set): `object[i] = x`. */
-static int Euler_ass_item(EulerObject *self, int i, PyObject *value)
+static int Euler_ass_item(EulerObject *self, Py_ssize_t i, PyObject *value)
{
float f;
diff --git a/source/blender/python/mathutils/mathutils_Matrix.c b/source/blender/python/mathutils/mathutils_Matrix.c
index 858b01b2a84..21ad79bc94d 100644
--- a/source/blender/python/mathutils/mathutils_Matrix.c
+++ b/source/blender/python/mathutils/mathutils_Matrix.c
@@ -2379,7 +2379,7 @@ static Py_hash_t Matrix_hash(MatrixObject *self)
* \{ */
/** Sequence length: `len(object)`. */
-static int Matrix_len(MatrixObject *self)
+static Py_ssize_t Matrix_len(MatrixObject *self)
{
return self->row_num;
}
@@ -2388,7 +2388,7 @@ static int Matrix_len(MatrixObject *self)
* Sequence accessor (get): `x = object[i]`.
* \note the wrapped vector gives direct access to the matrix data.
*/
-static PyObject *Matrix_item_row(MatrixObject *self, int row)
+static PyObject *Matrix_item_row(MatrixObject *self, Py_ssize_t row)
{
if (BaseMath_ReadCallback_ForWrite(self) == -1) {
return NULL;
@@ -2407,7 +2407,7 @@ static PyObject *Matrix_item_row(MatrixObject *self, int row)
* Sequence accessor (get): `x = object.col[i]`.
* \note the wrapped vector gives direct access to the matrix data.
*/
-static PyObject *Matrix_item_col(MatrixObject *self, int col)
+static PyObject *Matrix_item_col(MatrixObject *self, Py_ssize_t col)
{
if (BaseMath_ReadCallback_ForWrite(self) == -1) {
return NULL;
@@ -3633,15 +3633,15 @@ static int MatrixAccess_len(MatrixAccessObject *self)
return (self->type == MAT_ACCESS_ROW) ? self->matrix_user->row_num : self->matrix_user->col_num;
}
-static PyObject *MatrixAccess_slice(MatrixAccessObject *self, int begin, int end)
+static PyObject *MatrixAccess_slice(MatrixAccessObject *self, Py_ssize_t begin, Py_ssize_t end)
{
PyObject *tuple;
- int count;
+ Py_ssize_t count;
/* row/col access */
MatrixObject *matrix_user = self->matrix_user;
int matrix_access_len;
- PyObject *(*Matrix_item_new)(MatrixObject *, int);
+ PyObject *(*Matrix_item_new)(MatrixObject *, Py_ssize_t);
if (self->type == MAT_ACCESS_ROW) {
matrix_access_len = matrix_user->row_num;
diff --git a/source/blender/python/mathutils/mathutils_Quaternion.c b/source/blender/python/mathutils/mathutils_Quaternion.c
index 4c64746b764..f2348713433 100644
--- a/source/blender/python/mathutils/mathutils_Quaternion.c
+++ b/source/blender/python/mathutils/mathutils_Quaternion.c
@@ -881,13 +881,13 @@ static Py_hash_t Quaternion_hash(QuaternionObject *self)
* \{ */
/** Sequence length: `len(object)`. */
-static int Quaternion_len(QuaternionObject *UNUSED(self))
+static Py_ssize_t Quaternion_len(QuaternionObject *UNUSED(self))
{
return QUAT_SIZE;
}
/** Sequence accessor (get): `x = object[i]`. */
-static PyObject *Quaternion_item(QuaternionObject *self, int i)
+static PyObject *Quaternion_item(QuaternionObject *self, Py_ssize_t i)
{
if (i < 0) {
i = QUAT_SIZE - i;
@@ -908,7 +908,7 @@ static PyObject *Quaternion_item(QuaternionObject *self, int i)
}
/** Sequence accessor (set): `object[i] = x`. */
-static int Quaternion_ass_item(QuaternionObject *self, int i, PyObject *ob)
+static int Quaternion_ass_item(QuaternionObject *self, Py_ssize_t i, PyObject *ob)
{
float f;
diff --git a/source/blender/python/mathutils/mathutils_Vector.c b/source/blender/python/mathutils/mathutils_Vector.c
index 0f534e76e73..c5a002a6058 100644
--- a/source/blender/python/mathutils/mathutils_Vector.c
+++ b/source/blender/python/mathutils/mathutils_Vector.c
@@ -1667,7 +1667,7 @@ static Py_hash_t Vector_hash(VectorObject *self)
* \{ */
/** Sequence length: `len(object)`. */
-static int Vector_len(VectorObject *self)
+static Py_ssize_t Vector_len(VectorObject *self)
{
return self->vec_num;
}
@@ -1699,7 +1699,7 @@ static PyObject *vector_item_internal(VectorObject *self, int i, const bool is_a
}
/** Sequence accessor (get): `x = object[i]`. */
-static PyObject *Vector_item(VectorObject *self, int i)
+static PyObject *Vector_item(VectorObject *self, Py_ssize_t i)
{
return vector_item_internal(self, i, false);
}
@@ -1747,7 +1747,7 @@ static int vector_ass_item_internal(VectorObject *self, int i, PyObject *value,
}
/** Sequence accessor (set): `object[i] = x`. */
-static int Vector_ass_item(VectorObject *self, int i, PyObject *value)
+static int Vector_ass_item(VectorObject *self, Py_ssize_t i, PyObject *value)
{
return vector_ass_item_internal(self, i, value, false);
}
diff --git a/source/blender/python/mathutils/mathutils_bvhtree.c b/source/blender/python/mathutils/mathutils_bvhtree.c
index 11f8d57397e..7b6c444515b 100644
--- a/source/blender/python/mathutils/mathutils_bvhtree.c
+++ b/source/blender/python/mathutils/mathutils_bvhtree.c
@@ -1293,14 +1293,14 @@ PyDoc_STRVAR(py_bvhtree_doc,
"BVH tree structures for proximity searches and ray casts on geometry.");
static struct PyModuleDef bvhtree_moduledef = {
PyModuleDef_HEAD_INIT,
- "mathutils.bvhtree", /* m_name */
- py_bvhtree_doc, /* m_doc */
- 0, /* m_size */
- NULL, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- NULL, /* m_free */
+ /*m_name*/ "mathutils.bvhtree",
+ /*m_doc*/ py_bvhtree_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ NULL,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyMODINIT_FUNC PyInit_mathutils_bvhtree(void)
diff --git a/source/blender/python/mathutils/mathutils_geometry.c b/source/blender/python/mathutils/mathutils_geometry.c
index 52ea2a9ed31..59a3bc40b1c 100644
--- a/source/blender/python/mathutils/mathutils_geometry.c
+++ b/source/blender/python/mathutils/mathutils_geometry.c
@@ -1793,14 +1793,14 @@ static PyMethodDef M_Geometry_methods[] = {
static struct PyModuleDef M_Geometry_module_def = {
PyModuleDef_HEAD_INIT,
- "mathutils.geometry", /* m_name */
- M_Geometry_doc, /* m_doc */
- 0, /* m_size */
- M_Geometry_methods, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- NULL, /* m_free */
+ /*m_name*/ "mathutils.geometry",
+ /*m_doc*/ M_Geometry_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ M_Geometry_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
/*----------------------------MODULE INIT-------------------------*/
diff --git a/source/blender/python/mathutils/mathutils_interpolate.c b/source/blender/python/mathutils/mathutils_interpolate.c
index 10f42d9b070..76a0fc55774 100644
--- a/source/blender/python/mathutils/mathutils_interpolate.c
+++ b/source/blender/python/mathutils/mathutils_interpolate.c
@@ -89,14 +89,14 @@ static PyMethodDef M_Interpolate_methods[] = {
static struct PyModuleDef M_Interpolate_module_def = {
PyModuleDef_HEAD_INIT,
- "mathutils.interpolate", /* m_name */
- M_Interpolate_doc, /* m_doc */
- 0, /* m_size */
- M_Interpolate_methods, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- NULL, /* m_free */
+ /*m_name*/ "mathutils.interpolate",
+ /*m_doc*/ M_Interpolate_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ M_Interpolate_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
/*----------------------------MODULE INIT-------------------------*/
diff --git a/source/blender/python/mathutils/mathutils_kdtree.c b/source/blender/python/mathutils/mathutils_kdtree.c
index 02d9482b575..ace8ccdeb2a 100644
--- a/source/blender/python/mathutils/mathutils_kdtree.c
+++ b/source/blender/python/mathutils/mathutils_kdtree.c
@@ -428,14 +428,14 @@ PyTypeObject PyKDTree_Type = {
PyDoc_STRVAR(py_kdtree_doc, "Generic 3-dimensional kd-tree to perform spatial searches.");
static struct PyModuleDef kdtree_moduledef = {
PyModuleDef_HEAD_INIT,
- "mathutils.kdtree", /* m_name */
- py_kdtree_doc, /* m_doc */
- 0, /* m_size */
- NULL, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- NULL, /* m_free */
+ /*m_name*/ "mathutils.kdtree",
+ /*m_doc*/ py_kdtree_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ NULL,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
PyMODINIT_FUNC PyInit_mathutils_kdtree(void)
diff --git a/source/blender/python/mathutils/mathutils_noise.c b/source/blender/python/mathutils/mathutils_noise.c
index 3c564142160..869201bbcfd 100644
--- a/source/blender/python/mathutils/mathutils_noise.c
+++ b/source/blender/python/mathutils/mathutils_noise.c
@@ -1085,14 +1085,14 @@ static PyMethodDef M_Noise_methods[] = {
static struct PyModuleDef M_Noise_module_def = {
PyModuleDef_HEAD_INIT,
- "mathutils.noise", /* m_name */
- M_Noise_doc, /* m_doc */
- 0, /* m_size */
- M_Noise_methods, /* m_methods */
- NULL, /* m_slots */
- NULL, /* m_traverse */
- NULL, /* m_clear */
- NULL, /* m_free */
+ /*m_name*/ "mathutils.noise",
+ /*m_doc*/ M_Noise_doc,
+ /*m_size*/ 0,
+ /*m_methods*/ M_Noise_methods,
+ /*m_slots*/ NULL,
+ /*m_traverse*/ NULL,
+ /*m_clear*/ NULL,
+ /*m_free*/ NULL,
};
/*----------------------------MODULE INIT-------------------------*/
diff --git a/source/blender/sequencer/intern/disk_cache.c b/source/blender/sequencer/intern/disk_cache.c
index 596a28201cc..beb2c77b003 100644
--- a/source/blender/sequencer/intern/disk_cache.c
+++ b/source/blender/sequencer/intern/disk_cache.c
@@ -303,7 +303,8 @@ static void seq_disk_cache_get_dir(
char project_dir[FILE_MAX];
seq_disk_cache_get_project_dir(disk_cache, project_dir, sizeof(project_dir));
- sprintf(scene_name, "%s-%" PRId64, scene->id.name, disk_cache->timestamp);
+ BLI_snprintf(
+ scene_name, sizeof(scene_name), "%s-%" PRId64, scene->id.name, disk_cache->timestamp);
BLI_strncpy(seq_name, seq->name, sizeof(seq_name));
BLI_filename_make_safe(scene_name);
BLI_filename_make_safe(seq_name);
@@ -319,14 +320,15 @@ static void seq_disk_cache_get_file_path(SeqDiskCache *disk_cache,
seq_disk_cache_get_dir(disk_cache, key->context.scene, key->seq, path, path_len);
int frameno = (int)key->frame_index / DCACHE_IMAGES_PER_FILE;
char cache_filename[FILE_MAXFILE];
- sprintf(cache_filename,
- DCACHE_FNAME_FORMAT,
- key->type,
- key->context.rectx,
- key->context.recty,
- key->context.preview_render_size,
- key->context.view_id,
- frameno);
+ BLI_snprintf(cache_filename,
+ sizeof(cache_filename),
+ DCACHE_FNAME_FORMAT,
+ key->type,
+ key->context.rectx,
+ key->context.recty,
+ key->context.preview_render_size,
+ key->context.view_id,
+ frameno);
BLI_path_append(path, path_len, cache_filename);
}