Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/util')
-rw-r--r--intern/cycles/util/util_math_fast.h2
-rw-r--r--intern/cycles/util/util_simd.h8
-rw-r--r--intern/cycles/util/util_sseb.h12
-rw-r--r--intern/cycles/util/util_ssef.h4
-rw-r--r--intern/cycles/util/util_ssei.h13
-rw-r--r--intern/cycles/util/util_texture.h12
6 files changed, 21 insertions, 30 deletions
diff --git a/intern/cycles/util/util_math_fast.h b/intern/cycles/util/util_math_fast.h
index 107b36ce6cd..5ae56290f05 100644
--- a/intern/cycles/util/util_math_fast.h
+++ b/intern/cycles/util/util_math_fast.h
@@ -362,7 +362,7 @@ ccl_device float fast_atan2f(float y, float x)
ccl_device float fast_log2f(float x)
{
/* NOTE: clamp to avoid special cases and make result "safe" from large
- * negative values/nans. */
+ * negative values/NAN's. */
x = clamp(x, FLT_MIN, FLT_MAX);
unsigned bits = __float_as_uint(x);
int exponent = (int)(bits >> 23) - 127;
diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h
index 718ec9266b1..8e8caa98a1b 100644
--- a/intern/cycles/util/util_simd.h
+++ b/intern/cycles/util/util_simd.h
@@ -124,7 +124,7 @@ static struct StepTy {
template<class type, int i0, int i1, int i2, int i3> type shuffle_neon(const type &a)
{
if (i0 == i1 && i0 == i2 && i0 == i3) {
- return vdupq_laneq_s32(a, i0);
+ return type(vdupq_laneq_s32(int32x4_t(a), i0));
}
static const uint8_t tbl[16] = {(i0 * 4) + 0,
(i0 * 4) + 1,
@@ -143,7 +143,7 @@ template<class type, int i0, int i1, int i2, int i3> type shuffle_neon(const typ
(i3 * 4) + 2,
(i3 * 4) + 3};
- return vqtbl1q_s8(int8x16_t(a), *(int8x16_t *)tbl);
+ return type(vqtbl1q_s8(int8x16_t(a), *(uint8x16_t *)tbl));
}
template<class type, int i0, int i1, int i2, int i3>
@@ -167,7 +167,7 @@ type shuffle_neon(const type &a, const type &b)
(i3 * 4) + 2,
(i3 * 4) + 3};
- return vqtbl1q_s8(int8x16_t(b), *(int8x16_t *)tbl);
+ return type(vqtbl1q_s8(int8x16_t(b), *(uint8x16_t *)tbl));
}
else {
@@ -188,7 +188,7 @@ type shuffle_neon(const type &a, const type &b)
(i3 * 4) + 2 + 16,
(i3 * 4) + 3 + 16};
- return vqtbl2q_s8((int8x16x2_t){a, b}, *(int8x16_t *)tbl);
+ return type(vqtbl2q_s8((int8x16x2_t){int8x16_t(a), int8x16_t(b)}, *(uint8x16_t *)tbl));
}
}
#endif /* __KERNEL_NEON */
diff --git a/intern/cycles/util/util_sseb.h b/intern/cycles/util/util_sseb.h
index 1488da46b09..4dbd5b8046e 100644
--- a/intern/cycles/util/util_sseb.h
+++ b/intern/cycles/util/util_sseb.h
@@ -283,7 +283,7 @@ __forceinline uint32_t popcnt(const sseb &a)
{
# if defined(__KERNEL_NEON__)
const int32x4_t mask = {1, 1, 1, 1};
- int32x4_t t = vandq_s32(a.m128, mask);
+ int32x4_t t = vandq_s32(vreinterpretq_s32_m128(a.m128), mask);
return vaddvq_s32(t);
# else
return _mm_popcnt_u32(_mm_movemask_ps(a));
@@ -299,7 +299,7 @@ __forceinline uint32_t popcnt(const sseb &a)
__forceinline bool reduce_and(const sseb &a)
{
# if defined(__KERNEL_NEON__)
- return vaddvq_s32(a.m128) == -4;
+ return vaddvq_s32(vreinterpretq_s32_m128(a.m128)) == -4;
# else
return _mm_movemask_ps(a) == 0xf;
# endif
@@ -307,7 +307,7 @@ __forceinline bool reduce_and(const sseb &a)
__forceinline bool reduce_or(const sseb &a)
{
# if defined(__KERNEL_NEON__)
- return vaddvq_s32(a.m128) != 0x0;
+ return vaddvq_s32(vreinterpretq_s32_m128(a.m128)) != 0x0;
# else
return _mm_movemask_ps(a) != 0x0;
# endif
@@ -315,7 +315,7 @@ __forceinline bool reduce_or(const sseb &a)
__forceinline bool all(const sseb &b)
{
# if defined(__KERNEL_NEON__)
- return vaddvq_s32(b.m128) == -4;
+ return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) == -4;
# else
return _mm_movemask_ps(b) == 0xf;
# endif
@@ -323,7 +323,7 @@ __forceinline bool all(const sseb &b)
__forceinline bool any(const sseb &b)
{
# if defined(__KERNEL_NEON__)
- return vaddvq_s32(b.m128) != 0x0;
+ return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) != 0x0;
# else
return _mm_movemask_ps(b) != 0x0;
# endif
@@ -331,7 +331,7 @@ __forceinline bool any(const sseb &b)
__forceinline bool none(const sseb &b)
{
# if defined(__KERNEL_NEON__)
- return vaddvq_s32(b.m128) == 0x0;
+ return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) == 0x0;
# else
return _mm_movemask_ps(b) == 0x0;
# endif
diff --git a/intern/cycles/util/util_ssef.h b/intern/cycles/util/util_ssef.h
index d039b50a7d2..0c81ed87553 100644
--- a/intern/cycles/util/util_ssef.h
+++ b/intern/cycles/util/util_ssef.h
@@ -596,7 +596,7 @@ template<size_t i0, size_t i1, size_t i2, size_t i3>
__forceinline const ssef shuffle(const ssef &b)
{
# ifdef __KERNEL_NEON__
- return shuffle_neon<ssef, i0, i1, i2, i3>(b.m128);
+ return shuffle_neon<float32x4_t, i0, i1, i2, i3>(b.m128);
# else
return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(i3, i2, i1, i0)));
# endif
@@ -625,7 +625,7 @@ __forceinline const ssef shuffle(const ssef &a, const ssef &b)
template<size_t i0> __forceinline const ssef shuffle(const ssef &a, const ssef &b)
{
# ifdef __KERNEL_NEON__
- return shuffle<float32x4_t, i0, i0, i0, i0>(a, b);
+ return shuffle_neon<float32x4_t, i0, i0, i0, i0>(a, b);
# else
return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i0, i0, i0, i0));
# endif
diff --git a/intern/cycles/util/util_ssei.h b/intern/cycles/util/util_ssei.h
index 3ec69ab3700..cd51dbff2f1 100644
--- a/intern/cycles/util/util_ssei.h
+++ b/intern/cycles/util/util_ssei.h
@@ -446,7 +446,8 @@ template<size_t i0, size_t i1, size_t i2, size_t i3>
__forceinline const ssei shuffle(const ssei &a)
{
# ifdef __KERNEL_NEON__
- return shuffle_neon<ssei, i0, i1, i2, i3>(a);
+ int32x4_t result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a));
+ return vreinterpretq_m128i_s32(result);
# else
return _mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0));
# endif
@@ -456,7 +457,9 @@ template<size_t i0, size_t i1, size_t i2, size_t i3>
__forceinline const ssei shuffle(const ssei &a, const ssei &b)
{
# ifdef __KERNEL_NEON__
- return shuffle_neon<ssei, i0, i1, i2, i3>(a, b);
+ int32x4_t result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a),
+ vreinterpretq_s32_m128i(b));
+ return vreinterpretq_m128i_s32(result);
# else
return _mm_castps_si128(
_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0)));
@@ -514,7 +517,7 @@ __forceinline const ssei vreduce_add(const ssei &v)
__forceinline int reduce_min(const ssei &v)
{
# ifdef __KERNEL_NEON__
- return vminvq_s32(v);
+ return vminvq_s32(vreinterpretq_s32_m128i(v));
# else
return extract<0>(vreduce_min(v));
# endif
@@ -522,7 +525,7 @@ __forceinline int reduce_min(const ssei &v)
__forceinline int reduce_max(const ssei &v)
{
# ifdef __KERNEL_NEON__
- return vmaxvq_s32(v);
+ return vmaxvq_s32(vreinterpretq_s32_m128i(v));
# else
return extract<0>(vreduce_max(v));
# endif
@@ -530,7 +533,7 @@ __forceinline int reduce_max(const ssei &v)
__forceinline int reduce_add(const ssei &v)
{
# ifdef __KERNEL_NEON__
- return vaddvq_s32(v);
+ return vaddvq_s32(vreinterpretq_s32_m128i(v));
# else
return extract<0>(vreduce_add(v));
# endif
diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h
index b445ab1488f..71bf9c65911 100644
--- a/intern/cycles/util/util_texture.h
+++ b/intern/cycles/util/util_texture.h
@@ -21,18 +21,12 @@
CCL_NAMESPACE_BEGIN
-/* Texture limits on devices. */
-#define TEX_NUM_MAX (INT_MAX >> 4)
-
/* Color to use when textures are not found. */
#define TEX_IMAGE_MISSING_R 1
#define TEX_IMAGE_MISSING_G 0
#define TEX_IMAGE_MISSING_B 1
#define TEX_IMAGE_MISSING_A 1
-/* Texture type. */
-#define kernel_tex_type(tex) (tex & IMAGE_DATA_TYPE_MASK)
-
/* Interpolation types for textures
* cuda also use texture space to store other objects */
typedef enum InterpolationType {
@@ -45,9 +39,6 @@ typedef enum InterpolationType {
INTERPOLATION_NUM_TYPES,
} InterpolationType;
-/* Texture types
- * Since we store the type in the lower bits of a flat index,
- * the shift and bit mask constant below need to be kept in sync. */
typedef enum ImageDataType {
IMAGE_DATA_TYPE_FLOAT4 = 0,
IMAGE_DATA_TYPE_BYTE4 = 1,
@@ -75,9 +66,6 @@ typedef enum ImageAlphaType {
IMAGE_ALPHA_NUM_TYPES,
} ImageAlphaType;
-#define IMAGE_DATA_TYPE_SHIFT 4
-#define IMAGE_DATA_TYPE_MASK 0xF
-
/* Extension types for textures.
*
* Defines how the image is extrapolated past its original bounds. */