Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Mours <pmours@nvidia.com>2021-04-20 15:00:05 +0300
committerPatrick Mours <pmours@nvidia.com>2021-04-20 19:45:46 +0300
commit847579b422507917c4252ecc5c777bf5e0fc6f09 (patch)
tree6f9def3399bd8c01cb5e45d3e0eeab0406f47cca /intern/cycles/util
parentf2626f14209e574eb23f115bf84a3452bfb9a89c (diff)
Add support for building on Linux aarch64
Differential Revision: https://developer.blender.org/D10958
Diffstat (limited to 'intern/cycles/util')
-rw-r--r--intern/cycles/util/util_simd.h8
-rw-r--r--intern/cycles/util/util_sseb.h12
-rw-r--r--intern/cycles/util/util_ssef.h4
-rw-r--r--intern/cycles/util/util_ssei.h12
-rw-r--r--intern/cycles/util/util_system.cpp25
5 files changed, 42 insertions, 19 deletions
diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h
index 718ec9266b1..8e8caa98a1b 100644
--- a/intern/cycles/util/util_simd.h
+++ b/intern/cycles/util/util_simd.h
@@ -124,7 +124,7 @@ static struct StepTy {
template<class type, int i0, int i1, int i2, int i3> type shuffle_neon(const type &a)
{
if (i0 == i1 && i0 == i2 && i0 == i3) {
- return vdupq_laneq_s32(a, i0);
+ return type(vdupq_laneq_s32(int32x4_t(a), i0));
}
static const uint8_t tbl[16] = {(i0 * 4) + 0,
(i0 * 4) + 1,
@@ -143,7 +143,7 @@ template<class type, int i0, int i1, int i2, int i3> type shuffle_neon(const typ
(i3 * 4) + 2,
(i3 * 4) + 3};
- return vqtbl1q_s8(int8x16_t(a), *(int8x16_t *)tbl);
+ return type(vqtbl1q_s8(int8x16_t(a), *(uint8x16_t *)tbl));
}
template<class type, int i0, int i1, int i2, int i3>
@@ -167,7 +167,7 @@ type shuffle_neon(const type &a, const type &b)
(i3 * 4) + 2,
(i3 * 4) + 3};
- return vqtbl1q_s8(int8x16_t(b), *(int8x16_t *)tbl);
+ return type(vqtbl1q_s8(int8x16_t(b), *(uint8x16_t *)tbl));
}
else {
@@ -188,7 +188,7 @@ type shuffle_neon(const type &a, const type &b)
(i3 * 4) + 2 + 16,
(i3 * 4) + 3 + 16};
- return vqtbl2q_s8((int8x16x2_t){a, b}, *(int8x16_t *)tbl);
+ return type(vqtbl2q_s8((int8x16x2_t){int8x16_t(a), int8x16_t(b)}, *(uint8x16_t *)tbl));
}
}
#endif /* __KERNEL_NEON */
diff --git a/intern/cycles/util/util_sseb.h b/intern/cycles/util/util_sseb.h
index 1488da46b09..4dbd5b8046e 100644
--- a/intern/cycles/util/util_sseb.h
+++ b/intern/cycles/util/util_sseb.h
@@ -283,7 +283,7 @@ __forceinline uint32_t popcnt(const sseb &a)
{
# if defined(__KERNEL_NEON__)
const int32x4_t mask = {1, 1, 1, 1};
- int32x4_t t = vandq_s32(a.m128, mask);
+ int32x4_t t = vandq_s32(vreinterpretq_s32_m128(a.m128), mask);
return vaddvq_s32(t);
# else
return _mm_popcnt_u32(_mm_movemask_ps(a));
@@ -299,7 +299,7 @@ __forceinline uint32_t popcnt(const sseb &a)
__forceinline bool reduce_and(const sseb &a)
{
# if defined(__KERNEL_NEON__)
- return vaddvq_s32(a.m128) == -4;
+ return vaddvq_s32(vreinterpretq_s32_m128(a.m128)) == -4;
# else
return _mm_movemask_ps(a) == 0xf;
# endif
@@ -307,7 +307,7 @@ __forceinline bool reduce_and(const sseb &a)
__forceinline bool reduce_or(const sseb &a)
{
# if defined(__KERNEL_NEON__)
- return vaddvq_s32(a.m128) != 0x0;
+ return vaddvq_s32(vreinterpretq_s32_m128(a.m128)) != 0x0;
# else
return _mm_movemask_ps(a) != 0x0;
# endif
@@ -315,7 +315,7 @@ __forceinline bool reduce_or(const sseb &a)
__forceinline bool all(const sseb &b)
{
# if defined(__KERNEL_NEON__)
- return vaddvq_s32(b.m128) == -4;
+ return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) == -4;
# else
return _mm_movemask_ps(b) == 0xf;
# endif
@@ -323,7 +323,7 @@ __forceinline bool all(const sseb &b)
__forceinline bool any(const sseb &b)
{
# if defined(__KERNEL_NEON__)
- return vaddvq_s32(b.m128) != 0x0;
+ return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) != 0x0;
# else
return _mm_movemask_ps(b) != 0x0;
# endif
@@ -331,7 +331,7 @@ __forceinline bool any(const sseb &b)
__forceinline bool none(const sseb &b)
{
# if defined(__KERNEL_NEON__)
- return vaddvq_s32(b.m128) == 0x0;
+ return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) == 0x0;
# else
return _mm_movemask_ps(b) == 0x0;
# endif
diff --git a/intern/cycles/util/util_ssef.h b/intern/cycles/util/util_ssef.h
index d039b50a7d2..0c81ed87553 100644
--- a/intern/cycles/util/util_ssef.h
+++ b/intern/cycles/util/util_ssef.h
@@ -596,7 +596,7 @@ template<size_t i0, size_t i1, size_t i2, size_t i3>
__forceinline const ssef shuffle(const ssef &b)
{
# ifdef __KERNEL_NEON__
- return shuffle_neon<ssef, i0, i1, i2, i3>(b.m128);
+ return shuffle_neon<float32x4_t, i0, i1, i2, i3>(b.m128);
# else
return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(i3, i2, i1, i0)));
# endif
@@ -625,7 +625,7 @@ __forceinline const ssef shuffle(const ssef &a, const ssef &b)
template<size_t i0> __forceinline const ssef shuffle(const ssef &a, const ssef &b)
{
# ifdef __KERNEL_NEON__
- return shuffle<float32x4_t, i0, i0, i0, i0>(a, b);
+ return shuffle_neon<float32x4_t, i0, i0, i0, i0>(a, b);
# else
return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i0, i0, i0, i0));
# endif
diff --git a/intern/cycles/util/util_ssei.h b/intern/cycles/util/util_ssei.h
index 3ec69ab3700..ce8f7de3fa2 100644
--- a/intern/cycles/util/util_ssei.h
+++ b/intern/cycles/util/util_ssei.h
@@ -446,7 +446,8 @@ template<size_t i0, size_t i1, size_t i2, size_t i3>
__forceinline const ssei shuffle(const ssei &a)
{
# ifdef __KERNEL_NEON__
- return shuffle_neon<ssei, i0, i1, i2, i3>(a);
+ int32x4_t result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a));
+ return vreinterpretq_m128i_s32(result);
# else
return _mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0));
# endif
@@ -456,7 +457,8 @@ template<size_t i0, size_t i1, size_t i2, size_t i3>
__forceinline const ssei shuffle(const ssei &a, const ssei &b)
{
# ifdef __KERNEL_NEON__
- return shuffle_neon<ssei, i0, i1, i2, i3>(a, b);
+ int32x4_t result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b));
+ return vreinterpretq_m128i_s32(result);
# else
return _mm_castps_si128(
_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0)));
@@ -514,7 +516,7 @@ __forceinline const ssei vreduce_add(const ssei &v)
__forceinline int reduce_min(const ssei &v)
{
# ifdef __KERNEL_NEON__
- return vminvq_s32(v);
+ return vminvq_s32(vreinterpretq_s32_m128i(v));
# else
return extract<0>(vreduce_min(v));
# endif
@@ -522,7 +524,7 @@ __forceinline int reduce_min(const ssei &v)
__forceinline int reduce_max(const ssei &v)
{
# ifdef __KERNEL_NEON__
- return vmaxvq_s32(v);
+ return vmaxvq_s32(vreinterpretq_s32_m128i(v));
# else
return extract<0>(vreduce_max(v));
# endif
@@ -530,7 +532,7 @@ __forceinline int reduce_max(const ssei &v)
__forceinline int reduce_add(const ssei &v)
{
# ifdef __KERNEL_NEON__
- return vaddvq_s32(v);
+ return vaddvq_s32(vreinterpretq_s32_m128i(v));
# else
return extract<0>(vreduce_add(v));
# endif
diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp
index 2c1716ce515..6500a59e42c 100644
--- a/intern/cycles/util/util_system.cpp
+++ b/intern/cycles/util/util_system.cpp
@@ -166,12 +166,33 @@ static void __cpuid(int data[4], int selector)
string system_cpu_brand_string()
{
+#if !defined(WIN32) && !defined(__x86_64__) && !defined(__i386__)
+ FILE *cpuinfo = fopen("/proc/cpuinfo", "r");
+ if (cpuinfo != nullptr) {
+ char cpuinfo_buf[513] = "";
+ fread(cpuinfo_buf, sizeof(cpuinfo_buf) - 1, 1, cpuinfo);
+ fclose(cpuinfo);
+
+ char *modelname = strstr(cpuinfo_buf, "model name");
+ if (modelname != nullptr) {
+ modelname = strchr(modelname, ':');
+ if (modelname != nullptr) {
+ modelname += 2;
+ char *modelname_end = strchr(modelname, '\n');
+ if (modelname_end != nullptr) {
+ *modelname_end = '\0';
+ return modelname;
+ }
+ }
+ }
+ }
+#else
char buf[49] = {0};
int result[4] = {0};
__cpuid(result, 0x80000000);
- if (result[0] >= (int)0x80000004) {
+ if (result[0] != 0 && result[0] >= (int)0x80000004) {
__cpuid((int *)(buf + 0), 0x80000002);
__cpuid((int *)(buf + 16), 0x80000003);
__cpuid((int *)(buf + 32), 0x80000004);
@@ -183,7 +204,7 @@ string system_cpu_brand_string()
return brand;
}
-
+#endif
return "Unknown CPU";
}