diff options
author | Patrick Mours <pmours@nvidia.com> | 2021-04-20 15:00:05 +0300 |
---|---|---|
committer | Patrick Mours <pmours@nvidia.com> | 2021-04-20 19:45:46 +0300 |
commit | 847579b422507917c4252ecc5c777bf5e0fc6f09 (patch) | |
tree | 6f9def3399bd8c01cb5e45d3e0eeab0406f47cca /intern/cycles/util | |
parent | f2626f14209e574eb23f115bf84a3452bfb9a89c (diff) |
Add support for building on Linux aarch64
Differential Revision: https://developer.blender.org/D10958
Diffstat (limited to 'intern/cycles/util')
-rw-r--r-- | intern/cycles/util/util_simd.h | 8 |
-rw-r--r-- | intern/cycles/util/util_sseb.h | 12 |
-rw-r--r-- | intern/cycles/util/util_ssef.h | 4 |
-rw-r--r-- | intern/cycles/util/util_ssei.h | 12 |
-rw-r--r-- | intern/cycles/util/util_system.cpp | 25 |
5 files changed, 42 insertions, 19 deletions
diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h index 718ec9266b1..8e8caa98a1b 100644 --- a/intern/cycles/util/util_simd.h +++ b/intern/cycles/util/util_simd.h @@ -124,7 +124,7 @@ static struct StepTy { template<class type, int i0, int i1, int i2, int i3> type shuffle_neon(const type &a) { if (i0 == i1 && i0 == i2 && i0 == i3) { - return vdupq_laneq_s32(a, i0); + return type(vdupq_laneq_s32(int32x4_t(a), i0)); } static const uint8_t tbl[16] = {(i0 * 4) + 0, (i0 * 4) + 1, @@ -143,7 +143,7 @@ template<class type, int i0, int i1, int i2, int i3> type shuffle_neon(const typ (i3 * 4) + 2, (i3 * 4) + 3}; - return vqtbl1q_s8(int8x16_t(a), *(int8x16_t *)tbl); + return type(vqtbl1q_s8(int8x16_t(a), *(uint8x16_t *)tbl)); } template<class type, int i0, int i1, int i2, int i3> @@ -167,7 +167,7 @@ type shuffle_neon(const type &a, const type &b) (i3 * 4) + 2, (i3 * 4) + 3}; - return vqtbl1q_s8(int8x16_t(b), *(int8x16_t *)tbl); + return type(vqtbl1q_s8(int8x16_t(b), *(uint8x16_t *)tbl)); } else { @@ -188,7 +188,7 @@ type shuffle_neon(const type &a, const type &b) (i3 * 4) + 2 + 16, (i3 * 4) + 3 + 16}; - return vqtbl2q_s8((int8x16x2_t){a, b}, *(int8x16_t *)tbl); + return type(vqtbl2q_s8((int8x16x2_t){int8x16_t(a), int8x16_t(b)}, *(uint8x16_t *)tbl)); } } #endif /* __KERNEL_NEON */ diff --git a/intern/cycles/util/util_sseb.h b/intern/cycles/util/util_sseb.h index 1488da46b09..4dbd5b8046e 100644 --- a/intern/cycles/util/util_sseb.h +++ b/intern/cycles/util/util_sseb.h @@ -283,7 +283,7 @@ __forceinline uint32_t popcnt(const sseb &a) { # if defined(__KERNEL_NEON__) const int32x4_t mask = {1, 1, 1, 1}; - int32x4_t t = vandq_s32(a.m128, mask); + int32x4_t t = vandq_s32(vreinterpretq_s32_m128(a.m128), mask); return vaddvq_s32(t); # else return _mm_popcnt_u32(_mm_movemask_ps(a)); @@ -299,7 +299,7 @@ __forceinline uint32_t popcnt(const sseb &a) __forceinline bool reduce_and(const sseb &a) { # if defined(__KERNEL_NEON__) - return vaddvq_s32(a.m128) == -4; + 
return vaddvq_s32(vreinterpretq_s32_m128(a.m128)) == -4; # else return _mm_movemask_ps(a) == 0xf; # endif @@ -307,7 +307,7 @@ __forceinline bool reduce_and(const sseb &a) __forceinline bool reduce_or(const sseb &a) { # if defined(__KERNEL_NEON__) - return vaddvq_s32(a.m128) != 0x0; + return vaddvq_s32(vreinterpretq_s32_m128(a.m128)) != 0x0; # else return _mm_movemask_ps(a) != 0x0; # endif @@ -315,7 +315,7 @@ __forceinline bool reduce_or(const sseb &a) __forceinline bool all(const sseb &b) { # if defined(__KERNEL_NEON__) - return vaddvq_s32(b.m128) == -4; + return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) == -4; # else return _mm_movemask_ps(b) == 0xf; # endif @@ -323,7 +323,7 @@ __forceinline bool all(const sseb &b) __forceinline bool any(const sseb &b) { # if defined(__KERNEL_NEON__) - return vaddvq_s32(b.m128) != 0x0; + return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) != 0x0; # else return _mm_movemask_ps(b) != 0x0; # endif @@ -331,7 +331,7 @@ __forceinline bool any(const sseb &b) __forceinline bool none(const sseb &b) { # if defined(__KERNEL_NEON__) - return vaddvq_s32(b.m128) == 0x0; + return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) == 0x0; # else return _mm_movemask_ps(b) == 0x0; # endif diff --git a/intern/cycles/util/util_ssef.h b/intern/cycles/util/util_ssef.h index d039b50a7d2..0c81ed87553 100644 --- a/intern/cycles/util/util_ssef.h +++ b/intern/cycles/util/util_ssef.h @@ -596,7 +596,7 @@ template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const ssef shuffle(const ssef &b) { # ifdef __KERNEL_NEON__ - return shuffle_neon<ssef, i0, i1, i2, i3>(b.m128); + return shuffle_neon<float32x4_t, i0, i1, i2, i3>(b.m128); # else return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(i3, i2, i1, i0))); # endif @@ -625,7 +625,7 @@ __forceinline const ssef shuffle(const ssef &a, const ssef &b) template<size_t i0> __forceinline const ssef shuffle(const ssef &a, const ssef &b) { # ifdef __KERNEL_NEON__ - return shuffle<float32x4_t, i0, 
i0, i0, i0>(a, b); + return shuffle_neon<float32x4_t, i0, i0, i0, i0>(a, b); # else return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i0, i0, i0, i0)); # endif diff --git a/intern/cycles/util/util_ssei.h b/intern/cycles/util/util_ssei.h index 3ec69ab3700..ce8f7de3fa2 100644 --- a/intern/cycles/util/util_ssei.h +++ b/intern/cycles/util/util_ssei.h @@ -446,7 +446,8 @@ template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const ssei shuffle(const ssei &a) { # ifdef __KERNEL_NEON__ - return shuffle_neon<ssei, i0, i1, i2, i3>(a); + int32x4_t result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a)); + return vreinterpretq_m128i_s32(result); # else return _mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)); # endif @@ -456,7 +457,8 @@ template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const ssei shuffle(const ssei &a, const ssei &b) { # ifdef __KERNEL_NEON__ - return shuffle_neon<ssei, i0, i1, i2, i3>(a, b); + int32x4_t result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)); + return vreinterpretq_m128i_s32(result); # else return _mm_castps_si128( _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0))); @@ -514,7 +516,7 @@ __forceinline const ssei vreduce_add(const ssei &v) __forceinline int reduce_min(const ssei &v) { # ifdef __KERNEL_NEON__ - return vminvq_s32(v); + return vminvq_s32(vreinterpretq_s32_m128i(v)); # else return extract<0>(vreduce_min(v)); # endif @@ -522,7 +524,7 @@ __forceinline int reduce_min(const ssei &v) __forceinline int reduce_max(const ssei &v) { # ifdef __KERNEL_NEON__ - return vmaxvq_s32(v); + return vmaxvq_s32(vreinterpretq_s32_m128i(v)); # else return extract<0>(vreduce_max(v)); # endif @@ -530,7 +532,7 @@ __forceinline int reduce_max(const ssei &v) __forceinline int reduce_add(const ssei &v) { # ifdef __KERNEL_NEON__ - return vaddvq_s32(v); + return vaddvq_s32(vreinterpretq_s32_m128i(v)); # else return 
extract<0>(vreduce_add(v)); # endif diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp index 2c1716ce515..6500a59e42c 100644 --- a/intern/cycles/util/util_system.cpp +++ b/intern/cycles/util/util_system.cpp @@ -166,12 +166,33 @@ static void __cpuid(int data[4], int selector) string system_cpu_brand_string() { +#if !defined(WIN32) && !defined(__x86_64__) && !defined(__i386__) + FILE *cpuinfo = fopen("/proc/cpuinfo", "r"); + if (cpuinfo != nullptr) { + char cpuinfo_buf[513] = ""; + fread(cpuinfo_buf, sizeof(cpuinfo_buf) - 1, 1, cpuinfo); + fclose(cpuinfo); + + char *modelname = strstr(cpuinfo_buf, "model name"); + if (modelname != nullptr) { + modelname = strchr(modelname, ':'); + if (modelname != nullptr) { + modelname += 2; + char *modelname_end = strchr(modelname, '\n'); + if (modelname_end != nullptr) { + *modelname_end = '\0'; + return modelname; + } + } + } + } +#else char buf[49] = {0}; int result[4] = {0}; __cpuid(result, 0x80000000); - if (result[0] >= (int)0x80000004) { + if (result[0] != 0 && result[0] >= (int)0x80000004) { __cpuid((int *)(buf + 0), 0x80000002); __cpuid((int *)(buf + 16), 0x80000003); __cpuid((int *)(buf + 32), 0x80000004); @@ -183,7 +204,7 @@ string system_cpu_brand_string() return brand; } - +#endif return "Unknown CPU"; } |