From a8cc0d707e82ac781f44bf6cd7ed1e8974d8ed39 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Wed, 2 Aug 2017 02:09:08 +0200 Subject: Code refactor: split defines into separate header, changes to SSE type headers. I need to use some macros defined in util_simd.h for float3/float4, to emulate SSE4 instructions on SSE2. But due to issues with the order of header includes this was not possible, so this commit does some refactoring to make it work. Differential Revision: https://developer.blender.org/D2764 --- intern/cycles/util/CMakeLists.txt | 1 + intern/cycles/util/util_defines.h | 134 +++++++++++++++++++++++++++++++++ intern/cycles/util/util_optimization.h | 52 ------------- intern/cycles/util/util_simd.h | 45 ++++++++--- intern/cycles/util/util_sseb.h | 3 + intern/cycles/util/util_ssef.h | 3 + intern/cycles/util/util_ssei.h | 9 ++- intern/cycles/util/util_types.h | 132 ++++---------------------------- 8 files changed, 197 insertions(+), 182 deletions(-) create mode 100644 intern/cycles/util/util_defines.h (limited to 'intern/cycles/util') diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index 43f9a57d099..7f3747a0f58 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -38,6 +38,7 @@ set(SRC_HEADERS util_atomic.h util_boundbox.h util_debug.h + util_defines.h util_guarded_allocator.cpp util_foreach.h util_function.h diff --git a/intern/cycles/util/util_defines.h b/intern/cycles/util/util_defines.h new file mode 100644 index 00000000000..d0d87e74332 --- /dev/null +++ b/intern/cycles/util/util_defines.h @@ -0,0 +1,134 @@ + +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_DEFINES_H__ +#define __UTIL_DEFINES_H__ + +/* Bitness */ + +#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) +# define __KERNEL_64_BIT__ +#endif + +/* Qualifiers for kernel code shared by CPU and GPU */ + +#ifndef __KERNEL_GPU__ +# define ccl_device static inline +# define ccl_device_noinline static +# define ccl_global +# define ccl_constant +# define ccl_local +# define ccl_local_param +# define ccl_private +# define ccl_restrict __restrict +# define __KERNEL_WITH_SSE_ALIGN__ + +# if defined(_WIN32) && !defined(FREE_WINDOWS) +# define ccl_device_inline static __forceinline +# define ccl_device_forceinline static __forceinline +# define ccl_align(...) __declspec(align(__VA_ARGS__)) +# ifdef __KERNEL_64_BIT__ +# define ccl_try_align(...) __declspec(align(__VA_ARGS__)) +# else /* __KERNEL_64_BIT__ */ +# undef __KERNEL_WITH_SSE_ALIGN__ +/* No support for function arguments (error C2719). */ +# define ccl_try_align(...) +# endif /* __KERNEL_64_BIT__ */ +# define ccl_may_alias +# define ccl_always_inline __forceinline +# define ccl_never_inline __declspec(noinline) +# define ccl_maybe_unused +# else /* _WIN32 && !FREE_WINDOWS */ +# define ccl_device_inline static inline __attribute__((always_inline)) +# define ccl_device_forceinline static inline __attribute__((always_inline)) +# define ccl_align(...) __attribute__((aligned(__VA_ARGS__))) +# ifndef FREE_WINDOWS64 +# define __forceinline inline __attribute__((always_inline)) +# endif +# define ccl_try_align(...) 
__attribute__((aligned(__VA_ARGS__))) +# define ccl_may_alias __attribute__((__may_alias__)) +# define ccl_always_inline __attribute__((always_inline)) +# define ccl_never_inline __attribute__((noinline)) +# define ccl_maybe_unused __attribute__((used)) +# endif /* _WIN32 && !FREE_WINDOWS */ + +/* Use to suppress '-Wimplicit-fallthrough' (in place of 'break'). */ +# if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */ +# define ATTR_FALLTHROUGH __attribute__((fallthrough)) +# else +# define ATTR_FALLTHROUGH ((void)0) +# endif +#endif /* __KERNEL_GPU__ */ + +/* macros */ + +/* hints for branch prediction, only use in code that runs a _lot_ */ +#if defined(__GNUC__) && defined(__KERNEL_CPU__) +# define LIKELY(x) __builtin_expect(!!(x), 1) +# define UNLIKELY(x) __builtin_expect(!!(x), 0) +#else +# define LIKELY(x) (x) +# define UNLIKELY(x) (x) +#endif + +#if defined(__cplusplus) && ((__cplusplus >= 201103L) || (defined(_MSC_VER) && _MSC_VER >= 1800)) +# define HAS_CPP11_FEATURES +#endif + +#if defined(__GNUC__) || defined(__clang__) +# if defined(HAS_CPP11_FEATURES) +/* Some magic to be sure we don't have reference in the type. */ +template static inline T decltype_helper(T x) { return x; } +# define TYPEOF(x) decltype(decltype_helper(x)) +# else +# define TYPEOF(x) typeof(x) +# endif +#endif + +/* Causes warning: + * incompatible types when assigning to type 'Foo' from type 'Bar' + * ... 
the compiler optimizes away the temp var */ +#ifdef __GNUC__ +#define CHECK_TYPE(var, type) { \ + TYPEOF(var) *__tmp; \ + __tmp = (type *)NULL; \ + (void)__tmp; \ +} (void)0 + +#define CHECK_TYPE_PAIR(var_a, var_b) { \ + TYPEOF(var_a) *__tmp; \ + __tmp = (typeof(var_b) *)NULL; \ + (void)__tmp; \ +} (void)0 +#else +# define CHECK_TYPE(var, type) +# define CHECK_TYPE_PAIR(var_a, var_b) +#endif + +/* can be used in simple macros */ +#define CHECK_TYPE_INLINE(val, type) \ + ((void)(((type)0) != (val))) + +#ifndef __KERNEL_GPU__ +# include +# define util_assert(statement) assert(statement) +#else +# define util_assert(statement) +#endif + +#endif /* __UTIL_DEFINES_H__ */ + diff --git a/intern/cycles/util/util_optimization.h b/intern/cycles/util/util_optimization.h index 6f70a474fe7..0382c0811dd 100644 --- a/intern/cycles/util/util_optimization.h +++ b/intern/cycles/util/util_optimization.h @@ -19,16 +19,6 @@ #ifndef __KERNEL_GPU__ -/* quiet unused define warnings */ -#if defined(__KERNEL_SSE2__) || \ - defined(__KERNEL_SSE3__) || \ - defined(__KERNEL_SSSE3__) || \ - defined(__KERNEL_SSE41__) || \ - defined(__KERNEL_AVX__) || \ - defined(__KERNEL_AVX2__) - /* do nothing */ -#endif - /* x86 * * Compile a regular, SSE2 and SSE3 kernel. */ @@ -73,48 +63,6 @@ #endif /* defined(__x86_64__) || defined(_M_X64) */ -/* SSE Experiment - * - * This is disabled code for an experiment to use SSE types globally for types - * such as float3 and float4. Currently this gives an overall slowdown. 
*/ - -#if 0 -# define __KERNEL_SSE__ -# ifndef __KERNEL_SSE2__ -# define __KERNEL_SSE2__ -# endif -# ifndef __KERNEL_SSE3__ -# define __KERNEL_SSE3__ -# endif -# ifndef __KERNEL_SSSE3__ -# define __KERNEL_SSSE3__ -# endif -# ifndef __KERNEL_SSE4__ -# define __KERNEL_SSE4__ -# endif -#endif - -/* SSE Intrinsics includes - * - * We assume __KERNEL_SSEX__ flags to have been defined at this point */ - -/* SSE intrinsics headers */ -#ifndef FREE_WINDOWS64 - -#ifdef _MSC_VER -# include -#elif (defined(__x86_64__) || defined(__i386__)) -# include -#endif - -#else - -/* MinGW64 has conflicting declarations for these SSE headers in . - * Since we can't avoid including , better only include that */ -#include "util/util_windows.h" - -#endif - #endif #endif /* __UTIL_OPTIMIZATION_H__ */ diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h index 587febe3e52..7d938a0fbca 100644 --- a/intern/cycles/util/util_simd.h +++ b/intern/cycles/util/util_simd.h @@ -18,19 +18,38 @@ #ifndef __UTIL_SIMD_TYPES_H__ #define __UTIL_SIMD_TYPES_H__ +#ifndef __KERNEL_GPU__ + #include #include "util/util_debug.h" -#include "util/util_types.h" +#include "util/util_defines.h" + +/* SSE Intrinsics includes + * + * We assume __KERNEL_SSEX__ flags to have been defined at this point */ + +/* SSE intrinsics headers */ +#ifndef FREE_WINDOWS64 + +#ifdef _MSC_VER +# include +#elif (defined(__x86_64__) || defined(__i386__)) +# include +#endif + +#else + +/* MinGW64 has conflicting declarations for these SSE headers in . 
+ * Since we can't avoid including , better only include that */ +#include "util/util_windows.h" + +#endif CCL_NAMESPACE_BEGIN #ifdef __KERNEL_SSE2__ -struct sseb; -struct ssei; -struct ssef; - extern const __m128 _mm_lookupmask_ps[16]; /* Special Types */ @@ -496,13 +515,19 @@ ccl_device_inline int bitscan(int value) #endif /* __KERNEL_SSE2__ */ +/* quiet unused define warnings */ +#if defined(__KERNEL_SSE2__) || \ + defined(__KERNEL_SSE3__) || \ + defined(__KERNEL_SSSE3__) || \ + defined(__KERNEL_SSE41__) || \ + defined(__KERNEL_AVX__) || \ + defined(__KERNEL_AVX2__) + /* do nothing */ +#endif + CCL_NAMESPACE_END -#include "util/util_math.h" -#include "util/util_sseb.h" -#include "util/util_ssei.h" -#include "util/util_ssef.h" -#include "util/util_avxf.h" +#endif /* __KERNEL_GPU__ */ #endif /* __UTIL_SIMD_TYPES_H__ */ diff --git a/intern/cycles/util/util_sseb.h b/intern/cycles/util/util_sseb.h index 6e669701f3b..93c22aafdcd 100644 --- a/intern/cycles/util/util_sseb.h +++ b/intern/cycles/util/util_sseb.h @@ -22,6 +22,9 @@ CCL_NAMESPACE_BEGIN #ifdef __KERNEL_SSE2__ +struct ssei; +struct ssef; + /*! 4-wide SSE bool type. */ struct sseb { diff --git a/intern/cycles/util/util_ssef.h b/intern/cycles/util/util_ssef.h index cf99a08efae..bb007ff84a9 100644 --- a/intern/cycles/util/util_ssef.h +++ b/intern/cycles/util/util_ssef.h @@ -22,6 +22,9 @@ CCL_NAMESPACE_BEGIN #ifdef __KERNEL_SSE2__ +struct sseb; +struct ssef; + /*! 4-wide SSE float type. */ struct ssef { diff --git a/intern/cycles/util/util_ssei.h b/intern/cycles/util/util_ssei.h index 5f62569268c..ef2a9e68b7d 100644 --- a/intern/cycles/util/util_ssei.h +++ b/intern/cycles/util/util_ssei.h @@ -22,6 +22,9 @@ CCL_NAMESPACE_BEGIN #ifdef __KERNEL_SSE2__ +struct sseb; +struct ssef; + /*! 4-wide SSE integer type. 
*/ struct ssei { @@ -234,8 +237,10 @@ __forceinline size_t select_max(const sseb& valid, const ssei& v) { const ssei a #else -__forceinline int reduce_min(const ssei& v) { return min(min(v[0],v[1]),min(v[2],v[3])); } -__forceinline int reduce_max(const ssei& v) { return max(max(v[0],v[1]),max(v[2],v[3])); } +__forceinline int ssei_min(int a, int b) { return (a < b)? a: b; } +__forceinline int ssei_max(int a, int b) { return (a > b)? a: b; } +__forceinline int reduce_min(const ssei& v) { return ssei_min(ssei_min(v[0],v[1]),ssei_min(v[2],v[3])); } +__forceinline int reduce_max(const ssei& v) { return ssei_max(ssei_max(v[0],v[1]),ssei_max(v[2],v[3])); } __forceinline int reduce_add(const ssei& v) { return v[0]+v[1]+v[2]+v[3]; } #endif diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index a5d1d7152d5..d9642df8005 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -21,72 +21,17 @@ # include #endif -/* Bitness */ +/* Standard Integer Types */ -#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) -# define __KERNEL_64_BIT__ +#if !defined(__KERNEL_GPU__) && !defined(_WIN32) +# include #endif -/* Qualifiers for kernel code shared by CPU and GPU */ - -#ifndef __KERNEL_GPU__ -# define ccl_device static inline -# define ccl_device_noinline static -# define ccl_global -# define ccl_constant -# define ccl_local -# define ccl_local_param -# define ccl_private -# define ccl_restrict __restrict -# define __KERNEL_WITH_SSE_ALIGN__ - -# if defined(_WIN32) && !defined(FREE_WINDOWS) -# define ccl_device_inline static __forceinline -# define ccl_device_forceinline static __forceinline -# define ccl_align(...) __declspec(align(__VA_ARGS__)) -# ifdef __KERNEL_64_BIT__ -# define ccl_try_align(...) __declspec(align(__VA_ARGS__)) -# else /* __KERNEL_64_BIT__ */ -# undef __KERNEL_WITH_SSE_ALIGN__ -/* No support for function arguments (error C2719). 
*/ -# define ccl_try_align(...) -# endif /* __KERNEL_64_BIT__ */ -# define ccl_may_alias -# define ccl_always_inline __forceinline -# define ccl_never_inline __declspec(noinline) -# define ccl_maybe_unused -# else /* _WIN32 && !FREE_WINDOWS */ -# define ccl_device_inline static inline __attribute__((always_inline)) -# define ccl_device_forceinline static inline __attribute__((always_inline)) -# define ccl_align(...) __attribute__((aligned(__VA_ARGS__))) -# ifndef FREE_WINDOWS64 -# define __forceinline inline __attribute__((always_inline)) -# endif -# define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__))) -# define ccl_may_alias __attribute__((__may_alias__)) -# define ccl_always_inline __attribute__((always_inline)) -# define ccl_never_inline __attribute__((noinline)) -# define ccl_maybe_unused __attribute__((used)) -# endif /* _WIN32 && !FREE_WINDOWS */ - -/* Use to suppress '-Wimplicit-fallthrough' (in place of 'break'). */ -# if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */ -# define ATTR_FALLTHROUGH __attribute__((fallthrough)) -# else -# define ATTR_FALLTHROUGH ((void)0) -# endif -#endif /* __KERNEL_GPU__ */ - -/* Standard Integer Types */ +#include "util/util_defines.h" #ifndef __KERNEL_GPU__ -/* int8_t, uint16_t, and friends */ -# ifndef _WIN32 -# include -# endif -/* SIMD Types */ -# include "util/util_optimization.h" -#endif /* __KERNEL_GPU__ */ +# include "util/util_simd.h" +#endif CCL_NAMESPACE_BEGIN @@ -201,65 +146,8 @@ enum ExtensionType { EXTENSION_NUM_TYPES, }; -/* macros */ - -/* hints for branch prediction, only use in code that runs a _lot_ */ -#if defined(__GNUC__) && defined(__KERNEL_CPU__) -# define LIKELY(x) __builtin_expect(!!(x), 1) -# define UNLIKELY(x) __builtin_expect(!!(x), 0) -#else -# define LIKELY(x) (x) -# define UNLIKELY(x) (x) -#endif - -#if defined(__cplusplus) && ((__cplusplus >= 201103L) || (defined(_MSC_VER) && _MSC_VER >= 1800)) -# define HAS_CPP11_FEATURES -#endif - -#if defined(__GNUC__) || 
defined(__clang__) -# if defined(HAS_CPP11_FEATURES) -/* Some magic to be sure we don't have reference in the type. */ -template static inline T decltype_helper(T x) { return x; } -# define TYPEOF(x) decltype(decltype_helper(x)) -# else -# define TYPEOF(x) typeof(x) -# endif -#endif - -/* Causes warning: - * incompatible types when assigning to type 'Foo' from type 'Bar' - * ... the compiler optimizes away the temp var */ -#ifdef __GNUC__ -#define CHECK_TYPE(var, type) { \ - TYPEOF(var) *__tmp; \ - __tmp = (type *)NULL; \ - (void)__tmp; \ -} (void)0 - -#define CHECK_TYPE_PAIR(var_a, var_b) { \ - TYPEOF(var_a) *__tmp; \ - __tmp = (typeof(var_b) *)NULL; \ - (void)__tmp; \ -} (void)0 -#else -# define CHECK_TYPE(var, type) -# define CHECK_TYPE_PAIR(var_a, var_b) -#endif - -/* can be used in simple macros */ -#define CHECK_TYPE_INLINE(val, type) \ - ((void)(((type)0) != (val))) - - CCL_NAMESPACE_END -#ifndef __KERNEL_GPU__ -# include -# define util_assert(statement) assert(statement) -#else -# define util_assert(statement) -#endif - /* Vectorized types declaration. */ #include "util/util_types_uchar2.h" #include "util/util_types_uchar3.h" @@ -298,5 +186,13 @@ CCL_NAMESPACE_END #include "util/util_types_vector3_impl.h" +/* SSE types. */ +#ifndef __KERNEL_GPU__ +# include "util/util_sseb.h" +# include "util/util_ssei.h" +# include "util/util_ssef.h" +# include "util/util_avxf.h" +#endif + #endif /* __UTIL_TYPES_H__ */ -- cgit v1.2.3