Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brechtvanlommel@gmail.com>2017-08-02 03:09:08 +0300
committerBrecht Van Lommel <brechtvanlommel@gmail.com>2017-08-07 15:01:24 +0300
commita8cc0d707e82ac781f44bf6cd7ed1e8974d8ed39 (patch)
treedbf2e42988f8ee8fcdf95571f7983dae7dbe03c7 /intern/cycles/util
parent5e4bad2c00e466bfc013f8ad9edac7ad66938001 (diff)
Code refactor: split defines into separate header, changes to SSE type headers.
I need to use some macros defined in util_simd.h for float3/float4, to emulate SSE4 instructions on SSE2. Due to issues with the order of header includes this was not possible, so this commit does some refactoring to make it work. Differential Revision: https://developer.blender.org/D2764
Diffstat (limited to 'intern/cycles/util')
-rw-r--r--intern/cycles/util/CMakeLists.txt1
-rw-r--r--intern/cycles/util/util_defines.h134
-rw-r--r--intern/cycles/util/util_optimization.h52
-rw-r--r--intern/cycles/util/util_simd.h45
-rw-r--r--intern/cycles/util/util_sseb.h3
-rw-r--r--intern/cycles/util/util_ssef.h3
-rw-r--r--intern/cycles/util/util_ssei.h9
-rw-r--r--intern/cycles/util/util_types.h132
8 files changed, 197 insertions, 182 deletions
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt
index 43f9a57d099..7f3747a0f58 100644
--- a/intern/cycles/util/CMakeLists.txt
+++ b/intern/cycles/util/CMakeLists.txt
@@ -38,6 +38,7 @@ set(SRC_HEADERS
util_atomic.h
util_boundbox.h
util_debug.h
+ util_defines.h
util_guarded_allocator.cpp
util_foreach.h
util_function.h
diff --git a/intern/cycles/util/util_defines.h b/intern/cycles/util/util_defines.h
new file mode 100644
index 00000000000..d0d87e74332
--- /dev/null
+++ b/intern/cycles/util/util_defines.h
@@ -0,0 +1,134 @@
+
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_DEFINES_H__
+#define __UTIL_DEFINES_H__
+
+/* Bitness */
+
+#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64)
+# define __KERNEL_64_BIT__
+#endif
+
+/* Qualifiers for kernel code shared by CPU and GPU */
+
+#ifndef __KERNEL_GPU__
+# define ccl_device static inline
+# define ccl_device_noinline static
+# define ccl_global
+# define ccl_constant
+# define ccl_local
+# define ccl_local_param
+# define ccl_private
+# define ccl_restrict __restrict
+# define __KERNEL_WITH_SSE_ALIGN__
+
+# if defined(_WIN32) && !defined(FREE_WINDOWS)
+# define ccl_device_inline static __forceinline
+# define ccl_device_forceinline static __forceinline
+# define ccl_align(...) __declspec(align(__VA_ARGS__))
+# ifdef __KERNEL_64_BIT__
+# define ccl_try_align(...) __declspec(align(__VA_ARGS__))
+# else /* __KERNEL_64_BIT__ */
+# undef __KERNEL_WITH_SSE_ALIGN__
+/* No support for function arguments (error C2719). */
+# define ccl_try_align(...)
+# endif /* __KERNEL_64_BIT__ */
+# define ccl_may_alias
+# define ccl_always_inline __forceinline
+# define ccl_never_inline __declspec(noinline)
+# define ccl_maybe_unused
+# else /* _WIN32 && !FREE_WINDOWS */
+# define ccl_device_inline static inline __attribute__((always_inline))
+# define ccl_device_forceinline static inline __attribute__((always_inline))
+# define ccl_align(...) __attribute__((aligned(__VA_ARGS__)))
+# ifndef FREE_WINDOWS64
+# define __forceinline inline __attribute__((always_inline))
+# endif
+# define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__)))
+# define ccl_may_alias __attribute__((__may_alias__))
+# define ccl_always_inline __attribute__((always_inline))
+# define ccl_never_inline __attribute__((noinline))
+# define ccl_maybe_unused __attribute__((unused)) /* only silences unused warnings; 'used' would force emission of the entity */
+# endif /* _WIN32 && !FREE_WINDOWS */
+
+/* Use to suppress '-Wimplicit-fallthrough' (in place of 'break'). */
+# if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */
+# define ATTR_FALLTHROUGH __attribute__((fallthrough))
+# else
+# define ATTR_FALLTHROUGH ((void)0)
+# endif
+#endif /* __KERNEL_GPU__ */
+
+/* macros */
+
+/* hints for branch prediction, only use in code that runs a _lot_ */
+#if defined(__GNUC__) && defined(__KERNEL_CPU__)
+# define LIKELY(x) __builtin_expect(!!(x), 1)
+# define UNLIKELY(x) __builtin_expect(!!(x), 0)
+#else
+# define LIKELY(x) (x)
+# define UNLIKELY(x) (x)
+#endif
+
+#if defined(__cplusplus) && ((__cplusplus >= 201103L) || (defined(_MSC_VER) && _MSC_VER >= 1800))
+# define HAS_CPP11_FEATURES
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+# if defined(HAS_CPP11_FEATURES)
+/* Some magic to be sure we don't have reference in the type. */
+template<typename T> static inline T decltype_helper(T x) { return x; }
+# define TYPEOF(x) decltype(decltype_helper(x))
+# else
+# define TYPEOF(x) typeof(x)
+# endif
+#endif
+
+/* Causes warning:
+ * incompatible types when assigning to type 'Foo' from type 'Bar'
+ * ... the compiler optimizes away the temp var */
+#ifdef __GNUC__
+#define CHECK_TYPE(var, type) { \
+ TYPEOF(var) *__tmp; \
+ __tmp = (type *)NULL; \
+ (void)__tmp; \
+} (void)0
+
+#define CHECK_TYPE_PAIR(var_a, var_b) { /* compile-time check that var_a and var_b have the same type */ \
+ TYPEOF(var_a) *__tmp; \
+ __tmp = (TYPEOF(var_b) *)NULL; /* TYPEOF on both sides, so both go through the same typeof/decltype path */ \
+ (void)__tmp; \
+} (void)0
+#else
+# define CHECK_TYPE(var, type)
+# define CHECK_TYPE_PAIR(var_a, var_b)
+#endif
+
+/* can be used in simple macros */
+#define CHECK_TYPE_INLINE(val, type) \
+ ((void)(((type)0) != (val)))
+
+#ifndef __KERNEL_GPU__
+# include <cassert>
+# define util_assert(statement) assert(statement)
+#else
+# define util_assert(statement)
+#endif
+
+#endif /* __UTIL_DEFINES_H__ */
+
diff --git a/intern/cycles/util/util_optimization.h b/intern/cycles/util/util_optimization.h
index 6f70a474fe7..0382c0811dd 100644
--- a/intern/cycles/util/util_optimization.h
+++ b/intern/cycles/util/util_optimization.h
@@ -19,16 +19,6 @@
#ifndef __KERNEL_GPU__
-/* quiet unused define warnings */
-#if defined(__KERNEL_SSE2__) || \
- defined(__KERNEL_SSE3__) || \
- defined(__KERNEL_SSSE3__) || \
- defined(__KERNEL_SSE41__) || \
- defined(__KERNEL_AVX__) || \
- defined(__KERNEL_AVX2__)
- /* do nothing */
-#endif
-
/* x86
*
* Compile a regular, SSE2 and SSE3 kernel. */
@@ -73,48 +63,6 @@
#endif /* defined(__x86_64__) || defined(_M_X64) */
-/* SSE Experiment
- *
- * This is disabled code for an experiment to use SSE types globally for types
- * such as float3 and float4. Currently this gives an overall slowdown. */
-
-#if 0
-# define __KERNEL_SSE__
-# ifndef __KERNEL_SSE2__
-# define __KERNEL_SSE2__
-# endif
-# ifndef __KERNEL_SSE3__
-# define __KERNEL_SSE3__
-# endif
-# ifndef __KERNEL_SSSE3__
-# define __KERNEL_SSSE3__
-# endif
-# ifndef __KERNEL_SSE4__
-# define __KERNEL_SSE4__
-# endif
-#endif
-
-/* SSE Intrinsics includes
- *
- * We assume __KERNEL_SSEX__ flags to have been defined at this point */
-
-/* SSE intrinsics headers */
-#ifndef FREE_WINDOWS64
-
-#ifdef _MSC_VER
-# include <intrin.h>
-#elif (defined(__x86_64__) || defined(__i386__))
-# include <x86intrin.h>
-#endif
-
-#else
-
-/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>.
- * Since we can't avoid including <windows.h>, better only include that */
-#include "util/util_windows.h"
-
-#endif
-
#endif
#endif /* __UTIL_OPTIMIZATION_H__ */
diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h
index 587febe3e52..7d938a0fbca 100644
--- a/intern/cycles/util/util_simd.h
+++ b/intern/cycles/util/util_simd.h
@@ -18,19 +18,38 @@
#ifndef __UTIL_SIMD_TYPES_H__
#define __UTIL_SIMD_TYPES_H__
+#ifndef __KERNEL_GPU__
+
#include <limits>
#include "util/util_debug.h"
-#include "util/util_types.h"
+#include "util/util_defines.h"
+
+/* SSE Intrinsics includes
+ *
+ * We assume __KERNEL_SSEX__ flags to have been defined at this point */
+
+/* SSE intrinsics headers */
+#ifndef FREE_WINDOWS64
+
+#ifdef _MSC_VER
+# include <intrin.h>
+#elif (defined(__x86_64__) || defined(__i386__))
+# include <x86intrin.h>
+#endif
+
+#else
+
+/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>.
+ * Since we can't avoid including <windows.h>, better only include that */
+#include "util/util_windows.h"
+
+#endif
CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_SSE2__
-struct sseb;
-struct ssei;
-struct ssef;
-
extern const __m128 _mm_lookupmask_ps[16];
/* Special Types */
@@ -496,13 +515,19 @@ ccl_device_inline int bitscan(int value)
#endif /* __KERNEL_SSE2__ */
+/* quiet unused define warnings */
+#if defined(__KERNEL_SSE2__) || \
+ defined(__KERNEL_SSE3__) || \
+ defined(__KERNEL_SSSE3__) || \
+ defined(__KERNEL_SSE41__) || \
+ defined(__KERNEL_AVX__) || \
+ defined(__KERNEL_AVX2__)
+ /* do nothing */
+#endif
+
CCL_NAMESPACE_END
-#include "util/util_math.h"
-#include "util/util_sseb.h"
-#include "util/util_ssei.h"
-#include "util/util_ssef.h"
-#include "util/util_avxf.h"
+#endif /* __KERNEL_GPU__ */
#endif /* __UTIL_SIMD_TYPES_H__ */
diff --git a/intern/cycles/util/util_sseb.h b/intern/cycles/util/util_sseb.h
index 6e669701f3b..93c22aafdcd 100644
--- a/intern/cycles/util/util_sseb.h
+++ b/intern/cycles/util/util_sseb.h
@@ -22,6 +22,9 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_SSE2__
+struct ssei;
+struct ssef;
+
/*! 4-wide SSE bool type. */
struct sseb
{
diff --git a/intern/cycles/util/util_ssef.h b/intern/cycles/util/util_ssef.h
index cf99a08efae..bb007ff84a9 100644
--- a/intern/cycles/util/util_ssef.h
+++ b/intern/cycles/util/util_ssef.h
@@ -22,6 +22,9 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_SSE2__
+struct sseb; /* forward declarations of the sibling SSE types, */
+struct ssei; /* mirroring util_sseb.h and util_ssei.h; ssef itself is defined below */
+
/*! 4-wide SSE float type. */
struct ssef
{
diff --git a/intern/cycles/util/util_ssei.h b/intern/cycles/util/util_ssei.h
index 5f62569268c..ef2a9e68b7d 100644
--- a/intern/cycles/util/util_ssei.h
+++ b/intern/cycles/util/util_ssei.h
@@ -22,6 +22,9 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_SSE2__
+struct sseb;
+struct ssef;
+
/*! 4-wide SSE integer type. */
struct ssei
{
@@ -234,8 +237,10 @@ __forceinline size_t select_max(const sseb& valid, const ssei& v) { const ssei a
#else
-__forceinline int reduce_min(const ssei& v) { return min(min(v[0],v[1]),min(v[2],v[3])); }
-__forceinline int reduce_max(const ssei& v) { return max(max(v[0],v[1]),max(v[2],v[3])); }
+__forceinline int ssei_min(int a, int b) { return (a < b)? a: b; } /* smaller of two ints */
+__forceinline int ssei_max(int a, int b) { return (a > b)? a: b; } /* larger of two ints */
+__forceinline int reduce_min(const ssei& v) { return ssei_min(ssei_min(v[0],v[1]),ssei_min(v[2],v[3])); } /* smallest of the four elements v[0..3], via the local ssei_min instead of min() */
+__forceinline int reduce_max(const ssei& v) { return ssei_max(ssei_max(v[0],v[1]),ssei_max(v[2],v[3])); } /* largest of the four elements v[0..3], via the local ssei_max instead of max() */
__forceinline int reduce_add(const ssei& v) { return v[0]+v[1]+v[2]+v[3]; }
#endif
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index a5d1d7152d5..d9642df8005 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -21,72 +21,17 @@
# include <stdlib.h>
#endif
-/* Bitness */
+/* Standard Integer Types */
-#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64)
-# define __KERNEL_64_BIT__
+#if !defined(__KERNEL_GPU__) && !defined(_WIN32)
+# include <stdint.h>
#endif
-/* Qualifiers for kernel code shared by CPU and GPU */
-
-#ifndef __KERNEL_GPU__
-# define ccl_device static inline
-# define ccl_device_noinline static
-# define ccl_global
-# define ccl_constant
-# define ccl_local
-# define ccl_local_param
-# define ccl_private
-# define ccl_restrict __restrict
-# define __KERNEL_WITH_SSE_ALIGN__
-
-# if defined(_WIN32) && !defined(FREE_WINDOWS)
-# define ccl_device_inline static __forceinline
-# define ccl_device_forceinline static __forceinline
-# define ccl_align(...) __declspec(align(__VA_ARGS__))
-# ifdef __KERNEL_64_BIT__
-# define ccl_try_align(...) __declspec(align(__VA_ARGS__))
-# else /* __KERNEL_64_BIT__ */
-# undef __KERNEL_WITH_SSE_ALIGN__
-/* No support for function arguments (error C2719). */
-# define ccl_try_align(...)
-# endif /* __KERNEL_64_BIT__ */
-# define ccl_may_alias
-# define ccl_always_inline __forceinline
-# define ccl_never_inline __declspec(noinline)
-# define ccl_maybe_unused
-# else /* _WIN32 && !FREE_WINDOWS */
-# define ccl_device_inline static inline __attribute__((always_inline))
-# define ccl_device_forceinline static inline __attribute__((always_inline))
-# define ccl_align(...) __attribute__((aligned(__VA_ARGS__)))
-# ifndef FREE_WINDOWS64
-# define __forceinline inline __attribute__((always_inline))
-# endif
-# define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__)))
-# define ccl_may_alias __attribute__((__may_alias__))
-# define ccl_always_inline __attribute__((always_inline))
-# define ccl_never_inline __attribute__((noinline))
-# define ccl_maybe_unused __attribute__((used))
-# endif /* _WIN32 && !FREE_WINDOWS */
-
-/* Use to suppress '-Wimplicit-fallthrough' (in place of 'break'). */
-# if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */
-# define ATTR_FALLTHROUGH __attribute__((fallthrough))
-# else
-# define ATTR_FALLTHROUGH ((void)0)
-# endif
-#endif /* __KERNEL_GPU__ */
-
-/* Standard Integer Types */
+#include "util/util_defines.h"
#ifndef __KERNEL_GPU__
-/* int8_t, uint16_t, and friends */
-# ifndef _WIN32
-# include <stdint.h>
-# endif
-/* SIMD Types */
-# include "util/util_optimization.h"
-#endif /* __KERNEL_GPU__ */
+# include "util/util_simd.h"
+#endif
CCL_NAMESPACE_BEGIN
@@ -201,65 +146,8 @@ enum ExtensionType {
EXTENSION_NUM_TYPES,
};
-/* macros */
-
-/* hints for branch prediction, only use in code that runs a _lot_ */
-#if defined(__GNUC__) && defined(__KERNEL_CPU__)
-# define LIKELY(x) __builtin_expect(!!(x), 1)
-# define UNLIKELY(x) __builtin_expect(!!(x), 0)
-#else
-# define LIKELY(x) (x)
-# define UNLIKELY(x) (x)
-#endif
-
-#if defined(__cplusplus) && ((__cplusplus >= 201103L) || (defined(_MSC_VER) && _MSC_VER >= 1800))
-# define HAS_CPP11_FEATURES
-#endif
-
-#if defined(__GNUC__) || defined(__clang__)
-# if defined(HAS_CPP11_FEATURES)
-/* Some magic to be sure we don't have reference in the type. */
-template<typename T> static inline T decltype_helper(T x) { return x; }
-# define TYPEOF(x) decltype(decltype_helper(x))
-# else
-# define TYPEOF(x) typeof(x)
-# endif
-#endif
-
-/* Causes warning:
- * incompatible types when assigning to type 'Foo' from type 'Bar'
- * ... the compiler optimizes away the temp var */
-#ifdef __GNUC__
-#define CHECK_TYPE(var, type) { \
- TYPEOF(var) *__tmp; \
- __tmp = (type *)NULL; \
- (void)__tmp; \
-} (void)0
-
-#define CHECK_TYPE_PAIR(var_a, var_b) { \
- TYPEOF(var_a) *__tmp; \
- __tmp = (typeof(var_b) *)NULL; \
- (void)__tmp; \
-} (void)0
-#else
-# define CHECK_TYPE(var, type)
-# define CHECK_TYPE_PAIR(var_a, var_b)
-#endif
-
-/* can be used in simple macros */
-#define CHECK_TYPE_INLINE(val, type) \
- ((void)(((type)0) != (val)))
-
-
CCL_NAMESPACE_END
-#ifndef __KERNEL_GPU__
-# include <cassert>
-# define util_assert(statement) assert(statement)
-#else
-# define util_assert(statement)
-#endif
-
/* Vectorized types declaration. */
#include "util/util_types_uchar2.h"
#include "util/util_types_uchar3.h"
@@ -298,5 +186,13 @@ CCL_NAMESPACE_END
#include "util/util_types_vector3_impl.h"
+/* SSE types. */
+#ifndef __KERNEL_GPU__
+# include "util/util_sseb.h"
+# include "util/util_ssei.h"
+# include "util/util_ssef.h"
+# include "util/util_avxf.h"
+#endif
+
#endif /* __UTIL_TYPES_H__ */