diff options
Diffstat (limited to 'intern/cycles/util/util_types.h')
-rw-r--r-- | intern/cycles/util/util_types.h | 526 |
1 files changed, 60 insertions, 466 deletions
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index a000fae4bd6..84206a7ba5a 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -18,72 +18,20 @@ #define __UTIL_TYPES_H__ #ifndef __KERNEL_OPENCL__ - -#include <stdlib.h> - -#endif - -/* Bitness */ - -#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) -#define __KERNEL_64_BIT__ -#endif - -/* Qualifiers for kernel code shared by CPU and GPU */ - -#ifndef __KERNEL_GPU__ - -#define ccl_device static inline -#define ccl_device_noinline static -#define ccl_global -#define ccl_constant -#define ccl_restrict __restrict -#define __KERNEL_WITH_SSE_ALIGN__ - -#if defined(_WIN32) && !defined(FREE_WINDOWS) -#define ccl_device_inline static __forceinline -#define ccl_device_forceinline static __forceinline -#define ccl_align(...) __declspec(align(__VA_ARGS__)) -#ifdef __KERNEL_64_BIT__ -#define ccl_try_align(...) __declspec(align(__VA_ARGS__)) -#else -#undef __KERNEL_WITH_SSE_ALIGN__ -#define ccl_try_align(...) /* not support for function arguments (error C2719) */ -#endif -#define ccl_may_alias -#define ccl_always_inline __forceinline -#define ccl_maybe_unused - -#else - -#define ccl_device_inline static inline __attribute__((always_inline)) -#define ccl_device_forceinline static inline __attribute__((always_inline)) -#define ccl_align(...) __attribute__((aligned(__VA_ARGS__))) -#ifndef FREE_WINDOWS64 -#define __forceinline inline __attribute__((always_inline)) -#endif -#define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__))) -#define ccl_may_alias __attribute__((__may_alias__)) -#define ccl_always_inline __attribute__((always_inline)) -#define ccl_maybe_unused __attribute__((used)) - -#endif - +# include <stdlib.h> #endif /* Standard Integer Types */ -#ifndef __KERNEL_GPU__ - -/* int8_t, uint16_t, and friends */ -#ifndef _WIN32 -#include <stdint.h> +#if !defined(__KERNEL_GPU__) && !defined(_WIN32) +# include <stdint.h> #endif -/* SIMD Types */ - -#include "util_optimization.h" +#include "util/util_defines.h" +#ifndef __KERNEL_GPU__ +# include "util/util_optimization.h" +# include "util/util_simd.h" #endif CCL_NAMESPACE_BEGIN @@ -97,18 +45,19 @@ CCL_NAMESPACE_BEGIN /* Shorter Unsigned Names */ #ifndef __KERNEL_OPENCL__ - typedef unsigned char uchar; typedef unsigned int uint; - +typedef unsigned short ushort; #endif -#ifndef __KERNEL_GPU__ - /* Fixed Bits Types */ -#ifdef _WIN32 +#ifdef __KERNEL_OPENCL__ +typedef ulong uint64_t; +#endif +#ifndef __KERNEL_GPU__ +# ifdef _WIN32 typedef signed char int8_t; typedef unsigned char uint8_t; @@ -120,440 +69,85 @@ typedef unsigned int uint32_t; typedef long long int64_t; typedef unsigned long long uint64_t; - -#ifdef __KERNEL_64_BIT__ +# ifdef __KERNEL_64_BIT__ typedef int64_t ssize_t; -#else +# else typedef int32_t ssize_t; -#endif - -#endif +# endif +# endif /* _WIN32 */ /* Generic Memory Pointer */ typedef uint64_t device_ptr; +#endif /* __KERNEL_GPU__ */ -/* Vector Types */ - -struct uchar2 { - uchar x, y; - - __forceinline uchar operator[](int i) const { return *(&x + i); } - __forceinline uchar& operator[](int i) { return *(&x + i); } -}; - -struct uchar3 { - uchar x, y, z; - - __forceinline uchar operator[](int i) const { return *(&x + i); } - __forceinline uchar& operator[](int i) { return *(&x + i); } -}; - -struct uchar4 { - uchar x, y, z, w; - - __forceinline uchar operator[](int i) const { return *(&x + i); } - __forceinline uchar& operator[](int i) { return *(&x + i); } -}; - -struct int2 { - int x, y; - - __forceinline int operator[](int i) const { return *(&x + i); } - __forceinline int& operator[](int i) { return *(&x + i); } -}; - -struct ccl_try_align(16) int3 { -#ifdef __KERNEL_SSE__ - union { - __m128i m128; - struct { int x, y, z, w; }; - }; - - __forceinline int3() {} - __forceinline int3(const __m128i a) : m128(a) {} - __forceinline operator const __m128i&(void) const { return m128; } - __forceinline operator __m128i&(void) { return m128; } - - int3(const int3& a) { m128 = a.m128; } - int3& operator =(const int3& a) { m128 = a.m128; return *this; } -#else - int x, y, z, w; -#endif - - __forceinline int operator[](int i) const { return *(&x + i); } - __forceinline int& operator[](int i) { return *(&x + i); } -}; - -struct ccl_try_align(16) int4 { -#ifdef __KERNEL_SSE__ - union { - __m128i m128; - struct { int x, y, z, w; }; - }; - - __forceinline int4() {} - __forceinline int4(const __m128i a) : m128(a) {} - __forceinline operator const __m128i&(void) const { return m128; } - __forceinline operator __m128i&(void) { return m128; } - - int4(const int4& a) : m128(a.m128) {} - int4& operator=(const int4& a) { m128 = a.m128; return *this; } -#else - int x, y, z, w; -#endif - - __forceinline int operator[](int i) const { return *(&x + i); } - __forceinline int& operator[](int i) { return *(&x + i); } -}; - -struct uint2 { - uint x, y; - - __forceinline uint operator[](uint i) const { return *(&x + i); } - __forceinline uint& operator[](uint i) { return *(&x + i); } -}; - -struct uint3 { - uint x, y, z; - - __forceinline uint operator[](uint i) const { return *(&x + i); } - __forceinline uint& operator[](uint i) { return *(&x + i); } -}; - -struct uint4 { - uint x, y, z, w; - - __forceinline uint operator[](uint i) const { return *(&x + i); } - __forceinline uint& operator[](uint i) { return *(&x + i); } -}; - -struct float2 { - float x, y; - - __forceinline float operator[](int i) const { return *(&x + i); } - __forceinline float& operator[](int i) { return *(&x + i); } -}; - -struct ccl_try_align(16) float3 { -#ifdef __KERNEL_SSE__ - union { - __m128 m128; - struct { float x, y, z, w; }; - }; - - __forceinline float3() {} - __forceinline float3(const __m128& a) : m128(a) {} - __forceinline operator const __m128&(void) const { return m128; } - __forceinline operator __m128&(void) { return m128; } - - __forceinline float3(const float3& a) : m128(a.m128) {} - __forceinline float3& operator =(const float3& a) { m128 = a.m128; return *this; } -#else - float x, y, z, w; -#endif - - __forceinline float operator[](int i) const { return *(&x + i); } - __forceinline float& operator[](int i) { return *(&x + i); } -}; - -struct ccl_try_align(16) float4 { -#ifdef __KERNEL_SSE__ - union { - __m128 m128; - struct { float x, y, z, w; }; - }; - - __forceinline float4() {} - __forceinline float4(const __m128 a) : m128(a) {} - __forceinline operator const __m128&(void) const { return m128; } - __forceinline operator __m128&(void) { return m128; } - - __forceinline float4(const float4& a) : m128(a.m128) {} - __forceinline float4& operator =(const float4& a) { m128 = a.m128; return *this; } - -#else - float x, y, z, w; -#endif - - __forceinline float operator[](int i) const { return *(&x + i); } - __forceinline float& operator[](int i) { return *(&x + i); } -}; - -template<typename T> -class vector3 -{ -public: - T x, y, z; - - ccl_always_inline vector3() {} - ccl_always_inline vector3(const T& a) - : x(a), y(a), z(a) {} - ccl_always_inline vector3(const T& x, const T& y, const T& z) - : x(x), y(y), z(z) {} -}; - -#endif - -#ifndef __KERNEL_GPU__ - -/* Vector Type Constructors - * - * OpenCL does not support C++ class, so we use these instead. */ - -ccl_device_inline uchar2 make_uchar2(uchar x, uchar y) -{ - uchar2 a = {x, y}; - return a; -} - -ccl_device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z) -{ - uchar3 a = {x, y, z}; - return a; -} - -ccl_device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w) -{ - uchar4 a = {x, y, z, w}; - return a; -} - -ccl_device_inline int2 make_int2(int x, int y) -{ - int2 a = {x, y}; - return a; -} - -ccl_device_inline int3 make_int3(int x, int y, int z) -{ -#ifdef __KERNEL_SSE__ - int3 a; - a.m128 = _mm_set_epi32(0, z, y, x); -#else - int3 a = {x, y, z, 0}; -#endif - - return a; -} - -ccl_device_inline int4 make_int4(int x, int y, int z, int w) -{ -#ifdef __KERNEL_SSE__ - int4 a; - a.m128 = _mm_set_epi32(w, z, y, x); -#else - int4 a = {x, y, z, w}; -#endif - - return a; -} - -ccl_device_inline uint2 make_uint2(uint x, uint y) -{ - uint2 a = {x, y}; - return a; -} - -ccl_device_inline uint3 make_uint3(uint x, uint y, uint z) -{ - uint3 a = {x, y, z}; - return a; -} - -ccl_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w) -{ - uint4 a = {x, y, z, w}; - return a; -} - -ccl_device_inline float2 make_float2(float x, float y) -{ - float2 a = {x, y}; - return a; -} - -ccl_device_inline float3 make_float3(float x, float y, float z) -{ -#ifdef __KERNEL_SSE__ - float3 a; - a.m128 = _mm_set_ps(0.0f, z, y, x); -#else - float3 a = {x, y, z, 0.0f}; -#endif - - return a; -} - -ccl_device_inline float4 make_float4(float x, float y, float z, float w) -{ -#ifdef __KERNEL_SSE__ - float4 a; - a.m128 = _mm_set_ps(w, z, y, x); -#else - float4 a = {x, y, z, w}; -#endif - - return a; -} - -ccl_device_inline int align_up(int offset, int alignment) +ccl_device_inline size_t align_up(size_t offset, size_t alignment) { return (offset + alignment - 1) & ~(alignment - 1); } -ccl_device_inline int3 make_int3(int i) +ccl_device_inline size_t divide_up(size_t x, size_t y) { -#ifdef __KERNEL_SSE__ - int3 a; - a.m128 = _mm_set1_epi32(i); -#else - int3 a = {i, i, i, i}; -#endif - - return a; + return (x + y - 1) / y; } -ccl_device_inline int4 make_int4(int i) +ccl_device_inline size_t round_up(size_t x, size_t multiple) { -#ifdef __KERNEL_SSE__ - int4 a; - a.m128 = _mm_set1_epi32(i); -#else - int4 a = {i, i, i, i}; -#endif - - return a; + return ((x + multiple - 1) / multiple) * multiple; } -ccl_device_inline float3 make_float3(float f) +ccl_device_inline size_t round_down(size_t x, size_t multiple) { -#ifdef __KERNEL_SSE__ - float3 a; - a.m128 = _mm_set1_ps(f); -#else - float3 a = {f, f, f, f}; -#endif - - return a; + return (x / multiple) * multiple; } -ccl_device_inline float4 make_float4(float f) -{ -#ifdef __KERNEL_SSE__ - float4 a; - a.m128 = _mm_set1_ps(f); -#else - float4 a = {f, f, f, f}; -#endif - - return a; -} +CCL_NAMESPACE_END -ccl_device_inline float4 make_float4(const int4& i) -{ -#ifdef __KERNEL_SSE__ - float4 a; - a.m128 = _mm_cvtepi32_ps(i.m128); -#else - float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w}; -#endif +/* Vectorized types declaration. */ +#include "util/util_types_uchar2.h" +#include "util/util_types_uchar3.h" +#include "util/util_types_uchar4.h" - return a; -} +#include "util/util_types_int2.h" +#include "util/util_types_int3.h" +#include "util/util_types_int4.h" -ccl_device_inline int4 make_int4(const float3& f) -{ -#ifdef __KERNEL_SSE__ - int4 a; - a.m128 = _mm_cvtps_epi32(f.m128); -#else - int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; -#endif +#include "util/util_types_uint2.h" +#include "util/util_types_uint3.h" +#include "util/util_types_uint4.h" - return a; -} +#include "util/util_types_float2.h" +#include "util/util_types_float3.h" +#include "util/util_types_float4.h" -#endif +#include "util/util_types_vector3.h" -/* Interpolation types for textures - * cuda also use texture space to store other objects */ -enum InterpolationType { - INTERPOLATION_NONE = -1, - INTERPOLATION_LINEAR = 0, - INTERPOLATION_CLOSEST = 1, - INTERPOLATION_CUBIC = 2, - INTERPOLATION_SMART = 3, +/* Vectorized types implementation. */ +#include "util/util_types_uchar2_impl.h" +#include "util/util_types_uchar3_impl.h" +#include "util/util_types_uchar4_impl.h" - INTERPOLATION_NUM_TYPES, -}; +#include "util/util_types_int2_impl.h" +#include "util/util_types_int3_impl.h" +#include "util/util_types_int4_impl.h" -/* Extension types for textures. - * - * Defines how the image is extrapolated past its original bounds. - */ -enum ExtensionType { - /* Cause the image to repeat horizontally and vertically. */ - EXTENSION_REPEAT = 0, - /* Extend by repeating edge pixels of the image. */ - EXTENSION_EXTEND = 1, - /* Clip to image size and set exterior pixels as transparent. */ - EXTENSION_CLIP = 2, - - EXTENSION_NUM_TYPES, -}; - -/* macros */ - -/* hints for branch prediction, only use in code that runs a _lot_ */ -#if defined(__GNUC__) && defined(__KERNEL_CPU__) -# define LIKELY(x) __builtin_expect(!!(x), 1) -# define UNLIKELY(x) __builtin_expect(!!(x), 0) -#else -# define LIKELY(x) (x) -# define UNLIKELY(x) (x) -#endif +#include "util/util_types_uint2_impl.h" +#include "util/util_types_uint3_impl.h" +#include "util/util_types_uint4_impl.h" -#if defined(__cplusplus) && ((__cplusplus >= 201103L) || (defined(_MSC_VER) && _MSC_VER >= 1800)) -# define HAS_CPP11_FEATURES -#endif +#include "util/util_types_float2_impl.h" +#include "util/util_types_float3_impl.h" +#include "util/util_types_float4_impl.h" -#if defined(__GNUC__) || defined(__clang__) -# if defined(HAS_CPP11_FEATURES) -/* Some magic to be sure we don't have reference in the type. */ -template<typename T> static inline T decltype_helper(T x) { return x; } -# define TYPEOF(x) decltype(decltype_helper(x)) -# else -# define TYPEOF(x) typeof(x) -# endif -#endif +#include "util/util_types_vector3_impl.h" -/* Causes warning: - * incompatible types when assigning to type 'Foo' from type 'Bar' - * ... the compiler optimizes away the temp var */ -#ifdef __GNUC__ -#define CHECK_TYPE(var, type) { \ - TYPEOF(var) *__tmp; \ - __tmp = (type *)NULL; \ - (void)__tmp; \ -} (void)0 - -#define CHECK_TYPE_PAIR(var_a, var_b) { \ - TYPEOF(var_a) *__tmp; \ - __tmp = (typeof(var_b) *)NULL; \ - (void)__tmp; \ -} (void)0 -#else -# define CHECK_TYPE(var, type) -# define CHECK_TYPE_PAIR(var_a, var_b) +/* SSE types. */ +#ifndef __KERNEL_GPU__ +# include "util/util_sseb.h" +# include "util/util_ssei.h" +# include "util/util_ssef.h" +# include "util/util_avxf.h" #endif -/* can be used in simple macros */ -#define CHECK_TYPE_INLINE(val, type) \ - ((void)(((type)0) != (val))) - - -CCL_NAMESPACE_END - #endif /* __UTIL_TYPES_H__ */ |