diff options
Diffstat (limited to 'intern/cycles/util/util_types.h')
-rw-r--r-- | intern/cycles/util/util_types.h | 268 |
1 files changed, 214 insertions, 54 deletions
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index efdda98571a..cf167707e47 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -36,23 +36,37 @@ #define __shared #define __constant -#ifdef __GNUC__ -#define __device_inline static inline __attribute__((always_inline)) -#else +#ifdef _WIN32 #define __device_inline static __forceinline +#define __align(...) __declspec(align(__VA_ARGS__)) +#else +#define __device_inline static inline __attribute__((always_inline)) +#define __forceinline inline __attribute__((always_inline)) +#define __align(...) __attribute__((aligned(__VA_ARGS__))) #endif #endif +/* Bitness */ + +#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) +#define __KERNEL_64_BIT__ +#endif + /* SIMD Types */ -/* not needed yet, will be for qbvh -#ifndef __KERNEL_GPU__ +/* not enabled, globally applying it just gives slowdown, + * but useful for testing. */ +//#define __KERNEL_SSE__ +#ifdef __KERNEL_SSE__ -#include <emmintrin.h> -#include <xmmintrin.h> +#include <xmmintrin.h> /* SSE 1 */ +#include <emmintrin.h> /* SSE 2 */ +#include <pmmintrin.h> /* SSE 3 */ +#include <tmmintrin.h> /* SSE 3 */ +#include <smmintrin.h> /* SSE 4 */ -#endif*/ +#endif #ifndef _WIN32 #ifndef __KERNEL_GPU__ @@ -97,6 +111,12 @@ typedef unsigned int uint32_t; typedef long long int64_t; typedef unsigned long long uint64_t; +#ifdef __KERNEL_64_BIT__ +typedef int64_t ssize_t; +#else +typedef int32_t ssize_t; +#endif + #endif /* Generic Memory Pointer */ @@ -108,89 +128,137 @@ typedef uint64_t device_ptr; struct uchar2 { uchar x, y; - uchar operator[](int i) const { return *(&x + i); } - uchar& operator[](int i) { return *(&x + i); } + __forceinline uchar operator[](int i) const { return *(&x + i); } + __forceinline uchar& operator[](int i) { return *(&x + i); } }; struct uchar3 { uchar x, y, z; - uchar operator[](int i) const { return *(&x + i); } - uchar& operator[](int i) { return *(&x + i); } + __forceinline uchar operator[](int i) const { return *(&x + i); } + __forceinline uchar& operator[](int i) { return *(&x + i); } }; struct uchar4 { uchar x, y, z, w; - uchar operator[](int i) const { return *(&x + i); } - uchar& operator[](int i) { return *(&x + i); } + __forceinline uchar operator[](int i) const { return *(&x + i); } + __forceinline uchar& operator[](int i) { return *(&x + i); } }; struct int2 { int x, y; - int operator[](int i) const { return *(&x + i); } - int& operator[](int i) { return *(&x + i); } + __forceinline int operator[](int i) const { return *(&x + i); } + __forceinline int& operator[](int i) { return *(&x + i); } }; +#ifdef __KERNEL_SSE__ +struct __align(16) int3 { + union { + __m128i m128; + struct { int x, y, z, w; }; + }; + + __forceinline int3() {} + __forceinline int3(const __m128i a) : m128(a) {} + __forceinline operator const __m128i&(void) const { return m128; } + __forceinline operator __m128i&(void) { return m128; } +#else struct int3 { - int x, y, z; + int x, y, z, w; +#endif - int operator[](int i) const { return *(&x + i); } - int& operator[](int i) { return *(&x + i); } + __forceinline int operator[](int i) const { return *(&x + i); } + __forceinline int& operator[](int i) { return *(&x + i); } }; +#ifdef __KERNEL_SSE__ +struct __align(16) int4 { + union { + __m128i m128; + struct { int x, y, z, w; }; + }; + + __forceinline int4() {} + __forceinline int4(const __m128i a) : m128(a) {} + __forceinline operator const __m128i&(void) const { return m128; } + __forceinline operator __m128i&(void) { return m128; } +#else struct int4 { int x, y, z, w; +#endif - int operator[](int i) const { return *(&x + i); } - int& operator[](int i) { return *(&x + i); } + __forceinline int operator[](int i) const { return *(&x + i); } + __forceinline int& operator[](int i) { return *(&x + i); } }; struct uint2 { uint x, y; - uint operator[](int i) const { return *(&x + i); } - uint& operator[](int i) { return *(&x + i); } + __forceinline uint operator[](uint i) const { return *(&x + i); } + __forceinline uint& operator[](uint i) { return *(&x + i); } }; struct uint3 { uint x, y, z; - uint operator[](int i) const { return *(&x + i); } - uint& operator[](int i) { return *(&x + i); } + __forceinline uint operator[](uint i) const { return *(&x + i); } + __forceinline uint& operator[](uint i) { return *(&x + i); } }; struct uint4 { uint x, y, z, w; - uint operator[](int i) const { return *(&x + i); } - uint& operator[](int i) { return *(&x + i); } + __forceinline uint operator[](uint i) const { return *(&x + i); } + __forceinline uint& operator[](uint i) { return *(&x + i); } }; struct float2 { float x, y; - float operator[](int i) const { return *(&x + i); } - float& operator[](int i) { return *(&x + i); } + __forceinline float operator[](int i) const { return *(&x + i); } + __forceinline float& operator[](int i) { return *(&x + i); } }; +#ifdef __KERNEL_SSE__ +struct __align(16) float3 { + union { + __m128 m128; + struct { float x, y, z, w; }; + }; + + __forceinline float3() {} + __forceinline float3(const __m128 a) : m128(a) {} + __forceinline operator const __m128&(void) const { return m128; } + __forceinline operator __m128&(void) { return m128; } +#else struct float3 { - float x, y, z; - -#ifdef WITH_OPENCL - float w; + float x, y, z, w; #endif - float operator[](int i) const { return *(&x + i); } - float& operator[](int i) { return *(&x + i); } + __forceinline float operator[](int i) const { return *(&x + i); } + __forceinline float& operator[](int i) { return *(&x + i); } }; +#ifdef __KERNEL_SSE__ +struct __align(16) float4 { + union { + __m128 m128; + struct { float x, y, z, w; }; + }; + + __forceinline float4() {} + __forceinline float4(const __m128 a) : m128(a) {} + __forceinline operator const __m128&(void) const { return m128; } + __forceinline operator __m128&(void) { return m128; } +#else struct float4 { float x, y, z, w; +#endif - float operator[](int i) const { return *(&x + i); } - float& operator[](int i) { return *(&x + i); } + __forceinline float operator[](int i) const { return *(&x + i); } + __forceinline float& operator[](int i) { return *(&x + i); } }; #endif @@ -201,87 +269,179 @@ struct float4 { * * OpenCL does not support C++ class, so we use these instead. */ -__device uchar2 make_uchar2(uchar x, uchar y) +__device_inline uchar2 make_uchar2(uchar x, uchar y) { uchar2 a = {x, y}; return a; } -__device uchar3 make_uchar3(uchar x, uchar y, uchar z) +__device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z) { uchar3 a = {x, y, z}; return a; } -__device uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w) +__device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w) { uchar4 a = {x, y, z, w}; return a; } -__device int2 make_int2(int x, int y) +__device_inline int2 make_int2(int x, int y) { int2 a = {x, y}; return a; } -__device int3 make_int3(int x, int y, int z) +__device_inline int3 make_int3(int x, int y, int z) { - int3 a = {x, y, z}; +#ifdef __KERNEL_SSE__ + int3 a; + a.m128 = _mm_set_epi32(0, z, y, x); +#else + int3 a = {x, y, z, 0}; +#endif + return a; } -__device int4 make_int4(int x, int y, int z, int w) +__device_inline int4 make_int4(int x, int y, int z, int w) { +#ifdef __KERNEL_SSE__ + int4 a; + a.m128 = _mm_set_epi32(w, z, y, x); +#else int4 a = {x, y, z, w}; +#endif + return a; } -__device uint2 make_uint2(uint x, uint y) +__device_inline uint2 make_uint2(uint x, uint y) { uint2 a = {x, y}; return a; } -__device uint3 make_uint3(uint x, uint y, uint z) +__device_inline uint3 make_uint3(uint x, uint y, uint z) { uint3 a = {x, y, z}; return a; } -__device uint4 make_uint4(uint x, uint y, uint z, uint w) +__device_inline uint4 make_uint4(uint x, uint y, uint z, uint w) { uint4 a = {x, y, z, w}; return a; } -__device float2 make_float2(float x, float y) +__device_inline float2 make_float2(float x, float y) { float2 a = {x, y}; return a; } -__device float3 make_float3(float x, float y, float z) +__device_inline float3 make_float3(float x, float y, float z) { -#ifdef WITH_OPENCL - float3 a = {x, y, z, 0.0f}; +#ifdef __KERNEL_SSE__ + float3 a; + a.m128 = _mm_set_ps(0.0f, z, y, x); #else - float3 a = {x, y, z}; + float3 a = {x, y, z, 0.0f}; #endif + return a; } -__device float4 make_float4(float x, float y, float z, float w) +__device_inline float4 make_float4(float x, float y, float z, float w) { +#ifdef __KERNEL_SSE__ + float4 a; + a.m128 = _mm_set_ps(w, z, y, x); +#else float4 a = {x, y, z, w}; +#endif + return a; } -__device int align_up(int offset, int alignment) +__device_inline int align_up(int offset, int alignment) { return (offset + alignment - 1) & ~(alignment - 1); } +__device_inline int3 make_int3(int i) +{ +#ifdef __KERNEL_SSE__ + int3 a; + a.m128 = _mm_set1_epi32(i); +#else + int3 a = {i, i, i, i}; +#endif + + return a; +} + +__device_inline int4 make_int4(int i) +{ +#ifdef __KERNEL_SSE__ + int4 a; + a.m128 = _mm_set1_epi32(i); +#else + int4 a = {i, i, i, i}; +#endif + + return a; +} + +__device_inline float3 make_float3(float f) +{ +#ifdef __KERNEL_SSE__ + float3 a; + a.m128 = _mm_set1_ps(f); +#else + float3 a = {f, f, f, f}; +#endif + + return a; +} + +__device_inline float4 make_float4(float f) +{ +#ifdef __KERNEL_SSE__ + float4 a; + a.m128 = _mm_set1_ps(f); +#else + float4 a = {f, f, f, f}; +#endif + + return a; +} + +__device_inline float4 make_float4(const int4& i) +{ +#ifdef __KERNEL_SSE__ + float4 a; + a.m128 = _mm_cvtepi32_ps(i.m128); +#else + float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w}; +#endif + + return a; +} + +__device_inline int4 make_int4(const float3& f) +{ +#ifdef __KERNEL_SSE__ + int4 a; + a.m128 = _mm_cvtps_epi32(f.m128); +#else + int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; +#endif + + return a; +} + #endif CCL_NAMESPACE_END |