Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/util/util_types.h')
-rw-r--r--intern/cycles/util/util_types.h268
1 files changed, 214 insertions, 54 deletions
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index efdda98571a..cf167707e47 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -36,23 +36,37 @@
#define __shared
#define __constant
-#ifdef __GNUC__
-#define __device_inline static inline __attribute__((always_inline))
-#else
+#ifdef _WIN32
#define __device_inline static __forceinline
+#define __align(...) __declspec(align(__VA_ARGS__))
+#else
+#define __device_inline static inline __attribute__((always_inline))
+#define __forceinline inline __attribute__((always_inline))
+#define __align(...) __attribute__((aligned(__VA_ARGS__)))
#endif
#endif
+/* Bitness */
+
+#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64)
+#define __KERNEL_64_BIT__
+#endif
+
/* SIMD Types */
-/* not needed yet, will be for qbvh
-#ifndef __KERNEL_GPU__
+/* not enabled, globally applying it just gives slowdown,
+ * but useful for testing. */
+//#define __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
-#include <emmintrin.h>
-#include <xmmintrin.h>
+#include <xmmintrin.h> /* SSE 1 */
+#include <emmintrin.h> /* SSE 2 */
+#include <pmmintrin.h> /* SSE 3 */
+#include <tmmintrin.h> /* SSE 3 */
+#include <smmintrin.h> /* SSE 4 */
-#endif*/
+#endif
#ifndef _WIN32
#ifndef __KERNEL_GPU__
@@ -97,6 +111,12 @@ typedef unsigned int uint32_t;
typedef long long int64_t;
typedef unsigned long long uint64_t;
+#ifdef __KERNEL_64_BIT__
+typedef int64_t ssize_t;
+#else
+typedef int32_t ssize_t;
+#endif
+
#endif
/* Generic Memory Pointer */
@@ -108,89 +128,137 @@ typedef uint64_t device_ptr;
struct uchar2 {
uchar x, y;
- uchar operator[](int i) const { return *(&x + i); }
- uchar& operator[](int i) { return *(&x + i); }
+ __forceinline uchar operator[](int i) const { return *(&x + i); }
+ __forceinline uchar& operator[](int i) { return *(&x + i); }
};
struct uchar3 {
uchar x, y, z;
- uchar operator[](int i) const { return *(&x + i); }
- uchar& operator[](int i) { return *(&x + i); }
+ __forceinline uchar operator[](int i) const { return *(&x + i); }
+ __forceinline uchar& operator[](int i) { return *(&x + i); }
};
struct uchar4 {
uchar x, y, z, w;
- uchar operator[](int i) const { return *(&x + i); }
- uchar& operator[](int i) { return *(&x + i); }
+ __forceinline uchar operator[](int i) const { return *(&x + i); }
+ __forceinline uchar& operator[](int i) { return *(&x + i); }
};
struct int2 {
int x, y;
- int operator[](int i) const { return *(&x + i); }
- int& operator[](int i) { return *(&x + i); }
+ __forceinline int operator[](int i) const { return *(&x + i); }
+ __forceinline int& operator[](int i) { return *(&x + i); }
};
+#ifdef __KERNEL_SSE__
+struct __align(16) int3 {
+ union {
+ __m128i m128;
+ struct { int x, y, z, w; };
+ };
+
+ __forceinline int3() {}
+ __forceinline int3(const __m128i a) : m128(a) {}
+ __forceinline operator const __m128i&(void) const { return m128; }
+ __forceinline operator __m128i&(void) { return m128; }
+#else
struct int3 {
- int x, y, z;
+ int x, y, z, w;
+#endif
- int operator[](int i) const { return *(&x + i); }
- int& operator[](int i) { return *(&x + i); }
+ __forceinline int operator[](int i) const { return *(&x + i); }
+ __forceinline int& operator[](int i) { return *(&x + i); }
};
+#ifdef __KERNEL_SSE__
+struct __align(16) int4 {
+ union {
+ __m128i m128;
+ struct { int x, y, z, w; };
+ };
+
+ __forceinline int4() {}
+ __forceinline int4(const __m128i a) : m128(a) {}
+ __forceinline operator const __m128i&(void) const { return m128; }
+ __forceinline operator __m128i&(void) { return m128; }
+#else
struct int4 {
int x, y, z, w;
+#endif
- int operator[](int i) const { return *(&x + i); }
- int& operator[](int i) { return *(&x + i); }
+ __forceinline int operator[](int i) const { return *(&x + i); }
+ __forceinline int& operator[](int i) { return *(&x + i); }
};
struct uint2 {
uint x, y;
- uint operator[](int i) const { return *(&x + i); }
- uint& operator[](int i) { return *(&x + i); }
+ __forceinline uint operator[](uint i) const { return *(&x + i); }
+ __forceinline uint& operator[](uint i) { return *(&x + i); }
};
struct uint3 {
uint x, y, z;
- uint operator[](int i) const { return *(&x + i); }
- uint& operator[](int i) { return *(&x + i); }
+ __forceinline uint operator[](uint i) const { return *(&x + i); }
+ __forceinline uint& operator[](uint i) { return *(&x + i); }
};
struct uint4 {
uint x, y, z, w;
- uint operator[](int i) const { return *(&x + i); }
- uint& operator[](int i) { return *(&x + i); }
+ __forceinline uint operator[](uint i) const { return *(&x + i); }
+ __forceinline uint& operator[](uint i) { return *(&x + i); }
};
struct float2 {
float x, y;
- float operator[](int i) const { return *(&x + i); }
- float& operator[](int i) { return *(&x + i); }
+ __forceinline float operator[](int i) const { return *(&x + i); }
+ __forceinline float& operator[](int i) { return *(&x + i); }
};
+#ifdef __KERNEL_SSE__
+struct __align(16) float3 {
+ union {
+ __m128 m128;
+ struct { float x, y, z, w; };
+ };
+
+ __forceinline float3() {}
+ __forceinline float3(const __m128 a) : m128(a) {}
+ __forceinline operator const __m128&(void) const { return m128; }
+ __forceinline operator __m128&(void) { return m128; }
+#else
struct float3 {
- float x, y, z;
-
-#ifdef WITH_OPENCL
- float w;
+ float x, y, z, w;
#endif
- float operator[](int i) const { return *(&x + i); }
- float& operator[](int i) { return *(&x + i); }
+ __forceinline float operator[](int i) const { return *(&x + i); }
+ __forceinline float& operator[](int i) { return *(&x + i); }
};
+#ifdef __KERNEL_SSE__
+struct __align(16) float4 {
+ union {
+ __m128 m128;
+ struct { float x, y, z, w; };
+ };
+
+ __forceinline float4() {}
+ __forceinline float4(const __m128 a) : m128(a) {}
+ __forceinline operator const __m128&(void) const { return m128; }
+ __forceinline operator __m128&(void) { return m128; }
+#else
struct float4 {
float x, y, z, w;
+#endif
- float operator[](int i) const { return *(&x + i); }
- float& operator[](int i) { return *(&x + i); }
+ __forceinline float operator[](int i) const { return *(&x + i); }
+ __forceinline float& operator[](int i) { return *(&x + i); }
};
#endif
@@ -201,87 +269,179 @@ struct float4 {
*
* OpenCL does not support C++ class, so we use these instead. */
-__device uchar2 make_uchar2(uchar x, uchar y)
+__device_inline uchar2 make_uchar2(uchar x, uchar y)
{
uchar2 a = {x, y};
return a;
}
-__device uchar3 make_uchar3(uchar x, uchar y, uchar z)
+__device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z)
{
uchar3 a = {x, y, z};
return a;
}
-__device uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w)
+__device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w)
{
uchar4 a = {x, y, z, w};
return a;
}
-__device int2 make_int2(int x, int y)
+__device_inline int2 make_int2(int x, int y)
{
int2 a = {x, y};
return a;
}
-__device int3 make_int3(int x, int y, int z)
+__device_inline int3 make_int3(int x, int y, int z)
{
- int3 a = {x, y, z};
+#ifdef __KERNEL_SSE__
+ int3 a;
+ a.m128 = _mm_set_epi32(0, z, y, x);
+#else
+ int3 a = {x, y, z, 0};
+#endif
+
return a;
}
-__device int4 make_int4(int x, int y, int z, int w)
+__device_inline int4 make_int4(int x, int y, int z, int w)
{
+#ifdef __KERNEL_SSE__
+ int4 a;
+ a.m128 = _mm_set_epi32(w, z, y, x);
+#else
int4 a = {x, y, z, w};
+#endif
+
return a;
}
-__device uint2 make_uint2(uint x, uint y)
+__device_inline uint2 make_uint2(uint x, uint y)
{
uint2 a = {x, y};
return a;
}
-__device uint3 make_uint3(uint x, uint y, uint z)
+__device_inline uint3 make_uint3(uint x, uint y, uint z)
{
uint3 a = {x, y, z};
return a;
}
-__device uint4 make_uint4(uint x, uint y, uint z, uint w)
+__device_inline uint4 make_uint4(uint x, uint y, uint z, uint w)
{
uint4 a = {x, y, z, w};
return a;
}
-__device float2 make_float2(float x, float y)
+__device_inline float2 make_float2(float x, float y)
{
float2 a = {x, y};
return a;
}
-__device float3 make_float3(float x, float y, float z)
+__device_inline float3 make_float3(float x, float y, float z)
{
-#ifdef WITH_OPENCL
- float3 a = {x, y, z, 0.0f};
+#ifdef __KERNEL_SSE__
+ float3 a;
+ a.m128 = _mm_set_ps(0.0f, z, y, x);
#else
- float3 a = {x, y, z};
+ float3 a = {x, y, z, 0.0f};
#endif
+
return a;
}
-__device float4 make_float4(float x, float y, float z, float w)
+__device_inline float4 make_float4(float x, float y, float z, float w)
{
+#ifdef __KERNEL_SSE__
+ float4 a;
+ a.m128 = _mm_set_ps(w, z, y, x);
+#else
float4 a = {x, y, z, w};
+#endif
+
return a;
}
-__device int align_up(int offset, int alignment)
+__device_inline int align_up(int offset, int alignment)
{
return (offset + alignment - 1) & ~(alignment - 1);
}
+__device_inline int3 make_int3(int i)
+{
+#ifdef __KERNEL_SSE__
+ int3 a;
+ a.m128 = _mm_set1_epi32(i);
+#else
+ int3 a = {i, i, i, i};
+#endif
+
+ return a;
+}
+
+__device_inline int4 make_int4(int i)
+{
+#ifdef __KERNEL_SSE__
+ int4 a;
+ a.m128 = _mm_set1_epi32(i);
+#else
+ int4 a = {i, i, i, i};
+#endif
+
+ return a;
+}
+
+__device_inline float3 make_float3(float f)
+{
+#ifdef __KERNEL_SSE__
+ float3 a;
+ a.m128 = _mm_set1_ps(f);
+#else
+ float3 a = {f, f, f, f};
+#endif
+
+ return a;
+}
+
+__device_inline float4 make_float4(float f)
+{
+#ifdef __KERNEL_SSE__
+ float4 a;
+ a.m128 = _mm_set1_ps(f);
+#else
+ float4 a = {f, f, f, f};
+#endif
+
+ return a;
+}
+
+__device_inline float4 make_float4(const int4& i)
+{
+#ifdef __KERNEL_SSE__
+ float4 a;
+ a.m128 = _mm_cvtepi32_ps(i.m128);
+#else
+ float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w};
+#endif
+
+ return a;
+}
+
+__device_inline int4 make_int4(const float3& f)
+{
+#ifdef __KERNEL_SSE__
+ int4 a;
+ a.m128 = _mm_cvtps_epi32(f.m128);
+#else
+ int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w};
+#endif
+
+ return a;
+}
+
#endif
CCL_NAMESPACE_END