1 files changed, 60 insertions, 466 deletions
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index a000fae4bd6..84206a7ba5a 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -18,72 +18,20 @@
 #define __UTIL_TYPES_H__
 
 #ifndef __KERNEL_OPENCL__
-
-#include <stdlib.h>
-
-#endif
-
-/* Bitness */
-
-#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64)
-#define __KERNEL_64_BIT__
-#endif
-
-/* Qualifiers for kernel code shared by CPU and GPU */
-
-#ifndef __KERNEL_GPU__
-
-#define ccl_device static inline
-#define ccl_device_noinline static
-#define ccl_global
-#define ccl_constant
-#define ccl_restrict __restrict
-#define __KERNEL_WITH_SSE_ALIGN__
-
-#if defined(_WIN32) && !defined(FREE_WINDOWS)
-#define ccl_device_inline static __forceinline
-#define ccl_device_forceinline static __forceinline
-#define ccl_align(...) __declspec(align(__VA_ARGS__))
-#ifdef __KERNEL_64_BIT__
-#define ccl_try_align(...) __declspec(align(__VA_ARGS__))
-#else
-#undef __KERNEL_WITH_SSE_ALIGN__
-#define ccl_try_align(...) /* not support for function arguments (error C2719) */
-#endif
-#define ccl_may_alias
-#define ccl_always_inline __forceinline
-#define ccl_maybe_unused
-
-#else
-
-#define ccl_device_inline static inline __attribute__((always_inline))
-#define ccl_device_forceinline static inline __attribute__((always_inline))
-#define ccl_align(...) __attribute__((aligned(__VA_ARGS__)))
-#ifndef FREE_WINDOWS64
-#define __forceinline inline __attribute__((always_inline))
-#endif
-#define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__)))
-#define ccl_may_alias __attribute__((__may_alias__))
-#define ccl_always_inline __attribute__((always_inline))
-#define ccl_maybe_unused __attribute__((used))
-
-#endif
-
+#  include <stdlib.h>
 #endif
 
 /* Standard Integer Types */
 
-#ifndef __KERNEL_GPU__
-
-/* int8_t, uint16_t, and friends */
-#ifndef _WIN32
-#include <stdint.h>
+#if !defined(__KERNEL_GPU__) && !defined(_WIN32)
+#  include <stdint.h>
 #endif
 
-/* SIMD Types */
-
-#include "util_optimization.h"
+#include "util/util_defines.h"
 
+#ifndef __KERNEL_GPU__
+#  include "util/util_optimization.h"
+#  include "util/util_simd.h"
 #endif
 
 CCL_NAMESPACE_BEGIN
@@ -97,18 +45,19 @@ CCL_NAMESPACE_BEGIN
 /* Shorter Unsigned Names */
 
 #ifndef __KERNEL_OPENCL__
-
 typedef unsigned char uchar;
 typedef unsigned int uint;
-
+typedef unsigned short ushort;
 #endif
 
-#ifndef __KERNEL_GPU__
-
 /* Fixed Bits Types */
 
-#ifdef _WIN32
+#ifdef __KERNEL_OPENCL__
+typedef ulong uint64_t;  /* NOTE(review): assumes the OpenCL built-in ulong is a 64-bit unsigned integer -- confirm against the OpenCL C spec. */
+#endif
 
+#ifndef __KERNEL_GPU__
+#  ifdef _WIN32
 typedef signed char int8_t;
 typedef unsigned char uint8_t;
 
@@ -120,440 +69,85 @@ typedef unsigned int uint32_t;
 
 typedef long long int64_t;
 typedef unsigned long long uint64_t;
-
-#ifdef __KERNEL_64_BIT__
+#    ifdef __KERNEL_64_BIT__
 typedef int64_t ssize_t;
-#else
+#    else
 typedef int32_t ssize_t;
-#endif
-
-#endif
+#    endif
+#  endif  /* _WIN32 */
 
 /* Generic Memory Pointer */
 
 typedef uint64_t device_ptr;
+#endif  /* __KERNEL_GPU__ */
 
-/* Vector Types */
-
-struct uchar2 {
-	uchar x, y;
-
-	__forceinline uchar operator[](int i) const { return *(&x + i); }
-	__forceinline uchar& operator[](int i) { return *(&x + i); }
-};
-
-struct uchar3 {
-	uchar x, y, z;
-
-	__forceinline uchar operator[](int i) const { return *(&x + i); }
-	__forceinline uchar& operator[](int i) { return *(&x + i); }
-};
-
-struct uchar4 {
-	uchar x, y, z, w;
-
-	__forceinline uchar operator[](int i) const { return *(&x + i); }
-	__forceinline uchar& operator[](int i) { return *(&x + i); }
-};
-
-struct int2 {
-	int x, y;
-
-	__forceinline int operator[](int i) const { return *(&x + i); }
-	__forceinline int& operator[](int i) { return *(&x + i); }
-};
-
-struct ccl_try_align(16) int3 {
-#ifdef __KERNEL_SSE__
-	union {
-		__m128i m128;
-		struct { int x, y, z, w; };
-	};
-
-	__forceinline int3() {}
-	__forceinline int3(const __m128i a) : m128(a) {}
-	__forceinline operator const __m128i&(void) const { return m128; }
-	__forceinline operator __m128i&(void) { return m128; }
-
-	int3(const int3& a) { m128 = a.m128; }
-	int3& operator =(const int3& a) { m128 = a.m128; return *this; }
-#else
-	int x, y, z, w;
-#endif
-
-	__forceinline int operator[](int i) const { return *(&x + i); }
-	__forceinline int& operator[](int i) { return *(&x + i); }
-};
-
-struct ccl_try_align(16) int4 {
-#ifdef __KERNEL_SSE__
-	union {
-		__m128i m128;
-		struct { int x, y, z, w; };
-	};
-
-	__forceinline int4() {}
-	__forceinline int4(const __m128i a) : m128(a) {}
-	__forceinline operator const __m128i&(void) const { return m128; }
-	__forceinline operator __m128i&(void) { return m128; }
-
-	int4(const int4& a) : m128(a.m128) {}
-	int4& operator=(const int4& a) { m128 = a.m128; return *this; }
-#else
-	int x, y, z, w;
-#endif
-
-	__forceinline int operator[](int i) const { return *(&x + i); }
-	__forceinline int& operator[](int i) { return *(&x + i); }
-};
-
-struct uint2 {
-	uint x, y;
-
-	__forceinline uint operator[](uint i) const { return *(&x + i); }
-	__forceinline uint& operator[](uint i) { return *(&x + i); }
-};
-
-struct uint3 {
-	uint x, y, z;
-
-	__forceinline uint operator[](uint i) const { return *(&x + i); }
-	__forceinline uint& operator[](uint i) { return *(&x + i); }
-};
-
-struct uint4 {
-	uint x, y, z, w;
-
-	__forceinline uint operator[](uint i) const { return *(&x + i); }
-	__forceinline uint& operator[](uint i) { return *(&x + i); }
-};
-
-struct float2 {
-	float x, y;
-
-	__forceinline float operator[](int i) const { return *(&x + i); }
-	__forceinline float& operator[](int i) { return *(&x + i); }
-};
-
-struct ccl_try_align(16) float3 {
-#ifdef __KERNEL_SSE__
-	union {
-		__m128 m128;
-		struct { float x, y, z, w; };
-	};
-
-	__forceinline float3() {}
-	__forceinline float3(const __m128& a) : m128(a) {}
-	__forceinline operator const __m128&(void) const { return m128; }
-	__forceinline operator __m128&(void) { return m128; }
-
-	__forceinline float3(const float3& a) : m128(a.m128) {}
-	__forceinline float3& operator =(const float3& a) { m128 = a.m128; return *this; }
-#else
-	float x, y, z, w;
-#endif
-
-	__forceinline float operator[](int i) const { return *(&x + i); }
-	__forceinline float& operator[](int i) { return *(&x + i); }
-};
-
-struct ccl_try_align(16) float4 {
-#ifdef __KERNEL_SSE__
-	union {
-		__m128 m128;
-		struct { float x, y, z, w; };
-	};
-
-	__forceinline float4() {}
-	__forceinline float4(const __m128 a) : m128(a) {}
-	__forceinline operator const __m128&(void) const { return m128; }
-	__forceinline operator __m128&(void) { return m128; }
-
-	__forceinline float4(const float4& a) : m128(a.m128) {}
-	__forceinline float4& operator =(const float4& a) { m128 = a.m128; return *this; }
-
-#else
-	float x, y, z, w;
-#endif
-
-	__forceinline float operator[](int i) const { return *(&x + i); }
-	__forceinline float& operator[](int i) { return *(&x + i); }
-};
-
-template<typename T>
-class vector3
-{
-public:
-	T x, y, z;
-
-	ccl_always_inline vector3() {}
-	ccl_always_inline vector3(const T& a)
-	  : x(a), y(a), z(a) {}
-	ccl_always_inline vector3(const T& x, const T& y, const T& z)
-	  : x(x), y(y), z(z) {}
-};
-
-#endif
-
-#ifndef __KERNEL_GPU__
-
-/* Vector Type Constructors
- * 
- * OpenCL does not support C++ class, so we use these instead. */
-
-ccl_device_inline uchar2 make_uchar2(uchar x, uchar y)
-{
-	uchar2 a = {x, y};
-	return a;
-}
-
-ccl_device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z)
-{
-	uchar3 a = {x, y, z};
-	return a;
-}
-
-ccl_device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w)
-{
-	uchar4 a = {x, y, z, w};
-	return a;
-}
-
-ccl_device_inline int2 make_int2(int x, int y)
-{
-	int2 a = {x, y};
-	return a;
-}
-
-ccl_device_inline int3 make_int3(int x, int y, int z)
-{
-#ifdef __KERNEL_SSE__
-	int3 a;
-	a.m128 = _mm_set_epi32(0, z, y, x);
-#else
-	int3 a = {x, y, z, 0};
-#endif
-
-	return a;
-}
-
-ccl_device_inline int4 make_int4(int x, int y, int z, int w)
-{
-#ifdef __KERNEL_SSE__
-	int4 a;
-	a.m128 = _mm_set_epi32(w, z, y, x);
-#else
-	int4 a = {x, y, z, w};
-#endif
-
-	return a;
-}
-
-ccl_device_inline uint2 make_uint2(uint x, uint y)
-{
-	uint2 a = {x, y};
-	return a;
-}
-
-ccl_device_inline uint3 make_uint3(uint x, uint y, uint z)
-{
-	uint3 a = {x, y, z};
-	return a;
-}
-
-ccl_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w)
-{
-	uint4 a = {x, y, z, w};
-	return a;
-}
-
-ccl_device_inline float2 make_float2(float x, float y)
-{
-	float2 a = {x, y};
-	return a;
-}
-
-ccl_device_inline float3 make_float3(float x, float y, float z)
-{
-#ifdef __KERNEL_SSE__
-	float3 a;
-	a.m128 = _mm_set_ps(0.0f, z, y, x);
-#else
-	float3 a = {x, y, z, 0.0f};
-#endif
-
-	return a;
-}
-
-ccl_device_inline float4 make_float4(float x, float y, float z, float w)
-{
-#ifdef __KERNEL_SSE__
-	float4 a;
-	a.m128 = _mm_set_ps(w, z, y, x);
-#else
-	float4 a = {x, y, z, w};
-#endif
-
-	return a;
-}
-
-ccl_device_inline int align_up(int offset, int alignment)
+ccl_device_inline size_t align_up(size_t offset, size_t alignment)  /* Rounds offset up via a bit mask; the result is a multiple of alignment only when alignment is a power of two. */
 {
 	return (offset + alignment - 1) & ~(alignment - 1);
 }
 
-ccl_device_inline int3 make_int3(int i)
+ccl_device_inline size_t divide_up(size_t x, size_t y)  /* Returns x / y rounded up to the next integer; y must be non-zero. */
 {
-#ifdef __KERNEL_SSE__
-	int3 a;
-	a.m128 = _mm_set1_epi32(i);
-#else
-	int3 a = {i, i, i, i};
-#endif
-
-	return a;
+	return (x + y - 1) / y;  /* Unsigned arithmetic: x + y - 1 wraps around if it exceeds the size_t range. */
 }
 
-ccl_device_inline int4 make_int4(int i)
+ccl_device_inline size_t round_up(size_t x, size_t multiple)  /* Returns the smallest multiple of `multiple` that is >= x; `multiple` must be non-zero (it is used as a divisor). */
 {
-#ifdef __KERNEL_SSE__
-	int4 a;
-	a.m128 = _mm_set1_epi32(i);
-#else
-	int4 a = {i, i, i, i};
-#endif
-
-	return a;
+	return ((x + multiple - 1) / multiple) * multiple;  /* Ceiling-divide, then scale back; works for any non-zero multiple, not only powers of two. */
 }
 
-ccl_device_inline float3 make_float3(float f)
+ccl_device_inline size_t round_down(size_t x, size_t multiple)  /* Returns the largest multiple of `multiple` that is <= x; `multiple` must be non-zero (it is used as a divisor). */
 {
-#ifdef __KERNEL_SSE__
-	float3 a;
-	a.m128 = _mm_set1_ps(f);
-#else
-	float3 a = {f, f, f, f};
-#endif
-
-	return a;
+	return (x / multiple) * multiple;  /* Integer division truncates, discarding the remainder before scaling back. */
 }
 
-ccl_device_inline float4 make_float4(float f)
-{
-#ifdef __KERNEL_SSE__
-	float4 a;
-	a.m128 = _mm_set1_ps(f);
-#else
-	float4 a = {f, f, f, f};
-#endif
-
-	return a;
-}
+CCL_NAMESPACE_END
 
-ccl_device_inline float4 make_float4(const int4& i)
-{
-#ifdef __KERNEL_SSE__
-	float4 a;
-	a.m128 = _mm_cvtepi32_ps(i.m128);
-#else
-	float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w};
-#endif
+/* Vectorized types declaration. */
+#include "util/util_types_uchar2.h"
+#include "util/util_types_uchar3.h"
+#include "util/util_types_uchar4.h"
 
-	return a;
-}
+#include "util/util_types_int2.h"
+#include "util/util_types_int3.h"
+#include "util/util_types_int4.h"
 
-ccl_device_inline int4 make_int4(const float3& f)
-{
-#ifdef __KERNEL_SSE__
-	int4 a;
-	a.m128 = _mm_cvtps_epi32(f.m128);
-#else
-	int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w};
-#endif
+#include "util/util_types_uint2.h"
+#include "util/util_types_uint3.h"
+#include "util/util_types_uint4.h"
 
-	return a;
-}
+#include "util/util_types_float2.h"
+#include "util/util_types_float3.h"
+#include "util/util_types_float4.h"
 
-#endif
+#include "util/util_types_vector3.h"
 
-/* Interpolation types for textures
- * cuda also use texture space to store other objects */
-enum InterpolationType {
-	INTERPOLATION_NONE = -1,
-	INTERPOLATION_LINEAR = 0,
-	INTERPOLATION_CLOSEST = 1,
-	INTERPOLATION_CUBIC = 2,
-	INTERPOLATION_SMART = 3,
+/* Vectorized types implementation. */
+#include "util/util_types_uchar2_impl.h"
+#include "util/util_types_uchar3_impl.h"
+#include "util/util_types_uchar4_impl.h"
 
-	INTERPOLATION_NUM_TYPES,
-};
+#include "util/util_types_int2_impl.h"
+#include "util/util_types_int3_impl.h"
+#include "util/util_types_int4_impl.h"
 
-/* Extension types for textures.
- *
- * Defines how the image is extrapolated past its original bounds.
- */
-enum ExtensionType {
-	/* Cause the image to repeat horizontally and vertically. */
-	EXTENSION_REPEAT = 0,
-	/* Extend by repeating edge pixels of the image. */
-	EXTENSION_EXTEND = 1,
-	/* Clip to image size and set exterior pixels as transparent. */
-	EXTENSION_CLIP = 2,
-
-	EXTENSION_NUM_TYPES,
-};
-
-/* macros */
-
-/* hints for branch prediction, only use in code that runs a _lot_ */
-#if defined(__GNUC__) && defined(__KERNEL_CPU__)
-#  define LIKELY(x)       __builtin_expect(!!(x), 1)
-#  define UNLIKELY(x)     __builtin_expect(!!(x), 0)
-#else
-#  define LIKELY(x)       (x)
-#  define UNLIKELY(x)     (x)
-#endif
+#include "util/util_types_uint2_impl.h"
+#include "util/util_types_uint3_impl.h"
+#include "util/util_types_uint4_impl.h"
 
-#if defined(__cplusplus) && ((__cplusplus >= 201103L) || (defined(_MSC_VER) && _MSC_VER >= 1800))
-#  define HAS_CPP11_FEATURES
-#endif
+#include "util/util_types_float2_impl.h"
+#include "util/util_types_float3_impl.h"
+#include "util/util_types_float4_impl.h"
 
-#if defined(__GNUC__) || defined(__clang__)
-#  if defined(HAS_CPP11_FEATURES)
-/* Some magic to be sure we don't have reference in the type. */
-template<typename T> static inline T decltype_helper(T x) { return x; }
-#    define TYPEOF(x) decltype(decltype_helper(x))
-#  else
-#    define TYPEOF(x) typeof(x)
-#  endif
-#endif
+#include "util/util_types_vector3_impl.h"
 
-/* Causes warning:
- * incompatible types when assigning to type 'Foo' from type 'Bar'
- * ... the compiler optimizes away the temp var */
-#ifdef __GNUC__
-#define CHECK_TYPE(var, type)  {  \
-	TYPEOF(var) *__tmp;         \
-	__tmp = (type *)NULL;         \
-	(void)__tmp;                  \
-} (void)0
-
-#define CHECK_TYPE_PAIR(var_a, var_b)  {  \
-	TYPEOF(var_a) *__tmp;                 \
-	__tmp = (typeof(var_b) *)NULL;        \
-	(void)__tmp;                          \
-} (void)0
-#else
-#  define CHECK_TYPE(var, type)
-#  define CHECK_TYPE_PAIR(var_a, var_b)
+/* SSE types. */
+#ifndef __KERNEL_GPU__
+#  include "util/util_sseb.h"
+#  include "util/util_ssei.h"
+#  include "util/util_ssef.h"
+#  include "util/util_avxf.h"
 #endif
 
-/* can be used in simple macros */
-#define CHECK_TYPE_INLINE(val, type) \
-	((void)(((type)0) != (val)))
-
-
-CCL_NAMESPACE_END
-
 #endif /* __UTIL_TYPES_H__ */