diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2014-12-16 18:27:44 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2014-12-25 00:50:49 +0300 |
commit | ab8d9c4b8853755faa62307750d961dc2ec43708 (patch) | |
tree | 707d52530016e210d46375e2c1c0db432ae230b1 /intern | |
parent | f770bc4757a2b471d5aaee048359096c1c79a6b2 (diff) |
Cycles: Add some utility functions and structures
Most of them are not currently used but are essential for the further work.
- CPU kernels with SSE2 support will now have the sse3b, sse3f and sse3i types
- Added templated versions of min4, max4 which are handy to use with register
variables.
- Added util_swap function which takes its arguments by pointer.
So hopefully it'll be a portable version of std::swap.
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/kernel/geom/geom_triangle_intersect.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_compat_cpu.h | 6 | ||||
-rw-r--r-- | intern/cycles/util/util_math.h | 37 | ||||
-rw-r--r-- | intern/cycles/util/util_types.h | 13 |
4 files changed, 57 insertions, 3 deletions
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h index 4bb60ca78e0..8ed0e15e6ac 100644 --- a/intern/cycles/kernel/geom/geom_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h @@ -61,9 +61,7 @@ ccl_device_inline void triangle_intersect_precalc(float3 dir, /* Swap kx and ky dimensions to preserve winding direction of triangles. */ if(IDX(dir, kz) < 0.0f) { - int tmp = kx; - kx = ky; - ky = tmp; + util_swap(&kx, &ky); } /* Calculate the shear constants. */ diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index 08c8bdd369d..2f0b78ea073 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -344,6 +344,12 @@ typedef texture_image<uchar4> texture_image_uchar4; #define kernel_data (kg->__data) +#ifdef __KERNEL_SSE2__ +typedef vector3<sseb> sse3b; +typedef vector3<ssef> sse3f; +typedef vector3<ssei> sse3i; +#endif + CCL_NAMESPACE_END #endif /* __KERNEL_COMPAT_CPU_H__ */ diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index 6898dc974c6..3d605e0ecfd 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -124,6 +124,24 @@ ccl_device_inline double min(double a, double b) return (a < b)? a: b; } +/* These 2 guys are templated for usage with registers data. + * + * NOTE: Since this is CPU-only functions it is ok to use references here. + * But for other devices we'll need to be careful about this. 
+ */ + +template<typename T> +ccl_device_inline T min4(const T& a, const T& b, const T& c, const T& d) +{ + return min(min(a,b),min(c,d)); +} + +template<typename T> +ccl_device_inline T max4(const T& a, const T& b, const T& c, const T& d) +{ + return max(max(a,b),max(c,d)); +} + #endif ccl_device_inline float min4(float a, float b, float c, float d) @@ -1468,6 +1486,25 @@ ccl_device_inline int util_max_axis(float3 vec) } } +/* NOTE: We don't use std::swap here because of number of reasons: + * + * - We don't want current context to be polluted with all the templated + * functions from stl which might cause some interference about which + * function is used. + * + * - Different devices in theory might want to use intrinsics to optimize + * this function for specific type. + * + * - We don't want ot use references because of OpenCL state at this moment. + */ +template <typename T> +ccl_device_inline void util_swap(T *__restrict a, T *__restrict b) +{ + T c = *a; + *a = *b; + *b = c; +} + CCL_NAMESPACE_END #endif /* __UTIL_MATH_H__ */ diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index ce84200d0b6..8c0f6d180b0 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -264,6 +264,19 @@ struct ccl_try_align(16) float4 { __forceinline float& operator[](int i) { return *(&x + i); } }; +template<typename T> +class vector3 +{ +public: + T x, y, z; + + ccl_always_inline vector3() {} + ccl_always_inline vector3(const T& a) + : x(a), y(a), z(a) {} + ccl_always_inline vector3(const T& x, const T& y, const T& z) + : x(x), y(y), z(z) {} +}; + #endif #ifndef __KERNEL_GPU__ |