Cycles: Add some utility functions and structures

Most of them are not currently used, but they are essential for further work. - CPU kernels with SSE2 support will now have sse3b, sse3f and sse3i. - Added templated versions of min4 and max4, which are handy to use with register variables. - Added a util_swap function which takes its arguments by pointer, so hopefully it'll be a portable version of std::swap.
author: Sergey Sharybin <sergey.vfx@gmail.com> 2014-12-16 18:27:44 +0300
committer: Sergey Sharybin <sergey.vfx@gmail.com> 2014-12-25 00:50:49 +0300
commit: ab8d9c4b8853755faa62307750d961dc2ec43708 (patch)
tree: 707d52530016e210d46375e2c1c0db432ae230b1 /intern
parent: f770bc4757a2b471d5aaee048359096c1c79a6b2 (diff)
4 files changed, 57 insertions, 3 deletions
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index 4bb60ca78e0..8ed0e15e6ac 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -61,9 +61,7 @@ ccl_device_inline void triangle_intersect_precalc(float3 dir,
 
 	/* Swap kx and ky dimensions to preserve winding direction of triangles. */
 	if(IDX(dir, kz) < 0.0f) {
-		int tmp = kx;
-		kx = ky;
-		ky = tmp;
+		util_swap(&kx, &ky);
 	}
 
 	/* Calculate the shear constants. */
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index 08c8bdd369d..2f0b78ea073 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -344,6 +344,12 @@ typedef texture_image<uchar4> texture_image_uchar4;
 
 #define kernel_data (kg->__data)
 
+#ifdef __KERNEL_SSE2__  /* Only declared for kernels built with SSE2 support. */
+typedef vector3<sseb> sse3b;  /* vector3 whose x, y, z members are of type sseb. */
+typedef vector3<ssef> sse3f;  /* vector3 whose x, y, z members are of type ssef. */
+typedef vector3<ssei> sse3i;  /* vector3 whose x, y, z members are of type ssei. */
+#endif  /* __KERNEL_SSE2__ */
+
 CCL_NAMESPACE_END
 
 #endif /* __KERNEL_COMPAT_CPU_H__ */
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 6898dc974c6..3d605e0ecfd 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -124,6 +124,24 @@ ccl_device_inline double min(double a, double b)
 	return (a < b)? a: b;
 }
 
+/* These two functions are templated for use with register data.
+ *
+ * NOTE: Since these are CPU-only functions it is ok to use references here.
+ * But for other devices we'll need to be careful about this.
+ */
+
+template<typename T>  /* Generic four-way minimum; relies on a two-argument min() being available for T. */
+ccl_device_inline T min4(const T& a, const T& b, const T& c, const T& d)
+{
+	return min(min(a,b),min(c,d));  /* Reduce pairwise: (a,b) and (c,d) first, then combine the two results. */
+}
+
+template<typename T>  /* Generic four-way maximum; relies on a two-argument max() being available for T. */
+ccl_device_inline T max4(const T& a, const T& b, const T& c, const T& d)
+{
+	return max(max(a,b),max(c,d));  /* Reduce pairwise: (a,b) and (c,d) first, then combine the two results. */
+}
+
 #endif
 
 ccl_device_inline float min4(float a, float b, float c, float d)
@@ -1468,6 +1486,25 @@ ccl_device_inline int util_max_axis(float3 vec)
 	}
 }
 
+/* NOTE: std::swap is deliberately not used here, for several reasons:
+ *
+ * - We don't want the current context to be polluted with all the templated
+ *   functions from the STL, which might cause some interference about which
+ *   function is used.
+ *
+ * - Different devices might, in theory, want to use intrinsics to optimize
+ *   this function for a specific type.
+ *
+ * - We don't want to use references because of the state of OpenCL at this moment.
+ */
+template <typename T>
+ccl_device_inline void util_swap(T *__restrict a, T *__restrict b)
+{
+	T tmp = *a;  /* Keep the old *a alive while it gets overwritten. */
+	*a = *b;
+	*b = tmp;
+}
+
 CCL_NAMESPACE_END
 
 #endif /* __UTIL_MATH_H__ */
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index ce84200d0b6..8c0f6d180b0 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -264,6 +264,19 @@ struct ccl_try_align(16) float4 {
 	__forceinline float& operator[](int i) { return *(&x + i); }
 };
 
+template<typename T>  /* T is the type of each of the three components. */
+class vector3  /* Simple triple of values with public members x, y and z. */
+{
+public:
+	T x, y, z;
+
+	ccl_always_inline vector3() {}  /* No explicit values are assigned to x, y, z here. */
+	ccl_always_inline vector3(const T& a)  /* Broadcast: x, y and z are all copied from a. */
+	  : x(a), y(a), z(a) {}
+	ccl_always_inline vector3(const T& x, const T& y, const T& z)  /* Component-wise construction. */
+	  : x(x), y(y), z(z) {}  /* In each x(x), the outer name is the member and the inner one the parameter. */
+};
+
 #endif
 
 #ifndef __KERNEL_GPU__
author	Sergey Sharybin <sergey.vfx@gmail.com>	2014-12-16 18:27:44 +0300
committer	Sergey Sharybin <sergey.vfx@gmail.com>	2014-12-25 00:50:49 +0300
commit	ab8d9c4b8853755faa62307750d961dc2ec43708 (patch)
tree	707d52530016e210d46375e2c1c0db432ae230b1 /intern
parent	f770bc4757a2b471d5aaee048359096c1c79a6b2 (diff)