Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Campbell Barton <ideasman42@gmail.com> 2014-05-04 21:49:22 +0400
committer Campbell Barton <ideasman42@gmail.com> 2014-05-04 21:49:22 +0400
commit d828d44d7a934e6e9c8fb492c8f788e91fda16ff (patch)
tree d9ae1cf32ab6d26d2d663ca1611ac7036485bb98
parent dc13969e484ed9fa6834b1aba85ba00f65bf8a11 (diff)
Cycles: use LIKELY/UNLIKELY macros
Gives overall ~3% speedup in own tests for BMW scene.
-rw-r--r-- intern/cycles/kernel/osl/osl_services.cpp 6
-rw-r--r-- intern/cycles/util/util_color.h 30
-rw-r--r-- intern/cycles/util/util_math.h 10
-rw-r--r-- intern/cycles/util/util_transform.cpp 9
-rw-r--r-- intern/cycles/util/util_types.h 37
5 files changed, 67 insertions, 25 deletions
diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp
index 2984cc97437..f33d032b9d3 100644
--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@@ -50,7 +50,11 @@ CCL_NAMESPACE_BEGIN
/* RenderServices implementation */
-#define COPY_MATRIX44(m1, m2) memcpy(m1, m2, sizeof(*m2))
+#define COPY_MATRIX44(m1, m2) { \
+ CHECK_TYPE(m1, OSL::Matrix44*); \
+ CHECK_TYPE(m2, Transform*); \
+ memcpy(m1, m2, sizeof(*m2)); \
+} (void)0
/* static ustrings */
ustring OSLRenderServices::u_distance("distance");
diff --git a/intern/cycles/util/util_color.h b/intern/cycles/util/util_color.h
index 50d9f59ea27..b72cc6bc873 100644
--- a/intern/cycles/util/util_color.h
+++ b/intern/cycles/util/util_color.h
@@ -61,22 +61,22 @@ ccl_device float3 rgb_to_hsv(float3 rgb)
h = 0.0f;
}
- if(s == 0.0f) {
- h = 0.0f;
- }
- else {
+ if(s != 0.0f) {
float3 cmax3 = make_float3(cmax, cmax, cmax);
c = (cmax3 - rgb)/cdelta;
- if(rgb.x == cmax) h = c.z - c.y;
- else if(rgb.y == cmax) h = 2.0f + c.x - c.z;
- else h = 4.0f + c.y - c.x;
+ if (rgb.x == cmax) h = c.z - c.y;
+ else if(rgb.y == cmax) h = 2.0f + c.x - c.z;
+ else h = 4.0f + c.y - c.x;
h /= 6.0f;
if(h < 0.0f)
h += 1.0f;
}
+ else {
+ h = 0.0f;
+ }
return make_float3(h, s, v);
}
@@ -90,13 +90,10 @@ ccl_device float3 hsv_to_rgb(float3 hsv)
s = hsv.y;
v = hsv.z;
- if(s == 0.0f) {
- rgb = make_float3(v, v, v);
- }
- else {
+ if(s != 0.0f) {
if(h == 1.0f)
h = 0.0f;
-
+
h *= 6.0f;
i = floorf(h);
f = h - i;
@@ -104,13 +101,16 @@ ccl_device float3 hsv_to_rgb(float3 hsv)
p = v*(1.0f-s);
q = v*(1.0f-(s*f));
t = v*(1.0f-(s*(1.0f-f)));
-
- if(i == 0.0f) rgb = make_float3(v, t, p);
+
+ if (i == 0.0f) rgb = make_float3(v, t, p);
else if(i == 1.0f) rgb = make_float3(q, v, p);
else if(i == 2.0f) rgb = make_float3(p, v, t);
else if(i == 3.0f) rgb = make_float3(p, q, v);
else if(i == 4.0f) rgb = make_float3(t, p, v);
- else rgb = make_float3(v, p, q);
+ else rgb = make_float3(v, p, q);
+ }
+ else {
+ rgb = make_float3(v, v, v);
}
return rgb;
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 53ed6817258..ded75762cd2 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -1237,7 +1237,7 @@ ccl_device float compatible_powf(float x, float y)
ccl_device float safe_powf(float a, float b)
{
- if(a < 0.0f && b != float_to_int(b))
+ if(UNLIKELY(a < 0.0f && b != float_to_int(b)))
return 0.0f;
return compatible_powf(a, b);
@@ -1245,7 +1245,7 @@ ccl_device float safe_powf(float a, float b)
ccl_device float safe_logf(float a, float b)
{
- if(a < 0.0f || b < 0.0f)
+ if(UNLIKELY(a < 0.0f || b < 0.0f))
return 0.0f;
return logf(a)/logf(b);
@@ -1305,7 +1305,7 @@ ccl_device bool ray_aligned_disk_intersect(
float3 disk_N = normalize_len(ray_P - disk_P, &disk_t);
float div = dot(ray_D, disk_N);
- if(div == 0.0f)
+ if(UNLIKELY(div == 0.0f))
return false;
/* compute t to intersection point */
@@ -1335,7 +1335,7 @@ ccl_device bool ray_triangle_intersect(
float3 s1 = cross(ray_D, e2);
const float divisor = dot(s1, e1);
- if(divisor == 0.0f)
+ if(UNLIKELY(divisor == 0.0f))
return false;
const float invdivisor = 1.0f/divisor;
@@ -1378,7 +1378,7 @@ ccl_device bool ray_triangle_intersect_uv(
float3 s1 = cross(ray_D, e2);
const float divisor = dot(s1, e1);
- if(divisor == 0.0f)
+ if(UNLIKELY(divisor == 0.0f))
return false;
const float invdivisor = 1.0f/divisor;
diff --git a/intern/cycles/util/util_transform.cpp b/intern/cycles/util/util_transform.cpp
index 12c2270a8d4..14613558501 100644
--- a/intern/cycles/util/util_transform.cpp
+++ b/intern/cycles/util/util_transform.cpp
@@ -75,7 +75,7 @@ static bool transform_matrix4_gj_inverse(float R[][4], float M[][4])
}
}
- if(pivotsize == 0)
+ if(UNLIKELY(pivotsize == 0.0f))
return false;
if(pivot != i) {
@@ -106,7 +106,7 @@ static bool transform_matrix4_gj_inverse(float R[][4], float M[][4])
for(int i = 3; i >= 0; --i) {
float f;
- if((f = M[i][i]) == 0)
+ if(UNLIKELY((f = M[i][i]) == 0.0f))
return false;
for(int j = 0; j < 4; j++) {
@@ -135,15 +135,16 @@ Transform transform_inverse(const Transform& tfm)
memcpy(R, &tfmR, sizeof(R));
memcpy(M, &tfm, sizeof(M));
- if(!transform_matrix4_gj_inverse(R, M)) {
+ if(UNLIKELY(!transform_matrix4_gj_inverse(R, M))) {
/* matrix is degenerate (e.g. 0 scale on some axis), ideally we should
* never be in this situation, but try to invert it anyway with tweak */
M[0][0] += 1e-8f;
M[1][1] += 1e-8f;
M[2][2] += 1e-8f;
- if(!transform_matrix4_gj_inverse(R, M))
+ if(UNLIKELY(!transform_matrix4_gj_inverse(R, M))) {
return transform_identity();
+ }
}
memcpy(&tfmR, R, sizeof(R));
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index 8ac0b11678e..eeb472ef175 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -456,6 +456,43 @@ enum InterpolationType {
INTERPOLATION_SMART = 3,
};
+
+/* macros */
+
+/* hints for branch prediction, only use in code that runs a _lot_ */
+#ifdef __GNUC__
+# define LIKELY(x) __builtin_expect(!!(x), 1)
+# define UNLIKELY(x) __builtin_expect(!!(x), 0)
+#else
+# define LIKELY(x) (x)
+# define UNLIKELY(x) (x)
+#endif
+
+/* Causes a compiler diagnostic when 'var' is not of the given 'type':
+ * incompatible types when assigning to type 'Foo' from type 'Bar'
+ * ... the compiler optimizes away the temp var */
+#ifdef __GNUC__
+#define CHECK_TYPE(var, type) { \
+ __typeof(var) *__tmp; \
+ __tmp = (type *)NULL; \
+ (void)__tmp; \
+} (void)0
+
+#define CHECK_TYPE_PAIR(var_a, var_b) { \
+ __typeof(var_a) *__tmp; \
+ __tmp = (__typeof(var_b) *)NULL; \
+ (void)__tmp; \
+} (void)0
+#else
+# define CHECK_TYPE(var, type)
+# define CHECK_TYPE_PAIR(var_a, var_b)
+#endif
+
+/* can be used in simple macros */
+#define CHECK_TYPE_INLINE(val, type) \
+ ((void)(((type)0) != (val)))
+
+
CCL_NAMESPACE_END
#endif /* __UTIL_TYPES_H__ */