diff options
author | Campbell Barton <ideasman42@gmail.com> | 2014-05-04 21:49:22 +0400 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2014-05-04 21:49:22 +0400 |
commit | d828d44d7a934e6e9c8fb492c8f788e91fda16ff (patch) | |
tree | d9ae1cf32ab6d26d2d663ca1611ac7036485bb98 | |
parent | dc13969e484ed9fa6834b1aba85ba00f65bf8a11 (diff) |
Cycles: use LIKELY/UNLIKELY macros
Gives overall ~3% speedup in own tests for BMW scene.
-rw-r--r-- | intern/cycles/kernel/osl/osl_services.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/util/util_color.h | 30 | ||||
-rw-r--r-- | intern/cycles/util/util_math.h | 10 | ||||
-rw-r--r-- | intern/cycles/util/util_transform.cpp | 9 | ||||
-rw-r--r-- | intern/cycles/util/util_types.h | 37 |
5 files changed, 67 insertions, 25 deletions
diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index 2984cc97437..f33d032b9d3 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -50,7 +50,11 @@ CCL_NAMESPACE_BEGIN /* RenderServices implementation */ -#define COPY_MATRIX44(m1, m2) memcpy(m1, m2, sizeof(*m2)) +#define COPY_MATRIX44(m1, m2) { \ + CHECK_TYPE(m1, OSL::Matrix44*); \ + CHECK_TYPE(m2, Transform*); \ + memcpy(m1, m2, sizeof(*m2)); \ +} (void)0 /* static ustrings */ ustring OSLRenderServices::u_distance("distance"); diff --git a/intern/cycles/util/util_color.h b/intern/cycles/util/util_color.h index 50d9f59ea27..b72cc6bc873 100644 --- a/intern/cycles/util/util_color.h +++ b/intern/cycles/util/util_color.h @@ -61,22 +61,22 @@ ccl_device float3 rgb_to_hsv(float3 rgb) h = 0.0f; } - if(s == 0.0f) { - h = 0.0f; - } - else { + if(s != 0.0f) { float3 cmax3 = make_float3(cmax, cmax, cmax); c = (cmax3 - rgb)/cdelta; - if(rgb.x == cmax) h = c.z - c.y; - else if(rgb.y == cmax) h = 2.0f + c.x - c.z; - else h = 4.0f + c.y - c.x; + if (rgb.x == cmax) h = c.z - c.y; + else if(rgb.y == cmax) h = 2.0f + c.x - c.z; + else h = 4.0f + c.y - c.x; h /= 6.0f; if(h < 0.0f) h += 1.0f; } + else { + h = 0.0f; + } return make_float3(h, s, v); } @@ -90,13 +90,10 @@ ccl_device float3 hsv_to_rgb(float3 hsv) s = hsv.y; v = hsv.z; - if(s == 0.0f) { - rgb = make_float3(v, v, v); - } - else { + if(s != 0.0f) { if(h == 1.0f) h = 0.0f; - + h *= 6.0f; i = floorf(h); f = h - i; @@ -104,13 +101,16 @@ ccl_device float3 hsv_to_rgb(float3 hsv) p = v*(1.0f-s); q = v*(1.0f-(s*f)); t = v*(1.0f-(s*(1.0f-f))); - - if(i == 0.0f) rgb = make_float3(v, t, p); + + if (i == 0.0f) rgb = make_float3(v, t, p); else if(i == 1.0f) rgb = make_float3(q, v, p); else if(i == 2.0f) rgb = make_float3(p, v, t); else if(i == 3.0f) rgb = make_float3(p, q, v); else if(i == 4.0f) rgb = make_float3(t, p, v); - else rgb = make_float3(v, p, q); + else rgb = make_float3(v, 
p, q); + } + else { + rgb = make_float3(v, v, v); } return rgb; diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index 53ed6817258..ded75762cd2 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -1237,7 +1237,7 @@ ccl_device float compatible_powf(float x, float y) ccl_device float safe_powf(float a, float b) { - if(a < 0.0f && b != float_to_int(b)) + if(UNLIKELY(a < 0.0f && b != float_to_int(b))) return 0.0f; return compatible_powf(a, b); @@ -1245,7 +1245,7 @@ ccl_device float safe_powf(float a, float b) ccl_device float safe_logf(float a, float b) { - if(a < 0.0f || b < 0.0f) + if(UNLIKELY(a < 0.0f || b < 0.0f)) return 0.0f; return logf(a)/logf(b); @@ -1305,7 +1305,7 @@ ccl_device bool ray_aligned_disk_intersect( float3 disk_N = normalize_len(ray_P - disk_P, &disk_t); float div = dot(ray_D, disk_N); - if(div == 0.0f) + if(UNLIKELY(div == 0.0f)) return false; /* compute t to intersection point */ @@ -1335,7 +1335,7 @@ ccl_device bool ray_triangle_intersect( float3 s1 = cross(ray_D, e2); const float divisor = dot(s1, e1); - if(divisor == 0.0f) + if(UNLIKELY(divisor == 0.0f)) return false; const float invdivisor = 1.0f/divisor; @@ -1378,7 +1378,7 @@ ccl_device bool ray_triangle_intersect_uv( float3 s1 = cross(ray_D, e2); const float divisor = dot(s1, e1); - if(divisor == 0.0f) + if(UNLIKELY(divisor == 0.0f)) return false; const float invdivisor = 1.0f/divisor; diff --git a/intern/cycles/util/util_transform.cpp b/intern/cycles/util/util_transform.cpp index 12c2270a8d4..14613558501 100644 --- a/intern/cycles/util/util_transform.cpp +++ b/intern/cycles/util/util_transform.cpp @@ -75,7 +75,7 @@ static bool transform_matrix4_gj_inverse(float R[][4], float M[][4]) } } - if(pivotsize == 0) + if(UNLIKELY(pivotsize == 0.0f)) return false; if(pivot != i) { @@ -106,7 +106,7 @@ static bool transform_matrix4_gj_inverse(float R[][4], float M[][4]) for(int i = 3; i >= 0; --i) { float f; - if((f = M[i][i]) == 0) + 
if(UNLIKELY((f = M[i][i]) == 0.0f)) return false; for(int j = 0; j < 4; j++) { @@ -135,15 +135,16 @@ Transform transform_inverse(const Transform& tfm) memcpy(R, &tfmR, sizeof(R)); memcpy(M, &tfm, sizeof(M)); - if(!transform_matrix4_gj_inverse(R, M)) { + if(UNLIKELY(!transform_matrix4_gj_inverse(R, M))) { /* matrix is degenerate (e.g. 0 scale on some axis), ideally we should * never be in this situation, but try to invert it anyway with tweak */ M[0][0] += 1e-8f; M[1][1] += 1e-8f; M[2][2] += 1e-8f; - if(!transform_matrix4_gj_inverse(R, M)) + if(UNLIKELY(!transform_matrix4_gj_inverse(R, M))) { return transform_identity(); + } } memcpy(&tfmR, R, sizeof(R)); diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index 8ac0b11678e..eeb472ef175 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -456,6 +456,43 @@ enum InterpolationType { INTERPOLATION_SMART = 3, }; + +/* macros */ + +/* hints for branch prediction, only use in code that runs a _lot_ */ +#ifdef __GNUC__ +# define LIKELY(x) __builtin_expect(!!(x), 1) +# define UNLIKELY(x) __builtin_expect(!!(x), 0) +#else +# define LIKELY(x) (x) +# define UNLIKELY(x) (x) +#endif + +/* Causes warning: + * incompatible types when assigning to type 'Foo' from type 'Bar' + * ... the compiler optimizes away the temp var */ +#ifdef __GNUC__ +#define CHECK_TYPE(var, type) { \ + __typeof(var) *__tmp; \ + __tmp = (type *)NULL; \ + (void)__tmp; \ +} (void)0 + +#define CHECK_TYPE_PAIR(var_a, var_b) { \ + __typeof(var_a) *__tmp; \ + __tmp = (__typeof(var_b) *)NULL; \ + (void)__tmp; \ +} (void)0 +#else +# define CHECK_TYPE(var, type) +# define CHECK_TYPE_PAIR(var_a, var_b) +#endif + +/* can be used in simple macros */ +#define CHECK_TYPE_INLINE(val, type) \ + ((void)(((type)0) != (val))) + + CCL_NAMESPACE_END #endif /* __UTIL_TYPES_H__ */ |