Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/kernel_compat_cpu.h')
-rw-r--r--intern/cycles/kernel/kernel_compat_cpu.h105
1 files changed, 101 insertions, 4 deletions
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index 403fd0a67f7..08c8bdd369d 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -25,10 +25,12 @@
#include "util_half.h"
#include "util_types.h"
-/* On 64bit linux single precision exponent is really slow comparing to the
- * double precision version, even with float<->double conversion involved.
+/* On x86_64, versions of glibc < 2.16 have an issue where expf is
+ * much slower than the double version. This was fixed in glibc 2.16.
*/
-#if !defined(__KERNEL_GPU__) && defined(__linux__) && defined(__x86_64__)
+#if !defined(__KERNEL_GPU__) && defined(__x86_64__) && defined(__x86_64__) && \
+ defined(__GNU_LIBRARY__) && defined(__GLIBC__ ) && defined(__GLIBC_MINOR__) && \
+ (__GLIBC__ <= 2 && __GLIBC_MINOR__ < 16)
# define expf(x) ((float)exp((double)(x)))
#endif
@@ -151,6 +153,13 @@ template<typename T> struct texture_image {
ccl_always_inline float4 interp_3d(float x, float y, float z, bool periodic = false)
{
+ return interp_3d_ex(x, y, z, interpolation, periodic);
+ }
+
+ ccl_always_inline float4 interp_3d_ex(float x, float y, float z,
+ int interpolation = INTERPOLATION_LINEAR,
+ bool periodic = false)
+ {
if(UNLIKELY(!data))
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
@@ -174,7 +183,7 @@ template<typename T> struct texture_image {
return read(data[ix + iy*width + iz*width*height]);
}
- else {
+ else if(interpolation == INTERPOLATION_LINEAR) {
float tx = frac(x*(float)width - 0.5f, &ix);
float ty = frac(y*(float)height - 0.5f, &iy);
float tz = frac(z*(float)depth - 0.5f, &iz);
@@ -212,6 +221,93 @@ template<typename T> struct texture_image {
return r;
}
+ else {
+ /* Tricubic b-spline interpolation. */
+ const float tx = frac(x*(float)width - 0.5f, &ix);
+ const float ty = frac(y*(float)height - 0.5f, &iy);
+ const float tz = frac(z*(float)depth - 0.5f, &iz);
+ int pix, piy, piz, nnix, nniy, nniz;
+
+ if(periodic) {
+ ix = wrap_periodic(ix, width);
+ iy = wrap_periodic(iy, height);
+ iz = wrap_periodic(iz, depth);
+
+ pix = wrap_periodic(ix-1, width);
+ piy = wrap_periodic(iy-1, height);
+ piz = wrap_periodic(iz-1, depth);
+
+ nix = wrap_periodic(ix+1, width);
+ niy = wrap_periodic(iy+1, height);
+ niz = wrap_periodic(iz+1, depth);
+
+ nnix = wrap_periodic(ix+2, width);
+ nniy = wrap_periodic(iy+2, height);
+ nniz = wrap_periodic(iz+2, depth);
+ }
+ else {
+ ix = wrap_clamp(ix, width);
+ iy = wrap_clamp(iy, height);
+ iz = wrap_clamp(iz, depth);
+
+ pix = wrap_clamp(ix-1, width);
+ piy = wrap_clamp(iy-1, height);
+ piz = wrap_clamp(iz-1, depth);
+
+ nix = wrap_clamp(ix+1, width);
+ niy = wrap_clamp(iy+1, height);
+ niz = wrap_clamp(iz+1, depth);
+
+ nnix = wrap_clamp(ix+2, width);
+ nniy = wrap_clamp(iy+2, height);
+ nniz = wrap_clamp(iz+2, depth);
+ }
+
+ const int xc[4] = {pix, ix, nix, nnix};
+ const int yc[4] = {width * piy,
+ width * iy,
+ width * niy,
+ width * nniy};
+ const int zc[4] = {width * height * piz,
+ width * height * iz,
+ width * height * niz,
+ width * height * nniz};
+ float u[4], v[4], w[4];
+
+ /* Some helper macro to keep code reasonable size,
+ * let compiler to inline all the matrix multiplications.
+ */
+#define SET_SPLINE_WEIGHTS(u, t) \
+ { \
+ u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \
+ u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \
+ u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \
+ u[3] = (1.0f / 6.0f) * t * t * t; \
+ } (void)0
+#define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]]))
+#define COL_TERM(col, row) \
+ (v[col] * (u[0] * DATA(0, col, row) + \
+ u[1] * DATA(1, col, row) + \
+ u[2] * DATA(2, col, row) + \
+ u[3] * DATA(3, col, row)))
+#define ROW_TERM(row) \
+ (w[row] * (COL_TERM(0, row) + \
+ COL_TERM(1, row) + \
+ COL_TERM(2, row) + \
+ COL_TERM(3, row)))
+
+ SET_SPLINE_WEIGHTS(u, tx);
+ SET_SPLINE_WEIGHTS(v, ty);
+ SET_SPLINE_WEIGHTS(w, tz);
+
+ /* Actual interpolation. */
+ return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
+
+#undef COL_TERM
+#undef ROW_TERM
+#undef DATA
+#undef SET_SPLINE_WEIGHTS
+ }
}
ccl_always_inline void dimensions_set(int width_, int height_, int depth_)
@@ -244,6 +340,7 @@ typedef texture_image<uchar4> texture_image_uchar4;
#define kernel_tex_lookup(tex, t, offset, size) (kg->tex.lookup(t, offset, size))
#define kernel_tex_image_interp(tex, x, y) ((tex < MAX_FLOAT_IMAGES) ? kg->texture_float_images[tex].interp(x, y) : kg->texture_byte_images[tex - MAX_FLOAT_IMAGES].interp(x, y))
#define kernel_tex_image_interp_3d(tex, x, y, z) ((tex < MAX_FLOAT_IMAGES) ? kg->texture_float_images[tex].interp_3d(x, y, z) : kg->texture_byte_images[tex - MAX_FLOAT_IMAGES].interp_3d(x, y, z))
+#define kernel_tex_image_interp_3d_ex(tex, x, y, z, interpolation) ((tex < MAX_FLOAT_IMAGES) ? kg->texture_float_images[tex].interp_3d_ex(x, y, z, interpolation) : kg->texture_byte_images[tex - MAX_FLOAT_IMAGES].interp_3d_ex(x, y, z, interpolation))
#define kernel_data (kg->__data)