Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--intern/cycles/kernel/device/cpu/image.h378
1 files changed, 264 insertions, 114 deletions
diff --git a/intern/cycles/kernel/device/cpu/image.h b/intern/cycles/kernel/device/cpu/image.h
index b6bcd19b9ea..c0b6846e5b0 100644
--- a/intern/cycles/kernel/device/cpu/image.h
+++ b/intern/cycles/kernel/device/cpu/image.h
@@ -80,19 +80,115 @@ template<typename T> struct TextureInterpolator {
return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
}
+ /* Read 2D Texture Data
+ * Does not check if data request is in bounds. */
static ccl_always_inline float4 read(const T *data, int x, int y, int width, int height)
{
- if (x < 0 || y < 0 || x >= width || y >= height) {
+ return read(data[y * width + x]);
+ }
+
+ /* Read 2D Texture Data Clip
+ * Returns transparent black if data request is out of bounds. */
+ static ccl_always_inline float4 read_clip(const T *data, int x, int y, int width, int height)
+ {
+ if (x < 0 || x >= width || y < 0 || y >= height) {
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
return read(data[y * width + x]);
}
+ /* Read 3D Texture Data
+ * Does not check if data request is in bounds. */
+ static ccl_always_inline float4
+ read(const T *data, int x, int y, int z, int width, int height, int depth)
+ {
+ return read(data[x + y * width + z * width * height]);
+ }
+
+ /* Read 3D Texture Data Clip
+ * Returns transparent black if data request is out of bounds. */
+ static ccl_always_inline float4
+ read_clip(const T *data, int x, int y, int z, int width, int height, int depth)
+ {
+ if (x < 0 || x >= width || y < 0 || y >= height || z < 0 || z >= depth) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ return read(data[x + y * width + z * width * height]);
+ }
+
+ /* Trilinear Interpolation */
+ static ccl_always_inline float4
+ trilinear_lookup(const T *data,
+ float tx,
+ float ty,
+ float tz,
+ int ix,
+ int iy,
+ int iz,
+ int nix,
+ int niy,
+ int niz,
+ int width,
+ int height,
+ int depth,
+ float4 read(const T *, int, int, int, int, int, int))
+ {
+ float4 r;
+ r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) * read(data, ix, iy, iz, width, height, depth);
+ r += (1.0f - tz) * (1.0f - ty) * tx * read(data, nix, iy, iz, width, height, depth);
+ r += (1.0f - tz) * ty * (1.0f - tx) * read(data, ix, niy, iz, width, height, depth);
+ r += (1.0f - tz) * ty * tx * read(data, nix, niy, iz, width, height, depth);
+
+ r += tz * (1.0f - ty) * (1.0f - tx) * read(data, ix, iy, niz, width, height, depth);
+ r += tz * (1.0f - ty) * tx * read(data, nix, iy, niz, width, height, depth);
+ r += tz * ty * (1.0f - tx) * read(data, ix, niy, niz, width, height, depth);
+ r += tz * ty * tx * read(data, nix, niy, niz, width, height, depth);
+ return r;
+ }
+
+ /** Tricubic Interpolation */
+ static ccl_always_inline float4
+ tricubic_lookup(const T *data,
+ float tx,
+ float ty,
+ float tz,
+ const int xc[4],
+ const int yc[4],
+ const int zc[4],
+ int width,
+ int height,
+ int depth,
+ float4 read(const T *, int, int, int, int, int, int))
+ {
+ float u[4], v[4], w[4];
+
+ /* Some helper macros to keep code size reasonable.
+ * Lets the compiler inline all the matrix multiplications.
+ */
+#define DATA(x, y, z) (read(data, xc[x], yc[y], zc[z], width, height, depth))
+#define COL_TERM(col, row) \
+ (v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \
+ u[3] * DATA(3, col, row)))
+#define ROW_TERM(row) \
+ (w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row)))
+
+ SET_CUBIC_SPLINE_WEIGHTS(u, tx);
+ SET_CUBIC_SPLINE_WEIGHTS(v, ty);
+ SET_CUBIC_SPLINE_WEIGHTS(w, tz);
+ /* Actual interpolation. */
+ return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
+
+#undef COL_TERM
+#undef ROW_TERM
+#undef DATA
+ }
+
static ccl_always_inline int wrap_periodic(int x, int width)
{
x %= width;
- if (x < 0)
+ if (x < 0) {
x += width;
+ }
return x;
}
@@ -105,7 +201,6 @@ template<typename T> struct TextureInterpolator {
static ccl_always_inline float4 interp_closest(const TextureInfo &info, float x, float y)
{
- const T *data = (const T *)info.data;
const int width = info.width;
const int height = info.height;
int ix, iy;
@@ -117,10 +212,11 @@ template<typename T> struct TextureInterpolator {
iy = wrap_periodic(iy, height);
break;
case EXTENSION_CLIP:
- if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
+ /* No samples are inside the clip region. */
+ if (ix < 0 || ix >= width || iy < 0 || iy >= height) {
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
- ATTR_FALLTHROUGH;
+ break;
case EXTENSION_EXTEND:
ix = wrap_clamp(ix, width);
iy = wrap_clamp(iy, height);
@@ -129,93 +225,121 @@ template<typename T> struct TextureInterpolator {
kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
- return read(data[ix + iy * width]);
+
+ const T *data = (const T *)info.data;
+ return read((const T *)data, ix, iy, width, height);
}
static ccl_always_inline float4 interp_linear(const TextureInfo &info, float x, float y)
{
- const T *data = (const T *)info.data;
const int width = info.width;
const int height = info.height;
- int ix, iy, nix, niy;
+
+ /* A -0.5 offset is used to center the linear samples around the sample point. */
+ int ix, iy;
+ int nix, niy;
const float tx = frac(x * (float)width - 0.5f, &ix);
const float ty = frac(y * (float)height - 0.5f, &iy);
+
switch (info.extension) {
case EXTENSION_REPEAT:
ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
nix = wrap_periodic(ix + 1, width);
+
+ iy = wrap_periodic(iy, height);
niy = wrap_periodic(iy + 1, height);
break;
case EXTENSION_CLIP:
+ /* No linear samples are inside the clip region. */
+ if (ix < -1 || ix >= width || iy < -1 || iy >= height) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
nix = ix + 1;
niy = iy + 1;
break;
case EXTENSION_EXTEND:
nix = wrap_clamp(ix + 1, width);
- niy = wrap_clamp(iy + 1, height);
ix = wrap_clamp(ix, width);
+ niy = wrap_clamp(iy + 1, height);
iy = wrap_clamp(iy, height);
break;
default:
kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
- return (1.0f - ty) * (1.0f - tx) * read(data, ix, iy, width, height) +
- (1.0f - ty) * tx * read(data, nix, iy, width, height) +
- ty * (1.0f - tx) * read(data, ix, niy, width, height) +
- ty * tx * read(data, nix, niy, width, height);
+
+ const T *data = (const T *)info.data;
+ return (1.0f - ty) * (1.0f - tx) * read_clip(data, ix, iy, width, height) +
+ (1.0f - ty) * tx * read_clip(data, nix, iy, width, height) +
+ ty * (1.0f - tx) * read_clip(data, ix, niy, width, height) +
+ ty * tx * read_clip(data, nix, niy, width, height);
}
static ccl_always_inline float4 interp_cubic(const TextureInfo &info, float x, float y)
{
- const T *data = (const T *)info.data;
const int width = info.width;
const int height = info.height;
- int ix, iy, nix, niy;
+
+ /* A -0.5 offset is used to center the cubic samples around the sample point. */
+ int ix, iy;
const float tx = frac(x * (float)width - 0.5f, &ix);
const float ty = frac(y * (float)height - 0.5f, &iy);
- int pix, piy, nnix, nniy;
+
+ int pix, piy;
+ int nix, niy;
+ int nnix, nniy;
+
switch (info.extension) {
case EXTENSION_REPEAT:
ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
pix = wrap_periodic(ix - 1, width);
- piy = wrap_periodic(iy - 1, height);
nix = wrap_periodic(ix + 1, width);
- niy = wrap_periodic(iy + 1, height);
nnix = wrap_periodic(ix + 2, width);
+
+ iy = wrap_periodic(iy, height);
+ piy = wrap_periodic(iy - 1, height);
+ niy = wrap_periodic(iy + 1, height);
nniy = wrap_periodic(iy + 2, height);
break;
case EXTENSION_CLIP:
+ /* No cubic samples are inside the clip region. */
+ if (ix < -2 || ix > width || iy < -2 || iy > height) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+
pix = ix - 1;
- piy = iy - 1;
nix = ix + 1;
- niy = iy + 1;
nnix = ix + 2;
+
+ piy = iy - 1;
+ niy = iy + 1;
nniy = iy + 2;
break;
case EXTENSION_EXTEND:
pix = wrap_clamp(ix - 1, width);
- piy = wrap_clamp(iy - 1, height);
nix = wrap_clamp(ix + 1, width);
- niy = wrap_clamp(iy + 1, height);
nnix = wrap_clamp(ix + 2, width);
- nniy = wrap_clamp(iy + 2, height);
ix = wrap_clamp(ix, width);
+
+ piy = wrap_clamp(iy - 1, height);
+ niy = wrap_clamp(iy + 1, height);
+ nniy = wrap_clamp(iy + 2, height);
iy = wrap_clamp(iy, height);
break;
default:
kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
+
+ const T *data = (const T *)info.data;
const int xc[4] = {pix, ix, nix, nnix};
const int yc[4] = {piy, iy, niy, nniy};
float u[4], v[4];
- /* Some helper macro to keep code reasonable size,
- * let compiler to inline all the matrix multiplications.
+
+ /* Some helper macros to keep code size reasonable.
+ * Lets the compiler inline all the matrix multiplications.
*/
-#define DATA(x, y) (read(data, xc[x], yc[y], width, height))
+#define DATA(x, y) (read_clip(data, xc[x], yc[y], width, height))
#define TERM(col) \
(v[col] * \
(u[0] * DATA(0, col) + u[1] * DATA(1, col) + u[2] * DATA(2, col) + u[3] * DATA(3, col)))
@@ -251,9 +375,9 @@ template<typename T> struct TextureInterpolator {
float y,
float z)
{
- int width = info.width;
- int height = info.height;
- int depth = info.depth;
+ const int width = info.width;
+ const int height = info.height;
+ const int depth = info.depth;
int ix, iy, iz;
frac(x * (float)width, &ix);
@@ -267,10 +391,11 @@ template<typename T> struct TextureInterpolator {
iz = wrap_periodic(iz, depth);
break;
case EXTENSION_CLIP:
- if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
+ /* No samples are inside the clip region. */
+ if (ix < 0 || ix >= width || iy < 0 || iy >= height || iz < 0 || iz >= depth) {
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
- ATTR_FALLTHROUGH;
+ break;
case EXTENSION_EXTEND:
ix = wrap_clamp(ix, width);
iy = wrap_clamp(iy, height);
@@ -282,7 +407,7 @@ template<typename T> struct TextureInterpolator {
}
const T *data = (const T *)info.data;
- return read(data[ix + iy * width + iz * width * height]);
+ return read(data, ix, iy, iz, width, height, depth);
}
static ccl_always_inline float4 interp_3d_linear(const TextureInfo &info,
@@ -290,12 +415,13 @@ template<typename T> struct TextureInterpolator {
float y,
float z)
{
- int width = info.width;
- int height = info.height;
- int depth = info.depth;
+ const int width = info.width;
+ const int height = info.height;
+ const int depth = info.depth;
int ix, iy, iz;
int nix, niy, niz;
+ /* A -0.5 offset is used to center the linear samples around the sample point. */
float tx = frac(x * (float)width - 0.5f, &ix);
float ty = frac(y * (float)height - 0.5f, &iy);
float tz = frac(z * (float)depth - 0.5f, &iz);
@@ -303,25 +429,53 @@ template<typename T> struct TextureInterpolator {
switch (info.extension) {
case EXTENSION_REPEAT:
ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- iz = wrap_periodic(iz, depth);
-
nix = wrap_periodic(ix + 1, width);
+
+ iy = wrap_periodic(iy, height);
niy = wrap_periodic(iy + 1, height);
+
+ iz = wrap_periodic(iz, depth);
niz = wrap_periodic(iz + 1, depth);
break;
case EXTENSION_CLIP:
- if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
+ /* No linear samples are inside the clip region. */
+ if (ix < -1 || ix >= width || iy < -1 || iy >= height || iz < -1 || iz >= depth) {
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
- ATTR_FALLTHROUGH;
+
+ nix = ix + 1;
+ niy = iy + 1;
+ niz = iz + 1;
+
+ /* All linear samples are inside the clip region. */
+ if (ix >= 0 && nix < width && iy >= 0 && niy < height && iz >= 0 && niz < depth) {
+ break;
+ }
+
+ /* The linear samples span the clip border.
+ * #read_clip is used to ensure proper interpolation across the clip border. */
+ return trilinear_lookup((const T *)info.data,
+ tx,
+ ty,
+ tz,
+ ix,
+ iy,
+ iz,
+ nix,
+ niy,
+ niz,
+ width,
+ height,
+ depth,
+ read_clip);
case EXTENSION_EXTEND:
nix = wrap_clamp(ix + 1, width);
- niy = wrap_clamp(iy + 1, height);
- niz = wrap_clamp(iz + 1, depth);
-
ix = wrap_clamp(ix, width);
+
+ niy = wrap_clamp(iy + 1, height);
iy = wrap_clamp(iy, height);
+
+ niz = wrap_clamp(iz + 1, depth);
iz = wrap_clamp(iz, depth);
break;
default:
@@ -329,24 +483,13 @@ template<typename T> struct TextureInterpolator {
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
- const T *data = (const T *)info.data;
- float4 r;
-
- r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) *
- read(data[ix + iy * width + iz * width * height]);
- r += (1.0f - tz) * (1.0f - ty) * tx * read(data[nix + iy * width + iz * width * height]);
- r += (1.0f - tz) * ty * (1.0f - tx) * read(data[ix + niy * width + iz * width * height]);
- r += (1.0f - tz) * ty * tx * read(data[nix + niy * width + iz * width * height]);
-
- r += tz * (1.0f - ty) * (1.0f - tx) * read(data[ix + iy * width + niz * width * height]);
- r += tz * (1.0f - ty) * tx * read(data[nix + iy * width + niz * width * height]);
- r += tz * ty * (1.0f - tx) * read(data[ix + niy * width + niz * width * height]);
- r += tz * ty * tx * read(data[nix + niy * width + niz * width * height]);
-
- return r;
+ return trilinear_lookup(
+ (const T *)info.data, tx, ty, tz, ix, iy, iz, nix, niy, niz, width, height, depth, read);
}
- /* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are
+ /* Tricubic b-spline interpolation.
+ *
+ * TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are
* causing stack overflow issue in this function unless it is inlined.
*
* Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization
@@ -364,85 +507,89 @@ template<typename T> struct TextureInterpolator {
int height = info.height;
int depth = info.depth;
int ix, iy, iz;
- int nix, niy, niz;
- /* Tricubic b-spline interpolation. */
+
+ /* A -0.5 offset is used to center the cubic samples around the sample point. */
const float tx = frac(x * (float)width - 0.5f, &ix);
const float ty = frac(y * (float)height - 0.5f, &iy);
const float tz = frac(z * (float)depth - 0.5f, &iz);
- int pix, piy, piz, nnix, nniy, nniz;
+
+ int pix, piy, piz;
+ int nix, niy, niz;
+ int nnix, nniy, nniz;
switch (info.extension) {
case EXTENSION_REPEAT:
ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- iz = wrap_periodic(iz, depth);
-
pix = wrap_periodic(ix - 1, width);
- piy = wrap_periodic(iy - 1, height);
- piz = wrap_periodic(iz - 1, depth);
-
nix = wrap_periodic(ix + 1, width);
- niy = wrap_periodic(iy + 1, height);
- niz = wrap_periodic(iz + 1, depth);
-
nnix = wrap_periodic(ix + 2, width);
+
+ iy = wrap_periodic(iy, height);
+ niy = wrap_periodic(iy + 1, height);
+ piy = wrap_periodic(iy - 1, height);
nniy = wrap_periodic(iy + 2, height);
+
+ iz = wrap_periodic(iz, depth);
+ piz = wrap_periodic(iz - 1, depth);
+ niz = wrap_periodic(iz + 1, depth);
nniz = wrap_periodic(iz + 2, depth);
break;
- case EXTENSION_CLIP:
- if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
+ case EXTENSION_CLIP: {
+ /* No cubic samples are inside the clip region. */
+ if (ix < -2 || ix > width || iy < -2 || iy > height || iz < -2 || iz > depth) {
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
- ATTR_FALLTHROUGH;
+
+ pix = ix - 1;
+ nnix = ix + 2;
+ nix = ix + 1;
+
+ piy = iy - 1;
+ niy = iy + 1;
+ nniy = iy + 2;
+
+ piz = iz - 1;
+ niz = iz + 1;
+ nniz = iz + 2;
+
+ /* All cubic samples are inside the clip region. */
+ if (pix >= 0 && nnix < width && piy >= 0 && nniy < height && piz >= 0 && nniz < depth) {
+ break;
+ }
+
+ /* The Cubic samples span the clip border.
+ * read_clip is used to ensure proper interpolation across the clip border. */
+ const int xc[4] = {pix, ix, nix, nnix};
+ const int yc[4] = {piy, iy, niy, nniy};
+ const int zc[4] = {piz, iz, niz, nniz};
+ return tricubic_lookup(
+ (const T *)info.data, tx, ty, tz, xc, yc, zc, width, height, depth, read_clip);
+ }
case EXTENSION_EXTEND:
pix = wrap_clamp(ix - 1, width);
- piy = wrap_clamp(iy - 1, height);
- piz = wrap_clamp(iz - 1, depth);
-
nix = wrap_clamp(ix + 1, width);
- niy = wrap_clamp(iy + 1, height);
- niz = wrap_clamp(iz + 1, depth);
-
nnix = wrap_clamp(ix + 2, width);
- nniy = wrap_clamp(iy + 2, height);
- nniz = wrap_clamp(iz + 2, depth);
-
ix = wrap_clamp(ix, width);
+
+ piy = wrap_clamp(iy - 1, height);
+ niy = wrap_clamp(iy + 1, height);
+ nniy = wrap_clamp(iy + 2, height);
iy = wrap_clamp(iy, height);
+
+ piz = wrap_clamp(iz - 1, depth);
+ niz = wrap_clamp(iz + 1, depth);
+ nniz = wrap_clamp(iz + 2, depth);
iz = wrap_clamp(iz, depth);
break;
default:
kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
-
const int xc[4] = {pix, ix, nix, nnix};
- const int yc[4] = {width * piy, width * iy, width * niy, width * nniy};
- const int zc[4] = {
- width * height * piz, width * height * iz, width * height * niz, width * height * nniz};
- float u[4], v[4], w[4];
-
- /* Some helper macro to keep code reasonable size,
- * let compiler to inline all the matrix multiplications.
- */
-#define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]]))
-#define COL_TERM(col, row) \
- (v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \
- u[3] * DATA(3, col, row)))
-#define ROW_TERM(row) \
- (w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row)))
-
- SET_CUBIC_SPLINE_WEIGHTS(u, tx);
- SET_CUBIC_SPLINE_WEIGHTS(v, ty);
- SET_CUBIC_SPLINE_WEIGHTS(w, tz);
-
- /* Actual interpolation. */
+ const int yc[4] = {piy, iy, niy, nniy};
+ const int zc[4] = {piz, iz, niz, nniz};
const T *data = (const T *)info.data;
- return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
-
-#undef COL_TERM
-#undef ROW_TERM
-#undef DATA
+ return tricubic_lookup(data, tx, ty, tz, xc, yc, zc, width, height, depth, read);
}
static ccl_always_inline float4
@@ -495,6 +642,7 @@ template<typename T> struct NanoVDBInterpolator {
return read(nanovdb::SampleFromVoxels<AccessorType, 1, false>(acc)(xyz));
}
+ /* Tricubic b-spline interpolation. */
# if defined(__GNUC__) || defined(__clang__)
static ccl_always_inline
# else
@@ -507,10 +655,12 @@ template<typename T> struct NanoVDBInterpolator {
int nix, niy, niz;
int pix, piy, piz;
int nnix, nniy, nniz;
- /* Tricubic b-spline interpolation. */
+
+ /* A -0.5 offset is used to center the cubic samples around the sample point. */
const float tx = frac(x - 0.5f, &ix);
const float ty = frac(y - 0.5f, &iy);
const float tz = frac(z - 0.5f, &iz);
+
pix = ix - 1;
piy = iy - 1;
piz = iz - 1;
@@ -526,8 +676,8 @@ template<typename T> struct NanoVDBInterpolator {
const int zc[4] = {piz, iz, niz, nniz};
float u[4], v[4], w[4];
- /* Some helper macro to keep code reasonable size,
- * let compiler to inline all the matrix multiplications.
+ /* Some helper macros to keep code size reasonable.
+ * Lets the compiler inline all the matrix multiplications.
*/
# define DATA(x, y, z) (read(acc.getValue(nanovdb::Coord(xc[x], yc[y], zc[z]))))
# define COL_TERM(col, row) \