diff options
author | Campbell Barton <ideasman42@gmail.com> | 2017-04-09 09:09:12 +0300 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2017-04-09 09:09:12 +0300 |
commit | c800ee6bfe602d1cdfafc042228add3692bdb12e (patch) | |
tree | a02dad89cb052cdd388791509696e6bbf94cf2ae /intern | |
parent | 79e862ad6b416968f3e0afa7a18f98ab11a4db03 (diff) | |
parent | 162e184ffd0f2e5f1584b95339df6cb3c429f017 (diff) |
Merge branch 'master' into blender2.8
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_split.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_compat_cpu.h | 329 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_compat_cuda.h | 1 | ||||
-rw-r--r-- | intern/cycles/util/util_types.h | 2 | ||||
-rw-r--r-- | intern/libmv/libmv/multiview/homography.cc | 2 |
6 files changed, 181 insertions, 157 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 606494f08ed..4c1a49878f5 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -1613,7 +1613,7 @@ int2 CUDASplitKernel::split_kernel_local_size() return make_int2(32, 1); } -int2 CUDASplitKernel::split_kernel_global_size(device_memory& /*kg*/, device_memory& /*data*/, DeviceTask */*task*/) +int2 CUDASplitKernel::split_kernel_global_size(device_memory& /*kg*/, device_memory& /*data*/, DeviceTask * /*task*/) { /* TODO(mai): implement something here to detect ideal work size */ return make_int2(256, 256); diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index 579dbc84f53..b8df57ec7b9 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -340,7 +340,7 @@ public: return make_int2(64, 1); } - virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask */*task*/) + virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask * /*task*/) { cl_device_type type = OpenCLInfo::get_device_type(device->cdDevice); /* Use small global size on CPU devices as it seems to be much faster. */ diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index ae7c9b836c4..cad5f4d2959 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -316,184 +316,203 @@ template<typename T> struct texture_image { return interp_3d_ex(x, y, z, interpolation); } - ccl_always_inline float4 interp_3d_ex(float x, float y, float z, - int interpolation = INTERPOLATION_LINEAR) + ccl_always_inline float4 interp_3d_ex_closest(float x, float y, float z) { - if(UNLIKELY(!data)) - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - int ix, iy, iz, nix, niy, niz; - - if(interpolation == INTERPOLATION_CLOSEST) { - frac(x*(float)width, &ix); - frac(y*(float)height, &iy); - frac(z*(float)depth, &iz); - - switch(extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || z < 0.0f || - x > 1.0f || y > 1.0f || z > 1.0f) - { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - /* Fall through. */ - case EXTENSION_EXTEND: - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); - break; - default: - kernel_assert(0); + int ix, iy, iz; + frac(x*(float)width, &ix); + frac(y*(float)height, &iy); + frac(z*(float)depth, &iz); + + switch(extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + break; + case EXTENSION_CLIP: + if(x < 0.0f || y < 0.0f || z < 0.0f || + x > 1.0f || y > 1.0f || z > 1.0f) + { return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - return read(data[ix + iy*width + iz*width*height]); + } + /* Fall through. */ + case EXTENSION_EXTEND: + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); } - else if(interpolation == INTERPOLATION_LINEAR) { - float tx = frac(x*(float)width - 0.5f, &ix); - float ty = frac(y*(float)height - 0.5f, &iy); - float tz = frac(z*(float)depth - 0.5f, &iz); - switch(extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); - - nix = wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); - niz = wrap_periodic(iz+1, depth); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || z < 0.0f || - x > 1.0f || y > 1.0f || z > 1.0f) - { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - /* Fall through. */ - case EXTENSION_EXTEND: - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); - niz = wrap_clamp(iz+1, depth); + return read(data[ix + iy*width + iz*width*height]); + } - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); - break; - default: - kernel_assert(0); + ccl_always_inline float4 interp_3d_ex_linear(float x, float y, float z) + { + int ix, iy, iz; + int nix, niy, niz; + + float tx = frac(x*(float)width - 0.5f, &ix); + float ty = frac(y*(float)height - 0.5f, &iy); + float tz = frac(z*(float)depth - 0.5f, &iz); + + switch(extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + + nix = wrap_periodic(ix+1, width); + niy = wrap_periodic(iy+1, height); + niz = wrap_periodic(iz+1, depth); + break; + case EXTENSION_CLIP: + if(x < 0.0f || y < 0.0f || z < 0.0f || + x > 1.0f || y > 1.0f || z > 1.0f) + { return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - float4 r; - - r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]); - r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]); - r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]); - r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]); - - r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]); - r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]); - r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]); - r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]); - - return r; + } + /* Fall through. */ + case EXTENSION_EXTEND: + nix = wrap_clamp(ix+1, width); + niy = wrap_clamp(iy+1, height); + niz = wrap_clamp(iz+1, depth); + + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); } - else { - /* Tricubic b-spline interpolation. */ - const float tx = frac(x*(float)width - 0.5f, &ix); - const float ty = frac(y*(float)height - 0.5f, &iy); - const float tz = frac(z*(float)depth - 0.5f, &iz); - int pix, piy, piz, nnix, nniy, nniz; - - switch(extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); - pix = wrap_periodic(ix-1, width); - piy = wrap_periodic(iy-1, height); - piz = wrap_periodic(iz-1, depth); + float4 r; - nix = wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); - niz = wrap_periodic(iz+1, depth); - - nnix = wrap_periodic(ix+2, width); - nniy = wrap_periodic(iy+2, height); - nniz = wrap_periodic(iz+2, depth); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || z < 0.0f || - x > 1.0f || y > 1.0f || z > 1.0f) - { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - /* Fall through. */ - case EXTENSION_EXTEND: - pix = wrap_clamp(ix-1, width); - piy = wrap_clamp(iy-1, height); - piz = wrap_clamp(iz-1, depth); + r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]); + r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]); + r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]); + r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]); - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); - niz = wrap_clamp(iz+1, depth); + r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]); + r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]); + r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]); + r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]); - nnix = wrap_clamp(ix+2, width); - nniy = wrap_clamp(iy+2, height); - nniz = wrap_clamp(iz+2, depth); + return r; + } - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); - break; - default: - kernel_assert(0); + ccl_never_inline float4 interp_3d_ex_tricubic(float x, float y, float z) + { + int ix, iy, iz; + int nix, niy, niz; + /* Tricubic b-spline interpolation. */ + const float tx = frac(x*(float)width - 0.5f, &ix); + const float ty = frac(y*(float)height - 0.5f, &iy); + const float tz = frac(z*(float)depth - 0.5f, &iz); + int pix, piy, piz, nnix, nniy, nniz; + + switch(extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + + pix = wrap_periodic(ix-1, width); + piy = wrap_periodic(iy-1, height); + piz = wrap_periodic(iz-1, depth); + + nix = wrap_periodic(ix+1, width); + niy = wrap_periodic(iy+1, height); + niz = wrap_periodic(iz+1, depth); + + nnix = wrap_periodic(ix+2, width); + nniy = wrap_periodic(iy+2, height); + nniz = wrap_periodic(iz+2, depth); + break; + case EXTENSION_CLIP: + if(x < 0.0f || y < 0.0f || z < 0.0f || + x > 1.0f || y > 1.0f || z > 1.0f) + { return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - const int xc[4] = {pix, ix, nix, nnix}; - const int yc[4] = {width * piy, - width * iy, - width * niy, - width * nniy}; - const int zc[4] = {width * height * piz, - width * height * iz, - width * height * niz, - width * height * nniz}; - float u[4], v[4], w[4]; + } + /* Fall through. */ + case EXTENSION_EXTEND: + pix = wrap_clamp(ix-1, width); + piy = wrap_clamp(iy-1, height); + piz = wrap_clamp(iz-1, depth); + + nix = wrap_clamp(ix+1, width); + niy = wrap_clamp(iy+1, height); + niz = wrap_clamp(iz+1, depth); + + nnix = wrap_clamp(ix+2, width); + nniy = wrap_clamp(iy+2, height); + nniz = wrap_clamp(iz+2, depth); + + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } - /* Some helper macro to keep code reasonable size, - * let compiler to inline all the matrix multiplications. - */ + const int xc[4] = {pix, ix, nix, nnix}; + const int yc[4] = {width * piy, + width * iy, + width * niy, + width * nniy}; + const int zc[4] = {width * height * piz, + width * height * iz, + width * height * niz, + width * height * nniz}; + float u[4], v[4], w[4]; + + /* Some helper macro to keep code reasonable size, + * let compiler to inline all the matrix multiplications. + */ #define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]])) #define COL_TERM(col, row) \ - (v[col] * (u[0] * DATA(0, col, row) + \ - u[1] * DATA(1, col, row) + \ - u[2] * DATA(2, col, row) + \ - u[3] * DATA(3, col, row))) + (v[col] * (u[0] * DATA(0, col, row) + \ + u[1] * DATA(1, col, row) + \ + u[2] * DATA(2, col, row) + \ + u[3] * DATA(3, col, row))) #define ROW_TERM(row) \ - (w[row] * (COL_TERM(0, row) + \ - COL_TERM(1, row) + \ - COL_TERM(2, row) + \ - COL_TERM(3, row))) + (w[row] * (COL_TERM(0, row) + \ + COL_TERM(1, row) + \ + COL_TERM(2, row) + \ + COL_TERM(3, row))) - SET_CUBIC_SPLINE_WEIGHTS(u, tx); - SET_CUBIC_SPLINE_WEIGHTS(v, ty); - SET_CUBIC_SPLINE_WEIGHTS(w, tz); + SET_CUBIC_SPLINE_WEIGHTS(u, tx); + SET_CUBIC_SPLINE_WEIGHTS(v, ty); + SET_CUBIC_SPLINE_WEIGHTS(w, tz); - /* Actual interpolation. */ - return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3); + /* Actual interpolation. */ + return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3); #undef COL_TERM #undef ROW_TERM #undef DATA + } + + ccl_always_inline float4 interp_3d_ex(float x, float y, float z, + int interpolation = INTERPOLATION_LINEAR) + { + + if(UNLIKELY(!data)) + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + switch(interpolation) { + case INTERPOLATION_CLOSEST: + return interp_3d_ex_closest(x, y, z); + case INTERPOLATION_LINEAR: + return interp_3d_ex_linear(x, y, z); + default: + return interp_3d_ex_tricubic(x, y, z); } } diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index 39e98c7dda6..c375d17a95f 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -33,6 +33,7 @@ #include <cuda.h> #include <cuda_fp16.h> #include <float.h> +#include <stdint.h> /* Qualifier wrappers for different names on different devices */ diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index bf4a134b998..aa22f6a2c57 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -55,6 +55,7 @@ #endif #define ccl_may_alias #define ccl_always_inline __forceinline +#define ccl_never_inline __declspec(noinline) #define ccl_maybe_unused #else @@ -68,6 +69,7 @@ #define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__))) #define ccl_may_alias __attribute__((__may_alias__)) #define ccl_always_inline __attribute__((always_inline)) +#define ccl_never_inline __attribute__((noinline)) #define ccl_maybe_unused __attribute__((used)) #endif diff --git a/intern/libmv/libmv/multiview/homography.cc b/intern/libmv/libmv/multiview/homography.cc index ce533a3ead2..69177743f94 100644 --- a/intern/libmv/libmv/multiview/homography.cc +++ b/intern/libmv/libmv/multiview/homography.cc @@ -209,6 +209,8 @@ class HomographySymmetricGeometricCostFunctor { return true; } + EIGEN_MAKE_ALIGNED_OPERATOR_NEW + const Vec2 x_; const Vec2 y_; }; |