From 2d92988f6bc4ec475d685c5cdfb84ba23c1a95ba Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Sat, 7 Oct 2017 02:15:12 +0200 Subject: Cycles: CUDA bicubic and tricubic texture interpolation support. While cubic interpolation is quite expensive on the CPU compared to linear interpolation, the difference on the GPU is quite small. --- intern/cycles/blender/addon/ui.py | 4 + intern/cycles/kernel/geom/geom_volume.h | 22 +-- .../cycles/kernel/kernels/cpu/kernel_cpu_image.h | 28 +--- .../cycles/kernel/kernels/cuda/kernel_cuda_image.h | 151 +++++++++++++++++++-- .../kernel/kernels/opencl/kernel_opencl_image.h | 4 +- intern/cycles/kernel/osl/osl_services.cpp | 2 +- intern/cycles/kernel/svm/svm_voxel.h | 2 +- 7 files changed, 158 insertions(+), 55 deletions(-) diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 7ab47455c49..baf1b9c31ee 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -1208,6 +1208,8 @@ class CYCLES_WORLD_PT_settings(CyclesButtonsPanel, Panel): sub = col.column() sub.active = use_cpu(context) sub.prop(cworld, "volume_sampling", text="") + sub = col.column() + sub.active = not use_opencl(context) sub.prop(cworld, "volume_interpolation", text="") col.prop(cworld, "homogeneous_volume", text="Homogeneous") @@ -1307,6 +1309,8 @@ class CYCLES_MATERIAL_PT_settings(CyclesButtonsPanel, Panel): sub = col.column() sub.active = use_cpu(context) sub.prop(cmat, "volume_sampling", text="") + sub = col.column() + sub.active = not use_opencl(context) sub.prop(cmat, "volume_interpolation", text="") col.prop(cmat, "homogeneous_volume", text="Homogeneous") diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h index b19c488ef8a..6be448c4fa4 100644 --- a/intern/cycles/kernel/geom/geom_volume.h +++ b/intern/cycles/kernel/geom/geom_volume.h @@ -50,15 +50,8 @@ ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg, ccl_device float 
volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) { float3 P = volume_normalized_position(kg, sd, sd->P); -#ifdef __KERNEL_GPU__ - float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z); -#else - float4 r; - if(sd->flag & SD_VOLUME_CUBIC) - r = kernel_tex_image_interp_3d_ex(kg, desc.offset, P.x, P.y, P.z, INTERPOLATION_CUBIC); - else - r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z); -#endif + InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC)? INTERPOLATION_CUBIC: INTERPOLATION_NONE; + float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp); if(dx) *dx = 0.0f; if(dy) *dy = 0.0f; @@ -69,15 +62,8 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy) { float3 P = volume_normalized_position(kg, sd, sd->P); -#ifdef __KERNEL_GPU__ - float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z); -#else - float4 r; - if(sd->flag & SD_VOLUME_CUBIC) - r = kernel_tex_image_interp_3d_ex(kg, desc.offset, P.x, P.y, P.z, INTERPOLATION_CUBIC); - else - r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z); -#endif + InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC)? 
INTERPOLATION_CUBIC: INTERPOLATION_NONE; + float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp); if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h index b2ad60f08c1..37ba0f692be 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h @@ -430,12 +430,12 @@ template struct TextureInterpolator { static ccl_always_inline float4 interp_3d(const TextureInfo& info, float x, float y, float z, - int interpolation = INTERPOLATION_LINEAR) + InterpolationType interp) { if(UNLIKELY(!info.data)) return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - switch(interpolation) { + switch((interp == INTERPOLATION_NONE)? info.interpolation: interp) { case INTERPOLATION_CLOSEST: return interp_3d_closest(info, x, y, z); case INTERPOLATION_LINEAR: @@ -468,29 +468,7 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl } } -ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z) -{ - const TextureInfo& info = kernel_tex_fetch(__texture_info, id); - InterpolationType interp = (InterpolationType)info.interpolation; - - switch(kernel_tex_type(id)) { - case IMAGE_DATA_TYPE_HALF: - return TextureInterpolator::interp_3d(info, x, y, z, interp); - case IMAGE_DATA_TYPE_BYTE: - return TextureInterpolator::interp_3d(info, x, y, z, interp); - case IMAGE_DATA_TYPE_FLOAT: - return TextureInterpolator::interp_3d(info, x, y, z, interp); - case IMAGE_DATA_TYPE_HALF4: - return TextureInterpolator::interp_3d(info, x, y, z, interp); - case IMAGE_DATA_TYPE_BYTE4: - return TextureInterpolator::interp_3d(info, x, y, z, interp); - case IMAGE_DATA_TYPE_FLOAT4: - default: - return TextureInterpolator::interp_3d(info, x, y, z, interp); - } -} - -ccl_device float4 kernel_tex_image_interp_3d_ex(KernelGlobals 
*kg, int id, float x, float y, float z, int interp) +ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp) { const TextureInfo& info = kernel_tex_fetch(__texture_info, id); diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h index 00f6954003d..269e74f6164 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h +++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h @@ -18,7 +18,115 @@ /* Kepler */ -ccl_device float4 kernel_tex_image_interp(void *kg, int id, float x, float y) +/* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */ +ccl_device float cubic_w0(float a) +{ + return (1.0f/6.0f)*(a*(a*(-a + 3.0f) - 3.0f) + 1.0f); +} + +ccl_device float cubic_w1(float a) +{ + return (1.0f/6.0f)*(a*a*(3.0f*a - 6.0f) + 4.0f); +} + +ccl_device float cubic_w2(float a) +{ + return (1.0f/6.0f)*(a*(a*(-3.0f*a + 3.0f) + 3.0f) + 1.0f); +} + +ccl_device float cubic_w3(float a) +{ + return (1.0f/6.0f)*(a*a*a); +} + +/* g0 and g1 are the two amplitude functions. */ +ccl_device float cubic_g0(float a) +{ + return cubic_w0(a) + cubic_w1(a); +} + +ccl_device float cubic_g1(float a) +{ + return cubic_w2(a) + cubic_w3(a); +} + +/* h0 and h1 are the two offset functions */ +ccl_device float cubic_h0(float a) +{ + /* Note +0.5 offset to compensate for CUDA linear filtering convention. */ + return -1.0f + cubic_w1(a) / (cubic_w0(a) + cubic_w1(a)) + 0.5f; +} + +ccl_device float cubic_h1(float a) +{ + return 1.0f + cubic_w3(a) / (cubic_w2(a) + cubic_w3(a)) + 0.5f; +} + +/* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. 
*/ +template<typename T> +ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo& info, CUtexObject tex, float x, float y) +{ + x = (x * info.width) - 0.5f; + y = (y * info.height) - 0.5f; + + float px = floor(x); + float py = floor(y); + float fx = x - px; + float fy = y - py; + + float g0x = cubic_g0(fx); + float g1x = cubic_g1(fx); + float x0 = (px + cubic_h0(fx)) / info.width; + float x1 = (px + cubic_h1(fx)) / info.width; + float y0 = (py + cubic_h0(fy)) / info.height; + float y1 = (py + cubic_h1(fy)) / info.height; + + return cubic_g0(fy) * (g0x * tex2D<T>(tex, x0, y0) + + g1x * tex2D<T>(tex, x1, y0)) + + cubic_g1(fy) * (g0x * tex2D<T>(tex, x0, y1) + + g1x * tex2D<T>(tex, x1, y1)); +} + +/* Fast tricubic texture lookup using 8 bilinear lookups. */ +template<typename T> +ccl_device T kernel_tex_image_interp_bicubic_3d(const TextureInfo& info, CUtexObject tex, float x, float y, float z) +{ + x = (x * info.width) - 0.5f; + y = (y * info.height) - 0.5f; + z = (z * info.depth) - 0.5f; + + float px = floor(x); + float py = floor(y); + float pz = floor(z); + float fx = x - px; + float fy = y - py; + float fz = z - pz; + + float g0x = cubic_g0(fx); + float g1x = cubic_g1(fx); + float g0y = cubic_g0(fy); + float g1y = cubic_g1(fy); + float g0z = cubic_g0(fz); + float g1z = cubic_g1(fz); + + float x0 = (px + cubic_h0(fx)) / info.width; + float x1 = (px + cubic_h1(fx)) / info.width; + float y0 = (py + cubic_h0(fy)) / info.height; + float y1 = (py + cubic_h1(fy)) / info.height; + float z0 = (pz + cubic_h0(fz)) / info.depth; + float z1 = (pz + cubic_h1(fz)) / info.depth; + + return g0z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z0) + + g1x * tex3D<T>(tex, x1, y0, z0)) + + g1y * (g0x * tex3D<T>(tex, x0, y1, z0) + + g1x * tex3D<T>(tex, x1, y1, z0))) + + g1z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z1) + + g1x * tex3D<T>(tex, x1, y0, z1)) + + g1y * (g0x * tex3D<T>(tex, x0, y1, z1) + + g1x * tex3D<T>(tex, x1, y1, z1))); +} + +ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) { const TextureInfo& 
info = kernel_tex_fetch(__texture_info, id); CUtexObject tex = (CUtexObject)info.data; @@ -29,29 +137,56 @@ ccl_device float4 kernel_tex_image_interp(void *kg, int id, float x, float y) texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_HALF4) { - return tex2D<float4>(tex, x, y); + if(info.interpolation == INTERPOLATION_CUBIC) { + return kernel_tex_image_interp_bicubic<float4>(info, tex, x, y); + } + else { + return tex2D<float4>(tex, x, y); + } } /* float, byte and half */ else { - float f = tex2D<float>(tex, x, y); + float f; + + if(info.interpolation == INTERPOLATION_CUBIC) { + f = kernel_tex_image_interp_bicubic<float>(info, tex, x, y); + } + else { + f = tex2D<float>(tex, x, y); + } + return make_float4(f, f, f, 1.0f); } } -ccl_device float4 kernel_tex_image_interp_3d(void *kg, int id, float x, float y, float z) +ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp) { const TextureInfo& info = kernel_tex_fetch(__texture_info, id); CUtexObject tex = (CUtexObject)info.data; + uint interpolation = (interp == INTERPOLATION_NONE)? 
info.interpolation: interp; const int texture_type = kernel_tex_type(id); if(texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_HALF4) { - return tex3D<float4>(tex, x, y, z); + if(interpolation == INTERPOLATION_CUBIC) { + return kernel_tex_image_interp_bicubic_3d<float4>(info, tex, x, y, z); + } + else { + return tex3D<float4>(tex, x, y, z); + } } else { - float f = tex3D<float>(tex, x, y, z); + float f; + + if(interpolation == INTERPOLATION_CUBIC) { + f = kernel_tex_image_interp_bicubic_3d<float>(info, tex, x, y, z); + } + else { + f = tex3D<float>(tex, x, y, z); + } + return make_float4(f, f, f, 1.0f); } } @@ -60,7 +195,7 @@ ccl_device float4 kernel_tex_image_interp_3d(void *kg, int id, float x, float y, /* Fermi */ -ccl_device float4 kernel_tex_image_interp(void *kg, int id, float x, float y) +ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) { float4 r; switch(id) { @@ -158,7 +293,7 @@ ccl_device float4 kernel_tex_image_interp(void *kg, int id, float x, float y) return r; } -ccl_device float4 kernel_tex_image_interp_3d(void *kg, int id, float x, float y, float z) +ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z) { float4 r; switch(id) { diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h index 514980e731e..20ec36aa9eb 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h +++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h @@ -142,7 +142,7 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl } -ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z) +ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, int interp) { const ccl_global TextureInfo *info = kernel_tex_info(kg, id); @@ -150,7 +150,7 @@ ccl_device float4 
kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, uint height = info->height; uint offset = 0; uint depth = info->depth; - uint interpolation = info->interpolation; + uint interpolation = (interp == INTERPOLATION_NONE)? info->interpolation: interp; uint extension = info->extension; /* Actual sampling. */ diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index 5b991bf065c..8ae004031e1 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -1043,7 +1043,7 @@ bool OSLRenderServices::texture3d(ustring filename, bool status; if(filename.length() && filename[0] == '@') { int slot = atoi(filename.c_str() + 1); - float4 rgba = kernel_tex_image_interp_3d(kg, slot, P.x, P.y, P.z); + float4 rgba = kernel_tex_image_interp_3d(kg, slot, P.x, P.y, P.z, INTERPOLATION_NONE); result[0] = rgba[0]; if(nchannels > 1) diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h index 466480d21b6..d967516a5c9 100644 --- a/intern/cycles/kernel/svm/svm_voxel.h +++ b/intern/cycles/kernel/svm/svm_voxel.h @@ -43,7 +43,7 @@ ccl_device void svm_node_tex_voxel(KernelGlobals *kg, co = transform_point(&tfm, co); } - float4 r = kernel_tex_image_interp_3d(kg, id, co.x, co.y, co.z); + float4 r = kernel_tex_image_interp_3d(kg, id, co.x, co.y, co.z, INTERPOLATION_NONE); #else float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); #endif -- cgit v1.2.3