From 4e94a6f576de76d670b5ed3dd69500fd8c539e67 Mon Sep 17 00:00:00 2001 From: Geraldine Chua Date: Mon, 11 Jun 2018 22:57:59 +0800 Subject: Minor sparse tile fixes. Fixed some really obvious errors with CUDA sampling, and moved the coordinates to index calculation back into kernel, saving a small percentage of time. --- .../cycles/kernel/kernels/cpu/kernel_cpu_image.h | 27 +++++++++++++--------- .../cycles/kernel/kernels/cuda/kernel_cuda_image.h | 9 ++++---- .../kernel/kernels/opencl/kernel_opencl_image.h | 1 + 3 files changed, 22 insertions(+), 15 deletions(-) (limited to 'intern/cycles/kernel') diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h index c43b94db7e0..a0d6b54245b 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h @@ -30,6 +30,11 @@ template struct TextureInterpolator { u[3] = (1.0f / 6.0f) * t * t * t; \ } (void)0 + static ccl_always_inline int flatten(int x, int y, int z, int width, int height) + { + return x + width * (y + z * height); + } + static ccl_always_inline float4 read(float4 r) { return r; @@ -82,7 +87,7 @@ template struct TextureInterpolator { int tix = x / TILE_SIZE, itix = x % TILE_SIZE, tiy = y / TILE_SIZE, itiy = y % TILE_SIZE, tiz = z / TILE_SIZE, itiz = z % TILE_SIZE; - int dense_index = compute_index_fast(tix, tiy, tiz, tiw, tih) * 2; + int dense_index = flatten(tix, tiy, tiz, tiw, tih) * 2; int sparse_index = grid_info[dense_index]; int dims = grid_info[dense_index + 1]; if(sparse_index < 0) { @@ -90,7 +95,7 @@ template struct TextureInterpolator { } int itiw = dims & (1 << ST_SHIFT_TRUNCATE_WIDTH) ? ltw : TILE_SIZE; int itih = dims & (1 << ST_SHIFT_TRUNCATE_HEIGHT) ? 
lth : TILE_SIZE; - int in_tile_index = compute_index_fast(itix, itiy, itiz, itiw, itih); + int in_tile_index = flatten(itix, itiy, itiz, itiw, itih); return read(data[sparse_index + in_tile_index]); } @@ -318,7 +323,7 @@ template struct TextureInterpolator { info.tiled_width, info.tiled_height, info.last_tile_width, info.last_tile_height); } - return read(data[compute_index_fast(ix, iy, iz, width, height)]); + return read(data[flatten(ix, iy, iz, width, height)]); } static ccl_always_inline float4 interp_3d_linear(const TextureInfo& info, @@ -384,14 +389,14 @@ template struct TextureInterpolator { r += tz*ty*tx * read(data, gi, nix, niy, niz, tiw, tih, ltw, lth); } else { - r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx) * read(data[compute_index_fast(ix, iy, iz, width, height)]); - r += (1.0f - tz)*(1.0f - ty)*tx * read(data[compute_index_fast(nix, iy, iz, width, height)]); - r += (1.0f - tz)*ty*(1.0f - tx) * read(data[compute_index_fast(ix, niy, iz, width, height)]); - r += (1.0f - tz)*ty*tx * read(data[compute_index_fast(nix, niy, iz, width, height)]); - r += tz*(1.0f - ty)*(1.0f - tx) * read(data[compute_index_fast(ix, iy, niz, width, height)]); - r += tz*(1.0f - ty)*tx * read(data[compute_index_fast(nix, iy, niz, width, height)]); - r += tz*ty*(1.0f - tx) * read(data[compute_index_fast(ix, niy, niz, width, height)]); - r += tz*ty*tx * read(data[compute_index_fast(nix, niy, niz, width, height)]); + r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx) * read(data[flatten(ix, iy, iz, width, height)]); + r += (1.0f - tz)*(1.0f - ty)*tx * read(data[flatten(nix, iy, iz, width, height)]); + r += (1.0f - tz)*ty*(1.0f - tx) * read(data[flatten(ix, niy, iz, width, height)]); + r += (1.0f - tz)*ty*tx * read(data[flatten(nix, niy, iz, width, height)]); + r += tz*(1.0f - ty)*(1.0f - tx) * read(data[flatten(ix, iy, niz, width, height)]); + r += tz*(1.0f - ty)*tx * read(data[flatten(nix, iy, niz, width, height)]); + r += tz*ty*(1.0f - tx) * read(data[flatten(ix, niy, niz, width, height)]); + r 
+= tz*ty*tx * read(data[flatten(nix, niy, niz, width, height)]); } return r; diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h index dcd1b33722a..f73f5cdbb3a 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h +++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h @@ -58,7 +58,8 @@ ccl_device float cubic_h1(float a) return 1.0f + cubic_w3(a) / (cubic_w2(a) + cubic_w3(a)) + 0.5f; } -ccl_device bool compute_sparse_coordinates(const TextureInfo *info, float fx, float fy, float fz) +/* Converts coordinates from normal volume textures dense to sparse ones. */ +ccl_device bool compute_sparse_coordinates(const TextureInfo *info, float &fx, float &fy, float &fz) { float *ix, *iy, *iz; modff(fx, *ix); @@ -76,9 +77,9 @@ ccl_device bool compute_sparse_coordinates(const TextureInfo *info, float fx, fl int tile_y = info->grid_info[dense_index + 1]; int tile_z = info->grid_info[dense_index + 2]; int dims = info->grid_info[dense_index + 3]; - fx += tile_x + itix + (dims & (1 << ST_SHIFT_X_LHS_PAD)); - fy += tile_y + itiy + (dims & (1 << ST_SHIFT_Y_LHS_PAD)); - fz += tile_z + itiz + (dims & (1 << ST_SHIFT_Z_LHS_PAD)); + fx += tile_x + itix + (dims & (1 << ST_SHIFT_X_PAD)); + fy += tile_y + itiy + (dims & (1 << ST_SHIFT_Y_PAD)); + fz += tile_z + itiz + (dims & (1 << ST_SHIFT_Z_PAD)); return true; } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h index 2e0db6609f7..be1996af3a0 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h +++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h @@ -67,6 +67,7 @@ ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, const ccl_glo } } +/* Calculates the index for sparse volume textures. */ ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, const ccl_global TextureInfo *info, int id, int x, int y, int z) -- cgit v1.2.3