From e0ea53ae77c193ef08bb0b9215c5b3ffd84e7c11 Mon Sep 17 00:00:00 2001 From: Geraldine Chua Date: Sun, 10 Jun 2018 23:15:29 +0800 Subject: Updates to volume kernel tiling function. 1. OpenCL and CUDA support (mostly untested). 2. Change name of offsets to grid_info since it needs to keep track of other info as well. 3. Several speed and memory optimizations. --- .../cycles/kernel/kernels/cpu/kernel_cpu_image.h | 108 +++++++++++---------- .../cycles/kernel/kernels/cuda/kernel_cuda_image.h | 30 ++++++ .../kernel/kernels/opencl/kernel_opencl_image.h | 22 +++++ 3 files changed, 111 insertions(+), 49 deletions(-) (limited to 'intern/cycles/kernel') diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h index 7513efc6b15..c43b94db7e0 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h @@ -75,26 +75,33 @@ template struct TextureInterpolator { return read(data[y * width + x]); } - static ccl_always_inline float4 read(const T *data, const int *offsets, + static ccl_always_inline float4 read(const T *data, const int *grid_info, int x, int y, int z, - int width, int height, int depth, - int tiw, int tih, int tid) + int tiw, int tih, int ltw, int lth) { - int index = compute_index(offsets, x, y, z, - width, height, depth, tiw, tih, tid); - return index < 0 ? make_float4(0.0f) : read(data[index]); + int tix = x / TILE_SIZE, itix = x % TILE_SIZE, + tiy = y / TILE_SIZE, itiy = y % TILE_SIZE, + tiz = z / TILE_SIZE, itiz = z % TILE_SIZE; + int dense_index = compute_index_fast(tix, tiy, tiz, tiw, tih) * 2; + int sparse_index = grid_info[dense_index]; + int dims = grid_info[dense_index + 1]; + if(sparse_index < 0) { + return make_float4(0.0f); + } + int itiw = dims & (1 << ST_SHIFT_TRUNCATE_WIDTH) ? ltw : TILE_SIZE; + int itih = dims & (1 << ST_SHIFT_TRUNCATE_HEIGHT) ? lth : TILE_SIZE; + int in_tile_index = compute_index_fast(itix, itiy, itiz, itiw, itih); + return read(data[sparse_index + in_tile_index]); } - static ccl_always_inline float4 read(const T *data, const int *offsets, - int idx, int width, int height, int depth) + static ccl_always_inline float4 read(const T *data, const int *grid_info, + int index, int width, int height, int /*depth*/, + int tiw, int tih, int ltw, int lth) { - int3 c = compute_coordinates(idx, width, height, depth); - int index = compute_index(offsets, c.x, c.y, c.z, - width, height, depth, - get_tile_res(width), - get_tile_res(height), - get_tile_res(depth)); - return index < 0 ? make_float4(0.0f) : read(data[index]); + int x = index % width; + int y = (index / width) % height; + int z = index / (width * height); + return read(data, grid_info, x, y, z, tiw, tih, ltw, lth); } static ccl_always_inline int wrap_periodic(int x, int width) @@ -304,13 +311,14 @@ template struct TextureInterpolator { } const T *data = (const T*)info.data; - const int *ofs = (const int*)info.offsets; + const int *grid_info = (const int*)info.grid_info; - if(ofs) { - return read(data, ofs, ix, iy, iz, width, height, depth, - get_tile_res(width), get_tile_res(height), get_tile_res(depth)); + if(grid_info) { + return read(data, grid_info, ix, iy, iz, + info.tiled_width, info.tiled_height, + info.last_tile_width, info.last_tile_height); } - return read(data[compute_index(ix, iy, iz, width, height, depth)]); + return read(data[compute_index_fast(ix, iy, iz, width, height)]); } static ccl_always_inline float4 interp_3d_linear(const TextureInfo& info, @@ -359,33 +367,31 @@ template struct TextureInterpolator { float4 r; const T *data = (const T*)info.data; - const int *ofs = (const int*)info.offsets; - - if(ofs) { - int tiw = get_tile_res(width), tih = get_tile_res(height), tid = get_tile_res(depth); - /* Initial check if either voxel is in an active tile. */ - if(!tile_is_active(ofs, ix, iy, iz, tiw, tih, tid) && - !tile_is_active(ofs, nix, niy, niz, tiw, tih, tid)) { - return make_float4(0.0f); - } - r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx) * read(data, ofs, ix, iy, iz, width, height, depth, tiw, tih, tid); - r += (1.0f - tz)*(1.0f - ty)*tx * read(data, ofs, nix, iy, iz, width, height, depth, tiw, tih, tid); - r += (1.0f - tz)*ty*(1.0f - tx) * read(data, ofs, ix, niy, iz, width, height, depth, tiw, tih, tid); - r += (1.0f - tz)*ty*tx * read(data, ofs, nix, niy, iz, width, height, depth, tiw, tih, tid); - r += tz*(1.0f - ty)*(1.0f - tx) * read(data, ofs, ix, iy, niz, width, height, depth, tiw, tih, tid); - r += tz*(1.0f - ty)*tx * read(data, ofs, nix, iy, niz, width, height, depth, tiw, tih, tid); - r += tz*ty*(1.0f - tx) * read(data, ofs, ix, niy, niz, width, height, depth, tiw, tih, tid); - r += tz*ty*tx * read(data, ofs, nix, niy, niz, width, height, depth, tiw, tih, tid); + const int *gi = (const int*)info.grid_info; + + if(gi) { + int tiw = info.tiled_width; + int tih = info.tiled_height; + int ltw = info.last_tile_width; + int lth = info.last_tile_height; + r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx) * read(data, gi, ix, iy, iz, tiw, tih, ltw, lth); + r += (1.0f - tz)*(1.0f - ty)*tx * read(data, gi, nix, iy, iz, tiw, tih, ltw, lth); + r += (1.0f - tz)*ty*(1.0f - tx) * read(data, gi, ix, niy, iz, tiw, tih, ltw, lth); + r += (1.0f - tz)*ty*tx * read(data, gi, nix, niy, iz, tiw, tih, ltw, lth); + r += tz*(1.0f - ty)*(1.0f - tx) * read(data, gi, ix, iy, niz, tiw, tih, ltw, lth); + r += tz*(1.0f - ty)*tx * read(data, gi, nix, iy, niz, tiw, tih, ltw, lth); + r += tz*ty*(1.0f - tx) * read(data, gi, ix, niy, niz, tiw, tih, ltw, lth); + r += tz*ty*tx * read(data, gi, nix, niy, niz, tiw, tih, ltw, lth); } else { - r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx) * read(data[compute_index(ix, iy, iz, width, height, depth)]); - r += (1.0f - tz)*(1.0f - ty)*tx * read(data[compute_index(nix, iy, iz, width, height, depth)]); - r += (1.0f - tz)*ty*(1.0f - tx) * read(data[compute_index(ix, niy, iz, width, height, depth)]); - r += (1.0f - tz)*ty*tx * read(data[compute_index(nix, niy, iz, width, height, depth)]); - r += tz*(1.0f - ty)*(1.0f - tx) * read(data[compute_index(ix, iy, niz, width, height, depth)]); - r += tz*(1.0f - ty)*tx * read(data[compute_index(nix, iy, niz, width, height, depth)]); - r += tz*ty*(1.0f - tx) * read(data[compute_index(ix, niy, niz, width, height, depth)]); - r += tz*ty*tx * read(data[compute_index(nix, niy, niz, width, height, depth)]); + r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx) * read(data[compute_index_fast(ix, iy, iz, width, height)]); + r += (1.0f - tz)*(1.0f - ty)*tx * read(data[compute_index_fast(nix, iy, iz, width, height)]); + r += (1.0f - tz)*ty*(1.0f - tx) * read(data[compute_index_fast(ix, niy, iz, width, height)]); + r += (1.0f - tz)*ty*tx * read(data[compute_index_fast(nix, niy, iz, width, height)]); + r += tz*(1.0f - ty)*(1.0f - tx) * read(data[compute_index_fast(ix, iy, niz, width, height)]); + r += tz*(1.0f - ty)*tx * read(data[compute_index_fast(nix, iy, niz, width, height)]); + r += tz*ty*(1.0f - tx) * read(data[compute_index_fast(ix, niy, niz, width, height)]); + r += tz*ty*tx * read(data[compute_index_fast(nix, niy, niz, width, height)]); } return r; @@ -407,6 +413,10 @@ template struct TextureInterpolator { int width = info.width; int height = info.height; int depth = info.depth; + int tiw = info.tiled_width; + int tih = info.tiled_height; + int ltw = info.last_tile_width; + int lth = info.last_tile_height; int ix, iy, iz; int nix, niy, niz; /* Tricubic b-spline interpolation. */ @@ -476,9 +486,9 @@ template struct TextureInterpolator { /* Some helper macro to keep code reasonable size, * let compiler to inline all the matrix multiplications. */ -#define DATA(x, y, z) (ofs ? \ - read(data, ofs, xc[x] + yc[y] + zc[z], width, height, depth) : \ - read(data[xc[x] + yc[y] + zc[z]])) +#define DATA(x, y, z) (gi ? \ + read(data, gi, xc[x] + yc[y] + zc[z], width, height, depth, tiw, tih, ltw, lth) : \ + read(data[xc[x] + yc[y] + zc[z]])) #define COL_TERM(col, row) \ (v[col] * (u[0] * DATA(0, col, row) + \ u[1] * DATA(1, col, row) + \ @@ -496,7 +506,7 @@ template struct TextureInterpolator { /* Actual interpolation. */ const T *data = (const T*)info.data; - const int *ofs = (const int*)info.offsets; + const int *gi = (const int*)info.grid_info; return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3); #undef COL_TERM diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h index 91ad289a858..dcd1b33722a 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h +++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h @@ -58,6 +58,30 @@ ccl_device float cubic_h1(float a) return 1.0f + cubic_w3(a) / (cubic_w2(a) + cubic_w3(a)) + 0.5f; } +ccl_device bool compute_sparse_coordinates(const TextureInfo *info, float fx, float fy, float fz) +{ + float *ix, *iy, *iz; + modff(fx, *ix); + modff(fy, *iy); + modff(fz, *iz); + int x = *ix, y = *iy, z = *iz; + int tix = x / TILE_SIZE, itix = x % TILE_SIZE, + tiy = y / TILE_SIZE, itiy = y % TILE_SIZE, + tiz = z / TILE_SIZE, itiz = z % TILE_SIZE; + int dense_index = (tix + info->tiled_width * (tiy + tiz * info->tiled_height)) * 4; + int tile_x = info->grid_info[dense_index]; + if(tile_x < 0) { + return false; + } + int tile_y = info->grid_info[dense_index + 1]; + int tile_z = info->grid_info[dense_index + 2]; + int dims = info->grid_info[dense_index + 3]; + fx += tile_x + itix + (dims & (1 << ST_SHIFT_X_LHS_PAD)); + fy += tile_y + itiy + (dims & (1 << ST_SHIFT_Y_LHS_PAD)); + fz += tile_z + itiz + (dims & (1 << ST_SHIFT_Z_LHS_PAD)); + return true; +} + /* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. */ template ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo& info, CUtexObject tex, float x, float y) @@ -161,6 +185,12 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, CUtexObject tex = (CUtexObject)info.data; uint interpolation = (interp == INTERPOLATION_NONE)? info.interpolation: interp; + if(info.grid_info) { + if(!compute_sparse_coordinates(&info, x, y, z)) { + return make_float4(0.0f); + } + } + const int texture_type = kernel_tex_type(id); if(texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 || diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h index faa9dd66d0e..2e0db6609f7 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h +++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h @@ -67,6 +67,25 @@ ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, const ccl_glo } } +ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, + const ccl_global TextureInfo *info, + int id, int x, int y, int z) +{ + int tix = x / TILE_SIZE, itix = x % TILE_SIZE, + tiy = y / TILE_SIZE, itiy = y % TILE_SIZE, + tiz = z / TILE_SIZE, itiz = z % TILE_SIZE; + int dense_index = (tix + info->tiled_width * (tiy + tiz * info->tiled_height)) * 2; + int sparse_index = info->grid_info[dense_index]; + int dims = info->grid_info[dense_index + 1]; + if(sparse_index < 0) { + return make_float4(0.0f); + } + int itiw = dims & (1 << ST_SHIFT_TRUNCATE_WIDTH) ? info->last_tile_width : TILE_SIZE; + int itih = dims & (1 << ST_SHIFT_TRUNCATE_HEIGHT) ? info->last_tile_height : TILE_SIZE; + int in_tile_index = itix + itiw * (itiy + itih * itiz); + return svm_image_texture_read(kg, info, id, sparse_index + in_tile_index); +} + ccl_device_inline float4 svm_image_texture_read_2d(KernelGlobals *kg, int id, int x, int y) { const ccl_global TextureInfo *info = kernel_tex_info(kg, id); @@ -101,6 +120,9 @@ ccl_device_inline float4 svm_image_texture_read_3d(KernelGlobals *kg, int id, in z = svm_image_texture_wrap_clamp(z, info->depth); } + if(info->grid_info) { + return svm_image_texture_read(kg, info, id, x, y, z); + } int offset = x + info->width * y + info->width * info->height * z; return svm_image_texture_read(kg, info, id, offset); } -- cgit v1.2.3