diff options
author | Thomas Dinges <blender@dingto.org> | 2016-05-19 13:47:41 +0300 |
---|---|---|
committer | Thomas Dinges <blender@dingto.org> | 2016-05-19 14:14:37 +0300 |
commit | c9f1ed1e4c22728b1f711656ab4de56c11c65e35 (patch) | |
tree | d64b736497f386827ef327d8b35fd5d9ce91588a /intern/cycles/kernel/svm/svm_image.h | |
parent | 03f846ea12ba38d4686edfeef01a571329bd9385 (diff) |
Cycles: Add support for bindless textures.
This adds support for CUDA texture objects (also known as bindless textures) for Kepler GPUs (GeForce 6xx and above).
This is used for all 2D/3D textures; data still uses arrays as before.
User benefits:
* No more limits of image textures on Kepler.
We had 5 float4 and 145 byte4 slots there before, now we have 1024 float4 and 1024 byte4.
This can be extended further if we need to (just change the define).
* Single-channel texture slots (byte and float) are now supported on Kepler as well (1024 slots for each type).
ToDo / Issues:
* 3D textures don't work yet; at least they don't show up during render. I have no idea what's wrong yet.
* Dynamically allocate bindless_mapping array?
I hope Fermi still works fine, but that should be tested on a Fermi card before pushing to master.
Part of my GSoC 2016.
Reviewers: sergey, #cycles, brecht
Subscribers: swerner, jtheninja, brecht, sergey
Differential Revision: https://developer.blender.org/D1999
Diffstat (limited to 'intern/cycles/kernel/svm/svm_image.h')
-rw-r--r-- | intern/cycles/kernel/svm/svm_image.h | 82 |
1 files changed, 17 insertions, 65 deletions
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index faff4ce3e6d..92d2b36bbb1 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ -18,11 +18,15 @@ CCL_NAMESPACE_BEGIN /* Float4 textures on various devices. */ #if defined(__KERNEL_CPU__) - #define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_IMAGES_CPU +# define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_IMAGES_CPU #elif defined(__KERNEL_CUDA__) - #define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_IMAGES_CUDA +# if __CUDA_ARCH__ < 300 +# define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_IMAGES_CUDA +# else +# define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_IMAGES_CUDA_KEPLER +# endif #else - #define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_IMAGES_OPENCL +# define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_IMAGES_OPENCL #endif #ifdef __KERNEL_OPENCL__ @@ -151,6 +155,7 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, #else float4 r; +# if __CUDA_ARCH__ < 300 /* not particularly proud of this massive switch, what are the * alternatives? 
* - use a single big 1D texture, and do our own lookup/filtering @@ -254,72 +259,19 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, case 90: r = kernel_tex_image_interp(__tex_image_byte4_090, x, y); break; case 91: r = kernel_tex_image_interp(__tex_image_byte4_091, x, y); break; case 92: r = kernel_tex_image_interp(__tex_image_byte4_092, x, y); break; - -# if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300) - case 93: r = kernel_tex_image_interp(__tex_image_byte4_093, x, y); break; - case 94: r = kernel_tex_image_interp(__tex_image_byte4_094, x, y); break; - case 95: r = kernel_tex_image_interp(__tex_image_byte4_095, x, y); break; - case 96: r = kernel_tex_image_interp(__tex_image_byte4_096, x, y); break; - case 97: r = kernel_tex_image_interp(__tex_image_byte4_097, x, y); break; - case 98: r = kernel_tex_image_interp(__tex_image_byte4_098, x, y); break; - case 99: r = kernel_tex_image_interp(__tex_image_byte4_099, x, y); break; - case 100: r = kernel_tex_image_interp(__tex_image_byte4_100, x, y); break; - case 101: r = kernel_tex_image_interp(__tex_image_byte4_101, x, y); break; - case 102: r = kernel_tex_image_interp(__tex_image_byte4_102, x, y); break; - case 103: r = kernel_tex_image_interp(__tex_image_byte4_103, x, y); break; - case 104: r = kernel_tex_image_interp(__tex_image_byte4_104, x, y); break; - case 105: r = kernel_tex_image_interp(__tex_image_byte4_105, x, y); break; - case 106: r = kernel_tex_image_interp(__tex_image_byte4_106, x, y); break; - case 107: r = kernel_tex_image_interp(__tex_image_byte4_107, x, y); break; - case 108: r = kernel_tex_image_interp(__tex_image_byte4_108, x, y); break; - case 109: r = kernel_tex_image_interp(__tex_image_byte4_109, x, y); break; - case 110: r = kernel_tex_image_interp(__tex_image_byte4_110, x, y); break; - case 111: r = kernel_tex_image_interp(__tex_image_byte4_111, x, y); break; - case 112: r = kernel_tex_image_interp(__tex_image_byte4_112, x, y); break; - case 113: r = 
kernel_tex_image_interp(__tex_image_byte4_113, x, y); break; - case 114: r = kernel_tex_image_interp(__tex_image_byte4_114, x, y); break; - case 115: r = kernel_tex_image_interp(__tex_image_byte4_115, x, y); break; - case 116: r = kernel_tex_image_interp(__tex_image_byte4_116, x, y); break; - case 117: r = kernel_tex_image_interp(__tex_image_byte4_117, x, y); break; - case 118: r = kernel_tex_image_interp(__tex_image_byte4_118, x, y); break; - case 119: r = kernel_tex_image_interp(__tex_image_byte4_119, x, y); break; - case 120: r = kernel_tex_image_interp(__tex_image_byte4_120, x, y); break; - case 121: r = kernel_tex_image_interp(__tex_image_byte4_121, x, y); break; - case 122: r = kernel_tex_image_interp(__tex_image_byte4_122, x, y); break; - case 123: r = kernel_tex_image_interp(__tex_image_byte4_123, x, y); break; - case 124: r = kernel_tex_image_interp(__tex_image_byte4_124, x, y); break; - case 125: r = kernel_tex_image_interp(__tex_image_byte4_125, x, y); break; - case 126: r = kernel_tex_image_interp(__tex_image_byte4_126, x, y); break; - case 127: r = kernel_tex_image_interp(__tex_image_byte4_127, x, y); break; - case 128: r = kernel_tex_image_interp(__tex_image_byte4_128, x, y); break; - case 129: r = kernel_tex_image_interp(__tex_image_byte4_129, x, y); break; - case 130: r = kernel_tex_image_interp(__tex_image_byte4_130, x, y); break; - case 131: r = kernel_tex_image_interp(__tex_image_byte4_131, x, y); break; - case 132: r = kernel_tex_image_interp(__tex_image_byte4_132, x, y); break; - case 133: r = kernel_tex_image_interp(__tex_image_byte4_133, x, y); break; - case 134: r = kernel_tex_image_interp(__tex_image_byte4_134, x, y); break; - case 135: r = kernel_tex_image_interp(__tex_image_byte4_135, x, y); break; - case 136: r = kernel_tex_image_interp(__tex_image_byte4_136, x, y); break; - case 137: r = kernel_tex_image_interp(__tex_image_byte4_137, x, y); break; - case 138: r = kernel_tex_image_interp(__tex_image_byte4_138, x, y); break; - case 139: r 
= kernel_tex_image_interp(__tex_image_byte4_139, x, y); break; - case 140: r = kernel_tex_image_interp(__tex_image_byte4_140, x, y); break; - case 141: r = kernel_tex_image_interp(__tex_image_byte4_141, x, y); break; - case 142: r = kernel_tex_image_interp(__tex_image_byte4_142, x, y); break; - case 143: r = kernel_tex_image_interp(__tex_image_byte4_143, x, y); break; - case 144: r = kernel_tex_image_interp(__tex_image_byte4_144, x, y); break; - case 145: r = kernel_tex_image_interp(__tex_image_byte4_145, x, y); break; - case 146: r = kernel_tex_image_interp(__tex_image_byte4_146, x, y); break; - case 147: r = kernel_tex_image_interp(__tex_image_byte4_147, x, y); break; - case 148: r = kernel_tex_image_interp(__tex_image_byte4_148, x, y); break; - case 149: r = kernel_tex_image_interp(__tex_image_byte4_149, x, y); break; - case 150: r = kernel_tex_image_interp(__tex_image_byte4_150, x, y); break; -# endif - default: kernel_assert(0); return make_float4(0.0f, 0.0f, 0.0f, 0.0f); } +# else + CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id); + if(id < 2048) /* TODO(dingto): Make this a variable */ + r = kernel_tex_image_interp_float4(tex, x, y); + else { + float f = kernel_tex_image_interp_float(tex, x, y); + r = make_float4(f, f, f, 1.0); + } +# endif #endif #ifdef __KERNEL_SSE2__ |