From c08c931fb6f57bdca7865d48ac09a0775590f3ce Mon Sep 17 00:00:00 2001 From: Thomas Dinges Date: Sun, 11 May 2014 03:38:39 +0200 Subject: Cycles / CUDA: Increase maximum image textures on GPU. Instead of 95, we can use 145 images now. This only affects Kepler and above (sm_30, sm_35 and sm_50). This can be increased further if needed, but let's first test if this does not come with a performance impact. Originally developed during my GSoC 2013. --- intern/cycles/kernel/svm/svm_image.h | 61 ++++++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 3 deletions(-) (limited to 'intern/cycles/kernel/svm') diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index 6627786725f..6c658afb9df 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ -149,8 +149,8 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, * - group by size and use a 3d texture, performance impact * - group into larger texture with some padding for correct lerp * - * also note that cuda has 128 textures limit, we use 100 now, since - * we still need some for other storage */ + * also note that cuda has a textures limit (128 for Fermi, 256 for Kepler), + * and we cannot use all since we still need some for other storage */ switch(id) { case 0: r = kernel_tex_image_interp(__tex_image_float_000, x, y); break; @@ -253,7 +253,62 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, case 97: r = kernel_tex_image_interp(__tex_image_097, x, y); break; case 98: r = kernel_tex_image_interp(__tex_image_098, x, y); break; case 99: r = kernel_tex_image_interp(__tex_image_099, x, y); break; - default: + +#if __CUDA_ARCH__ >= 300 + case 100: r = kernel_tex_image_interp(__tex_image_100, x, y); break; + case 101: r = kernel_tex_image_interp(__tex_image_101, x, y); break; + case 102: r = kernel_tex_image_interp(__tex_image_102, x, y); break; + case 103: r = 
kernel_tex_image_interp(__tex_image_103, x, y); break; + case 104: r = kernel_tex_image_interp(__tex_image_104, x, y); break; + case 105: r = kernel_tex_image_interp(__tex_image_105, x, y); break; + case 106: r = kernel_tex_image_interp(__tex_image_106, x, y); break; + case 107: r = kernel_tex_image_interp(__tex_image_107, x, y); break; + case 108: r = kernel_tex_image_interp(__tex_image_108, x, y); break; + case 109: r = kernel_tex_image_interp(__tex_image_109, x, y); break; + case 110: r = kernel_tex_image_interp(__tex_image_110, x, y); break; + case 111: r = kernel_tex_image_interp(__tex_image_111, x, y); break; + case 112: r = kernel_tex_image_interp(__tex_image_112, x, y); break; + case 113: r = kernel_tex_image_interp(__tex_image_113, x, y); break; + case 114: r = kernel_tex_image_interp(__tex_image_114, x, y); break; + case 115: r = kernel_tex_image_interp(__tex_image_115, x, y); break; + case 116: r = kernel_tex_image_interp(__tex_image_116, x, y); break; + case 117: r = kernel_tex_image_interp(__tex_image_117, x, y); break; + case 118: r = kernel_tex_image_interp(__tex_image_118, x, y); break; + case 119: r = kernel_tex_image_interp(__tex_image_119, x, y); break; + case 120: r = kernel_tex_image_interp(__tex_image_120, x, y); break; + case 121: r = kernel_tex_image_interp(__tex_image_121, x, y); break; + case 122: r = kernel_tex_image_interp(__tex_image_122, x, y); break; + case 123: r = kernel_tex_image_interp(__tex_image_123, x, y); break; + case 124: r = kernel_tex_image_interp(__tex_image_124, x, y); break; + case 125: r = kernel_tex_image_interp(__tex_image_125, x, y); break; + case 126: r = kernel_tex_image_interp(__tex_image_126, x, y); break; + case 127: r = kernel_tex_image_interp(__tex_image_127, x, y); break; + case 128: r = kernel_tex_image_interp(__tex_image_128, x, y); break; + case 129: r = kernel_tex_image_interp(__tex_image_129, x, y); break; + case 130: r = kernel_tex_image_interp(__tex_image_130, x, y); break; + case 131: r = 
kernel_tex_image_interp(__tex_image_131, x, y); break; + case 132: r = kernel_tex_image_interp(__tex_image_132, x, y); break; + case 133: r = kernel_tex_image_interp(__tex_image_133, x, y); break; + case 134: r = kernel_tex_image_interp(__tex_image_134, x, y); break; + case 135: r = kernel_tex_image_interp(__tex_image_135, x, y); break; + case 136: r = kernel_tex_image_interp(__tex_image_136, x, y); break; + case 137: r = kernel_tex_image_interp(__tex_image_137, x, y); break; + case 138: r = kernel_tex_image_interp(__tex_image_138, x, y); break; + case 139: r = kernel_tex_image_interp(__tex_image_139, x, y); break; + case 140: r = kernel_tex_image_interp(__tex_image_140, x, y); break; + case 141: r = kernel_tex_image_interp(__tex_image_141, x, y); break; + case 142: r = kernel_tex_image_interp(__tex_image_142, x, y); break; + case 143: r = kernel_tex_image_interp(__tex_image_143, x, y); break; + case 144: r = kernel_tex_image_interp(__tex_image_144, x, y); break; + case 145: r = kernel_tex_image_interp(__tex_image_145, x, y); break; + case 146: r = kernel_tex_image_interp(__tex_image_146, x, y); break; + case 147: r = kernel_tex_image_interp(__tex_image_147, x, y); break; + case 148: r = kernel_tex_image_interp(__tex_image_148, x, y); break; + case 149: r = kernel_tex_image_interp(__tex_image_149, x, y); break; + case 150: r = kernel_tex_image_interp(__tex_image_150, x, y); break; +#endif + + default: kernel_assert(0); return make_float4(0.0f, 0.0f, 0.0f, 0.0f); } -- cgit v1.2.3