Code refactor: make texture code more consistent between devices.

* Use common TextureInfo struct for all devices, except CUDA fermi. * Move image sampling code to kernels/*/kernel_*_image.h files. * Use arrays for data textures on Fermi too, so device_vector<Struct> works.
author: Brecht Van Lommel <brechtvanlommel@gmail.com> 2017-10-06 22:47:41 +0300
committer: Brecht Van Lommel <brechtvanlommel@gmail.com> 2017-10-07 15:53:14 +0300
commit: 23098cda9936d785988b689ee69e58e900f17cb2 (patch)
tree: ed49843e81afbe9c38707324f37bf7e14b234a9b /intern/cycles/kernel
parent: d013b56dde47580d1907e3a994bc49cfaaa9f90c (diff)
17 files changed, 724 insertions, 922 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index b10dd05cb9b..1b7a657214a 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -83,7 +83,6 @@ set(SRC_HEADERS
 	kernel_emission.h
 	kernel_film.h
 	kernel_globals.h
-	kernel_image_opencl.h
 	kernel_jitter.h
 	kernel_light.h
 	kernel_math.h
@@ -119,10 +118,12 @@ set(SRC_KERNELS_CPU_HEADERS
 
 set(SRC_KERNELS_CUDA_HEADERS
 	kernels/cuda/kernel_config.h
+	kernels/cuda/kernel_cuda_image.h
 )
 
 set(SRC_KERNELS_OPENCL_HEADERS
 	kernels/opencl/kernel_split_function.h
+	kernels/opencl/kernel_opencl_image.h
 )
 
 set(SRC_CLOSURE_HEADERS
@@ -507,6 +508,7 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteratio
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_subsurface.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_buffer_update.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_split_function.h" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_opencl_image.h" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/filter.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel_split.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda)
diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h
index 698cd6b03fd..b19c488ef8a 100644
--- a/intern/cycles/kernel/geom/geom_volume.h
+++ b/intern/cycles/kernel/geom/geom_volume.h
@@ -29,21 +29,6 @@ CCL_NAMESPACE_BEGIN
 
 /* Return position normalized to 0..1 in mesh bounds */
 
-#if defined(__KERNEL_CUDA__) && __CUDA_ARCH__ < 300
-ccl_device float4 volume_image_texture_3d(int id, float x, float y, float z)
-{
-	float4 r;
-	switch(id) {
-		case 0: r = kernel_tex_image_interp_3d(__tex_image_float4_3d_000, x, y, z); break;
-		case 8: r = kernel_tex_image_interp_3d(__tex_image_float4_3d_008, x, y, z); break;
-		case 16: r = kernel_tex_image_interp_3d(__tex_image_float4_3d_016, x, y, z); break;
-		case 24: r = kernel_tex_image_interp_3d(__tex_image_float4_3d_024, x, y, z); break;
-		case 32: r = kernel_tex_image_interp_3d(__tex_image_float4_3d_032, x, y, z); break;
-	}
-	return r;
-}
-#endif  /* __KERNEL_CUDA__ */
-
 ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg,
                                                     const ShaderData *sd,
                                                     float3 P)
@@ -65,22 +50,14 @@ ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg,
 ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
 {
 	float3 P = volume_normalized_position(kg, sd, sd->P);
-#ifdef __KERNEL_CUDA__
-#  if __CUDA_ARCH__ >= 300
-	CUtexObject tex = kernel_tex_fetch(__bindless_mapping, desc.offset);
-	float f = kernel_tex_image_interp_3d_float(tex, P.x, P.y, P.z);
-	float4 r = make_float4(f, f, f, 1.0f);
-#  else
-	float4 r = volume_image_texture_3d(desc.offset, P.x, P.y, P.z);
-#  endif
-#elif defined(__KERNEL_OPENCL__)
+#ifdef __KERNEL_GPU__
 	float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z);
 #else
 	float4 r;
 	if(sd->flag & SD_VOLUME_CUBIC)
-		r = kernel_tex_image_interp_3d_ex(desc.offset, P.x, P.y, P.z, INTERPOLATION_CUBIC);
+		r = kernel_tex_image_interp_3d_ex(kg, desc.offset, P.x, P.y, P.z, INTERPOLATION_CUBIC);
 	else
-		r = kernel_tex_image_interp_3d(desc.offset, P.x, P.y, P.z);
+		r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z);
 #endif
 
 	if(dx) *dx = 0.0f;
@@ -92,21 +69,14 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd,
 ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy)
 {
 	float3 P = volume_normalized_position(kg, sd, sd->P);
-#ifdef __KERNEL_CUDA__
-#  if __CUDA_ARCH__ >= 300
-	CUtexObject tex = kernel_tex_fetch(__bindless_mapping, desc.offset);
-	float4 r = kernel_tex_image_interp_3d_float4(tex, P.x, P.y, P.z);
-#  else
-	float4 r = volume_image_texture_3d(desc.offset, P.x, P.y, P.z);
-#  endif
-#elif defined(__KERNEL_OPENCL__)
+#ifdef __KERNEL_GPU__
 	float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z);
 #else
 	float4 r;
 	if(sd->flag & SD_VOLUME_CUBIC)
-		r = kernel_tex_image_interp_3d_ex(desc.offset, P.x, P.y, P.z, INTERPOLATION_CUBIC);
+		r = kernel_tex_image_interp_3d_ex(kg, desc.offset, P.x, P.y, P.z, INTERPOLATION_CUBIC);
 	else
-		r = kernel_tex_image_interp_3d(desc.offset, P.x, P.y, P.z);
+		r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z);
 #endif
 
 	if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index 93934ee6b38..6f63c8f77a2 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -74,7 +74,7 @@ CCL_NAMESPACE_BEGIN
  * pointer lookup. */
 
 template<typename T> struct texture  {
-	ccl_always_inline T fetch(int index)
+	ccl_always_inline const T& fetch(int index)
 	{
 		kernel_assert(index >= 0 && index < width);
 		return data[index];
@@ -112,449 +112,6 @@ template<typename T> struct texture  {
 	int width;
 };
 
-template<typename T> struct texture_image  {
-#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
-	{ \
-		u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \
-		u[1] =  ((      0.5f * t - 1.0f) * t       ) * t + (2.0f/3.0f); \
-		u[2] =  ((     -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \
-		u[3] = (1.0f / 6.0f) * t * t * t; \
-	} (void)0
-
-	ccl_always_inline float4 read(float4 r)
-	{
-		return r;
-	}
-
-	ccl_always_inline float4 read(uchar4 r)
-	{
-		float f = 1.0f/255.0f;
-		return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
-	}
-
-	ccl_always_inline float4 read(uchar r)
-	{
-		float f = r*(1.0f/255.0f);
-		return make_float4(f, f, f, 1.0f);
-	}
-
-	ccl_always_inline float4 read(float r)
-	{
-		/* TODO(dingto): Optimize this, so interpolation
-		 * happens on float instead of float4 */
-		return make_float4(r, r, r, 1.0f);
-	}
-
-	ccl_always_inline float4 read(half4 r)
-	{
-		return half4_to_float4(r);
-	}
-
-	ccl_always_inline float4 read(half r)
-	{
-		float f = half_to_float(r);
-		return make_float4(f, f, f, 1.0f);
-	}
-
-	ccl_always_inline int wrap_periodic(int x, int width)
-	{
-		x %= width;
-		if(x < 0)
-			x += width;
-		return x;
-	}
-
-	ccl_always_inline int wrap_clamp(int x, int width)
-	{
-		return clamp(x, 0, width-1);
-	}
-
-	ccl_always_inline float frac(float x, int *ix)
-	{
-		int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
-		*ix = i;
-		return x - (float)i;
-	}
-
-	ccl_always_inline float4 interp(float x, float y)
-	{
-		if(UNLIKELY(!data))
-			return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
-		int ix, iy, nix, niy;
-
-		if(interpolation == INTERPOLATION_CLOSEST) {
-			frac(x*(float)width, &ix);
-			frac(y*(float)height, &iy);
-			switch(extension) {
-				case EXTENSION_REPEAT:
-					ix = wrap_periodic(ix, width);
-					iy = wrap_periodic(iy, height);
-					break;
-				case EXTENSION_CLIP:
-					if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
-						return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-					}
-					ATTR_FALLTHROUGH;
-				case EXTENSION_EXTEND:
-					ix = wrap_clamp(ix, width);
-					iy = wrap_clamp(iy, height);
-					break;
-				default:
-					kernel_assert(0);
-					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-			}
-			return read(data[ix + iy*width]);
-		}
-		else if(interpolation == INTERPOLATION_LINEAR) {
-			float tx = frac(x*(float)width - 0.5f, &ix);
-			float ty = frac(y*(float)height - 0.5f, &iy);
-
-			switch(extension) {
-				case EXTENSION_REPEAT:
-					ix = wrap_periodic(ix, width);
-					iy = wrap_periodic(iy, height);
-
-					nix = wrap_periodic(ix+1, width);
-					niy = wrap_periodic(iy+1, height);
-					break;
-				case EXTENSION_CLIP:
-					if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
-						return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-					}
-					ATTR_FALLTHROUGH;
-				case EXTENSION_EXTEND:
-					nix = wrap_clamp(ix+1, width);
-					niy = wrap_clamp(iy+1, height);
-
-					ix = wrap_clamp(ix, width);
-					iy = wrap_clamp(iy, height);
-					break;
-				default:
-					kernel_assert(0);
-					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-			}
-
-			float4 r = (1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width]);
-			r += (1.0f - ty)*tx*read(data[nix + iy*width]);
-			r += ty*(1.0f - tx)*read(data[ix + niy*width]);
-			r += ty*tx*read(data[nix + niy*width]);
-
-			return r;
-		}
-		else {
-			/* Bicubic b-spline interpolation. */
-			float tx = frac(x*(float)width - 0.5f, &ix);
-			float ty = frac(y*(float)height - 0.5f, &iy);
-			int pix, piy, nnix, nniy;
-			switch(extension) {
-				case EXTENSION_REPEAT:
-					ix = wrap_periodic(ix, width);
-					iy = wrap_periodic(iy, height);
-
-					pix = wrap_periodic(ix-1, width);
-					piy = wrap_periodic(iy-1, height);
-
-					nix = wrap_periodic(ix+1, width);
-					niy = wrap_periodic(iy+1, height);
-
-					nnix = wrap_periodic(ix+2, width);
-					nniy = wrap_periodic(iy+2, height);
-					break;
-				case EXTENSION_CLIP:
-					if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
-						return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-					}
-					ATTR_FALLTHROUGH;
-				case EXTENSION_EXTEND:
-					pix = wrap_clamp(ix-1, width);
-					piy = wrap_clamp(iy-1, height);
-
-					nix = wrap_clamp(ix+1, width);
-					niy = wrap_clamp(iy+1, height);
-
-					nnix = wrap_clamp(ix+2, width);
-					nniy = wrap_clamp(iy+2, height);
-
-					ix = wrap_clamp(ix, width);
-					iy = wrap_clamp(iy, height);
-					break;
-				default:
-					kernel_assert(0);
-					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-			}
-
-			const int xc[4] = {pix, ix, nix, nnix};
-			const int yc[4] = {width * piy,
-			                   width * iy,
-			                   width * niy,
-			                   width * nniy};
-			float u[4], v[4];
-			/* Some helper macro to keep code reasonable size,
-			 * let compiler to inline all the matrix multiplications.
-			 */
-#define DATA(x, y) (read(data[xc[x] + yc[y]]))
-#define TERM(col) \
-			(v[col] * (u[0] * DATA(0, col) + \
-			           u[1] * DATA(1, col) + \
-			           u[2] * DATA(2, col) + \
-			           u[3] * DATA(3, col)))
-
-			SET_CUBIC_SPLINE_WEIGHTS(u, tx);
-			SET_CUBIC_SPLINE_WEIGHTS(v, ty);
-
-			/* Actual interpolation. */
-			return TERM(0) + TERM(1) + TERM(2) + TERM(3);
-
-#undef TERM
-#undef DATA
-		}
-	}
-
-	ccl_always_inline float4 interp_3d(float x, float y, float z)
-	{
-		return interp_3d_ex(x, y, z, interpolation);
-	}
-
-	ccl_always_inline float4 interp_3d_ex_closest(float x, float y, float z)
-	{
-		int ix, iy, iz;
-		frac(x*(float)width, &ix);
-		frac(y*(float)height, &iy);
-		frac(z*(float)depth, &iz);
-
-		switch(extension) {
-			case EXTENSION_REPEAT:
-				ix = wrap_periodic(ix, width);
-				iy = wrap_periodic(iy, height);
-				iz = wrap_periodic(iz, depth);
-				break;
-			case EXTENSION_CLIP:
-				if(x < 0.0f || y < 0.0f || z < 0.0f ||
-				   x > 1.0f || y > 1.0f || z > 1.0f)
-				{
-					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-				}
-				ATTR_FALLTHROUGH;
-			case EXTENSION_EXTEND:
-				ix = wrap_clamp(ix, width);
-				iy = wrap_clamp(iy, height);
-				iz = wrap_clamp(iz, depth);
-				break;
-			default:
-				kernel_assert(0);
-				return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-		}
-
-		return read(data[ix + iy*width + iz*width*height]);
-	}
-
-	ccl_always_inline float4 interp_3d_ex_linear(float x, float y, float z)
-	{
-		int ix, iy, iz;
-		int nix, niy, niz;
-
-		float tx = frac(x*(float)width - 0.5f, &ix);
-		float ty = frac(y*(float)height - 0.5f, &iy);
-		float tz = frac(z*(float)depth - 0.5f, &iz);
-
-		switch(extension) {
-			case EXTENSION_REPEAT:
-				ix = wrap_periodic(ix, width);
-				iy = wrap_periodic(iy, height);
-				iz = wrap_periodic(iz, depth);
-
-				nix = wrap_periodic(ix+1, width);
-				niy = wrap_periodic(iy+1, height);
-				niz = wrap_periodic(iz+1, depth);
-				break;
-			case EXTENSION_CLIP:
-				if(x < 0.0f || y < 0.0f || z < 0.0f ||
-				   x > 1.0f || y > 1.0f || z > 1.0f)
-				{
-					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-				}
-				ATTR_FALLTHROUGH;
-			case EXTENSION_EXTEND:
-				nix = wrap_clamp(ix+1, width);
-				niy = wrap_clamp(iy+1, height);
-				niz = wrap_clamp(iz+1, depth);
-
-				ix = wrap_clamp(ix, width);
-				iy = wrap_clamp(iy, height);
-				iz = wrap_clamp(iz, depth);
-				break;
-			default:
-				kernel_assert(0);
-				return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-		}
-
-		float4 r;
-
-		r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]);
-		r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]);
-		r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]);
-		r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]);
-
-		r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]);
-		r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]);
-		r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]);
-		r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]);
-
-		return r;
-	}
-
-	/* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are
-	 * causing stack overflow issue in this function unless it is inlined.
-	 *
-	 * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization
-	 * enabled.
-	 */
-#ifdef __GNUC__
-	ccl_always_inline
-#else
-	ccl_never_inline
-#endif
-	float4 interp_3d_ex_tricubic(float x, float y, float z)
-	{
-		int ix, iy, iz;
-		int nix, niy, niz;
-		/* Tricubic b-spline interpolation. */
-		const float tx = frac(x*(float)width - 0.5f, &ix);
-		const float ty = frac(y*(float)height - 0.5f, &iy);
-		const float tz = frac(z*(float)depth - 0.5f, &iz);
-		int pix, piy, piz, nnix, nniy, nniz;
-
-		switch(extension) {
-			case EXTENSION_REPEAT:
-				ix = wrap_periodic(ix, width);
-				iy = wrap_periodic(iy, height);
-				iz = wrap_periodic(iz, depth);
-
-				pix = wrap_periodic(ix-1, width);
-				piy = wrap_periodic(iy-1, height);
-				piz = wrap_periodic(iz-1, depth);
-
-				nix = wrap_periodic(ix+1, width);
-				niy = wrap_periodic(iy+1, height);
-				niz = wrap_periodic(iz+1, depth);
-
-				nnix = wrap_periodic(ix+2, width);
-				nniy = wrap_periodic(iy+2, height);
-				nniz = wrap_periodic(iz+2, depth);
-				break;
-			case EXTENSION_CLIP:
-				if(x < 0.0f || y < 0.0f || z < 0.0f ||
-				   x > 1.0f || y > 1.0f || z > 1.0f)
-				{
-					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-				}
-				ATTR_FALLTHROUGH;
-			case EXTENSION_EXTEND:
-				pix = wrap_clamp(ix-1, width);
-				piy = wrap_clamp(iy-1, height);
-				piz = wrap_clamp(iz-1, depth);
-
-				nix = wrap_clamp(ix+1, width);
-				niy = wrap_clamp(iy+1, height);
-				niz = wrap_clamp(iz+1, depth);
-
-				nnix = wrap_clamp(ix+2, width);
-				nniy = wrap_clamp(iy+2, height);
-				nniz = wrap_clamp(iz+2, depth);
-
-				ix = wrap_clamp(ix, width);
-				iy = wrap_clamp(iy, height);
-				iz = wrap_clamp(iz, depth);
-				break;
-			default:
-				kernel_assert(0);
-				return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-		}
-
-		const int xc[4] = {pix, ix, nix, nnix};
-		const int yc[4] = {width * piy,
-		                   width * iy,
-		                   width * niy,
-		                   width * nniy};
-		const int zc[4] = {width * height * piz,
-		                   width * height * iz,
-		                   width * height * niz,
-		                   width * height * nniz};
-		float u[4], v[4], w[4];
-
-		/* Some helper macro to keep code reasonable size,
-		 * let compiler to inline all the matrix multiplications.
-		 */
-#define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]]))
-#define COL_TERM(col, row) \
-		(v[col] * (u[0] * DATA(0, col, row) + \
-		           u[1] * DATA(1, col, row) + \
-		           u[2] * DATA(2, col, row) + \
-		           u[3] * DATA(3, col, row)))
-#define ROW_TERM(row) \
-		(w[row] * (COL_TERM(0, row) + \
-		           COL_TERM(1, row) + \
-		           COL_TERM(2, row) + \
-		           COL_TERM(3, row)))
-
-		SET_CUBIC_SPLINE_WEIGHTS(u, tx);
-		SET_CUBIC_SPLINE_WEIGHTS(v, ty);
-		SET_CUBIC_SPLINE_WEIGHTS(w, tz);
-
-		/* Actual interpolation. */
-		return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
-
-#undef COL_TERM
-#undef ROW_TERM
-#undef DATA
-	}
-
-	ccl_always_inline float4 interp_3d_ex(float x, float y, float z,
-	                                      int interpolation = INTERPOLATION_LINEAR)
-	{
-		if(UNLIKELY(!data))
-			return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
-		switch(interpolation) {
-			case INTERPOLATION_CLOSEST:
-				return interp_3d_ex_closest(x, y, z);
-			case INTERPOLATION_LINEAR:
-				return interp_3d_ex_linear(x, y, z);
-			default:
-				return interp_3d_ex_tricubic(x, y, z);
-		}
-	}
-
-	ccl_always_inline void dimensions_set(int width_, int height_, int depth_)
-	{
-		width = width_;
-		height = height_;
-		depth = depth_;
-	}
-
-	T *data;
-	int interpolation;
-	ExtensionType extension;
-	int width, height, depth;
-#undef SET_CUBIC_SPLINE_WEIGHTS
-};
-
-typedef texture<float4> texture_float4;
-typedef texture<float2> texture_float2;
-typedef texture<float> texture_float;
-typedef texture<uint> texture_uint;
-typedef texture<int> texture_int;
-typedef texture<uint4> texture_uint4;
-typedef texture<uchar4> texture_uchar4;
-typedef texture<uchar> texture_uchar;
-typedef texture_image<float> texture_image_float;
-typedef texture_image<uchar> texture_image_uchar;
-typedef texture_image<half> texture_image_half;
-typedef texture_image<float4> texture_image_float4;
-typedef texture_image<uchar4> texture_image_uchar4;
-typedef texture_image<half4> texture_image_half4;
-
 /* Macros to handle different memory storage on different devices */
 
 #define kernel_tex_fetch(tex, index) (kg->tex.fetch(index))
@@ -563,10 +120,6 @@ typedef texture_image<half4> texture_image_half4;
 #define kernel_tex_fetch_ssei(tex, index) (kg->tex.fetch_ssei(index))
 #define kernel_tex_lookup(tex, t, offset, size) (kg->tex.lookup(t, offset, size))
 
-#define kernel_tex_image_interp(tex,x,y) kernel_tex_image_interp_impl(kg,tex,x,y)
-#define kernel_tex_image_interp_3d(tex, x, y, z) kernel_tex_image_interp_3d_impl(kg,tex,x,y,z)
-#define kernel_tex_image_interp_3d_ex(tex, x, y, z, interpolation) kernel_tex_image_interp_3d_ex_impl(kg,tex, x, y, z, interpolation)
-
 #define kernel_data (kg->__data)
 
 #ifdef __KERNEL_SSE2__
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 2e8ca48c413..fa512f80e41 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -126,42 +126,16 @@ ccl_device_inline uint ccl_num_groups(uint d)
 
 /* Textures */
 
-typedef texture<float4, 1> texture_float4;
-typedef texture<float2, 1> texture_float2;
-typedef texture<float, 1> texture_float;
-typedef texture<uint, 1> texture_uint;
-typedef texture<int, 1> texture_int;
-typedef texture<uint4, 1> texture_uint4;
-typedef texture<uchar, 1> texture_uchar;
-typedef texture<uchar4, 1> texture_uchar4;
+/* Use arrays for regular data. This is a little slower than textures on Fermi,
+ * but allows for cleaner code and we will stop supporting Fermi soon. */
+#define kernel_tex_fetch(t, index) t[(index)]
+
+/* On Kepler (6xx) and above, we use Bindless Textures for images.
+ * On Fermi cards (4xx and 5xx), we have to use regular textures. */
+#if __CUDA_ARCH__ < 300
 typedef texture<float4, 2> texture_image_float4;
 typedef texture<float4, 3> texture_image3d_float4;
 typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;
-
-/* Macros to handle different memory storage on different devices */
-
-/* On Fermi cards (4xx and 5xx), we use regular textures for both data and images.
- * On Kepler (6xx) and above, we use Bindless Textures for images and arrays for data.
- *
- * Arrays are necessary in order to use the full VRAM on newer cards, and it's slightly faster.
- * Using Arrays on Fermi turned out to be slower.*/
-
-/* Fermi */
-#if __CUDA_ARCH__ < 300
-#  define __KERNEL_CUDA_TEX_STORAGE__
-#  define kernel_tex_fetch(t, index) tex1Dfetch(t, index)
-
-#  define kernel_tex_image_interp(t, x, y) tex2D(t, x, y)
-#  define kernel_tex_image_interp_3d(t, x, y, z) tex3D(t, x, y, z)
-
-/* Kepler */
-#else
-#  define kernel_tex_fetch(t, index) t[(index)]
-
-#  define kernel_tex_image_interp_float4(t, x, y) tex2D<float4>(t, x, y)
-#  define kernel_tex_image_interp_float(t, x, y) tex2D<float>(t, x, y)
-#  define kernel_tex_image_interp_3d_float4(t, x, y, z) tex3D<float4>(t, x, y, z)
-#  define kernel_tex_image_interp_3d_float(t, x, y, z) tex3D<float>(t, x, y, z)
 #endif
 
 #define kernel_data __data
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index 7f81523791b..b02e3bc576d 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -144,7 +144,7 @@
 
 /* data lookup defines */
 #define kernel_data (*kg->data)
-#define kernel_tex_fetch(tex, index) ((ccl_global tex##_t*)(kg->buffers[kg->tex.buffer] + kg->tex.offset))[(index)]
+#define kernel_tex_fetch(tex, index) ((const ccl_global tex##_t*)(kg->buffers[kg->tex.cl_buffer] + kg->tex.data))[(index)]
 
 /* define NULL */
 #define NULL 0
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index 9d55183d94b..97d4726407b 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -46,14 +46,7 @@ struct Intersection;
 struct VolumeStep;
 
 typedef struct KernelGlobals {
-	vector<texture_image_float4> texture_float4_images;
-	vector<texture_image_uchar4> texture_byte4_images;
-	vector<texture_image_half4> texture_half4_images;
-	vector<texture_image_float> texture_float_images;
-	vector<texture_image_uchar> texture_byte_images;
-	vector<texture_image_half> texture_half_images;
-
-#  define KERNEL_TEX(type, ttype, name) ttype name;
+#  define KERNEL_TEX(type, name) texture<type> name;
 #  define KERNEL_IMAGE_TEX(type, ttype, name)
 #  include "kernel/kernel_textures.h"
 
@@ -99,11 +92,7 @@ typedef struct KernelGlobals {
 	Intersection hits_stack[64];
 } KernelGlobals;
 
-#  ifdef __KERNEL_CUDA_TEX_STORAGE__
-#    define KERNEL_TEX(type, ttype, name) ttype name;
-#  else
-#    define KERNEL_TEX(type, ttype, name) const __constant__ __device__ type *name;
-#  endif
+#  define KERNEL_TEX(type, name) const __constant__ __device__ type *name;
 #  define KERNEL_IMAGE_TEX(type, ttype, name) ttype name;
 #  include "kernel/kernel_textures.h"
 
@@ -113,22 +102,16 @@ typedef struct KernelGlobals {
 
 #ifdef __KERNEL_OPENCL__
 
-#  define KERNEL_TEX(type, ttype, name) \
+#  define KERNEL_TEX(type, name) \
 typedef type name##_t;
 #  include "kernel/kernel_textures.h"
 
-typedef struct tex_info_t {
-	uint buffer, padding;
-	uint64_t offset;
-	uint width, height, depth, options;
-} tex_info_t;
-
 typedef ccl_addr_space struct KernelGlobals {
 	ccl_constant KernelData *data;
 	ccl_global char *buffers[8];
 
-#  define KERNEL_TEX(type, ttype, name) \
-	tex_info_t name;
+#  define KERNEL_TEX(type, name) \
+	TextureInfo name;
 #  include "kernel/kernel_textures.h"
 
 #  ifdef __SPLIT_KERNEL__
@@ -176,9 +159,9 @@ ccl_device_inline void kernel_set_buffer_info(KernelGlobals *kg)
 	if(ccl_local_id(0) + ccl_local_id(1) == 0)
 #  endif
 	{
-		ccl_global tex_info_t *info = (ccl_global tex_info_t*)kg->buffers[0];
+		ccl_global TextureInfo *info = (ccl_global TextureInfo*)kg->buffers[0];
 
-#  define KERNEL_TEX(type, ttype, name) \
+#  define KERNEL_TEX(type, name) \
 		kg->name = *(info++);
 #  include "kernel/kernel_textures.h"
 	}
diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h
index 5eab28a2953..344b2223573 100644
--- a/intern/cycles/kernel/kernel_textures.h
+++ b/intern/cycles/kernel/kernel_textures.h
@@ -15,7 +15,7 @@
  */
 
 #ifndef KERNEL_TEX
-#  define KERNEL_TEX(type, ttype, name)
+#  define KERNEL_TEX(type, name)
 #endif
 
 #ifndef KERNEL_IMAGE_TEX
@@ -23,63 +23,65 @@
 #endif
 
 /* bvh */
-KERNEL_TEX(float4, texture_float4, __bvh_nodes)
-KERNEL_TEX(float4, texture_float4, __bvh_leaf_nodes)
-KERNEL_TEX(float4, texture_float4, __prim_tri_verts)
-KERNEL_TEX(uint, texture_uint, __prim_tri_index)
-KERNEL_TEX(uint, texture_uint, __prim_type)
-KERNEL_TEX(uint, texture_uint, __prim_visibility)
-KERNEL_TEX(uint, texture_uint, __prim_index)
-KERNEL_TEX(uint, texture_uint, __prim_object)
-KERNEL_TEX(uint, texture_uint, __object_node)
-KERNEL_TEX(float2, texture_float2, __prim_time)
+KERNEL_TEX(float4, __bvh_nodes)
+KERNEL_TEX(float4, __bvh_leaf_nodes)
+KERNEL_TEX(float4, __prim_tri_verts)
+KERNEL_TEX(uint, __prim_tri_index)
+KERNEL_TEX(uint, __prim_type)
+KERNEL_TEX(uint, __prim_visibility)
+KERNEL_TEX(uint, __prim_index)
+KERNEL_TEX(uint, __prim_object)
+KERNEL_TEX(uint, __object_node)
+KERNEL_TEX(float2, __prim_time)
 
 /* objects */
-KERNEL_TEX(float4, texture_float4, __objects)
-KERNEL_TEX(float4, texture_float4, __objects_vector)
+KERNEL_TEX(float4, __objects)
+KERNEL_TEX(float4, __objects_vector)
 
 /* triangles */
-KERNEL_TEX(uint, texture_uint, __tri_shader)
-KERNEL_TEX(float4, texture_float4, __tri_vnormal)
-KERNEL_TEX(uint4, texture_uint4, __tri_vindex)
-KERNEL_TEX(uint, texture_uint, __tri_patch)
-KERNEL_TEX(float2, texture_float2, __tri_patch_uv)
+KERNEL_TEX(uint, __tri_shader)
+KERNEL_TEX(float4, __tri_vnormal)
+KERNEL_TEX(uint4, __tri_vindex)
+KERNEL_TEX(uint, __tri_patch)
+KERNEL_TEX(float2, __tri_patch_uv)
 
 /* curves */
-KERNEL_TEX(float4, texture_float4, __curves)
-KERNEL_TEX(float4, texture_float4, __curve_keys)
+KERNEL_TEX(float4, __curves)
+KERNEL_TEX(float4, __curve_keys)
 
 /* patches */
-KERNEL_TEX(uint, texture_uint, __patches)
+KERNEL_TEX(uint, __patches)
 
 /* attributes */
-KERNEL_TEX(uint4, texture_uint4, __attributes_map)
-KERNEL_TEX(float, texture_float, __attributes_float)
-KERNEL_TEX(float4, texture_float4, __attributes_float3)
-KERNEL_TEX(uchar4, texture_uchar4, __attributes_uchar4)
+KERNEL_TEX(uint4, __attributes_map)
+KERNEL_TEX(float, __attributes_float)
+KERNEL_TEX(float4, __attributes_float3)
+KERNEL_TEX(uchar4, __attributes_uchar4)
 
 /* lights */
-KERNEL_TEX(float4, texture_float4, __light_distribution)
-KERNEL_TEX(float4, texture_float4, __light_data)
-KERNEL_TEX(float2, texture_float2, __light_background_marginal_cdf)
-KERNEL_TEX(float2, texture_float2, __light_background_conditional_cdf)
+KERNEL_TEX(float4, __light_distribution)
+KERNEL_TEX(float4, __light_data)
+KERNEL_TEX(float2, __light_background_marginal_cdf)
+KERNEL_TEX(float2, __light_background_conditional_cdf)
 
 /* particles */
-KERNEL_TEX(float4, texture_float4, __particles)
+KERNEL_TEX(float4, __particles)
 
 /* shaders */
-KERNEL_TEX(uint4, texture_uint4, __svm_nodes)
-KERNEL_TEX(uint, texture_uint, __shader_flag)
-KERNEL_TEX(uint, texture_uint, __object_flag)
+KERNEL_TEX(uint4, __svm_nodes)
+KERNEL_TEX(uint, __shader_flag)
+KERNEL_TEX(uint, __object_flag)
 
 /* lookup tables */
-KERNEL_TEX(float, texture_float, __lookup_table)
+KERNEL_TEX(float, __lookup_table)
 
 /* sobol */
-KERNEL_TEX(uint, texture_uint, __sobol_directions)
+KERNEL_TEX(uint, __sobol_directions)
 
-#ifdef __KERNEL_CUDA__
-#  if __CUDA_ARCH__ < 300
+#if !defined(__KERNEL_CUDA__) || __CUDA_ARCH__ >= 300
+/* image textures */
+KERNEL_TEX(TextureInfo, __texture_info)
+#else
 /* full-float image */
 KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_000)
 KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_008)
@@ -180,12 +182,7 @@ KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_641)
 KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_649)
 KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_657)
 KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_665)
-
-#  else
-/* bindless textures */
-KERNEL_TEX(uint, texture_uint, __bindless_mapping)
-#  endif  /* __CUDA_ARCH__ */
-#endif  /* __KERNEL_CUDA__ */
+#endif  /* defined(__KERNEL_CUDA__) && __CUDA_ARCH__ < 300 */
 
 #undef KERNEL_TEX
 #undef KERNEL_IMAGE_TEX
diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp
index 998619ac897..7679ab4f111 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp
@@ -84,130 +84,16 @@ void kernel_tex_copy(KernelGlobals *kg,
 	if(0) {
 	}
 
-#define KERNEL_TEX(type, ttype, tname) \
+#define KERNEL_TEX(type, tname) \
 	else if(strcmp(name, #tname) == 0) { \
 		kg->tname.data = (type*)mem; \
 		kg->tname.width = width; \
 	}
-#define KERNEL_IMAGE_TEX(type, ttype, tname)
+#define KERNEL_IMAGE_TEX(type, tname)
 #include "kernel/kernel_textures.h"
-
-	else if(strstr(name, "__tex_image_float4")) {
-		texture_image_float4 *tex = NULL;
-		int id = atoi(name + strlen("__tex_image_float4_"));
-		int array_index = kernel_tex_index(id);
-
-		if(array_index >= 0) {
-			if(array_index >= kg->texture_float4_images.size()) {
-				kg->texture_float4_images.resize(array_index+1);
-			}
-			tex = &kg->texture_float4_images[array_index];
-		}
-
-		if(tex) {
-			tex->data = (float4*)mem;
-			tex->dimensions_set(width, height, depth);
-			tex->interpolation = interpolation;
-			tex->extension = extension;
-		}
-	}
-	else if(strstr(name, "__tex_image_float")) {
-		texture_image_float *tex = NULL;
-		int id = atoi(name + strlen("__tex_image_float_"));
-		int array_index = kernel_tex_index(id);
-
-		if(array_index >= 0) {
-			if(array_index >= kg->texture_float_images.size()) {
-				kg->texture_float_images.resize(array_index+1);
-			}
-			tex = &kg->texture_float_images[array_index];
-		}
-
-		if(tex) {
-			tex->data = (float*)mem;
-			tex->dimensions_set(width, height, depth);
-			tex->interpolation = interpolation;
-			tex->extension = extension;
-		}
-	}
-	else if(strstr(name, "__tex_image_byte4")) {
-		texture_image_uchar4 *tex = NULL;
-		int id = atoi(name + strlen("__tex_image_byte4_"));
-		int array_index = kernel_tex_index(id);
-
-		if(array_index >= 0) {
-			if(array_index >= kg->texture_byte4_images.size()) {
-				kg->texture_byte4_images.resize(array_index+1);
-			}
-			tex = &kg->texture_byte4_images[array_index];
-		}
-
-		if(tex) {
-			tex->data = (uchar4*)mem;
-			tex->dimensions_set(width, height, depth);
-			tex->interpolation = interpolation;
-			tex->extension = extension;
-		}
-	}
-	else if(strstr(name, "__tex_image_byte")) {
-		texture_image_uchar *tex = NULL;
-		int id = atoi(name + strlen("__tex_image_byte_"));
-		int array_index = kernel_tex_index(id);
-
-		if(array_index >= 0) {
-			if(array_index >= kg->texture_byte_images.size()) {
-				kg->texture_byte_images.resize(array_index+1);
-			}
-			tex = &kg->texture_byte_images[array_index];
-		}
-
-		if(tex) {
-			tex->data = (uchar*)mem;
-			tex->dimensions_set(width, height, depth);
-			tex->interpolation = interpolation;
-			tex->extension = extension;
-		}
-	}
-	else if(strstr(name, "__tex_image_half4")) {
-		texture_image_half4 *tex = NULL;
-		int id = atoi(name + strlen("__tex_image_half4_"));
-		int array_index = kernel_tex_index(id);
-
-		if(array_index >= 0) {
-			if(array_index >= kg->texture_half4_images.size()) {
-				kg->texture_half4_images.resize(array_index+1);
-			}
-			tex = &kg->texture_half4_images[array_index];
-		}
-
-		if(tex) {
-			tex->data = (half4*)mem;
-			tex->dimensions_set(width, height, depth);
-			tex->interpolation = interpolation;
-			tex->extension = extension;
-		}
-	}
-	else if(strstr(name, "__tex_image_half")) {
-		texture_image_half *tex = NULL;
-		int id = atoi(name + strlen("__tex_image_half_"));
-		int array_index = kernel_tex_index(id);
-
-		if(array_index >= 0) {
-			if(array_index >= kg->texture_half_images.size()) {
-				kg->texture_half_images.resize(array_index+1);
-			}
-			tex = &kg->texture_half_images[array_index];
-		}
-
-		if(tex) {
-			tex->data = (half*)mem;
-			tex->dimensions_set(width, height, depth);
-			tex->interpolation = interpolation;
-			tex->extension = extension;
-		}
-	}
-	else
+	else {
 		assert(0);
+	}
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
index f6bb4c25012..b2ad60f08c1 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
@@ -17,70 +17,500 @@
 #ifndef __KERNEL_CPU_IMAGE_H__
 #define __KERNEL_CPU_IMAGE_H__
 
-#ifdef __KERNEL_CPU__
-
 CCL_NAMESPACE_BEGIN
 
-ccl_device float4 kernel_tex_image_interp_impl(KernelGlobals *kg, int tex, float x, float y)
+template<typename T> struct TextureInterpolator  {
+#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
+	{ \
+		u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \
+		u[1] =  ((      0.5f * t - 1.0f) * t       ) * t + (2.0f/3.0f); \
+		u[2] =  ((     -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \
+		u[3] = (1.0f / 6.0f) * t * t * t; \
+	} (void)0
+
+	static ccl_always_inline float4 read(float4 r)
+	{
+		return r;
+	}
+
+	static ccl_always_inline float4 read(uchar4 r)
+	{
+		float f = 1.0f/255.0f;
+		return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
+	}
+
+	static ccl_always_inline float4 read(uchar r)
+	{
+		float f = r*(1.0f/255.0f);
+		return make_float4(f, f, f, 1.0f);
+	}
+
+	static ccl_always_inline float4 read(float r)
+	{
+		/* TODO(dingto): Optimize this, so interpolation
+		 * happens on float instead of float4 */
+		return make_float4(r, r, r, 1.0f);
+	}
+
+	static ccl_always_inline float4 read(half4 r)
+	{
+		return half4_to_float4(r);
+	}
+
+	static ccl_always_inline float4 read(half r)
+	{
+		float f = half_to_float(r);
+		return make_float4(f, f, f, 1.0f);
+	}
+
+	static ccl_always_inline int wrap_periodic(int x, int width)
+	{
+		x %= width;
+		if(x < 0)
+			x += width;
+		return x;
+	}
+
+	static ccl_always_inline int wrap_clamp(int x, int width)
+	{
+		return clamp(x, 0, width-1);
+	}
+
+	static ccl_always_inline float frac(float x, int *ix)
+	{
+		int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
+		*ix = i;
+		return x - (float)i;
+	}
+
+	static ccl_always_inline float4 interp(const TextureInfo& info, float x, float y)
+	{
+		if(UNLIKELY(!info.data))
+			return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+		const T *data = (const T*)info.data;
+		int width = info.width;
+		int height = info.height;
+		int ix, iy, nix, niy;
+
+		if(info.interpolation == INTERPOLATION_CLOSEST) {
+			frac(x*(float)width, &ix);
+			frac(y*(float)height, &iy);
+			switch(info.extension) {
+				case EXTENSION_REPEAT:
+					ix = wrap_periodic(ix, width);
+					iy = wrap_periodic(iy, height);
+					break;
+				case EXTENSION_CLIP:
+					if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
+						return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+					}
+					ATTR_FALLTHROUGH;
+				case EXTENSION_EXTEND:
+					ix = wrap_clamp(ix, width);
+					iy = wrap_clamp(iy, height);
+					break;
+				default:
+					kernel_assert(0);
+					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+			}
+			return read(data[ix + iy*width]);
+		}
+		else if(info.interpolation == INTERPOLATION_LINEAR) {
+			float tx = frac(x*(float)width - 0.5f, &ix);
+			float ty = frac(y*(float)height - 0.5f, &iy);
+
+			switch(info.extension) {
+				case EXTENSION_REPEAT:
+					ix = wrap_periodic(ix, width);
+					iy = wrap_periodic(iy, height);
+
+					nix = wrap_periodic(ix+1, width);
+					niy = wrap_periodic(iy+1, height);
+					break;
+				case EXTENSION_CLIP:
+					if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
+						return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+					}
+					ATTR_FALLTHROUGH;
+				case EXTENSION_EXTEND:
+					nix = wrap_clamp(ix+1, width);
+					niy = wrap_clamp(iy+1, height);
+
+					ix = wrap_clamp(ix, width);
+					iy = wrap_clamp(iy, height);
+					break;
+				default:
+					kernel_assert(0);
+					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+			}
+
+			float4 r = (1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width]);
+			r += (1.0f - ty)*tx*read(data[nix + iy*width]);
+			r += ty*(1.0f - tx)*read(data[ix + niy*width]);
+			r += ty*tx*read(data[nix + niy*width]);
+
+			return r;
+		}
+		else {
+			/* Bicubic b-spline interpolation. */
+			float tx = frac(x*(float)width - 0.5f, &ix);
+			float ty = frac(y*(float)height - 0.5f, &iy);
+			int pix, piy, nnix, nniy;
+			switch(info.extension) {
+				case EXTENSION_REPEAT:
+					ix = wrap_periodic(ix, width);
+					iy = wrap_periodic(iy, height);
+
+					pix = wrap_periodic(ix-1, width);
+					piy = wrap_periodic(iy-1, height);
+
+					nix = wrap_periodic(ix+1, width);
+					niy = wrap_periodic(iy+1, height);
+
+					nnix = wrap_periodic(ix+2, width);
+					nniy = wrap_periodic(iy+2, height);
+					break;
+				case EXTENSION_CLIP:
+					if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
+						return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+					}
+					ATTR_FALLTHROUGH;
+				case EXTENSION_EXTEND:
+					pix = wrap_clamp(ix-1, width);
+					piy = wrap_clamp(iy-1, height);
+
+					nix = wrap_clamp(ix+1, width);
+					niy = wrap_clamp(iy+1, height);
+
+					nnix = wrap_clamp(ix+2, width);
+					nniy = wrap_clamp(iy+2, height);
+
+					ix = wrap_clamp(ix, width);
+					iy = wrap_clamp(iy, height);
+					break;
+				default:
+					kernel_assert(0);
+					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+			}
+
+			const int xc[4] = {pix, ix, nix, nnix};
+			const int yc[4] = {width * piy,
+			                   width * iy,
+			                   width * niy,
+			                   width * nniy};
+			float u[4], v[4];
+			/* Some helper macro to keep code reasonable size,
+			 * let compiler to inline all the matrix multiplications.
+			 */
+#define DATA(x, y) (read(data[xc[x] + yc[y]]))
+#define TERM(col) \
+			(v[col] * (u[0] * DATA(0, col) + \
+			           u[1] * DATA(1, col) + \
+			           u[2] * DATA(2, col) + \
+			           u[3] * DATA(3, col)))
+
+			SET_CUBIC_SPLINE_WEIGHTS(u, tx);
+			SET_CUBIC_SPLINE_WEIGHTS(v, ty);
+
+			/* Actual interpolation. */
+			return TERM(0) + TERM(1) + TERM(2) + TERM(3);
+
+#undef TERM
+#undef DATA
+		}
+	}
+
+	static ccl_always_inline float4 interp_3d_closest(const TextureInfo& info, float x, float y, float z)
+	{
+		int width = info.width;
+		int height = info.height;
+		int depth = info.depth;
+		int ix, iy, iz;
+
+		frac(x*(float)width, &ix);
+		frac(y*(float)height, &iy);
+		frac(z*(float)depth, &iz);
+
+		switch(info.extension) {
+			case EXTENSION_REPEAT:
+				ix = wrap_periodic(ix, width);
+				iy = wrap_periodic(iy, height);
+				iz = wrap_periodic(iz, depth);
+				break;
+			case EXTENSION_CLIP:
+				if(x < 0.0f || y < 0.0f || z < 0.0f ||
+				   x > 1.0f || y > 1.0f || z > 1.0f)
+				{
+					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+				}
+				ATTR_FALLTHROUGH;
+			case EXTENSION_EXTEND:
+				ix = wrap_clamp(ix, width);
+				iy = wrap_clamp(iy, height);
+				iz = wrap_clamp(iz, depth);
+				break;
+			default:
+				kernel_assert(0);
+				return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+		}
+
+		const T *data = (const T*)info.data;
+		return read(data[ix + iy*width + iz*width*height]);
+	}
+
+	static ccl_always_inline float4 interp_3d_linear(const TextureInfo& info, float x, float y, float z)
+	{
+		int width = info.width;
+		int height = info.height;
+		int depth = info.depth;
+		int ix, iy, iz;
+		int nix, niy, niz;
+
+		float tx = frac(x*(float)width - 0.5f, &ix);
+		float ty = frac(y*(float)height - 0.5f, &iy);
+		float tz = frac(z*(float)depth - 0.5f, &iz);
+
+		switch(info.extension) {
+			case EXTENSION_REPEAT:
+				ix = wrap_periodic(ix, width);
+				iy = wrap_periodic(iy, height);
+				iz = wrap_periodic(iz, depth);
+
+				nix = wrap_periodic(ix+1, width);
+				niy = wrap_periodic(iy+1, height);
+				niz = wrap_periodic(iz+1, depth);
+				break;
+			case EXTENSION_CLIP:
+				if(x < 0.0f || y < 0.0f || z < 0.0f ||
+				   x > 1.0f || y > 1.0f || z > 1.0f)
+				{
+					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+				}
+				ATTR_FALLTHROUGH;
+			case EXTENSION_EXTEND:
+				nix = wrap_clamp(ix+1, width);
+				niy = wrap_clamp(iy+1, height);
+				niz = wrap_clamp(iz+1, depth);
+
+				ix = wrap_clamp(ix, width);
+				iy = wrap_clamp(iy, height);
+				iz = wrap_clamp(iz, depth);
+				break;
+			default:
+				kernel_assert(0);
+				return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+		}
+
+		const T *data = (const T*)info.data;
+		float4 r;
+
+		r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]);
+		r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]);
+		r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]);
+		r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]);
+
+		r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]);
+		r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]);
+		r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]);
+		r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]);
+
+		return r;
+	}
+
+	/* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are
+	 * causing stack overflow issue in this function unless it is inlined.
+	 *
+	 * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization
+	 * enabled.
+	 */
+#ifdef __GNUC__
+	static ccl_always_inline
+#else
+	static ccl_never_inline
+#endif
+	float4 interp_3d_tricubic(const TextureInfo& info, float x, float y, float z)
+	{
+		int width = info.width;
+		int height = info.height;
+		int depth = info.depth;
+		int ix, iy, iz;
+		int nix, niy, niz;
+		/* Tricubic b-spline interpolation. */
+		const float tx = frac(x*(float)width - 0.5f, &ix);
+		const float ty = frac(y*(float)height - 0.5f, &iy);
+		const float tz = frac(z*(float)depth - 0.5f, &iz);
+		int pix, piy, piz, nnix, nniy, nniz;
+
+		switch(info.extension) {
+			case EXTENSION_REPEAT:
+				ix = wrap_periodic(ix, width);
+				iy = wrap_periodic(iy, height);
+				iz = wrap_periodic(iz, depth);
+
+				pix = wrap_periodic(ix-1, width);
+				piy = wrap_periodic(iy-1, height);
+				piz = wrap_periodic(iz-1, depth);
+
+				nix = wrap_periodic(ix+1, width);
+				niy = wrap_periodic(iy+1, height);
+				niz = wrap_periodic(iz+1, depth);
+
+				nnix = wrap_periodic(ix+2, width);
+				nniy = wrap_periodic(iy+2, height);
+				nniz = wrap_periodic(iz+2, depth);
+				break;
+			case EXTENSION_CLIP:
+				if(x < 0.0f || y < 0.0f || z < 0.0f ||
+				   x > 1.0f || y > 1.0f || z > 1.0f)
+				{
+					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+				}
+				ATTR_FALLTHROUGH;
+			case EXTENSION_EXTEND:
+				pix = wrap_clamp(ix-1, width);
+				piy = wrap_clamp(iy-1, height);
+				piz = wrap_clamp(iz-1, depth);
+
+				nix = wrap_clamp(ix+1, width);
+				niy = wrap_clamp(iy+1, height);
+				niz = wrap_clamp(iz+1, depth);
+
+				nnix = wrap_clamp(ix+2, width);
+				nniy = wrap_clamp(iy+2, height);
+				nniz = wrap_clamp(iz+2, depth);
+
+				ix = wrap_clamp(ix, width);
+				iy = wrap_clamp(iy, height);
+				iz = wrap_clamp(iz, depth);
+				break;
+			default:
+				kernel_assert(0);
+				return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+		}
+
+		const int xc[4] = {pix, ix, nix, nnix};
+		const int yc[4] = {width * piy,
+		                   width * iy,
+		                   width * niy,
+		                   width * nniy};
+		const int zc[4] = {width * height * piz,
+		                   width * height * iz,
+		                   width * height * niz,
+		                   width * height * nniz};
+		float u[4], v[4], w[4];
+
+		/* Some helper macro to keep code reasonable size,
+		 * let compiler to inline all the matrix multiplications.
+		 */
+#define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]]))
+#define COL_TERM(col, row) \
+		(v[col] * (u[0] * DATA(0, col, row) + \
+		           u[1] * DATA(1, col, row) + \
+		           u[2] * DATA(2, col, row) + \
+		           u[3] * DATA(3, col, row)))
+#define ROW_TERM(row) \
+		(w[row] * (COL_TERM(0, row) + \
+		           COL_TERM(1, row) + \
+		           COL_TERM(2, row) + \
+		           COL_TERM(3, row)))
+
+		SET_CUBIC_SPLINE_WEIGHTS(u, tx);
+		SET_CUBIC_SPLINE_WEIGHTS(v, ty);
+		SET_CUBIC_SPLINE_WEIGHTS(w, tz);
+
+		/* Actual interpolation. */
+		const T *data = (const T*)info.data;
+		return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
+
+#undef COL_TERM
+#undef ROW_TERM
+#undef DATA
+	}
+
+	static ccl_always_inline float4 interp_3d(const TextureInfo& info,
+	                                          float x, float y, float z,
+	                                          int interpolation = INTERPOLATION_LINEAR)
+	{
+		if(UNLIKELY(!info.data))
+			return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+		switch(interpolation) {
+			case INTERPOLATION_CLOSEST:
+				return interp_3d_closest(info, x, y, z);
+			case INTERPOLATION_LINEAR:
+				return interp_3d_linear(info, x, y, z);
+			default:
+				return interp_3d_tricubic(info, x, y, z);
+		}
+	}
+#undef SET_CUBIC_SPLINE_WEIGHTS
+};
+
+ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
 {
-	switch(kernel_tex_type(tex)) {
+	const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
+
+	switch(kernel_tex_type(id)) {
 		case IMAGE_DATA_TYPE_HALF:
-			return kg->texture_half_images[kernel_tex_index(tex)].interp(x, y);
+			return TextureInterpolator<half>::interp(info, x, y);
 		case IMAGE_DATA_TYPE_BYTE:
-			return kg->texture_byte_images[kernel_tex_index(tex)].interp(x, y);
+			return TextureInterpolator<uchar>::interp(info, x, y);
 		case IMAGE_DATA_TYPE_FLOAT:
-			return kg->texture_float_images[kernel_tex_index(tex)].interp(x, y);
+			return TextureInterpolator<float>::interp(info, x, y);
 		case IMAGE_DATA_TYPE_HALF4:
-			return kg->texture_half4_images[kernel_tex_index(tex)].interp(x, y);
+			return TextureInterpolator<half4>::interp(info, x, y);
 		case IMAGE_DATA_TYPE_BYTE4:
-			return kg->texture_byte4_images[kernel_tex_index(tex)].interp(x, y);
+			return TextureInterpolator<uchar4>::interp(info, x, y);
 		case IMAGE_DATA_TYPE_FLOAT4:
 		default:
-			return kg->texture_float4_images[kernel_tex_index(tex)].interp(x, y);
+			return TextureInterpolator<float4>::interp(info, x, y);
 	}
 }
 
-ccl_device float4 kernel_tex_image_interp_3d_impl(KernelGlobals *kg, int tex, float x, float y, float z)
+ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z)
 {
-	switch(kernel_tex_type(tex)) {
+	const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
+	InterpolationType interp = (InterpolationType)info.interpolation;
+
+	switch(kernel_tex_type(id)) {
 		case IMAGE_DATA_TYPE_HALF:
-			return kg->texture_half_images[kernel_tex_index(tex)].interp_3d(x, y, z);
+			return TextureInterpolator<half>::interp_3d(info, x, y, z, interp);
 		case IMAGE_DATA_TYPE_BYTE:
-			return kg->texture_byte_images[kernel_tex_index(tex)].interp_3d(x, y, z);
+			return TextureInterpolator<uchar>::interp_3d(info, x, y, z, interp);
 		case IMAGE_DATA_TYPE_FLOAT:
-			return kg->texture_float_images[kernel_tex_index(tex)].interp_3d(x, y, z);
+			return TextureInterpolator<float>::interp_3d(info, x, y, z, interp);
 		case IMAGE_DATA_TYPE_HALF4:
-			return kg->texture_half4_images[kernel_tex_index(tex)].interp_3d(x, y, z);
+			return TextureInterpolator<half4>::interp_3d(info, x, y, z, interp);
 		case IMAGE_DATA_TYPE_BYTE4:
-			return kg->texture_byte4_images[kernel_tex_index(tex)].interp_3d(x, y, z);
+			return TextureInterpolator<uchar4>::interp_3d(info, x, y, z, interp);
 		case IMAGE_DATA_TYPE_FLOAT4:
 		default:
-			return kg->texture_float4_images[kernel_tex_index(tex)].interp_3d(x, y, z);
+			return TextureInterpolator<float4>::interp_3d(info, x, y, z, interp);
 	}
 }
 
-ccl_device float4 kernel_tex_image_interp_3d_ex_impl(KernelGlobals *kg, int tex, float x, float y, float z, int interpolation)
+ccl_device float4 kernel_tex_image_interp_3d_ex(KernelGlobals *kg, int id, float x, float y, float z, int interp)
 {
-	switch(kernel_tex_type(tex)) {
+	const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
+
+	switch(kernel_tex_type(id)) {
 		case IMAGE_DATA_TYPE_HALF:
-			return kg->texture_half_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
+			return TextureInterpolator<half>::interp_3d(info, x, y, z, interp);
 		case IMAGE_DATA_TYPE_BYTE:
-			return kg->texture_byte_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
+			return TextureInterpolator<uchar>::interp_3d(info, x, y, z, interp);
 		case IMAGE_DATA_TYPE_FLOAT:
-			return kg->texture_float_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
+			return TextureInterpolator<float>::interp_3d(info, x, y, z, interp);
 		case IMAGE_DATA_TYPE_HALF4:
-			return kg->texture_half4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
+			return TextureInterpolator<half4>::interp_3d(info, x, y, z, interp);
 		case IMAGE_DATA_TYPE_BYTE4:
-			return kg->texture_byte4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
+			return TextureInterpolator<uchar4>::interp_3d(info, x, y, z, interp);
 		case IMAGE_DATA_TYPE_FLOAT4:
 		default:
-			return kg->texture_float4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
+			return TextureInterpolator<float4>::interp_3d(info, x, y, z, interp);
 	}
 }
 
 CCL_NAMESPACE_END
 
-#endif  // __KERNEL_CPU__
-
-
 #endif // __KERNEL_CPU_IMAGE_H__
diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu
index 1ac6afd167a..3c93e00ccf1 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel.cu
@@ -26,6 +26,7 @@
 #include "kernel/kernel_math.h"
 #include "kernel/kernel_types.h"
 #include "kernel/kernel_globals.h"
+#include "kernel/kernels/cuda/kernel_cuda_image.h"
 #include "kernel/kernel_film.h"
 #include "kernel/kernel_path.h"
 #include "kernel/kernel_path_branched.h"
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
new file mode 100644
index 00000000000..00f6954003d
--- /dev/null
+++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright 2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if __CUDA_ARCH__ >= 300
+
+/* Kepler */
+
+ccl_device float4 kernel_tex_image_interp(void *kg, int id, float x, float y)
+{
+	const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
+	CUtexObject tex = (CUtexObject)info.data;
+
+	/* float4, byte4 and half4 */
+	const int texture_type = kernel_tex_type(id);
+	if(texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
+	   texture_type == IMAGE_DATA_TYPE_BYTE4 ||
+	   texture_type == IMAGE_DATA_TYPE_HALF4)
+	{
+		return tex2D<float4>(tex, x, y);
+	}
+	/* float, byte and half */
+	else {
+		float f = tex2D<float>(tex, x, y);
+		return make_float4(f, f, f, 1.0f);
+	}
+}
+
+ccl_device float4 kernel_tex_image_interp_3d(void *kg, int id, float x, float y, float z)
+{
+	const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
+	CUtexObject tex = (CUtexObject)info.data;
+
+	const int texture_type = kernel_tex_type(id);
+	if(texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
+	   texture_type == IMAGE_DATA_TYPE_BYTE4 ||
+	   texture_type == IMAGE_DATA_TYPE_HALF4)
+	{
+		return tex3D<float4>(tex, x, y, z);
+	}
+	else {
+		float f = tex3D<float>(tex, x, y, z);
+		return make_float4(f, f, f, 1.0f);
+	}
+}
+
+#else
+
+/* Fermi */
+
+ccl_device float4 kernel_tex_image_interp(void *kg, int id, float x, float y)
+{
+	float4 r;
+	switch(id) {
+		case 0: r = tex2D(__tex_image_float4_000, x, y); break;
+		case 8: r = tex2D(__tex_image_float4_008, x, y); break;
+		case 16: r = tex2D(__tex_image_float4_016, x, y); break;
+		case 24: r = tex2D(__tex_image_float4_024, x, y); break;
+		case 32: r = tex2D(__tex_image_float4_032, x, y); break;
+		case 1: r = tex2D(__tex_image_byte4_001, x, y); break;
+		case 9: r = tex2D(__tex_image_byte4_009, x, y); break;
+		case 17: r = tex2D(__tex_image_byte4_017, x, y); break;
+		case 25: r = tex2D(__tex_image_byte4_025, x, y); break;
+		case 33: r = tex2D(__tex_image_byte4_033, x, y); break;
+		case 41: r = tex2D(__tex_image_byte4_041, x, y); break;
+		case 49: r = tex2D(__tex_image_byte4_049, x, y); break;
+		case 57: r = tex2D(__tex_image_byte4_057, x, y); break;
+		case 65: r = tex2D(__tex_image_byte4_065, x, y); break;
+		case 73: r = tex2D(__tex_image_byte4_073, x, y); break;
+		case 81: r = tex2D(__tex_image_byte4_081, x, y); break;
+		case 89: r = tex2D(__tex_image_byte4_089, x, y); break;
+		case 97: r = tex2D(__tex_image_byte4_097, x, y); break;
+		case 105: r = tex2D(__tex_image_byte4_105, x, y); break;
+		case 113: r = tex2D(__tex_image_byte4_113, x, y); break;
+		case 121: r = tex2D(__tex_image_byte4_121, x, y); break;
+		case 129: r = tex2D(__tex_image_byte4_129, x, y); break;
+		case 137: r = tex2D(__tex_image_byte4_137, x, y); break;
+		case 145: r = tex2D(__tex_image_byte4_145, x, y); break;
+		case 153: r = tex2D(__tex_image_byte4_153, x, y); break;
+		case 161: r = tex2D(__tex_image_byte4_161, x, y); break;
+		case 169: r = tex2D(__tex_image_byte4_169, x, y); break;
+		case 177: r = tex2D(__tex_image_byte4_177, x, y); break;
+		case 185: r = tex2D(__tex_image_byte4_185, x, y); break;
+		case 193: r = tex2D(__tex_image_byte4_193, x, y); break;
+		case 201: r = tex2D(__tex_image_byte4_201, x, y); break;
+		case 209: r = tex2D(__tex_image_byte4_209, x, y); break;
+		case 217: r = tex2D(__tex_image_byte4_217, x, y); break;
+		case 225: r = tex2D(__tex_image_byte4_225, x, y); break;
+		case 233: r = tex2D(__tex_image_byte4_233, x, y); break;
+		case 241: r = tex2D(__tex_image_byte4_241, x, y); break;
+		case 249: r = tex2D(__tex_image_byte4_249, x, y); break;
+		case 257: r = tex2D(__tex_image_byte4_257, x, y); break;
+		case 265: r = tex2D(__tex_image_byte4_265, x, y); break;
+		case 273: r = tex2D(__tex_image_byte4_273, x, y); break;
+		case 281: r = tex2D(__tex_image_byte4_281, x, y); break;
+		case 289: r = tex2D(__tex_image_byte4_289, x, y); break;
+		case 297: r = tex2D(__tex_image_byte4_297, x, y); break;
+		case 305: r = tex2D(__tex_image_byte4_305, x, y); break;
+		case 313: r = tex2D(__tex_image_byte4_313, x, y); break;
+		case 321: r = tex2D(__tex_image_byte4_321, x, y); break;
+		case 329: r = tex2D(__tex_image_byte4_329, x, y); break;
+		case 337: r = tex2D(__tex_image_byte4_337, x, y); break;
+		case 345: r = tex2D(__tex_image_byte4_345, x, y); break;
+		case 353: r = tex2D(__tex_image_byte4_353, x, y); break;
+		case 361: r = tex2D(__tex_image_byte4_361, x, y); break;
+		case 369: r = tex2D(__tex_image_byte4_369, x, y); break;
+		case 377: r = tex2D(__tex_image_byte4_377, x, y); break;
+		case 385: r = tex2D(__tex_image_byte4_385, x, y); break;
+		case 393: r = tex2D(__tex_image_byte4_393, x, y); break;
+		case 401: r = tex2D(__tex_image_byte4_401, x, y); break;
+		case 409: r = tex2D(__tex_image_byte4_409, x, y); break;
+		case 417: r = tex2D(__tex_image_byte4_417, x, y); break;
+		case 425: r = tex2D(__tex_image_byte4_425, x, y); break;
+		case 433: r = tex2D(__tex_image_byte4_433, x, y); break;
+		case 441: r = tex2D(__tex_image_byte4_441, x, y); break;
+		case 449: r = tex2D(__tex_image_byte4_449, x, y); break;
+		case 457: r = tex2D(__tex_image_byte4_457, x, y); break;
+		case 465: r = tex2D(__tex_image_byte4_465, x, y); break;
+		case 473: r = tex2D(__tex_image_byte4_473, x, y); break;
+		case 481: r = tex2D(__tex_image_byte4_481, x, y); break;
+		case 489: r = tex2D(__tex_image_byte4_489, x, y); break;
+		case 497: r = tex2D(__tex_image_byte4_497, x, y); break;
+		case 505: r = tex2D(__tex_image_byte4_505, x, y); break;
+		case 513: r = tex2D(__tex_image_byte4_513, x, y); break;
+		case 521: r = tex2D(__tex_image_byte4_521, x, y); break;
+		case 529: r = tex2D(__tex_image_byte4_529, x, y); break;
+		case 537: r = tex2D(__tex_image_byte4_537, x, y); break;
+		case 545: r = tex2D(__tex_image_byte4_545, x, y); break;
+		case 553: r = tex2D(__tex_image_byte4_553, x, y); break;
+		case 561: r = tex2D(__tex_image_byte4_561, x, y); break;
+		case 569: r = tex2D(__tex_image_byte4_569, x, y); break;
+		case 577: r = tex2D(__tex_image_byte4_577, x, y); break;
+		case 585: r = tex2D(__tex_image_byte4_585, x, y); break;
+		case 593: r = tex2D(__tex_image_byte4_593, x, y); break;
+		case 601: r = tex2D(__tex_image_byte4_601, x, y); break;
+		case 609: r = tex2D(__tex_image_byte4_609, x, y); break;
+		case 617: r = tex2D(__tex_image_byte4_617, x, y); break;
+		case 625: r = tex2D(__tex_image_byte4_625, x, y); break;
+		case 633: r = tex2D(__tex_image_byte4_633, x, y); break;
+		case 641: r = tex2D(__tex_image_byte4_641, x, y); break;
+		case 649: r = tex2D(__tex_image_byte4_649, x, y); break;
+		case 657: r = tex2D(__tex_image_byte4_657, x, y); break;
+		case 665: r = tex2D(__tex_image_byte4_665, x, y); break;
+		default: r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+	}
+	return r;
+}
+
+ccl_device float4 kernel_tex_image_interp_3d(void *kg, int id, float x, float y, float z)
+{
+	float4 r;
+	switch(id) {
+		case 0: r = tex3D(__tex_image_float4_3d_000, x, y, z); break;
+		case 8: r = tex3D(__tex_image_float4_3d_008, x, y, z); break;
+		case 16: r = tex3D(__tex_image_float4_3d_016, x, y, z); break;
+		case 24: r = tex3D(__tex_image_float4_3d_024, x, y, z); break;
+		case 32: r = tex3D(__tex_image_float4_3d_032, x, y, z); break;
+	}
+	return r;
+}
+
+#endif
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl
index 66b6e19de84..9d5d784e140 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel.cl
@@ -20,7 +20,7 @@
 #include "kernel/kernel_math.h"
 #include "kernel/kernel_types.h"
 #include "kernel/kernel_globals.h"
-#include "kernel/kernel_image_opencl.h"
+#include "kernel/kernels/opencl/kernel_opencl_image.h"
 
 #include "kernel/kernel_film.h"
 
diff --git a/intern/cycles/kernel/kernel_image_opencl.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
index 9e3373432ec..514980e731e 100644
--- a/intern/cycles/kernel/kernel_image_opencl.h
+++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
@@ -14,23 +14,22 @@
  * limitations under the License.
  */
 
-
 /* For OpenCL we do manual lookup and interpolation. */
 
-ccl_device_inline ccl_global tex_info_t* kernel_tex_info(KernelGlobals *kg, uint id) {
+ccl_device_inline ccl_global TextureInfo* kernel_tex_info(KernelGlobals *kg, uint id) {
 	const uint tex_offset = id
-#define KERNEL_TEX(type, ttype, name) + 1
+#define KERNEL_TEX(type, name) + 1
 #include "kernel/kernel_textures.h"
 	;
 
-	return &((ccl_global tex_info_t*)kg->buffers[0])[tex_offset];
+	return &((ccl_global TextureInfo*)kg->buffers[0])[tex_offset];
 }
 
-#define tex_fetch(type, info, index) ((ccl_global type*)(kg->buffers[info->buffer] + info->offset))[(index)]
+#define tex_fetch(type, info, index) ((ccl_global type*)(kg->buffers[info->cl_buffer] + info->data))[(index)]
 
 ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset)
 {
-	const ccl_global tex_info_t *info = kernel_tex_info(kg, id);
+	const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
 	const int texture_type = kernel_tex_type(id);
 
 	/* Float4 */
@@ -76,35 +75,15 @@ ccl_device_inline float svm_image_texture_frac(float x, int *ix)
 	return x - (float)i;
 }
 
-ccl_device_inline uint kernel_decode_image_interpolation(uint info)
-{
-	return (info & (1 << 0)) ? INTERPOLATION_CLOSEST : INTERPOLATION_LINEAR;
-}
-
-ccl_device_inline uint kernel_decode_image_extension(uint info)
-{
-	if(info & (1 << 1)) {
-		return EXTENSION_REPEAT;
-	}
-	else if(info & (1 << 2)) {
-		return EXTENSION_EXTEND;
-	}
-	else {
-		return EXTENSION_CLIP;
-	}
-}
-
 ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
 {
-	const ccl_global tex_info_t *info = kernel_tex_info(kg, id);
+	const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
 
 	uint width = info->width;
 	uint height = info->height;
 	uint offset = 0;
-
-	/* Decode image options. */
-	uint interpolation = kernel_decode_image_interpolation(info->options);
-	uint extension = kernel_decode_image_extension(info->options);
+	uint interpolation = info->interpolation;
+	uint extension = info->extension;
 
 	/* Actual sampling. */
 	float4 r;
@@ -165,16 +144,14 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl
 
 ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z)
 {
-	const ccl_global tex_info_t *info = kernel_tex_info(kg, id);
+	const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
 
 	uint width = info->width;
 	uint height = info->height;
 	uint offset = 0;
 	uint depth = info->depth;
-
-	/* Decode image options. */
-	uint interpolation = kernel_decode_image_interpolation(info->options);
-	uint extension = kernel_decode_image_extension(info->options);
+	uint interpolation = info->interpolation;
+	uint extension = info->extension;
 
 	/* Actual sampling. */
 	float4 r;
diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp
index 8ad2e12b067..5b991bf065c 100644
--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@@ -962,7 +962,7 @@ bool OSLRenderServices::texture(ustring filename,
 
 	if(filename.length() && filename[0] == '@') {
 		int slot = atoi(filename.c_str() + 1);
-		float4 rgba = kernel_tex_image_interp(slot, s, 1.0f - t);
+		float4 rgba = kernel_tex_image_interp(kg, slot, s, 1.0f - t);
 
 		result[0] = rgba[0];
 		if(nchannels > 1)
@@ -1043,7 +1043,7 @@ bool OSLRenderServices::texture3d(ustring filename,
 	bool status;
 	if(filename.length() && filename[0] == '@') {
 		int slot = atoi(filename.c_str() + 1);
-		float4 rgba = kernel_tex_image_interp_3d(slot, P.x, P.y, P.z);
+		float4 rgba = kernel_tex_image_interp_3d(kg, slot, P.x, P.y, P.z);
 
 		result[0] = rgba[0];
 		if(nchannels > 1)
diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h
index 558d327bc76..21886ee62ee 100644
--- a/intern/cycles/kernel/split/kernel_split_common.h
+++ b/intern/cycles/kernel/split/kernel_split_common.h
@@ -29,7 +29,10 @@
 #endif
 
 #ifdef __KERNEL_OPENCL__
-#  include "kernel/kernel_image_opencl.h"
+#  include "kernel/kernels/opencl/kernel_opencl_image.h"
+#endif
+#ifdef __KERNEL_CUDA__
+#  include "kernel/kernels/cuda/kernel_cuda_image.h"
 #endif
 #ifdef __KERNEL_CPU__
 #  include "kernel/kernels/cpu/kernel_cpu_image.h"
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index 6d6e92e73f6..4226e7adfe0 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -18,135 +18,7 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb, uint use_alpha)
 {
-#ifdef __KERNEL_CPU__
-	float4 r = kernel_tex_image_interp(id, x, y);
-#elif defined(__KERNEL_OPENCL__)
 	float4 r = kernel_tex_image_interp(kg, id, x, y);
-#else
-	float4 r;
-
-#  if __CUDA_ARCH__ < 300
-	/* not particularly proud of this massive switch, what are the
-	 * alternatives?
-	 * - use a single big 1D texture, and do our own lookup/filtering
-	 * - group by size and use a 3d texture, performance impact
-	 * - group into larger texture with some padding for correct lerp
-	 *
-	 * also note that cuda has a textures limit (128 for Fermi, 256 for Kepler),
-	 * and we cannot use all since we still need some for other storage */
-
-	switch(id) {
-		case 0: r = kernel_tex_image_interp(__tex_image_float4_000, x, y); break;
-		case 8: r = kernel_tex_image_interp(__tex_image_float4_008, x, y); break;
-		case 16: r = kernel_tex_image_interp(__tex_image_float4_016, x, y); break;
-		case 24: r = kernel_tex_image_interp(__tex_image_float4_024, x, y); break;
-		case 32: r = kernel_tex_image_interp(__tex_image_float4_032, x, y); break;
-		case 1: r = kernel_tex_image_interp(__tex_image_byte4_001, x, y); break;
-		case 9: r = kernel_tex_image_interp(__tex_image_byte4_009, x, y); break;
-		case 17: r = kernel_tex_image_interp(__tex_image_byte4_017, x, y); break;
-		case 25: r = kernel_tex_image_interp(__tex_image_byte4_025, x, y); break;
-		case 33: r = kernel_tex_image_interp(__tex_image_byte4_033, x, y); break;
-		case 41: r = kernel_tex_image_interp(__tex_image_byte4_041, x, y); break;
-		case 49: r = kernel_tex_image_interp(__tex_image_byte4_049, x, y); break;
-		case 57: r = kernel_tex_image_interp(__tex_image_byte4_057, x, y); break;
-		case 65: r = kernel_tex_image_interp(__tex_image_byte4_065, x, y); break;
-		case 73: r = kernel_tex_image_interp(__tex_image_byte4_073, x, y); break;
-		case 81: r = kernel_tex_image_interp(__tex_image_byte4_081, x, y); break;
-		case 89: r = kernel_tex_image_interp(__tex_image_byte4_089, x, y); break;
-		case 97: r = kernel_tex_image_interp(__tex_image_byte4_097, x, y); break;
-		case 105: r = kernel_tex_image_interp(__tex_image_byte4_105, x, y); break;
-		case 113: r = kernel_tex_image_interp(__tex_image_byte4_113, x, y); break;
-		case 121: r = kernel_tex_image_interp(__tex_image_byte4_121, x, y); break;
-		case 129: r = kernel_tex_image_interp(__tex_image_byte4_129, x, y); break;
-		case 137: r = kernel_tex_image_interp(__tex_image_byte4_137, x, y); break;
-		case 145: r = kernel_tex_image_interp(__tex_image_byte4_145, x, y); break;
-		case 153: r = kernel_tex_image_interp(__tex_image_byte4_153, x, y); break;
-		case 161: r = kernel_tex_image_interp(__tex_image_byte4_161, x, y); break;
-		case 169: r = kernel_tex_image_interp(__tex_image_byte4_169, x, y); break;
-		case 177: r = kernel_tex_image_interp(__tex_image_byte4_177, x, y); break;
-		case 185: r = kernel_tex_image_interp(__tex_image_byte4_185, x, y); break;
-		case 193: r = kernel_tex_image_interp(__tex_image_byte4_193, x, y); break;
-		case 201: r = kernel_tex_image_interp(__tex_image_byte4_201, x, y); break;
-		case 209: r = kernel_tex_image_interp(__tex_image_byte4_209, x, y); break;
-		case 217: r = kernel_tex_image_interp(__tex_image_byte4_217, x, y); break;
-		case 225: r = kernel_tex_image_interp(__tex_image_byte4_225, x, y); break;
-		case 233: r = kernel_tex_image_interp(__tex_image_byte4_233, x, y); break;
-		case 241: r = kernel_tex_image_interp(__tex_image_byte4_241, x, y); break;
-		case 249: r = kernel_tex_image_interp(__tex_image_byte4_249, x, y); break;
-		case 257: r = kernel_tex_image_interp(__tex_image_byte4_257, x, y); break;
-		case 265: r = kernel_tex_image_interp(__tex_image_byte4_265, x, y); break;
-		case 273: r = kernel_tex_image_interp(__tex_image_byte4_273, x, y); break;
-		case 281: r = kernel_tex_image_interp(__tex_image_byte4_281, x, y); break;
-		case 289: r = kernel_tex_image_interp(__tex_image_byte4_289, x, y); break;
-		case 297: r = kernel_tex_image_interp(__tex_image_byte4_297, x, y); break;
-		case 305: r = kernel_tex_image_interp(__tex_image_byte4_305, x, y); break;
-		case 313: r = kernel_tex_image_interp(__tex_image_byte4_313, x, y); break;
-		case 321: r = kernel_tex_image_interp(__tex_image_byte4_321, x, y); break;
-		case 329: r = kernel_tex_image_interp(__tex_image_byte4_329, x, y); break;
-		case 337: r = kernel_tex_image_interp(__tex_image_byte4_337, x, y); break;
-		case 345: r = kernel_tex_image_interp(__tex_image_byte4_345, x, y); break;
-		case 353: r = kernel_tex_image_interp(__tex_image_byte4_353, x, y); break;
-		case 361: r = kernel_tex_image_interp(__tex_image_byte4_361, x, y); break;
-		case 369: r = kernel_tex_image_interp(__tex_image_byte4_369, x, y); break;
-		case 377: r = kernel_tex_image_interp(__tex_image_byte4_377, x, y); break;
-		case 385: r = kernel_tex_image_interp(__tex_image_byte4_385, x, y); break;
-		case 393: r = kernel_tex_image_interp(__tex_image_byte4_393, x, y); break;
-		case 401: r = kernel_tex_image_interp(__tex_image_byte4_401, x, y); break;
-		case 409: r = kernel_tex_image_interp(__tex_image_byte4_409, x, y); break;
-		case 417: r = kernel_tex_image_interp(__tex_image_byte4_417, x, y); break;
-		case 425: r = kernel_tex_image_interp(__tex_image_byte4_425, x, y); break;
-		case 433: r = kernel_tex_image_interp(__tex_image_byte4_433, x, y); break;
-		case 441: r = kernel_tex_image_interp(__tex_image_byte4_441, x, y); break;
-		case 449: r = kernel_tex_image_interp(__tex_image_byte4_449, x, y); break;
-		case 457: r = kernel_tex_image_interp(__tex_image_byte4_457, x, y); break;
-		case 465: r = kernel_tex_image_interp(__tex_image_byte4_465, x, y); break;
-		case 473: r = kernel_tex_image_interp(__tex_image_byte4_473, x, y); break;
-		case 481: r = kernel_tex_image_interp(__tex_image_byte4_481, x, y); break;
-		case 489: r = kernel_tex_image_interp(__tex_image_byte4_489, x, y); break;
-		case 497: r = kernel_tex_image_interp(__tex_image_byte4_497, x, y); break;
-		case 505: r = kernel_tex_image_interp(__tex_image_byte4_505, x, y); break;
-		case 513: r = kernel_tex_image_interp(__tex_image_byte4_513, x, y); break;
-		case 521: r = kernel_tex_image_interp(__tex_image_byte4_521, x, y); break;
-		case 529: r = kernel_tex_image_interp(__tex_image_byte4_529, x, y); break;
-		case 537: r = kernel_tex_image_interp(__tex_image_byte4_537, x, y); break;
-		case 545: r = kernel_tex_image_interp(__tex_image_byte4_545, x, y); break;
-		case 553: r = kernel_tex_image_interp(__tex_image_byte4_553, x, y); break;
-		case 561: r = kernel_tex_image_interp(__tex_image_byte4_561, x, y); break;
-		case 569: r = kernel_tex_image_interp(__tex_image_byte4_569, x, y); break;
-		case 577: r = kernel_tex_image_interp(__tex_image_byte4_577, x, y); break;
-		case 585: r = kernel_tex_image_interp(__tex_image_byte4_585, x, y); break;
-		case 593: r = kernel_tex_image_interp(__tex_image_byte4_593, x, y); break;
-		case 601: r = kernel_tex_image_interp(__tex_image_byte4_601, x, y); break;
-		case 609: r = kernel_tex_image_interp(__tex_image_byte4_609, x, y); break;
-		case 617: r = kernel_tex_image_interp(__tex_image_byte4_617, x, y); break;
-		case 625: r = kernel_tex_image_interp(__tex_image_byte4_625, x, y); break;
-		case 633: r = kernel_tex_image_interp(__tex_image_byte4_633, x, y); break;
-		case 641: r = kernel_tex_image_interp(__tex_image_byte4_641, x, y); break;
-		case 649: r = kernel_tex_image_interp(__tex_image_byte4_649, x, y); break;
-		case 657: r = kernel_tex_image_interp(__tex_image_byte4_657, x, y); break;
-		case 665: r = kernel_tex_image_interp(__tex_image_byte4_665, x, y); break;
-		default:
-			kernel_assert(0);
-			return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-	}
-#  else
-	CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
-	/* float4, byte4 and half4 */
-	const int texture_type = kernel_tex_type(id);
-	if(texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
-	   texture_type == IMAGE_DATA_TYPE_BYTE4 ||
-	   texture_type == IMAGE_DATA_TYPE_HALF4)
-	{
-		r = kernel_tex_image_interp_float4(tex, x, y);
-	}
-	/* float, byte and half */
-	else {
-		float f = kernel_tex_image_interp_float(tex, x, y);
-		r = make_float4(f, f, f, 1.0f);
-	}
-#  endif
-#endif
-
 	const float alpha = r.w;
 
 	if(use_alpha && alpha != 1.0f && alpha != 0.0f) {
diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h
index f4a5b2b2994..466480d21b6 100644
--- a/intern/cycles/kernel/svm/svm_voxel.h
+++ b/intern/cycles/kernel/svm/svm_voxel.h
@@ -42,29 +42,8 @@ ccl_device void svm_node_tex_voxel(KernelGlobals *kg,
 		tfm.w = read_node_float(kg, offset);
 		co = transform_point(&tfm, co);
 	}
-	float4 r;
-#  if defined(__KERNEL_CUDA__)
-#    if __CUDA_ARCH__ >= 300
-	CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
-	const int texture_type = kernel_tex_type(id);
-	if(texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
-	   texture_type == IMAGE_DATA_TYPE_BYTE4 ||
-	   texture_type == IMAGE_DATA_TYPE_HALF4)
-	{
-		r = kernel_tex_image_interp_3d_float4(tex, co.x, co.y, co.z);
-	}
-	else {
-		float f = kernel_tex_image_interp_3d_float(tex, co.x, co.y, co.z);
-		r = make_float4(f, f, f, 1.0f);
-	}
-#    else /* __CUDA_ARCH__ >= 300 */
-	r = volume_image_texture_3d(id, co.x, co.y, co.z);
-#    endif
-#  elif defined(__KERNEL_OPENCL__)
-	r = kernel_tex_image_interp_3d(kg, id, co.x, co.y, co.z);
-#  else
-	r = kernel_tex_image_interp_3d(id, co.x, co.y, co.z);
-#  endif /* __KERNEL_CUDA__ */
+
+	float4 r = kernel_tex_image_interp_3d(kg, id, co.x, co.y, co.z);
 #else
 	float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
 #endif
author	Brecht Van Lommel <brechtvanlommel@gmail.com>	2017-10-06 22:47:41 +0300
committer	Brecht Van Lommel <brechtvanlommel@gmail.com>	2017-10-07 15:53:14 +0300
commit	23098cda9936d785988b689ee69e58e900f17cb2 (patch)
tree	ed49843e81afbe9c38707324f37bf7e14b234a9b /intern/cycles/kernel
parent	d013b56dde47580d1907e3a994bc49cfaaa9f90c (diff)