Merge branch 'master' into blender2.8

author: Campbell Barton <ideasman42@gmail.com> 2018-02-18 14:33:05 +0300
committer: Campbell Barton <ideasman42@gmail.com> 2018-02-18 14:33:05 +0300
commit: 2bc952fdb6e1474e9e568224a37bcf5cff874aaf (patch)
tree: 8d7c3a99896415392035ba350a606d350b6d5e37 /intern/cycles
parent: 5d3f679013bbbb9f0c7aae47b5653c54266cf7ca (diff)
parent: 7ff3cd26932cbc93068eea4dc7438442216e4ee1 (diff)
17 files changed, 108 insertions, 482 deletions
diff --git a/intern/cycles/app/CMakeLists.txt b/intern/cycles/app/CMakeLists.txt
index a8e0297aeaa..d1f86a5fe7d 100644
--- a/intern/cycles/app/CMakeLists.txt
+++ b/intern/cycles/app/CMakeLists.txt
@@ -126,7 +126,8 @@ if(WITH_CYCLES_CUBIN_COMPILER)
 	# though we are building 32 bit blender a 64 bit cubin_cc will have
 	# to be build to compile the cubins.
 	if(MSVC AND NOT CMAKE_CL_64)
-		Message("cycles_cubin_cc not supported on x86")
+		message("Building with CUDA not supported on 32 bit, skipped")
+		set(WITH_CYCLES_CUDA_BINARIES OFF)
 	else()
 		set(SRC
 			cycles_cubin_cc.cpp
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index 56649a7e363..aa2a10a3dd9 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -502,7 +502,6 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int th
 	info.description = "Multi Device";
 	info.num = 0;
 
-	info.has_fermi_limits = false;
 	info.has_half_images = true;
 	info.has_volume_decoupled = true;
 	info.bvh_layout_mask = BVH_LAYOUT_ALL;
@@ -538,8 +537,6 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int th
 		}
 
 		/* Accumulate device info. */
-		info.has_fermi_limits = info.has_fermi_limits ||
-		                        device.has_fermi_limits;
 		info.has_half_images &= device.has_half_images;
 		info.has_volume_decoupled &= device.has_volume_decoupled;
 		info.bvh_layout_mask = device.bvh_layout_mask & info.bvh_layout_mask;
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index be4b9a7e972..31deba2d796 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -56,7 +56,6 @@ public:
 	int num;
 	bool display_device;            /* GPU is used as a display device. */
 	bool advanced_shading;          /* Supports full shading system. */
-	bool has_fermi_limits;          /* Fixed number of textures limit. */
 	bool has_half_images;           /* Support half-float textures. */
 	bool has_volume_decoupled;      /* Decoupled volume shading. */
 	BVHLayoutMask bvh_layout_mask;  /* Bitmask of supported BVH layouts. */
@@ -73,7 +72,6 @@ public:
 		cpu_threads = 0;
 		display_device = false;
 		advanced_shading = true;
-		has_fermi_limits = false;
 		has_half_images = false;
 		has_volume_decoupled = false;
 		bvh_layout_mask = BVH_LAYOUT_NONE;
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 64295838f0c..54d4c731ed4 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -309,9 +309,7 @@ public:
 
 		delete split_kernel;
 
-		if(!info.has_fermi_limits) {
-			texture_info.free();
-		}
+		texture_info.free();
 
 		cuda_assert(cuCtxDestroy(cuContext));
 	}
@@ -322,9 +320,9 @@ public:
 		cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
 		cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
 
-		/* We only support sm_20 and above */
-		if(major < 2) {
-			cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor));
+		/* We only support sm_30 and above */
+		if(major < 3) {
+			cuda_error_message(string_printf("CUDA device supported only with compute capability 3.0 or up, found %d.%d.", major, minor));
 			return false;
 		}
 
@@ -462,9 +460,9 @@ public:
 
 #ifdef _WIN32
 		if(have_precompiled_kernels()) {
-			if(major < 2) {
+			if(major < 3) {
 				cuda_error_message(string_printf(
-				        "CUDA device requires compute capability 2.0 or up, "
+				        "CUDA device requires compute capability 3.0 or up, "
 				        "found %d.%d. Your GPU is not supported.",
 				        major, minor));
 			}
@@ -680,7 +678,7 @@ public:
 
 	void load_texture_info()
 	{
-		if(!info.has_fermi_limits && need_texture_info) {
+		if(need_texture_info) {
 			texture_info.copy_to_device();
 			need_texture_info = false;
 		}
@@ -1018,9 +1016,6 @@ public:
 	{
 		CUDAContextScope scope(this);
 
-		/* Check if we are on sm_30 or above, for bindless textures. */
-		bool has_fermi_limits = info.has_fermi_limits;
-
 		/* General variables for both architectures */
 		string bind_name = mem.name;
 		size_t dsize = datatype_size(mem.data_type);
@@ -1074,27 +1069,6 @@ public:
 		}
 
 		/* Image Texture Storage */
-		CUtexref texref = NULL;
-
-		if(has_fermi_limits) {
-			if(mem.data_depth > 1) {
-				/* Kernel uses different bind names for 2d and 3d float textures,
-				 * so we have to adjust couple of things here.
-				 */
-				vector<string> tokens;
-				string_split(tokens, mem.name, "_");
-				bind_name = string_printf("__tex_image_%s_3d_%s",
-				                          tokens[2].c_str(),
-				                          tokens[3].c_str());
-			}
-
-			cuda_assert(cuModuleGetTexRef(&texref, cuModule, bind_name.c_str()));
-
-			if(!texref) {
-				return;
-			}
-		}
-
 		CUarray_format_enum format;
 		switch(mem.data_type) {
 			case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
@@ -1187,97 +1161,68 @@ public:
 			cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size));
 		}
 
-		if(!has_fermi_limits) {
-			/* Kepler+, bindless textures. */
-			int flat_slot = 0;
-			if(string_startswith(mem.name, "__tex_image")) {
-				int pos =  string(mem.name).rfind("_");
-				flat_slot = atoi(mem.name + pos + 1);
-			}
-			else {
-				assert(0);
-			}
-
-			CUDA_RESOURCE_DESC resDesc;
-			memset(&resDesc, 0, sizeof(resDesc));
-
-			if(array_3d) {
-				resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
-				resDesc.res.array.hArray = array_3d;
-				resDesc.flags = 0;
-			}
-			else if(mem.data_height > 0) {
-				resDesc.resType = CU_RESOURCE_TYPE_PITCH2D;
-				resDesc.res.pitch2D.devPtr = mem.device_pointer;
-				resDesc.res.pitch2D.format = format;
-				resDesc.res.pitch2D.numChannels = mem.data_elements;
-				resDesc.res.pitch2D.height = mem.data_height;
-				resDesc.res.pitch2D.width = mem.data_width;
-				resDesc.res.pitch2D.pitchInBytes = dst_pitch;
-			}
-			else {
-				resDesc.resType = CU_RESOURCE_TYPE_LINEAR;
-				resDesc.res.linear.devPtr = mem.device_pointer;
-				resDesc.res.linear.format = format;
-				resDesc.res.linear.numChannels = mem.data_elements;
-				resDesc.res.linear.sizeInBytes = mem.device_size;
-			}
-
-			CUDA_TEXTURE_DESC texDesc;
-			memset(&texDesc, 0, sizeof(texDesc));
-			texDesc.addressMode[0] = address_mode;
-			texDesc.addressMode[1] = address_mode;
-			texDesc.addressMode[2] = address_mode;
-			texDesc.filterMode = filter_mode;
-			texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
-
-			cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
-
-			/* Resize once */
-			if(flat_slot >= texture_info.size()) {
-				/* Allocate some slots in advance, to reduce amount
-				 * of re-allocations. */
-				texture_info.resize(flat_slot + 128);
-			}
-
-			/* Set Mapping and tag that we need to (re-)upload to device */
-			TextureInfo& info = texture_info[flat_slot];
-			info.data = (uint64_t)cmem->texobject;
-			info.cl_buffer = 0;
-			info.interpolation = mem.interpolation;
-			info.extension = mem.extension;
-			info.width = mem.data_width;
-			info.height = mem.data_height;
-			info.depth = mem.data_depth;
-			need_texture_info = true;
+		/* Kepler+, bindless textures. */
+		int flat_slot = 0;
+		if(string_startswith(mem.name, "__tex_image")) {
+			int pos =  string(mem.name).rfind("_");
+			flat_slot = atoi(mem.name + pos + 1);
 		}
 		else {
-			/* Fermi, fixed texture slots. */
-			if(array_3d) {
-				cuda_assert(cuTexRefSetArray(texref, array_3d, CU_TRSA_OVERRIDE_FORMAT));
-			}
-			else if(mem.data_height > 0) {
-				CUDA_ARRAY_DESCRIPTOR array_desc;
-				array_desc.Format = format;
-				array_desc.Height = mem.data_height;
-				array_desc.Width = mem.data_width;
-				array_desc.NumChannels = mem.data_elements;
-				cuda_assert(cuTexRefSetAddress2D_v3(texref, &array_desc, mem.device_pointer, dst_pitch));
-			}
-			else {
-				cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size));
-			}
+			assert(0);
+		}
 
-			/* Attach to texture reference. */
-			cuda_assert(cuTexRefSetFilterMode(texref, filter_mode));
-			cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES));
-			cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements));
-			cuda_assert(cuTexRefSetAddressMode(texref, 0, address_mode));
-			cuda_assert(cuTexRefSetAddressMode(texref, 1, address_mode));
-			if(mem.data_depth > 1) {
-				cuda_assert(cuTexRefSetAddressMode(texref, 2, address_mode));
-			}
+		CUDA_RESOURCE_DESC resDesc;
+		memset(&resDesc, 0, sizeof(resDesc));
+
+		if(array_3d) {
+			resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
+			resDesc.res.array.hArray = array_3d;
+			resDesc.flags = 0;
+		}
+		else if(mem.data_height > 0) {
+			resDesc.resType = CU_RESOURCE_TYPE_PITCH2D;
+			resDesc.res.pitch2D.devPtr = mem.device_pointer;
+			resDesc.res.pitch2D.format = format;
+			resDesc.res.pitch2D.numChannels = mem.data_elements;
+			resDesc.res.pitch2D.height = mem.data_height;
+			resDesc.res.pitch2D.width = mem.data_width;
+			resDesc.res.pitch2D.pitchInBytes = dst_pitch;
 		}
+		else {
+			resDesc.resType = CU_RESOURCE_TYPE_LINEAR;
+			resDesc.res.linear.devPtr = mem.device_pointer;
+			resDesc.res.linear.format = format;
+			resDesc.res.linear.numChannels = mem.data_elements;
+			resDesc.res.linear.sizeInBytes = mem.device_size;
+		}
+
+		CUDA_TEXTURE_DESC texDesc;
+		memset(&texDesc, 0, sizeof(texDesc));
+		texDesc.addressMode[0] = address_mode;
+		texDesc.addressMode[1] = address_mode;
+		texDesc.addressMode[2] = address_mode;
+		texDesc.filterMode = filter_mode;
+		texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
+
+		cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
+
+		/* Resize once */
+		if(flat_slot >= texture_info.size()) {
+			/* Allocate some slots in advance, to reduce amount
+			 * of re-allocations. */
+			texture_info.resize(flat_slot + 128);
+		}
+
+		/* Set Mapping and tag that we need to (re-)upload to device */
+		TextureInfo& info = texture_info[flat_slot];
+		info.data = (uint64_t)cmem->texobject;
+		info.cl_buffer = 0;
+		info.interpolation = mem.interpolation;
+		info.extension = mem.extension;
+		info.width = mem.data_width;
+		info.height = mem.data_height;
+		info.depth = mem.data_depth;
+		need_texture_info = true;
 	}
 
 	void tex_free(device_memory& mem)
@@ -2550,9 +2495,9 @@ void device_cuda_info(vector<DeviceInfo>& devices)
 
 		int major;
 		cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, num);
-		if(major < 2) {
+		if(major < 3) {
 			VLOG(1) << "Ignoring device \"" << name
-			        << "\", compute capability is too low.";
+			        << "\", this graphics card is no longer supported.";
 			continue;
 		}
 
@@ -2562,8 +2507,7 @@ void device_cuda_info(vector<DeviceInfo>& devices)
 		info.description = string(name);
 		info.num = num;
 
-		info.advanced_shading = (major >= 2);
-		info.has_fermi_limits = !(major >= 3);
+		info.advanced_shading = (major >= 3);
 		info.has_half_images = (major >= 3);
 		info.has_volume_decoupled = false;
 		info.bvh_layout_mask = BVH_LAYOUT_BVH2;
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index d981b67559e..50ea03a1f8f 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -316,12 +316,8 @@ set(SRC_SPLIT_HEADERS
 # CUDA module
 
 if(WITH_CYCLES_CUDA_BINARIES)
-	# 32 bit or 64 bit
-	if(CUDA_64_BIT_DEVICE_CODE)
-		set(CUDA_BITS 64)
-	else()
-		set(CUDA_BITS 32)
-	endif()
+	# 64 bit only
+	set(CUDA_BITS 64)
 
 	# CUDA version
 	execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT)
@@ -420,17 +416,17 @@ if(WITH_CYCLES_CUDA_BINARIES)
 	endmacro()
 
 	foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
-		if(CUDA_VERSION GREATER "89" AND ${arch} MATCHES "sm_2.")
-			message(STATUS "CUDA binaries for ${arch} disabled, not supported by CUDA 9.")
+		if(${arch} MATCHES "sm_2.")
+			message(STATUS "CUDA binaries for ${arch} are no longer supported, skipped.")
 		else()
 			# Compile regular kernel
 			CYCLES_CUDA_KERNEL_ADD(${arch} filter "" "${cuda_filter_sources}" FALSE)
 			CYCLES_CUDA_KERNEL_ADD(${arch} kernel "" "${cuda_sources}" FALSE)
+		endif()
 
-			if(WITH_CYCLES_CUDA_SPLIT_KERNEL_BINARIES)
-				# Compile split kernel
-				CYCLES_CUDA_KERNEL_ADD(${arch} kernel_split "-D __SPLIT__" ${cuda_sources} FALSE)
-			endif()
+		if(WITH_CYCLES_CUDA_SPLIT_KERNEL_BINARIES)
+			# Compile split kernel
+			CYCLES_CUDA_KERNEL_ADD(${arch} kernel_split "-D __SPLIT__" ${cuda_sources} FALSE)
 		endif()
 	endforeach()
 
diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h
index e9a149ea1ab..faf3e3cdf2b 100644
--- a/intern/cycles/kernel/geom/geom_curve_intersect.h
+++ b/intern/cycles/kernel/geom/geom_curve_intersect.h
@@ -18,12 +18,6 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __HAIR__
 
-#if defined(__KERNEL_CUDA__) && (__CUDA_ARCH__ < 300)
-#  define ccl_device_curveintersect ccl_device
-#else
-#  define ccl_device_curveintersect ccl_device_forceinline
-#endif
-
 #ifdef __KERNEL_SSE2__
 ccl_device_inline ssef transform_point_T3(const ssef t[3], const ssef &a)
 {
@@ -32,7 +26,7 @@ ccl_device_inline ssef transform_point_T3(const ssef t[3], const ssef &a)
 #endif
 
 /* On CPU pass P and dir by reference to aligned vector. */
-ccl_device_curveintersect bool cardinal_curve_intersect(
+ccl_device_forceinline bool cardinal_curve_intersect(
         KernelGlobals *kg,
         Intersection *isect,
         const float3 ccl_ref P,
@@ -505,18 +499,18 @@ ccl_device_curveintersect bool cardinal_curve_intersect(
 	return hit;
 }
 
-ccl_device_curveintersect bool curve_intersect(KernelGlobals *kg,
-                                               Intersection *isect,
-                                               float3 P,
-                                               float3 direction,
-                                               uint visibility,
-                                               int object,
-                                               int curveAddr,
-                                               float time,
-                                               int type,
-                                               uint *lcg_state,
-                                               float difl,
-                                               float extmax)
+ccl_device_forceinline bool curve_intersect(KernelGlobals *kg,
+                                            Intersection *isect,
+                                            float3 P,
+                                            float3 direction,
+                                            uint visibility,
+                                            int object,
+                                            int curveAddr,
+                                            float time,
+                                            int type,
+                                            uint *lcg_state,
+                                            float difl,
+                                            float extmax)
 {
 	/* define few macros to minimize code duplication for SSE */
 #ifndef __KERNEL_SSE2__
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 900f7fe6a2c..9bd7a572f5f 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -50,10 +50,7 @@ __device__ half __float2half(const float f)
 /* Qualifier wrappers for different names on different devices */
 
 #define ccl_device  __device__ __inline__
-#if __CUDA_ARCH__ < 300
-#  define ccl_device_inline  __device__ __inline__
-#  define ccl_device_forceinline  __device__ __forceinline__
-#elif __CUDA_ARCH__ < 500
+#if __CUDA_ARCH__ < 500
 #  define ccl_device_inline  __device__ __forceinline__
 #  define ccl_device_forceinline  __device__ __forceinline__
 #else
@@ -138,18 +135,9 @@ ccl_device_inline uint ccl_num_groups(uint d)
 
 /* Textures */
 
-/* Use arrays for regular data. This is a little slower than textures on Fermi,
- * but allows for cleaner code and we will stop supporting Fermi soon. */
+/* Use arrays for regular data. */
 #define kernel_tex_fetch(t, index) t[(index)]
 
-/* On Kepler (6xx) and above, we use Bindless Textures for images.
- * On Fermi cards (4xx and 5xx), we have to use regular textures. */
-#if __CUDA_ARCH__ < 300
-typedef texture<float4, 2> texture_image_float4;
-typedef texture<float4, 3> texture_image3d_float4;
-typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;
-#endif
-
 #define kernel_data __data
 
 /* Use fast math functions */
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index 97d4726407b..74cfacb5bc1 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -47,7 +47,6 @@ struct VolumeStep;
 
 typedef struct KernelGlobals {
 #  define KERNEL_TEX(type, name) texture<type> name;
-#  define KERNEL_IMAGE_TEX(type, ttype, name)
 #  include "kernel/kernel_textures.h"
 
 	KernelData __data;
@@ -93,7 +92,6 @@ typedef struct KernelGlobals {
 } KernelGlobals;
 
 #  define KERNEL_TEX(type, name) const __constant__ __device__ type *name;
-#  define KERNEL_IMAGE_TEX(type, ttype, name) ttype name;
 #  include "kernel/kernel_textures.h"
 
 #endif  /* __KERNEL_CUDA__ */
diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h
index 344b2223573..74b659557e5 100644
--- a/intern/cycles/kernel/kernel_textures.h
+++ b/intern/cycles/kernel/kernel_textures.h
@@ -18,10 +18,6 @@
 #  define KERNEL_TEX(type, name)
 #endif
 
-#ifndef KERNEL_IMAGE_TEX
-#  define KERNEL_IMAGE_TEX(type, ttype, name)
-#endif
-
 /* bvh */
 KERNEL_TEX(float4, __bvh_nodes)
 KERNEL_TEX(float4, __bvh_leaf_nodes)
@@ -78,113 +74,8 @@ KERNEL_TEX(float, __lookup_table)
 /* sobol */
 KERNEL_TEX(uint, __sobol_directions)
 
-#if !defined(__KERNEL_CUDA__) || __CUDA_ARCH__ >= 300
 /* image textures */
 KERNEL_TEX(TextureInfo, __texture_info)
-#else
-/* full-float image */
-KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_000)
-KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_008)
-KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_016)
-KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_024)
-KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_032)
-
-KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float4_3d_000)
-KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float4_3d_008)
-KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float4_3d_016)
-KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float4_3d_024)
-KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float4_3d_032)
-
-/* image
- * These texture names are encoded to their flattened slots as
- * ImageManager::type_index_to_flattened_slot() returns them. */
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_001)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_009)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_017)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_025)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_033)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_041)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_049)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_057)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_065)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_073)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_081)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_089)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_097)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_105)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_113)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_121)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_129)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_137)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_145)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_153)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_161)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_169)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_177)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_185)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_193)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_201)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_209)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_217)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_225)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_233)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_241)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_249)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_257)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_265)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_273)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_281)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_289)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_297)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_305)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_313)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_321)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_329)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_337)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_345)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_353)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_361)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_369)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_377)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_385)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_393)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_401)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_409)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_417)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_425)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_433)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_441)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_449)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_457)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_465)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_473)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_481)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_489)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_497)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_505)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_513)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_521)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_529)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_537)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_545)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_553)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_561)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_569)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_577)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_585)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_593)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_601)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_609)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_617)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_625)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_633)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_641)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_649)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_657)
-KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_665)
-#endif  /* defined(__KERNEL_CUDA__) && __CUDA_ARCH__ < 300 */
 
 #undef KERNEL_TEX
-#undef KERNEL_IMAGE_TEX
-
 
diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp
index aa67262f36b..de487f6123f 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp
@@ -85,7 +85,6 @@ void kernel_tex_copy(KernelGlobals *kg,
 		kg->tname.data = (type*)mem; \
 		kg->tname.width = size; \
 	}
-#define KERNEL_IMAGE_TEX(type, tname)
 #include "kernel/kernel_textures.h"
 	else {
 		assert(0);
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_config.h b/intern/cycles/kernel/kernels/cuda/kernel_config.h
index 94f59ff38d9..f3d0d721c5c 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_config.h
+++ b/intern/cycles/kernel/kernels/cuda/kernel_config.h
@@ -16,20 +16,8 @@
 
 /* device data taken from CUDA occupancy calculator */
 
-/* 2.0 and 2.1 */
-#if __CUDA_ARCH__ == 200 || __CUDA_ARCH__ == 210
-#  define CUDA_MULTIPRESSOR_MAX_REGISTERS 32768
-#  define CUDA_MULTIPROCESSOR_MAX_BLOCKS 8
-#  define CUDA_BLOCK_MAX_THREADS 1024
-#  define CUDA_THREAD_MAX_REGISTERS 63
-
-/* tunable parameters */
-#  define CUDA_THREADS_BLOCK_WIDTH 16
-#  define CUDA_KERNEL_MAX_REGISTERS 32
-#  define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 40
-
 /* 3.0 and 3.5 */
-#elif __CUDA_ARCH__ == 300 || __CUDA_ARCH__ == 350
+#if __CUDA_ARCH__ == 300 || __CUDA_ARCH__ == 350
 #  define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
 #  define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16
 #  define CUDA_BLOCK_MAX_THREADS 1024
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
index 5ca07eaeb05..91ad289a858 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
+++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
@@ -14,10 +14,6 @@
  * limitations under the License.
  */
 
-#if __CUDA_ARCH__ >= 300
-
-/* Kepler */
-
 /* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */
 ccl_device float cubic_w0(float a)
 {
@@ -191,120 +187,3 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x,
 	}
 }
 
-#else
-
-/* Fermi */
-
-ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
-{
-	float4 r;
-	switch(id) {
-		case 0: r = tex2D(__tex_image_float4_000, x, y); break;
-		case 8: r = tex2D(__tex_image_float4_008, x, y); break;
-		case 16: r = tex2D(__tex_image_float4_016, x, y); break;
-		case 24: r = tex2D(__tex_image_float4_024, x, y); break;
-		case 32: r = tex2D(__tex_image_float4_032, x, y); break;
-		case 1: r = tex2D(__tex_image_byte4_001, x, y); break;
-		case 9: r = tex2D(__tex_image_byte4_009, x, y); break;
-		case 17: r = tex2D(__tex_image_byte4_017, x, y); break;
-		case 25: r = tex2D(__tex_image_byte4_025, x, y); break;
-		case 33: r = tex2D(__tex_image_byte4_033, x, y); break;
-		case 41: r = tex2D(__tex_image_byte4_041, x, y); break;
-		case 49: r = tex2D(__tex_image_byte4_049, x, y); break;
-		case 57: r = tex2D(__tex_image_byte4_057, x, y); break;
-		case 65: r = tex2D(__tex_image_byte4_065, x, y); break;
-		case 73: r = tex2D(__tex_image_byte4_073, x, y); break;
-		case 81: r = tex2D(__tex_image_byte4_081, x, y); break;
-		case 89: r = tex2D(__tex_image_byte4_089, x, y); break;
-		case 97: r = tex2D(__tex_image_byte4_097, x, y); break;
-		case 105: r = tex2D(__tex_image_byte4_105, x, y); break;
-		case 113: r = tex2D(__tex_image_byte4_113, x, y); break;
-		case 121: r = tex2D(__tex_image_byte4_121, x, y); break;
-		case 129: r = tex2D(__tex_image_byte4_129, x, y); break;
-		case 137: r = tex2D(__tex_image_byte4_137, x, y); break;
-		case 145: r = tex2D(__tex_image_byte4_145, x, y); break;
-		case 153: r = tex2D(__tex_image_byte4_153, x, y); break;
-		case 161: r = tex2D(__tex_image_byte4_161, x, y); break;
-		case 169: r = tex2D(__tex_image_byte4_169, x, y); break;
-		case 177: r = tex2D(__tex_image_byte4_177, x, y); break;
-		case 185: r = tex2D(__tex_image_byte4_185, x, y); break;
-		case 193: r = tex2D(__tex_image_byte4_193, x, y); break;
-		case 201: r = tex2D(__tex_image_byte4_201, x, y); break;
-		case 209: r = tex2D(__tex_image_byte4_209, x, y); break;
-		case 217: r = tex2D(__tex_image_byte4_217, x, y); break;
-		case 225: r = tex2D(__tex_image_byte4_225, x, y); break;
-		case 233: r = tex2D(__tex_image_byte4_233, x, y); break;
-		case 241: r = tex2D(__tex_image_byte4_241, x, y); break;
-		case 249: r = tex2D(__tex_image_byte4_249, x, y); break;
-		case 257: r = tex2D(__tex_image_byte4_257, x, y); break;
-		case 265: r = tex2D(__tex_image_byte4_265, x, y); break;
-		case 273: r = tex2D(__tex_image_byte4_273, x, y); break;
-		case 281: r = tex2D(__tex_image_byte4_281, x, y); break;
-		case 289: r = tex2D(__tex_image_byte4_289, x, y); break;
-		case 297: r = tex2D(__tex_image_byte4_297, x, y); break;
-		case 305: r = tex2D(__tex_image_byte4_305, x, y); break;
-		case 313: r = tex2D(__tex_image_byte4_313, x, y); break;
-		case 321: r = tex2D(__tex_image_byte4_321, x, y); break;
-		case 329: r = tex2D(__tex_image_byte4_329, x, y); break;
-		case 337: r = tex2D(__tex_image_byte4_337, x, y); break;
-		case 345: r = tex2D(__tex_image_byte4_345, x, y); break;
-		case 353: r = tex2D(__tex_image_byte4_353, x, y); break;
-		case 361: r = tex2D(__tex_image_byte4_361, x, y); break;
-		case 369: r = tex2D(__tex_image_byte4_369, x, y); break;
-		case 377: r = tex2D(__tex_image_byte4_377, x, y); break;
-		case 385: r = tex2D(__tex_image_byte4_385, x, y); break;
-		case 393: r = tex2D(__tex_image_byte4_393, x, y); break;
-		case 401: r = tex2D(__tex_image_byte4_401, x, y); break;
-		case 409: r = tex2D(__tex_image_byte4_409, x, y); break;
-		case 417: r = tex2D(__tex_image_byte4_417, x, y); break;
-		case 425: r = tex2D(__tex_image_byte4_425, x, y); break;
-		case 433: r = tex2D(__tex_image_byte4_433, x, y); break;
-		case 441: r = tex2D(__tex_image_byte4_441, x, y); break;
-		case 449: r = tex2D(__tex_image_byte4_449, x, y); break;
-		case 457: r = tex2D(__tex_image_byte4_457, x, y); break;
-		case 465: r = tex2D(__tex_image_byte4_465, x, y); break;
-		case 473: r = tex2D(__tex_image_byte4_473, x, y); break;
-		case 481: r = tex2D(__tex_image_byte4_481, x, y); break;
-		case 489: r = tex2D(__tex_image_byte4_489, x, y); break;
-		case 497: r = tex2D(__tex_image_byte4_497, x, y); break;
-		case 505: r = tex2D(__tex_image_byte4_505, x, y); break;
-		case 513: r = tex2D(__tex_image_byte4_513, x, y); break;
-		case 521: r = tex2D(__tex_image_byte4_521, x, y); break;
-		case 529: r = tex2D(__tex_image_byte4_529, x, y); break;
-		case 537: r = tex2D(__tex_image_byte4_537, x, y); break;
-		case 545: r = tex2D(__tex_image_byte4_545, x, y); break;
-		case 553: r = tex2D(__tex_image_byte4_553, x, y); break;
-		case 561: r = tex2D(__tex_image_byte4_561, x, y); break;
-		case 569: r = tex2D(__tex_image_byte4_569, x, y); break;
-		case 577: r = tex2D(__tex_image_byte4_577, x, y); break;
-		case 585: r = tex2D(__tex_image_byte4_585, x, y); break;
-		case 593: r = tex2D(__tex_image_byte4_593, x, y); break;
-		case 601: r = tex2D(__tex_image_byte4_601, x, y); break;
-		case 609: r = tex2D(__tex_image_byte4_609, x, y); break;
-		case 617: r = tex2D(__tex_image_byte4_617, x, y); break;
-		case 625: r = tex2D(__tex_image_byte4_625, x, y); break;
-		case 633: r = tex2D(__tex_image_byte4_633, x, y); break;
-		case 641: r = tex2D(__tex_image_byte4_641, x, y); break;
-		case 649: r = tex2D(__tex_image_byte4_649, x, y); break;
-		case 657: r = tex2D(__tex_image_byte4_657, x, y); break;
-		case 665: r = tex2D(__tex_image_byte4_665, x, y); break;
-		default: r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-	}
-	return r;
-}
-
-ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
-{
-	float4 r;
-	switch(id) {
-		case 0: r = tex3D(__tex_image_float4_3d_000, x, y, z); break;
-		case 8: r = tex3D(__tex_image_float4_3d_008, x, y, z); break;
-		case 16: r = tex3D(__tex_image_float4_3d_016, x, y, z); break;
-		case 24: r = tex3D(__tex_image_float4_3d_024, x, y, z); break;
-		case 32: r = tex3D(__tex_image_float4_3d_032, x, y, z); break;
-	}
-	return r;
-}
-
-#endif
-
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index a8f99d23b7d..fae9f783483 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -30,8 +30,7 @@
  * in local memory on the GPU, as it would take too many register and indexes in
  * ways not known at compile time. This seems the only solution even though it
  * may be slow, with two positive factors. If the same shader is being executed,
- * memory access will be coalesced, and on fermi cards, memory will actually be
- * cached.
+ * memory access will be coalesced and cached.
  *
  * The result of shader execution will be a single closure. This means the
  * closure type, associated label, data and weight. Sampling from multiple
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index feaa17148ee..dbe15a67b9e 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -49,7 +49,6 @@ ImageManager::ImageManager(const DeviceInfo& info)
 	/* Set image limits */
 	max_num_images = TEX_NUM_MAX;
 	has_half_images = info.has_half_images;
-	cuda_fermi_limits = info.has_fermi_limits;
 
 	for(size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
 		tex_num_images[type] = 0;
@@ -255,7 +254,7 @@ int ImageManager::add_image(const string& filename,
 	/* Check whether it's a float texture. */
 	is_float = (type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4);
 
-	/* No single channel and half textures on CUDA (Fermi) and no half on OpenCL, use available slots */
+	/* No half textures on OpenCL, use full float instead. */
 	if(!has_half_images) {
 		if(type == IMAGE_DATA_TYPE_HALF4) {
 			type = IMAGE_DATA_TYPE_FLOAT4;
@@ -265,15 +264,6 @@ int ImageManager::add_image(const string& filename,
 		}
 	}
 
-	if(cuda_fermi_limits) {
-		if(type == IMAGE_DATA_TYPE_FLOAT) {
-			type = IMAGE_DATA_TYPE_FLOAT4;
-		}
-		else if(type == IMAGE_DATA_TYPE_BYTE) {
-			type = IMAGE_DATA_TYPE_BYTE4;
-		}
-	}
-
 	/* Fnd existing image. */
 	for(slot = 0; slot < images[type].size(); slot++) {
 		img = images[type][slot];
@@ -303,27 +293,16 @@ int ImageManager::add_image(const string& filename,
 			break;
 	}
 
-	/* Count if we're over the limit */
-	if(cuda_fermi_limits) {
-		if(tex_num_images[IMAGE_DATA_TYPE_BYTE4] == TEX_NUM_BYTE4_CUDA
-			|| tex_num_images[IMAGE_DATA_TYPE_FLOAT4] == TEX_NUM_FLOAT4_CUDA)
-		{
-			printf("ImageManager::add_image: Reached %s image limit (%d), skipping '%s'\n",
-				name_from_type(type).c_str(), tex_num_images[type], filename.c_str());
-			return -1;
-		}
+	/* Count if we're over the limit.
+	 * Very unlikely, since max_num_images is insanely big. But better safe than sorry. */
+	int tex_count = 0;
+	for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
+		tex_count += tex_num_images[type];
 	}
-	else {
-		/* Very unlikely, since max_num_images is insanely big. But better safe than sorry. */
-		int tex_count = 0;
-		for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
-			tex_count += tex_num_images[type];
-		}
-		if(tex_count > max_num_images) {
-			printf("ImageManager::add_image: Reached image limit (%d), skipping '%s'\n",
-				max_num_images, filename.c_str());
-			return -1;
-		}
+	if(tex_count > max_num_images) {
+		printf("ImageManager::add_image: Reached image limit (%d), skipping '%s'\n",
+			max_num_images, filename.c_str());
+		return -1;
 	}
 
 	if(slot == images[type].size()) {
diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h
index 3519a67bc05..6fca3ca20d3 100644
--- a/intern/cycles/render/image.h
+++ b/intern/cycles/render/image.h
@@ -121,7 +121,6 @@ private:
 	int tex_num_images[IMAGE_DATA_NUM_TYPES];
 	int max_num_images;
 	bool has_half_images;
-	bool cuda_fermi_limits;
 
 	thread_mutex device_mutex;
 	int animation_frame;
diff --git a/intern/cycles/util/util_math_intersect.h b/intern/cycles/util/util_math_intersect.h
index 498c21b9706..61ddcc38f50 100644
--- a/intern/cycles/util/util_math_intersect.h
+++ b/intern/cycles/util/util_math_intersect.h
@@ -79,12 +79,7 @@ ccl_device bool ray_aligned_disk_intersect(
 	return true;
 }
 
-#if defined(__KERNEL_CUDA__) && __CUDA_ARCH__ < 300
-ccl_device_inline
-#else
-ccl_device_forceinline
-#endif
-bool ray_triangle_intersect(
+ccl_device_forceinline bool ray_triangle_intersect(
         float3 ray_P, float3 ray_dir, float ray_t,
 #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
         const ssef *ssef_verts,
diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h
index cec03dc5e6e..4b5f630427d 100644
--- a/intern/cycles/util/util_texture.h
+++ b/intern/cycles/util/util_texture.h
@@ -20,22 +20,6 @@
 CCL_NAMESPACE_BEGIN
 
 /* Texture limits on devices. */
-
-/* CUDA (Geforce 4xx and 5xx) */
-#define TEX_NUM_FLOAT4_CUDA      5
-#define TEX_NUM_BYTE4_CUDA       84
-#define TEX_NUM_HALF4_CUDA       0
-#define TEX_NUM_FLOAT_CUDA       0
-#define TEX_NUM_BYTE_CUDA        0
-#define TEX_NUM_HALF_CUDA        0
-#define TEX_START_FLOAT4_CUDA    0
-#define TEX_START_BYTE4_CUDA     TEX_NUM_FLOAT4_CUDA
-#define TEX_START_HALF4_CUDA     (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA)
-#define TEX_START_FLOAT_CUDA     (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA)
-#define TEX_START_BYTE_CUDA      (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA)
-#define TEX_START_HALF_CUDA      (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA)
-
-/* Any architecture other than old CUDA cards */
 #define TEX_NUM_MAX (INT_MAX >> 4)
 
 /* Color to use when textures are not found. */
@@ -44,11 +28,8 @@ CCL_NAMESPACE_BEGIN
 #define TEX_IMAGE_MISSING_B 1
 #define TEX_IMAGE_MISSING_A 1
 
-#if defined (__KERNEL_CUDA__) && (__CUDA_ARCH__ < 300)
-#  define kernel_tex_type(tex) (tex < TEX_START_BYTE4_CUDA ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_BYTE4)
-#else
-#  define kernel_tex_type(tex) (tex & IMAGE_DATA_TYPE_MASK)
-#endif
+/* Texture type. */
+#define kernel_tex_type(tex) (tex & IMAGE_DATA_TYPE_MASK)
 
 /* Interpolation types for textures
  * cuda also use texture space to store other objects */
author	Campbell Barton <ideasman42@gmail.com>	2018-02-18 14:33:05 +0300
committer	Campbell Barton <ideasman42@gmail.com>	2018-02-18 14:33:05 +0300
commit	2bc952fdb6e1474e9e568224a37bcf5cff874aaf (patch)
tree	8d7c3a99896415392035ba350a606d350b6d5e37 /intern/cycles
parent	5d3f679013bbbb9f0c7aae47b5653c54266cf7ca (diff)
parent	7ff3cd26932cbc93068eea4dc7438442216e4ee1 (diff)