From 2eaf90b305fc6fb37f775f288c3d5cd34cc66c81 Mon Sep 17 00:00:00 2001
From: Thomas Dinges <blender@dingto.org>
Date: Sat, 17 Feb 2018 16:15:07 +0100
Subject: Cycles: Remove Fermi support from CMake and update runtime checks in
 device_cuda.cpp.

Fermi code in Cycles kernel and texture system are coming next.
---
 intern/cycles/device/device_cuda.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'intern/cycles/device')
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index dfedf922ca9..9644937d906 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -322,9 +322,9 @@ public:
 		cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
 		cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
 
-		/* We only support sm_20 and above */
-		if(major < 2) {
-			cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor));
+		/* We only support sm_30 and above */
+		if(major < 3) {
+			cuda_error_message(string_printf("CUDA device supported only with compute capability 3.0 or up, found %d.%d.", major, minor));
 			return false;
 		}
 
@@ -462,9 +462,9 @@ public:
 
 #ifdef _WIN32
 		if(have_precompiled_kernels()) {
-			if(major < 2) {
+			if(major < 3) {
 				cuda_error_message(string_printf(
-				        "CUDA device requires compute capability 2.0 or up, "
+				        "CUDA device requires compute capability 3.0 or up, "
 				        "found %d.%d. Your GPU is not supported.",
 				        major, minor));
 			}
@@ -2532,7 +2532,7 @@ void device_cuda_info(vector<DeviceInfo>& devices)
 
 		int major;
 		cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, num);
-		if(major < 2) {
+		if(major < 3) {
 			VLOG(1) << "Ignoring device \"" << name
 			        << "\", compute capability is too low.";
 			continue;
@@ -2544,7 +2544,7 @@ void device_cuda_info(vector<DeviceInfo>& devices)
 		info.description = string(name);
 		info.num = num;
 
-		info.advanced_shading = (major >= 2);
+		info.advanced_shading = (major >= 3);
 		info.has_fermi_limits = !(major >= 3);
 		info.has_half_images = (major >= 3);
 		info.has_volume_decoupled = false;
-- 
cgit v1.2.3


From 9e717c0495a3f9b71d3895d35df1e15518b6ca2c Mon Sep 17 00:00:00 2001
From: Thomas Dinges <blender@dingto.org>
Date: Sat, 17 Feb 2018 22:56:58 +0100
Subject: Cycles: Remove Fermi texture code.

This should be the last Fermi removal commit, unless I missed something.
It's been a pleasure Fermi!
---
 intern/cycles/device/device.cpp      |   3 -
 intern/cycles/device/device.h        |   2 -
 intern/cycles/device/device_cuda.cpp | 174 ++++++++++++-----------------------
 3 files changed, 60 insertions(+), 119 deletions(-)

(limited to 'intern/cycles/device')

diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index 1ec0bc3e1c6..6959dd73c32 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -359,7 +359,6 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int th
 	info.description = "Multi Device";
 	info.num = 0;
 
-	info.has_fermi_limits = false;
 	info.has_half_images = true;
 	info.has_volume_decoupled = true;
 	info.bvh_layout_mask = BVH_LAYOUT_ALL;
@@ -395,8 +394,6 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int th
 		}
 
 		/* Accumulate device info. */
-		info.has_fermi_limits = info.has_fermi_limits ||
-		                        device.has_fermi_limits;
 		info.has_half_images &= device.has_half_images;
 		info.has_volume_decoupled &= device.has_volume_decoupled;
 		info.bvh_layout_mask = device.bvh_layout_mask & info.bvh_layout_mask;
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 555fd5ec2d2..b856bdd9d01 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -56,7 +56,6 @@ public:
 	int num;
 	bool display_device;            /* GPU is used as a display device. */
 	bool advanced_shading;          /* Supports full shading system. */
-	bool has_fermi_limits;          /* Fixed number of textures limit. */
 	bool has_half_images;           /* Support half-float textures. */
 	bool has_volume_decoupled;      /* Decoupled volume shading. */
 	BVHLayoutMask bvh_layout_mask;  /* Bitmask of supported BVH layouts. */
@@ -73,7 +72,6 @@ public:
 		cpu_threads = 0;
 		display_device = false;
 		advanced_shading = true;
-		has_fermi_limits = false;
 		has_half_images = false;
 		has_volume_decoupled = false;
 		bvh_layout_mask = BVH_LAYOUT_NONE;
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 9644937d906..42e78e50540 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -309,9 +309,7 @@ public:
 
 		delete split_kernel;
 
-		if(!info.has_fermi_limits) {
-			texture_info.free();
-		}
+		texture_info.free();
 
 		cuda_assert(cuCtxDestroy(cuContext));
 	}
@@ -680,7 +678,7 @@ public:
 
 	void load_texture_info()
 	{
-		if(!info.has_fermi_limits && need_texture_info) {
+		if(need_texture_info) {
 			texture_info.copy_to_device();
 			need_texture_info = false;
 		}
@@ -1018,9 +1016,6 @@ public:
 	{
 		CUDAContextScope scope(this);
 
-		/* Check if we are on sm_30 or above, for bindless textures. */
-		bool has_fermi_limits = info.has_fermi_limits;
-
 		/* General variables for both architectures */
 		string bind_name = mem.name;
 		size_t dsize = datatype_size(mem.data_type);
@@ -1076,25 +1071,6 @@ public:
 		/* Image Texture Storage */
 		CUtexref texref = NULL;
 
-		if(has_fermi_limits) {
-			if(mem.data_depth > 1) {
-				/* Kernel uses different bind names for 2d and 3d float textures,
-				 * so we have to adjust couple of things here.
-				 */
-				vector<string> tokens;
-				string_split(tokens, mem.name, "_");
-				bind_name = string_printf("__tex_image_%s_3d_%s",
-				                          tokens[2].c_str(),
-				                          tokens[3].c_str());
-			}
-
-			cuda_assert(cuModuleGetTexRef(&texref, cuModule, bind_name.c_str()));
-
-			if(!texref) {
-				return;
-			}
-		}
-
 		CUarray_format_enum format;
 		switch(mem.data_type) {
 			case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
@@ -1187,97 +1163,68 @@ public:
 			cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size));
 		}
 
-		if(!has_fermi_limits) {
-			/* Kepler+, bindless textures. */
-			int flat_slot = 0;
-			if(string_startswith(mem.name, "__tex_image")) {
-				int pos =  string(mem.name).rfind("_");
-				flat_slot = atoi(mem.name + pos + 1);
-			}
-			else {
-				assert(0);
-			}
-
-			CUDA_RESOURCE_DESC resDesc;
-			memset(&resDesc, 0, sizeof(resDesc));
-
-			if(array_3d) {
-				resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
-				resDesc.res.array.hArray = array_3d;
-				resDesc.flags = 0;
-			}
-			else if(mem.data_height > 0) {
-				resDesc.resType = CU_RESOURCE_TYPE_PITCH2D;
-				resDesc.res.pitch2D.devPtr = mem.device_pointer;
-				resDesc.res.pitch2D.format = format;
-				resDesc.res.pitch2D.numChannels = mem.data_elements;
-				resDesc.res.pitch2D.height = mem.data_height;
-				resDesc.res.pitch2D.width = mem.data_width;
-				resDesc.res.pitch2D.pitchInBytes = dst_pitch;
-			}
-			else {
-				resDesc.resType = CU_RESOURCE_TYPE_LINEAR;
-				resDesc.res.linear.devPtr = mem.device_pointer;
-				resDesc.res.linear.format = format;
-				resDesc.res.linear.numChannels = mem.data_elements;
-				resDesc.res.linear.sizeInBytes = mem.device_size;
-			}
-
-			CUDA_TEXTURE_DESC texDesc;
-			memset(&texDesc, 0, sizeof(texDesc));
-			texDesc.addressMode[0] = address_mode;
-			texDesc.addressMode[1] = address_mode;
-			texDesc.addressMode[2] = address_mode;
-			texDesc.filterMode = filter_mode;
-			texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
-
-			cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
-
-			/* Resize once */
-			if(flat_slot >= texture_info.size()) {
-				/* Allocate some slots in advance, to reduce amount
-				 * of re-allocations. */
-				texture_info.resize(flat_slot + 128);
-			}
-
-			/* Set Mapping and tag that we need to (re-)upload to device */
-			TextureInfo& info = texture_info[flat_slot];
-			info.data = (uint64_t)cmem->texobject;
-			info.cl_buffer = 0;
-			info.interpolation = mem.interpolation;
-			info.extension = mem.extension;
-			info.width = mem.data_width;
-			info.height = mem.data_height;
-			info.depth = mem.data_depth;
-			need_texture_info = true;
+		/* Kepler+, bindless textures. */
+		int flat_slot = 0;
+		if(string_startswith(mem.name, "__tex_image")) {
+			int pos =  string(mem.name).rfind("_");
+			flat_slot = atoi(mem.name + pos + 1);
 		}
 		else {
-			/* Fermi, fixed texture slots. */
-			if(array_3d) {
-				cuda_assert(cuTexRefSetArray(texref, array_3d, CU_TRSA_OVERRIDE_FORMAT));
-			}
-			else if(mem.data_height > 0) {
-				CUDA_ARRAY_DESCRIPTOR array_desc;
-				array_desc.Format = format;
-				array_desc.Height = mem.data_height;
-				array_desc.Width = mem.data_width;
-				array_desc.NumChannels = mem.data_elements;
-				cuda_assert(cuTexRefSetAddress2D_v3(texref, &array_desc, mem.device_pointer, dst_pitch));
-			}
-			else {
-				cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size));
-			}
+			assert(0);
+		}
 
-			/* Attach to texture reference. */
-			cuda_assert(cuTexRefSetFilterMode(texref, filter_mode));
-			cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES));
-			cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements));
-			cuda_assert(cuTexRefSetAddressMode(texref, 0, address_mode));
-			cuda_assert(cuTexRefSetAddressMode(texref, 1, address_mode));
-			if(mem.data_depth > 1) {
-				cuda_assert(cuTexRefSetAddressMode(texref, 2, address_mode));
-			}
+		CUDA_RESOURCE_DESC resDesc;
+		memset(&resDesc, 0, sizeof(resDesc));
+
+		if(array_3d) {
+			resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
+			resDesc.res.array.hArray = array_3d;
+			resDesc.flags = 0;
+		}
+		else if(mem.data_height > 0) {
+			resDesc.resType = CU_RESOURCE_TYPE_PITCH2D;
+			resDesc.res.pitch2D.devPtr = mem.device_pointer;
+			resDesc.res.pitch2D.format = format;
+			resDesc.res.pitch2D.numChannels = mem.data_elements;
+			resDesc.res.pitch2D.height = mem.data_height;
+			resDesc.res.pitch2D.width = mem.data_width;
+			resDesc.res.pitch2D.pitchInBytes = dst_pitch;
 		}
+		else {
+			resDesc.resType = CU_RESOURCE_TYPE_LINEAR;
+			resDesc.res.linear.devPtr = mem.device_pointer;
+			resDesc.res.linear.format = format;
+			resDesc.res.linear.numChannels = mem.data_elements;
+			resDesc.res.linear.sizeInBytes = mem.device_size;
+		}
+
+		CUDA_TEXTURE_DESC texDesc;
+		memset(&texDesc, 0, sizeof(texDesc));
+		texDesc.addressMode[0] = address_mode;
+		texDesc.addressMode[1] = address_mode;
+		texDesc.addressMode[2] = address_mode;
+		texDesc.filterMode = filter_mode;
+		texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
+
+		cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
+
+		/* Resize once */
+		if(flat_slot >= texture_info.size()) {
+			/* Allocate some slots in advance, to reduce amount
+			 * of re-allocations. */
+			texture_info.resize(flat_slot + 128);
+		}
+
+		/* Set Mapping and tag that we need to (re-)upload to device */
+		TextureInfo& info = texture_info[flat_slot];
+		info.data = (uint64_t)cmem->texobject;
+		info.cl_buffer = 0;
+		info.interpolation = mem.interpolation;
+		info.extension = mem.extension;
+		info.width = mem.data_width;
+		info.height = mem.data_height;
+		info.depth = mem.data_depth;
+		need_texture_info = true;
 	}
 
 	void tex_free(device_memory& mem)
@@ -2545,7 +2492,6 @@ void device_cuda_info(vector<DeviceInfo>& devices)
 		info.num = num;
 
 		info.advanced_shading = (major >= 3);
-		info.has_fermi_limits = !(major >= 3);
 		info.has_half_images = (major >= 3);
 		info.has_volume_decoupled = false;
 		info.bvh_layout_mask = BVH_LAYOUT_BVH2;
-- 
cgit v1.2.3


From 1dcd7db73d13443c59dd824abd9cacbf6bc88997 Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel <brechtvanlommel@gmail.com>
Date: Sun, 18 Feb 2018 00:51:46 +0100
Subject: Code cleanup: remove some more unused code after recent CUDA changes.

---
 intern/cycles/device/device_cuda.cpp | 2 --
 1 file changed, 2 deletions(-)

(limited to 'intern/cycles/device')

diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 42e78e50540..d28080c667a 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1069,8 +1069,6 @@ public:
 		}
 
 		/* Image Texture Storage */
-		CUtexref texref = NULL;
-
 		CUarray_format_enum format;
 		switch(mem.data_type) {
 			case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
-- 
cgit v1.2.3


From fee4b646c451303a78baef3cbf031e9e0f771373 Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel <brechtvanlommel@gmail.com>
Date: Tue, 6 Feb 2018 15:19:25 +0100
Subject: Cycles: tweak CUDA messages and avoid build errors with existing
 sm_2x configs.

---
 intern/cycles/device/device_cuda.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'intern/cycles/device')

diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index d28080c667a..5703aa7144e 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -2479,7 +2479,7 @@ void device_cuda_info(vector<DeviceInfo>& devices)
 		cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, num);
 		if(major < 3) {
 			VLOG(1) << "Ignoring device \"" << name
-			        << "\", compute capability is too low.";
+			        << "\", this graphics card is no longer supported.";
 			continue;
 		}
 
-- 
cgit v1.2.3