From 6ec599c68214413475cbea403ef869ed7c8113f9 Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel <brechtvanlommel@gmail.com>
Date: Fri, 3 Nov 2017 20:21:19 +0100
Subject: Fix T53247: mixed CPU + GPU render wrong texture limits.

---
 intern/cycles/device/device.cpp      | 29 +++++++++++++++++++----------
 intern/cycles/device/device.h        | 18 ++++++++++--------
 intern/cycles/device/device_cpu.cpp  |  1 +
 intern/cycles/device/device_cuda.cpp | 15 ++++++++-------
 intern/cycles/render/image.cpp       | 26 ++------------------------
 5 files changed, 40 insertions(+), 49 deletions(-)

(limited to 'intern')
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index 1a3a3846c25..b2f20bab58b 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -361,39 +361,48 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int th
 	info.description = "Multi Device";
 	info.num = 0;
 
-	info.has_bindless_textures = true;
+	info.has_fermi_limits = false;
+	info.has_half_images = true;
 	info.has_volume_decoupled = true;
 	info.has_qbvh = true;
 	info.has_osl = true;
 
 	foreach(const DeviceInfo &device, subdevices) {
-		info.has_bindless_textures &= device.has_bindless_textures;
-		info.has_volume_decoupled &= device.has_volume_decoupled;
-		info.has_qbvh &= device.has_qbvh;
-		info.has_osl &= device.has_osl;
-
+		/* Ensure CPU device does not slow down GPU. */
 		if(device.type == DEVICE_CPU && subdevices.size() > 1) {
 			if(background) {
 				int orig_cpu_threads = (threads)? threads: system_cpu_thread_count();
 				int cpu_threads = max(orig_cpu_threads - (subdevices.size() - 1), 0);
 
+				VLOG(1) << "CPU render threads reduced from "
+						<< orig_cpu_threads << " to " << cpu_threads
+						<< ", to dedicate to GPU.";
+
 				if(cpu_threads >= 1) {
 					DeviceInfo cpu_device = device;
 					cpu_device.cpu_threads = cpu_threads;
 					info.multi_devices.push_back(cpu_device);
 				}
-
-				VLOG(1) << "CPU render threads reduced from "
-						<< orig_cpu_threads << " to " << cpu_threads
-						<< ", to dedicate to GPU.";
+				else {
+					continue;
+				}
 			}
 			else {
 				VLOG(1) << "CPU render threads disabled for interactive render.";
+				continue;
 			}
 		}
 		else {
 			info.multi_devices.push_back(device);
 		}
+
+		/* Accumulate device info. */
+		info.has_fermi_limits = info.has_fermi_limits ||
+		                        device.has_fermi_limits;
+		info.has_half_images &= device.has_half_images;
+		info.has_volume_decoupled &= device.has_volume_decoupled;
+		info.has_qbvh &= device.has_qbvh;
+		info.has_osl &= device.has_osl;
 	}
 
 	return info;
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 316bf70a5c3..4f78b9e82a4 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -52,13 +52,14 @@ public:
 	string description;
 	string id; /* used for user preferences, should stay fixed with changing hardware config */
 	int num;
-	bool display_device;
-	bool advanced_shading;
-	bool has_bindless_textures; /* flag for GPU and Multi device */
-	bool has_volume_decoupled;
-	bool has_qbvh;
-	bool has_osl;
-	bool use_split_kernel; /* Denotes if the device is going to run cycles using split-kernel */
+	bool display_device;         /* GPU is used as a display device. */
+	bool advanced_shading;       /* Supports full shading system. */
+	bool has_fermi_limits;       /* Fixed limit on number of textures. */
+	bool has_half_images;        /* Supports half-float textures. */
+	bool has_volume_decoupled;   /* Decoupled volume shading. */
+	bool has_qbvh;               /* Supports both BVH2 and BVH4 raytracing. */
+	bool has_osl;                /* Supports Open Shading Language. */
+	bool use_split_kernel;       /* Use split or mega kernel. */
 	int cpu_threads;
 	vector<DeviceInfo> multi_devices;
 
@@ -70,7 +71,8 @@ public:
 		cpu_threads = 0;
 		display_device = false;
 		advanced_shading = true;
-		has_bindless_textures = false;
+		has_fermi_limits = false;
+		has_half_images = false;
 		has_volume_decoupled = false;
 		has_qbvh = false;
 		has_osl = false;
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 32ab18fe164..0c0e6af7eb4 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -1047,6 +1047,7 @@ void device_cpu_info(vector<DeviceInfo>& devices)
 	info.has_qbvh = system_cpu_support_sse2();
 	info.has_volume_decoupled = true;
 	info.has_osl = true;
+	info.has_half_images = true;
 
 	devices.insert(devices.begin(), info);
 }
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index aa6386e455b..c951364b53a 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -274,7 +274,7 @@ public:
 
 		delete split_kernel;
 
-		if(info.has_bindless_textures) {
+		if(!info.has_fermi_limits) {
 			texture_info.free();
 		}
 
@@ -547,7 +547,7 @@ public:
 
 	void load_texture_info()
 	{
-		if(info.has_bindless_textures && need_texture_info) {
+		if(!info.has_fermi_limits && need_texture_info) {
 			texture_info.copy_to_device();
 			need_texture_info = false;
 		}
@@ -701,7 +701,7 @@ public:
 		        << string_human_readable_size(mem.memory_size()) << ")";
 
 		/* Check if we are on sm_30 or above, for bindless textures. */
-		bool has_bindless_textures = info.has_bindless_textures;
+		bool has_fermi_limits = info.has_fermi_limits;
 
 		/* General variables for both architectures */
 		string bind_name = mem.name;
@@ -735,7 +735,7 @@ public:
 		/* General variables for Fermi */
 		CUtexref texref = NULL;
 
-		if(!has_bindless_textures && mem.interpolation != INTERPOLATION_NONE) {
+		if(has_fermi_limits && mem.interpolation != INTERPOLATION_NONE) {
 			if(mem.data_depth > 1) {
 				/* Kernel uses different bind names for 2d and 3d float textures,
 				 * so we have to adjust couple of things here.
@@ -853,7 +853,7 @@ public:
 
 			stats.mem_alloc(size);
 
-			if(has_bindless_textures) {
+			if(!has_fermi_limits) {
 				/* Bindless Textures - Kepler */
 				int flat_slot = 0;
 				if(string_startswith(mem.name, "__tex_image")) {
@@ -934,7 +934,7 @@ public:
 				cuArrayDestroy((CUarray)mem.device_pointer);
 
 				/* Free CUtexObject (Bindless Textures) */
-				if(info.has_bindless_textures && tex_bindless_map[mem.device_pointer]) {
+				if(!info.has_fermi_limits && tex_bindless_map[mem.device_pointer]) {
 					CUtexObject tex = tex_bindless_map[mem.device_pointer];
 					cuTexObjectDestroy(tex);
 				}
@@ -2174,7 +2174,8 @@ void device_cuda_info(vector<DeviceInfo>& devices)
 		info.num = num;
 
 		info.advanced_shading = (major >= 2);
-		info.has_bindless_textures = (major >= 3);
+		info.has_fermi_limits = (major < 3);
+		info.has_half_images = true;
 		info.has_volume_decoupled = false;
 		info.has_qbvh = false;
 
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index 625901ff258..9358b40a689 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -46,32 +46,10 @@ ImageManager::ImageManager(const DeviceInfo& info)
 	osl_texture_system = NULL;
 	animation_frame = 0;
 
-	/* In case of multiple devices used we need to know type of an actual
-	 * compute device.
-	 *
-	 * NOTE: We assume that all the devices are same type, otherwise we'll
-	 * be screwed on so many levels..
-	 */
-	DeviceType device_type = info.type;
-	if(device_type == DEVICE_MULTI) {
-		device_type = info.multi_devices[0].type;
-	}
-
 	/* Set image limits */
 	max_num_images = TEX_NUM_MAX;
-	has_half_images = true;
-	cuda_fermi_limits = false;
-
-	if(device_type == DEVICE_CUDA) {
-		if(!info.has_bindless_textures) {
-			/* CUDA Fermi hardware (SM 2.x) has a hard limit on the number of textures */
-			cuda_fermi_limits = true;
-			has_half_images = false;
-		}
-	}
-	else if(device_type == DEVICE_OPENCL) {
-		has_half_images = false;
-	}
+	has_half_images = info.has_half_images;
+	cuda_fermi_limits = info.has_fermi_limits;
 
 	for(size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
 		tex_num_images[type] = 0;
-- 
cgit v1.2.3