From 6ec599c68214413475cbea403ef869ed7c8113f9 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Fri, 3 Nov 2017 20:21:19 +0100 Subject: Fix T53247: mixed CPU + GPU render wrong texture limits. --- intern/cycles/device/device.cpp | 29 +++++++++++++++++++---------- intern/cycles/device/device.h | 18 ++++++++++-------- intern/cycles/device/device_cpu.cpp | 1 + intern/cycles/device/device_cuda.cpp | 15 ++++++++------- intern/cycles/render/image.cpp | 26 ++------------------------ 5 files changed, 40 insertions(+), 49 deletions(-) (limited to 'intern') diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index 1a3a3846c25..b2f20bab58b 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -361,39 +361,48 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int th info.description = "Multi Device"; info.num = 0; - info.has_bindless_textures = true; + info.has_fermi_limits = false; + info.has_half_images = true; info.has_volume_decoupled = true; info.has_qbvh = true; info.has_osl = true; foreach(const DeviceInfo &device, subdevices) { - info.has_bindless_textures &= device.has_bindless_textures; - info.has_volume_decoupled &= device.has_volume_decoupled; - info.has_qbvh &= device.has_qbvh; - info.has_osl &= device.has_osl; - + /* Ensure CPU device does not slow down GPU. */ if(device.type == DEVICE_CPU && subdevices.size() > 1) { if(background) { int orig_cpu_threads = (threads)?
threads: system_cpu_thread_count(); int cpu_threads = max(orig_cpu_threads - (subdevices.size() - 1), 0); + VLOG(1) << "CPU render threads reduced from " + << orig_cpu_threads << " to " << cpu_threads + << ", to dedicate to GPU."; + if(cpu_threads >= 1) { DeviceInfo cpu_device = device; cpu_device.cpu_threads = cpu_threads; info.multi_devices.push_back(cpu_device); } - - VLOG(1) << "CPU render threads reduced from " - << orig_cpu_threads << " to " << cpu_threads - << ", to dedicate to GPU."; + else { + continue; + } } else { VLOG(1) << "CPU render threads disabled for interactive render."; + continue; } } else { info.multi_devices.push_back(device); } + + /* Accumulate device info. */ + info.has_fermi_limits = info.has_fermi_limits || + device.has_fermi_limits; + info.has_half_images &= device.has_half_images; + info.has_volume_decoupled &= device.has_volume_decoupled; + info.has_qbvh &= device.has_qbvh; + info.has_osl &= device.has_osl; } return info; diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index 316bf70a5c3..4f78b9e82a4 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -52,13 +52,14 @@ public: string description; string id; /* used for user preferences, should stay fixed with changing hardware config */ int num; - bool display_device; - bool advanced_shading; - bool has_bindless_textures; /* flag for GPU and Multi device */ - bool has_volume_decoupled; - bool has_qbvh; - bool has_osl; - bool use_split_kernel; /* Denotes if the device is going to run cycles using split-kernel */ + bool display_device; /* GPU is used as a display device. */ + bool advanced_shading; /* Supports full shading system. */ + bool has_fermi_limits; /* Fixed number of textures limit. */ + bool has_half_images; /* Support half-float textures. */ + bool has_volume_decoupled; /* Decoupled volume shading. */ + bool has_qbvh; /* Supports both BVH2 and BVH4 raytracing. */ + bool has_osl; /* Support Open Shading Language. 
*/ + bool use_split_kernel; /* Use split or mega kernel. */ int cpu_threads; vector<DeviceInfo> multi_devices; @@ -70,7 +71,8 @@ public: cpu_threads = 0; display_device = false; advanced_shading = true; - has_bindless_textures = false; + has_fermi_limits = false; + has_half_images = false; has_volume_decoupled = false; has_qbvh = false; has_osl = false; diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 32ab18fe164..0c0e6af7eb4 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -1047,6 +1047,7 @@ void device_cpu_info(vector<DeviceInfo>& devices) info.has_qbvh = system_cpu_support_sse2(); info.has_volume_decoupled = true; info.has_osl = true; + info.has_half_images = true; devices.insert(devices.begin(), info); } diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index aa6386e455b..c951364b53a 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -274,7 +274,7 @@ public: delete split_kernel; - if(info.has_bindless_textures) { + if(!info.has_fermi_limits) { texture_info.free(); } @@ -547,7 +547,7 @@ public: void load_texture_info() { - if(info.has_bindless_textures && need_texture_info) { + if(!info.has_fermi_limits && need_texture_info) { texture_info.copy_to_device(); need_texture_info = false; } @@ -701,7 +701,7 @@ public: << string_human_readable_size(mem.memory_size()) << ")"; /* Check if we are on sm_30 or above, for bindless textures.
*/ - bool has_bindless_textures = info.has_bindless_textures; + bool has_fermi_limits = info.has_fermi_limits; /* General variables for both architectures */ string bind_name = mem.name; @@ -735,7 +735,7 @@ public: /* General variables for Fermi */ CUtexref texref = NULL; - if(!has_bindless_textures && mem.interpolation != INTERPOLATION_NONE) { + if(has_fermi_limits && mem.interpolation != INTERPOLATION_NONE) { if(mem.data_depth > 1) { /* Kernel uses different bind names for 2d and 3d float textures, * so we have to adjust couple of things here. @@ -853,7 +853,7 @@ public: stats.mem_alloc(size); - if(has_bindless_textures) { + if(!has_fermi_limits) { /* Bindless Textures - Kepler */ int flat_slot = 0; if(string_startswith(mem.name, "__tex_image")) { @@ -934,7 +934,7 @@ public: cuArrayDestroy((CUarray)mem.device_pointer); /* Free CUtexObject (Bindless Textures) */ - if(info.has_bindless_textures && tex_bindless_map[mem.device_pointer]) { + if(!info.has_fermi_limits && tex_bindless_map[mem.device_pointer]) { CUtexObject tex = tex_bindless_map[mem.device_pointer]; cuTexObjectDestroy(tex); } @@ -2174,7 +2174,8 @@ void device_cuda_info(vector<DeviceInfo>& devices) info.num = num; info.advanced_shading = (major >= 2); - info.has_bindless_textures = (major >= 3); + info.has_fermi_limits = (major < 3); + info.has_half_images = true; info.has_volume_decoupled = false; info.has_qbvh = false; diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index 625901ff258..9358b40a689 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -46,32 +46,10 @@ ImageManager::ImageManager(const DeviceInfo& info) osl_texture_system = NULL; animation_frame = 0; - /* In case of multiple devices used we need to know type of an actual - * compute device. - * - * NOTE: We assume that all the devices are same type, otherwise we'll - * be screwed on so many levels..
- */ - DeviceType device_type = info.type; - if(device_type == DEVICE_MULTI) { - device_type = info.multi_devices[0].type; - } - /* Set image limits */ max_num_images = TEX_NUM_MAX; - has_half_images = true; - cuda_fermi_limits = false; - - if(device_type == DEVICE_CUDA) { - if(!info.has_bindless_textures) { - /* CUDA Fermi hardware (SM 2.x) has a hard limit on the number of textures */ - cuda_fermi_limits = true; - has_half_images = false; - } - } - else if(device_type == DEVICE_OPENCL) { - has_half_images = false; - } + has_half_images = info.has_half_images; + cuda_fermi_limits = info.has_fermi_limits; for(size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { tex_num_images[type] = 0; -- cgit v1.2.3