From 2eaf90b305fc6fb37f775f288c3d5cd34cc66c81 Mon Sep 17 00:00:00 2001 From: Thomas Dinges Date: Sat, 17 Feb 2018 16:15:07 +0100 Subject: Cycles: Remove Fermi support from CMake and update runtime checks in device_cuda.cpp. Fermi code in Cycles kernel and texture system are coming next. --- intern/cycles/device/device_cuda.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'intern/cycles/device') diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index dfedf922ca9..9644937d906 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -322,9 +322,9 @@ public: cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId); cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId); - /* We only support sm_20 and above */ - if(major < 2) { - cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor)); + /* We only support sm_30 and above */ + if(major < 3) { + cuda_error_message(string_printf("CUDA device supported only with compute capability 3.0 or up, found %d.%d.", major, minor)); return false; } @@ -462,9 +462,9 @@ public: #ifdef _WIN32 if(have_precompiled_kernels()) { - if(major < 2) { + if(major < 3) { cuda_error_message(string_printf( - "CUDA device requires compute capability 2.0 or up, " + "CUDA device requires compute capability 3.0 or up, " "found %d.%d. Your GPU is not supported.", major, minor)); } @@ -2532,7 +2532,7 @@ void device_cuda_info(vector& devices) int major; cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, num); - if(major < 2) { + if(major < 3) { VLOG(1) << "Ignoring device \"" << name << "\", compute capability is too low."; continue; @@ -2544,7 +2544,7 @@ void device_cuda_info(vector& devices) info.description = string(name); info.num = num; - info.advanced_shading = (major >= 2); + info.advanced_shading = (major >= 3); info.has_fermi_limits = !(major >= 3); info.has_half_images = (major >= 3); info.has_volume_decoupled = false; -- cgit v1.2.3 From 9e717c0495a3f9b71d3895d35df1e15518b6ca2c Mon Sep 17 00:00:00 2001 From: Thomas Dinges Date: Sat, 17 Feb 2018 22:56:58 +0100 Subject: Cycles: Remove Fermi texture code. This should be the last Fermi removal commit, unless I missed something. It's been a pleasure Fermi! --- intern/cycles/device/device.cpp | 3 - intern/cycles/device/device.h | 2 - intern/cycles/device/device_cuda.cpp | 174 ++++++++++++----------------------- 3 files changed, 60 insertions(+), 119 deletions(-) (limited to 'intern/cycles/device') diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index 1ec0bc3e1c6..6959dd73c32 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -359,7 +359,6 @@ DeviceInfo Device::get_multi_device(const vector& subdevices, int th info.description = "Multi Device"; info.num = 0; - info.has_fermi_limits = false; info.has_half_images = true; info.has_volume_decoupled = true; info.bvh_layout_mask = BVH_LAYOUT_ALL; @@ -395,8 +394,6 @@ DeviceInfo Device::get_multi_device(const vector& subdevices, int th } /* Accumulate device info. */ - info.has_fermi_limits = info.has_fermi_limits || - device.has_fermi_limits; info.has_half_images &= device.has_half_images; info.has_volume_decoupled &= device.has_volume_decoupled; info.bvh_layout_mask = device.bvh_layout_mask & info.bvh_layout_mask; diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index 555fd5ec2d2..b856bdd9d01 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -56,7 +56,6 @@ public: int num; bool display_device; /* GPU is used as a display device. */ bool advanced_shading; /* Supports full shading system. */ - bool has_fermi_limits; /* Fixed number of textures limit. */ bool has_half_images; /* Support half-float textures. */ bool has_volume_decoupled; /* Decoupled volume shading. */ BVHLayoutMask bvh_layout_mask; /* Bitmask of supported BVH layouts. */ @@ -73,7 +72,6 @@ public: cpu_threads = 0; display_device = false; advanced_shading = true; - has_fermi_limits = false; has_half_images = false; has_volume_decoupled = false; bvh_layout_mask = BVH_LAYOUT_NONE; diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 9644937d906..42e78e50540 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -309,9 +309,7 @@ public: delete split_kernel; - if(!info.has_fermi_limits) { - texture_info.free(); - } + texture_info.free(); cuda_assert(cuCtxDestroy(cuContext)); } @@ -680,7 +678,7 @@ public: void load_texture_info() { - if(!info.has_fermi_limits && need_texture_info) { + if(need_texture_info) { texture_info.copy_to_device(); need_texture_info = false; } @@ -1018,9 +1016,6 @@ public: { CUDAContextScope scope(this); - /* Check if we are on sm_30 or above, for bindless textures. */ - bool has_fermi_limits = info.has_fermi_limits; - /* General variables for both architectures */ string bind_name = mem.name; size_t dsize = datatype_size(mem.data_type); @@ -1076,25 +1071,6 @@ public: /* Image Texture Storage */ CUtexref texref = NULL; - if(has_fermi_limits) { - if(mem.data_depth > 1) { - /* Kernel uses different bind names for 2d and 3d float textures, - * so we have to adjust couple of things here. - */ - vector tokens; - string_split(tokens, mem.name, "_"); - bind_name = string_printf("__tex_image_%s_3d_%s", - tokens[2].c_str(), - tokens[3].c_str()); - } - - cuda_assert(cuModuleGetTexRef(&texref, cuModule, bind_name.c_str())); - - if(!texref) { - return; - } - } - CUarray_format_enum format; switch(mem.data_type) { case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break; @@ -1187,97 +1163,68 @@ public: cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size)); } - if(!has_fermi_limits) { - /* Kepler+, bindless textures. */ - int flat_slot = 0; - if(string_startswith(mem.name, "__tex_image")) { - int pos = string(mem.name).rfind("_"); - flat_slot = atoi(mem.name + pos + 1); - } - else { - assert(0); - } - - CUDA_RESOURCE_DESC resDesc; - memset(&resDesc, 0, sizeof(resDesc)); - - if(array_3d) { - resDesc.resType = CU_RESOURCE_TYPE_ARRAY; - resDesc.res.array.hArray = array_3d; - resDesc.flags = 0; - } - else if(mem.data_height > 0) { - resDesc.resType = CU_RESOURCE_TYPE_PITCH2D; - resDesc.res.pitch2D.devPtr = mem.device_pointer; - resDesc.res.pitch2D.format = format; - resDesc.res.pitch2D.numChannels = mem.data_elements; - resDesc.res.pitch2D.height = mem.data_height; - resDesc.res.pitch2D.width = mem.data_width; - resDesc.res.pitch2D.pitchInBytes = dst_pitch; - } - else { - resDesc.resType = CU_RESOURCE_TYPE_LINEAR; - resDesc.res.linear.devPtr = mem.device_pointer; - resDesc.res.linear.format = format; - resDesc.res.linear.numChannels = mem.data_elements; - resDesc.res.linear.sizeInBytes = mem.device_size; - } - - CUDA_TEXTURE_DESC texDesc; - memset(&texDesc, 0, sizeof(texDesc)); - texDesc.addressMode[0] = address_mode; - texDesc.addressMode[1] = address_mode; - texDesc.addressMode[2] = address_mode; - texDesc.filterMode = filter_mode; - texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES; - - cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL)); - - /* Resize once */ - if(flat_slot >= texture_info.size()) { - /* Allocate some slots in advance, to reduce amount - * of re-allocations. */ - texture_info.resize(flat_slot + 128); - } - - /* Set Mapping and tag that we need to (re-)upload to device */ - TextureInfo& info = texture_info[flat_slot]; - info.data = (uint64_t)cmem->texobject; - info.cl_buffer = 0; - info.interpolation = mem.interpolation; - info.extension = mem.extension; - info.width = mem.data_width; - info.height = mem.data_height; - info.depth = mem.data_depth; - need_texture_info = true; + /* Kepler+, bindless textures. */ + int flat_slot = 0; + if(string_startswith(mem.name, "__tex_image")) { + int pos = string(mem.name).rfind("_"); + flat_slot = atoi(mem.name + pos + 1); } else { - /* Fermi, fixed texture slots. */ - if(array_3d) { - cuda_assert(cuTexRefSetArray(texref, array_3d, CU_TRSA_OVERRIDE_FORMAT)); - } - else if(mem.data_height > 0) { - CUDA_ARRAY_DESCRIPTOR array_desc; - array_desc.Format = format; - array_desc.Height = mem.data_height; - array_desc.Width = mem.data_width; - array_desc.NumChannels = mem.data_elements; - cuda_assert(cuTexRefSetAddress2D_v3(texref, &array_desc, mem.device_pointer, dst_pitch)); - } - else { - cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size)); - } + assert(0); + } - /* Attach to texture reference. */ - cuda_assert(cuTexRefSetFilterMode(texref, filter_mode)); - cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES)); - cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements)); - cuda_assert(cuTexRefSetAddressMode(texref, 0, address_mode)); - cuda_assert(cuTexRefSetAddressMode(texref, 1, address_mode)); - if(mem.data_depth > 1) { - cuda_assert(cuTexRefSetAddressMode(texref, 2, address_mode)); - } + CUDA_RESOURCE_DESC resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + + if(array_3d) { + resDesc.resType = CU_RESOURCE_TYPE_ARRAY; + resDesc.res.array.hArray = array_3d; + resDesc.flags = 0; + } + else if(mem.data_height > 0) { + resDesc.resType = CU_RESOURCE_TYPE_PITCH2D; + resDesc.res.pitch2D.devPtr = mem.device_pointer; + resDesc.res.pitch2D.format = format; + resDesc.res.pitch2D.numChannels = mem.data_elements; + resDesc.res.pitch2D.height = mem.data_height; + resDesc.res.pitch2D.width = mem.data_width; + resDesc.res.pitch2D.pitchInBytes = dst_pitch; } + else { + resDesc.resType = CU_RESOURCE_TYPE_LINEAR; + resDesc.res.linear.devPtr = mem.device_pointer; + resDesc.res.linear.format = format; + resDesc.res.linear.numChannels = mem.data_elements; + resDesc.res.linear.sizeInBytes = mem.device_size; + } + + CUDA_TEXTURE_DESC texDesc; + memset(&texDesc, 0, sizeof(texDesc)); + texDesc.addressMode[0] = address_mode; + texDesc.addressMode[1] = address_mode; + texDesc.addressMode[2] = address_mode; + texDesc.filterMode = filter_mode; + texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES; + + cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL)); + + /* Resize once */ + if(flat_slot >= texture_info.size()) { + /* Allocate some slots in advance, to reduce amount + * of re-allocations. */ + texture_info.resize(flat_slot + 128); + } + + /* Set Mapping and tag that we need to (re-)upload to device */ + TextureInfo& info = texture_info[flat_slot]; + info.data = (uint64_t)cmem->texobject; + info.cl_buffer = 0; + info.interpolation = mem.interpolation; + info.extension = mem.extension; + info.width = mem.data_width; + info.height = mem.data_height; + info.depth = mem.data_depth; + need_texture_info = true; } void tex_free(device_memory& mem) @@ -2545,7 +2492,6 @@ void device_cuda_info(vector& devices) info.num = num; info.advanced_shading = (major >= 3); - info.has_fermi_limits = !(major >= 3); info.has_half_images = (major >= 3); info.has_volume_decoupled = false; info.bvh_layout_mask = BVH_LAYOUT_BVH2; -- cgit v1.2.3 From 1dcd7db73d13443c59dd824abd9cacbf6bc88997 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Sun, 18 Feb 2018 00:51:46 +0100 Subject: Code cleanup: remove some more unused code after recent CUDA changes. --- intern/cycles/device/device_cuda.cpp | 2 -- 1 file changed, 2 deletions(-) (limited to 'intern/cycles/device') diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 42e78e50540..d28080c667a 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -1069,8 +1069,6 @@ public: } /* Image Texture Storage */ - CUtexref texref = NULL; - CUarray_format_enum format; switch(mem.data_type) { case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break; -- cgit v1.2.3 From fee4b646c451303a78baef3cbf031e9e0f771373 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Tue, 6 Feb 2018 15:19:25 +0100 Subject: Cycles: tweak CUDA messages and avoid build errors with existing sm_2x configs. --- intern/cycles/device/device_cuda.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'intern/cycles/device') diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index d28080c667a..5703aa7144e 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -2479,7 +2479,7 @@ void device_cuda_info(vector& devices) cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, num); if(major < 3) { VLOG(1) << "Ignoring device \"" << name - << "\", compute capability is too low."; + << "\", this graphics card is no longer supported."; continue; } -- cgit v1.2.3