git.blender.org/blender.git
author     Campbell Barton <ideasman42@gmail.com>  2018-02-18 14:33:05 +0300
committer  Campbell Barton <ideasman42@gmail.com>  2018-02-18 14:33:05 +0300
commit     2bc952fdb6e1474e9e568224a37bcf5cff874aaf (patch)
tree       8d7c3a99896415392035ba350a606d350b6d5e37 /intern/cycles/device/device_cuda.cpp
parent     5d3f679013bbbb9f0c7aae47b5653c54266cf7ca (diff)
parent     7ff3cd26932cbc93068eea4dc7438442216e4ee1 (diff)
Merge branch 'master' into blender2.8
Diffstat (limited to 'intern/cycles/device/device_cuda.cpp')
-rw-r--r--  intern/cycles/device/device_cuda.cpp  192
1 file changed, 68 insertions(+), 124 deletions(-)
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 64295838f0c..54d4c731ed4 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -309,9 +309,7 @@ public:
delete split_kernel;
- if(!info.has_fermi_limits) {
- texture_info.free();
- }
+ texture_info.free();
cuda_assert(cuCtxDestroy(cuContext));
}
@@ -322,9 +320,9 @@ public:
cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
- /* We only support sm_20 and above */
- if(major < 2) {
- cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor));
+ /* We only support sm_30 and above */
+ if(major < 3) {
+ cuda_error_message(string_printf("CUDA device supported only with compute capability 3.0 or up, found %d.%d.", major, minor));
return false;
}
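
For reference, a minimal standalone sketch of the same capability check through the CUDA driver API; the check_capability() helper is illustrative and not part of Cycles:

#include <cuda.h>
#include <cstdio>

/* Query the compute capability of a device and reject anything below sm_30,
 * mirroring the check above (illustrative helper only). */
static bool check_capability(CUdevice dev)
{
	int major = 0, minor = 0;
	cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev);
	cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev);

	if(major < 3) {
		fprintf(stderr, "Compute capability %d.%d found, 3.0 or newer required.\n", major, minor);
		return false;
	}
	return true;
}
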
@@ -462,9 +460,9 @@ public:
#ifdef _WIN32
if(have_precompiled_kernels()) {
- if(major < 2) {
+ if(major < 3) {
cuda_error_message(string_printf(
- "CUDA device requires compute capability 2.0 or up, "
+ "CUDA device requires compute capability 3.0 or up, "
"found %d.%d. Your GPU is not supported.",
major, minor));
}
@@ -680,7 +678,7 @@ public:
void load_texture_info()
{
- if(!info.has_fermi_limits && need_texture_info) {
+ if(need_texture_info) {
texture_info.copy_to_device();
need_texture_info = false;
}
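
The simplified load_texture_info() above is a plain dirty-flag upload: the texture table is copied to the device only when it changed since the last use. A generic sketch of that pattern, assuming a device_vector-like container with a copy_to_device() method (names illustrative, not the Cycles classes):

/* Dirty-flag upload sketch: push host data to the GPU only when it changed. */
struct LazyDeviceTable {
	bool need_upload = false;

	void mark_changed() { need_upload = true; }

	void upload_if_needed()
	{
		if(need_upload) {
			copy_to_device();     /* transfer host contents to the GPU */
			need_upload = false;  /* nothing pending until the next change */
		}
	}

	void copy_to_device();  /* provided elsewhere; placeholder in this sketch */
};
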
@@ -1018,9 +1016,6 @@ public:
{
CUDAContextScope scope(this);
- /* Check if we are on sm_30 or above, for bindless textures. */
- bool has_fermi_limits = info.has_fermi_limits;
-
/* General variables for both architectures */
string bind_name = mem.name;
size_t dsize = datatype_size(mem.data_type);
@@ -1074,27 +1069,6 @@ public:
}
/* Image Texture Storage */
- CUtexref texref = NULL;
-
- if(has_fermi_limits) {
- if(mem.data_depth > 1) {
- /* Kernel uses different bind names for 2d and 3d float textures,
- * so we have to adjust couple of things here.
- */
- vector<string> tokens;
- string_split(tokens, mem.name, "_");
- bind_name = string_printf("__tex_image_%s_3d_%s",
- tokens[2].c_str(),
- tokens[3].c_str());
- }
-
- cuda_assert(cuModuleGetTexRef(&texref, cuModule, bind_name.c_str()));
-
- if(!texref) {
- return;
- }
- }
-
CUarray_format_enum format;
switch(mem.data_type) {
case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
@@ -1187,97 +1161,68 @@ public:
cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size));
}
- if(!has_fermi_limits) {
- /* Kepler+, bindless textures. */
- int flat_slot = 0;
- if(string_startswith(mem.name, "__tex_image")) {
- int pos = string(mem.name).rfind("_");
- flat_slot = atoi(mem.name + pos + 1);
- }
- else {
- assert(0);
- }
-
- CUDA_RESOURCE_DESC resDesc;
- memset(&resDesc, 0, sizeof(resDesc));
-
- if(array_3d) {
- resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
- resDesc.res.array.hArray = array_3d;
- resDesc.flags = 0;
- }
- else if(mem.data_height > 0) {
- resDesc.resType = CU_RESOURCE_TYPE_PITCH2D;
- resDesc.res.pitch2D.devPtr = mem.device_pointer;
- resDesc.res.pitch2D.format = format;
- resDesc.res.pitch2D.numChannels = mem.data_elements;
- resDesc.res.pitch2D.height = mem.data_height;
- resDesc.res.pitch2D.width = mem.data_width;
- resDesc.res.pitch2D.pitchInBytes = dst_pitch;
- }
- else {
- resDesc.resType = CU_RESOURCE_TYPE_LINEAR;
- resDesc.res.linear.devPtr = mem.device_pointer;
- resDesc.res.linear.format = format;
- resDesc.res.linear.numChannels = mem.data_elements;
- resDesc.res.linear.sizeInBytes = mem.device_size;
- }
-
- CUDA_TEXTURE_DESC texDesc;
- memset(&texDesc, 0, sizeof(texDesc));
- texDesc.addressMode[0] = address_mode;
- texDesc.addressMode[1] = address_mode;
- texDesc.addressMode[2] = address_mode;
- texDesc.filterMode = filter_mode;
- texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
-
- cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
-
- /* Resize once */
- if(flat_slot >= texture_info.size()) {
- /* Allocate some slots in advance, to reduce amount
- * of re-allocations. */
- texture_info.resize(flat_slot + 128);
- }
-
- /* Set Mapping and tag that we need to (re-)upload to device */
- TextureInfo& info = texture_info[flat_slot];
- info.data = (uint64_t)cmem->texobject;
- info.cl_buffer = 0;
- info.interpolation = mem.interpolation;
- info.extension = mem.extension;
- info.width = mem.data_width;
- info.height = mem.data_height;
- info.depth = mem.data_depth;
- need_texture_info = true;
+ /* Kepler+, bindless textures. */
+ int flat_slot = 0;
+ if(string_startswith(mem.name, "__tex_image")) {
+ int pos = string(mem.name).rfind("_");
+ flat_slot = atoi(mem.name + pos + 1);
}
else {
- /* Fermi, fixed texture slots. */
- if(array_3d) {
- cuda_assert(cuTexRefSetArray(texref, array_3d, CU_TRSA_OVERRIDE_FORMAT));
- }
- else if(mem.data_height > 0) {
- CUDA_ARRAY_DESCRIPTOR array_desc;
- array_desc.Format = format;
- array_desc.Height = mem.data_height;
- array_desc.Width = mem.data_width;
- array_desc.NumChannels = mem.data_elements;
- cuda_assert(cuTexRefSetAddress2D_v3(texref, &array_desc, mem.device_pointer, dst_pitch));
- }
- else {
- cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size));
- }
+ assert(0);
+ }
- /* Attach to texture reference. */
- cuda_assert(cuTexRefSetFilterMode(texref, filter_mode));
- cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES));
- cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements));
- cuda_assert(cuTexRefSetAddressMode(texref, 0, address_mode));
- cuda_assert(cuTexRefSetAddressMode(texref, 1, address_mode));
- if(mem.data_depth > 1) {
- cuda_assert(cuTexRefSetAddressMode(texref, 2, address_mode));
- }
+ CUDA_RESOURCE_DESC resDesc;
+ memset(&resDesc, 0, sizeof(resDesc));
+
+ if(array_3d) {
+ resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
+ resDesc.res.array.hArray = array_3d;
+ resDesc.flags = 0;
+ }
+ else if(mem.data_height > 0) {
+ resDesc.resType = CU_RESOURCE_TYPE_PITCH2D;
+ resDesc.res.pitch2D.devPtr = mem.device_pointer;
+ resDesc.res.pitch2D.format = format;
+ resDesc.res.pitch2D.numChannels = mem.data_elements;
+ resDesc.res.pitch2D.height = mem.data_height;
+ resDesc.res.pitch2D.width = mem.data_width;
+ resDesc.res.pitch2D.pitchInBytes = dst_pitch;
}
+ else {
+ resDesc.resType = CU_RESOURCE_TYPE_LINEAR;
+ resDesc.res.linear.devPtr = mem.device_pointer;
+ resDesc.res.linear.format = format;
+ resDesc.res.linear.numChannels = mem.data_elements;
+ resDesc.res.linear.sizeInBytes = mem.device_size;
+ }
+
+ CUDA_TEXTURE_DESC texDesc;
+ memset(&texDesc, 0, sizeof(texDesc));
+ texDesc.addressMode[0] = address_mode;
+ texDesc.addressMode[1] = address_mode;
+ texDesc.addressMode[2] = address_mode;
+ texDesc.filterMode = filter_mode;
+ texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
+
+ cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
+
+ /* Resize once */
+ if(flat_slot >= texture_info.size()) {
+ /* Allocate some slots in advance, to reduce amount
+ * of re-allocations. */
+ texture_info.resize(flat_slot + 128);
+ }
+
+ /* Set Mapping and tag that we need to (re-)upload to device */
+ TextureInfo& info = texture_info[flat_slot];
+ info.data = (uint64_t)cmem->texobject;
+ info.cl_buffer = 0;
+ info.interpolation = mem.interpolation;
+ info.extension = mem.extension;
+ info.width = mem.data_width;
+ info.height = mem.data_height;
+ info.depth = mem.data_depth;
+ need_texture_info = true;
}
void tex_free(device_memory& mem)
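
The replacement path above relies entirely on CUDA texture objects (bindless textures), which require sm_30/Kepler or newer; the fixed Fermi texture slots handled through CUtexref are gone. A condensed sketch of the same driver-API sequence for a single-channel float image in pitched linear memory, mirroring the CU_RESOURCE_TYPE_PITCH2D branch above; make_2d_float_texture() is an illustrative helper, not part of Cycles, and error handling is omitted:

#include <cuda.h>
#include <cstring>

static CUtexObject make_2d_float_texture(CUdeviceptr device_ptr,
                                         size_t width, size_t height,
                                         size_t pitch_in_bytes)
{
	/* Describe the backing memory: a pitched 2D region of 1-channel floats. */
	CUDA_RESOURCE_DESC resDesc;
	memset(&resDesc, 0, sizeof(resDesc));
	resDesc.resType = CU_RESOURCE_TYPE_PITCH2D;
	resDesc.res.pitch2D.devPtr = device_ptr;
	resDesc.res.pitch2D.format = CU_AD_FORMAT_FLOAT;
	resDesc.res.pitch2D.numChannels = 1;
	resDesc.res.pitch2D.width = width;
	resDesc.res.pitch2D.height = height;
	resDesc.res.pitch2D.pitchInBytes = pitch_in_bytes;

	/* Describe how the kernel samples it: wrapped, bilinear, normalized coords. */
	CUDA_TEXTURE_DESC texDesc;
	memset(&texDesc, 0, sizeof(texDesc));
	texDesc.addressMode[0] = CU_TR_ADDRESS_MODE_WRAP;
	texDesc.addressMode[1] = CU_TR_ADDRESS_MODE_WRAP;
	texDesc.filterMode = CU_TR_FILTER_MODE_LINEAR;
	texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;

	CUtexObject tex = 0;
	cuTexObjectCreate(&tex, &resDesc, &texDesc, NULL);  /* release later with cuTexObjectDestroy(tex) */
	return tex;
}

The resulting 64-bit handle is what tex_alloc() stores in TextureInfo::data, so the kernel can sample any slot without per-slot texture references.
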
@@ -2550,9 +2495,9 @@ void device_cuda_info(vector<DeviceInfo>& devices)
int major;
cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, num);
- if(major < 2) {
+ if(major < 3) {
VLOG(1) << "Ignoring device \"" << name
- << "\", compute capability is too low.";
+ << "\", this graphics card is no longer supported.";
continue;
}
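
For context, the device filtering done in device_cuda_info() can be sketched with the plain driver API as follows; list_supported_devices() and its logging are illustrative, not Cycles code:

#include <cuda.h>
#include <cstdio>

/* Enumerate CUDA devices and skip anything below compute capability 3.0,
 * similar in spirit to the loop in device_cuda_info() above. */
static void list_supported_devices(void)
{
	if(cuInit(0) != CUDA_SUCCESS)
		return;

	int count = 0;
	cuDeviceGetCount(&count);

	for(int num = 0; num < count; num++) {
		CUdevice dev;
		if(cuDeviceGet(&dev, num) != CUDA_SUCCESS)
			continue;

		char name[256];
		cuDeviceGetName(name, sizeof(name), dev);

		int major = 0, minor = 0;
		cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev);
		cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev);

		if(major < 3) {
			printf("Ignoring %s: compute capability %d.%d is no longer supported.\n", name, major, minor);
			continue;
		}
		printf("Found supported device: %s (sm_%d%d)\n", name, major, minor);
	}
}
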
@@ -2562,8 +2507,7 @@ void device_cuda_info(vector<DeviceInfo>& devices)
info.description = string(name);
info.num = num;
- info.advanced_shading = (major >= 2);
- info.has_fermi_limits = !(major >= 3);
+ info.advanced_shading = (major >= 3);
info.has_half_images = (major >= 3);
info.has_volume_decoupled = false;
info.bvh_layout_mask = BVH_LAYOUT_BVH2;