Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCampbell Barton <ideasman42@gmail.com>2017-11-04 13:45:52 +0300
committerCampbell Barton <ideasman42@gmail.com>2017-11-04 13:45:52 +0300
commitd4fe083b356d7967ce6d9716cd0e6ba3039d1655 (patch)
tree49eb05d3589f180c1056691efa5dd7c67b28dca8 /intern/cycles/device/device_cuda.cpp
parent10024603ee904a4201ae35b87643012543274d0f (diff)
parent01a3c9560938c98793a2d2a0b61e574ed5c27e4a (diff)
Merge branch 'master' into blender2.8
Diffstat (limited to 'intern/cycles/device/device_cuda.cpp')
-rw-r--r--intern/cycles/device/device_cuda.cpp363
1 files changed, 203 insertions, 160 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index c742e91c561..62d8a92a024 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -274,7 +274,7 @@ public:
delete split_kernel;
- if(info.has_bindless_textures) {
+ if(!info.has_fermi_limits) {
texture_info.free();
}
@@ -547,13 +547,13 @@ public:
void load_texture_info()
{
- if(info.has_bindless_textures && need_texture_info) {
+ if(!info.has_fermi_limits && need_texture_info) {
texture_info.copy_to_device();
need_texture_info = false;
}
}
- void generic_alloc(device_memory& mem)
+ void generic_alloc(device_memory& mem, size_t padding = 0)
{
CUDAContextScope scope(this);
@@ -565,7 +565,7 @@ public:
CUdeviceptr device_pointer;
size_t size = mem.memory_size();
- cuda_assert(cuMemAlloc(&device_pointer, size));
+ cuda_assert(cuMemAlloc(&device_pointer, size + padding));
mem.device_pointer = (device_ptr)device_pointer;
mem.device_size = size;
stats.mem_alloc(size);
@@ -701,7 +701,7 @@ public:
<< string_human_readable_size(mem.memory_size()) << ")";
/* Check if we are on sm_30 or above, for bindless textures. */
- bool has_bindless_textures = info.has_bindless_textures;
+ bool has_fermi_limits = info.has_fermi_limits;
/* General variables for both architectures */
string bind_name = mem.name;
@@ -732,30 +732,8 @@ public:
filter_mode = CU_TR_FILTER_MODE_LINEAR;
}
- /* General variables for Fermi */
- CUtexref texref = NULL;
-
- if(!has_bindless_textures && mem.interpolation != INTERPOLATION_NONE) {
- if(mem.data_depth > 1) {
- /* Kernel uses different bind names for 2d and 3d float textures,
- * so we have to adjust couple of things here.
- */
- vector<string> tokens;
- string_split(tokens, mem.name, "_");
- bind_name = string_printf("__tex_image_%s_3d_%s",
- tokens[2].c_str(),
- tokens[3].c_str());
- }
-
- cuda_assert(cuModuleGetTexRef(&texref, cuModule, bind_name.c_str()));
-
- if(!texref) {
- return;
- }
- }
-
+ /* Data Storage */
if(mem.interpolation == INTERPOLATION_NONE) {
- /* Data Storage */
generic_alloc(mem);
generic_copy_to(mem);
@@ -774,179 +752,243 @@ public:
uint32_t ptr = (uint32_t)mem.device_pointer;
cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
}
+
+ tex_interp_map[mem.device_pointer] = false;
+ return;
}
- else {
- /* Texture Storage */
- CUarray handle = NULL;
-
- CUarray_format_enum format;
- switch(mem.data_type) {
- case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
- case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
- case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break;
- case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break;
- case TYPE_HALF: format = CU_AD_FORMAT_HALF; break;
- default: assert(0); return;
- }
+ /* Image Texture Storage */
+ CUtexref texref = NULL;
+
+ if(has_fermi_limits) {
if(mem.data_depth > 1) {
- CUDA_ARRAY3D_DESCRIPTOR desc;
+ /* Kernel uses different bind names for 2d and 3d float textures,
+ * so we have to adjust couple of things here.
+ */
+ vector<string> tokens;
+ string_split(tokens, mem.name, "_");
+ bind_name = string_printf("__tex_image_%s_3d_%s",
+ tokens[2].c_str(),
+ tokens[3].c_str());
+ }
- desc.Width = mem.data_width;
- desc.Height = mem.data_height;
- desc.Depth = mem.data_depth;
- desc.Format = format;
- desc.NumChannels = mem.data_elements;
- desc.Flags = 0;
+ cuda_assert(cuModuleGetTexRef(&texref, cuModule, bind_name.c_str()));
- cuda_assert(cuArray3DCreate(&handle, &desc));
+ if(!texref) {
+ return;
}
- else {
- CUDA_ARRAY_DESCRIPTOR desc;
+ }
- desc.Width = mem.data_width;
- desc.Height = mem.data_height;
- desc.Format = format;
- desc.NumChannels = mem.data_elements;
+ CUarray_format_enum format;
+ switch(mem.data_type) {
+ case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
+ case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
+ case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break;
+ case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break;
+ case TYPE_HALF: format = CU_AD_FORMAT_HALF; break;
+ default: assert(0); return;
+ }
- cuda_assert(cuArrayCreate(&handle, &desc));
- }
- if(!handle) {
+ CUarray array_3d = NULL;
+ size_t src_pitch = mem.data_width * dsize * mem.data_elements;
+ size_t dst_pitch = src_pitch;
+
+ if(mem.data_depth > 1) {
+ /* 3D texture using array, there is no API for linear memory. */
+ CUDA_ARRAY3D_DESCRIPTOR desc;
+
+ desc.Width = mem.data_width;
+ desc.Height = mem.data_height;
+ desc.Depth = mem.data_depth;
+ desc.Format = format;
+ desc.NumChannels = mem.data_elements;
+ desc.Flags = 0;
+
+ cuda_assert(cuArray3DCreate(&array_3d, &desc));
+
+ if(!array_3d) {
return;
}
- /* Allocate 3D, 2D or 1D memory */
- if(mem.data_depth > 1) {
- CUDA_MEMCPY3D param;
- memset(&param, 0, sizeof(param));
- param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
- param.dstArray = handle;
- param.srcMemoryType = CU_MEMORYTYPE_HOST;
- param.srcHost = (void*)mem.data_pointer;
- param.srcPitch = mem.data_width*dsize*mem.data_elements;
- param.WidthInBytes = param.srcPitch;
- param.Height = mem.data_height;
- param.Depth = mem.data_depth;
-
- cuda_assert(cuMemcpy3D(&param));
- }
- else if(mem.data_height > 1) {
- CUDA_MEMCPY2D param;
- memset(&param, 0, sizeof(param));
- param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
- param.dstArray = handle;
- param.srcMemoryType = CU_MEMORYTYPE_HOST;
- param.srcHost = (void*)mem.data_pointer;
- param.srcPitch = mem.data_width*dsize*mem.data_elements;
- param.WidthInBytes = param.srcPitch;
- param.Height = mem.data_height;
-
- cuda_assert(cuMemcpy2D(&param));
- }
- else
- cuda_assert(cuMemcpyHtoA(handle, 0, (void*)mem.data_pointer, size));
+ CUDA_MEMCPY3D param;
+ memset(&param, 0, sizeof(param));
+ param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
+ param.dstArray = array_3d;
+ param.srcMemoryType = CU_MEMORYTYPE_HOST;
+ param.srcHost = (void*)mem.data_pointer;
+ param.srcPitch = src_pitch;
+ param.WidthInBytes = param.srcPitch;
+ param.Height = mem.data_height;
+ param.Depth = mem.data_depth;
- /* Fermi and Kepler */
- mem.device_pointer = (device_ptr)handle;
- mem.device_size = size;
+ cuda_assert(cuMemcpy3D(&param));
+ mem.device_pointer = (device_ptr)array_3d;
+ mem.device_size = size;
stats.mem_alloc(size);
+ }
+ else if(mem.data_height > 1) {
+ /* 2D texture, using pitch aligned linear memory. */
+ int alignment = 0;
+ cuda_assert(cuDeviceGetAttribute(&alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, cuDevice));
+ dst_pitch = align_up(src_pitch, alignment);
+ size_t dst_size = dst_pitch * mem.data_height;
- if(has_bindless_textures) {
- /* Bindless Textures - Kepler */
- int flat_slot = 0;
- if(string_startswith(mem.name, "__tex_image")) {
- int pos = string(mem.name).rfind("_");
- flat_slot = atoi(mem.name + pos + 1);
- }
- else {
- assert(0);
- }
+ generic_alloc(mem, dst_size - mem.memory_size());
+
+ CUDA_MEMCPY2D param;
+ memset(&param, 0, sizeof(param));
+ param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
+ param.dstDevice = mem.device_pointer;
+ param.dstPitch = dst_pitch;
+ param.srcMemoryType = CU_MEMORYTYPE_HOST;
+ param.srcHost = (void*)mem.data_pointer;
+ param.srcPitch = src_pitch;
+ param.WidthInBytes = param.srcPitch;
+ param.Height = mem.data_height;
- CUDA_RESOURCE_DESC resDesc;
- memset(&resDesc, 0, sizeof(resDesc));
+ cuda_assert(cuMemcpy2DUnaligned(&param));
+ }
+ else {
+ /* 1D texture, using linear memory. */
+ generic_alloc(mem);
+ cuda_assert(cuMemcpyHtoD(mem.device_pointer, (void*)mem.data_pointer, size));
+ }
+
+ if(!has_fermi_limits) {
+ /* Kepler+, bindless textures. */
+ int flat_slot = 0;
+ if(string_startswith(mem.name, "__tex_image")) {
+ int pos = string(mem.name).rfind("_");
+ flat_slot = atoi(mem.name + pos + 1);
+ }
+ else {
+ assert(0);
+ }
+
+ CUDA_RESOURCE_DESC resDesc;
+ memset(&resDesc, 0, sizeof(resDesc));
+
+ if(mem.data_depth > 1) {
resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
- resDesc.res.array.hArray = handle;
+ resDesc.res.array.hArray = array_3d;
resDesc.flags = 0;
+ }
+ else if(mem.data_height > 1) {
+ resDesc.resType = CU_RESOURCE_TYPE_PITCH2D;
+ resDesc.res.pitch2D.devPtr = mem.device_pointer;
+ resDesc.res.pitch2D.format = format;
+ resDesc.res.pitch2D.numChannels = mem.data_elements;
+ resDesc.res.pitch2D.height = mem.data_height;
+ resDesc.res.pitch2D.width = mem.data_width;
+ resDesc.res.pitch2D.pitchInBytes = dst_pitch;
+ }
+ else {
+ resDesc.resType = CU_RESOURCE_TYPE_LINEAR;
+ resDesc.res.linear.devPtr = mem.device_pointer;
+ resDesc.res.linear.format = format;
+ resDesc.res.linear.numChannels = mem.data_elements;
+ resDesc.res.linear.sizeInBytes = mem.device_size;
+ }
- CUDA_TEXTURE_DESC texDesc;
- memset(&texDesc, 0, sizeof(texDesc));
- texDesc.addressMode[0] = address_mode;
- texDesc.addressMode[1] = address_mode;
- texDesc.addressMode[2] = address_mode;
- texDesc.filterMode = filter_mode;
- texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
+ CUDA_TEXTURE_DESC texDesc;
+ memset(&texDesc, 0, sizeof(texDesc));
+ texDesc.addressMode[0] = address_mode;
+ texDesc.addressMode[1] = address_mode;
+ texDesc.addressMode[2] = address_mode;
+ texDesc.filterMode = filter_mode;
+ texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
- CUtexObject tex = 0;
- cuda_assert(cuTexObjectCreate(&tex, &resDesc, &texDesc, NULL));
+ CUtexObject tex = 0;
+ cuda_assert(cuTexObjectCreate(&tex, &resDesc, &texDesc, NULL));
- /* Safety check */
- if((uint)tex > UINT_MAX) {
- assert(0);
- }
+ /* Safety check */
+ if((uint)tex > UINT_MAX) {
+ assert(0);
+ }
- /* Resize once */
- if(flat_slot >= texture_info.size()) {
- /* Allocate some slots in advance, to reduce amount
- * of re-allocations. */
- texture_info.resize(flat_slot + 128);
- }
+ /* Resize once */
+ if(flat_slot >= texture_info.size()) {
+ /* Allocate some slots in advance, to reduce amount
+ * of re-allocations. */
+ texture_info.resize(flat_slot + 128);
+ }
- /* Set Mapping and tag that we need to (re-)upload to device */
- TextureInfo& info = texture_info[flat_slot];
- info.data = (uint64_t)tex;
- info.cl_buffer = 0;
- info.interpolation = mem.interpolation;
- info.extension = mem.extension;
- info.width = mem.data_width;
- info.height = mem.data_height;
- info.depth = mem.data_depth;
-
- tex_bindless_map[mem.device_pointer] = tex;
- need_texture_info = true;
+ /* Set Mapping and tag that we need to (re-)upload to device */
+ TextureInfo& info = texture_info[flat_slot];
+ info.data = (uint64_t)tex;
+ info.cl_buffer = 0;
+ info.interpolation = mem.interpolation;
+ info.extension = mem.extension;
+ info.width = mem.data_width;
+ info.height = mem.data_height;
+ info.depth = mem.data_depth;
+
+ tex_bindless_map[mem.device_pointer] = tex;
+ need_texture_info = true;
+ }
+ else {
+ /* Fermi, fixed texture slots. */
+ if(mem.data_depth > 1) {
+ cuda_assert(cuTexRefSetArray(texref, array_3d, CU_TRSA_OVERRIDE_FORMAT));
+ }
+ else if(mem.data_height > 1) {
+ CUDA_ARRAY_DESCRIPTOR array_desc;
+ array_desc.Format = format;
+ array_desc.Height = mem.data_height;
+ array_desc.Width = mem.data_width;
+ array_desc.NumChannels = mem.data_elements;
+ cuda_assert(cuTexRefSetAddress2D_v3(texref, &array_desc, mem.device_pointer, dst_pitch));
}
else {
- /* Regular Textures - Fermi */
- cuda_assert(cuTexRefSetArray(texref, handle, CU_TRSA_OVERRIDE_FORMAT));
- cuda_assert(cuTexRefSetFilterMode(texref, filter_mode));
- cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES));
-
- cuda_assert(cuTexRefSetAddressMode(texref, 0, address_mode));
- cuda_assert(cuTexRefSetAddressMode(texref, 1, address_mode));
- if(mem.data_depth > 1) {
- cuda_assert(cuTexRefSetAddressMode(texref, 2, address_mode));
- }
+ cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size));
+ }
- cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements));
+ /* Attach to texture reference. */
+ cuda_assert(cuTexRefSetFilterMode(texref, filter_mode));
+ cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES));
+ cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements));
+ cuda_assert(cuTexRefSetAddressMode(texref, 0, address_mode));
+ cuda_assert(cuTexRefSetAddressMode(texref, 1, address_mode));
+ if(mem.data_depth > 1) {
+ cuda_assert(cuTexRefSetAddressMode(texref, 2, address_mode));
}
}
/* Fermi and Kepler */
- tex_interp_map[mem.device_pointer] = (mem.interpolation != INTERPOLATION_NONE);
+ tex_interp_map[mem.device_pointer] = true;
}
void tex_free(device_memory& mem)
{
if(mem.device_pointer) {
- if(tex_interp_map[mem.device_pointer]) {
+ bool interp = tex_interp_map[mem.device_pointer];
+ tex_interp_map.erase(tex_interp_map.find(mem.device_pointer));
+
+ if(interp) {
CUDAContextScope scope(this);
- cuArrayDestroy((CUarray)mem.device_pointer);
- /* Free CUtexObject (Bindless Textures) */
- if(info.has_bindless_textures && tex_bindless_map[mem.device_pointer]) {
- CUtexObject tex = tex_bindless_map[mem.device_pointer];
- cuTexObjectDestroy(tex);
+ if(!info.has_fermi_limits) {
+ /* Free bindless texture. */
+ if(tex_bindless_map[mem.device_pointer]) {
+ CUtexObject tex = tex_bindless_map[mem.device_pointer];
+ cuTexObjectDestroy(tex);
+ }
}
- tex_interp_map.erase(tex_interp_map.find(mem.device_pointer));
- mem.device_pointer = 0;
-
- stats.mem_free(mem.device_size);
- mem.device_size = 0;
+ if(mem.data_depth > 1) {
+ /* Free array. */
+ cuArrayDestroy((CUarray)mem.device_pointer);
+ stats.mem_free(mem.device_size);
+ mem.device_pointer = 0;
+ mem.device_size = 0;
+ }
+ else {
+ generic_free(mem);
+ }
}
else {
- tex_interp_map.erase(tex_interp_map.find(mem.device_pointer));
generic_free(mem);
}
}
@@ -2195,7 +2237,8 @@ void device_cuda_info(vector<DeviceInfo>& devices)
info.num = num;
info.advanced_shading = (major >= 2);
- info.has_bindless_textures = (major >= 3);
+ info.has_fermi_limits = !(major >= 3);
+ info.has_half_images = (major >= 3);
info.has_volume_decoupled = false;
info.has_qbvh = false;