From cdb0b3b1dcd4e9962426422868b2f40535670a5c Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel
Date: Sun, 8 Oct 2017 04:32:25 +0200
Subject: Code refactor: use DeviceInfo to enable QBVH and decoupled volume
 shading.

---
 intern/cycles/device/device_cuda.cpp | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'intern/cycles/device/device_cuda.cpp')

diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index dcbe6033bcc..56a56c5217c 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -2128,6 +2128,8 @@ void device_cuda_info(vector<DeviceInfo>& devices)
 
 		info.advanced_shading = (major >= 2);
 		info.has_bindless_textures = (major >= 3);
+		info.has_volume_decoupled = false;
+		info.has_qbvh = false;
 
 		int pci_location[3] = {0, 0, 0};
 		cuDeviceGetAttribute(&pci_location[0], CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, num);
-- 
cgit v1.2.3


From e360d003ea45ee233c6f10c03ff57c956929b383 Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel
Date: Sun, 8 Oct 2017 19:08:44 +0200
Subject: Cycles: schedule more work for non-display and compute preemption
 CUDA cards.

This change affects CUDA GPUs not connected to a display, or connected to a
display but supporting compute preemption, so that the display does not
freeze. I couldn't find an official list, but compute preemption seems to be
supported only with GTX 1070+ and Linux (not GTX 1060 and below, or Windows).

This further improves small tile rendering performance, provided a single
tile has enough samples x pixels to keep the GPU busy.
---
 intern/cycles/device/device_cuda.cpp | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

(limited to 'intern/cycles/device/device_cuda.cpp')

diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 56a56c5217c..216c85f24e7 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1313,9 +1313,14 @@ public:
 		CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);
 
 		/* Prepare work size. More step samples render faster, but for now we
-		 * remain conservative to avoid driver timeouts. */
+		 * remain conservative for GPUs connected to a display to avoid driver
+		 * timeouts and display freezing. */
 		int min_blocks, num_threads_per_block;
 		cuda_assert(cuOccupancyMaxPotentialBlockSize(&min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0));
+		if(!info.display_device) {
+			min_blocks *= 8;
+		}
+
 		uint step_samples = divide_up(min_blocks * num_threads_per_block, wtile->w * wtile->h);;
 
 		/* Render all samples. */
@@ -2109,7 +2114,6 @@ void device_cuda_info(vector<DeviceInfo>& devices)
 
 	for(int num = 0; num < count; num++) {
 		char name[256];
-		int attr;
 
 		if(cuDeviceGetName(name, 256, num) != CUDA_SUCCESS)
 			continue;
@@ -2141,14 +2145,21 @@
 		                                  (unsigned int)pci_location[1],
 		                                  (unsigned int)pci_location[2]);
 
-		/* if device has a kernel timeout, assume it is used for display */
-		if(cuDeviceGetAttribute(&attr, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, num) == CUDA_SUCCESS && attr == 1) {
+		/* If device has a kernel timeout and no compute preemption, we assume
+		 * it is connected to a display and will freeze the display while doing
+		 * computations. */
+		int timeout_attr = 0, preempt_attr = 0;
+		cuDeviceGetAttribute(&timeout_attr, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, num);
+		cuDeviceGetAttribute(&preempt_attr, CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, num);
+
+		if(timeout_attr && !preempt_attr) {
			info.description += " (Display)";
			info.display_device = true;
			display_devices.push_back(info);
 		}
-		else
+		else {
 			devices.push_back(info);
+		}
 	}
 
 	if(!display_devices.empty())
-- 
cgit v1.2.3
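
For illustration, the standalone C++ sketch below mirrors the step-sample
scheduling arithmetic changed by the second patch. The occupancy figures,
tile size and display_device value are invented example numbers rather than
output from a real device, and divide_up is re-declared locally so the
snippet compiles on its own.

#include <cstdio>

/* Round-up integer division, as used for the step_samples computation. */
static unsigned int divide_up(unsigned int x, unsigned int y)
{
	return (x + y - 1) / y;
}

int main()
{
	/* Hypothetical occupancy result, standing in for what
	 * cuOccupancyMaxPotentialBlockSize might report. */
	int min_blocks = 20;
	int num_threads_per_block = 256;
	bool display_device = false;  /* GPU not driving a display. */

	/* Non-display devices get 8x more blocks per launch, as in the patch. */
	if(!display_device) {
		min_blocks *= 8;
	}

	/* Example small tile of 32x32 pixels. */
	unsigned int tile_w = 32, tile_h = 32;
	unsigned int step_samples = divide_up(min_blocks * num_threads_per_block,
	                                      tile_w * tile_h);

	/* 20 * 8 * 256 = 40960 threads over 1024 pixels -> 40 samples per launch;
	 * without the 8x factor it would be 5. */
	printf("step_samples = %u\n", step_samples);
	return 0;
}

With these example numbers a non-display GPU is asked to render 40 samples of
a 32x32 tile per kernel launch instead of 5, which is the effect the commit
message describes for small tiles.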