diff options
Diffstat (limited to 'intern/cycles/device/device_cuda.cpp')
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 53 |
1 files changed, 31 insertions, 22 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 99537e9a983..3b75142ee67 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -105,7 +105,8 @@ public: device_memory& use_queues_flag, device_memory& work_pool_wgs); - virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&); + virtual SplitKernelFunction* get_split_kernel_function(const string& kernel_name, + const DeviceRequestedFeatures&); virtual int2 split_kernel_local_size(); virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task); }; @@ -1051,8 +1052,6 @@ public: bool denoising_reconstruct(device_ptr color_ptr, device_ptr color_variance_ptr, - device_ptr guide_ptr, - device_ptr guide_variance_ptr, device_ptr output_ptr, DenoisingTask *task) { @@ -1096,8 +1095,8 @@ public: task->reconstruction_state.source_h - max(0, dy)}; void *calc_difference_args[] = {&dx, &dy, - &guide_ptr, - &guide_variance_ptr, + &color_ptr, + &color_variance_ptr, &difference, &local_rect, &task->buffer.w, @@ -1126,8 +1125,6 @@ public: void *construct_gramian_args[] = {&dx, &dy, &blurDifference, &task->buffer.mem.device_pointer, - &color_ptr, - &color_variance_ptr, &task->storage.transform.device_pointer, &task->storage.rank.device_pointer, &task->storage.XtWX.device_pointer, @@ -1294,7 +1291,7 @@ public: DenoisingTask denoising(this); denoising.functions.construct_transform = function_bind(&CUDADevice::denoising_construct_transform, this, &denoising); - denoising.functions.reconstruct = function_bind(&CUDADevice::denoising_reconstruct, this, _1, _2, _3, _4, _5, &denoising); + denoising.functions.reconstruct = function_bind(&CUDADevice::denoising_reconstruct, this, _1, _2, _3, &denoising); denoising.functions.divide_shadow = function_bind(&CUDADevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising); denoising.functions.non_local_means = function_bind(&CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising); denoising.functions.combine_halves = function_bind(&CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising); @@ -1901,17 +1898,13 @@ public: int threads_per_block; cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); - int xthreads = (int)sqrt(threads_per_block); - int ythreads = (int)sqrt(threads_per_block); - - int xblocks = (dim.global_size[0] + xthreads - 1)/xthreads; - int yblocks = (dim.global_size[1] + ythreads - 1)/ythreads; + int xblocks = (dim.global_size[0]*dim.global_size[1] + threads_per_block - 1)/threads_per_block; cuda_assert(cuFuncSetCacheConfig(func, CU_FUNC_CACHE_PREFER_L1)); cuda_assert(cuLaunchKernel(func, - xblocks , yblocks, 1, /* blocks */ - xthreads, ythreads, 1, /* threads */ + xblocks, 1, 1, /* blocks */ + threads_per_block, 1, 1, /* threads */ 0, 0, args, 0)); device->cuda_pop_context(); @@ -2041,7 +2034,8 @@ bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim return !device->have_error(); } -SplitKernelFunction* CUDASplitKernel::get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&) +SplitKernelFunction* CUDASplitKernel::get_split_kernel_function(const string& kernel_name, + const DeviceRequestedFeatures&) { CUfunction func; @@ -2129,18 +2123,34 @@ Device *device_cuda_create(DeviceInfo& info, Stats &stats, bool background) return new CUDADevice(info, stats, background); } -void device_cuda_info(vector<DeviceInfo>& devices) +static CUresult device_cuda_safe_init() { - CUresult result; - int count = 0; +#ifdef _WIN32 + __try { + return cuInit(0); + } + __except(EXCEPTION_EXECUTE_HANDLER) { + /* Ignore crashes inside the CUDA driver and hope we can + * survive even with corrupted CUDA installs. */ + fprintf(stderr, "Cycles CUDA: driver crashed, continuing without CUDA.\n"); + } + + return CUDA_ERROR_NO_DEVICE; +#else + return cuInit(0); +#endif +} - result = cuInit(0); +void device_cuda_info(vector<DeviceInfo>& devices) +{ + CUresult result = device_cuda_safe_init(); if(result != CUDA_SUCCESS) { if(result != CUDA_ERROR_NO_DEVICE) fprintf(stderr, "CUDA cuInit: %s\n", cuewErrorString(result)); return; } + int count = 0; result = cuDeviceGetCount(&count); if(result != CUDA_SUCCESS) { fprintf(stderr, "CUDA cuDeviceGetCount: %s\n", cuewErrorString(result)); @@ -2170,7 +2180,6 @@ void device_cuda_info(vector<DeviceInfo>& devices) info.advanced_shading = (major >= 2); info.has_bindless_textures = (major >= 3); - info.pack_images = false; int pci_location[3] = {0, 0, 0}; cuDeviceGetAttribute(&pci_location[0], CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, num); @@ -2198,7 +2207,7 @@ void device_cuda_info(vector<DeviceInfo>& devices) string device_cuda_capabilities(void) { - CUresult result = cuInit(0); + CUresult result = device_cuda_safe_init(); if(result != CUDA_SUCCESS) { if(result != CUDA_ERROR_NO_DEVICE) { return string("Error initializing CUDA: ") + cuewErrorString(result); |