From 6699454fb642bfd07e85f8d7bd8f8879878e3fc5 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Tue, 2 Jan 2018 22:56:07 +0100 Subject: Cycles: make CUDA code a bit more robust to host/device alloc failures. Fixes a few corner cases found while stress testing host mapped memory. --- intern/cycles/device/device_cuda.cpp | 36 +++++++++++++++++++++------------- intern/cycles/device/device_memory.cpp | 2 +- intern/cycles/render/buffers.cpp | 8 ++++++++ intern/cycles/render/image.cpp | 14 ++++++------- intern/cycles/render/image.h | 1 + intern/cycles/render/object.cpp | 2 +- 6 files changed, 40 insertions(+), 23 deletions(-) (limited to 'intern') diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 391809e5278..29aabd3169c 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -650,7 +650,7 @@ public: void generic_copy_to(device_memory& mem) { - if(mem.device_pointer) { + if(mem.host_pointer && mem.device_pointer) { CUDAContextScope scope(this); cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), mem.host_pointer, mem.memory_size())); } @@ -715,11 +715,11 @@ public: size_t offset = elem*y*w; size_t size = elem*w*h; - if(mem.device_pointer) { + if(mem.host_pointer && mem.device_pointer) { cuda_assert(cuMemcpyDtoH((uchar*)mem.host_pointer + offset, (CUdeviceptr)(mem.device_pointer + offset), size)); } - else { + else if(mem.host_pointer) { memset((char*)mem.host_pointer + offset, 0, size); } } @@ -1118,13 +1118,17 @@ public: int shift_stride = stride*h; int num_shifts = (2*r+1)*(2*r+1); - int mem_size = sizeof(float)*shift_stride*2*num_shifts; + int mem_size = sizeof(float)*shift_stride*num_shifts; int channel_offset = 0; - CUdeviceptr temporary_mem; - cuda_assert(cuMemAlloc(&temporary_mem, mem_size)); - CUdeviceptr difference = temporary_mem; - CUdeviceptr blurDifference = temporary_mem + sizeof(float)*shift_stride * num_shifts; + device_only_memory temporary_mem(this, "Denoising temporary_mem"); + temporary_mem.alloc_to_device(2*mem_size); + + if(have_error()) + return false; + + CUdeviceptr difference = cuda_device_ptr(temporary_mem.device_pointer); + CUdeviceptr blurDifference = difference + mem_size; CUdeviceptr weightAccum = task->nlm_state.temporary_3_ptr; cuda_assert(cuMemsetD8(weightAccum, 0, sizeof(float)*shift_stride)); @@ -1156,7 +1160,7 @@ public: CUDA_LAUNCH_KERNEL_1D(cuNLMUpdateOutput, update_output_args); } - cuMemFree(temporary_mem); + temporary_mem.free(); { CUfunction cuNLMNormalize; @@ -1225,10 +1229,14 @@ public: int num_shifts = (2*r+1)*(2*r+1); int mem_size = sizeof(float)*shift_stride*num_shifts; - CUdeviceptr temporary_mem; - cuda_assert(cuMemAlloc(&temporary_mem, 2*mem_size)); - CUdeviceptr difference = temporary_mem; - CUdeviceptr blurDifference = temporary_mem + mem_size; + device_only_memory temporary_mem(this, "Denoising temporary_mem"); + temporary_mem.alloc_to_device(2*mem_size); + + if(have_error()) + return false; + + CUdeviceptr difference = cuda_device_ptr(temporary_mem.device_pointer); + CUdeviceptr blurDifference = difference + mem_size; { CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian; @@ -1268,7 +1276,7 @@ public: CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args); } - cuMemFree(temporary_mem); + temporary_mem.free(); { CUfunction cuFinalize; diff --git a/intern/cycles/device/device_memory.cpp b/intern/cycles/device/device_memory.cpp index 3ad0946330b..b5db76bb3df 100644 --- a/intern/cycles/device/device_memory.cpp +++ b/intern/cycles/device/device_memory.cpp @@ -86,7 +86,7 @@ void device_memory::device_free() void device_memory::device_copy_to() { - if(data_size) { + if(host_pointer) { device->mem_copy_to(*this); } } diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index 5c7729ec89f..9899fa1c39c 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -151,6 +151,10 @@ bool RenderBuffers::copy_from_device() bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int sample, int components, float *pixels) { + if(buffer.data() == NULL) { + return false; + } + float invsample = 1.0f/sample; float scale = invsample; bool variance = (offset == DENOISING_PASS_NORMAL_VAR) || @@ -218,6 +222,10 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels) { + if(buffer.data() == NULL) { + return false; + } + int pass_offset = 0; for(size_t j = 0; j < params.passes.size(); j++) { diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index 482442cce29..feaa17148ee 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -703,7 +703,7 @@ void ImageManager::device_load_image(Device *device, /* Slot assignment */ int flat_slot = type_index_to_flattened_slot(slot, type); - string name = string_printf("__tex_image_%s_%03d", name_from_type(type).c_str(), flat_slot); + img->mem_name = string_printf("__tex_image_%s_%03d", name_from_type(type).c_str(), flat_slot); /* Free previous texture in slot. */ if(img->mem) { @@ -715,7 +715,7 @@ void ImageManager::device_load_image(Device *device, /* Create new texture. */ if(type == IMAGE_DATA_TYPE_FLOAT4) { device_vector *tex_img - = new device_vector(device, name.c_str(), MEM_TEXTURE); + = new device_vector(device, img->mem_name.c_str(), MEM_TEXTURE); if(!file_load_image(img, type, @@ -741,7 +741,7 @@ void ImageManager::device_load_image(Device *device, } else if(type == IMAGE_DATA_TYPE_FLOAT) { device_vector *tex_img - = new device_vector(device, name.c_str(), MEM_TEXTURE); + = new device_vector(device, img->mem_name.c_str(), MEM_TEXTURE); if(!file_load_image(img, type, @@ -764,7 +764,7 @@ void ImageManager::device_load_image(Device *device, } else if(type == IMAGE_DATA_TYPE_BYTE4) { device_vector *tex_img - = new device_vector(device, name.c_str(), MEM_TEXTURE); + = new device_vector(device, img->mem_name.c_str(), MEM_TEXTURE); if(!file_load_image(img, type, @@ -790,7 +790,7 @@ void ImageManager::device_load_image(Device *device, } else if(type == IMAGE_DATA_TYPE_BYTE) { device_vector *tex_img - = new device_vector(device, name.c_str(), MEM_TEXTURE); + = new device_vector(device, img->mem_name.c_str(), MEM_TEXTURE); if(!file_load_image(img, type, @@ -812,7 +812,7 @@ void ImageManager::device_load_image(Device *device, } else if(type == IMAGE_DATA_TYPE_HALF4) { device_vector *tex_img - = new device_vector(device, name.c_str(), MEM_TEXTURE); + = new device_vector(device, img->mem_name.c_str(), MEM_TEXTURE); if(!file_load_image(img, type, @@ -837,7 +837,7 @@ void ImageManager::device_load_image(Device *device, } else if(type == IMAGE_DATA_TYPE_HALF) { device_vector *tex_img - = new device_vector(device, name.c_str(), MEM_TEXTURE); + = new device_vector(device, img->mem_name.c_str(), MEM_TEXTURE); if(!file_load_image(img, type, diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h index cc7c8544bed..3519a67bc05 100644 --- a/intern/cycles/render/image.h +++ b/intern/cycles/render/image.h @@ -111,6 +111,7 @@ public: InterpolationType interpolation; ExtensionType extension; + string mem_name; device_memory *mem; int users; diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index aef7fc29573..d7143f24850 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -644,7 +644,7 @@ void ObjectManager::device_update_flags(Device *, void ObjectManager::device_update_mesh_offsets(Device *, DeviceScene *dscene, Scene *scene) { - if(scene->objects.size() == 0) { + if(dscene->objects.size() == 0) { return; } -- cgit v1.2.3