diff options
author | Campbell Barton <ideasman42@gmail.com> | 2018-01-03 15:44:47 +0300 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2018-01-03 15:44:47 +0300 |
commit | be403891652a375e5a0ac61b493342ca6d39afb7 (patch) | |
tree | ef9637103db6d66c4b311cba5b705d575562a1f8 /intern/cycles | |
parent | 060fdb49d64857ff1cbf9937420ed70b10b17086 (diff) | |
parent | cbc7aa80d49e3b36c9ecc0e27ec528b34c491fc1 (diff) |
Merge branch 'master' into blender2.8
Diffstat (limited to 'intern/cycles')
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 307 | ||||
-rw-r--r-- | intern/cycles/device/device_memory.cpp | 5 | ||||
-rw-r--r-- | intern/cycles/device/device_memory.h | 3 | ||||
-rw-r--r-- | intern/cycles/device/device_multi.cpp | 16 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm.h | 8 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm_closure.h | 21 | ||||
-rw-r--r-- | intern/cycles/render/bake.cpp | 7 | ||||
-rw-r--r-- | intern/cycles/render/buffers.cpp | 8 | ||||
-rw-r--r-- | intern/cycles/render/image.cpp | 14 | ||||
-rw-r--r-- | intern/cycles/render/image.h | 1 | ||||
-rw-r--r-- | intern/cycles/render/object.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/util/util_system.cpp | 21 | ||||
-rw-r--r-- | intern/cycles/util/util_system.h | 2 |
13 files changed, 358 insertions, 57 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 47c09dfebf9..82460af3b17 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -37,6 +37,7 @@ # include <cudaGL.h> #endif #include "util/util_debug.h" +#include "util/util_foreach.h" #include "util/util_logging.h" #include "util/util_map.h" #include "util/util_md5.h" @@ -128,6 +129,12 @@ public: CUdevice cuDevice; CUcontext cuContext; CUmodule cuModule, cuFilterModule; + size_t device_texture_headroom; + size_t device_working_headroom; + bool move_texture_to_host; + size_t map_host_used; + size_t map_host_limit; + int can_map_host; int cuDevId; int cuDevArchitecture; bool first_error; @@ -135,12 +142,15 @@ public: struct CUDAMem { CUDAMem() - : texobject(0), array(0) {} + : texobject(0), array(0), map_host_pointer(0), free_map_host(false) {} CUtexObject texobject; CUarray array; + void *map_host_pointer; + bool free_map_host; }; - map<device_memory*, CUDAMem> cuda_mem_map; + typedef map<device_memory*, CUDAMem> CUDAMemMap; + CUDAMemMap cuda_mem_map; struct PixelMem { GLuint cuPBO; @@ -240,6 +250,13 @@ public: need_texture_info = false; + device_texture_headroom = 0; + device_working_headroom = 0; + move_texture_to_host = false; + map_host_limit = 0; + map_host_used = 0; + can_map_host = 0; + /* Intialize CUDA. */ if(cuda_error(cuInit(0))) return; @@ -248,9 +265,16 @@ public: if(cuda_error(cuDeviceGet(&cuDevice, cuDevId))) return; - /* CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render, + /* CU_CTX_MAP_HOST for mapping host memory when out of device memory. + * CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render, * so we can predict which memory to map to host. */ + cuda_assert(cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice)); + unsigned int ctx_flags = CU_CTX_LMEM_RESIZE_TO_MAX; + if(can_map_host) { + ctx_flags |= CU_CTX_MAP_HOST; + init_host_memory(); + } /* Create context. */ CUresult result; @@ -611,6 +635,50 @@ public: VLOG(1) << "Local memory reserved " << string_human_readable_number(free_before - free_after) << " bytes. (" << string_human_readable_size(free_before - free_after) << ")"; + +#if 0 + /* For testing mapped host memory, fill up device memory. */ + const size_t keep_mb = 1024; + + while(free_after > keep_mb * 1024 * 1024LL) { + CUdeviceptr tmp; + cuda_assert(cuMemAlloc(&tmp, 10 * 1024 * 1024LL)); + cuMemGetInfo(&free_after, &total); + } +#endif + } + + void init_host_memory() + { + /* Limit amount of host mapped memory, because allocating too much can + * cause system instability. Leave at least half or 4 GB of system + * memory free, whichever is smaller. */ + size_t default_limit = 4 * 1024 * 1024 * 1024LL; + size_t system_ram = system_physical_ram(); + + if(system_ram > 0) { + if(system_ram / 2 > default_limit) { + map_host_limit = system_ram - default_limit; + } + else { + map_host_limit = system_ram / 2; + } + } + else { + VLOG(1) << "Mapped host memory disabled, failed to get system RAM"; + map_host_limit = 0; + } + + /* Amount of device memory to keep is free after texture memory + * and working memory allocations respectively. We set the working + * memory limit headroom lower so that some space is left after all + * texture memory allocations. */ + device_working_headroom = 32 * 1024 * 1024LL; // 32MB + device_texture_headroom = 128 * 1024 * 1024LL; // 128MB + + VLOG(1) << "Mapped host memory limit set to " + << string_human_readable_number(map_host_limit) << " bytes. (" + << string_human_readable_size(map_host_limit) << ")"; } void load_texture_info() @@ -621,20 +689,167 @@ public: } } - CUDAMem *generic_alloc(device_memory& mem, size_t padding = 0) + void move_textures_to_host(size_t size, bool for_texture) + { + /* Signal to reallocate textures in host memory only. */ + move_texture_to_host = true; + + while(size > 0) { + /* Find suitable memory allocation to move. */ + device_memory *max_mem = NULL; + size_t max_size = 0; + bool max_is_image = false; + + foreach(CUDAMemMap::value_type& pair, cuda_mem_map) { + device_memory& mem = *pair.first; + CUDAMem *cmem = &pair.second; + + bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info); + bool is_image = is_texture && (mem.data_height > 1); + + /* Can't move this type of memory. */ + if(!is_texture || cmem->array) { + continue; + } + + /* Already in host memory. */ + if(cmem->map_host_pointer) { + continue; + } + + /* For other textures, only move image textures. */ + if(for_texture && !is_image) { + continue; + } + + /* Try to move largest allocation, prefer moving images. */ + if(is_image > max_is_image || + (is_image == max_is_image && mem.device_size > max_size)) { + max_is_image = is_image; + max_size = mem.device_size; + max_mem = &mem; + } + } + + /* Move to host memory. This part is mutex protected since + * multiple CUDA devices could be moving the memory. The + * first one will do it, and the rest will adopt the pointer. */ + if(max_mem) { + VLOG(1) << "Move memory from device to host: " << max_mem->name; + + static thread_mutex move_mutex; + thread_scoped_lock lock(move_mutex); + + /* Preserve the original device pointer, in case of multi device + * we can't change it because the pointer mapping would break. */ + device_ptr prev_pointer = max_mem->device_pointer; + size_t prev_size = max_mem->device_size; + + tex_free(*max_mem); + tex_alloc(*max_mem); + size = (max_size >= size)? 0: size - max_size; + + max_mem->device_pointer = prev_pointer; + max_mem->device_size = prev_size; + } + else { + break; + } + } + + /* Update texture info array with new pointers. */ + load_texture_info(); + + move_texture_to_host = false; + } + + CUDAMem *generic_alloc(device_memory& mem, size_t pitch_padding = 0) { CUDAContextScope scope(this); + CUdeviceptr device_pointer = 0; + size_t size = mem.memory_size() + pitch_padding; + + CUresult mem_alloc_result = CUDA_ERROR_OUT_OF_MEMORY; + const char *status = ""; + + /* First try allocating in device memory, respecting headroom. We make + * an exception for texture info. It is small and frequently accessed, + * so treat it as working memory. + * + * If there is not enough room for working memory, we will try to move + * textures to host memory, assuming the performance impact would have + * been worse for working memory. */ + bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info); + bool is_image = is_texture && (mem.data_height > 1); + + size_t headroom = (is_texture)? device_texture_headroom: + device_working_headroom; + + size_t total = 0, free = 0; + cuMemGetInfo(&free, &total); + + /* Move textures to host memory if needed. */ + if(!move_texture_to_host && !is_image && (size + headroom) >= free) { + move_textures_to_host(size + headroom - free, is_texture); + cuMemGetInfo(&free, &total); + } + + /* Allocate in device memory. */ + if(!move_texture_to_host && (size + headroom) < free) { + mem_alloc_result = cuMemAlloc(&device_pointer, size); + if(mem_alloc_result == CUDA_SUCCESS) { + status = " in device memory"; + } + } + + /* Fall back to mapped host memory if needed and possible. */ + void *map_host_pointer = 0; + bool free_map_host = false; + + if(mem_alloc_result != CUDA_SUCCESS && can_map_host && + map_host_used + size < map_host_limit) { + if(mem.shared_pointer) { + /* Another device already allocated host memory. */ + mem_alloc_result = CUDA_SUCCESS; + map_host_pointer = mem.shared_pointer; + } + else { + /* Allocate host memory ourselves. */ + mem_alloc_result = cuMemHostAlloc(&map_host_pointer, size, + CU_MEMHOSTALLOC_DEVICEMAP | + CU_MEMHOSTALLOC_WRITECOMBINED); + mem.shared_pointer = map_host_pointer; + free_map_host = true; + } + + if(mem_alloc_result == CUDA_SUCCESS) { + cuda_assert(cuMemHostGetDevicePointer_v2(&device_pointer, mem.shared_pointer, 0)); + map_host_used += size; + status = " in host memory"; + + /* Replace host pointer with our host allocation. Only works if + * CUDA memory layout is the same and has no pitch padding. */ + if(pitch_padding == 0 && mem.host_pointer && mem.host_pointer != mem.shared_pointer) { + memcpy(mem.shared_pointer, mem.host_pointer, size); + mem.host_free(); + mem.host_pointer = mem.shared_pointer; + } + } + } + + if(mem_alloc_result != CUDA_SUCCESS) { + cuda_assert(mem_alloc_result); + status = " failed, out of memory"; + } + if(mem.name) { VLOG(1) << "Buffer allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" - << string_human_readable_size(mem.memory_size()) << ")"; + << string_human_readable_size(mem.memory_size()) << ")" + << status; } - /* Allocate memory on device. */ - CUdeviceptr device_pointer = 0; - size_t size = mem.memory_size(); - cuda_assert(cuMemAlloc(&device_pointer, size + padding)); mem.device_pointer = (device_ptr)device_pointer; mem.device_size = size; stats.mem_alloc(size); @@ -645,14 +860,21 @@ public: /* Insert into map of allocations. */ CUDAMem *cmem = &cuda_mem_map[&mem]; + cmem->map_host_pointer = map_host_pointer; + cmem->free_map_host = free_map_host; return cmem; } void generic_copy_to(device_memory& mem) { - if(mem.device_pointer) { + if(mem.host_pointer && mem.device_pointer) { CUDAContextScope scope(this); - cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), mem.host_pointer, mem.memory_size())); + + if(mem.host_pointer != mem.shared_pointer) { + cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), + mem.host_pointer, + mem.memory_size())); + } } } @@ -660,8 +882,24 @@ public: { if(mem.device_pointer) { CUDAContextScope scope(this); + const CUDAMem& cmem = cuda_mem_map[&mem]; + + if(cmem.map_host_pointer) { + /* Free host memory. */ + if(cmem.free_map_host) { + cuMemFreeHost(cmem.map_host_pointer); + if(mem.host_pointer == mem.shared_pointer) { + mem.host_pointer = 0; + } + mem.shared_pointer = 0; + } - cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer))); + map_host_used -= mem.device_size; + } + else { + /* Free device memory. */ + cuMemFree(mem.device_pointer); + } stats.mem_free(mem.device_size); mem.device_pointer = 0; @@ -715,11 +953,11 @@ public: size_t offset = elem*y*w; size_t size = elem*w*h; - if(mem.device_pointer) { + if(mem.host_pointer && mem.device_pointer) { cuda_assert(cuMemcpyDtoH((uchar*)mem.host_pointer + offset, (CUdeviceptr)(mem.device_pointer + offset), size)); } - else { + else if(mem.host_pointer) { memset((char*)mem.host_pointer + offset, 0, size); } } @@ -735,7 +973,8 @@ public: memset(mem.host_pointer, 0, mem.memory_size()); } - if(mem.device_pointer) { + if(mem.device_pointer && + (!mem.host_pointer || mem.host_pointer != mem.shared_pointer)) { CUDAContextScope scope(this); cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size())); } @@ -774,10 +1013,6 @@ public: { CUDAContextScope scope(this); - VLOG(1) << "Texture allocate: " << mem.name << ", " - << string_human_readable_number(mem.memory_size()) << " bytes. (" - << string_human_readable_size(mem.memory_size()) << ")"; - /* Check if we are on sm_30 or above, for bindless textures. */ bool has_fermi_limits = info.has_fermi_limits; @@ -881,6 +1116,10 @@ public: desc.NumChannels = mem.data_elements; desc.Flags = 0; + VLOG(1) << "Array 3D allocate: " << mem.name << ", " + << string_human_readable_number(mem.memory_size()) << " bytes. (" + << string_human_readable_size(mem.memory_size()) << ")"; + cuda_assert(cuArray3DCreate(&array_3d, &desc)); if(!array_3d) { @@ -1118,13 +1357,17 @@ public: int shift_stride = stride*h; int num_shifts = (2*r+1)*(2*r+1); - int mem_size = sizeof(float)*shift_stride*2*num_shifts; + int mem_size = sizeof(float)*shift_stride*num_shifts; int channel_offset = 0; - CUdeviceptr temporary_mem; - cuda_assert(cuMemAlloc(&temporary_mem, mem_size)); - CUdeviceptr difference = temporary_mem; - CUdeviceptr blurDifference = temporary_mem + sizeof(float)*shift_stride * num_shifts; + device_only_memory<uchar> temporary_mem(this, "Denoising temporary_mem"); + temporary_mem.alloc_to_device(2*mem_size); + + if(have_error()) + return false; + + CUdeviceptr difference = cuda_device_ptr(temporary_mem.device_pointer); + CUdeviceptr blurDifference = difference + mem_size; CUdeviceptr weightAccum = task->nlm_state.temporary_3_ptr; cuda_assert(cuMemsetD8(weightAccum, 0, sizeof(float)*shift_stride)); @@ -1156,7 +1399,7 @@ public: CUDA_LAUNCH_KERNEL_1D(cuNLMUpdateOutput, update_output_args); } - cuMemFree(temporary_mem); + temporary_mem.free(); { CUfunction cuNLMNormalize; @@ -1225,10 +1468,14 @@ public: int num_shifts = (2*r+1)*(2*r+1); int mem_size = sizeof(float)*shift_stride*num_shifts; - CUdeviceptr temporary_mem; - cuda_assert(cuMemAlloc(&temporary_mem, 2*mem_size)); - CUdeviceptr difference = temporary_mem; - CUdeviceptr blurDifference = temporary_mem + mem_size; + device_only_memory<uchar> temporary_mem(this, "Denoising temporary_mem"); + temporary_mem.alloc_to_device(2*mem_size); + + if(have_error()) + return false; + + CUdeviceptr difference = cuda_device_ptr(temporary_mem.device_pointer); + CUdeviceptr blurDifference = difference + mem_size; { CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian; @@ -1268,7 +1515,7 @@ public: CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args); } - cuMemFree(temporary_mem); + temporary_mem.free(); { CUfunction cuFinalize; diff --git a/intern/cycles/device/device_memory.cpp b/intern/cycles/device/device_memory.cpp index 3ad0946330b..82598007a59 100644 --- a/intern/cycles/device/device_memory.cpp +++ b/intern/cycles/device/device_memory.cpp @@ -35,7 +35,8 @@ device_memory::device_memory(Device *device, const char *name, MemoryType type) extension(EXTENSION_REPEAT), device(device), device_pointer(0), - host_pointer(0) + host_pointer(0), + shared_pointer(0) { } @@ -86,7 +87,7 @@ void device_memory::device_free() void device_memory::device_copy_to() { - if(data_size) { + if(host_pointer) { device->mem_copy_to(*this); } } diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h index 453dab9bfb3..2a027917066 100644 --- a/intern/cycles/device/device_memory.h +++ b/intern/cycles/device/device_memory.h @@ -197,10 +197,13 @@ public: Device *device; device_ptr device_pointer; void *host_pointer; + void *shared_pointer; virtual ~device_memory(); protected: + friend class CUDADevice; + /* Only create through subclasses. */ device_memory(Device *device, const char *name, MemoryType type); diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index 16238c14aa0..91507e6be0c 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -48,11 +48,17 @@ public: MultiDevice(DeviceInfo& info, Stats &stats, bool background_) : Device(info, stats, background_), unique_key(1) { - Device *device; - foreach(DeviceInfo& subinfo, info.multi_devices) { - device = Device::create(subinfo, sub_stats_, background); - devices.push_back(SubDevice(device)); + Device *device = Device::create(subinfo, sub_stats_, background); + + /* Always add CPU devices at the back since GPU devices can change + * host memory pointers, which CPU uses as device pointer. */ + if(subinfo.type == DEVICE_CPU) { + devices.push_back(SubDevice(device)); + } + else { + devices.push_front(SubDevice(device)); + } } #ifdef WITH_NETWORK @@ -63,7 +69,7 @@ public: vector<string> servers = discovery.get_server_list(); foreach(string& server, servers) { - device = device_network_create(info, stats, server.c_str()); + Device *device = device_network_create(info, stats, server.c_str()); if(device) devices.push_back(SubDevice(device)); } diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index 9ff02c1586b..d3dac5706d0 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -211,9 +211,7 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ccl_a break; } case NODE_CLOSURE_BSDF: - if(type == SHADER_TYPE_SURFACE) { - svm_node_closure_bsdf(kg, sd, stack, node, path_flag, &offset); - } + svm_node_closure_bsdf(kg, sd, stack, node, type, path_flag, &offset); break; case NODE_CLOSURE_EMISSION: svm_node_closure_emission(sd, stack, node); @@ -331,9 +329,7 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ccl_a break; # if NODES_FEATURE(NODE_FEATURE_VOLUME) case NODE_CLOSURE_VOLUME: - if(type == SHADER_TYPE_VOLUME) { - svm_node_closure_volume(kg, sd, stack, node, path_flag); - } + svm_node_closure_volume(kg, sd, stack, node, type, path_flag); break; # endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */ # ifdef __EXTRA_NODES__ diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index f04c46ef7f9..47ebe4288e3 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++ b/intern/cycles/kernel/svm/svm_closure.h @@ -56,7 +56,7 @@ ccl_device void svm_node_glass_setup(ShaderData *sd, MicrofacetBsdf *bsdf, int t } } -ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int path_flag, int *offset) +ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ShaderType shader_type, int path_flag, int *offset) { uint type, param1_offset, param2_offset; @@ -67,8 +67,18 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * /* note we read this extra node before weight check, so offset is added */ uint4 data_node = read_node(kg, offset); - if(mix_weight == 0.0f) + /* Only compute BSDF for surfaces, transparent variable is shared with volume extinction. */ + if(mix_weight == 0.0f || shader_type != SHADER_TYPE_SURFACE) { + if(type == CLOSURE_BSDF_PRINCIPLED_ID) { + /* Read all principled BSDF extra data to get the right offset. */ + read_node(kg, offset); + read_node(kg, offset); + read_node(kg, offset); + read_node(kg, offset); + } + return; + } float3 N = stack_valid(data_node.x)? stack_load_float3(stack, data_node.x): sd->N; @@ -835,9 +845,14 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * } } -ccl_device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int path_flag) +ccl_device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ShaderType shader_type, int path_flag) { #ifdef __VOLUME__ + /* Only sum extinction for volumes, variable is shared with surface transparency. */ + if(shader_type != SHADER_TYPE_VOLUME) { + return; + } + uint type, param1_offset, param2_offset; uint mix_weight_offset; diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp index aeb5d1c1316..1fef7a0188f 100644 --- a/intern/cycles/render/bake.cpp +++ b/intern/cycles/render/bake.cpp @@ -151,6 +151,10 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre progress.reset_sample(); progress.set_total_pixel_samples(total_pixel_samples); + /* needs to be up to date for baking specific AA samples */ + dscene->data.integrator.aa_samples = num_samples; + device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); + for(size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) { size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit); @@ -175,9 +179,6 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre d_output.zero_to_device(); d_input.copy_to_device(); - /* needs to be up to data for attribute access */ - device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); - DeviceTask task(DeviceTask::SHADER); task.shader_input = d_input.device_pointer; task.shader_output = d_output.device_pointer; diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index c6502df7252..89a44c7ce3c 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -151,6 +151,10 @@ bool RenderBuffers::copy_from_device() bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int sample, int components, float *pixels) { + if(buffer.data() == NULL) { + return false; + } + float invsample = 1.0f/sample; float scale = invsample; bool variance = (offset == DENOISING_PASS_NORMAL_VAR) || @@ -218,6 +222,10 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels) { + if(buffer.data() == NULL) { + return false; + } + int pass_offset = 0; for(size_t j = 0; j < params.passes.size(); j++) { diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index 482442cce29..feaa17148ee 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -703,7 +703,7 @@ void ImageManager::device_load_image(Device *device, /* Slot assignment */ int flat_slot = type_index_to_flattened_slot(slot, type); - string name = string_printf("__tex_image_%s_%03d", name_from_type(type).c_str(), flat_slot); + img->mem_name = string_printf("__tex_image_%s_%03d", name_from_type(type).c_str(), flat_slot); /* Free previous texture in slot. */ if(img->mem) { @@ -715,7 +715,7 @@ void ImageManager::device_load_image(Device *device, /* Create new texture. */ if(type == IMAGE_DATA_TYPE_FLOAT4) { device_vector<float4> *tex_img - = new device_vector<float4>(device, name.c_str(), MEM_TEXTURE); + = new device_vector<float4>(device, img->mem_name.c_str(), MEM_TEXTURE); if(!file_load_image<TypeDesc::FLOAT, float>(img, type, @@ -741,7 +741,7 @@ void ImageManager::device_load_image(Device *device, } else if(type == IMAGE_DATA_TYPE_FLOAT) { device_vector<float> *tex_img - = new device_vector<float>(device, name.c_str(), MEM_TEXTURE); + = new device_vector<float>(device, img->mem_name.c_str(), MEM_TEXTURE); if(!file_load_image<TypeDesc::FLOAT, float>(img, type, @@ -764,7 +764,7 @@ void ImageManager::device_load_image(Device *device, } else if(type == IMAGE_DATA_TYPE_BYTE4) { device_vector<uchar4> *tex_img - = new device_vector<uchar4>(device, name.c_str(), MEM_TEXTURE); + = new device_vector<uchar4>(device, img->mem_name.c_str(), MEM_TEXTURE); if(!file_load_image<TypeDesc::UINT8, uchar>(img, type, @@ -790,7 +790,7 @@ void ImageManager::device_load_image(Device *device, } else if(type == IMAGE_DATA_TYPE_BYTE) { device_vector<uchar> *tex_img - = new device_vector<uchar>(device, name.c_str(), MEM_TEXTURE); + = new device_vector<uchar>(device, img->mem_name.c_str(), MEM_TEXTURE); if(!file_load_image<TypeDesc::UINT8, uchar>(img, type, @@ -812,7 +812,7 @@ void ImageManager::device_load_image(Device *device, } else if(type == IMAGE_DATA_TYPE_HALF4) { device_vector<half4> *tex_img - = new device_vector<half4>(device, name.c_str(), MEM_TEXTURE); + = new device_vector<half4>(device, img->mem_name.c_str(), MEM_TEXTURE); if(!file_load_image<TypeDesc::HALF, half>(img, type, @@ -837,7 +837,7 @@ void ImageManager::device_load_image(Device *device, } else if(type == IMAGE_DATA_TYPE_HALF) { device_vector<half> *tex_img - = new device_vector<half>(device, name.c_str(), MEM_TEXTURE); + = new device_vector<half>(device, img->mem_name.c_str(), MEM_TEXTURE); if(!file_load_image<TypeDesc::HALF, half>(img, type, diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h index cc7c8544bed..3519a67bc05 100644 --- a/intern/cycles/render/image.h +++ b/intern/cycles/render/image.h @@ -111,6 +111,7 @@ public: InterpolationType interpolation; ExtensionType extension; + string mem_name; device_memory *mem; int users; diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index aef7fc29573..d7143f24850 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -644,7 +644,7 @@ void ObjectManager::device_update_flags(Device *, void ObjectManager::device_update_mesh_offsets(Device *, DeviceScene *dscene, Scene *scene) { - if(scene->objects.size() == 0) { + if(dscene->objects.size() == 0) { return; } diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp index a942d738b8a..9b1b9a60c30 100644 --- a/intern/cycles/util/util_system.cpp +++ b/intern/cycles/util/util_system.cpp @@ -292,5 +292,26 @@ bool system_cpu_support_avx2() #endif +size_t system_physical_ram() +{ +#ifdef _WIN32 + MEMORYSTATUSEX ram; + ram.dwLength = sizeof (ram); + GlobalMemoryStatusEx(&ram); + return ram.ullTotalPhys * 1024; +#elif defined(__APPLE__) + uint64_t ram = 0; + size_t len = sizeof(ram); + if (sysctlbyname("hw.memsize", &ram, &len, NULL, 0) == 0) { + return ram; + } + return 0; +#else + size_t ps = sysconf(_SC_PAGESIZE); + size_t pn = sysconf(_SC_PHYS_PAGES); + return ps * pn; +#endif +} + CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_system.h b/intern/cycles/util/util_system.h index db7a45b2d59..e55dd6dd136 100644 --- a/intern/cycles/util/util_system.h +++ b/intern/cycles/util/util_system.h @@ -42,6 +42,8 @@ bool system_cpu_support_sse41(); bool system_cpu_support_avx(); bool system_cpu_support_avx2(); +size_t system_physical_ram(); + CCL_NAMESPACE_END #endif /* __UTIL_SYSTEM_H__ */ |