Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCampbell Barton <ideasman42@gmail.com>2018-01-03 15:44:47 +0300
committerCampbell Barton <ideasman42@gmail.com>2018-01-03 15:44:47 +0300
commitbe403891652a375e5a0ac61b493342ca6d39afb7 (patch)
treeef9637103db6d66c4b311cba5b705d575562a1f8 /intern/cycles
parent060fdb49d64857ff1cbf9937420ed70b10b17086 (diff)
parentcbc7aa80d49e3b36c9ecc0e27ec528b34c491fc1 (diff)
Merge branch 'master' into blender2.8
Diffstat (limited to 'intern/cycles')
-rw-r--r--intern/cycles/device/device_cuda.cpp307
-rw-r--r--intern/cycles/device/device_memory.cpp5
-rw-r--r--intern/cycles/device/device_memory.h3
-rw-r--r--intern/cycles/device/device_multi.cpp16
-rw-r--r--intern/cycles/kernel/svm/svm.h8
-rw-r--r--intern/cycles/kernel/svm/svm_closure.h21
-rw-r--r--intern/cycles/render/bake.cpp7
-rw-r--r--intern/cycles/render/buffers.cpp8
-rw-r--r--intern/cycles/render/image.cpp14
-rw-r--r--intern/cycles/render/image.h1
-rw-r--r--intern/cycles/render/object.cpp2
-rw-r--r--intern/cycles/util/util_system.cpp21
-rw-r--r--intern/cycles/util/util_system.h2
13 files changed, 358 insertions, 57 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 47c09dfebf9..82460af3b17 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -37,6 +37,7 @@
# include <cudaGL.h>
#endif
#include "util/util_debug.h"
+#include "util/util_foreach.h"
#include "util/util_logging.h"
#include "util/util_map.h"
#include "util/util_md5.h"
@@ -128,6 +129,12 @@ public:
CUdevice cuDevice;
CUcontext cuContext;
CUmodule cuModule, cuFilterModule;
+ size_t device_texture_headroom;
+ size_t device_working_headroom;
+ bool move_texture_to_host;
+ size_t map_host_used;
+ size_t map_host_limit;
+ int can_map_host;
int cuDevId;
int cuDevArchitecture;
bool first_error;
@@ -135,12 +142,15 @@ public:
struct CUDAMem {
CUDAMem()
- : texobject(0), array(0) {}
+ : texobject(0), array(0), map_host_pointer(0), free_map_host(false) {}
CUtexObject texobject;
CUarray array;
+ void *map_host_pointer;
+ bool free_map_host;
};
- map<device_memory*, CUDAMem> cuda_mem_map;
+ typedef map<device_memory*, CUDAMem> CUDAMemMap;
+ CUDAMemMap cuda_mem_map;
struct PixelMem {
GLuint cuPBO;
@@ -240,6 +250,13 @@ public:
need_texture_info = false;
+ device_texture_headroom = 0;
+ device_working_headroom = 0;
+ move_texture_to_host = false;
+ map_host_limit = 0;
+ map_host_used = 0;
+ can_map_host = 0;
+
/* Initialize CUDA. */
if(cuda_error(cuInit(0)))
return;
@@ -248,9 +265,16 @@ public:
if(cuda_error(cuDeviceGet(&cuDevice, cuDevId)))
return;
- /* CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render,
+ /* CU_CTX_MAP_HOST for mapping host memory when out of device memory.
+ * CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render,
* so we can predict which memory to map to host. */
+ cuda_assert(cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
+
unsigned int ctx_flags = CU_CTX_LMEM_RESIZE_TO_MAX;
+ if(can_map_host) {
+ ctx_flags |= CU_CTX_MAP_HOST;
+ init_host_memory();
+ }
/* Create context. */
CUresult result;
@@ -611,6 +635,50 @@ public:
VLOG(1) << "Local memory reserved "
<< string_human_readable_number(free_before - free_after) << " bytes. ("
<< string_human_readable_size(free_before - free_after) << ")";
+
+#if 0
+ /* For testing mapped host memory, fill up device memory. */
+ const size_t keep_mb = 1024;
+
+ while(free_after > keep_mb * 1024 * 1024LL) {
+ CUdeviceptr tmp;
+ cuda_assert(cuMemAlloc(&tmp, 10 * 1024 * 1024LL));
+ cuMemGetInfo(&free_after, &total);
+ }
+#endif
+ }
+
+ void init_host_memory()
+ {
+ /* Compute the mapped host memory limit (map_host_limit) from the
+ * amount of physical system RAM, and set the device memory headroom
+ * values used for working and texture memory.
+ *
+ * Limit the amount of host mapped memory, because allocating too much
+ * can cause system instability. Leave at least half or 4 GB of system
+ * memory free, whichever is smaller. */
+ size_t default_limit = 4 * 1024 * 1024 * 1024LL;
+ size_t system_ram = system_physical_ram();
+
+ if(system_ram > 0) {
+ if(system_ram / 2 > default_limit) {
+ map_host_limit = system_ram - default_limit;
+ }
+ else {
+ map_host_limit = system_ram / 2;
+ }
+ }
+ else {
+ VLOG(1) << "Mapped host memory disabled, failed to get system RAM";
+ map_host_limit = 0;
+ }
+
+ /* Amount of device memory to keep free after texture memory
+ * and working memory allocations respectively. We set the working
+ * memory headroom lower so that some space is left after all
+ * texture memory allocations. */
+ device_working_headroom = 32 * 1024 * 1024LL; // 32MB
+ device_texture_headroom = 128 * 1024 * 1024LL; // 128MB
+
+ VLOG(1) << "Mapped host memory limit set to "
+ << string_human_readable_number(map_host_limit) << " bytes. ("
+ << string_human_readable_size(map_host_limit) << ")";
}
void load_texture_info()
@@ -621,20 +689,167 @@ public:
}
}
- CUDAMem *generic_alloc(device_memory& mem, size_t padding = 0)
+ void move_textures_to_host(size_t size, bool for_texture)
+ {
+ /* Re-allocate texture allocations one at a time until `size` bytes
+ * worth of allocations have been moved, or no candidate remains.
+ * `size` is reduced by the device size of each allocation moved.
+ * When `for_texture` is true, only image textures (data_height > 1)
+ * are considered as candidates.
+ *
+ * Signal to reallocate textures in host memory only. The flag is
+ * reset to false at the end of this function. */
+ move_texture_to_host = true;
+
+ while(size > 0) {
+ /* Find the most suitable memory allocation to move in this pass. */
+ device_memory *max_mem = NULL;
+ size_t max_size = 0;
+ bool max_is_image = false;
+
+ foreach(CUDAMemMap::value_type& pair, cuda_mem_map) {
+ device_memory& mem = *pair.first;
+ CUDAMem *cmem = &pair.second;
+
+ bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info);
+ bool is_image = is_texture && (mem.data_height > 1);
+
+ /* Can't move this type of memory: only textures other than
+ * texture_info that are not backed by a CUDA array qualify. */
+ if(!is_texture || cmem->array) {
+ continue;
+ }
+
+ /* Already in host memory. */
+ if(cmem->map_host_pointer) {
+ continue;
+ }
+
+ /* When making room for another texture, only move image textures. */
+ if(for_texture && !is_image) {
+ continue;
+ }
+
+ /* Try to move the largest allocation, prefer moving images:
+ * an image always wins over a non-image, ties are broken by
+ * device size. */
+ if(is_image > max_is_image ||
+ (is_image == max_is_image && mem.device_size > max_size)) {
+ max_is_image = is_image;
+ max_size = mem.device_size;
+ max_mem = &mem;
+ }
+ }
+
+ /* Move to host memory. This part is mutex protected since
+ * multiple CUDA devices could be moving the memory. The
+ * first one will do it, and the rest will adopt the pointer.
+ * The mutex is function-static, so it is shared by all callers
+ * of this function. */
+ if(max_mem) {
+ VLOG(1) << "Move memory from device to host: " << max_mem->name;
+
+ static thread_mutex move_mutex;
+ thread_scoped_lock lock(move_mutex);
+
+ /* Preserve the original device pointer, in case of multi device
+ * we can't change it because the pointer mapping would break.
+ * The pointer and size are restored after the free/alloc pair
+ * below. */
+ device_ptr prev_pointer = max_mem->device_pointer;
+ size_t prev_size = max_mem->device_size;
+
+ tex_free(*max_mem);
+ tex_alloc(*max_mem);
+ size = (max_size >= size)? 0: size - max_size;
+
+ max_mem->device_pointer = prev_pointer;
+ max_mem->device_size = prev_size;
+ }
+ else {
+ break;
+ }
+ }
+
+ /* Update texture info array with new pointers. */
+ load_texture_info();
+
+ move_texture_to_host = false;
+ }
+
+ CUDAMem *generic_alloc(device_memory& mem, size_t pitch_padding = 0)
{
CUDAContextScope scope(this);
+ CUdeviceptr device_pointer = 0;
+ size_t size = mem.memory_size() + pitch_padding;
+
+ CUresult mem_alloc_result = CUDA_ERROR_OUT_OF_MEMORY;
+ const char *status = "";
+
+ /* First try allocating in device memory, respecting headroom. We make
+ * an exception for texture info. It is small and frequently accessed,
+ * so treat it as working memory.
+ *
+ * If there is not enough room for working memory, we will try to move
+ * textures to host memory, assuming the performance impact would have
+ * been worse for working memory. */
+ bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info);
+ bool is_image = is_texture && (mem.data_height > 1);
+
+ size_t headroom = (is_texture)? device_texture_headroom:
+ device_working_headroom;
+
+ size_t total = 0, free = 0;
+ cuMemGetInfo(&free, &total);
+
+ /* Move textures to host memory if needed. */
+ if(!move_texture_to_host && !is_image && (size + headroom) >= free) {
+ move_textures_to_host(size + headroom - free, is_texture);
+ cuMemGetInfo(&free, &total);
+ }
+
+ /* Allocate in device memory. */
+ if(!move_texture_to_host && (size + headroom) < free) {
+ mem_alloc_result = cuMemAlloc(&device_pointer, size);
+ if(mem_alloc_result == CUDA_SUCCESS) {
+ status = " in device memory";
+ }
+ }
+
+ /* Fall back to mapped host memory if needed and possible. */
+ void *map_host_pointer = 0;
+ bool free_map_host = false;
+
+ if(mem_alloc_result != CUDA_SUCCESS && can_map_host &&
+ map_host_used + size < map_host_limit) {
+ if(mem.shared_pointer) {
+ /* Another device already allocated host memory. */
+ mem_alloc_result = CUDA_SUCCESS;
+ map_host_pointer = mem.shared_pointer;
+ }
+ else {
+ /* Allocate host memory ourselves. */
+ mem_alloc_result = cuMemHostAlloc(&map_host_pointer, size,
+ CU_MEMHOSTALLOC_DEVICEMAP |
+ CU_MEMHOSTALLOC_WRITECOMBINED);
+ mem.shared_pointer = map_host_pointer;
+ free_map_host = true;
+ }
+
+ if(mem_alloc_result == CUDA_SUCCESS) {
+ cuda_assert(cuMemHostGetDevicePointer_v2(&device_pointer, mem.shared_pointer, 0));
+ map_host_used += size;
+ status = " in host memory";
+
+ /* Replace host pointer with our host allocation. Only works if
+ * CUDA memory layout is the same and has no pitch padding. */
+ if(pitch_padding == 0 && mem.host_pointer && mem.host_pointer != mem.shared_pointer) {
+ memcpy(mem.shared_pointer, mem.host_pointer, size);
+ mem.host_free();
+ mem.host_pointer = mem.shared_pointer;
+ }
+ }
+ }
+
+ if(mem_alloc_result != CUDA_SUCCESS) {
+ cuda_assert(mem_alloc_result);
+ status = " failed, out of memory";
+ }
+
if(mem.name) {
VLOG(1) << "Buffer allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
- << string_human_readable_size(mem.memory_size()) << ")";
+ << string_human_readable_size(mem.memory_size()) << ")"
+ << status;
}
- /* Allocate memory on device. */
- CUdeviceptr device_pointer = 0;
- size_t size = mem.memory_size();
- cuda_assert(cuMemAlloc(&device_pointer, size + padding));
mem.device_pointer = (device_ptr)device_pointer;
mem.device_size = size;
stats.mem_alloc(size);
@@ -645,14 +860,21 @@ public:
/* Insert into map of allocations. */
CUDAMem *cmem = &cuda_mem_map[&mem];
+ cmem->map_host_pointer = map_host_pointer;
+ cmem->free_map_host = free_map_host;
return cmem;
}
void generic_copy_to(device_memory& mem)
{
- if(mem.device_pointer) {
+ if(mem.host_pointer && mem.device_pointer) {
CUDAContextScope scope(this);
- cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), mem.host_pointer, mem.memory_size()));
+
+ if(mem.host_pointer != mem.shared_pointer) {
+ cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer),
+ mem.host_pointer,
+ mem.memory_size()));
+ }
}
}
@@ -660,8 +882,24 @@ public:
{
if(mem.device_pointer) {
CUDAContextScope scope(this);
+ const CUDAMem& cmem = cuda_mem_map[&mem];
+
+ if(cmem.map_host_pointer) {
+ /* Free host memory. */
+ if(cmem.free_map_host) {
+ cuMemFreeHost(cmem.map_host_pointer);
+ if(mem.host_pointer == mem.shared_pointer) {
+ mem.host_pointer = 0;
+ }
+ mem.shared_pointer = 0;
+ }
- cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer)));
+ map_host_used -= mem.device_size;
+ }
+ else {
+ /* Free device memory. */
+ cuMemFree(mem.device_pointer);
+ }
stats.mem_free(mem.device_size);
mem.device_pointer = 0;
@@ -715,11 +953,11 @@ public:
size_t offset = elem*y*w;
size_t size = elem*w*h;
- if(mem.device_pointer) {
+ if(mem.host_pointer && mem.device_pointer) {
cuda_assert(cuMemcpyDtoH((uchar*)mem.host_pointer + offset,
(CUdeviceptr)(mem.device_pointer + offset), size));
}
- else {
+ else if(mem.host_pointer) {
memset((char*)mem.host_pointer + offset, 0, size);
}
}
@@ -735,7 +973,8 @@ public:
memset(mem.host_pointer, 0, mem.memory_size());
}
- if(mem.device_pointer) {
+ if(mem.device_pointer &&
+ (!mem.host_pointer || mem.host_pointer != mem.shared_pointer)) {
CUDAContextScope scope(this);
cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size()));
}
@@ -774,10 +1013,6 @@ public:
{
CUDAContextScope scope(this);
- VLOG(1) << "Texture allocate: " << mem.name << ", "
- << string_human_readable_number(mem.memory_size()) << " bytes. ("
- << string_human_readable_size(mem.memory_size()) << ")";
-
/* Check if we are on sm_30 or above, for bindless textures. */
bool has_fermi_limits = info.has_fermi_limits;
@@ -881,6 +1116,10 @@ public:
desc.NumChannels = mem.data_elements;
desc.Flags = 0;
+ VLOG(1) << "Array 3D allocate: " << mem.name << ", "
+ << string_human_readable_number(mem.memory_size()) << " bytes. ("
+ << string_human_readable_size(mem.memory_size()) << ")";
+
cuda_assert(cuArray3DCreate(&array_3d, &desc));
if(!array_3d) {
@@ -1118,13 +1357,17 @@ public:
int shift_stride = stride*h;
int num_shifts = (2*r+1)*(2*r+1);
- int mem_size = sizeof(float)*shift_stride*2*num_shifts;
+ int mem_size = sizeof(float)*shift_stride*num_shifts;
int channel_offset = 0;
- CUdeviceptr temporary_mem;
- cuda_assert(cuMemAlloc(&temporary_mem, mem_size));
- CUdeviceptr difference = temporary_mem;
- CUdeviceptr blurDifference = temporary_mem + sizeof(float)*shift_stride * num_shifts;
+ device_only_memory<uchar> temporary_mem(this, "Denoising temporary_mem");
+ temporary_mem.alloc_to_device(2*mem_size);
+
+ if(have_error())
+ return false;
+
+ CUdeviceptr difference = cuda_device_ptr(temporary_mem.device_pointer);
+ CUdeviceptr blurDifference = difference + mem_size;
CUdeviceptr weightAccum = task->nlm_state.temporary_3_ptr;
cuda_assert(cuMemsetD8(weightAccum, 0, sizeof(float)*shift_stride));
@@ -1156,7 +1399,7 @@ public:
CUDA_LAUNCH_KERNEL_1D(cuNLMUpdateOutput, update_output_args);
}
- cuMemFree(temporary_mem);
+ temporary_mem.free();
{
CUfunction cuNLMNormalize;
@@ -1225,10 +1468,14 @@ public:
int num_shifts = (2*r+1)*(2*r+1);
int mem_size = sizeof(float)*shift_stride*num_shifts;
- CUdeviceptr temporary_mem;
- cuda_assert(cuMemAlloc(&temporary_mem, 2*mem_size));
- CUdeviceptr difference = temporary_mem;
- CUdeviceptr blurDifference = temporary_mem + mem_size;
+ device_only_memory<uchar> temporary_mem(this, "Denoising temporary_mem");
+ temporary_mem.alloc_to_device(2*mem_size);
+
+ if(have_error())
+ return false;
+
+ CUdeviceptr difference = cuda_device_ptr(temporary_mem.device_pointer);
+ CUdeviceptr blurDifference = difference + mem_size;
{
CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian;
@@ -1268,7 +1515,7 @@ public:
CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args);
}
- cuMemFree(temporary_mem);
+ temporary_mem.free();
{
CUfunction cuFinalize;
diff --git a/intern/cycles/device/device_memory.cpp b/intern/cycles/device/device_memory.cpp
index 3ad0946330b..82598007a59 100644
--- a/intern/cycles/device/device_memory.cpp
+++ b/intern/cycles/device/device_memory.cpp
@@ -35,7 +35,8 @@ device_memory::device_memory(Device *device, const char *name, MemoryType type)
extension(EXTENSION_REPEAT),
device(device),
device_pointer(0),
- host_pointer(0)
+ host_pointer(0),
+ shared_pointer(0)
{
}
@@ -86,7 +87,7 @@ void device_memory::device_free()
void device_memory::device_copy_to()
{
- if(data_size) {
+ if(host_pointer) {
device->mem_copy_to(*this);
}
}
diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h
index 453dab9bfb3..2a027917066 100644
--- a/intern/cycles/device/device_memory.h
+++ b/intern/cycles/device/device_memory.h
@@ -197,10 +197,13 @@ public:
Device *device;
device_ptr device_pointer;
void *host_pointer;
+ void *shared_pointer;
virtual ~device_memory();
protected:
+ friend class CUDADevice;
+
/* Only create through subclasses. */
device_memory(Device *device, const char *name, MemoryType type);
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index 16238c14aa0..91507e6be0c 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -48,11 +48,17 @@ public:
MultiDevice(DeviceInfo& info, Stats &stats, bool background_)
: Device(info, stats, background_), unique_key(1)
{
- Device *device;
-
foreach(DeviceInfo& subinfo, info.multi_devices) {
- device = Device::create(subinfo, sub_stats_, background);
- devices.push_back(SubDevice(device));
+ Device *device = Device::create(subinfo, sub_stats_, background);
+
+ /* Always add CPU devices at the back since GPU devices can change
+ * host memory pointers, which CPU uses as device pointer. */
+ if(subinfo.type == DEVICE_CPU) {
+ devices.push_back(SubDevice(device));
+ }
+ else {
+ devices.push_front(SubDevice(device));
+ }
}
#ifdef WITH_NETWORK
@@ -63,7 +69,7 @@ public:
vector<string> servers = discovery.get_server_list();
foreach(string& server, servers) {
- device = device_network_create(info, stats, server.c_str());
+ Device *device = device_network_create(info, stats, server.c_str());
if(device)
devices.push_back(SubDevice(device));
}
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index 9ff02c1586b..d3dac5706d0 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -211,9 +211,7 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ccl_a
break;
}
case NODE_CLOSURE_BSDF:
- if(type == SHADER_TYPE_SURFACE) {
- svm_node_closure_bsdf(kg, sd, stack, node, path_flag, &offset);
- }
+ svm_node_closure_bsdf(kg, sd, stack, node, type, path_flag, &offset);
break;
case NODE_CLOSURE_EMISSION:
svm_node_closure_emission(sd, stack, node);
@@ -331,9 +329,7 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ccl_a
break;
# if NODES_FEATURE(NODE_FEATURE_VOLUME)
case NODE_CLOSURE_VOLUME:
- if(type == SHADER_TYPE_VOLUME) {
- svm_node_closure_volume(kg, sd, stack, node, path_flag);
- }
+ svm_node_closure_volume(kg, sd, stack, node, type, path_flag);
break;
# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */
# ifdef __EXTRA_NODES__
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index f04c46ef7f9..47ebe4288e3 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -56,7 +56,7 @@ ccl_device void svm_node_glass_setup(ShaderData *sd, MicrofacetBsdf *bsdf, int t
}
}
-ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int path_flag, int *offset)
+ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ShaderType shader_type, int path_flag, int *offset)
{
uint type, param1_offset, param2_offset;
@@ -67,8 +67,18 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
/* note we read this extra node before weight check, so offset is added */
uint4 data_node = read_node(kg, offset);
- if(mix_weight == 0.0f)
+ /* Only compute BSDF for surfaces, transparent variable is shared with volume extinction. */
+ if(mix_weight == 0.0f || shader_type != SHADER_TYPE_SURFACE) {
+ if(type == CLOSURE_BSDF_PRINCIPLED_ID) {
+ /* Read all principled BSDF extra data to get the right offset. */
+ read_node(kg, offset);
+ read_node(kg, offset);
+ read_node(kg, offset);
+ read_node(kg, offset);
+ }
+
return;
+ }
float3 N = stack_valid(data_node.x)? stack_load_float3(stack, data_node.x): sd->N;
@@ -835,9 +845,14 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
}
}
-ccl_device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int path_flag)
+ccl_device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ShaderType shader_type, int path_flag)
{
#ifdef __VOLUME__
+ /* Only sum extinction for volumes, variable is shared with surface transparency. */
+ if(shader_type != SHADER_TYPE_VOLUME) {
+ return;
+ }
+
uint type, param1_offset, param2_offset;
uint mix_weight_offset;
diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp
index aeb5d1c1316..1fef7a0188f 100644
--- a/intern/cycles/render/bake.cpp
+++ b/intern/cycles/render/bake.cpp
@@ -151,6 +151,10 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre
progress.reset_sample();
progress.set_total_pixel_samples(total_pixel_samples);
+ /* needs to be up to date for baking specific AA samples */
+ dscene->data.integrator.aa_samples = num_samples;
+ device->const_copy_to("__data", &dscene->data, sizeof(dscene->data));
+
for(size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) {
size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit);
@@ -175,9 +179,6 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre
d_output.zero_to_device();
d_input.copy_to_device();
- /* needs to be up to data for attribute access */
- device->const_copy_to("__data", &dscene->data, sizeof(dscene->data));
-
DeviceTask task(DeviceTask::SHADER);
task.shader_input = d_input.device_pointer;
task.shader_output = d_output.device_pointer;
diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp
index c6502df7252..89a44c7ce3c 100644
--- a/intern/cycles/render/buffers.cpp
+++ b/intern/cycles/render/buffers.cpp
@@ -151,6 +151,10 @@ bool RenderBuffers::copy_from_device()
bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int sample, int components, float *pixels)
{
+ if(buffer.data() == NULL) {
+ return false;
+ }
+
float invsample = 1.0f/sample;
float scale = invsample;
bool variance = (offset == DENOISING_PASS_NORMAL_VAR) ||
@@ -218,6 +222,10 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp
bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels)
{
+ if(buffer.data() == NULL) {
+ return false;
+ }
+
int pass_offset = 0;
for(size_t j = 0; j < params.passes.size(); j++) {
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index 482442cce29..feaa17148ee 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -703,7 +703,7 @@ void ImageManager::device_load_image(Device *device,
/* Slot assignment */
int flat_slot = type_index_to_flattened_slot(slot, type);
- string name = string_printf("__tex_image_%s_%03d", name_from_type(type).c_str(), flat_slot);
+ img->mem_name = string_printf("__tex_image_%s_%03d", name_from_type(type).c_str(), flat_slot);
/* Free previous texture in slot. */
if(img->mem) {
@@ -715,7 +715,7 @@ void ImageManager::device_load_image(Device *device,
/* Create new texture. */
if(type == IMAGE_DATA_TYPE_FLOAT4) {
device_vector<float4> *tex_img
- = new device_vector<float4>(device, name.c_str(), MEM_TEXTURE);
+ = new device_vector<float4>(device, img->mem_name.c_str(), MEM_TEXTURE);
if(!file_load_image<TypeDesc::FLOAT, float>(img,
type,
@@ -741,7 +741,7 @@ void ImageManager::device_load_image(Device *device,
}
else if(type == IMAGE_DATA_TYPE_FLOAT) {
device_vector<float> *tex_img
- = new device_vector<float>(device, name.c_str(), MEM_TEXTURE);
+ = new device_vector<float>(device, img->mem_name.c_str(), MEM_TEXTURE);
if(!file_load_image<TypeDesc::FLOAT, float>(img,
type,
@@ -764,7 +764,7 @@ void ImageManager::device_load_image(Device *device,
}
else if(type == IMAGE_DATA_TYPE_BYTE4) {
device_vector<uchar4> *tex_img
- = new device_vector<uchar4>(device, name.c_str(), MEM_TEXTURE);
+ = new device_vector<uchar4>(device, img->mem_name.c_str(), MEM_TEXTURE);
if(!file_load_image<TypeDesc::UINT8, uchar>(img,
type,
@@ -790,7 +790,7 @@ void ImageManager::device_load_image(Device *device,
}
else if(type == IMAGE_DATA_TYPE_BYTE) {
device_vector<uchar> *tex_img
- = new device_vector<uchar>(device, name.c_str(), MEM_TEXTURE);
+ = new device_vector<uchar>(device, img->mem_name.c_str(), MEM_TEXTURE);
if(!file_load_image<TypeDesc::UINT8, uchar>(img,
type,
@@ -812,7 +812,7 @@ void ImageManager::device_load_image(Device *device,
}
else if(type == IMAGE_DATA_TYPE_HALF4) {
device_vector<half4> *tex_img
- = new device_vector<half4>(device, name.c_str(), MEM_TEXTURE);
+ = new device_vector<half4>(device, img->mem_name.c_str(), MEM_TEXTURE);
if(!file_load_image<TypeDesc::HALF, half>(img,
type,
@@ -837,7 +837,7 @@ void ImageManager::device_load_image(Device *device,
}
else if(type == IMAGE_DATA_TYPE_HALF) {
device_vector<half> *tex_img
- = new device_vector<half>(device, name.c_str(), MEM_TEXTURE);
+ = new device_vector<half>(device, img->mem_name.c_str(), MEM_TEXTURE);
if(!file_load_image<TypeDesc::HALF, half>(img,
type,
diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h
index cc7c8544bed..3519a67bc05 100644
--- a/intern/cycles/render/image.h
+++ b/intern/cycles/render/image.h
@@ -111,6 +111,7 @@ public:
InterpolationType interpolation;
ExtensionType extension;
+ string mem_name;
device_memory *mem;
int users;
diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp
index aef7fc29573..d7143f24850 100644
--- a/intern/cycles/render/object.cpp
+++ b/intern/cycles/render/object.cpp
@@ -644,7 +644,7 @@ void ObjectManager::device_update_flags(Device *,
void ObjectManager::device_update_mesh_offsets(Device *, DeviceScene *dscene, Scene *scene)
{
- if(scene->objects.size() == 0) {
+ if(dscene->objects.size() == 0) {
return;
}
diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp
index a942d738b8a..9b1b9a60c30 100644
--- a/intern/cycles/util/util_system.cpp
+++ b/intern/cycles/util/util_system.cpp
@@ -292,5 +292,26 @@ bool system_cpu_support_avx2()
#endif
+/* Return the total amount of physical system RAM in bytes, or 0 when it
+ * can not be determined. Callers treat 0 as "unknown". */
+size_t system_physical_ram()
+{
+#ifdef _WIN32
+	MEMORYSTATUSEX ram;
+	ram.dwLength = sizeof (ram);
+	/* On failure the structure is not filled in, so report unknown. */
+	if(!GlobalMemoryStatusEx(&ram)) {
+		return 0;
+	}
+	/* ullTotalPhys is already expressed in bytes, no scaling needed. */
+	return (size_t)ram.ullTotalPhys;
+#elif defined(__APPLE__)
+	uint64_t ram = 0;
+	size_t len = sizeof(ram);
+	if (sysconf_dummy_guard(), sysctlbyname("hw.memsize", &ram, &len, NULL, 0) == 0) {
+		return ram;
+	}
+	return 0;
+#else
+	/* sysconf() returns -1 on error; keep the signed result so the
+	 * failure is not turned into a huge size_t value. */
+	long ps = sysconf(_SC_PAGESIZE);
+	long pn = sysconf(_SC_PHYS_PAGES);
+	if(ps <= 0 || pn <= 0) {
+		return 0;
+	}
+	return (size_t)ps * (size_t)pn;
+#endif
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_system.h b/intern/cycles/util/util_system.h
index db7a45b2d59..e55dd6dd136 100644
--- a/intern/cycles/util/util_system.h
+++ b/intern/cycles/util/util_system.h
@@ -42,6 +42,8 @@ bool system_cpu_support_sse41();
bool system_cpu_support_avx();
bool system_cpu_support_avx2();
+size_t system_physical_ram();
+
CCL_NAMESPACE_END
#endif /* __UTIL_SYSTEM_H__ */