Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles')
-rw-r--r-- intern/cycles/blender/addon/properties.py | 3
-rw-r--r-- intern/cycles/blender/addon/ui.py | 1
-rw-r--r-- intern/cycles/blender/blender_python.cpp | 1
-rw-r--r-- intern/cycles/device/device_cpu.cpp | 7
-rw-r--r-- intern/cycles/device/device_cuda.cpp | 6
-rw-r--r-- intern/cycles/device/device_split_kernel.h | 3
-rw-r--r-- intern/cycles/device/opencl/opencl.h | 19
-rw-r--r-- intern/cycles/device/opencl/opencl_base.cpp | 30
-rw-r--r-- intern/cycles/device/opencl/opencl_split.cpp | 11
-rw-r--r-- intern/cycles/device/opencl/opencl_util.cpp | 12
-rw-r--r-- intern/cycles/kernel/closure/bsdf.h | 5
-rw-r--r-- intern/cycles/kernel/closure/bsdf_principled_diffuse.h | 8
-rw-r--r-- intern/cycles/kernel/osl/osl_closures.cpp | 4
-rw-r--r-- intern/cycles/kernel/osl/osl_services.cpp | 2
-rw-r--r-- intern/cycles/kernel/split/kernel_queue_enqueue.h | 3
-rw-r--r-- intern/cycles/kernel/svm/svm_closure.h | 5
-rw-r--r-- intern/cycles/render/constant_fold.cpp | 8
-rw-r--r-- intern/cycles/render/light.cpp | 4
-rw-r--r-- intern/cycles/render/nodes.cpp | 6
-rw-r--r-- intern/cycles/render/nodes.h | 2
-rw-r--r-- intern/cycles/render/osl.cpp | 1
-rw-r--r-- intern/cycles/util/util_debug.cpp | 4
-rw-r--r-- intern/cycles/util/util_debug.h | 4
-rw-r--r-- intern/cycles/util/util_logging.h | 16
-rw-r--r-- intern/cycles/util/util_math_float3.h | 6
25 files changed, 130 insertions, 41 deletions
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 7f8d28e0618..68474529ed3 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -703,6 +703,9 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
cls.debug_use_opencl_debug = BoolProperty(name="Debug OpenCL", default=False)
+ cls.debug_opencl_mem_limit = IntProperty(name="Memory limit", default=0,
+ description="Artificial limit on OpenCL memory usage in MB (0 to disable limit)")
+
@classmethod
def unregister(cls):
del bpy.types.Scene.cycles
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 38a39e19003..49beebe5ab4 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -1608,6 +1608,7 @@ class CyclesRender_PT_debug(CyclesButtonsPanel, Panel):
col.prop(cscene, "debug_opencl_device_type", text="Device")
col.prop(cscene, "debug_opencl_kernel_single_program", text="Single Program")
col.prop(cscene, "debug_use_opencl_debug", text="Debug")
+ col.prop(cscene, "debug_opencl_mem_limit")
class CyclesParticle_PT_CurveSettings(CyclesButtonsPanel, Panel):
diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp
index 01570b1e3f9..54973fd1b7f 100644
--- a/intern/cycles/blender/blender_python.cpp
+++ b/intern/cycles/blender/blender_python.cpp
@@ -106,6 +106,7 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene)
}
/* Synchronize other OpenCL flags. */
flags.opencl.debug = get_boolean(cscene, "debug_use_opencl_debug");
+ flags.opencl.mem_limit = ((size_t)get_int(cscene, "debug_opencl_mem_limit"))*1024*1024;
flags.opencl.single_program = get_boolean(cscene, "debug_opencl_kernel_single_program");
return flags.opencl.device_type != opencl_device_type ||
flags.opencl.kernel_type != opencl_kernel_type;
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 29bb1f91a40..18112437b45 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -149,7 +149,8 @@ public:
device_memory& use_queues_flag,
device_memory& work_pool_wgs);
- virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&);
+ virtual SplitKernelFunction* get_split_kernel_function(const string& kernel_name,
+ const DeviceRequestedFeatures&);
virtual int2 split_kernel_local_size();
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task);
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
@@ -248,6 +249,7 @@ public:
REGISTER_SPLIT_KERNEL(direct_lighting);
REGISTER_SPLIT_KERNEL(shadow_blocked_ao);
REGISTER_SPLIT_KERNEL(shadow_blocked_dl);
+ REGISTER_SPLIT_KERNEL(enqueue_inactive);
REGISTER_SPLIT_KERNEL(next_iteration_setup);
REGISTER_SPLIT_KERNEL(indirect_subsurface);
REGISTER_SPLIT_KERNEL(buffer_update);
@@ -931,7 +933,8 @@ bool CPUSplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim,
return true;
}
-SplitKernelFunction* CPUSplitKernel::get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&)
+SplitKernelFunction* CPUSplitKernel::get_split_kernel_function(const string& kernel_name,
+ const DeviceRequestedFeatures&)
{
CPUSplitKernelFunction *kernel = new CPUSplitKernelFunction(device);
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 29fa08d94b1..3a29538aa13 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -105,7 +105,8 @@ public:
device_memory& use_queues_flag,
device_memory& work_pool_wgs);
- virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&);
+ virtual SplitKernelFunction* get_split_kernel_function(const string& kernel_name,
+ const DeviceRequestedFeatures&);
virtual int2 split_kernel_local_size();
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task);
};
@@ -2037,7 +2038,8 @@ bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim
return !device->have_error();
}
-SplitKernelFunction* CUDASplitKernel::get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&)
+SplitKernelFunction* CUDASplitKernel::get_split_kernel_function(const string& kernel_name,
+ const DeviceRequestedFeatures&)
{
CUfunction func;
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index 2bac1998cb7..9c42cb58520 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -125,7 +125,8 @@ public:
device_memory& use_queues_flag,
device_memory& work_pool_wgs) = 0;
- virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&) = 0;
+ virtual SplitKernelFunction* get_split_kernel_function(const string& kernel_name,
+ const DeviceRequestedFeatures&) = 0;
virtual int2 split_kernel_local_size() = 0;
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task) = 0;
};
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index 399fae9b42e..78ca377d933 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -27,6 +27,9 @@
CCL_NAMESPACE_BEGIN
+/* Disable workarounds, seems to be working fine on latest drivers. */
+#define CYCLES_DISABLE_DRIVER_WORKAROUNDS
+
/* Define CYCLES_DISABLE_DRIVER_WORKAROUNDS to disable workaounds for testing */
#ifndef CYCLES_DISABLE_DRIVER_WORKAROUNDS
/* Work around AMD driver hangs by ensuring each command is finished before doing anything else. */
@@ -84,7 +87,7 @@ public:
string *error = NULL);
static bool device_version_check(cl_device_id device,
string *error = NULL);
- static string get_hardware_id(string platform_name,
+ static string get_hardware_id(const string& platform_name,
cl_device_id device_id);
static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
bool force_all = false);
@@ -247,17 +250,17 @@ public:
public:
OpenCLProgram() : loaded(false), device(NULL) {}
OpenCLProgram(OpenCLDeviceBase *device,
- string program_name,
- string kernel_name,
- string kernel_build_options,
+ const string& program_name,
+ const string& kernel_name,
+ const string& kernel_build_options,
bool use_stdout = true);
~OpenCLProgram();
void add_kernel(ustring name);
void load();
- bool is_loaded() { return loaded; }
- string get_log() { return log; }
+ bool is_loaded() const { return loaded; }
+ const string& get_log() const { return log; }
void report_error();
cl_kernel operator()();
@@ -271,8 +274,8 @@ public:
bool load_binary(const string& clbin, const string *debug_src = NULL);
bool save_binary(const string& clbin);
- void add_log(string msg, bool is_debug);
- void add_error(string msg);
+ void add_log(const string& msg, bool is_debug);
+ void add_error(const string& msg);
bool loaded;
cl_program program;
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index e4ab979dcbf..509da7a0a84 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -20,6 +20,7 @@
#include "kernel/kernel_types.h"
+#include "util/util_algorithm.h"
#include "util/util_foreach.h"
#include "util/util_logging.h"
#include "util/util_md5.h"
@@ -276,6 +277,25 @@ void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryTyp
size_t size = mem.memory_size();
+ /* check there is enough memory available for the allocation */
+ cl_ulong max_alloc_size = 0;
+ clGetDeviceInfo(cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_alloc_size, NULL);
+
+ if(DebugFlags().opencl.mem_limit) {
+ max_alloc_size = min(max_alloc_size,
+ cl_ulong(DebugFlags().opencl.mem_limit - stats.mem_used));
+ }
+
+ if(size > max_alloc_size) {
+ string error = "Scene too complex to fit in available memory.";
+ if(name != NULL) {
+ error += string_printf(" (allocating buffer %s failed.)", name);
+ }
+ set_error(error);
+
+ return;
+ }
+
cl_mem_flags mem_flag;
void *mem_ptr = NULL;
@@ -1226,7 +1246,7 @@ void OpenCLDeviceBase::store_cached_kernel(
}
string OpenCLDeviceBase::build_options_for_base_program(
- const DeviceRequestedFeatures& /*requested_features*/)
+ const DeviceRequestedFeatures& requested_features)
{
/* TODO(sergey): By default we compile all features, meaning
* mega kernel is not getting feature-based optimizations.
@@ -1234,6 +1254,14 @@ string OpenCLDeviceBase::build_options_for_base_program(
* Ideally we need always compile kernel with as less features
* enabled as possible to keep performance at it's max.
*/
+
+ /* For now disable baking when not in use as this has major
+ * impact on kernel build times.
+ */
+ if(!requested_features.use_baking) {
+ return "-D__NO_BAKING__";
+ }
+
return "";
}
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 08b632ee9d3..76d9983e9a2 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -25,6 +25,7 @@
#include "device/device_split_kernel.h"
+#include "util/util_algorithm.h"
#include "util/util_logging.h"
#include "util/util_md5.h"
#include "util/util_path.h"
@@ -263,7 +264,7 @@ public:
explicit OpenCLSplitKernel(OpenCLDeviceSplitKernel *device) : DeviceSplitKernel(device), device(device) {
}
- virtual SplitKernelFunction* get_split_kernel_function(string kernel_name,
+ virtual SplitKernelFunction* get_split_kernel_function(const string& kernel_name,
const DeviceRequestedFeatures& requested_features)
{
OpenCLSplitKernelFunction* kernel = new OpenCLSplitKernelFunction(device, cached_memory);
@@ -423,12 +424,18 @@ public:
cl_ulong max_buffer_size;
clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);
+
+ if(DebugFlags().opencl.mem_limit) {
+ max_buffer_size = min(max_buffer_size,
+ cl_ulong(DebugFlags().opencl.mem_limit - device->stats.mem_used));
+ }
+
VLOG(1) << "Maximum device allocation size: "
<< string_human_readable_number(max_buffer_size) << " bytes. ("
<< string_human_readable_size(max_buffer_size) << ").";
size_t num_elements = max_elements_for_max_buffer_size(kg, data, max_buffer_size / 2);
- int2 global_size = make_int2(round_down((int)sqrt(num_elements), 64), (int)sqrt(num_elements));
+ int2 global_size = make_int2(max(round_down((int)sqrt(num_elements), 64), 64), (int)sqrt(num_elements));
VLOG(1) << "Global size: " << global_size << ".";
return global_size;
}
diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp
index 8ba2a8e26da..0d34af3e040 100644
--- a/intern/cycles/device/opencl/opencl_util.cpp
+++ b/intern/cycles/device/opencl/opencl_util.cpp
@@ -241,9 +241,9 @@ string OpenCLCache::get_kernel_md5()
}
OpenCLDeviceBase::OpenCLProgram::OpenCLProgram(OpenCLDeviceBase *device,
- string program_name,
- string kernel_file,
- string kernel_build_options,
+ const string& program_name,
+ const string& kernel_file,
+ const string& kernel_build_options,
bool use_stdout)
: device(device),
program_name(program_name),
@@ -274,7 +274,7 @@ void OpenCLDeviceBase::OpenCLProgram::release()
}
}
-void OpenCLDeviceBase::OpenCLProgram::add_log(string msg, bool debug)
+void OpenCLDeviceBase::OpenCLProgram::add_log(const string& msg, bool debug)
{
if(!use_stdout) {
log += msg + "\n";
@@ -288,7 +288,7 @@ void OpenCLDeviceBase::OpenCLProgram::add_log(string msg, bool debug)
}
}
-void OpenCLDeviceBase::OpenCLProgram::add_error(string msg)
+void OpenCLDeviceBase::OpenCLProgram::add_error(const string& msg)
{
if(use_stdout) {
fprintf(stderr, "%s\n", msg.c_str());
@@ -707,7 +707,7 @@ bool OpenCLInfo::device_version_check(cl_device_id device,
return true;
}
-string OpenCLInfo::get_hardware_id(string platform_name, cl_device_id device_id)
+string OpenCLInfo::get_hardware_id(const string& platform_name, cl_device_id device_id)
{
if(platform_name == "AMD Accelerated Parallel Processing" || platform_name == "Apple") {
/* Use cl_amd_device_topology extension. */
diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index a04c157dc40..86a00d2124d 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -423,6 +423,11 @@ ccl_device bool bsdf_merge(ShaderClosure *a, ShaderClosure *b)
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
return bsdf_hair_merge(a, b);
+#ifdef __PRINCIPLED__
+ case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
+ case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
+ return bsdf_principled_diffuse_merge(a, b);
+#endif
#ifdef __VOLUME__
case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
return volume_henyey_greenstein_merge(a, b);
diff --git a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
index 215c32e1ffb..f8ca64293b0 100644
--- a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
@@ -58,6 +58,14 @@ ccl_device int bsdf_principled_diffuse_setup(PrincipledDiffuseBsdf *bsdf)
return SD_BSDF|SD_BSDF_HAS_EVAL;
}
+ccl_device bool bsdf_principled_diffuse_merge(const ShaderClosure *a, const ShaderClosure *b)
+{
+ const PrincipledDiffuseBsdf *bsdf_a = (const PrincipledDiffuseBsdf*)a;
+ const PrincipledDiffuseBsdf *bsdf_b = (const PrincipledDiffuseBsdf*)b;
+
+ return (isequal_float3(bsdf_a->N, bsdf_b->N) && bsdf_a->roughness == bsdf_b->roughness);
+}
+
ccl_device float3 bsdf_principled_diffuse_eval_reflect(const ShaderClosure *sc, const float3 I,
const float3 omega_in, float *pdf)
{
diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp
index 2f0897434ec..14c5c1c3db5 100644
--- a/intern/cycles/kernel/osl/osl_closures.cpp
+++ b/intern/cycles/kernel/osl/osl_closures.cpp
@@ -156,7 +156,7 @@ BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction, microfacet_beckmann_refra
BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction)
BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, HairBsdf, LABEL_GLOSSY)
- CLOSURE_FLOAT3_PARAM(HairReflectionClosure, unused),
+ CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.N),
CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness1),
CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness2),
CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T),
@@ -164,7 +164,7 @@ BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, HairBsdf, LABEL_GLOSSY
BSDF_CLOSURE_CLASS_END(HairReflection, hair_reflection)
BSDF_CLOSURE_CLASS_BEGIN(HairTransmission, hair_transmission, HairBsdf, LABEL_GLOSSY)
- CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, unused),
+ CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, params.N),
CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness1),
CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness2),
CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T),
diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp
index b767c60c617..1535496c73d 100644
--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@@ -824,7 +824,7 @@ bool OSLRenderServices::get_background_attribute(KernelGlobals *kg, ShaderData *
bool OSLRenderServices::get_attribute(OSL::ShaderGlobals *sg, bool derivatives, ustring object_name,
TypeDesc type, ustring name, void *val)
{
- if(sg->renderstate == NULL)
+ if(sg == NULL || sg->renderstate == NULL)
return false;
ShaderData *sd = (ShaderData *)(sg->renderstate);
diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h
index e2e841f36d3..66ce2dfb6f1 100644
--- a/intern/cycles/kernel/split/kernel_queue_enqueue.h
+++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h
@@ -51,7 +51,8 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg,
int queue_number = -1;
if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER)) {
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER) ||
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) {
queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS;
}
else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index 844245ee2d4..7704aa545c8 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -158,8 +158,8 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
}
/* diffuse */
- if(fabsf(average(base_color)) > CLOSURE_WEIGHT_CUTOFF) {
- if(subsurface < CLOSURE_WEIGHT_CUTOFF && diffuse_weight > CLOSURE_WEIGHT_CUTOFF) {
+ if(fabsf(average(mixed_ss_base_color)) > CLOSURE_WEIGHT_CUTOFF) {
+ if(subsurface <= CLOSURE_WEIGHT_CUTOFF && diffuse_weight > CLOSURE_WEIGHT_CUTOFF) {
float3 diff_weight = weight * base_color * diffuse_weight;
PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), diff_weight);
@@ -725,6 +725,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
HairBsdf *bsdf = (HairBsdf*)bsdf_alloc(sd, sizeof(HairBsdf), weight);
if(bsdf) {
+ bsdf->N = N;
bsdf->roughness1 = param1;
bsdf->roughness2 = param2;
bsdf->offset = -stack_load_float(stack, data_node.z);
diff --git a/intern/cycles/render/constant_fold.cpp b/intern/cycles/render/constant_fold.cpp
index 2569d9eec27..943b218f0e4 100644
--- a/intern/cycles/render/constant_fold.cpp
+++ b/intern/cycles/render/constant_fold.cpp
@@ -160,6 +160,14 @@ bool ConstantFolder::try_bypass_or_make_constant(ShaderInput *input, bool clamp)
bypass(input->link);
return true;
}
+ else {
+ /* disconnect other inputs if we can't fully bypass due to clamp */
+ foreach(ShaderInput *other, node->inputs) {
+ if(other != input && other->link) {
+ graph->disconnect(other);
+ }
+ }
+ }
return false;
}
diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp
index 625dd3ded39..93d88c5642c 100644
--- a/intern/cycles/render/light.cpp
+++ b/intern/cycles/render/light.cpp
@@ -224,6 +224,10 @@ void LightManager::disable_ineffective_light(Device *device, Scene *scene)
bool LightManager::object_usable_as_light(Object *object) {
Mesh *mesh = object->mesh;
+ /* Skip objects with NaNs */
+ if (!object->bounds.valid()) {
+ return false;
+ }
/* Skip if we are not visible for BSDFs. */
if(!(object->visibility & (PATH_RAY_DIFFUSE|PATH_RAY_GLOSSY|PATH_RAY_TRANSMIT))) {
return false;
diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp
index 166156f7ac3..86e25df1da3 100644
--- a/intern/cycles/render/nodes.cpp
+++ b/intern/cycles/render/nodes.cpp
@@ -2337,6 +2337,12 @@ PrincipledBsdfNode::PrincipledBsdfNode()
distribution_orig = NBUILTIN_CLOSURES;
}
+bool PrincipledBsdfNode::has_surface_bssrdf()
+{
+ ShaderInput *subsurface_in = input("Subsurface");
+ return (subsurface_in->link != NULL || subsurface > CLOSURE_WEIGHT_CUTOFF);
+}
+
void PrincipledBsdfNode::attributes(Shader *shader, AttributeRequestSet *attributes)
{
if(shader->has_surface) {
diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h
index c6ab47fcc84..c0271a3c8eb 100644
--- a/intern/cycles/render/nodes.h
+++ b/intern/cycles/render/nodes.h
@@ -374,7 +374,7 @@ public:
SHADER_NODE_CLASS(PrincipledBsdfNode)
bool has_spatial_varying() { return true; }
- bool has_surface_bssrdf() { return true; }
+ bool has_surface_bssrdf();
bool has_bssrdf_bump();
void compile(SVMCompiler& compiler, ShaderInput *metallic, ShaderInput *subsurface, ShaderInput *subsurface_radius,
ShaderInput *specular, ShaderInput *roughness, ShaderInput *specular_tint, ShaderInput *anisotropic,
diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp
index 6bff29d1c76..a794f233718 100644
--- a/intern/cycles/render/osl.cpp
+++ b/intern/cycles/render/osl.cpp
@@ -156,6 +156,7 @@ void OSLShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *s
og->surface_state.clear();
og->volume_state.clear();
og->displacement_state.clear();
+ og->bump_state.clear();
og->background_state.reset();
}
diff --git a/intern/cycles/util/util_debug.cpp b/intern/cycles/util/util_debug.cpp
index ab038d2b9fb..10895f2e918 100644
--- a/intern/cycles/util/util_debug.cpp
+++ b/intern/cycles/util/util_debug.cpp
@@ -184,8 +184,8 @@ std::ostream& operator <<(std::ostream &os,
<< " Device type : " << opencl_device_type << "\n"
<< " Kernel type : " << opencl_kernel_type << "\n"
<< " Debug : " << string_from_bool(debug_flags.opencl.debug) << "\n"
- << " Single program : " << string_from_bool(debug_flags.opencl.single_program)
- << "\n";
+ << " Single program : " << string_from_bool(debug_flags.opencl.single_program) << "\n"
+ << " Memory limit : " << string_human_readable_size(debug_flags.opencl.mem_limit) << "\n";
return os;
}
diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h
index 4505d584490..450cd900a9f 100644
--- a/intern/cycles/util/util_debug.h
+++ b/intern/cycles/util/util_debug.h
@@ -115,6 +115,10 @@ public:
/* Use single program */
bool single_program;
+
+ /* TODO(mai): Currently this is only for OpenCL, but we should have it implemented for all devices. */
+ /* Artificial memory limit in bytes (0 if disabled). */
+ size_t mem_limit;
};
/* Get instance of debug flags registry. */
diff --git a/intern/cycles/util/util_logging.h b/intern/cycles/util/util_logging.h
index ecf9c9cfee0..492f830e67c 100644
--- a/intern/cycles/util/util_logging.h
+++ b/intern/cycles/util/util_logging.h
@@ -19,28 +19,30 @@
#if defined(WITH_CYCLES_LOGGING) && !defined(__KERNEL_GPU__)
# include <glog/logging.h>
-#else
-# include <iostream>
#endif
+#include <iostream>
+
CCL_NAMESPACE_BEGIN
#if !defined(WITH_CYCLES_LOGGING) || defined(__KERNEL_GPU__)
-class StubStream : public std::ostream {
- public:
- StubStream() : std::ostream(NULL) { }
+class StubStream {
+public:
+ template<class T>
+ StubStream& operator<<(const T&) {
+ return *this;
+ }
};
class LogMessageVoidify {
public:
LogMessageVoidify() { }
- void operator&(::std::ostream&) { }
+ void operator&(StubStream&) { }
};
# define LOG_SUPPRESS() (true) ? (void) 0 : LogMessageVoidify() & StubStream()
# define LOG(severity) LOG_SUPPRESS()
# define VLOG(severity) LOG_SUPPRESS()
-
#endif
#define VLOG_ONCE(level, flag) if(!flag) flag = true, VLOG(level)
diff --git a/intern/cycles/util/util_math_float3.h b/intern/cycles/util/util_math_float3.h
index 5327d9f7cc6..bb04c4aa2d9 100644
--- a/intern/cycles/util/util_math_float3.h
+++ b/intern/cycles/util/util_math_float3.h
@@ -374,9 +374,9 @@ ccl_device_inline bool isfinite3_safe(float3 v)
ccl_device_inline float3 ensure_finite3(float3 v)
{
- if(!isfinite_safe(v.x)) v.x = 0.0;
- if(!isfinite_safe(v.y)) v.y = 0.0;
- if(!isfinite_safe(v.z)) v.z = 0.0;
+ if(!isfinite_safe(v.x)) v.x = 0.0f;
+ if(!isfinite_safe(v.y)) v.y = 0.0f;
+ if(!isfinite_safe(v.z)) v.z = 0.0f;
return v;
}