Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/blender/addon/engine.py3
-rw-r--r--intern/cycles/blender/addon/properties.py10
-rw-r--r--intern/cycles/blender/addon/ui.py21
-rw-r--r--intern/cycles/blender/blender_python.cpp1
-rw-r--r--intern/cycles/blender/blender_session.cpp9
-rw-r--r--intern/cycles/blender/blender_sync.cpp11
-rw-r--r--intern/cycles/blender/blender_sync.h3
-rw-r--r--intern/cycles/device/device.cpp2
-rw-r--r--intern/cycles/device/device.h10
-rw-r--r--intern/cycles/device/device_cpu.cpp23
-rw-r--r--intern/cycles/device/device_cuda.cpp16
-rw-r--r--intern/cycles/device/device_denoising.cpp2
-rw-r--r--intern/cycles/device/device_denoising.h2
-rw-r--r--intern/cycles/device/device_opencl.cpp13
-rw-r--r--intern/cycles/device/device_split_kernel.cpp4
-rw-r--r--intern/cycles/device/device_split_kernel.h4
-rw-r--r--intern/cycles/device/opencl/opencl.h23
-rw-r--r--intern/cycles/device/opencl/opencl_base.cpp42
-rw-r--r--intern/cycles/device/opencl/opencl_split.cpp74
-rw-r--r--intern/cycles/device/opencl/opencl_util.cpp65
-rw-r--r--intern/cycles/kernel/CMakeLists.txt9
-rw-r--r--intern/cycles/kernel/closure/bsdf.h5
-rw-r--r--intern/cycles/kernel/closure/bsdf_microfacet.h18
-rw-r--r--intern/cycles/kernel/closure/bsdf_microfacet_multi.h78
-rw-r--r--intern/cycles/kernel/closure/bsdf_principled_diffuse.h8
-rw-r--r--intern/cycles/kernel/filter/filter_features.h12
-rw-r--r--intern/cycles/kernel/filter/filter_nlm_cpu.h5
-rw-r--r--intern/cycles/kernel/filter/filter_nlm_gpu.h5
-rw-r--r--intern/cycles/kernel/filter/filter_prefilter.h23
-rw-r--r--intern/cycles/kernel/filter/filter_reconstruction.h56
-rw-r--r--intern/cycles/kernel/kernel_accumulate.h52
-rw-r--r--intern/cycles/kernel/kernel_passes.h2
-rw-r--r--intern/cycles/kernel/kernel_path_state.h2
-rw-r--r--intern/cycles/kernel/kernel_queues.h15
-rw-r--r--intern/cycles/kernel/kernel_subsurface.h2
-rw-r--r--intern/cycles/kernel/kernel_types.h10
-rw-r--r--intern/cycles/kernel/kernel_volume.h2
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu.h2
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h4
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu.h1
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h2
-rw-r--r--intern/cycles/kernel/kernels/cuda/filter.cu3
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel_split.cu2
-rw-r--r--intern/cycles/kernel/kernels/opencl/filter.cl3
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl13
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl13
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl10
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl26
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl15
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl10
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl10
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl10
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl13
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_path_init.cl10
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl13
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl10
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl10
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl13
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl13
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl10
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl10
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_split.cl1
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_split_function.h72
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl10
-rw-r--r--intern/cycles/kernel/osl/osl_bssrdf.cpp2
-rw-r--r--intern/cycles/kernel/osl/osl_closures.cpp24
-rw-r--r--intern/cycles/kernel/shaders/node_principled_bsdf.osl8
-rw-r--r--intern/cycles/kernel/shaders/stdosl.h2
-rw-r--r--intern/cycles/kernel/split/kernel_branched.h52
-rw-r--r--intern/cycles/kernel/split/kernel_do_volume.h19
-rw-r--r--intern/cycles/kernel/split/kernel_enqueue_inactive.h46
-rw-r--r--intern/cycles/kernel/split/kernel_next_iteration_setup.h2
-rw-r--r--intern/cycles/kernel/split/kernel_queue_enqueue.h3
-rw-r--r--intern/cycles/kernel/split/kernel_scene_intersect.h16
-rw-r--r--intern/cycles/kernel/split/kernel_shadow_blocked_dl.h8
-rw-r--r--intern/cycles/kernel/split/kernel_split_common.h15
-rw-r--r--intern/cycles/kernel/split/kernel_split_data_types.h23
-rw-r--r--intern/cycles/kernel/split/kernel_subsurface_scatter.h31
-rw-r--r--intern/cycles/kernel/svm/svm_closure.h35
-rw-r--r--intern/cycles/render/constant_fold.cpp8
-rw-r--r--intern/cycles/render/light.cpp4
-rw-r--r--intern/cycles/render/nodes.cpp12
-rw-r--r--intern/cycles/render/nodes.h5
-rw-r--r--intern/cycles/render/osl.cpp1
-rw-r--r--intern/cycles/render/session.cpp1
-rw-r--r--intern/cycles/test/util_string_test.cpp37
-rw-r--r--intern/cycles/util/util_atomic.h4
-rw-r--r--intern/cycles/util/util_debug.cpp4
-rw-r--r--intern/cycles/util/util_debug.h4
-rw-r--r--intern/cycles/util/util_logging.h16
-rw-r--r--intern/cycles/util/util_math_float3.h6
-rw-r--r--intern/cycles/util/util_progress.h3
-rw-r--r--intern/cycles/util/util_string.cpp6
-rw-r--r--intern/elbeem/intern/isosurface.cpp33
-rw-r--r--intern/elbeem/intern/solver_util.cpp4
-rw-r--r--intern/ghost/GHOST_C-api.h2
-rw-r--r--intern/ghost/intern/GHOST_SystemWin32.cpp3
-rw-r--r--intern/ghost/intern/GHOST_WindowCocoa.h2
-rw-r--r--intern/guardedalloc/intern/mallocn_lockfree_impl.c4
-rw-r--r--intern/libmv/ChangeLog389
-rwxr-xr-xintern/libmv/bundle.sh1
-rw-r--r--intern/libmv/intern/track_region.cc2
-rw-r--r--intern/memutil/MEM_CacheLimiterC-Api.h24
-rw-r--r--intern/opensubdiv/opensubdiv_capi.cc6
-rw-r--r--intern/opensubdiv/opensubdiv_capi.h2
105 files changed, 1176 insertions, 644 deletions
diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py
index b5149b5082e..3018fd5b316 100644
--- a/intern/cycles/blender/addon/engine.py
+++ b/intern/cycles/blender/addon/engine.py
@@ -239,7 +239,8 @@ def register_passes(engine, scene, srl):
if crl.pass_debug_bvh_intersections: engine.register_pass(scene, srl, "Debug BVH Intersections", 1, "X", 'VALUE')
if crl.pass_debug_ray_bounces: engine.register_pass(scene, srl, "Debug Ray Bounces", 1, "X", 'VALUE')
- if crl.use_denoising and crl.denoising_store_passes:
+ cscene = scene.cycles
+ if crl.use_denoising and crl.denoising_store_passes and not cscene.use_progressive_refine:
engine.register_pass(scene, srl, "Denoising Normal", 3, "XYZ", 'VECTOR')
engine.register_pass(scene, srl, "Denoising Normal Variance", 3, "XYZ", 'VECTOR')
engine.register_pass(scene, srl, "Denoising Albedo", 3, "RGB", 'COLOR')
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 8bb25aba13c..68474529ed3 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -695,10 +695,17 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
update=devices_update_callback
)
- cls.debug_opencl_kernel_single_program = BoolProperty(name="Single Program", default=True, update=devices_update_callback);
+ cls.debug_opencl_kernel_single_program = BoolProperty(
+ name="Single Program",
+ default=True,
+ update=devices_update_callback,
+ )
cls.debug_use_opencl_debug = BoolProperty(name="Debug OpenCL", default=False)
+ cls.debug_opencl_mem_limit = IntProperty(name="Memory limit", default=0,
+ description="Artificial limit on OpenCL memory usage in MB (0 to disable limit)")
+
@classmethod
def unregister(cls):
del bpy.types.Scene.cycles
@@ -1209,6 +1216,7 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
name="Use Denoising",
description="Denoise the rendered image",
default=False,
+ update=update_render_passes,
)
cls.denoising_diffuse_direct = BoolProperty(
name="Diffuse Direct",
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 4ed3ccd9a2c..49beebe5ab4 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -531,17 +531,17 @@ class CyclesRender_PT_layer_passes(CyclesButtonsPanel, Panel):
col.prop(rl, "use_pass_environment")
if context.scene.cycles.feature_set == 'EXPERIMENTAL':
- col.separator()
- sub = col.column()
- sub.active = crl.use_denoising
- sub.prop(crl, "denoising_store_passes", text="Denoising")
+ col.separator()
+ sub = col.column()
+ sub.active = crl.use_denoising
+ sub.prop(crl, "denoising_store_passes", text="Denoising")
if _cycles.with_cycles_debug:
- col = layout.column()
- col.prop(crl, "pass_debug_bvh_traversed_nodes")
- col.prop(crl, "pass_debug_bvh_traversed_instances")
- col.prop(crl, "pass_debug_bvh_intersections")
- col.prop(crl, "pass_debug_ray_bounces")
+ col = layout.column()
+ col.prop(crl, "pass_debug_bvh_traversed_nodes")
+ col.prop(crl, "pass_debug_bvh_traversed_instances")
+ col.prop(crl, "pass_debug_bvh_intersections")
+ col.prop(crl, "pass_debug_ray_bounces")
class CyclesRender_PT_views(CyclesButtonsPanel, Panel):
@@ -1608,6 +1608,7 @@ class CyclesRender_PT_debug(CyclesButtonsPanel, Panel):
col.prop(cscene, "debug_opencl_device_type", text="Device")
col.prop(cscene, "debug_opencl_kernel_single_program", text="Single Program")
col.prop(cscene, "debug_use_opencl_debug", text="Debug")
+ col.prop(cscene, "debug_opencl_mem_limit")
class CyclesParticle_PT_CurveSettings(CyclesButtonsPanel, Panel):
@@ -1710,7 +1711,7 @@ def draw_device(self, context):
layout.prop(cscene, "feature_set")
- split = layout.split(percentage=1/3)
+ split = layout.split(percentage=1 / 3)
split.label("Device:")
row = split.row()
row.active = show_device_active(context)
diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp
index 01570b1e3f9..54973fd1b7f 100644
--- a/intern/cycles/blender/blender_python.cpp
+++ b/intern/cycles/blender/blender_python.cpp
@@ -106,6 +106,7 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene)
}
/* Synchronize other OpenCL flags. */
flags.opencl.debug = get_boolean(cscene, "debug_use_opencl_debug");
+ flags.opencl.mem_limit = ((size_t)get_int(cscene, "debug_opencl_mem_limit"))*1024*1024;
flags.opencl.single_program = get_boolean(cscene, "debug_opencl_kernel_single_program");
return flags.opencl.device_type != opencl_device_type ||
flags.opencl.kernel_type != opencl_kernel_type;
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index c6a59577507..2b5dd5eadea 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -399,14 +399,7 @@ void BlenderSession::render()
BL::RenderLayer b_rlay = *b_single_rlay;
/* add passes */
- array<Pass> passes;
- if(session_params.device.advanced_shading) {
- passes = sync->sync_render_passes(b_rlay, *b_layer_iter);
- }
- else {
- Pass::add(PASS_COMBINED, passes);
- }
-
+ array<Pass> passes = sync->sync_render_passes(b_rlay, *b_layer_iter, session_params);
buffer_params.passes = passes;
PointerRNA crl = RNA_pointer_get(&b_layer_iter->ptr, "cycles");
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index 41723599874..3a00384458a 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -553,11 +553,16 @@ int BlenderSync::get_denoising_pass(BL::RenderPass& b_pass)
}
array<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay,
- BL::SceneRenderLayer& b_srlay)
+ BL::SceneRenderLayer& b_srlay,
+ const SessionParams &session_params)
{
array<Pass> passes;
Pass::add(PASS_COMBINED, passes);
+ if(!session_params.device.advanced_shading) {
+ return passes;
+ }
+
/* loop over passes */
BL::RenderLayer::passes_iterator b_pass_iter;
@@ -572,7 +577,9 @@ array<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay,
}
PointerRNA crp = RNA_pointer_get(&b_srlay.ptr, "cycles");
- if(get_boolean(crp, "denoising_store_passes")) {
+ if(get_boolean(crp, "denoising_store_passes") &&
+ get_boolean(crp, "use_denoising") &&
+ !session_params.progressive_refine) {
b_engine.add_pass("Denoising Normal", 3, "XYZ", b_srlay.name().c_str());
b_engine.add_pass("Denoising Normal Variance", 3, "XYZ", b_srlay.name().c_str());
b_engine.add_pass("Denoising Albedo", 3, "RGB", b_srlay.name().c_str());
diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h
index 0950285d976..4ec46424b5a 100644
--- a/intern/cycles/blender/blender_sync.h
+++ b/intern/cycles/blender/blender_sync.h
@@ -68,7 +68,8 @@ public:
const char *layer = 0);
void sync_render_layers(BL::SpaceView3D& b_v3d, const char *layer);
array<Pass> sync_render_passes(BL::RenderLayer& b_rlay,
- BL::SceneRenderLayer& b_srlay);
+ BL::SceneRenderLayer& b_srlay,
+ const SessionParams &session_params);
void sync_integrator();
void sync_camera(BL::RenderSettings& b_render,
BL::Object& b_override,
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index 0603ecb3afb..a54bb77f9f3 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -68,6 +68,8 @@ std::ostream& operator <<(std::ostream &os,
<< string_from_bool(requested_features.use_transparent) << std::endl;
os << "Use Principled BSDF: "
<< string_from_bool(requested_features.use_principled) << std::endl;
+ os << "Use Denoising: "
+ << string_from_bool(requested_features.use_denoising) << std::endl;
return os;
}
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 527940e8f50..b3b693c630c 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -127,6 +127,9 @@ public:
/* Per-uber shader usage flags. */
bool use_principled;
+ /* Denoising features. */
+ bool use_denoising;
+
DeviceRequestedFeatures()
{
/* TODO(sergey): Find more meaningful defaults. */
@@ -145,6 +148,7 @@ public:
use_transparent = false;
use_shadow_tricks = false;
use_principled = false;
+ use_denoising = false;
}
bool modified(const DeviceRequestedFeatures& requested_features)
@@ -163,7 +167,8 @@ public:
use_patch_evaluation == requested_features.use_patch_evaluation &&
use_transparent == requested_features.use_transparent &&
use_shadow_tricks == requested_features.use_shadow_tricks &&
- use_principled == requested_features.use_principled);
+ use_principled == requested_features.use_principled &&
+ use_denoising == requested_features.use_denoising);
}
/* Convert the requested features structure to a build options,
@@ -213,6 +218,9 @@ public:
if(!use_principled) {
build_options += " -D__NO_PRINCIPLED__";
}
+ if(!use_denoising) {
+ build_options += " -D__NO_DENOISING__";
+ }
return build_options;
}
};
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index c2f74aa8903..18112437b45 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -149,7 +149,8 @@ public:
device_memory& use_queues_flag,
device_memory& work_pool_wgs);
- virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&);
+ virtual SplitKernelFunction* get_split_kernel_function(const string& kernel_name,
+ const DeviceRequestedFeatures&);
virtual int2 split_kernel_local_size();
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task);
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
@@ -185,9 +186,9 @@ public:
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int, int)> filter_nlm_update_output_kernel;
KernelFunctions<void(*)(float*, float*, int*, int)> filter_nlm_normalize_kernel;
- KernelFunctions<void(*)(float*, int, int, int, float*, int*, int*, int, int, float)> filter_construct_transform_kernel;
- KernelFunctions<void(*)(int, int, float*, float*, float*, float*, float*, int*, float*, float3*, int*, int*, int, int, int, int)> filter_nlm_construct_gramian_kernel;
- KernelFunctions<void(*)(int, int, int, int, int, float*, int*, float*, float3*, int*, int)> filter_finalize_kernel;
+ KernelFunctions<void(*)(float*, int, int, int, float*, int*, int*, int, int, float)> filter_construct_transform_kernel;
+ KernelFunctions<void(*)(int, int, float*, float*, float*, int*, float*, float3*, int*, int*, int, int, int, int)> filter_nlm_construct_gramian_kernel;
+ KernelFunctions<void(*)(int, int, int, int, int, float*, int*, float*, float3*, int*, int)> filter_finalize_kernel;
KernelFunctions<void(*)(KernelGlobals *, ccl_constant KernelData*, ccl_global void*, int, ccl_global char*,
ccl_global uint*, int, int, int, int, int, int, int, int, ccl_global int*, int,
@@ -248,6 +249,7 @@ public:
REGISTER_SPLIT_KERNEL(direct_lighting);
REGISTER_SPLIT_KERNEL(shadow_blocked_ao);
REGISTER_SPLIT_KERNEL(shadow_blocked_dl);
+ REGISTER_SPLIT_KERNEL(enqueue_inactive);
REGISTER_SPLIT_KERNEL(next_iteration_setup);
REGISTER_SPLIT_KERNEL(indirect_subsurface);
REGISTER_SPLIT_KERNEL(buffer_update);
@@ -465,8 +467,6 @@ public:
bool denoising_reconstruct(device_ptr color_ptr,
device_ptr color_variance_ptr,
- device_ptr guide_ptr,
- device_ptr guide_variance_ptr,
device_ptr output_ptr,
DenoisingTask *task)
{
@@ -485,8 +485,8 @@ public:
task->reconstruction_state.source_w - max(0, dx),
task->reconstruction_state.source_h - max(0, dy)};
filter_nlm_calc_difference_kernel()(dx, dy,
- (float*) guide_ptr,
- (float*) guide_variance_ptr,
+ (float*) color_ptr,
+ (float*) color_variance_ptr,
difference,
local_rect,
task->buffer.w,
@@ -499,8 +499,6 @@ public:
filter_nlm_construct_gramian_kernel()(dx, dy,
blurDifference,
(float*) task->buffer.mem.device_pointer,
- (float*) color_ptr,
- (float*) color_variance_ptr,
(float*) task->storage.transform.device_pointer,
(int*) task->storage.rank.device_pointer,
(float*) task->storage.XtWX.device_pointer,
@@ -648,7 +646,7 @@ public:
DenoisingTask denoising(this);
denoising.functions.construct_transform = function_bind(&CPUDevice::denoising_construct_transform, this, &denoising);
- denoising.functions.reconstruct = function_bind(&CPUDevice::denoising_reconstruct, this, _1, _2, _3, _4, _5, &denoising);
+ denoising.functions.reconstruct = function_bind(&CPUDevice::denoising_reconstruct, this, _1, _2, _3, &denoising);
denoising.functions.divide_shadow = function_bind(&CPUDevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
denoising.functions.non_local_means = function_bind(&CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
denoising.functions.combine_halves = function_bind(&CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
@@ -935,7 +933,8 @@ bool CPUSplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim,
return true;
}
-SplitKernelFunction* CPUSplitKernel::get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&)
+SplitKernelFunction* CPUSplitKernel::get_split_kernel_function(const string& kernel_name,
+ const DeviceRequestedFeatures&)
{
CPUSplitKernelFunction *kernel = new CPUSplitKernelFunction(device);
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 99537e9a983..3a29538aa13 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -105,7 +105,8 @@ public:
device_memory& use_queues_flag,
device_memory& work_pool_wgs);
- virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&);
+ virtual SplitKernelFunction* get_split_kernel_function(const string& kernel_name,
+ const DeviceRequestedFeatures&);
virtual int2 split_kernel_local_size();
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task);
};
@@ -1051,8 +1052,6 @@ public:
bool denoising_reconstruct(device_ptr color_ptr,
device_ptr color_variance_ptr,
- device_ptr guide_ptr,
- device_ptr guide_variance_ptr,
device_ptr output_ptr,
DenoisingTask *task)
{
@@ -1096,8 +1095,8 @@ public:
task->reconstruction_state.source_h - max(0, dy)};
void *calc_difference_args[] = {&dx, &dy,
- &guide_ptr,
- &guide_variance_ptr,
+ &color_ptr,
+ &color_variance_ptr,
&difference,
&local_rect,
&task->buffer.w,
@@ -1126,8 +1125,6 @@ public:
void *construct_gramian_args[] = {&dx, &dy,
&blurDifference,
&task->buffer.mem.device_pointer,
- &color_ptr,
- &color_variance_ptr,
&task->storage.transform.device_pointer,
&task->storage.rank.device_pointer,
&task->storage.XtWX.device_pointer,
@@ -1294,7 +1291,7 @@ public:
DenoisingTask denoising(this);
denoising.functions.construct_transform = function_bind(&CUDADevice::denoising_construct_transform, this, &denoising);
- denoising.functions.reconstruct = function_bind(&CUDADevice::denoising_reconstruct, this, _1, _2, _3, _4, _5, &denoising);
+ denoising.functions.reconstruct = function_bind(&CUDADevice::denoising_reconstruct, this, _1, _2, _3, &denoising);
denoising.functions.divide_shadow = function_bind(&CUDADevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
denoising.functions.non_local_means = function_bind(&CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
denoising.functions.combine_halves = function_bind(&CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
@@ -2041,7 +2038,8 @@ bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim
return !device->have_error();
}
-SplitKernelFunction* CUDASplitKernel::get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&)
+SplitKernelFunction* CUDASplitKernel::get_split_kernel_function(const string& kernel_name,
+ const DeviceRequestedFeatures&)
{
CUfunction func;
diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp
index 613bd9112cf..619cc1d171e 100644
--- a/intern/cycles/device/device_denoising.cpp
+++ b/intern/cycles/device/device_denoising.cpp
@@ -215,7 +215,7 @@ bool DenoisingTask::run_denoising()
{
device_sub_ptr color_ptr (device, buffer.mem, 8*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE);
device_sub_ptr color_var_ptr(device, buffer.mem, 11*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE);
- functions.reconstruct(*color_ptr, *color_var_ptr, *color_ptr, *color_var_ptr, render_buffer.ptr);
+ functions.reconstruct(*color_ptr, *color_var_ptr, render_buffer.ptr);
}
device->mem_free(storage.XtWX);
diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h
index 25b93c2ad74..def7b72f67d 100644
--- a/intern/cycles/device/device_denoising.h
+++ b/intern/cycles/device/device_denoising.h
@@ -58,8 +58,6 @@ public:
)> non_local_means;
function<bool(device_ptr color_ptr,
device_ptr color_variance_ptr,
- device_ptr guide_ptr,
- device_ptr guide_variance_ptr,
device_ptr output_ptr
)> reconstruct;
function<bool()> construct_transform;
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index edd2047debc..681b8214b03 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -130,10 +130,22 @@ string device_opencl_capabilities(void)
opencl_assert(func(id, what, sizeof(data), &data, NULL)); \
result += string_printf("%s: %s\n", name, data); \
} while(false)
+#define APPEND_STRING_EXTENSION_INFO(func, id, name, what) \
+ do { \
+ char data[1024] = "\0"; \
+ size_t length = 0; \
+ if(func(id, what, sizeof(data), &data, &length) == CL_SUCCESS) { \
+ if(length != 0 && data[0] != '\0') { \
+ result += string_printf("%s: %s\n", name, data); \
+ } \
+ } \
+ } while(false)
#define APPEND_PLATFORM_STRING_INFO(id, name, what) \
APPEND_STRING_INFO(clGetPlatformInfo, id, "\tPlatform " name, what)
#define APPEND_DEVICE_STRING_INFO(id, name, what) \
APPEND_STRING_INFO(clGetDeviceInfo, id, "\t\t\tDevice " name, what)
+#define APPEND_DEVICE_STRING_EXTENSION_INFO(id, name, what) \
+ APPEND_STRING_EXTENSION_INFO(clGetDeviceInfo, id, "\t\t\tDevice " name, what)
vector<cl_device_id> device_ids;
for(cl_uint platform = 0; platform < num_platforms; ++platform) {
@@ -167,6 +179,7 @@ string device_opencl_capabilities(void)
result += string_printf("\t\tDevice: #%u\n", device);
APPEND_DEVICE_STRING_INFO(device_id, "Name", CL_DEVICE_NAME);
+ APPEND_DEVICE_STRING_EXTENSION_INFO(device_id, "Board Name", CL_DEVICE_BOARD_NAME_AMD);
APPEND_DEVICE_STRING_INFO(device_id, "Vendor", CL_DEVICE_VENDOR);
APPEND_DEVICE_STRING_INFO(device_id, "OpenCL C Version", CL_DEVICE_OPENCL_C_VERSION);
APPEND_DEVICE_STRING_INFO(device_id, "Profile", CL_DEVICE_PROFILE);
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index dddd19f179f..d2b3a89fa98 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -47,6 +47,7 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device)
kernel_direct_lighting = NULL;
kernel_shadow_blocked_ao = NULL;
kernel_shadow_blocked_dl = NULL;
+ kernel_enqueue_inactive = NULL;
kernel_next_iteration_setup = NULL;
kernel_indirect_subsurface = NULL;
kernel_buffer_update = NULL;
@@ -74,6 +75,7 @@ DeviceSplitKernel::~DeviceSplitKernel()
delete kernel_direct_lighting;
delete kernel_shadow_blocked_ao;
delete kernel_shadow_blocked_dl;
+ delete kernel_enqueue_inactive;
delete kernel_next_iteration_setup;
delete kernel_indirect_subsurface;
delete kernel_buffer_update;
@@ -101,6 +103,7 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
LOAD_KERNEL(direct_lighting);
LOAD_KERNEL(shadow_blocked_ao);
LOAD_KERNEL(shadow_blocked_dl);
+ LOAD_KERNEL(enqueue_inactive);
LOAD_KERNEL(next_iteration_setup);
LOAD_KERNEL(indirect_subsurface);
LOAD_KERNEL(buffer_update);
@@ -256,6 +259,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shadow_blocked_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shadow_blocked_dl, global_size, local_size);
+ ENQUEUE_SPLIT_KERNEL(enqueue_inactive, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(indirect_subsurface, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index 68c2ba974a5..9c42cb58520 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -69,6 +69,7 @@ private:
SplitKernelFunction *kernel_direct_lighting;
SplitKernelFunction *kernel_shadow_blocked_ao;
SplitKernelFunction *kernel_shadow_blocked_dl;
+ SplitKernelFunction *kernel_enqueue_inactive;
SplitKernelFunction *kernel_next_iteration_setup;
SplitKernelFunction *kernel_indirect_subsurface;
SplitKernelFunction *kernel_buffer_update;
@@ -124,7 +125,8 @@ public:
device_memory& use_queues_flag,
device_memory& work_pool_wgs) = 0;
- virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&) = 0;
+ virtual SplitKernelFunction* get_split_kernel_function(const string& kernel_name,
+ const DeviceRequestedFeatures&) = 0;
virtual int2 split_kernel_local_size() = 0;
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task) = 0;
};
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index 27e196d1e68..7da690904aa 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -84,7 +84,7 @@ public:
string *error = NULL);
static bool device_version_check(cl_device_id device,
string *error = NULL);
- static string get_hardware_id(string platform_name,
+ static string get_hardware_id(const string& platform_name,
cl_device_id device_id);
static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
bool force_all = false);
@@ -130,6 +130,11 @@ public:
cl_int* error = NULL);
static cl_device_type get_device_type(cl_device_id device_id);
+ static bool get_driver_version(cl_device_id device_id,
+ int *major,
+ int *minor,
+ cl_int* error = NULL);
+
static int mem_address_alignment(cl_device_id device_id);
/* Get somewhat more readable device name.
@@ -242,17 +247,17 @@ public:
public:
OpenCLProgram() : loaded(false), device(NULL) {}
OpenCLProgram(OpenCLDeviceBase *device,
- string program_name,
- string kernel_name,
- string kernel_build_options,
+ const string& program_name,
+ const string& kernel_name,
+ const string& kernel_build_options,
bool use_stdout = true);
~OpenCLProgram();
void add_kernel(ustring name);
void load();
- bool is_loaded() { return loaded; }
- string get_log() { return log; }
+ bool is_loaded() const { return loaded; }
+ const string& get_log() const { return log; }
void report_error();
cl_kernel operator()();
@@ -266,8 +271,8 @@ public:
bool load_binary(const string& clbin, const string *debug_src = NULL);
bool save_binary(const string& clbin);
- void add_log(string msg, bool is_debug);
- void add_error(string msg);
+ void add_log(const string& msg, bool is_debug);
+ void add_error(const string& msg);
bool loaded;
cl_program program;
@@ -390,8 +395,6 @@ protected:
bool denoising_construct_transform(DenoisingTask *task);
bool denoising_reconstruct(device_ptr color_ptr,
device_ptr color_variance_ptr,
- device_ptr guide_ptr,
- device_ptr guide_variance_ptr,
device_ptr output_ptr,
DenoisingTask *task);
bool denoising_combine_halves(device_ptr a_ptr,
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index 24b70e3446c..509da7a0a84 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -20,6 +20,7 @@
#include "kernel/kernel_types.h"
+#include "util/util_algorithm.h"
#include "util/util_foreach.h"
#include "util/util_logging.h"
#include "util/util_md5.h"
@@ -276,6 +277,25 @@ void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryTyp
size_t size = mem.memory_size();
+ /* check there is enough memory available for the allocation */
+ cl_ulong max_alloc_size = 0;
+ clGetDeviceInfo(cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_alloc_size, NULL);
+
+ if(DebugFlags().opencl.mem_limit) {
+ max_alloc_size = min(max_alloc_size,
+ cl_ulong(DebugFlags().opencl.mem_limit - stats.mem_used));
+ }
+
+ if(size > max_alloc_size) {
+ string error = "Scene too complex to fit in available memory.";
+ if(name != NULL) {
+ error += string_printf(" (allocating buffer %s failed.)", name);
+ }
+ set_error(error);
+
+ return;
+ }
+
cl_mem_flags mem_flag;
void *mem_ptr = NULL;
@@ -693,8 +713,6 @@ bool OpenCLDeviceBase::denoising_construct_transform(DenoisingTask *task)
bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr,
device_ptr color_variance_ptr,
- device_ptr guide_ptr,
- device_ptr guide_variance_ptr,
device_ptr output_ptr,
DenoisingTask *task)
{
@@ -703,8 +721,6 @@ bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr,
cl_mem color_mem = CL_MEM_PTR(color_ptr);
cl_mem color_variance_mem = CL_MEM_PTR(color_variance_ptr);
- cl_mem guide_mem = CL_MEM_PTR(guide_ptr);
- cl_mem guide_variance_mem = CL_MEM_PTR(guide_variance_ptr);
cl_mem output_mem = CL_MEM_PTR(output_ptr);
cl_mem buffer_mem = CL_MEM_PTR(task->buffer.mem.device_pointer);
@@ -735,8 +751,8 @@ bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr,
kernel_set_args(ckNLMCalcDifference, 0,
dx, dy,
- guide_mem,
- guide_variance_mem,
+ color_mem,
+ color_variance_mem,
difference,
local_rect,
task->buffer.w,
@@ -775,8 +791,6 @@ bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr,
dx, dy,
blurDifference,
buffer_mem,
- color_mem,
- color_variance_mem,
transform_mem,
rank_mem,
XtWX_mem,
@@ -961,7 +975,7 @@ void OpenCLDeviceBase::denoise(RenderTile &rtile, const DeviceTask &task)
denoising.functions.set_tiles = function_bind(&OpenCLDeviceBase::denoising_set_tiles, this, _1, &denoising);
denoising.functions.construct_transform = function_bind(&OpenCLDeviceBase::denoising_construct_transform, this, &denoising);
- denoising.functions.reconstruct = function_bind(&OpenCLDeviceBase::denoising_reconstruct, this, _1, _2, _3, _4, _5, &denoising);
+ denoising.functions.reconstruct = function_bind(&OpenCLDeviceBase::denoising_reconstruct, this, _1, _2, _3, &denoising);
denoising.functions.divide_shadow = function_bind(&OpenCLDeviceBase::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
denoising.functions.non_local_means = function_bind(&OpenCLDeviceBase::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
denoising.functions.combine_halves = function_bind(&OpenCLDeviceBase::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
@@ -1232,7 +1246,7 @@ void OpenCLDeviceBase::store_cached_kernel(
}
string OpenCLDeviceBase::build_options_for_base_program(
- const DeviceRequestedFeatures& /*requested_features*/)
+ const DeviceRequestedFeatures& requested_features)
{
/* TODO(sergey): By default we compile all features, meaning
* mega kernel is not getting feature-based optimizations.
@@ -1240,6 +1254,14 @@ string OpenCLDeviceBase::build_options_for_base_program(
* Ideally we need always compile kernel with as less features
* enabled as possible to keep performance at it's max.
*/
+
+ /* For now disable baking when not in use as this has major
+ * impact on kernel build times.
+ */
+ if(!requested_features.use_baking) {
+ return "-D__NO_BAKING__";
+ }
+
return "";
}
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 76dcbd6fc9a..76d9983e9a2 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -25,6 +25,7 @@
#include "device/device_split_kernel.h"
+#include "util/util_algorithm.h"
#include "util/util_logging.h"
#include "util/util_md5.h"
#include "util/util_path.h"
@@ -176,17 +177,62 @@ protected:
friend class OpenCLSplitKernelFunction;
};
+struct CachedSplitMemory {
+ int id;
+ device_memory *split_data;
+ device_memory *ray_state;
+ device_ptr *rng_state;
+ device_memory *queue_index;
+ device_memory *use_queues_flag;
+ device_memory *work_pools;
+ device_ptr *buffer;
+};
+
class OpenCLSplitKernelFunction : public SplitKernelFunction {
public:
OpenCLDeviceSplitKernel* device;
OpenCLDeviceBase::OpenCLProgram program;
+ CachedSplitMemory& cached_memory;
+ int cached_id;
+
+ OpenCLSplitKernelFunction(OpenCLDeviceSplitKernel* device, CachedSplitMemory& cached_memory) :
+ device(device), cached_memory(cached_memory), cached_id(cached_memory.id-1)
+ {
+ }
- OpenCLSplitKernelFunction(OpenCLDeviceSplitKernel* device) : device(device) {}
- ~OpenCLSplitKernelFunction() { program.release(); }
+ ~OpenCLSplitKernelFunction()
+ {
+ program.release();
+ }
virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data)
{
- device->kernel_set_args(program(), 0, kg, data);
+ if(cached_id != cached_memory.id) {
+ cl_uint start_arg_index =
+ device->kernel_set_args(program(),
+ 0,
+ kg,
+ data,
+ *cached_memory.split_data,
+ *cached_memory.ray_state,
+ *cached_memory.rng_state);
+
+/* TODO(sergey): Avoid map lookup here. */
+#define KERNEL_TEX(type, ttype, name) \
+ device->set_kernel_arg_mem(program(), &start_arg_index, #name);
+#include "kernel/kernel_textures.h"
+#undef KERNEL_TEX
+
+ start_arg_index +=
+ device->kernel_set_args(program(),
+ start_arg_index,
+ *cached_memory.queue_index,
+ *cached_memory.use_queues_flag,
+ *cached_memory.work_pools,
+ *cached_memory.buffer);
+
+ cached_id = cached_memory.id;
+ }
device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue,
program(),
@@ -213,14 +259,15 @@ public:
class OpenCLSplitKernel : public DeviceSplitKernel {
OpenCLDeviceSplitKernel *device;
+ CachedSplitMemory cached_memory;
public:
explicit OpenCLSplitKernel(OpenCLDeviceSplitKernel *device) : DeviceSplitKernel(device), device(device) {
}
- virtual SplitKernelFunction* get_split_kernel_function(string kernel_name,
+ virtual SplitKernelFunction* get_split_kernel_function(const string& kernel_name,
const DeviceRequestedFeatures& requested_features)
{
- OpenCLSplitKernelFunction* kernel = new OpenCLSplitKernelFunction(device);
+ OpenCLSplitKernelFunction* kernel = new OpenCLSplitKernelFunction(device, cached_memory);
bool single_program = OpenCLInfo::use_single_program();
kernel->program =
@@ -349,6 +396,15 @@ public:
return false;
}
+ cached_memory.split_data = &split_data;
+ cached_memory.ray_state = &ray_state;
+ cached_memory.rng_state = &rtile.rng_state;
+ cached_memory.queue_index = &queue_index;
+ cached_memory.use_queues_flag = &use_queues_flag;
+ cached_memory.work_pools = &work_pool_wgs;
+ cached_memory.buffer = &rtile.buffer;
+ cached_memory.id++;
+
return true;
}
@@ -368,12 +424,18 @@ public:
cl_ulong max_buffer_size;
clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);
+
+ if(DebugFlags().opencl.mem_limit) {
+ max_buffer_size = min(max_buffer_size,
+ cl_ulong(DebugFlags().opencl.mem_limit - device->stats.mem_used));
+ }
+
VLOG(1) << "Maximum device allocation size: "
<< string_human_readable_number(max_buffer_size) << " bytes. ("
<< string_human_readable_size(max_buffer_size) << ").";
size_t num_elements = max_elements_for_max_buffer_size(kg, data, max_buffer_size / 2);
- int2 global_size = make_int2(round_down((int)sqrt(num_elements), 64), (int)sqrt(num_elements));
+ int2 global_size = make_int2(max(round_down((int)sqrt(num_elements), 64), 64), (int)sqrt(num_elements));
VLOG(1) << "Global size: " << global_size << ".";
return global_size;
}
diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp
index 642c1bfa11c..0d34af3e040 100644
--- a/intern/cycles/device/opencl/opencl_util.cpp
+++ b/intern/cycles/device/opencl/opencl_util.cpp
@@ -241,9 +241,9 @@ string OpenCLCache::get_kernel_md5()
}
OpenCLDeviceBase::OpenCLProgram::OpenCLProgram(OpenCLDeviceBase *device,
- string program_name,
- string kernel_file,
- string kernel_build_options,
+ const string& program_name,
+ const string& kernel_file,
+ const string& kernel_build_options,
bool use_stdout)
: device(device),
program_name(program_name),
@@ -274,7 +274,7 @@ void OpenCLDeviceBase::OpenCLProgram::release()
}
}
-void OpenCLDeviceBase::OpenCLProgram::add_log(string msg, bool debug)
+void OpenCLDeviceBase::OpenCLProgram::add_log(const string& msg, bool debug)
{
if(!use_stdout) {
log += msg + "\n";
@@ -288,7 +288,7 @@ void OpenCLDeviceBase::OpenCLProgram::add_log(string msg, bool debug)
}
}
-void OpenCLDeviceBase::OpenCLProgram::add_error(string msg)
+void OpenCLDeviceBase::OpenCLProgram::add_error(const string& msg)
{
if(use_stdout) {
fprintf(stderr, "%s\n", msg.c_str());
@@ -608,6 +608,14 @@ bool OpenCLInfo::device_supported(const string& platform_name,
if(!get_device_name(device_id, &device_name)) {
return false;
}
+
+ int driver_major = 0;
+ int driver_minor = 0;
+ if(!get_driver_version(device_id, &driver_major, &driver_minor)) {
+ return false;
+ }
+ VLOG(3) << "OpenCL driver version " << driver_major << "." << driver_minor;
+
/* It is possible tyo have Iris GPU on AMD/Apple OpenCL framework
* (aka, it will not be on Intel framework). This isn't supported
* and needs an explicit blacklist.
@@ -618,6 +626,21 @@ bool OpenCLInfo::device_supported(const string& platform_name,
if(platform_name == "AMD Accelerated Parallel Processing" &&
device_type == CL_DEVICE_TYPE_GPU)
{
+ if(driver_major < 2236) {
+ VLOG(1) << "AMD driver version " << driver_major << "." << driver_minor << " not supported.";
+ return false;
+ }
+ const char *blacklist[] = {
+ /* GCN 1 */
+ "Tahiti", "Pitcairn", "Capeverde", "Oland",
+ NULL
+ };
+ for (int i = 0; blacklist[i] != NULL; i++) {
+ if(device_name == blacklist[i]) {
+ VLOG(1) << "AMD device " << device_name << " not supported";
+ return false;
+ }
+ }
return true;
}
if(platform_name == "Apple" && device_type == CL_DEVICE_TYPE_GPU) {
@@ -684,7 +707,7 @@ bool OpenCLInfo::device_version_check(cl_device_id device,
return true;
}
-string OpenCLInfo::get_hardware_id(string platform_name, cl_device_id device_id)
+string OpenCLInfo::get_hardware_id(const string& platform_name, cl_device_id device_id)
{
if(platform_name == "AMD Accelerated Parallel Processing" || platform_name == "Apple") {
/* Use cl_amd_device_topology extension. */
@@ -1063,7 +1086,7 @@ string OpenCLInfo::get_readable_device_name(cl_device_id device_id)
CL_DEVICE_BOARD_NAME_AMD,
sizeof(board_name),
&board_name,
- &length) == CL_SUCCESS)
+ &length) == CL_SUCCESS)
{
if(length != 0 && board_name[0] != '\0') {
return board_name;
@@ -1073,6 +1096,34 @@ string OpenCLInfo::get_readable_device_name(cl_device_id device_id)
return get_device_name(device_id);
}
+bool OpenCLInfo::get_driver_version(cl_device_id device_id,
+ int *major,
+ int *minor,
+ cl_int* error)
+{
+ char buffer[1024];
+ cl_int err;
+ if((err = clGetDeviceInfo(device_id,
+ CL_DRIVER_VERSION,
+ sizeof(buffer),
+ &buffer,
+ NULL)) != CL_SUCCESS)
+ {
+ if(error != NULL) {
+ *error = err;
+ }
+ return false;
+ }
+ if(error != NULL) {
+ *error = CL_SUCCESS;
+ }
+ if(sscanf(buffer, "%d.%d", major, minor) < 2) {
+ VLOG(1) << string_printf("OpenCL: failed to parse driver version string (%s).", buffer);
+ return false;
+ }
+ return true;
+}
+
int OpenCLInfo::mem_address_alignment(cl_device_id device_id)
{
int base_align_bits;
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index bef869f34b4..23e9bd311c4 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -45,6 +45,7 @@ set(SRC
kernels/opencl/kernel_direct_lighting.cl
kernels/opencl/kernel_shadow_blocked_ao.cl
kernels/opencl/kernel_shadow_blocked_dl.cl
+ kernels/opencl/kernel_enqueue_inactive.cl
kernels/opencl/kernel_next_iteration_setup.cl
kernels/opencl/kernel_indirect_subsurface.cl
kernels/opencl/kernel_buffer_update.cl
@@ -121,6 +122,10 @@ set(SRC_KERNELS_CUDA_HEADERS
kernels/cuda/kernel_config.h
)
+set(SRC_KERNELS_OPENCL_HEADERS
+ kernels/opencl/kernel_split_function.h
+)
+
set(SRC_CLOSURE_HEADERS
closure/alloc.h
closure/bsdf.h
@@ -278,6 +283,7 @@ set(SRC_SPLIT_HEADERS
split/kernel_data_init.h
split/kernel_direct_lighting.h
split/kernel_do_volume.h
+ split/kernel_enqueue_inactive.h
split/kernel_holdout_emission_blurring_pathtermination_ao.h
split/kernel_indirect_background.h
split/kernel_indirect_subsurface.h
@@ -450,6 +456,7 @@ add_library(cycles_kernel
${SRC_HEADERS}
${SRC_KERNELS_CPU_HEADERS}
${SRC_KERNELS_CUDA_HEADERS}
+ ${SRC_KERNELS_OPENCL_HEADERS}
${SRC_BVH_HEADERS}
${SRC_CLOSURE_HEADERS}
${SRC_FILTER_HEADERS}
@@ -490,9 +497,11 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_subsurface_sc
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_ao.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_dl.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_enqueue_inactive.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_subsurface.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_buffer_update.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_split_function.h" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/filter.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel_split.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda)
diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index a04c157dc40..86a00d2124d 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -423,6 +423,11 @@ ccl_device bool bsdf_merge(ShaderClosure *a, ShaderClosure *b)
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
return bsdf_hair_merge(a, b);
+#ifdef __PRINCIPLED__
+ case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
+ case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
+ return bsdf_principled_diffuse_merge(a, b);
+#endif
#ifdef __VOLUME__
case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
return volume_henyey_greenstein_merge(a, b);
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index 30cc8b90330..b12e248f0a3 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -288,12 +288,16 @@ ccl_device int bsdf_microfacet_ggx_setup(MicrofacetBsdf *bsdf)
return SD_BSDF|SD_BSDF_HAS_EVAL;
}
-ccl_device int bsdf_microfacet_ggx_fresnel_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
{
bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+ float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+ float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+ bsdf->sample_weight *= F;
+
bsdf->alpha_x = saturate(bsdf->alpha_x);
bsdf->alpha_y = bsdf->alpha_x;
@@ -302,12 +306,16 @@ ccl_device int bsdf_microfacet_ggx_fresnel_setup(MicrofacetBsdf *bsdf)
return SD_BSDF|SD_BSDF_HAS_EVAL;
}
-ccl_device int bsdf_microfacet_ggx_clearcoat_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_ggx_clearcoat_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
{
bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+ float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+ float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+ bsdf->sample_weight *= 0.25f * bsdf->extra->clearcoat * F;
+
bsdf->alpha_x = saturate(bsdf->alpha_x);
bsdf->alpha_y = bsdf->alpha_x;
@@ -343,12 +351,16 @@ ccl_device int bsdf_microfacet_ggx_aniso_setup(MicrofacetBsdf *bsdf)
return SD_BSDF|SD_BSDF_HAS_EVAL;
}
-ccl_device int bsdf_microfacet_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
{
bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x);
bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y);
bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z);
+ float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+ float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+ bsdf->sample_weight *= F;
+
bsdf->alpha_x = saturate(bsdf->alpha_x);
bsdf->alpha_y = saturate(bsdf->alpha_y);
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
index b07b515c405..22d0092093a 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
@@ -245,35 +245,69 @@ ccl_device_forceinline float mf_ggx_albedo(float r)
return saturate(albedo);
}
+ccl_device_inline float mf_ggx_transmission_albedo(float a, float ior)
+{
+ if(ior < 1.0f) {
+ ior = 1.0f/ior;
+ }
+ a = saturate(a);
+ ior = clamp(ior, 1.0f, 3.0f);
+ float I_1 = 0.0476898f*expf(-0.978352f*(ior-0.65657f)*(ior-0.65657f)) - 0.033756f*ior + 0.993261f;
+ float R_1 = (((0.116991f*a - 0.270369f)*a + 0.0501366f)*a - 0.00411511f)*a + 1.00008f;
+ float I_2 = (((-2.08704f*ior + 26.3298f)*ior - 127.906f)*ior + 292.958f)*ior - 287.946f + 199.803f/(ior*ior) - 101.668f/(ior*ior*ior);
+ float R_2 = ((((5.3725f*a -24.9307f)*a + 22.7437f)*a - 3.40751f)*a + 0.0986325f)*a + 0.00493504f;
+
+ return saturate(1.0f + I_2*R_2*0.0019127f - (1.0f - I_1)*(1.0f - R_1)*9.3205f);
+}
+
ccl_device_forceinline float mf_ggx_pdf(const float3 wi, const float3 wo, const float alpha)
{
float D = D_ggx(normalize(wi+wo), alpha);
float lambda = mf_lambda(wi, make_float2(alpha, alpha));
+ float singlescatter = 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f);
+
+ float multiscatter = wo.z * M_1_PI_F;
+
float albedo = mf_ggx_albedo(alpha);
- return 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f) + (1.0f - albedo) * wo.z;
+ return albedo*singlescatter + (1.0f - albedo)*multiscatter;
}
ccl_device_forceinline float mf_ggx_aniso_pdf(const float3 wi, const float3 wo, const float2 alpha)
{
- return 0.25f * D_ggx_aniso(normalize(wi+wo), alpha) / ((1.0f + mf_lambda(wi, alpha)) * wi.z) + (1.0f - mf_ggx_albedo(sqrtf(alpha.x*alpha.y))) * wo.z;
+ float D = D_ggx_aniso(normalize(wi+wo), alpha);
+ float lambda = mf_lambda(wi, alpha);
+ float singlescatter = 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f);
+
+ float multiscatter = wo.z * M_1_PI_F;
+
+ float albedo = mf_ggx_albedo(sqrtf(alpha.x*alpha.y));
+ return albedo*singlescatter + (1.0f - albedo)*multiscatter;
}
ccl_device_forceinline float mf_glass_pdf(const float3 wi, const float3 wo, const float alpha, const float eta)
{
- float3 wh;
- float fresnel;
- if(wi.z*wo.z > 0.0f) {
- wh = normalize(wi + wo);
- fresnel = fresnel_dielectric_cos(dot(wi, wh), eta);
- }
- else {
- wh = normalize(wi + wo*eta);
- fresnel = 1.0f - fresnel_dielectric_cos(dot(wi, wh), eta);
- }
+ bool reflective = (wi.z*wo.z > 0.0f);
+
+ float wh_len;
+ float3 wh = normalize_len(wi + (reflective? wo : (wo*eta)), &wh_len);
if(wh.z < 0.0f)
wh = -wh;
float3 r_wi = (wi.z < 0.0f)? -wi: wi;
- return fresnel * max(0.0f, dot(r_wi, wh)) * D_ggx(wh, alpha) / ((1.0f + mf_lambda(r_wi, make_float2(alpha, alpha))) * r_wi.z) + fabsf(wo.z);
+ float lambda = mf_lambda(r_wi, make_float2(alpha, alpha));
+ float D = D_ggx(wh, alpha);
+ float fresnel = fresnel_dielectric_cos(dot(r_wi, wh), eta);
+
+ float multiscatter = fabsf(wo.z * M_1_PI_F);
+ if(reflective) {
+ float singlescatter = 0.25f * D / max((1.0f + lambda) * r_wi.z, 1e-7f);
+ float albedo = mf_ggx_albedo(alpha);
+ return fresnel * (albedo*singlescatter + (1.0f - albedo)*multiscatter);
+ }
+ else {
+ float singlescatter = fabsf(dot(r_wi, wh)*dot(wo, wh) * D * eta*eta / max((1.0f + lambda) * r_wi.z * wh_len*wh_len, 1e-7f));
+ float albedo = mf_ggx_transmission_albedo(alpha, eta);
+ return (1.0f - fresnel) * (albedo*singlescatter + (1.0f - albedo)*multiscatter);
+ }
}
/* === Actual random walk implementations, one version of mf_eval and mf_sample per phase function. === */
@@ -326,13 +360,17 @@ ccl_device int bsdf_microfacet_multi_ggx_aniso_setup(MicrofacetBsdf *bsdf)
return bsdf_microfacet_multi_ggx_common_setup(bsdf);
}
-ccl_device int bsdf_microfacet_multi_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_multi_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
{
if(is_zero(bsdf->T))
bsdf->T = make_float3(1.0f, 0.0f, 0.0f);
bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID;
+ float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+ float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+ bsdf->sample_weight *= F;
+
return bsdf_microfacet_multi_ggx_common_setup(bsdf);
}
@@ -345,12 +383,16 @@ ccl_device int bsdf_microfacet_multi_ggx_setup(MicrofacetBsdf *bsdf)
return bsdf_microfacet_multi_ggx_common_setup(bsdf);
}
-ccl_device int bsdf_microfacet_multi_ggx_fresnel_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_multi_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
{
bsdf->alpha_y = bsdf->alpha_x;
bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID;
+ float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+ float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+ bsdf->sample_weight *= F;
+
return bsdf_microfacet_multi_ggx_common_setup(bsdf);
}
@@ -455,7 +497,7 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_setup(MicrofacetBsdf *bsdf)
return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG;
}
-ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
{
bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
bsdf->alpha_y = bsdf->alpha_x;
@@ -469,6 +511,10 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsd
bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID;
+ float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior);
+ float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0));
+ bsdf->sample_weight *= F;
+
return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG;
}
diff --git a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
index 215c32e1ffb..f8ca64293b0 100644
--- a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
@@ -58,6 +58,14 @@ ccl_device int bsdf_principled_diffuse_setup(PrincipledDiffuseBsdf *bsdf)
return SD_BSDF|SD_BSDF_HAS_EVAL;
}
+ccl_device bool bsdf_principled_diffuse_merge(const ShaderClosure *a, const ShaderClosure *b)
+{
+ const PrincipledDiffuseBsdf *bsdf_a = (const PrincipledDiffuseBsdf*)a;
+ const PrincipledDiffuseBsdf *bsdf_b = (const PrincipledDiffuseBsdf*)b;
+
+ return (isequal_float3(bsdf_a->N, bsdf_b->N) && bsdf_a->roughness == bsdf_b->roughness);
+}
+
ccl_device float3 bsdf_principled_diffuse_eval_reflect(const ShaderClosure *sc, const float3 I,
const float3 omega_in, float *pdf)
{
diff --git a/intern/cycles/kernel/filter/filter_features.h b/intern/cycles/kernel/filter/filter_features.h
index 53d703de143..6226ed2c2ef 100644
--- a/intern/cycles/kernel/filter/filter_features.h
+++ b/intern/cycles/kernel/filter/filter_features.h
@@ -78,16 +78,10 @@ ccl_device_inline void filter_calculate_scale(float *scale)
scale[3] = scale[4] = scale[5] = 1.0f/max(sqrtf(scale[3]), 0.01f);
}
-ccl_device_inline float3 filter_get_pixel_color(const ccl_global float *ccl_restrict buffer,
- int pass_stride)
+ccl_device_inline float3 filter_get_color(const ccl_global float *ccl_restrict buffer,
+ int pass_stride)
{
- return make_float3(ccl_get_feature(buffer, 0), ccl_get_feature(buffer, 1), ccl_get_feature(buffer, 2));
-}
-
-ccl_device_inline float filter_get_pixel_variance(const ccl_global float *ccl_restrict buffer,
- int pass_stride)
-{
- return average(make_float3(ccl_get_feature(buffer, 0), ccl_get_feature(buffer, 1), ccl_get_feature(buffer, 2)));
+ return make_float3(ccl_get_feature(buffer, 8), ccl_get_feature(buffer, 9), ccl_get_feature(buffer, 10));
}
ccl_device_inline void design_row_add(float *design_row,
diff --git a/intern/cycles/kernel/filter/filter_nlm_cpu.h b/intern/cycles/kernel/filter/filter_nlm_cpu.h
index 5cb4038bc33..3e752bce68f 100644
--- a/intern/cycles/kernel/filter/filter_nlm_cpu.h
+++ b/intern/cycles/kernel/filter/filter_nlm_cpu.h
@@ -101,7 +101,7 @@ ccl_device_inline void kernel_filter_nlm_calc_weight(const float *ccl_restrict d
for(int x = rect.x; x < rect.z; x++) {
const int low = max(rect.x, x-f);
const int high = min(rect.z, x+f+1);
- out_image[y*w+x] = expf(-max(out_image[y*w+x] * (1.0f/(high - low)), 0.0f));
+ out_image[y*w+x] = fast_expf(-max(out_image[y*w+x] * (1.0f/(high - low)), 0.0f));
}
}
}
@@ -133,8 +133,6 @@ ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy,
ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy,
const float *ccl_restrict difference_image,
const float *ccl_restrict buffer,
- float *color_pass,
- float *variance_pass,
float *transform,
int *rank,
float *XtWX,
@@ -167,7 +165,6 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy,
dx, dy, w, h,
pass_stride,
buffer,
- color_pass, variance_pass,
l_transform, l_rank,
weight, l_XtWX, l_XtWY, 0);
}
diff --git a/intern/cycles/kernel/filter/filter_nlm_gpu.h b/intern/cycles/kernel/filter/filter_nlm_gpu.h
index 078c5f56763..2c5ac807051 100644
--- a/intern/cycles/kernel/filter/filter_nlm_gpu.h
+++ b/intern/cycles/kernel/filter/filter_nlm_gpu.h
@@ -66,7 +66,7 @@ ccl_device_inline void kernel_filter_nlm_calc_weight(int x, int y,
sum += difference_image[y*w+x1];
}
sum *= 1.0f/(high-low);
- out_image[y*w+x] = expf(-max(sum, 0.0f));
+ out_image[y*w+x] = fast_expf(-max(sum, 0.0f));
}
ccl_device_inline void kernel_filter_nlm_update_output(int x, int y,
@@ -97,8 +97,6 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int fx, int fy,
int dx, int dy,
const ccl_global float *ccl_restrict difference_image,
const ccl_global float *ccl_restrict buffer,
- ccl_global float *color_pass,
- ccl_global float *variance_pass,
const ccl_global float *ccl_restrict transform,
ccl_global int *rank,
ccl_global float *XtWX,
@@ -130,7 +128,6 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int fx, int fy,
dx, dy, w, h,
pass_stride,
buffer,
- color_pass, variance_pass,
transform, rank,
weight, XtWX, XtWY,
localIdx);
diff --git a/intern/cycles/kernel/filter/filter_prefilter.h b/intern/cycles/kernel/filter/filter_prefilter.h
index 82cc36625ec..d5ae1b73927 100644
--- a/intern/cycles/kernel/filter/filter_prefilter.h
+++ b/intern/cycles/kernel/filter/filter_prefilter.h
@@ -142,13 +142,22 @@ ccl_device void kernel_filter_detect_outliers(int x, int y,
float ref = 2.0f*values[(int)(n*0.75f)];
float fac = 1.0f;
if(L > ref) {
- /* If the pixel is an outlier, negate the depth value to mark it as one.
- * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */
- depth[idx] = -depth[idx];
- fac = ref/L;
- variance[idx ] *= fac*fac;
- variance[idx + pass_stride] *= fac*fac;
- variance[idx+2*pass_stride] *= fac*fac;
+ /* The pixel appears to be an outlier.
+ * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is that the pixel
+ * should actually be at the reference value:
+ * If the reference is within the 3-sigma interval, the pixel is assumed to be a statistical outlier.
+ * Otherwise, it is very unlikely that the pixel should be darker, which indicates a legitimate highlight.
+ */
+ float stddev = sqrtf(average(make_float3(variance[idx], variance[idx+pass_stride], variance[idx+2*pass_stride])));
+ if(L - 3*stddev < ref) {
+ /* The pixel is an outlier, so negate the depth value to mark it as one.
+ * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */
+ depth[idx] = -depth[idx];
+ fac = ref/L;
+ variance[idx ] *= fac*fac;
+ variance[idx + pass_stride] *= fac*fac;
+ variance[idx+2*pass_stride] *= fac*fac;
+ }
}
out[idx ] = fac*image[idx];
out[idx + pass_stride] = fac*image[idx + pass_stride];
diff --git a/intern/cycles/kernel/filter/filter_reconstruction.h b/intern/cycles/kernel/filter/filter_reconstruction.h
index 4a4c81b7ba3..25a3025056c 100644
--- a/intern/cycles/kernel/filter/filter_reconstruction.h
+++ b/intern/cycles/kernel/filter/filter_reconstruction.h
@@ -22,8 +22,6 @@ ccl_device_inline void kernel_filter_construct_gramian(int x, int y,
int w, int h,
int pass_stride,
const ccl_global float *ccl_restrict buffer,
- ccl_global float *color_pass,
- ccl_global float *variance_pass,
const ccl_global float *ccl_restrict transform,
ccl_global int *rank,
float weight,
@@ -31,38 +29,31 @@ ccl_device_inline void kernel_filter_construct_gramian(int x, int y,
ccl_global float3 *XtWY,
int localIdx)
{
+ if(weight < 1e-3f) {
+ return;
+ }
+
int p_offset = y *w + x;
int q_offset = (y+dy)*w + (x+dx);
-#ifdef __KERNEL_CPU__
- const int stride = 1;
- (void)storage_stride;
- (void)localIdx;
- float design_row[DENOISE_FEATURES+1];
-#elif defined(__KERNEL_CUDA__)
+#ifdef __KERNEL_GPU__
const int stride = storage_stride;
+#else
+ const int stride = 1;
+ (void) storage_stride;
+#endif
+
+#ifdef __KERNEL_CUDA__
ccl_local float shared_design_row[(DENOISE_FEATURES+1)*CCL_MAX_LOCAL_SIZE];
ccl_local_param float *design_row = shared_design_row + localIdx*(DENOISE_FEATURES+1);
#else
- const int stride = storage_stride;
float design_row[DENOISE_FEATURES+1];
#endif
- float3 p_color = filter_get_pixel_color(color_pass + p_offset, pass_stride);
- float3 q_color = filter_get_pixel_color(color_pass + q_offset, pass_stride);
+ float3 q_color = filter_get_color(buffer + q_offset, pass_stride);
- float p_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + p_offset, pass_stride));
- float q_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + q_offset, pass_stride));
-
- /* If the pixel was flagged as an outlier during prefiltering, skip it.
- * Otherwise, perform the regular confidence interval test unless
- * the center pixel is an outlier (in that case, using the confidence
- * interval test could result in no pixels being used at all). */
- bool p_outlier = (ccl_get_feature(buffer + p_offset, 0) < 0.0f);
- bool q_outlier = (ccl_get_feature(buffer + q_offset, 0) < 0.0f);
- bool outside_of_interval = (average(fabs(p_color - q_color)) > 2.0f*(p_std_dev + q_std_dev + 1e-3f));
-
- if(q_outlier || (!p_outlier && outside_of_interval)) {
+ /* If the pixel was flagged as an outlier during prefiltering, skip it. */
+ if(ccl_get_feature(buffer + q_offset, 0) < 0.0f) {
return;
}
@@ -83,13 +74,19 @@ ccl_device_inline void kernel_filter_finalize(int x, int y, int w, int h,
int4 buffer_params,
int sample)
{
-#ifdef __KERNEL_CPU__
- const int stride = 1;
- (void)storage_stride;
-#else
+#ifdef __KERNEL_GPU__
const int stride = storage_stride;
+#else
+ const int stride = 1;
+ (void) storage_stride;
#endif
+ if(XtWX[0] < 1e-3f) {
+ /* There is not enough information to determine a denoised result.
+ * As a fallback, keep the original value of the pixel. */
+ return;
+ }
+
/* The weighted average of pixel colors (essentially, the NLM-filtered image).
* In case the solution of the linear model fails due to numerical issues,
* fall back to this value. */
@@ -102,6 +99,9 @@ ccl_device_inline void kernel_filter_finalize(int x, int y, int w, int h,
final_color = mean_color;
}
+ /* Clamp pixel value to positive values. */
+ final_color = max(final_color, make_float3(0.0f, 0.0f, 0.0f));
+
ccl_global float *combined_buffer = buffer + (y*buffer_params.y + x + buffer_params.x)*buffer_params.z;
final_color *= sample;
if(buffer_params.w) {
@@ -114,6 +114,4 @@ ccl_device_inline void kernel_filter_finalize(int x, int y, int w, int h,
combined_buffer[2] = final_color.z;
}
-#undef STORAGE_TYPE
-
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index 06728415c15..175bd6b9737 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -621,25 +621,43 @@ ccl_device_inline void path_radiance_accum_sample(PathRadiance *L, PathRadiance
{
float fac = 1.0f/num_samples;
+#ifdef __SPLIT_KERNEL__
+# define safe_float3_add(f, v) \
+ do { \
+ ccl_global float *p = (ccl_global float*)(&(f)); \
+ atomic_add_and_fetch_float(p+0, (v).x); \
+ atomic_add_and_fetch_float(p+1, (v).y); \
+ atomic_add_and_fetch_float(p+2, (v).z); \
+ } while(0)
+#else
+# define safe_float3_add(f, v) (f) += (v)
+#endif /* __SPLIT_KERNEL__ */
+
#ifdef __PASSES__
- L->direct_diffuse += L_sample->direct_diffuse*fac;
- L->direct_glossy += L_sample->direct_glossy*fac;
- L->direct_transmission += L_sample->direct_transmission*fac;
- L->direct_subsurface += L_sample->direct_subsurface*fac;
- L->direct_scatter += L_sample->direct_scatter*fac;
-
- L->indirect_diffuse += L_sample->indirect_diffuse*fac;
- L->indirect_glossy += L_sample->indirect_glossy*fac;
- L->indirect_transmission += L_sample->indirect_transmission*fac;
- L->indirect_subsurface += L_sample->indirect_subsurface*fac;
- L->indirect_scatter += L_sample->indirect_scatter*fac;
-
- L->background += L_sample->background*fac;
- L->ao += L_sample->ao*fac;
- L->shadow += L_sample->shadow*fac;
+ safe_float3_add(L->direct_diffuse, L_sample->direct_diffuse*fac);
+ safe_float3_add(L->direct_glossy, L_sample->direct_glossy*fac);
+ safe_float3_add(L->direct_transmission, L_sample->direct_transmission*fac);
+ safe_float3_add(L->direct_subsurface, L_sample->direct_subsurface*fac);
+ safe_float3_add(L->direct_scatter, L_sample->direct_scatter*fac);
+
+ safe_float3_add(L->indirect_diffuse, L_sample->indirect_diffuse*fac);
+ safe_float3_add(L->indirect_glossy, L_sample->indirect_glossy*fac);
+ safe_float3_add(L->indirect_transmission, L_sample->indirect_transmission*fac);
+ safe_float3_add(L->indirect_subsurface, L_sample->indirect_subsurface*fac);
+ safe_float3_add(L->indirect_scatter, L_sample->indirect_scatter*fac);
+
+ safe_float3_add(L->background, L_sample->background*fac);
+ safe_float3_add(L->ao, L_sample->ao*fac);
+ safe_float3_add(L->shadow, L_sample->shadow*fac);
+# ifdef __SPLIT_KERNEL__
+ atomic_add_and_fetch_float(&L->mist, L_sample->mist*fac);
+# else
L->mist += L_sample->mist*fac;
-#endif
- L->emission += L_sample->emission * fac;
+# endif /* __SPLIT_KERNEL__ */
+#endif /* __PASSES__ */
+ safe_float3_add(L->emission, L_sample->emission*fac);
+
+#undef safe_float3_add
}
#ifdef __SHADOW_TRICKS__
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index 9d52834ffcc..9cd7ffb181d 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -142,7 +142,7 @@ ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, ccl_glob
ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg,
ShaderData *sd,
- ccl_global PathState *state,
+ ccl_addr_space PathState *state,
PathRadiance *L)
{
#ifdef __DENOISING_FEATURES__
diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h
index 0fa77d9e8bd..5d92fd12201 100644
--- a/intern/cycles/kernel/kernel_path_state.h
+++ b/intern/cycles/kernel/kernel_path_state.h
@@ -139,9 +139,11 @@ ccl_device_inline void path_state_next(KernelGlobals *kg, ccl_addr_space PathSta
/* random number generator next bounce */
state->rng_offset += PRNG_BOUNCE_NUM;
+#ifdef __DENOISING_FEATURES__
if((state->denoising_feature_weight == 0.0f) && !(state->flag & PATH_RAY_SHADOW_CATCHER)) {
state->flag &= ~PATH_RAY_STORE_SHADOW_INFO;
}
+#endif
}
ccl_device_inline uint path_state_ray_visibility(KernelGlobals *kg, PathState *state)
diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h
index 96bc636d5ac..e32d4bbbc1b 100644
--- a/intern/cycles/kernel/kernel_queues.h
+++ b/intern/cycles/kernel/kernel_queues.h
@@ -128,6 +128,21 @@ ccl_device unsigned int get_global_queue_index(
return my_gqidx;
}
+ccl_device int dequeue_ray_index(
+ int queue_number,
+ ccl_global int *queues,
+ int queue_size,
+ ccl_global int *queue_index)
+{
+ int index = atomic_fetch_and_dec_uint32((ccl_global uint*)&queue_index[queue_number])-1;
+
+ if(index < 0) {
+ return QUEUE_EMPTY_SLOT;
+ }
+
+ return queues[index + queue_number * queue_size];
+}
+
CCL_NAMESPACE_END
#endif // __KERNEL_QUEUE_H__
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index 1026cde7b29..6475d4b66fd 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -418,7 +418,7 @@ ccl_device_noinline void subsurface_scatter_multi_setup(
}
/* subsurface scattering step, from a point on the surface to another nearby point on the same object */
-ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, ccl_global PathState *state,
+ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state,
int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
{
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index dbeaffdfb24..34affab1b9d 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -135,7 +135,7 @@ CCL_NAMESPACE_BEGIN
* this is because megakernel in device_opencl does not support
* custom cflags depending on the scene features.
*/
-# endif /* __KERNEL_OPENCL_NVIDIA__ */
+# endif /* __KERNEL_OPENCL_APPLE__ */
# ifdef __KERNEL_OPENCL_AMD__
# define __CL_USE_NATIVE__
@@ -236,6 +236,9 @@ CCL_NAMESPACE_BEGIN
#ifdef __NO_PRINCIPLED__
# undef __PRINCIPLED__
#endif
+#ifdef __NO_DENOISING__
+# undef __DENOISING_FEATURES__
+#endif
/* Random Numbers */
@@ -1387,6 +1390,8 @@ enum QueueNumber {
#ifdef __BRANCHED_PATH__
/* All rays moving to next iteration of the indirect loop for light */
QUEUE_LIGHT_INDIRECT_ITER,
+ /* Queue of all inactive rays. These are candidates for sharing work of indirect loops */
+ QUEUE_INACTIVE_RAYS,
# ifdef __VOLUME__
/* All rays moving to next iteration of the indirect loop for volumes */
QUEUE_VOLUME_INDIRECT_ITER,
@@ -1429,6 +1434,9 @@ enum RayState {
RAY_BRANCHED_VOLUME_INDIRECT = (1 << 5),
RAY_BRANCHED_SUBSURFACE_INDIRECT = (1 << 6),
RAY_BRANCHED_INDIRECT = (RAY_BRANCHED_LIGHT_INDIRECT | RAY_BRANCHED_VOLUME_INDIRECT | RAY_BRANCHED_SUBSURFACE_INDIRECT),
+
+ /* Ray is evaluating an iteration of an indirect loop for another thread */
+ RAY_BRANCHED_INDIRECT_SHARED = (1 << 7),
};
#define ASSIGN_RAY_STATE(ray_state, ray_index, state) (ray_state[ray_index] = ((ray_state[ray_index] & RAY_FLAG_MASK) | state))
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index 9c0878249d4..1e472aaf51a 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -660,6 +660,7 @@ typedef struct VolumeSegment {
* but the entire segment is needed to do always scattering, rather than probabilistically
* hitting or missing the volume. if we don't know the transmittance at the end of the
* volume we can't generate stratified distance samples up to that transmittance */
+#ifdef __VOLUME_DECOUPLED__
ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state,
Ray *ray, ShaderData *sd, VolumeSegment *segment, bool heterogeneous)
{
@@ -829,6 +830,7 @@ ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *s
#endif
}
}
+#endif /* __VOLUME_DECOUPLED__ */
/* scattering for homogeneous and heterogeneous volumes, using decoupled ray
* marching.
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
index ffd34c293fc..2ed713299fd 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
@@ -107,8 +107,6 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
int dy,
float *difference_image,
float *buffer,
- float *color_pass,
- float *variance_pass,
float *transform,
int *rank,
float *XtWX,
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
index 261176846b1..8dc1a8d583c 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
@@ -213,8 +213,6 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
int dy,
float *difference_image,
float *buffer,
- float *color_pass,
- float *variance_pass,
float *transform,
int *rank,
float *XtWX,
@@ -229,7 +227,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian);
#else
- kernel_filter_nlm_construct_gramian(dx, dy, difference_image, buffer, color_pass, variance_pass, transform, rank, XtWX, XtWY, load_int4(rect), load_int4(filter_rect), w, h, f, pass_stride);
+ kernel_filter_nlm_construct_gramian(dx, dy, difference_image, buffer, transform, rank, XtWX, XtWY, load_int4(rect), load_int4(filter_rect), w, h, f, pass_stride);
#endif
}
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index 9895080d328..c8938534fe8 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -85,6 +85,7 @@ DECLARE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
DECLARE_SPLIT_KERNEL_FUNCTION(direct_lighting)
DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
+DECLARE_SPLIT_KERNEL_FUNCTION(enqueue_inactive)
DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update)
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 9b85a864153..d4315ee5ec4 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -53,6 +53,7 @@
# include "kernel/split/kernel_direct_lighting.h"
# include "kernel/split/kernel_shadow_blocked_ao.h"
# include "kernel/split/kernel_shadow_blocked_dl.h"
+# include "kernel/split/kernel_enqueue_inactive.h"
# include "kernel/split/kernel_next_iteration_setup.h"
# include "kernel/split/kernel_indirect_subsurface.h"
# include "kernel/split/kernel_buffer_update.h"
@@ -230,6 +231,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu
index 2edbff08087..009c3fde9d5 100644
--- a/intern/cycles/kernel/kernels/cuda/filter.cu
+++ b/intern/cycles/kernel/kernels/cuda/filter.cu
@@ -207,8 +207,6 @@ CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_filter_nlm_construct_gramian(int dx, int dy,
const float *ccl_restrict difference_image,
const float *ccl_restrict buffer,
- float *color_pass,
- float *variance_pass,
float const* __restrict__ transform,
int *rank,
float *XtWX,
@@ -225,7 +223,6 @@ kernel_cuda_filter_nlm_construct_gramian(int dx, int dy,
dx, dy,
difference_image,
buffer,
- color_pass, variance_pass,
transform, rank,
XtWX, XtWY,
rect, filter_rect,
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index 8b7f1a8d405..628891b1458 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -39,6 +39,7 @@
#include "kernel/split/kernel_direct_lighting.h"
#include "kernel/split/kernel_shadow_blocked_ao.h"
#include "kernel/split/kernel_shadow_blocked_dl.h"
+#include "kernel/split/kernel_enqueue_inactive.h"
#include "kernel/split/kernel_next_iteration_setup.h"
#include "kernel/split/kernel_indirect_subsurface.h"
#include "kernel/split/kernel_buffer_update.h"
@@ -118,6 +119,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl
index 0462ca6f9bc..ba53ba4b26f 100644
--- a/intern/cycles/kernel/kernels/opencl/filter.cl
+++ b/intern/cycles/kernel/kernels/opencl/filter.cl
@@ -207,8 +207,6 @@ __kernel void kernel_ocl_filter_nlm_construct_gramian(int dx,
int dy,
const ccl_global float *ccl_restrict difference_image,
const ccl_global float *ccl_restrict buffer,
- ccl_global float *color_pass,
- ccl_global float *variance_pass,
const ccl_global float *ccl_restrict transform,
ccl_global int *rank,
ccl_global float *XtWX,
@@ -227,7 +225,6 @@ __kernel void kernel_ocl_filter_nlm_construct_gramian(int dx,
dx, dy,
difference_image,
buffer,
- color_pass, variance_pass,
transform, rank,
XtWX, XtWY,
rect, filter_rect,
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl
index db65c91baf7..dcea2630aef 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl
@@ -18,10 +18,9 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_buffer_update.h"
-__kernel void kernel_ocl_path_trace_buffer_update(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- ccl_local unsigned int local_queue_atomics;
- kernel_buffer_update((KernelGlobals*)kg, &local_queue_atomics);
-}
+#define KERNEL_NAME buffer_update
+#define LOCALS_TYPE unsigned int
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+#undef LOCALS_TYPE
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
index eb34f750881..ed64ae01aae 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
@@ -18,10 +18,9 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_direct_lighting.h"
-__kernel void kernel_ocl_path_trace_direct_lighting(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- ccl_local unsigned int local_queue_atomics;
- kernel_direct_lighting((KernelGlobals*)kg, &local_queue_atomics);
-}
+#define KERNEL_NAME direct_lighting
+#define LOCALS_TYPE unsigned int
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+#undef LOCALS_TYPE
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl
index 83ef5f5f3f2..8afaa686e28 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl
@@ -18,9 +18,7 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_do_volume.h"
-__kernel void kernel_ocl_path_trace_do_volume(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- kernel_do_volume((KernelGlobals*)kg);
-}
+#define KERNEL_NAME do_volume
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl b/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl
new file mode 100644
index 00000000000..e68d4104a91
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_enqueue_inactive.h"
+
+#define KERNEL_NAME enqueue_inactive
+#define LOCALS_TYPE unsigned int
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+#undef LOCALS_TYPE
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
index d071b39aa6f..9e1e57beba6 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
@@ -18,12 +18,9 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h"
-__kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- ccl_local BackgroundAOLocals locals;
- kernel_holdout_emission_blurring_pathtermination_ao(
- (KernelGlobals*)kg,
- &locals);
-}
+#define KERNEL_NAME holdout_emission_blurring_pathtermination_ao
+#define LOCALS_TYPE BackgroundAOLocals
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+#undef LOCALS_TYPE
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl
index 8c213ff5cb2..192d01444ba 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl
@@ -18,9 +18,7 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_indirect_background.h"
-__kernel void kernel_ocl_path_trace_indirect_background(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- kernel_indirect_background((KernelGlobals*)kg);
-}
+#define KERNEL_NAME indirect_background
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl
index 998ebc4c0c3..84938b889e5 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl
@@ -18,9 +18,7 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_indirect_subsurface.h"
-__kernel void kernel_ocl_path_trace_indirect_subsurface(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- kernel_indirect_subsurface((KernelGlobals*)kg);
-}
+#define KERNEL_NAME indirect_subsurface
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
index 822d2287715..c314dc96c33 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
@@ -18,9 +18,7 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_lamp_emission.h"
-__kernel void kernel_ocl_path_trace_lamp_emission(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- kernel_lamp_emission((KernelGlobals*)kg);
-}
+#define KERNEL_NAME lamp_emission
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
index 6d207253a40..8b1332bf013 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
@@ -18,10 +18,9 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_next_iteration_setup.h"
-__kernel void kernel_ocl_path_trace_next_iteration_setup(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- ccl_local unsigned int local_queue_atomics;
- kernel_next_iteration_setup((KernelGlobals*)kg, &local_queue_atomics);
-}
+#define KERNEL_NAME next_iteration_setup
+#define LOCALS_TYPE unsigned int
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+#undef LOCALS_TYPE
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl
index bd9aa9538c8..fa210e747c0 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl
@@ -18,9 +18,7 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_path_init.h"
-__kernel void kernel_ocl_path_trace_path_init(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- kernel_path_init((KernelGlobals*)kg);
-}
+#define KERNEL_NAME path_init
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl
index 9be154e3d75..68ee6f1d536 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl
@@ -18,10 +18,9 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_queue_enqueue.h"
-__kernel void kernel_ocl_path_trace_queue_enqueue(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- ccl_local QueueEnqueueLocals locals;
- kernel_queue_enqueue((KernelGlobals*)kg, &locals);
-}
+#define KERNEL_NAME queue_enqueue
+#define LOCALS_TYPE QueueEnqueueLocals
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+#undef LOCALS_TYPE
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl
index eb4fb4d153a..10d09377ba9 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl
@@ -18,9 +18,7 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_scene_intersect.h"
-__kernel void kernel_ocl_path_trace_scene_intersect(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- kernel_scene_intersect((KernelGlobals*)kg);
-}
+#define KERNEL_NAME scene_intersect
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
index 5bfb31b193a..40eaa561863 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
@@ -18,9 +18,7 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_shader_eval.h"
-__kernel void kernel_ocl_path_trace_shader_eval(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- kernel_shader_eval((KernelGlobals*)kg);
-}
+#define KERNEL_NAME shader_eval
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl
index 38bfd04ad4c..8c36100f762 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl
@@ -18,10 +18,9 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_shader_setup.h"
-__kernel void kernel_ocl_path_trace_shader_setup(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- ccl_local unsigned int local_queue_atomics;
- kernel_shader_setup((KernelGlobals*)kg, &local_queue_atomics);
-}
+#define KERNEL_NAME shader_setup
+#define LOCALS_TYPE unsigned int
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+#undef LOCALS_TYPE
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl
index 6f722915d45..bcacaa4a054 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl
@@ -19,10 +19,9 @@
#include "kernel/split/kernel_shader_sort.h"
__attribute__((reqd_work_group_size(64, 1, 1)))
-__kernel void kernel_ocl_path_trace_shader_sort(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- ccl_local ShaderSortLocals locals;
- kernel_shader_sort((KernelGlobals*)kg, &locals);
-}
+#define KERNEL_NAME shader_sort
+#define LOCALS_TYPE ShaderSortLocals
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+#undef LOCALS_TYPE
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl
index 6a8ef81b32a..8de250a375c 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl
@@ -18,9 +18,7 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_shadow_blocked_ao.h"
-__kernel void kernel_ocl_path_trace_shadow_blocked_ao(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- kernel_shadow_blocked_ao((KernelGlobals*)kg);
-}
+#define KERNEL_NAME shadow_blocked_ao
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl
index b255cc5ef8b..29da77022ed 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl
@@ -18,9 +18,7 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_shadow_blocked_dl.h"
-__kernel void kernel_ocl_path_trace_shadow_blocked_dl(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- kernel_shadow_blocked_dl((KernelGlobals*)kg);
-}
+#define KERNEL_NAME shadow_blocked_dl
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split.cl b/intern/cycles/kernel/kernels/opencl/kernel_split.cl
index 8de82db7afe..651addb02f4 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_split.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_split.cl
@@ -31,6 +31,7 @@
#include "kernel/kernels/opencl/kernel_direct_lighting.cl"
#include "kernel/kernels/opencl/kernel_shadow_blocked_ao.cl"
#include "kernel/kernels/opencl/kernel_shadow_blocked_dl.cl"
+#include "kernel/kernels/opencl/kernel_enqueue_inactive.cl"
#include "kernel/kernels/opencl/kernel_next_iteration_setup.cl"
#include "kernel/kernels/opencl/kernel_indirect_subsurface.cl"
#include "kernel/kernels/opencl/kernel_buffer_update.cl"
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
new file mode 100644
index 00000000000..f1e914a70d4
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define KERNEL_NAME_JOIN(a, b) a ## _ ## b
+#define KERNEL_NAME_EVAL(a, b) KERNEL_NAME_JOIN(a, b)
+
+__kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, KERNEL_NAME)(
+ ccl_global char *kg_global,
+ ccl_constant KernelData *data,
+
+ ccl_global void *split_data_buffer,
+ ccl_global char *ray_state,
+ ccl_global uint *rng_state,
+
+#define KERNEL_TEX(type, ttype, name) \
+ ccl_global type *name,
+#include "kernel/kernel_textures.h"
+
+ ccl_global int *queue_index,
+ ccl_global char *use_queues_flag,
+ ccl_global unsigned int *work_pools,
+ ccl_global float *buffer
+ )
+{
+#ifdef LOCALS_TYPE
+ ccl_local LOCALS_TYPE locals;
+#endif
+
+ KernelGlobals *kg = (KernelGlobals*)kg_global;
+
+ if(ccl_local_id(0) + ccl_local_id(1) == 0) {
+ kg->data = data;
+
+ kernel_split_params.rng_state = rng_state;
+ kernel_split_params.queue_index = queue_index;
+ kernel_split_params.use_queues_flag = use_queues_flag;
+ kernel_split_params.work_pools = work_pools;
+ kernel_split_params.buffer = buffer;
+
+ split_data_init(kg, &kernel_split_state, ccl_global_size(0)*ccl_global_size(1), split_data_buffer, ray_state);
+
+#define KERNEL_TEX(type, ttype, name) \
+ kg->name = name;
+#include "kernel/kernel_textures.h"
+ }
+
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ KERNEL_NAME_EVAL(kernel, KERNEL_NAME)(
+ kg
+#ifdef LOCALS_TYPE
+ , &locals
+#endif
+ );
+}
+
+#undef KERNEL_NAME_JOIN
+#undef KERNEL_NAME_EVAL
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
index 99b74a1802b..2b3be38df84 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
@@ -18,9 +18,7 @@
#include "kernel/split/kernel_split_common.h"
#include "kernel/split/kernel_subsurface_scatter.h"
-__kernel void kernel_ocl_path_trace_subsurface_scatter(
- ccl_global char *kg,
- ccl_constant KernelData *data)
-{
- kernel_subsurface_scatter((KernelGlobals*)kg);
-}
+#define KERNEL_NAME subsurface_scatter
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
+
diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp b/intern/cycles/kernel/osl/osl_bssrdf.cpp
index 188c3960a5f..27a96720c1e 100644
--- a/intern/cycles/kernel/osl/osl_bssrdf.cpp
+++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp
@@ -191,7 +191,7 @@ class PrincipledBSSRDFClosure : public CBSSRDFClosure {
public:
void setup(ShaderData *sd, int path_flag, float3 weight)
{
- alloc(sd, path_flag, weight * albedo, CLOSURE_BSSRDF_PRINCIPLED_ID);
+ alloc(sd, path_flag, weight, CLOSURE_BSSRDF_PRINCIPLED_ID);
}
};
diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp
index 5b66793a05d..14c5c1c3db5 100644
--- a/intern/cycles/kernel/osl/osl_closures.cpp
+++ b/intern/cycles/kernel/osl/osl_closures.cpp
@@ -156,7 +156,7 @@ BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction, microfacet_beckmann_refra
BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction)
BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, HairBsdf, LABEL_GLOSSY)
- CLOSURE_FLOAT3_PARAM(HairReflectionClosure, unused),
+ CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.N),
CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness1),
CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness2),
CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T),
@@ -164,7 +164,7 @@ BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, HairBsdf, LABEL_GLOSSY
BSDF_CLOSURE_CLASS_END(HairReflection, hair_reflection)
BSDF_CLOSURE_CLASS_BEGIN(HairTransmission, hair_transmission, HairBsdf, LABEL_GLOSSY)
- CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, unused),
+ CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, params.N),
CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness1),
CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness2),
CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T),
@@ -191,7 +191,7 @@ BSDF_CLOSURE_CLASS_END(PrincipledSheen, principled_sheen)
class PrincipledClearcoatClosure : public CBSDFClosure {
public:
MicrofacetBsdf params;
- float clearcoat, clearcoat_gloss;
+ float clearcoat, clearcoat_roughness;
MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight)
{
@@ -202,8 +202,8 @@ public:
bsdf->ior = 1.5f;
- bsdf->alpha_x = 0.1f * (1.0f - clearcoat_gloss) + 0.001f * clearcoat_gloss;
- bsdf->alpha_y = 0.1f * (1.0f - clearcoat_gloss) + 0.001f * clearcoat_gloss;
+ bsdf->alpha_x = clearcoat_roughness;
+ bsdf->alpha_y = clearcoat_roughness;
bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f);
bsdf->extra->clearcoat = clearcoat;
@@ -217,7 +217,7 @@ public:
void setup(ShaderData *sd, int path_flag, float3 weight)
{
MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- sd->flag |= (bsdf) ? bsdf_microfacet_ggx_clearcoat_setup(bsdf) : 0;
+ sd->flag |= (bsdf) ? bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd) : 0;
}
};
@@ -226,7 +226,7 @@ ClosureParam *closure_bsdf_principled_clearcoat_params()
static ClosureParam params[] = {
CLOSURE_FLOAT3_PARAM(PrincipledClearcoatClosure, params.N),
CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat),
- CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat_gloss),
+ CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat_roughness),
CLOSURE_STRING_KEYPARAM(PrincipledClearcoatClosure, label, "label"),
CLOSURE_FINISH_PARAM(PrincipledClearcoatClosure)
};
@@ -389,7 +389,7 @@ public:
void setup(ShaderData *sd, int path_flag, float3 weight)
{
MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- sd->flag |= (bsdf) ? bsdf_microfacet_ggx_fresnel_setup(bsdf) : 0;
+ sd->flag |= (bsdf) ? bsdf_microfacet_ggx_fresnel_setup(bsdf, sd) : 0;
}
};
@@ -413,7 +413,7 @@ public:
void setup(ShaderData *sd, int path_flag, float3 weight)
{
MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- sd->flag |= (bsdf) ? bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf) : 0;
+ sd->flag |= (bsdf) ? bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd) : 0;
}
};
@@ -566,7 +566,7 @@ public:
void setup(ShaderData *sd, int path_flag, float3 weight)
{
MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_fresnel_setup(bsdf) : 0;
+ sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd) : 0;
}
};
@@ -590,7 +590,7 @@ public:
void setup(ShaderData *sd, int path_flag, float3 weight)
{
MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf) : 0;
+ sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd) : 0;
}
};
@@ -618,7 +618,7 @@ public:
void setup(ShaderData *sd, int path_flag, float3 weight)
{
MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf) : 0;
+ sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd) : 0;
}
};
diff --git a/intern/cycles/kernel/shaders/node_principled_bsdf.osl b/intern/cycles/kernel/shaders/node_principled_bsdf.osl
index 57f40789d49..2bb981c3918 100644
--- a/intern/cycles/kernel/shaders/node_principled_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_principled_bsdf.osl
@@ -32,7 +32,7 @@ shader node_principled_bsdf(
float Sheen = 0.0,
float SheenTint = 0.5,
float Clearcoat = 0.0,
- float ClearcoatGloss = 1.0,
+ float ClearcoatRoughness = 0.03,
float IOR = 1.45,
float Transmission = 0.0,
float TransmissionRoughness = 0.0,
@@ -57,8 +57,8 @@ shader node_principled_bsdf(
if (diffuse_weight > 1e-5) {
if (Subsurface > 1e-5) {
- color Albedo = SubsurfaceColor * Subsurface + BaseColor * (1.0 - Subsurface);
- BSDF = bssrdf_principled(Normal, Subsurface * SubsurfaceRadius, 0.0, Albedo, Roughness);
+ color mixed_ss_base_color = SubsurfaceColor * Subsurface + BaseColor * (1.0 - Subsurface);
+ BSDF = mixed_ss_base_color * bssrdf_principled(Normal, Subsurface * SubsurfaceRadius, 0.0, SubsurfaceColor, Roughness);
} else {
BSDF = BaseColor * principled_diffuse(Normal, Roughness);
}
@@ -114,7 +114,7 @@ shader node_principled_bsdf(
}
if (Clearcoat > 1e-5) {
- BSDF = BSDF + principled_clearcoat(ClearcoatNormal, Clearcoat, ClearcoatGloss);
+ BSDF = BSDF + principled_clearcoat(ClearcoatNormal, Clearcoat, ClearcoatRoughness * ClearcoatRoughness);
}
}
diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h
index 289d1091b0a..c91d2918687 100644
--- a/intern/cycles/kernel/shaders/stdosl.h
+++ b/intern/cycles/kernel/shaders/stdosl.h
@@ -546,7 +546,7 @@ closure color holdout() BUILTIN;
closure color ambient_occlusion() BUILTIN;
closure color principled_diffuse(normal N, float roughness) BUILTIN;
closure color principled_sheen(normal N) BUILTIN;
-closure color principled_clearcoat(normal N, float clearcoat, float clearcoat_gloss) BUILTIN;
+closure color principled_clearcoat(normal N, float clearcoat, float clearcoat_roughness) BUILTIN;
// BSSRDF
closure color bssrdf_cubic(normal N, vector radius, float texture_blur, float sharpness) BUILTIN;
diff --git a/intern/cycles/kernel/split/kernel_branched.h b/intern/cycles/kernel/split/kernel_branched.h
index dc74a2ada53..e2762a85fc8 100644
--- a/intern/cycles/kernel/split/kernel_branched.h
+++ b/intern/cycles/kernel/split/kernel_branched.h
@@ -63,12 +63,49 @@ ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobal
REMOVE_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT);
}
+ccl_device_inline bool kernel_split_branched_indirect_start_shared(KernelGlobals *kg, int ray_index)
+{
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+
+ int inactive_ray = dequeue_ray_index(QUEUE_INACTIVE_RAYS,
+ kernel_split_state.queue_data, kernel_split_params.queue_size, kernel_split_params.queue_index);
+
+ if(!IS_STATE(ray_state, inactive_ray, RAY_INACTIVE)) {
+ return false;
+ }
+
+#define SPLIT_DATA_ENTRY(type, name, num) \
+ kernel_split_state.name[inactive_ray] = kernel_split_state.name[ray_index];
+ SPLIT_DATA_ENTRIES_BRANCHED_SHARED
+#undef SPLIT_DATA_ENTRY
+
+ kernel_split_state.branched_state[inactive_ray].shared_sample_count = 0;
+ kernel_split_state.branched_state[inactive_ray].original_ray = ray_index;
+ kernel_split_state.branched_state[inactive_ray].waiting_on_shared_samples = false;
+
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ PathRadiance *inactive_L = &kernel_split_state.path_radiance[inactive_ray];
+
+ path_radiance_init(inactive_L, kernel_data.film.use_light_pass);
+ inactive_L->direct_throughput = L->direct_throughput;
+ path_radiance_copy_indirect(inactive_L, L);
+
+ ray_state[inactive_ray] = RAY_REGENERATED;
+ ADD_RAY_FLAG(ray_state, inactive_ray, RAY_BRANCHED_INDIRECT_SHARED);
+ ADD_RAY_FLAG(ray_state, inactive_ray, IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT));
+
+ atomic_fetch_and_inc_uint32((ccl_global uint*)&kernel_split_state.branched_state[ray_index].shared_sample_count);
+
+ return true;
+}
+
/* bounce off surface and integrate indirect light */
ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(KernelGlobals *kg,
int ray_index,
float num_samples_adjust,
ShaderData *saved_sd,
- bool reset_path_state)
+ bool reset_path_state,
+ bool wait_for_shared)
{
SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
@@ -155,12 +192,25 @@ ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(
/* start the indirect path */
*tp *= num_samples_inv;
+ if(kernel_split_branched_indirect_start_shared(kg, ray_index)) {
+ continue;
+ }
+
return true;
}
branched_state->next_sample = 0;
}
+ branched_state->next_closure = sd->num_closure;
+
+ if(wait_for_shared) {
+ branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+ if(branched_state->waiting_on_shared_samples) {
+ return true;
+ }
+ }
+
return false;
}
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
index 694b777f429..9f8dd2392d9 100644
--- a/intern/cycles/kernel/split/kernel_do_volume.h
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -75,11 +75,30 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K
branched_state->next_sample = j+1;
branched_state->num_samples = num_samples;
+ /* Attempting to share too many samples is slow for volumes as it causes us to
+ * loop here more and have many calls to kernel_volume_integrate which evaluates
+ * shaders. The many expensive shader evaluations cause the work load to become
+ * unbalanced and many threads to become idle in this kernel. Limiting the
+ * number of shared samples here helps quite a lot.
+ */
+ if(branched_state->shared_sample_count < 2) {
+ if(kernel_split_branched_indirect_start_shared(kg, ray_index)) {
+ continue;
+ }
+ }
+
return true;
}
# endif
}
+ branched_state->next_sample = num_samples;
+
+ branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+ if(branched_state->waiting_on_shared_samples) {
+ return true;
+ }
+
kernel_split_branched_path_indirect_loop_end(kg, ray_index);
/* todo: avoid this calculation using decoupled ray marching */
diff --git a/intern/cycles/kernel/split/kernel_enqueue_inactive.h b/intern/cycles/kernel/split/kernel_enqueue_inactive.h
new file mode 100644
index 00000000000..496355bbc3a
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_enqueue_inactive.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_enqueue_inactive(KernelGlobals *kg,
+ ccl_local_param unsigned int *local_queue_atomics)
+{
+#ifdef __BRANCHED_PATH__
+ /* Enqeueue RAY_INACTIVE rays into QUEUE_INACTIVE_RAYS queue. */
+ if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+
+ char enqueue_flag = 0;
+ if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_INACTIVE)) {
+ enqueue_flag = 1;
+ }
+
+ enqueue_ray_index_local(ray_index,
+ QUEUE_INACTIVE_RAYS,
+ enqueue_flag,
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+#endif /* __BRANCHED_PATH__ */
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
index 71017fed19e..7758e35fd32 100644
--- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h
+++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
@@ -147,6 +147,7 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
ray_index,
1.0f,
&kernel_split_state.branched_state[ray_index].sd,
+ true,
true))
{
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
@@ -193,6 +194,7 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
ray_index,
1.0f,
&kernel_split_state.branched_state[ray_index].sd,
+ true,
true))
{
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h
index e2e841f36d3..66ce2dfb6f1 100644
--- a/intern/cycles/kernel/split/kernel_queue_enqueue.h
+++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h
@@ -51,7 +51,8 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg,
int queue_number = -1;
if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER)) {
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER) ||
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) {
queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS;
}
else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h
index 5dc94caec85..45984ca509b 100644
--- a/intern/cycles/kernel/split/kernel_scene_intersect.h
+++ b/intern/cycles/kernel/split/kernel_scene_intersect.h
@@ -43,11 +43,21 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg)
}
/* All regenerated rays become active here */
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED))
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
+ if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
+#ifdef __BRANCHED_PATH__
+ if(kernel_split_state.branched_state[ray_index].waiting_on_shared_samples) {
+ kernel_split_path_end(kg, ray_index);
+ }
+ else
+#endif /* __BRANCHED_PATH__ */
+ {
+ ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
+ }
+ }
- if(!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE))
+ if(!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
return;
+ }
#ifdef __KERNEL_DEBUG__
DebugData *debug_data = &kernel_split_state.debug_data[ray_index];
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
index 386fbbc4d09..78e61709b01 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
@@ -29,6 +29,14 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
kernel_split_state.queue_data, kernel_split_params.queue_size, 1);
}
+#ifdef __BRANCHED_PATH__
+ /* TODO(mai): move this somewhere else? */
+ if(thread_index == 0) {
+ /* Clear QUEUE_INACTIVE_RAYS before next kernel. */
+ kernel_split_params.queue_index[QUEUE_INACTIVE_RAYS] = 0;
+ }
+#endif /* __BRANCHED_PATH__ */
+
if(ray_index == QUEUE_EMPTY_SLOT)
return;
diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h
index 57f070d51e0..08f0124b529 100644
--- a/intern/cycles/kernel/split/kernel_split_common.h
+++ b/intern/cycles/kernel/split/kernel_split_common.h
@@ -56,7 +56,20 @@ ccl_device_inline void kernel_split_path_end(KernelGlobals *kg, int ray_index)
ccl_global char *ray_state = kernel_split_state.ray_state;
#ifdef __BRANCHED_PATH__
- if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) {
+ if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT_SHARED)) {
+ int orig_ray = kernel_split_state.branched_state[ray_index].original_ray;
+
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ PathRadiance *orig_ray_L = &kernel_split_state.path_radiance[orig_ray];
+
+ path_radiance_sum_indirect(L);
+ path_radiance_accum_sample(orig_ray_L, L, 1);
+
+ atomic_fetch_and_dec_uint32((ccl_global uint*)&kernel_split_state.branched_state[orig_ray].shared_sample_count);
+
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
+ }
+ else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) {
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER);
}
else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT)) {
diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h
index bb1aca2acbf..4bb2f0d3d80 100644
--- a/intern/cycles/kernel/split/kernel_split_data_types.h
+++ b/intern/cycles/kernel/split/kernel_split_data_types.h
@@ -95,6 +95,10 @@ typedef ccl_global struct SplitBranchedState {
VolumeStack volume_stack[VOLUME_STACK_SIZE];
# endif /* __VOLUME__ */
#endif /*__SUBSURFACE__ */
+
+ int shared_sample_count; /* number of branched samples shared with other threads */
+ int original_ray; /* index of original ray when sharing branched samples */
+ bool waiting_on_shared_samples;
} SplitBranchedState;
#define SPLIT_DATA_BRANCHED_ENTRIES \
@@ -137,6 +141,25 @@ typedef ccl_global struct SplitBranchedState {
SPLIT_DATA_BRANCHED_ENTRIES \
SPLIT_DATA_DEBUG_ENTRIES \
+/* entries to be copied to inactive rays when sharing branched samples (TODO: which are actually needed?) */
+#define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \
+ SPLIT_DATA_ENTRY(ccl_global RNG, rng, 1) \
+ SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
+ SPLIT_DATA_ENTRY(ccl_global float, L_transparent, 1) \
+ SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
+ SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
+ SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
+ SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
+ SPLIT_DATA_ENTRY(ShaderData, sd, 1) \
+ SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 1) \
+ SPLIT_DATA_SUBSURFACE_ENTRIES \
+ SPLIT_DATA_VOLUME_ENTRIES \
+ SPLIT_DATA_BRANCHED_ENTRIES \
+ SPLIT_DATA_DEBUG_ENTRIES \
+
/* struct that holds pointers to data in the shared state buffer */
typedef struct SplitData {
#define SPLIT_DATA_ENTRY(type, name, num) type *name;
diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
index 1dffe1b179e..d5083b23f80 100644
--- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h
+++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
@@ -169,6 +169,7 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it
ray_index,
num_samples_inv,
bssrdf_sd,
+ false,
false))
{
branched_state->ss_next_closure = i;
@@ -187,6 +188,13 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it
branched_state->ss_next_sample = 0;
}
+ branched_state->ss_next_closure = sd->num_closure;
+
+ branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+ if(branched_state->waiting_on_shared_samples) {
+ return true;
+ }
+
kernel_split_branched_path_indirect_loop_end(kg, ray_index);
return false;
@@ -257,21 +265,20 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
/* do bssrdf scatter step if we picked a bssrdf closure */
if(sc) {
uint lcg_state = lcg_state_init(&rng, state->rng_offset, state->sample, 0x68bc21eb);
-
float bssrdf_u, bssrdf_v;
path_state_rng_2D(kg,
- &rng,
- state,
- PRNG_BSDF_U,
- &bssrdf_u, &bssrdf_v);
+ &rng,
+ state,
+ PRNG_BSDF_U,
+ &bssrdf_u, &bssrdf_v);
subsurface_scatter_step(kg,
- sd,
- state,
- state->flag,
- sc,
- &lcg_state,
- bssrdf_u, bssrdf_v,
- false);
+ sd,
+ state,
+ state->flag,
+ sc,
+ &lcg_state,
+ bssrdf_u, bssrdf_v,
+ false);
}
}
else {
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index f04f765686e..9578fcf2687 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -79,13 +79,13 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
#ifdef __PRINCIPLED__
case CLOSURE_BSDF_PRINCIPLED_ID: {
uint specular_offset, roughness_offset, specular_tint_offset, anisotropic_offset, sheen_offset,
- sheen_tint_offset, clearcoat_offset, clearcoat_gloss_offset, eta_offset, transmission_offset,
+ sheen_tint_offset, clearcoat_offset, clearcoat_roughness_offset, eta_offset, transmission_offset,
anisotropic_rotation_offset, transmission_roughness_offset;
uint4 data_node2 = read_node(kg, offset);
float3 T = stack_load_float3(stack, data_node.y);
decode_node_uchar4(data_node.z, &specular_offset, &roughness_offset, &specular_tint_offset, &anisotropic_offset);
- decode_node_uchar4(data_node.w, &sheen_offset, &sheen_tint_offset, &clearcoat_offset, &clearcoat_gloss_offset);
+ decode_node_uchar4(data_node.w, &sheen_offset, &sheen_tint_offset, &clearcoat_offset, &clearcoat_roughness_offset);
decode_node_uchar4(data_node2.x, &eta_offset, &transmission_offset, &anisotropic_rotation_offset, &transmission_roughness_offset);
// get Disney principled parameters
@@ -98,7 +98,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
float sheen = stack_load_float(stack, sheen_offset);
float sheen_tint = stack_load_float(stack, sheen_tint_offset);
float clearcoat = stack_load_float(stack, clearcoat_offset);
- float clearcoat_gloss = stack_load_float(stack, clearcoat_gloss_offset);
+ float clearcoat_roughness = stack_load_float(stack, clearcoat_roughness_offset);
float transmission = stack_load_float(stack, transmission_offset);
float anisotropic_rotation = stack_load_float(stack, anisotropic_rotation_offset);
float transmission_roughness = stack_load_float(stack, transmission_roughness_offset);
@@ -141,8 +141,8 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
float3 weight = sd->svm_closure_weight * mix_weight;
#ifdef __SUBSURFACE__
- float3 albedo = subsurface_color * subsurface + base_color * (1.0f - subsurface);
- float3 subsurf_weight = weight * albedo * diffuse_weight;
+ float3 mixed_ss_base_color = subsurface_color * subsurface + base_color * (1.0f - subsurface);
+ float3 subsurf_weight = weight * mixed_ss_base_color * diffuse_weight;
float subsurf_sample_weight = fabsf(average(subsurf_weight));
/* disable in case of diffuse ancestor, can't see it well then and
@@ -154,11 +154,11 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
/* need to set the base color in this case such that the
* rays get the correctly mixed color after transmitting
* the object */
- base_color = albedo;
+ base_color = mixed_ss_base_color;
}
/* diffuse */
- if(fabsf(average(base_color)) > CLOSURE_WEIGHT_CUTOFF) {
+ if(fabsf(average(mixed_ss_base_color)) > CLOSURE_WEIGHT_CUTOFF) {
if(subsurface < CLOSURE_WEIGHT_CUTOFF && diffuse_weight > CLOSURE_WEIGHT_CUTOFF) {
float3 diff_weight = weight * base_color * diffuse_weight;
@@ -186,7 +186,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
bssrdf->sample_weight = subsurf_sample_weight;
bssrdf->radius = radius.x;
bssrdf->texture_blur = texture_blur;
- bssrdf->albedo = albedo.x;
+ bssrdf->albedo = subsurface_color.x;
bssrdf->sharpness = sharpness;
bssrdf->N = N;
bssrdf->roughness = roughness;
@@ -200,7 +200,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
bssrdf->sample_weight = subsurf_sample_weight;
bssrdf->radius = radius.y;
bssrdf->texture_blur = texture_blur;
- bssrdf->albedo = albedo.y;
+ bssrdf->albedo = subsurface_color.y;
bssrdf->sharpness = sharpness;
bssrdf->N = N;
bssrdf->roughness = roughness;
@@ -214,7 +214,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
bssrdf->sample_weight = subsurf_sample_weight;
bssrdf->radius = radius.z;
bssrdf->texture_blur = texture_blur;
- bssrdf->albedo = albedo.z;
+ bssrdf->albedo = subsurface_color.z;
bssrdf->sharpness = sharpness;
bssrdf->N = N;
bssrdf->roughness = roughness;
@@ -292,9 +292,9 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
/* setup bsdf */
if(distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID || roughness <= 0.075f) /* use single-scatter GGX */
- sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf);
+ sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd);
else /* use multi-scatter GGX */
- sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf);
+ sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd);
}
}
#ifdef __CAUSTICS_TRICKS__
@@ -332,7 +332,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
bsdf->extra->cspec0 = cspec0;
/* setup bsdf */
- sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf);
+ sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd);
}
}
@@ -377,7 +377,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
bsdf->extra->cspec0 = cspec0;
/* setup bsdf */
- sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf);
+ sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd);
}
}
}
@@ -398,14 +398,14 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
bsdf->ior = 1.5f;
bsdf->extra = extra;
- bsdf->alpha_x = 0.1f * (1.0f - clearcoat_gloss) + 0.001f * clearcoat_gloss;
- bsdf->alpha_y = 0.1f * (1.0f - clearcoat_gloss) + 0.001f * clearcoat_gloss;
+ bsdf->alpha_x = clearcoat_roughness * clearcoat_roughness;
+ bsdf->alpha_y = clearcoat_roughness * clearcoat_roughness;
bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f);
bsdf->extra->clearcoat = clearcoat;
/* setup bsdf */
- sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf);
+ sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd);
}
}
#ifdef __CAUSTICS_TRICKS__
@@ -725,6 +725,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
HairBsdf *bsdf = (HairBsdf*)bsdf_alloc(sd, sizeof(HairBsdf), weight);
if(bsdf) {
+ bsdf->N = N;
bsdf->roughness1 = param1;
bsdf->roughness2 = param2;
bsdf->offset = -stack_load_float(stack, data_node.z);
diff --git a/intern/cycles/render/constant_fold.cpp b/intern/cycles/render/constant_fold.cpp
index 2569d9eec27..943b218f0e4 100644
--- a/intern/cycles/render/constant_fold.cpp
+++ b/intern/cycles/render/constant_fold.cpp
@@ -160,6 +160,14 @@ bool ConstantFolder::try_bypass_or_make_constant(ShaderInput *input, bool clamp)
bypass(input->link);
return true;
}
+ else {
+ /* disconnect other inputs if we can't fully bypass due to clamp */
+ foreach(ShaderInput *other, node->inputs) {
+ if(other != input && other->link) {
+ graph->disconnect(other);
+ }
+ }
+ }
return false;
}
diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp
index 625dd3ded39..93d88c5642c 100644
--- a/intern/cycles/render/light.cpp
+++ b/intern/cycles/render/light.cpp
@@ -224,6 +224,10 @@ void LightManager::disable_ineffective_light(Device *device, Scene *scene)
bool LightManager::object_usable_as_light(Object *object) {
Mesh *mesh = object->mesh;
+ /* Skip objects with NaNs */
+ if (!object->bounds.valid()) {
+ return false;
+ }
/* Skip if we are not visible for BSDFs. */
if(!(object->visibility & (PATH_RAY_DIFFUSE|PATH_RAY_GLOSSY|PATH_RAY_TRANSMIT))) {
return false;
diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp
index 57b475e5cd0..166156f7ac3 100644
--- a/intern/cycles/render/nodes.cpp
+++ b/intern/cycles/render/nodes.cpp
@@ -2308,13 +2308,13 @@ NODE_DEFINE(PrincipledBsdfNode)
SOCKET_IN_FLOAT(subsurface, "Subsurface", 0.0f);
SOCKET_IN_VECTOR(subsurface_radius, "Subsurface Radius", make_float3(0.1f, 0.1f, 0.1f));
SOCKET_IN_FLOAT(specular, "Specular", 0.0f);
- SOCKET_IN_FLOAT(roughness, "Roughness", 0.0f);
+ SOCKET_IN_FLOAT(roughness, "Roughness", 0.5f);
SOCKET_IN_FLOAT(specular_tint, "Specular Tint", 0.0f);
SOCKET_IN_FLOAT(anisotropic, "Anisotropic", 0.0f);
SOCKET_IN_FLOAT(sheen, "Sheen", 0.0f);
SOCKET_IN_FLOAT(sheen_tint, "Sheen Tint", 0.0f);
SOCKET_IN_FLOAT(clearcoat, "Clearcoat", 0.0f);
- SOCKET_IN_FLOAT(clearcoat_gloss, "Clearcoat Gloss", 0.0f);
+ SOCKET_IN_FLOAT(clearcoat_roughness, "Clearcoat Roughness", 0.03f);
SOCKET_IN_FLOAT(ior, "IOR", 0.0f);
SOCKET_IN_FLOAT(transmission, "Transmission", 0.0f);
SOCKET_IN_FLOAT(transmission_roughness, "Transmission Roughness", 0.0f);
@@ -2351,7 +2351,7 @@ void PrincipledBsdfNode::attributes(Shader *shader, AttributeRequestSet *attribu
void PrincipledBsdfNode::compile(SVMCompiler& compiler, ShaderInput *p_metallic, ShaderInput *p_subsurface, ShaderInput *p_subsurface_radius,
ShaderInput *p_specular, ShaderInput *p_roughness, ShaderInput *p_specular_tint, ShaderInput *p_anisotropic,
- ShaderInput *p_sheen, ShaderInput *p_sheen_tint, ShaderInput *p_clearcoat, ShaderInput *p_clearcoat_gloss,
+ ShaderInput *p_sheen, ShaderInput *p_sheen_tint, ShaderInput *p_clearcoat, ShaderInput *p_clearcoat_roughness,
ShaderInput *p_ior, ShaderInput *p_transmission, ShaderInput *p_anisotropic_rotation, ShaderInput *p_transmission_roughness)
{
ShaderInput *base_color_in = input("Base Color");
@@ -2374,7 +2374,7 @@ void PrincipledBsdfNode::compile(SVMCompiler& compiler, ShaderInput *p_metallic,
int sheen_offset = compiler.stack_assign(p_sheen);
int sheen_tint_offset = compiler.stack_assign(p_sheen_tint);
int clearcoat_offset = compiler.stack_assign(p_clearcoat);
- int clearcoat_gloss_offset = compiler.stack_assign(p_clearcoat_gloss);
+ int clearcoat_roughness_offset = compiler.stack_assign(p_clearcoat_roughness);
int ior_offset = compiler.stack_assign(p_ior);
int transmission_offset = compiler.stack_assign(p_transmission);
int transmission_roughness_offset = compiler.stack_assign(p_transmission_roughness);
@@ -2391,7 +2391,7 @@ void PrincipledBsdfNode::compile(SVMCompiler& compiler, ShaderInput *p_metallic,
compiler.add_node(normal_offset, tangent_offset,
compiler.encode_uchar4(specular_offset, roughness_offset, specular_tint_offset, anisotropic_offset),
- compiler.encode_uchar4(sheen_offset, sheen_tint_offset, clearcoat_offset, clearcoat_gloss_offset));
+ compiler.encode_uchar4(sheen_offset, sheen_tint_offset, clearcoat_offset, clearcoat_roughness_offset));
compiler.add_node(compiler.encode_uchar4(ior_offset, transmission_offset, anisotropic_rotation_offset, transmission_roughness_offset),
distribution, SVM_STACK_INVALID, SVM_STACK_INVALID);
@@ -2419,7 +2419,7 @@ void PrincipledBsdfNode::compile(SVMCompiler& compiler)
{
compile(compiler, input("Metallic"), input("Subsurface"), input("Subsurface Radius"), input("Specular"),
input("Roughness"), input("Specular Tint"), input("Anisotropic"), input("Sheen"), input("Sheen Tint"),
- input("Clearcoat"), input("Clearcoat Gloss"), input("IOR"), input("Transmission"),
+ input("Clearcoat"), input("Clearcoat Roughness"), input("IOR"), input("Transmission"),
input("Anisotropic Rotation"), input("Transmission Roughness"));
}
diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h
index aac6ce2f375..c6ab47fcc84 100644
--- a/intern/cycles/render/nodes.h
+++ b/intern/cycles/render/nodes.h
@@ -252,6 +252,7 @@ public:
class PointDensityTextureNode : public ShaderNode {
public:
SHADER_NODE_NO_CLONE_CLASS(PointDensityTextureNode)
+ virtual int get_group() { return NODE_GROUP_LEVEL_3; }
~PointDensityTextureNode();
ShaderNode *clone() const;
@@ -377,13 +378,13 @@ public:
bool has_bssrdf_bump();
void compile(SVMCompiler& compiler, ShaderInput *metallic, ShaderInput *subsurface, ShaderInput *subsurface_radius,
ShaderInput *specular, ShaderInput *roughness, ShaderInput *specular_tint, ShaderInput *anisotropic,
- ShaderInput *sheen, ShaderInput *sheen_tint, ShaderInput *clearcoat, ShaderInput *clearcoat_gloss,
+ ShaderInput *sheen, ShaderInput *sheen_tint, ShaderInput *clearcoat, ShaderInput *clearcoat_roughness,
ShaderInput *ior, ShaderInput *transmission, ShaderInput *anisotropic_rotation, ShaderInput *transmission_roughness);
float3 base_color;
float3 subsurface_color, subsurface_radius;
float metallic, subsurface, specular, roughness, specular_tint, anisotropic,
- sheen, sheen_tint, clearcoat, clearcoat_gloss, ior, transmission,
+ sheen, sheen_tint, clearcoat, clearcoat_roughness, ior, transmission,
anisotropic_rotation, transmission_roughness;
float3 normal, clearcoat_normal, tangent;
float surface_mix_weight;
diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp
index 6bff29d1c76..a794f233718 100644
--- a/intern/cycles/render/osl.cpp
+++ b/intern/cycles/render/osl.cpp
@@ -156,6 +156,7 @@ void OSLShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *s
og->surface_state.clear();
og->volume_state.clear();
og->displacement_state.clear();
+ og->bump_state.clear();
og->background_state.reset();
}
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index 08909943c49..ae462a1084a 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -722,6 +722,7 @@ DeviceRequestedFeatures Session::get_requested_device_features()
requested_features.use_baking = bake_manager->get_baking();
requested_features.use_integrator_branched = (scene->integrator->method == Integrator::BRANCHED_PATH);
requested_features.use_transparent &= scene->integrator->transparent_shadows;
+ requested_features.use_denoising = params.use_denoising;
return requested_features;
}
diff --git a/intern/cycles/test/util_string_test.cpp b/intern/cycles/test/util_string_test.cpp
index 22ec8e0ee8e..6c059ba5d12 100644
--- a/intern/cycles/test/util_string_test.cpp
+++ b/intern/cycles/test/util_string_test.cpp
@@ -245,4 +245,41 @@ TEST(util_string_remove_trademark, both)
EXPECT_EQ(str, "foo bar zzz");
}
+TEST(util_string_remove_trademark, both_space)
+{
+ string str = string_remove_trademark("foo bar(TM) (R) zzz");
+ EXPECT_EQ(str, "foo bar zzz");
+}
+
+TEST(util_string_remove_trademark, both_space_around)
+{
+ string str = string_remove_trademark("foo bar (TM) (R) zzz");
+ EXPECT_EQ(str, "foo bar zzz");
+}
+
+TEST(util_string_remove_trademark, trademark_space_suffix)
+{
+ string str = string_remove_trademark("foo bar (TM)");
+ EXPECT_EQ(str, "foo bar");
+}
+
+TEST(util_string_remove_trademark, trademark_space_middle)
+{
+ string str = string_remove_trademark("foo bar (TM) baz");
+ EXPECT_EQ(str, "foo bar baz");
+}
+
+
+TEST(util_string_remove_trademark, r_space_suffix)
+{
+ string str = string_remove_trademark("foo bar (R)");
+ EXPECT_EQ(str, "foo bar");
+}
+
+TEST(util_string_remove_trademark, r_space_middle)
+{
+ string str = string_remove_trademark("foo bar (R) baz");
+ EXPECT_EQ(str, "foo bar baz");
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h
index 6c52117ef9a..643af87a65f 100644
--- a/intern/cycles/util/util_atomic.h
+++ b/intern/cycles/util/util_atomic.h
@@ -35,6 +35,7 @@ ATOMIC_INLINE void atomic_update_max_z(size_t *maximum_value, size_t value)
#define atomic_add_and_fetch_float(p, x) atomic_add_and_fetch_fl((p), (x))
#define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1)
+#define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_add_uint32((p), -1)
#define CCL_LOCAL_MEM_FENCE 0
#define ccl_barrier(flags) (void)0
@@ -68,6 +69,7 @@ ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *so
#define atomic_fetch_and_add_uint32(p, x) atomic_add((p), (x))
#define atomic_fetch_and_inc_uint32(p) atomic_inc((p))
+#define atomic_fetch_and_dec_uint32(p) atomic_dec((p))
#define CCL_LOCAL_MEM_FENCE CLK_LOCAL_MEM_FENCE
#define ccl_barrier(flags) barrier(flags)
@@ -79,7 +81,9 @@ ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *so
#define atomic_add_and_fetch_float(p, x) (atomicAdd((float*)(p), (float)(x)) + (float)(x))
#define atomic_fetch_and_add_uint32(p, x) atomicAdd((unsigned int*)(p), (unsigned int)(x))
+#define atomic_fetch_and_sub_uint32(p, x) atomicSub((unsigned int*)(p), (unsigned int)(x))
#define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1)
+#define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_sub_uint32((p), 1)
#define CCL_LOCAL_MEM_FENCE
#define ccl_barrier(flags) __syncthreads()
diff --git a/intern/cycles/util/util_debug.cpp b/intern/cycles/util/util_debug.cpp
index ab038d2b9fb..10895f2e918 100644
--- a/intern/cycles/util/util_debug.cpp
+++ b/intern/cycles/util/util_debug.cpp
@@ -184,8 +184,8 @@ std::ostream& operator <<(std::ostream &os,
<< " Device type : " << opencl_device_type << "\n"
<< " Kernel type : " << opencl_kernel_type << "\n"
<< " Debug : " << string_from_bool(debug_flags.opencl.debug) << "\n"
- << " Single program : " << string_from_bool(debug_flags.opencl.single_program)
- << "\n";
+ << " Single program : " << string_from_bool(debug_flags.opencl.single_program) << "\n"
+ << " Memory limit : " << string_human_readable_size(debug_flags.opencl.mem_limit) << "\n";
return os;
}
diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h
index 4505d584490..450cd900a9f 100644
--- a/intern/cycles/util/util_debug.h
+++ b/intern/cycles/util/util_debug.h
@@ -115,6 +115,10 @@ public:
/* Use single program */
bool single_program;
+
+ /* TODO(mai): Currently this is only for OpenCL, but we should have it implemented for all devices. */
+ /* Artificial memory limit in bytes (0 if disabled). */
+ size_t mem_limit;
};
/* Get instance of debug flags registry. */
diff --git a/intern/cycles/util/util_logging.h b/intern/cycles/util/util_logging.h
index ecf9c9cfee0..492f830e67c 100644
--- a/intern/cycles/util/util_logging.h
+++ b/intern/cycles/util/util_logging.h
@@ -19,28 +19,30 @@
#if defined(WITH_CYCLES_LOGGING) && !defined(__KERNEL_GPU__)
# include <glog/logging.h>
-#else
-# include <iostream>
#endif
+#include <iostream>
+
CCL_NAMESPACE_BEGIN
#if !defined(WITH_CYCLES_LOGGING) || defined(__KERNEL_GPU__)
-class StubStream : public std::ostream {
- public:
- StubStream() : std::ostream(NULL) { }
+class StubStream {
+public:
+ template<class T>
+ StubStream& operator<<(const T&) {
+ return *this;
+ }
};
class LogMessageVoidify {
public:
LogMessageVoidify() { }
- void operator&(::std::ostream&) { }
+ void operator&(StubStream&) { }
};
# define LOG_SUPPRESS() (true) ? (void) 0 : LogMessageVoidify() & StubStream()
# define LOG(severity) LOG_SUPPRESS()
# define VLOG(severity) LOG_SUPPRESS()
-
#endif
#define VLOG_ONCE(level, flag) if(!flag) flag = true, VLOG(level)
diff --git a/intern/cycles/util/util_math_float3.h b/intern/cycles/util/util_math_float3.h
index 5327d9f7cc6..bb04c4aa2d9 100644
--- a/intern/cycles/util/util_math_float3.h
+++ b/intern/cycles/util/util_math_float3.h
@@ -374,9 +374,9 @@ ccl_device_inline bool isfinite3_safe(float3 v)
ccl_device_inline float3 ensure_finite3(float3 v)
{
- if(!isfinite_safe(v.x)) v.x = 0.0;
- if(!isfinite_safe(v.y)) v.y = 0.0;
- if(!isfinite_safe(v.z)) v.z = 0.0;
+ if(!isfinite_safe(v.x)) v.x = 0.0f;
+ if(!isfinite_safe(v.y)) v.y = 0.0f;
+ if(!isfinite_safe(v.z)) v.z = 0.0f;
return v;
}
diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h
index bc672669e1f..cd4fe52fdc9 100644
--- a/intern/cycles/util/util_progress.h
+++ b/intern/cycles/util/util_progress.h
@@ -226,6 +226,7 @@ public:
int get_current_sample()
{
+ thread_scoped_lock lock(progress_mutex);
/* Note that the value here always belongs to the last tile that updated,
* so it's only useful if there is only one active tile. */
return current_tile_sample;
@@ -233,11 +234,13 @@ public:
int get_rendered_tiles()
{
+ thread_scoped_lock lock(progress_mutex);
return rendered_tiles;
}
int get_denoised_tiles()
{
+ thread_scoped_lock lock(progress_mutex);
return denoised_tiles;
}
diff --git a/intern/cycles/util/util_string.cpp b/intern/cycles/util/util_string.cpp
index a1008d510d1..94ad512982c 100644
--- a/intern/cycles/util/util_string.cpp
+++ b/intern/cycles/util/util_string.cpp
@@ -148,6 +148,12 @@ void string_replace(string& haystack, const string& needle, const string& other)
string string_remove_trademark(const string &s)
{
string result = s;
+
+ /* Special case, so we don;t leave sequential spaces behind. */
+ /* TODO(sergey): Consider using regex perhaps? */
+ string_replace(result, " (TM)", "");
+ string_replace(result, " (R)", "");
+
string_replace(result, "(TM)", "");
string_replace(result, "(R)", "");
diff --git a/intern/elbeem/intern/isosurface.cpp b/intern/elbeem/intern/isosurface.cpp
index fb61fb416b4..de7bfe8e687 100644
--- a/intern/elbeem/intern/isosurface.cpp
+++ b/intern/elbeem/intern/isosurface.cpp
@@ -15,6 +15,7 @@
#include "particletracer.h"
#include <algorithm>
#include <stdio.h>
+#include <cmath>
#ifdef sun
#include "ieeefp.h"
@@ -25,6 +26,8 @@
#define round(x) (x)
#endif
+using std::isfinite;
+
/******************************************************************************
* Constructor
*****************************************************************************/
@@ -937,17 +940,10 @@ void IsoSurface::smoothSurface(float sigma, bool normSmooth)
ew[(j+2)%3]);
}
- // NT important, check this...
-#ifndef WIN32
- if(! finite(cornerareas[i][0]) ) cornerareas[i][0]=1e-6;
- if(! finite(cornerareas[i][1]) ) cornerareas[i][1]=1e-6;
- if(! finite(cornerareas[i][2]) ) cornerareas[i][2]=1e-6;
-#else // WIN32
- // FIXME check as well...
- if(! (cornerareas[i][0]>=0.0) ) cornerareas[i][0]=1e-6;
- if(! (cornerareas[i][1]>=0.0) ) cornerareas[i][1]=1e-6;
- if(! (cornerareas[i][2]>=0.0) ) cornerareas[i][2]=1e-6;
-#endif // WIN32
+ // FIX T50887: ensure pointareas are finite
+ if (!isfinite(cornerareas[i][0])) cornerareas[i][0] = 1e-6;
+ if (!isfinite(cornerareas[i][1])) cornerareas[i][1] = 1e-6;
+ if (!isfinite(cornerareas[i][2])) cornerareas[i][2] = 1e-6;
pointareas[mIndices[i*3+0]] += cornerareas[i][0];
pointareas[mIndices[i*3+1]] += cornerareas[i][1];
@@ -1096,17 +1092,10 @@ void IsoSurface::smoothNormals(float sigma) {
ew[(j+2)%3]);
}
- // NT important, check this...
-#ifndef WIN32
- if(! finite(cornerareas[i][0]) ) cornerareas[i][0]=1e-6;
- if(! finite(cornerareas[i][1]) ) cornerareas[i][1]=1e-6;
- if(! finite(cornerareas[i][2]) ) cornerareas[i][2]=1e-6;
-#else // WIN32
- // FIXME check as well...
- if(! (cornerareas[i][0]>=0.0) ) cornerareas[i][0]=1e-6;
- if(! (cornerareas[i][1]>=0.0) ) cornerareas[i][1]=1e-6;
- if(! (cornerareas[i][2]>=0.0) ) cornerareas[i][2]=1e-6;
-#endif // WIN32
+ // FIX T50887: ensure pointareas are finite
+ if (!isfinite(cornerareas[i][0])) cornerareas[i][0] = 1e-6;
+ if (!isfinite(cornerareas[i][1])) cornerareas[i][1] = 1e-6;
+ if (!isfinite(cornerareas[i][2])) cornerareas[i][2] = 1e-6;
pointareas[mIndices[i*3+0]] += cornerareas[i][0];
pointareas[mIndices[i*3+1]] += cornerareas[i][1];
diff --git a/intern/elbeem/intern/solver_util.cpp b/intern/elbeem/intern/solver_util.cpp
index 6eca427c787..f0c7bce2b4e 100644
--- a/intern/elbeem/intern/solver_util.cpp
+++ b/intern/elbeem/intern/solver_util.cpp
@@ -855,6 +855,10 @@ void LbmFsgrSolver::advanceParticles() {
if(k<=mSizez-1-cutval){
CellFlagType pflag = RFLAG(level, i,j,k, workSet);
//errMsg("PIT move"," at "<<PRINT_IJK<<" flag"<<convertCellFlagType2String(pflag) );
+ if (pflag & CFMbndOutflow) {
+ DEL_PART;
+ continue;
+ }
if(pflag & (CFBnd)) {
handleObstacleParticle(p);
continue;
diff --git a/intern/ghost/GHOST_C-api.h b/intern/ghost/GHOST_C-api.h
index 6887063eae9..967d3f58143 100644
--- a/intern/ghost/GHOST_C-api.h
+++ b/intern/ghost/GHOST_C-api.h
@@ -43,7 +43,7 @@ extern "C" {
* Creates a "handle" for a C++ GHOST object.
* A handle is just an opaque pointer to an empty struct.
* In the API the pointer is casted to the actual C++ class.
- * \param name Name of the handle to create.
+ * The 'name' argument to the macro is the name of the handle to create.
*/
GHOST_DECLARE_HANDLE(GHOST_SystemHandle);
diff --git a/intern/ghost/intern/GHOST_SystemWin32.cpp b/intern/ghost/intern/GHOST_SystemWin32.cpp
index 9f03b5e9537..b0dae432643 100644
--- a/intern/ghost/intern/GHOST_SystemWin32.cpp
+++ b/intern/ghost/intern/GHOST_SystemWin32.cpp
@@ -941,6 +941,8 @@ LRESULT WINAPI GHOST_SystemWin32::s_wndProc(HWND hwnd, UINT msg, WPARAM wParam,
GHOST_ASSERT(system, "GHOST_SystemWin32::s_wndProc(): system not initialized");
if (hwnd) {
+#if 0
+ // Disabled due to bug in Intel drivers, see T51959
if(msg == WM_NCCREATE) {
// Tell Windows to automatically handle scaling of non-client areas
// such as the caption bar. EnableNonClientDpiScaling was introduced in Windows 10
@@ -954,6 +956,7 @@ LRESULT WINAPI GHOST_SystemWin32::s_wndProc(HWND hwnd, UINT msg, WPARAM wParam,
}
}
}
+#endif
GHOST_WindowWin32 *window = (GHOST_WindowWin32 *)::GetWindowLongPtr(hwnd, GWLP_USERDATA);
if (window) {
diff --git a/intern/ghost/intern/GHOST_WindowCocoa.h b/intern/ghost/intern/GHOST_WindowCocoa.h
index b234291396b..5168c48ca2f 100644
--- a/intern/ghost/intern/GHOST_WindowCocoa.h
+++ b/intern/ghost/intern/GHOST_WindowCocoa.h
@@ -56,7 +56,7 @@ public:
* \param systemCocoa The associated system class to forward events to
* \param title The text shown in the title bar of the window.
* \param left The coordinate of the left edge of the window.
- * \param top The coordinate of the top edge of the window.
+ * \param bottom The coordinate of the bottom edge of the window.
* \param width The width the window.
* \param height The height the window.
* \param state The state the window is initially opened with.
diff --git a/intern/guardedalloc/intern/mallocn_lockfree_impl.c b/intern/guardedalloc/intern/mallocn_lockfree_impl.c
index ce8a5b29ece..b4838cdca18 100644
--- a/intern/guardedalloc/intern/mallocn_lockfree_impl.c
+++ b/intern/guardedalloc/intern/mallocn_lockfree_impl.c
@@ -64,9 +64,9 @@ enum {
MEMHEAD_ALIGN_FLAG = 2,
};
-#define MEMHEAD_FROM_PTR(ptr) (((MemHead*) vmemh) - 1)
+#define MEMHEAD_FROM_PTR(ptr) (((MemHead*) ptr) - 1)
#define PTR_FROM_MEMHEAD(memhead) (memhead + 1)
-#define MEMHEAD_ALIGNED_FROM_PTR(ptr) (((MemHeadAligned*) vmemh) - 1)
+#define MEMHEAD_ALIGNED_FROM_PTR(ptr) (((MemHeadAligned*) ptr) - 1)
#define MEMHEAD_IS_MMAP(memhead) ((memhead)->len & (size_t) MEMHEAD_MMAP_FLAG)
#define MEMHEAD_IS_ALIGNED(memhead) ((memhead)->len & (size_t) MEMHEAD_ALIGN_FLAG)
diff --git a/intern/libmv/ChangeLog b/intern/libmv/ChangeLog
index 45be9c25afa..81096dd90c9 100644
--- a/intern/libmv/ChangeLog
+++ b/intern/libmv/ChangeLog
@@ -1,3 +1,156 @@
+commit efd7a93317e0278b99e66785f667823e451daef1
+Author: Sergey Sharybin <sergey.vfx@gmail.com>
+Date: Tue May 9 10:16:42 2017 +0200
+
+ Fix strict compiler warnings, unused variables
+
+commit 8efd47e13dfdd3f7209bc96f26d0b13127dd6376
+Author: Sergey Sharybin <sergey.vfx@gmail.com>
+Date: Wed Dec 14 10:44:57 2016 +0100
+
+ Fix T50243: libmv_panography_test is broken
+
+ There was fully wrong logic in comparison: was actually accessing memory
+ past the array boundary. Run test manually and the figure seems correct
+ to me now.
+
+ Spotted by @LazyDodo, thanks!
+
+commit 6dfb9cd1bd14669d84be789000ce234747fb00ff
+Author: Sergey Sharybin <sergey.vfx@gmail.com>
+Date: Thu Jul 14 11:49:38 2016 +0200
+
+ Fix some strict compiler warnings
+
+ One of them was a real bug!
+
+commit f61adaecf7b29ebe6677be0e1c825f0a8d475e4b
+Author: Sergey Sharybin <sergey.vfx@gmail.com>
+Date: Wed May 31 11:22:34 2017 +0200
+
+ Enable explicit schur complement for BA step
+
+ This is something we do in Blender and only reason it was not
+ enabled for standalone Libmv is because we did not have fresh
+ enough version of Ceres bundled.
+
+commit fc5d3a1d4880c6658aff693c1c1e8c10c96ce1a7
+Author: Sergey Sharybin <sergey.vfx@gmail.com>
+Date: Wed Nov 2 15:32:11 2016 +0100
+
+ Update tests to make tests pass after recent Ceres update
+
+ Just a precision issue, difference is around 1e-7. Should be fine to
+ simply update expected value.
+
+commit e1ac9f6124110c1a90d8e417bea47acfcbdcca42
+Author: Sergey Sharybin <sergey.vfx@gmail.com>
+Date: Wed May 31 10:54:48 2017 +0200
+
+ Update Ceres to latest release 1.12.0
+
+commit ac1571352b4962f110929b963f8616d7310ceea5
+Author: Sergey Sharybin <sergey.vfx@gmail.com>
+Date: Fri Apr 7 17:10:44 2017 +0200
+
+ Fix crash of keyframe selection on 32bit linux
+
+commit 5f8df3da965686df39a6ae5c9f17482075017bf4
+Author: Sergey Sharybin <sergey.vfx@gmail.com>
+Date: Tue Jan 19 14:00:53 2016 +0500
+
+ Solve some strict warnings in tests
+
+commit 8ea3a5d752a9ce3337ab7643897472a4d33747f1
+Author: Brecht Van Lommel <brechtvanlommel@gmail.com>
+Date: Sat Feb 18 23:52:31 2017 +0100
+
+ Fix a few compiler warnings with macOS / clang.
+
+commit ffbe81461770e70736e80b8cab8e6eb1f8b27160
+Author: Mike Erwin <significant.bit@gmail.com>
+Date: Wed May 31 10:43:08 2017 +0200
+
+ Fix comparison of identicals
+
+ Some of these check that dimensions match before running code that
+ assumes they do match.
+
+ Found with PVS-Studio T48917.
+
+commit 206c01999cde16c1c6c43a8e13ffa86020821d98
+Author: Sergey Sharybin <sergey.vfx@gmail.com>
+Date: Wed May 31 10:39:16 2017 +0200
+
+ Add basic track masking API in place
+
+ This brings back ability to mask non-interesting parts of
+ specific track (the feature got lost with new auto-track API).
+
+ Added it back by extending frame accessor class. This isn't really
+ a frame thing, but we don't have other type of accessor here.
+
+ Surely, we can use old-style API here and pass mask via region
+ tracker options for this particular case, but then it becomes much
+ less obvious how real auto-tracker will access this mask with old
+ style API.
+
+ So seems we do need an accessor for such data, just matter of
+ finding better place than frame accessor.
+
+commit faa069cb826892780356477cc10602390fecf06b
+Author: Sergey Sharybin <sergey.vfx@gmail.com>
+Date: Wed May 31 10:36:26 2017 +0200
+
+ Tests: Tweak epsilon to avoid what looks a false-positive failure
+
+commit 7c84e45c1d330871477ba3516f57178e5b9d101f
+Author: Sergey Sharybin <sergey.vfx@gmail.com>
+Date: Wed May 31 10:15:43 2017 +0200
+
+ CMake: Fix mistake in closing branch
+
+commit cb769a0d319a8c95948153d78a4c3378a0142ece
+Author: Sergey Sharybin <sergey.vfx@gmail.com>
+Date: Thu Jul 21 12:52:33 2016 +0200
+
+ Set of fixes for MSVC215
+
+ - Move GLOG/GFLAGS defines to a more global scope,
+ this way ANY of our own libraries will use proper
+ declspec.
+
+ - Compile png/zlib/openexif on Windows as well since
+ those are required for a correct linking.
+
+commit bb95c8654fd2cea72d66ed04cd825cc3712ea804
+Author: Sergey Sharybin <sergey.vfx@gmail.com>
+Date: Wed Jul 20 18:14:46 2016 +0200
+
+ Disable unexisting Ceres option
+
+ Explicit Schur complement requires having
+ newer Ceres than we currently have bundled.
+
+commit a2e12c959ef32cc9382244d1581992c2f7aa9c09
+Author: Sergey Sharybin <sergey.vfx@gmail.com>
+Date: Wed Jul 20 18:04:57 2016 +0200
+
+ Various fixes for MSVC
+
+ - Update Eigen to 3.2.7 since this brings crucial
+ fixes for MSVC 2015.
+
+ - Switch to STATIC build by default.
+
+ There are issues building current sources as dynamic
+ libraries with MSVC2015 and additionally building
+ dynamic Ceres is not recommended anyway, so let's
+ not do this for the time being.
+
+ If anyone finds a way to make this all working --
+ it'llsurely be a welcome addition.
+
commit 7a676106720fb126a27ff010abdd8bb65d7e0d9a
Author: Sergey Sharybin <sergey.vfx@gmail.com>
Date: Mon Jan 4 18:30:12 2016 +0500
@@ -365,239 +518,3 @@ Date: Thu May 8 15:50:26 2014 +0200
Reviewed By: sergey
Differential Revision: https://developer.blender.org/D516
-
-commit 4405dff60ea08d454b64da1a7c0595d9328cf8a3
-Author: Keir Mierle <mierle@gmail.com>
-Date: Thu May 8 15:38:14 2014 +0200
-
- Add public SetMarkers to AutoTrack
-
- Reviewers: sergey
-
- Reviewed By: sergey
-
- Differential Revision: https://developer.blender.org/D515
-
-commit c90837f6db276a3b1f610eaad509155f6a43b24f
-Author: Keir Mierle <mierle@gmail.com>
-Date: Thu May 8 15:17:48 2014 +0200
-
- Make autotrack skeleton compile
-
- Reviewers: sergey
-
- Reviewed By: sergey
-
- Differential Revision: https://developer.blender.org/D514
-
-commit be01baa2e82e36f63e548f073157e68d2ff870c0
-Author: Keir Mierle <mierle@gmail.com>
-Date: Wed May 7 18:48:55 2014 +0200
-
- Add preliminary TrackMarkerToFrame in autotrack
-
- Reviewers: sergey
-
- Reviewed By: sergey
-
- Differential Revision: https://developer.blender.org/D509
-
-commit 0cab028d591b3d08672ca86eb6c6e4ac1aacf1d0
-Author: Sergey Sharybin <sergey.vfx@gmail.com>
-Date: Wed May 7 17:59:11 2014 +0200
-
- Remove assert from ArrayND Resize
-
- That assert broke initialization of arrays which doesn't
- own the data since constructor uses Resize to set shape
- and strides.
-
- Strides are still to be fixed, but that's for later.
-
-commit 64f9c118029a9351e9023e96527c120e1d724d5b
-Author: Sergey Sharybin <sergey.vfx@gmail.com>
-Date: Wed May 7 17:42:21 2014 +0200
-
- Fix ArrayND freeing the data it doesn't own
-
- Can't really guarantee it works fully correct now,
- but at least this check is needed anyway and compilation
- works just fine.
-
- Reviewers: keir
-
- Reviewed By: keir
-
- Differential Revision: https://developer.blender.org/D508
-
-commit 0618f1c8e88dfc738cdde55784da80b889905e7c
-Author: Keir Mierle <mierle@gmail.com>
-Date: Wed May 7 12:03:32 2014 +0200
-
- Minor changes
-
- Reviewers: sergey
-
- Reviewed By: sergey
-
- Differential Revision: https://developer.blender.org/D505
-
-commit 5c34335e1bb90c4ed701ee830c718ed4e20dbffa
-Author: Sergey Sharybin <sergey.vfx@gmail.com>
-Date: Wed May 7 11:12:23 2014 +0200
-
- Fix compilation error in frame accessor
-
- - int64 is not a standard type, we've got int64_t defined in
- std int. We also have an msvc port of this header, so should
- not be an issue.
-
- - Fixed inconsistency in usage of CacheKey and Key, used Key.
-
- - Some functions weren't marked as virtual.
-
- Additional change: added self to authors.
-
- Reviewers: keir
-
- Reviewed By: keir
-
- Differential Revision: https://developer.blender.org/D504
-
-commit 06bc207614e262cd688e2c3ed820ade7c77bdb66
-Author: Keir Mierle <mierle@gmail.com>
-Date: Tue May 6 22:30:59 2014 +0200
-
- Start new Tracks implementation
-
- This adds the new Tracks implementation, as well as a
- trivial test to show it compiles.
-
- Reviewers: sergey
-
- Reviewed By: sergey
-
- Differential Revision: https://developer.blender.org/D502
-
-commit 25ce061e6da69881460ba7718bb0d660a2380a02
-Author: Keir Mierle <mierle@gmail.com>
-Date: Tue May 6 19:10:51 2014 +0200
-
- Add Reconstruction class for new API
-
- This starts the new Reconstruction class (with support for e.g. planes). This
- also starts the new namespace "mv" which will eventually have all the symbols
- we wish to export.
-
- Reviewers: sergey
-
- Reviewed By: sergey
-
- Differential Revision: https://developer.blender.org/D501
-
-commit 0a6af3e29016048978aea607673340500e050339
-Author: Keir Mierle <mierle@gmail.com>
-Date: Tue May 6 17:52:53 2014 +0200
-
- Add a new Tracks implementation
-
- Reviewers: sergey
-
- Reviewed By: sergey
-
- Differential Revision: https://developer.blender.org/D500
-
-commit 887b68d29c2b198f4939f9ab5153881aa2c1806e
-Author: Keir Mierle <mierle@gmail.com>
-Date: Tue May 6 17:01:39 2014 +0200
-
- Initial commit of unfinished AutoTrack API
-
- This starts the creating the new AutoTrack API. The new API will
- make it possible for libmv to do full autotracking, including
- predictive tracking and also support multiple motion models (3D
- planes etc).
-
- The first goal (not in this patch) is to convert Blender to use
- the new API without adding any new functionality.
-
- Note: This does not add any of the API to the build system!
- It likely does not compile.
-
- Reviewers: sergey
-
- Reviewed By: sergey
-
- Differential Revision: https://developer.blender.org/D499
-
-commit 08cc227d431d257d27f300fbb8e6991e663302da
-Author: Sergey Sharybin <sergey.vfx@gmail.com>
-Date: Tue May 6 13:09:22 2014 +0200
-
- Fix homography test failure
-
- It was caused by assuming that reconstructed homography matrix
- should look exactly the same as the matrix used to generate a
- test case.
-
- It's not actually valid assumption because different-looking
- matrices could correspond to the same exact transform.
-
- In this change we make it so actual "re-projected" vectors
- are being checked, not the values in matrix. This makes it
- more predictable verification.
-
- Reviewers: keir
-
- Reviewed By: keir
-
- Differential Revision: https://developer.blender.org/D488
-
-commit 0b7d83dc9627447dc7df64d7e3a468aefe9ddc13
-Author: Sergey Sharybin <sergey.vfx@gmail.com>
-Date: Wed Apr 23 19:14:55 2014 +0600
-
- Fix compilation on OSX after previous commit
-
- EXPECT_EQ wasn't defined in the scope.
-
-commit d14049e00dabf8fdf49056779f0a3718fbb39e8f
-Author: Sergey Sharybin <sergey.vfx@gmail.com>
-Date: Wed Apr 23 15:08:16 2014 +0600
-
- Move aligned malloc implementation into own file
-
- It was rather stupid having it in brute region tracker,
- now it is in own file in base library (which was also
- added in this commit, before this it consist of header
- files only).
-
- Reviewers: keir
-
- Reviewed By: keir
-
- Differential Revision: https://developer.blender.org/D479
-
-commit 0ddf3851bfcb8de43660b119a25a77a25674200d
-Author: Sergey Sharybin <sergey.vfx@gmail.com>
-Date: Mon Apr 21 14:14:03 2014 +0600
-
- Optimization of PearsonProductMomentCorrelation
-
- Pass the arrays by reference rather than by value,
- should give some percent of speedup.
-
- Also don't pass the dimensions to the function but
- get them from the images themselves.
-
- Hopefully this will give some %% of tracker speedup.
-
-commit f68fdbe5896a6c5bd8b500caeec61b876c5e44c6
-Author: Sergey Sharybin <sergey.vfx@gmail.com>
-Date: Mon Apr 21 14:10:43 2014 +0600
-
- Fix wrong assert in ResizeImage()
-
- The assert didn't make any sense because ComputeBoundingBox()
- is intended to return bounding box in the following way:
- (xmin, xmax, ymin, ymax).
diff --git a/intern/libmv/bundle.sh b/intern/libmv/bundle.sh
index 27e012f665f..d155d050782 100755
--- a/intern/libmv/bundle.sh
+++ b/intern/libmv/bundle.sh
@@ -120,6 +120,7 @@ if(WITH_LIBMV)
add_definitions(\${GFLAGS_DEFINES})
add_definitions(\${GLOG_DEFINES})
add_definitions(\${CERES_DEFINES})
+ add_definitions(-DLIBMV_GFLAGS_NAMESPACE=\${GFLAGS_NAMESPACE})
list(APPEND INC
\${GFLAGS_INCLUDE_DIRS}
diff --git a/intern/libmv/intern/track_region.cc b/intern/libmv/intern/track_region.cc
index d395b6457d7..8989897e09f 100644
--- a/intern/libmv/intern/track_region.cc
+++ b/intern/libmv/intern/track_region.cc
@@ -36,7 +36,7 @@
/* define this to generate PNG images with content of search areas
on every itteration of tracking */
-#define DUMP_ALWAYS
+#undef DUMP_ALWAYS
using libmv::FloatImage;
using libmv::TrackRegionOptions;
diff --git a/intern/memutil/MEM_CacheLimiterC-Api.h b/intern/memutil/MEM_CacheLimiterC-Api.h
index 0fe5469a4d4..b5680890eb8 100644
--- a/intern/memutil/MEM_CacheLimiterC-Api.h
+++ b/intern/memutil/MEM_CacheLimiterC-Api.h
@@ -61,8 +61,8 @@ bool MEM_CacheLimiter_is_disabled(void);
* Create new MEM_CacheLimiter object
* managed objects are destructed with the data_destructor
*
- * @param data_destructor
- * @return A new MEM_CacheLimter object
+ * \param data_destructor
+ * \return A new MEM_CacheLimter object
*/
MEM_CacheLimiterC *new_MEM_CacheLimiter(MEM_CacheLimiter_Destruct_Func data_destructor,
@@ -73,7 +73,7 @@ MEM_CacheLimiterC *new_MEM_CacheLimiter(MEM_CacheLimiter_Destruct_Func data_dest
*
* Frees the memory of the CacheLimiter but does not touch managed objects!
*
- * @param This "This" pointer
+ * \param This "This" pointer
*/
void delete_MEM_CacheLimiter(MEM_CacheLimiterC *This);
@@ -81,8 +81,8 @@ void delete_MEM_CacheLimiter(MEM_CacheLimiterC *This);
/**
* Manage object
*
- * @param This "This" pointer, data data object to manage
- * @return CacheLimiterHandle to ref, unref, touch the managed object
+ * \param This "This" pointer, data data object to manage
+ * \return CacheLimiterHandle to ref, unref, touch the managed object
*/
MEM_CacheLimiterHandleC *MEM_CacheLimiter_insert(MEM_CacheLimiterC *This, void *data);
@@ -90,7 +90,7 @@ MEM_CacheLimiterHandleC *MEM_CacheLimiter_insert(MEM_CacheLimiterC *This, void *
/**
* Free objects until memory constraints are satisfied
*
- * @param This "This" pointer
+ * \param This "This" pointer
*/
void MEM_CacheLimiter_enforce_limits(MEM_CacheLimiterC *This);
@@ -99,7 +99,7 @@ void MEM_CacheLimiter_enforce_limits(MEM_CacheLimiterC *This);
* Unmanage object previously inserted object.
* Does _not_ delete managed object!
*
- * @param This "This" pointer, handle of object
+ * \param handle of object
*/
void MEM_CacheLimiter_unmanage(MEM_CacheLimiterHandleC *handle);
@@ -108,7 +108,7 @@ void MEM_CacheLimiter_unmanage(MEM_CacheLimiterHandleC *handle);
/**
* Raise priority of object (put it at the tail of the deletion chain)
*
- * @param handle of object
+ * \param handle of object
*/
void MEM_CacheLimiter_touch(MEM_CacheLimiterHandleC *handle);
@@ -117,7 +117,7 @@ void MEM_CacheLimiter_touch(MEM_CacheLimiterHandleC *handle);
* Increment reference counter. Objects with reference counter != 0 are _not_
* deleted.
*
- * @param handle of object
+ * \param handle of object
*/
void MEM_CacheLimiter_ref(MEM_CacheLimiterHandleC *handle);
@@ -126,7 +126,7 @@ void MEM_CacheLimiter_ref(MEM_CacheLimiterHandleC *handle);
* Decrement reference counter. Objects with reference counter != 0 are _not_
* deleted.
*
- * @param handle of object
+ * \param handle of object
*/
void MEM_CacheLimiter_unref(MEM_CacheLimiterHandleC *handle);
@@ -134,7 +134,7 @@ void MEM_CacheLimiter_unref(MEM_CacheLimiterHandleC *handle);
/**
* Get reference counter.
*
- * @param handle of object
+ * \param handle of object
*/
int MEM_CacheLimiter_get_refcount(MEM_CacheLimiterHandleC *handle);
@@ -142,7 +142,7 @@ int MEM_CacheLimiter_get_refcount(MEM_CacheLimiterHandleC *handle);
/**
* Get pointer to managed object
*
- * @param handle of object
+ * \param handle of object
*/
void *MEM_CacheLimiter_get(MEM_CacheLimiterHandleC *handle);
diff --git a/intern/opensubdiv/opensubdiv_capi.cc b/intern/opensubdiv/opensubdiv_capi.cc
index 52ce98fe74b..91803551f12 100644
--- a/intern/opensubdiv/opensubdiv_capi.cc
+++ b/intern/opensubdiv/opensubdiv_capi.cc
@@ -33,6 +33,7 @@
#include <stdlib.h>
#include <GL/glew.h>
+#include <opensubdiv/version.h>
#include <opensubdiv/osd/glMesh.h>
/* CPU Backend */
@@ -381,3 +382,8 @@ int openSubdiv_supportGPUDisplay(void)
(GLEW_ARB_texture_buffer_object || GLEW_EXT_texture_buffer_object)));
/* also ARB_explicit_attrib_location? */
}
+
+int openSubdiv_getVersionHex(void)
+{
+ return OPENSUBDIV_VERSION_NUMBER;
+}
diff --git a/intern/opensubdiv/opensubdiv_capi.h b/intern/opensubdiv/opensubdiv_capi.h
index c3a194813e6..281bd3f010d 100644
--- a/intern/opensubdiv/opensubdiv_capi.h
+++ b/intern/opensubdiv/opensubdiv_capi.h
@@ -152,6 +152,8 @@ void openSubdiv_init(bool gpu_legacy_support);
void openSubdiv_cleanup(void);
bool openSubdiv_gpu_legacy_support(void);
+int openSubdiv_getVersionHex(void);
+
#ifdef __cplusplus
}
#endif