diff options
author | Raja Kedia(miyagix) <rajakedia2222@gmail.com> | 2017-12-14 21:47:35 +0300 |
---|---|---|
committer | Raja Kedia(miyagix) <rajakedia2222@gmail.com> | 2017-12-14 21:47:35 +0300 |
commit | 450eb4ef6c54108e16164aa0820ee00fea9b8b48 (patch) | |
tree | 643a8c2b026205961d2539ac7a5bf363e5f5f500 /intern | |
parent | 31d5262cece4c5644a36cf0511bc42007a2c1e6d (diff) | |
parent | ba256b32ee5d3ab7991fe5abbb47071ccfe8577c (diff) |
Merge remote-tracking branch 'origin' into soc-2017-sculpting_brushsoc-2017-sculpting_brush
Diffstat (limited to 'intern')
105 files changed, 1176 insertions, 644 deletions
diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py index b5149b5082e..3018fd5b316 100644 --- a/intern/cycles/blender/addon/engine.py +++ b/intern/cycles/blender/addon/engine.py @@ -239,7 +239,8 @@ def register_passes(engine, scene, srl): if crl.pass_debug_bvh_intersections: engine.register_pass(scene, srl, "Debug BVH Intersections", 1, "X", 'VALUE') if crl.pass_debug_ray_bounces: engine.register_pass(scene, srl, "Debug Ray Bounces", 1, "X", 'VALUE') - if crl.use_denoising and crl.denoising_store_passes: + cscene = scene.cycles + if crl.use_denoising and crl.denoising_store_passes and not cscene.use_progressive_refine: engine.register_pass(scene, srl, "Denoising Normal", 3, "XYZ", 'VECTOR') engine.register_pass(scene, srl, "Denoising Normal Variance", 3, "XYZ", 'VECTOR') engine.register_pass(scene, srl, "Denoising Albedo", 3, "RGB", 'COLOR') diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 8bb25aba13c..68474529ed3 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -695,10 +695,17 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): update=devices_update_callback ) - cls.debug_opencl_kernel_single_program = BoolProperty(name="Single Program", default=True, update=devices_update_callback); + cls.debug_opencl_kernel_single_program = BoolProperty( + name="Single Program", + default=True, + update=devices_update_callback, + ) cls.debug_use_opencl_debug = BoolProperty(name="Debug OpenCL", default=False) + cls.debug_opencl_mem_limit = IntProperty(name="Memory limit", default=0, + description="Artificial limit on OpenCL memory usage in MB (0 to disable limit)") + @classmethod def unregister(cls): del bpy.types.Scene.cycles @@ -1209,6 +1216,7 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup): name="Use Denoising", description="Denoise the rendered image", default=False, + update=update_render_passes, ) cls.denoising_diffuse_direct = BoolProperty( name="Diffuse Direct", diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 4ed3ccd9a2c..49beebe5ab4 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -531,17 +531,17 @@ class CyclesRender_PT_layer_passes(CyclesButtonsPanel, Panel): col.prop(rl, "use_pass_environment") if context.scene.cycles.feature_set == 'EXPERIMENTAL': - col.separator() - sub = col.column() - sub.active = crl.use_denoising - sub.prop(crl, "denoising_store_passes", text="Denoising") + col.separator() + sub = col.column() + sub.active = crl.use_denoising + sub.prop(crl, "denoising_store_passes", text="Denoising") if _cycles.with_cycles_debug: - col = layout.column() - col.prop(crl, "pass_debug_bvh_traversed_nodes") - col.prop(crl, "pass_debug_bvh_traversed_instances") - col.prop(crl, "pass_debug_bvh_intersections") - col.prop(crl, "pass_debug_ray_bounces") + col = layout.column() + col.prop(crl, "pass_debug_bvh_traversed_nodes") + col.prop(crl, "pass_debug_bvh_traversed_instances") + col.prop(crl, "pass_debug_bvh_intersections") + col.prop(crl, "pass_debug_ray_bounces") class CyclesRender_PT_views(CyclesButtonsPanel, Panel): @@ -1608,6 +1608,7 @@ class CyclesRender_PT_debug(CyclesButtonsPanel, Panel): col.prop(cscene, "debug_opencl_device_type", text="Device") col.prop(cscene, "debug_opencl_kernel_single_program", text="Single Program") col.prop(cscene, "debug_use_opencl_debug", text="Debug") + col.prop(cscene, "debug_opencl_mem_limit") class CyclesParticle_PT_CurveSettings(CyclesButtonsPanel, Panel): @@ -1710,7 +1711,7 @@ def draw_device(self, context): layout.prop(cscene, "feature_set") - split = layout.split(percentage=1/3) + split = layout.split(percentage=1 / 3) split.label("Device:") row = split.row() row.active = show_device_active(context) diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp index 01570b1e3f9..54973fd1b7f 100644 --- a/intern/cycles/blender/blender_python.cpp +++ b/intern/cycles/blender/blender_python.cpp @@ -106,6 +106,7 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene) } /* Synchronize other OpenCL flags. */ flags.opencl.debug = get_boolean(cscene, "debug_use_opencl_debug"); + flags.opencl.mem_limit = ((size_t)get_int(cscene, "debug_opencl_mem_limit"))*1024*1024; flags.opencl.single_program = get_boolean(cscene, "debug_opencl_kernel_single_program"); return flags.opencl.device_type != opencl_device_type || flags.opencl.kernel_type != opencl_kernel_type; diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index c6a59577507..2b5dd5eadea 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -399,14 +399,7 @@ void BlenderSession::render() BL::RenderLayer b_rlay = *b_single_rlay; /* add passes */ - array<Pass> passes; - if(session_params.device.advanced_shading) { - passes = sync->sync_render_passes(b_rlay, *b_layer_iter); - } - else { - Pass::add(PASS_COMBINED, passes); - } - + array<Pass> passes = sync->sync_render_passes(b_rlay, *b_layer_iter, session_params); buffer_params.passes = passes; PointerRNA crl = RNA_pointer_get(&b_layer_iter->ptr, "cycles"); diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index 41723599874..3a00384458a 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -553,11 +553,16 @@ int BlenderSync::get_denoising_pass(BL::RenderPass& b_pass) } array<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay, - BL::SceneRenderLayer& b_srlay) + BL::SceneRenderLayer& b_srlay, + const SessionParams &session_params) { array<Pass> passes; Pass::add(PASS_COMBINED, passes); + if(!session_params.device.advanced_shading) { + return passes; + } + /* loop over passes */ BL::RenderLayer::passes_iterator b_pass_iter; @@ -572,7 +577,9 @@ array<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay, } PointerRNA crp = RNA_pointer_get(&b_srlay.ptr, "cycles"); - if(get_boolean(crp, "denoising_store_passes")) { + if(get_boolean(crp, "denoising_store_passes") && + get_boolean(crp, "use_denoising") && + !session_params.progressive_refine) { b_engine.add_pass("Denoising Normal", 3, "XYZ", b_srlay.name().c_str()); b_engine.add_pass("Denoising Normal Variance", 3, "XYZ", b_srlay.name().c_str()); b_engine.add_pass("Denoising Albedo", 3, "RGB", b_srlay.name().c_str()); diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h index 0950285d976..4ec46424b5a 100644 --- a/intern/cycles/blender/blender_sync.h +++ b/intern/cycles/blender/blender_sync.h @@ -68,7 +68,8 @@ public: const char *layer = 0); void sync_render_layers(BL::SpaceView3D& b_v3d, const char *layer); array<Pass> sync_render_passes(BL::RenderLayer& b_rlay, - BL::SceneRenderLayer& b_srlay); + BL::SceneRenderLayer& b_srlay, + const SessionParams &session_params); void sync_integrator(); void sync_camera(BL::RenderSettings& b_render, BL::Object& b_override, diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index 0603ecb3afb..a54bb77f9f3 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -68,6 +68,8 @@ std::ostream& operator <<(std::ostream &os, << string_from_bool(requested_features.use_transparent) << std::endl; os << "Use Principled BSDF: " << string_from_bool(requested_features.use_principled) << std::endl; + os << "Use Denoising: " + << string_from_bool(requested_features.use_denoising) << std::endl; return os; } diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index 527940e8f50..b3b693c630c 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -127,6 +127,9 @@ public: /* Per-uber shader usage flags. */ bool use_principled; + /* Denoising features. */ + bool use_denoising; + DeviceRequestedFeatures() { /* TODO(sergey): Find more meaningful defaults. */ @@ -145,6 +148,7 @@ public: use_transparent = false; use_shadow_tricks = false; use_principled = false; + use_denoising = false; } bool modified(const DeviceRequestedFeatures& requested_features) @@ -163,7 +167,8 @@ public: use_patch_evaluation == requested_features.use_patch_evaluation && use_transparent == requested_features.use_transparent && use_shadow_tricks == requested_features.use_shadow_tricks && - use_principled == requested_features.use_principled); + use_principled == requested_features.use_principled && + use_denoising == requested_features.use_denoising); } /* Convert the requested features structure to a build options, @@ -213,6 +218,9 @@ public: if(!use_principled) { build_options += " -D__NO_PRINCIPLED__"; } + if(!use_denoising) { + build_options += " -D__NO_DENOISING__"; + } return build_options; } }; diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index c2f74aa8903..18112437b45 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -149,7 +149,8 @@ public: device_memory& use_queues_flag, device_memory& work_pool_wgs); - virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&); + virtual SplitKernelFunction* get_split_kernel_function(const string& kernel_name, + const DeviceRequestedFeatures&); virtual int2 split_kernel_local_size(); virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task); virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads); @@ -185,9 +186,9 @@ public: KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int, int)> filter_nlm_update_output_kernel; KernelFunctions<void(*)(float*, float*, int*, int)> filter_nlm_normalize_kernel; - KernelFunctions<void(*)(float*, int, int, int, float*, int*, int*, int, int, float)> filter_construct_transform_kernel; - KernelFunctions<void(*)(int, int, float*, float*, float*, float*, float*, int*, float*, float3*, int*, int*, int, int, int, int)> filter_nlm_construct_gramian_kernel; - KernelFunctions<void(*)(int, int, int, int, int, float*, int*, float*, float3*, int*, int)> filter_finalize_kernel; + KernelFunctions<void(*)(float*, int, int, int, float*, int*, int*, int, int, float)> filter_construct_transform_kernel; + KernelFunctions<void(*)(int, int, float*, float*, float*, int*, float*, float3*, int*, int*, int, int, int, int)> filter_nlm_construct_gramian_kernel; + KernelFunctions<void(*)(int, int, int, int, int, float*, int*, float*, float3*, int*, int)> filter_finalize_kernel; KernelFunctions<void(*)(KernelGlobals *, ccl_constant KernelData*, ccl_global void*, int, ccl_global char*, ccl_global uint*, int, int, int, int, int, int, int, int, ccl_global int*, int, @@ -248,6 +249,7 @@ public: REGISTER_SPLIT_KERNEL(direct_lighting); REGISTER_SPLIT_KERNEL(shadow_blocked_ao); REGISTER_SPLIT_KERNEL(shadow_blocked_dl); + REGISTER_SPLIT_KERNEL(enqueue_inactive); REGISTER_SPLIT_KERNEL(next_iteration_setup); REGISTER_SPLIT_KERNEL(indirect_subsurface); REGISTER_SPLIT_KERNEL(buffer_update); @@ -465,8 +467,6 @@ public: bool denoising_reconstruct(device_ptr color_ptr, device_ptr color_variance_ptr, - device_ptr guide_ptr, - device_ptr guide_variance_ptr, device_ptr output_ptr, DenoisingTask *task) { @@ -485,8 +485,8 @@ public: task->reconstruction_state.source_w - max(0, dx), task->reconstruction_state.source_h - max(0, dy)}; filter_nlm_calc_difference_kernel()(dx, dy, - (float*) guide_ptr, - (float*) guide_variance_ptr, + (float*) color_ptr, + (float*) color_variance_ptr, difference, local_rect, task->buffer.w, @@ -499,8 +499,6 @@ public: filter_nlm_construct_gramian_kernel()(dx, dy, blurDifference, (float*) task->buffer.mem.device_pointer, - (float*) color_ptr, - (float*) color_variance_ptr, (float*) task->storage.transform.device_pointer, (int*) task->storage.rank.device_pointer, (float*) task->storage.XtWX.device_pointer, @@ -648,7 +646,7 @@ public: DenoisingTask denoising(this); denoising.functions.construct_transform = function_bind(&CPUDevice::denoising_construct_transform, this, &denoising); - denoising.functions.reconstruct = function_bind(&CPUDevice::denoising_reconstruct, this, _1, _2, _3, _4, _5, &denoising); + denoising.functions.reconstruct = function_bind(&CPUDevice::denoising_reconstruct, this, _1, _2, _3, &denoising); denoising.functions.divide_shadow = function_bind(&CPUDevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising); denoising.functions.non_local_means = function_bind(&CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising); denoising.functions.combine_halves = function_bind(&CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising); @@ -935,7 +933,8 @@ bool CPUSplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim, return true; } -SplitKernelFunction* CPUSplitKernel::get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&) +SplitKernelFunction* CPUSplitKernel::get_split_kernel_function(const string& kernel_name, + const DeviceRequestedFeatures&) { CPUSplitKernelFunction *kernel = new CPUSplitKernelFunction(device); diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 99537e9a983..3a29538aa13 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -105,7 +105,8 @@ public: device_memory& use_queues_flag, device_memory& work_pool_wgs); - virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&); + virtual SplitKernelFunction* get_split_kernel_function(const string& kernel_name, + const DeviceRequestedFeatures&); virtual int2 split_kernel_local_size(); virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task); }; @@ -1051,8 +1052,6 @@ public: bool denoising_reconstruct(device_ptr color_ptr, device_ptr color_variance_ptr, - device_ptr guide_ptr, - device_ptr guide_variance_ptr, device_ptr output_ptr, DenoisingTask *task) { @@ -1096,8 +1095,8 @@ public: task->reconstruction_state.source_h - max(0, dy)}; void *calc_difference_args[] = {&dx, &dy, - &guide_ptr, - &guide_variance_ptr, + &color_ptr, + &color_variance_ptr, &difference, &local_rect, &task->buffer.w, @@ -1126,8 +1125,6 @@ public: void *construct_gramian_args[] = {&dx, &dy, &blurDifference, &task->buffer.mem.device_pointer, - &color_ptr, - &color_variance_ptr, &task->storage.transform.device_pointer, &task->storage.rank.device_pointer, &task->storage.XtWX.device_pointer, @@ -1294,7 +1291,7 @@ public: DenoisingTask denoising(this); denoising.functions.construct_transform = function_bind(&CUDADevice::denoising_construct_transform, this, &denoising); - denoising.functions.reconstruct = function_bind(&CUDADevice::denoising_reconstruct, this, _1, _2, _3, _4, _5, &denoising); + denoising.functions.reconstruct = function_bind(&CUDADevice::denoising_reconstruct, this, _1, _2, _3, &denoising); denoising.functions.divide_shadow = function_bind(&CUDADevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising); denoising.functions.non_local_means = function_bind(&CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising); denoising.functions.combine_halves = function_bind(&CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising); @@ -2041,7 +2038,8 @@ bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim return !device->have_error(); } -SplitKernelFunction* CUDASplitKernel::get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&) +SplitKernelFunction* CUDASplitKernel::get_split_kernel_function(const string& kernel_name, + const DeviceRequestedFeatures&) { CUfunction func; diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp index 613bd9112cf..619cc1d171e 100644 --- a/intern/cycles/device/device_denoising.cpp +++ b/intern/cycles/device/device_denoising.cpp @@ -215,7 +215,7 @@ bool DenoisingTask::run_denoising() { device_sub_ptr color_ptr (device, buffer.mem, 8*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE); device_sub_ptr color_var_ptr(device, buffer.mem, 11*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE); - functions.reconstruct(*color_ptr, *color_var_ptr, *color_ptr, *color_var_ptr, render_buffer.ptr); + functions.reconstruct(*color_ptr, *color_var_ptr, render_buffer.ptr); } device->mem_free(storage.XtWX); diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h index 25b93c2ad74..def7b72f67d 100644 --- a/intern/cycles/device/device_denoising.h +++ b/intern/cycles/device/device_denoising.h @@ -58,8 +58,6 @@ public: )> non_local_means; function<bool(device_ptr color_ptr, device_ptr color_variance_ptr, - device_ptr guide_ptr, - device_ptr guide_variance_ptr, device_ptr output_ptr )> reconstruct; function<bool()> construct_transform; diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index edd2047debc..681b8214b03 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -130,10 +130,22 @@ string device_opencl_capabilities(void) opencl_assert(func(id, what, sizeof(data), &data, NULL)); \ result += string_printf("%s: %s\n", name, data); \ } while(false) +#define APPEND_STRING_EXTENSION_INFO(func, id, name, what) \ + do { \ + char data[1024] = "\0"; \ + size_t length = 0; \ + if(func(id, what, sizeof(data), &data, &length) == CL_SUCCESS) { \ + if(length != 0 && data[0] != '\0') { \ + result += string_printf("%s: %s\n", name, data); \ + } \ + } \ + } while(false) #define APPEND_PLATFORM_STRING_INFO(id, name, what) \ APPEND_STRING_INFO(clGetPlatformInfo, id, "\tPlatform " name, what) #define APPEND_DEVICE_STRING_INFO(id, name, what) \ APPEND_STRING_INFO(clGetDeviceInfo, id, "\t\t\tDevice " name, what) +#define APPEND_DEVICE_STRING_EXTENSION_INFO(id, name, what) \ + APPEND_STRING_EXTENSION_INFO(clGetDeviceInfo, id, "\t\t\tDevice " name, what) vector<cl_device_id> device_ids; for(cl_uint platform = 0; platform < num_platforms; ++platform) { @@ -167,6 +179,7 @@ string device_opencl_capabilities(void) result += string_printf("\t\tDevice: #%u\n", device); APPEND_DEVICE_STRING_INFO(device_id, "Name", CL_DEVICE_NAME); + APPEND_DEVICE_STRING_EXTENSION_INFO(device_id, "Board Name", CL_DEVICE_BOARD_NAME_AMD); APPEND_DEVICE_STRING_INFO(device_id, "Vendor", CL_DEVICE_VENDOR); APPEND_DEVICE_STRING_INFO(device_id, "OpenCL C Version", CL_DEVICE_OPENCL_C_VERSION); APPEND_DEVICE_STRING_INFO(device_id, "Profile", CL_DEVICE_PROFILE); diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp index dddd19f179f..d2b3a89fa98 100644 --- a/intern/cycles/device/device_split_kernel.cpp +++ b/intern/cycles/device/device_split_kernel.cpp @@ -47,6 +47,7 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device) kernel_direct_lighting = NULL; kernel_shadow_blocked_ao = NULL; kernel_shadow_blocked_dl = NULL; + kernel_enqueue_inactive = NULL; kernel_next_iteration_setup = NULL; kernel_indirect_subsurface = NULL; kernel_buffer_update = NULL; @@ -74,6 +75,7 @@ DeviceSplitKernel::~DeviceSplitKernel() delete kernel_direct_lighting; delete kernel_shadow_blocked_ao; delete kernel_shadow_blocked_dl; + delete kernel_enqueue_inactive; delete kernel_next_iteration_setup; delete kernel_indirect_subsurface; delete kernel_buffer_update; @@ -101,6 +103,7 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe LOAD_KERNEL(direct_lighting); LOAD_KERNEL(shadow_blocked_ao); LOAD_KERNEL(shadow_blocked_dl); + LOAD_KERNEL(enqueue_inactive); LOAD_KERNEL(next_iteration_setup); LOAD_KERNEL(indirect_subsurface); LOAD_KERNEL(buffer_update); @@ -256,6 +259,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size); ENQUEUE_SPLIT_KERNEL(shadow_blocked_ao, global_size, local_size); ENQUEUE_SPLIT_KERNEL(shadow_blocked_dl, global_size, local_size); + ENQUEUE_SPLIT_KERNEL(enqueue_inactive, global_size, local_size); ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size); ENQUEUE_SPLIT_KERNEL(indirect_subsurface, global_size, local_size); ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size); diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h index 68c2ba974a5..9c42cb58520 100644 --- a/intern/cycles/device/device_split_kernel.h +++ b/intern/cycles/device/device_split_kernel.h @@ -69,6 +69,7 @@ private: SplitKernelFunction *kernel_direct_lighting; SplitKernelFunction *kernel_shadow_blocked_ao; SplitKernelFunction *kernel_shadow_blocked_dl; + SplitKernelFunction *kernel_enqueue_inactive; SplitKernelFunction *kernel_next_iteration_setup; SplitKernelFunction *kernel_indirect_subsurface; SplitKernelFunction *kernel_buffer_update; @@ -124,7 +125,8 @@ public: device_memory& use_queues_flag, device_memory& work_pool_wgs) = 0; - virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&) = 0; + virtual SplitKernelFunction* get_split_kernel_function(const string& kernel_name, + const DeviceRequestedFeatures&) = 0; virtual int2 split_kernel_local_size() = 0; virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task) = 0; }; diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h index 27e196d1e68..7da690904aa 100644 --- a/intern/cycles/device/opencl/opencl.h +++ b/intern/cycles/device/opencl/opencl.h @@ -84,7 +84,7 @@ public: string *error = NULL); static bool device_version_check(cl_device_id device, string *error = NULL); - static string get_hardware_id(string platform_name, + static string get_hardware_id(const string& platform_name, cl_device_id device_id); static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices, bool force_all = false); @@ -130,6 +130,11 @@ public: cl_int* error = NULL); static cl_device_type get_device_type(cl_device_id device_id); + static bool get_driver_version(cl_device_id device_id, + int *major, + int *minor, + cl_int* error = NULL); + static int mem_address_alignment(cl_device_id device_id); /* Get somewhat more readable device name. @@ -242,17 +247,17 @@ public: public: OpenCLProgram() : loaded(false), device(NULL) {} OpenCLProgram(OpenCLDeviceBase *device, - string program_name, - string kernel_name, - string kernel_build_options, + const string& program_name, + const string& kernel_name, + const string& kernel_build_options, bool use_stdout = true); ~OpenCLProgram(); void add_kernel(ustring name); void load(); - bool is_loaded() { return loaded; } - string get_log() { return log; } + bool is_loaded() const { return loaded; } + const string& get_log() const { return log; } void report_error(); cl_kernel operator()(); @@ -266,8 +271,8 @@ public: bool load_binary(const string& clbin, const string *debug_src = NULL); bool save_binary(const string& clbin); - void add_log(string msg, bool is_debug); - void add_error(string msg); + void add_log(const string& msg, bool is_debug); + void add_error(const string& msg); bool loaded; cl_program program; @@ -390,8 +395,6 @@ protected: bool denoising_construct_transform(DenoisingTask *task); bool denoising_reconstruct(device_ptr color_ptr, device_ptr color_variance_ptr, - device_ptr guide_ptr, - device_ptr guide_variance_ptr, device_ptr output_ptr, DenoisingTask *task); bool denoising_combine_halves(device_ptr a_ptr, diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index 24b70e3446c..509da7a0a84 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -20,6 +20,7 @@ #include "kernel/kernel_types.h" +#include "util/util_algorithm.h" #include "util/util_foreach.h" #include "util/util_logging.h" #include "util/util_md5.h" @@ -276,6 +277,25 @@ void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryTyp size_t size = mem.memory_size(); + /* check there is enough memory available for the allocation */ + cl_ulong max_alloc_size = 0; + clGetDeviceInfo(cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_alloc_size, NULL); + + if(DebugFlags().opencl.mem_limit) { + max_alloc_size = min(max_alloc_size, + cl_ulong(DebugFlags().opencl.mem_limit - stats.mem_used)); + } + + if(size > max_alloc_size) { + string error = "Scene too complex to fit in available memory."; + if(name != NULL) { + error += string_printf(" (allocating buffer %s failed.)", name); + } + set_error(error); + + return; + } + cl_mem_flags mem_flag; void *mem_ptr = NULL; @@ -693,8 +713,6 @@ bool OpenCLDeviceBase::denoising_construct_transform(DenoisingTask *task) bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr, device_ptr color_variance_ptr, - device_ptr guide_ptr, - device_ptr guide_variance_ptr, device_ptr output_ptr, DenoisingTask *task) { @@ -703,8 +721,6 @@ bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr, cl_mem color_mem = CL_MEM_PTR(color_ptr); cl_mem color_variance_mem = CL_MEM_PTR(color_variance_ptr); - cl_mem guide_mem = CL_MEM_PTR(guide_ptr); - cl_mem guide_variance_mem = CL_MEM_PTR(guide_variance_ptr); cl_mem output_mem = CL_MEM_PTR(output_ptr); cl_mem buffer_mem = CL_MEM_PTR(task->buffer.mem.device_pointer); @@ -735,8 +751,8 @@ bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr, kernel_set_args(ckNLMCalcDifference, 0, dx, dy, - guide_mem, - guide_variance_mem, + color_mem, + color_variance_mem, difference, local_rect, task->buffer.w, @@ -775,8 +791,6 @@ bool OpenCLDeviceBase::denoising_reconstruct(device_ptr color_ptr, dx, dy, blurDifference, buffer_mem, - color_mem, - color_variance_mem, transform_mem, rank_mem, XtWX_mem, @@ -961,7 +975,7 @@ void OpenCLDeviceBase::denoise(RenderTile &rtile, const DeviceTask &task) denoising.functions.set_tiles = function_bind(&OpenCLDeviceBase::denoising_set_tiles, this, _1, &denoising); denoising.functions.construct_transform = function_bind(&OpenCLDeviceBase::denoising_construct_transform, this, &denoising); - denoising.functions.reconstruct = function_bind(&OpenCLDeviceBase::denoising_reconstruct, this, _1, _2, _3, _4, _5, &denoising); + denoising.functions.reconstruct = function_bind(&OpenCLDeviceBase::denoising_reconstruct, this, _1, _2, _3, &denoising); denoising.functions.divide_shadow = function_bind(&OpenCLDeviceBase::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising); denoising.functions.non_local_means = function_bind(&OpenCLDeviceBase::denoising_non_local_means, this, _1, _2, _3, _4, &denoising); denoising.functions.combine_halves = function_bind(&OpenCLDeviceBase::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising); @@ -1232,7 +1246,7 @@ void OpenCLDeviceBase::store_cached_kernel( } string OpenCLDeviceBase::build_options_for_base_program( - const DeviceRequestedFeatures& /*requested_features*/) + const DeviceRequestedFeatures& requested_features) { /* TODO(sergey): By default we compile all features, meaning * mega kernel is not getting feature-based optimizations. @@ -1240,6 +1254,14 @@ string OpenCLDeviceBase::build_options_for_base_program( * Ideally we need always compile kernel with as less features * enabled as possible to keep performance at it's max. */ + + /* For now disable baking when not in use as this has major + * impact on kernel build times. + */ + if(!requested_features.use_baking) { + return "-D__NO_BAKING__"; + } + return ""; } diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index 76dcbd6fc9a..76d9983e9a2 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -25,6 +25,7 @@ #include "device/device_split_kernel.h" +#include "util/util_algorithm.h" #include "util/util_logging.h" #include "util/util_md5.h" #include "util/util_path.h" @@ -176,17 +177,62 @@ protected: friend class OpenCLSplitKernelFunction; }; +struct CachedSplitMemory { + int id; + device_memory *split_data; + device_memory *ray_state; + device_ptr *rng_state; + device_memory *queue_index; + device_memory *use_queues_flag; + device_memory *work_pools; + device_ptr *buffer; +}; + class OpenCLSplitKernelFunction : public SplitKernelFunction { public: OpenCLDeviceSplitKernel* device; OpenCLDeviceBase::OpenCLProgram program; + CachedSplitMemory& cached_memory; + int cached_id; + + OpenCLSplitKernelFunction(OpenCLDeviceSplitKernel* device, CachedSplitMemory& cached_memory) : + device(device), cached_memory(cached_memory), cached_id(cached_memory.id-1) + { + } - OpenCLSplitKernelFunction(OpenCLDeviceSplitKernel* device) : device(device) {} - ~OpenCLSplitKernelFunction() { program.release(); } + ~OpenCLSplitKernelFunction() + { + program.release(); + } virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data) { - device->kernel_set_args(program(), 0, kg, data); + if(cached_id != cached_memory.id) { + cl_uint start_arg_index = + device->kernel_set_args(program(), + 0, + kg, + data, + *cached_memory.split_data, + *cached_memory.ray_state, + *cached_memory.rng_state); + +/* TODO(sergey): Avoid map lookup here. */ +#define KERNEL_TEX(type, ttype, name) \ + device->set_kernel_arg_mem(program(), &start_arg_index, #name); +#include "kernel/kernel_textures.h" +#undef KERNEL_TEX + + start_arg_index += + device->kernel_set_args(program(), + start_arg_index, + *cached_memory.queue_index, + *cached_memory.use_queues_flag, + *cached_memory.work_pools, + *cached_memory.buffer); + + cached_id = cached_memory.id; + } device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue, program(), @@ -213,14 +259,15 @@ public: class OpenCLSplitKernel : public DeviceSplitKernel { OpenCLDeviceSplitKernel *device; + CachedSplitMemory cached_memory; public: explicit OpenCLSplitKernel(OpenCLDeviceSplitKernel *device) : DeviceSplitKernel(device), device(device) { } - virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, + virtual SplitKernelFunction* get_split_kernel_function(const string& kernel_name, const DeviceRequestedFeatures& requested_features) { - OpenCLSplitKernelFunction* kernel = new OpenCLSplitKernelFunction(device); + OpenCLSplitKernelFunction* kernel = new OpenCLSplitKernelFunction(device, cached_memory); bool single_program = OpenCLInfo::use_single_program(); kernel->program = @@ -349,6 +396,15 @@ public: return false; } + cached_memory.split_data = &split_data; + cached_memory.ray_state = &ray_state; + cached_memory.rng_state = &rtile.rng_state; + cached_memory.queue_index = &queue_index; + cached_memory.use_queues_flag = &use_queues_flag; + cached_memory.work_pools = &work_pool_wgs; + cached_memory.buffer = &rtile.buffer; + cached_memory.id++; + return true; } @@ -368,12 +424,18 @@ public: cl_ulong max_buffer_size; clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL); + + if(DebugFlags().opencl.mem_limit) { + max_buffer_size = min(max_buffer_size, + cl_ulong(DebugFlags().opencl.mem_limit - device->stats.mem_used)); + } + VLOG(1) << "Maximum device allocation size: " << string_human_readable_number(max_buffer_size) << " bytes. (" << string_human_readable_size(max_buffer_size) << ")."; size_t num_elements = max_elements_for_max_buffer_size(kg, data, max_buffer_size / 2); - int2 global_size = make_int2(round_down((int)sqrt(num_elements), 64), (int)sqrt(num_elements)); + int2 global_size = make_int2(max(round_down((int)sqrt(num_elements), 64), 64), (int)sqrt(num_elements)); VLOG(1) << "Global size: " << global_size << "."; return global_size; } diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp index 642c1bfa11c..0d34af3e040 100644 --- a/intern/cycles/device/opencl/opencl_util.cpp +++ b/intern/cycles/device/opencl/opencl_util.cpp @@ -241,9 +241,9 @@ string OpenCLCache::get_kernel_md5() } OpenCLDeviceBase::OpenCLProgram::OpenCLProgram(OpenCLDeviceBase *device, - string program_name, - string kernel_file, - string kernel_build_options, + const string& program_name, + const string& kernel_file, + const string& kernel_build_options, bool use_stdout) : device(device), program_name(program_name), @@ -274,7 +274,7 @@ void OpenCLDeviceBase::OpenCLProgram::release() } } -void OpenCLDeviceBase::OpenCLProgram::add_log(string msg, bool debug) +void OpenCLDeviceBase::OpenCLProgram::add_log(const string& msg, bool debug) { if(!use_stdout) { log += msg + "\n"; @@ -288,7 +288,7 @@ void OpenCLDeviceBase::OpenCLProgram::add_log(string msg, bool debug) } } -void OpenCLDeviceBase::OpenCLProgram::add_error(string msg) +void OpenCLDeviceBase::OpenCLProgram::add_error(const string& msg) { if(use_stdout) { fprintf(stderr, "%s\n", msg.c_str()); @@ -608,6 +608,14 @@ bool OpenCLInfo::device_supported(const string& platform_name, if(!get_device_name(device_id, &device_name)) { return false; } + + int driver_major = 0; + int driver_minor = 0; + if(!get_driver_version(device_id, &driver_major, &driver_minor)) { + return false; + } + VLOG(3) << "OpenCL driver version " << driver_major << "." << driver_minor; + /* It is possible tyo have Iris GPU on AMD/Apple OpenCL framework * (aka, it will not be on Intel framework). This isn't supported * and needs an explicit blacklist. @@ -618,6 +626,21 @@ bool OpenCLInfo::device_supported(const string& platform_name, if(platform_name == "AMD Accelerated Parallel Processing" && device_type == CL_DEVICE_TYPE_GPU) { + if(driver_major < 2236) { + VLOG(1) << "AMD driver version " << driver_major << "." << driver_minor << " not supported."; + return false; + } + const char *blacklist[] = { + /* GCN 1 */ + "Tahiti", "Pitcairn", "Capeverde", "Oland", + NULL + }; + for (int i = 0; blacklist[i] != NULL; i++) { + if(device_name == blacklist[i]) { + VLOG(1) << "AMD device " << device_name << " not supported"; + return false; + } + } return true; } if(platform_name == "Apple" && device_type == CL_DEVICE_TYPE_GPU) { @@ -684,7 +707,7 @@ bool OpenCLInfo::device_version_check(cl_device_id device, return true; } -string OpenCLInfo::get_hardware_id(string platform_name, cl_device_id device_id) +string OpenCLInfo::get_hardware_id(const string& platform_name, cl_device_id device_id) { if(platform_name == "AMD Accelerated Parallel Processing" || platform_name == "Apple") { /* Use cl_amd_device_topology extension. */ @@ -1063,7 +1086,7 @@ string OpenCLInfo::get_readable_device_name(cl_device_id device_id) CL_DEVICE_BOARD_NAME_AMD, sizeof(board_name), &board_name, - &length) == CL_SUCCESS) + &length) == CL_SUCCESS) { if(length != 0 && board_name[0] != '\0') { return board_name; @@ -1073,6 +1096,34 @@ string OpenCLInfo::get_readable_device_name(cl_device_id device_id) return get_device_name(device_id); } +bool OpenCLInfo::get_driver_version(cl_device_id device_id, + int *major, + int *minor, + cl_int* error) +{ + char buffer[1024]; + cl_int err; + if((err = clGetDeviceInfo(device_id, + CL_DRIVER_VERSION, + sizeof(buffer), + &buffer, + NULL)) != CL_SUCCESS) + { + if(error != NULL) { + *error = err; + } + return false; + } + if(error != NULL) { + *error = CL_SUCCESS; + } + if(sscanf(buffer, "%d.%d", major, minor) < 2) { + VLOG(1) << string_printf("OpenCL: failed to parse driver version string (%s).", buffer); + return false; + } + return true; +} + int OpenCLInfo::mem_address_alignment(cl_device_id device_id) { int base_align_bits; diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index bef869f34b4..23e9bd311c4 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -45,6 +45,7 @@ set(SRC kernels/opencl/kernel_direct_lighting.cl kernels/opencl/kernel_shadow_blocked_ao.cl kernels/opencl/kernel_shadow_blocked_dl.cl + kernels/opencl/kernel_enqueue_inactive.cl kernels/opencl/kernel_next_iteration_setup.cl kernels/opencl/kernel_indirect_subsurface.cl kernels/opencl/kernel_buffer_update.cl @@ -121,6 +122,10 @@ set(SRC_KERNELS_CUDA_HEADERS kernels/cuda/kernel_config.h ) +set(SRC_KERNELS_OPENCL_HEADERS + kernels/opencl/kernel_split_function.h +) + set(SRC_CLOSURE_HEADERS closure/alloc.h closure/bsdf.h @@ -278,6 +283,7 @@ set(SRC_SPLIT_HEADERS split/kernel_data_init.h split/kernel_direct_lighting.h split/kernel_do_volume.h + split/kernel_enqueue_inactive.h split/kernel_holdout_emission_blurring_pathtermination_ao.h split/kernel_indirect_background.h split/kernel_indirect_subsurface.h @@ -450,6 +456,7 @@ add_library(cycles_kernel ${SRC_HEADERS} ${SRC_KERNELS_CPU_HEADERS} ${SRC_KERNELS_CUDA_HEADERS} + ${SRC_KERNELS_OPENCL_HEADERS} ${SRC_BVH_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_FILTER_HEADERS} @@ -490,9 +497,11 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_subsurface_sc delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_ao.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_dl.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_enqueue_inactive.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_subsurface.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_buffer_update.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_split_function.h" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/filter.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel_split.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda) diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h index a04c157dc40..86a00d2124d 100644 --- a/intern/cycles/kernel/closure/bsdf.h +++ b/intern/cycles/kernel/closure/bsdf.h @@ -423,6 +423,11 @@ ccl_device bool bsdf_merge(ShaderClosure *a, ShaderClosure *b) case CLOSURE_BSDF_HAIR_REFLECTION_ID: case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: return bsdf_hair_merge(a, b); +#ifdef __PRINCIPLED__ + case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID: + case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID: + return bsdf_principled_diffuse_merge(a, b); +#endif #ifdef __VOLUME__ case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID: return volume_henyey_greenstein_merge(a, b); diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h index 30cc8b90330..b12e248f0a3 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet.h @@ -288,12 +288,16 @@ ccl_device int bsdf_microfacet_ggx_setup(MicrofacetBsdf *bsdf) return SD_BSDF|SD_BSDF_HAS_EVAL; } -ccl_device int bsdf_microfacet_ggx_fresnel_setup(MicrofacetBsdf *bsdf) +ccl_device int bsdf_microfacet_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd) { bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x); bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y); bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z); + float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); + float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); + bsdf->sample_weight *= F; + bsdf->alpha_x = saturate(bsdf->alpha_x); bsdf->alpha_y = bsdf->alpha_x; @@ -302,12 +306,16 @@ ccl_device int bsdf_microfacet_ggx_fresnel_setup(MicrofacetBsdf *bsdf) return SD_BSDF|SD_BSDF_HAS_EVAL; } -ccl_device int bsdf_microfacet_ggx_clearcoat_setup(MicrofacetBsdf *bsdf) +ccl_device int bsdf_microfacet_ggx_clearcoat_setup(MicrofacetBsdf *bsdf, const ShaderData *sd) { bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x); bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y); bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z); + float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); + float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); + bsdf->sample_weight *= 0.25f * bsdf->extra->clearcoat * F; + bsdf->alpha_x = saturate(bsdf->alpha_x); bsdf->alpha_y = bsdf->alpha_x; @@ -343,12 +351,16 @@ ccl_device int bsdf_microfacet_ggx_aniso_setup(MicrofacetBsdf *bsdf) return SD_BSDF|SD_BSDF_HAS_EVAL; } -ccl_device int bsdf_microfacet_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf) +ccl_device int bsdf_microfacet_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd) { bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x); bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y); bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z); + float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); + float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); + bsdf->sample_weight *= F; + bsdf->alpha_x = saturate(bsdf->alpha_x); bsdf->alpha_y = saturate(bsdf->alpha_y); diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h index b07b515c405..22d0092093a 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h @@ -245,35 +245,69 @@ ccl_device_forceinline float mf_ggx_albedo(float r) return saturate(albedo); } +ccl_device_inline float mf_ggx_transmission_albedo(float a, float ior) +{ + if(ior < 1.0f) { + ior = 1.0f/ior; + } + a = saturate(a); + ior = clamp(ior, 1.0f, 3.0f); + float I_1 = 0.0476898f*expf(-0.978352f*(ior-0.65657f)*(ior-0.65657f)) - 0.033756f*ior + 0.993261f; + float R_1 = (((0.116991f*a - 0.270369f)*a + 0.0501366f)*a - 0.00411511f)*a + 1.00008f; + float I_2 = (((-2.08704f*ior + 26.3298f)*ior - 127.906f)*ior + 292.958f)*ior - 287.946f + 199.803f/(ior*ior) - 101.668f/(ior*ior*ior); + float R_2 = ((((5.3725f*a -24.9307f)*a + 22.7437f)*a - 3.40751f)*a + 0.0986325f)*a + 0.00493504f; + + return saturate(1.0f + I_2*R_2*0.0019127f - (1.0f - I_1)*(1.0f - R_1)*9.3205f); +} + ccl_device_forceinline float mf_ggx_pdf(const float3 wi, const float3 wo, const float alpha) { float D = D_ggx(normalize(wi+wo), alpha); float lambda = mf_lambda(wi, make_float2(alpha, alpha)); + float singlescatter = 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f); + + float multiscatter = wo.z * M_1_PI_F; + float albedo = mf_ggx_albedo(alpha); - return 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f) + (1.0f - albedo) * wo.z; + return albedo*singlescatter + (1.0f - albedo)*multiscatter; } ccl_device_forceinline float mf_ggx_aniso_pdf(const float3 wi, const float3 wo, const float2 alpha) { - return 0.25f * D_ggx_aniso(normalize(wi+wo), alpha) / ((1.0f + mf_lambda(wi, alpha)) * wi.z) + (1.0f - mf_ggx_albedo(sqrtf(alpha.x*alpha.y))) * wo.z; + float D = D_ggx_aniso(normalize(wi+wo), alpha); + float lambda = mf_lambda(wi, alpha); + float singlescatter = 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f); + + float multiscatter = wo.z * M_1_PI_F; + + float albedo = mf_ggx_albedo(sqrtf(alpha.x*alpha.y)); + return albedo*singlescatter + (1.0f - albedo)*multiscatter; } ccl_device_forceinline float mf_glass_pdf(const float3 wi, const float3 wo, const float alpha, const float eta) { - float3 wh; - float fresnel; - if(wi.z*wo.z > 0.0f) { - wh = normalize(wi + wo); - fresnel = fresnel_dielectric_cos(dot(wi, wh), eta); - } - else { - wh = normalize(wi + wo*eta); - fresnel = 1.0f - fresnel_dielectric_cos(dot(wi, wh), eta); - } + bool reflective = (wi.z*wo.z > 0.0f); + + float wh_len; + float3 wh = normalize_len(wi + (reflective? wo : (wo*eta)), &wh_len); if(wh.z < 0.0f) wh = -wh; float3 r_wi = (wi.z < 0.0f)? -wi: wi; - return fresnel * max(0.0f, dot(r_wi, wh)) * D_ggx(wh, alpha) / ((1.0f + mf_lambda(r_wi, make_float2(alpha, alpha))) * r_wi.z) + fabsf(wo.z); + float lambda = mf_lambda(r_wi, make_float2(alpha, alpha)); + float D = D_ggx(wh, alpha); + float fresnel = fresnel_dielectric_cos(dot(r_wi, wh), eta); + + float multiscatter = fabsf(wo.z * M_1_PI_F); + if(reflective) { + float singlescatter = 0.25f * D / max((1.0f + lambda) * r_wi.z, 1e-7f); + float albedo = mf_ggx_albedo(alpha); + return fresnel * (albedo*singlescatter + (1.0f - albedo)*multiscatter); + } + else { + float singlescatter = fabsf(dot(r_wi, wh)*dot(wo, wh) * D * eta*eta / max((1.0f + lambda) * r_wi.z * wh_len*wh_len, 1e-7f)); + float albedo = mf_ggx_transmission_albedo(alpha, eta); + return (1.0f - fresnel) * (albedo*singlescatter + (1.0f - albedo)*multiscatter); + } } /* === Actual random walk implementations, one version of mf_eval and mf_sample per phase function. === */ @@ -326,13 +360,17 @@ ccl_device int bsdf_microfacet_multi_ggx_aniso_setup(MicrofacetBsdf *bsdf) return bsdf_microfacet_multi_ggx_common_setup(bsdf); } -ccl_device int bsdf_microfacet_multi_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf) +ccl_device int bsdf_microfacet_multi_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd) { if(is_zero(bsdf->T)) bsdf->T = make_float3(1.0f, 0.0f, 0.0f); bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID; + float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); + float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); + bsdf->sample_weight *= F; + return bsdf_microfacet_multi_ggx_common_setup(bsdf); } @@ -345,12 +383,16 @@ ccl_device int bsdf_microfacet_multi_ggx_setup(MicrofacetBsdf *bsdf) return bsdf_microfacet_multi_ggx_common_setup(bsdf); } -ccl_device int bsdf_microfacet_multi_ggx_fresnel_setup(MicrofacetBsdf *bsdf) +ccl_device int bsdf_microfacet_multi_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd) { bsdf->alpha_y = bsdf->alpha_x; bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID; + float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); + float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); + bsdf->sample_weight *= F; + return bsdf_microfacet_multi_ggx_common_setup(bsdf); } @@ -455,7 +497,7 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_setup(MicrofacetBsdf *bsdf) return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG; } -ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsdf) +ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd) { bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f); bsdf->alpha_y = bsdf->alpha_x; @@ -469,6 +511,10 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsd bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID; + float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); + float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); + bsdf->sample_weight *= F; + return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG; } diff --git a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h index 215c32e1ffb..f8ca64293b0 100644 --- a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h +++ b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h @@ -58,6 +58,14 @@ ccl_device int bsdf_principled_diffuse_setup(PrincipledDiffuseBsdf *bsdf) return SD_BSDF|SD_BSDF_HAS_EVAL; } +ccl_device bool bsdf_principled_diffuse_merge(const ShaderClosure *a, const ShaderClosure *b) +{ + const PrincipledDiffuseBsdf *bsdf_a = (const PrincipledDiffuseBsdf*)a; + const PrincipledDiffuseBsdf *bsdf_b = (const PrincipledDiffuseBsdf*)b; + + return (isequal_float3(bsdf_a->N, bsdf_b->N) && bsdf_a->roughness == bsdf_b->roughness); +} + ccl_device float3 bsdf_principled_diffuse_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { diff --git a/intern/cycles/kernel/filter/filter_features.h b/intern/cycles/kernel/filter/filter_features.h index 53d703de143..6226ed2c2ef 100644 --- a/intern/cycles/kernel/filter/filter_features.h +++ b/intern/cycles/kernel/filter/filter_features.h @@ -78,16 +78,10 @@ ccl_device_inline void filter_calculate_scale(float *scale) scale[3] = scale[4] = scale[5] = 1.0f/max(sqrtf(scale[3]), 0.01f); } -ccl_device_inline float3 filter_get_pixel_color(const ccl_global float *ccl_restrict buffer, - int pass_stride) +ccl_device_inline float3 filter_get_color(const ccl_global float *ccl_restrict buffer, + int pass_stride) { - return make_float3(ccl_get_feature(buffer, 0), ccl_get_feature(buffer, 1), ccl_get_feature(buffer, 2)); -} - -ccl_device_inline float filter_get_pixel_variance(const ccl_global float *ccl_restrict buffer, - int pass_stride) -{ - return average(make_float3(ccl_get_feature(buffer, 0), ccl_get_feature(buffer, 1), ccl_get_feature(buffer, 2))); + return make_float3(ccl_get_feature(buffer, 8), ccl_get_feature(buffer, 9), ccl_get_feature(buffer, 10)); } ccl_device_inline void design_row_add(float *design_row, diff --git a/intern/cycles/kernel/filter/filter_nlm_cpu.h b/intern/cycles/kernel/filter/filter_nlm_cpu.h index 5cb4038bc33..3e752bce68f 100644 --- a/intern/cycles/kernel/filter/filter_nlm_cpu.h +++ b/intern/cycles/kernel/filter/filter_nlm_cpu.h @@ -101,7 +101,7 @@ ccl_device_inline void kernel_filter_nlm_calc_weight(const float *ccl_restrict d for(int x = rect.x; x < rect.z; x++) { const int low = max(rect.x, x-f); const int high = min(rect.z, x+f+1); - out_image[y*w+x] = expf(-max(out_image[y*w+x] * (1.0f/(high - low)), 0.0f)); + out_image[y*w+x] = fast_expf(-max(out_image[y*w+x] * (1.0f/(high - low)), 0.0f)); } } } @@ -133,8 +133,6 @@ ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy, ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy, const float *ccl_restrict difference_image, const float *ccl_restrict buffer, - float *color_pass, - float *variance_pass, float *transform, int *rank, float *XtWX, @@ -167,7 +165,6 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy, dx, dy, w, h, pass_stride, buffer, - color_pass, variance_pass, l_transform, l_rank, weight, l_XtWX, l_XtWY, 0); } diff --git a/intern/cycles/kernel/filter/filter_nlm_gpu.h b/intern/cycles/kernel/filter/filter_nlm_gpu.h index 078c5f56763..2c5ac807051 100644 --- a/intern/cycles/kernel/filter/filter_nlm_gpu.h +++ b/intern/cycles/kernel/filter/filter_nlm_gpu.h @@ -66,7 +66,7 @@ ccl_device_inline void kernel_filter_nlm_calc_weight(int x, int y, sum += difference_image[y*w+x1]; } sum *= 1.0f/(high-low); - out_image[y*w+x] = expf(-max(sum, 0.0f)); + out_image[y*w+x] = fast_expf(-max(sum, 0.0f)); } ccl_device_inline void kernel_filter_nlm_update_output(int x, int y, @@ -97,8 +97,6 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int fx, int fy, int dx, int dy, const ccl_global float *ccl_restrict difference_image, const ccl_global float *ccl_restrict buffer, - ccl_global float *color_pass, - ccl_global float *variance_pass, const ccl_global float *ccl_restrict transform, ccl_global int *rank, ccl_global float *XtWX, @@ -130,7 +128,6 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int fx, int fy, dx, dy, w, h, pass_stride, buffer, - color_pass, variance_pass, transform, rank, weight, XtWX, XtWY, localIdx); diff --git a/intern/cycles/kernel/filter/filter_prefilter.h b/intern/cycles/kernel/filter/filter_prefilter.h index 82cc36625ec..d5ae1b73927 100644 --- a/intern/cycles/kernel/filter/filter_prefilter.h +++ b/intern/cycles/kernel/filter/filter_prefilter.h @@ -142,13 +142,22 @@ ccl_device void kernel_filter_detect_outliers(int x, int y, float ref = 2.0f*values[(int)(n*0.75f)]; float fac = 1.0f; if(L > ref) { - /* If the pixel is an outlier, negate the depth value to mark it as one. - * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */ - depth[idx] = -depth[idx]; - fac = ref/L; - variance[idx ] *= fac*fac; - variance[idx + pass_stride] *= fac*fac; - variance[idx+2*pass_stride] *= fac*fac; + /* The pixel appears to be an outlier. + * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is that the pixel + * should actually be at the reference value: + * If the reference is within the 3-sigma interval, the pixel is assumed to be a statistical outlier. + * Otherwise, it is very unlikely that the pixel should be darker, which indicates a legitimate highlight. + */ + float stddev = sqrtf(average(make_float3(variance[idx], variance[idx+pass_stride], variance[idx+2*pass_stride]))); + if(L - 3*stddev < ref) { + /* The pixel is an outlier, so negate the depth value to mark it as one. + * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */ + depth[idx] = -depth[idx]; + fac = ref/L; + variance[idx ] *= fac*fac; + variance[idx + pass_stride] *= fac*fac; + variance[idx+2*pass_stride] *= fac*fac; + } } out[idx ] = fac*image[idx]; out[idx + pass_stride] = fac*image[idx + pass_stride]; diff --git a/intern/cycles/kernel/filter/filter_reconstruction.h b/intern/cycles/kernel/filter/filter_reconstruction.h index 4a4c81b7ba3..25a3025056c 100644 --- a/intern/cycles/kernel/filter/filter_reconstruction.h +++ b/intern/cycles/kernel/filter/filter_reconstruction.h @@ -22,8 +22,6 @@ ccl_device_inline void kernel_filter_construct_gramian(int x, int y, int w, int h, int pass_stride, const ccl_global float *ccl_restrict buffer, - ccl_global float *color_pass, - ccl_global float *variance_pass, const ccl_global float *ccl_restrict transform, ccl_global int *rank, float weight, @@ -31,38 +29,31 @@ ccl_device_inline void kernel_filter_construct_gramian(int x, int y, ccl_global float3 *XtWY, int localIdx) { + if(weight < 1e-3f) { + return; + } + int p_offset = y *w + x; int q_offset = (y+dy)*w + (x+dx); -#ifdef __KERNEL_CPU__ - const int stride = 1; - (void)storage_stride; - (void)localIdx; - float design_row[DENOISE_FEATURES+1]; -#elif defined(__KERNEL_CUDA__) +#ifdef __KERNEL_GPU__ const int stride = storage_stride; +#else + const int stride = 1; + (void) storage_stride; +#endif + +#ifdef __KERNEL_CUDA__ ccl_local float shared_design_row[(DENOISE_FEATURES+1)*CCL_MAX_LOCAL_SIZE]; ccl_local_param float *design_row = shared_design_row + localIdx*(DENOISE_FEATURES+1); #else - const int stride = storage_stride; float design_row[DENOISE_FEATURES+1]; #endif - float3 p_color = filter_get_pixel_color(color_pass + p_offset, pass_stride); - float3 q_color = filter_get_pixel_color(color_pass + q_offset, pass_stride); + float3 q_color = filter_get_color(buffer + q_offset, pass_stride); - float p_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + p_offset, pass_stride)); - float q_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + q_offset, pass_stride)); - - /* If the pixel was flagged as an outlier during prefiltering, skip it. - * Otherwise, perform the regular confidence interval test unless - * the center pixel is an outlier (in that case, using the confidence - * interval test could result in no pixels being used at all). */ - bool p_outlier = (ccl_get_feature(buffer + p_offset, 0) < 0.0f); - bool q_outlier = (ccl_get_feature(buffer + q_offset, 0) < 0.0f); - bool outside_of_interval = (average(fabs(p_color - q_color)) > 2.0f*(p_std_dev + q_std_dev + 1e-3f)); - - if(q_outlier || (!p_outlier && outside_of_interval)) { + /* If the pixel was flagged as an outlier during prefiltering, skip it. */ + if(ccl_get_feature(buffer + q_offset, 0) < 0.0f) { return; } @@ -83,13 +74,19 @@ ccl_device_inline void kernel_filter_finalize(int x, int y, int w, int h, int4 buffer_params, int sample) { -#ifdef __KERNEL_CPU__ - const int stride = 1; - (void)storage_stride; -#else +#ifdef __KERNEL_GPU__ const int stride = storage_stride; +#else + const int stride = 1; + (void) storage_stride; #endif + if(XtWX[0] < 1e-3f) { + /* There is not enough information to determine a denoised result. + * As a fallback, keep the original value of the pixel. */ + return; + } + /* The weighted average of pixel colors (essentially, the NLM-filtered image). * In case the solution of the linear model fails due to numerical issues, * fall back to this value. */ @@ -102,6 +99,9 @@ ccl_device_inline void kernel_filter_finalize(int x, int y, int w, int h, final_color = mean_color; } + /* Clamp pixel value to positive values. */ + final_color = max(final_color, make_float3(0.0f, 0.0f, 0.0f)); + ccl_global float *combined_buffer = buffer + (y*buffer_params.y + x + buffer_params.x)*buffer_params.z; final_color *= sample; if(buffer_params.w) { @@ -114,6 +114,4 @@ ccl_device_inline void kernel_filter_finalize(int x, int y, int w, int h, combined_buffer[2] = final_color.z; } -#undef STORAGE_TYPE - CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h index 06728415c15..175bd6b9737 100644 --- a/intern/cycles/kernel/kernel_accumulate.h +++ b/intern/cycles/kernel/kernel_accumulate.h @@ -621,25 +621,43 @@ ccl_device_inline void path_radiance_accum_sample(PathRadiance *L, PathRadiance { float fac = 1.0f/num_samples; +#ifdef __SPLIT_KERNEL__ +# define safe_float3_add(f, v) \ + do { \ + ccl_global float *p = (ccl_global float*)(&(f)); \ + atomic_add_and_fetch_float(p+0, (v).x); \ + atomic_add_and_fetch_float(p+1, (v).y); \ + atomic_add_and_fetch_float(p+2, (v).z); \ + } while(0) +#else +# define safe_float3_add(f, v) (f) += (v) +#endif /* __SPLIT_KERNEL__ */ + #ifdef __PASSES__ - L->direct_diffuse += L_sample->direct_diffuse*fac; - L->direct_glossy += L_sample->direct_glossy*fac; - L->direct_transmission += L_sample->direct_transmission*fac; - L->direct_subsurface += L_sample->direct_subsurface*fac; - L->direct_scatter += L_sample->direct_scatter*fac; - - L->indirect_diffuse += L_sample->indirect_diffuse*fac; - L->indirect_glossy += L_sample->indirect_glossy*fac; - L->indirect_transmission += L_sample->indirect_transmission*fac; - L->indirect_subsurface += L_sample->indirect_subsurface*fac; - L->indirect_scatter += L_sample->indirect_scatter*fac; - - L->background += L_sample->background*fac; - L->ao += L_sample->ao*fac; - L->shadow += L_sample->shadow*fac; + safe_float3_add(L->direct_diffuse, L_sample->direct_diffuse*fac); + safe_float3_add(L->direct_glossy, L_sample->direct_glossy*fac); + safe_float3_add(L->direct_transmission, L_sample->direct_transmission*fac); + safe_float3_add(L->direct_subsurface, L_sample->direct_subsurface*fac); + safe_float3_add(L->direct_scatter, L_sample->direct_scatter*fac); + + safe_float3_add(L->indirect_diffuse, L_sample->indirect_diffuse*fac); + safe_float3_add(L->indirect_glossy, L_sample->indirect_glossy*fac); + safe_float3_add(L->indirect_transmission, L_sample->indirect_transmission*fac); + safe_float3_add(L->indirect_subsurface, L_sample->indirect_subsurface*fac); + safe_float3_add(L->indirect_scatter, L_sample->indirect_scatter*fac); + + safe_float3_add(L->background, L_sample->background*fac); + safe_float3_add(L->ao, L_sample->ao*fac); + safe_float3_add(L->shadow, L_sample->shadow*fac); +# ifdef __SPLIT_KERNEL__ + atomic_add_and_fetch_float(&L->mist, L_sample->mist*fac); +# else L->mist += L_sample->mist*fac; -#endif - L->emission += L_sample->emission * fac; +# endif /* __SPLIT_KERNEL__ */ +#endif /* __PASSES__ */ + safe_float3_add(L->emission, L_sample->emission*fac); + +#undef safe_float3_add } #ifdef __SHADOW_TRICKS__ diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h index 9d52834ffcc..9cd7ffb181d 100644 --- a/intern/cycles/kernel/kernel_passes.h +++ b/intern/cycles/kernel/kernel_passes.h @@ -142,7 +142,7 @@ ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, ccl_glob ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg, ShaderData *sd, - ccl_global PathState *state, + ccl_addr_space PathState *state, PathRadiance *L) { #ifdef __DENOISING_FEATURES__ diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h index 0fa77d9e8bd..5d92fd12201 100644 --- a/intern/cycles/kernel/kernel_path_state.h +++ b/intern/cycles/kernel/kernel_path_state.h @@ -139,9 +139,11 @@ ccl_device_inline void path_state_next(KernelGlobals *kg, ccl_addr_space PathSta /* random number generator next bounce */ state->rng_offset += PRNG_BOUNCE_NUM; +#ifdef __DENOISING_FEATURES__ if((state->denoising_feature_weight == 0.0f) && !(state->flag & PATH_RAY_SHADOW_CATCHER)) { state->flag &= ~PATH_RAY_STORE_SHADOW_INFO; } +#endif } ccl_device_inline uint path_state_ray_visibility(KernelGlobals *kg, PathState *state) diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h index 96bc636d5ac..e32d4bbbc1b 100644 --- a/intern/cycles/kernel/kernel_queues.h +++ b/intern/cycles/kernel/kernel_queues.h @@ -128,6 +128,21 @@ ccl_device unsigned int get_global_queue_index( return my_gqidx; } +ccl_device int dequeue_ray_index( + int queue_number, + ccl_global int *queues, + int queue_size, + ccl_global int *queue_index) +{ + int index = atomic_fetch_and_dec_uint32((ccl_global uint*)&queue_index[queue_number])-1; + + if(index < 0) { + return QUEUE_EMPTY_SLOT; + } + + return queues[index + queue_number * queue_size]; +} + CCL_NAMESPACE_END #endif // __KERNEL_QUEUE_H__ diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h index 1026cde7b29..6475d4b66fd 100644 --- a/intern/cycles/kernel/kernel_subsurface.h +++ b/intern/cycles/kernel/kernel_subsurface.h @@ -418,7 +418,7 @@ ccl_device_noinline void subsurface_scatter_multi_setup( } /* subsurface scattering step, from a point on the surface to another nearby point on the same object */ -ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, ccl_global PathState *state, +ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all) { float3 eval = make_float3(0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index dbeaffdfb24..34affab1b9d 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -135,7 +135,7 @@ CCL_NAMESPACE_BEGIN * this is because megakernel in device_opencl does not support * custom cflags depending on the scene features. */ -# endif /* __KERNEL_OPENCL_NVIDIA__ */ +# endif /* __KERNEL_OPENCL_APPLE__ */ # ifdef __KERNEL_OPENCL_AMD__ # define __CL_USE_NATIVE__ @@ -236,6 +236,9 @@ CCL_NAMESPACE_BEGIN #ifdef __NO_PRINCIPLED__ # undef __PRINCIPLED__ #endif +#ifdef __NO_DENOISING__ +# undef __DENOISING_FEATURES__ +#endif /* Random Numbers */ @@ -1387,6 +1390,8 @@ enum QueueNumber { #ifdef __BRANCHED_PATH__ /* All rays moving to next iteration of the indirect loop for light */ QUEUE_LIGHT_INDIRECT_ITER, + /* Queue of all inactive rays. These are candidates for sharing work of indirect loops */ + QUEUE_INACTIVE_RAYS, # ifdef __VOLUME__ /* All rays moving to next iteration of the indirect loop for volumes */ QUEUE_VOLUME_INDIRECT_ITER, @@ -1429,6 +1434,9 @@ enum RayState { RAY_BRANCHED_VOLUME_INDIRECT = (1 << 5), RAY_BRANCHED_SUBSURFACE_INDIRECT = (1 << 6), RAY_BRANCHED_INDIRECT = (RAY_BRANCHED_LIGHT_INDIRECT | RAY_BRANCHED_VOLUME_INDIRECT | RAY_BRANCHED_SUBSURFACE_INDIRECT), + + /* Ray is evaluating an iteration of an indirect loop for another thread */ + RAY_BRANCHED_INDIRECT_SHARED = (1 << 7), }; #define ASSIGN_RAY_STATE(ray_state, ray_index, state) (ray_state[ray_index] = ((ray_state[ray_index] & RAY_FLAG_MASK) | state)) diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index 9c0878249d4..1e472aaf51a 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -660,6 +660,7 @@ typedef struct VolumeSegment { * but the entire segment is needed to do always scattering, rather than probabilistically * hitting or missing the volume. if we don't know the transmittance at the end of the * volume we can't generate stratified distance samples up to that transmittance */ +#ifdef __VOLUME_DECOUPLED__ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, VolumeSegment *segment, bool heterogeneous) { @@ -829,6 +830,7 @@ ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *s #endif } } +#endif /* __VOLUME_DECOUPLED__ */ /* scattering for homogeneous and heterogeneous volumes, using decoupled ray * marching. diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h index ffd34c293fc..2ed713299fd 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h @@ -107,8 +107,6 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, int dy, float *difference_image, float *buffer, - float *color_pass, - float *variance_pass, float *transform, int *rank, float *XtWX, diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h index 261176846b1..8dc1a8d583c 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h @@ -213,8 +213,6 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, int dy, float *difference_image, float *buffer, - float *color_pass, - float *variance_pass, float *transform, int *rank, float *XtWX, @@ -229,7 +227,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, #ifdef KERNEL_STUB STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian); #else - kernel_filter_nlm_construct_gramian(dx, dy, difference_image, buffer, color_pass, variance_pass, transform, rank, XtWX, XtWY, load_int4(rect), load_int4(filter_rect), w, h, f, pass_stride); + kernel_filter_nlm_construct_gramian(dx, dy, difference_image, buffer, transform, rank, XtWX, XtWY, load_int4(rect), load_int4(filter_rect), w, h, f, pass_stride); #endif } diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h index 9895080d328..c8938534fe8 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h @@ -85,6 +85,7 @@ DECLARE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) DECLARE_SPLIT_KERNEL_FUNCTION(direct_lighting) DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) +DECLARE_SPLIT_KERNEL_FUNCTION(enqueue_inactive) DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update) diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index 9b85a864153..d4315ee5ec4 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -53,6 +53,7 @@ # include "kernel/split/kernel_direct_lighting.h" # include "kernel/split/kernel_shadow_blocked_ao.h" # include "kernel/split/kernel_shadow_blocked_dl.h" +# include "kernel/split/kernel_enqueue_inactive.h" # include "kernel/split/kernel_next_iteration_setup.h" # include "kernel/split/kernel_indirect_subsurface.h" # include "kernel/split/kernel_buffer_update.h" @@ -230,6 +231,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint) diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu index 2edbff08087..009c3fde9d5 100644 --- a/intern/cycles/kernel/kernels/cuda/filter.cu +++ b/intern/cycles/kernel/kernels/cuda/filter.cu @@ -207,8 +207,6 @@ CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) kernel_cuda_filter_nlm_construct_gramian(int dx, int dy, const float *ccl_restrict difference_image, const float *ccl_restrict buffer, - float *color_pass, - float *variance_pass, float const* __restrict__ transform, int *rank, float *XtWX, @@ -225,7 +223,6 @@ kernel_cuda_filter_nlm_construct_gramian(int dx, int dy, dx, dy, difference_image, buffer, - color_pass, variance_pass, transform, rank, XtWX, XtWY, rect, filter_rect, diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu index 8b7f1a8d405..628891b1458 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu @@ -39,6 +39,7 @@ #include "kernel/split/kernel_direct_lighting.h" #include "kernel/split/kernel_shadow_blocked_ao.h" #include "kernel/split/kernel_shadow_blocked_dl.h" +#include "kernel/split/kernel_enqueue_inactive.h" #include "kernel/split/kernel_next_iteration_setup.h" #include "kernel/split/kernel_indirect_subsurface.h" #include "kernel/split/kernel_buffer_update.h" @@ -118,6 +119,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint) diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl index 0462ca6f9bc..ba53ba4b26f 100644 --- a/intern/cycles/kernel/kernels/opencl/filter.cl +++ b/intern/cycles/kernel/kernels/opencl/filter.cl @@ -207,8 +207,6 @@ __kernel void kernel_ocl_filter_nlm_construct_gramian(int dx, int dy, const ccl_global float *ccl_restrict difference_image, const ccl_global float *ccl_restrict buffer, - ccl_global float *color_pass, - ccl_global float *variance_pass, const ccl_global float *ccl_restrict transform, ccl_global int *rank, ccl_global float *XtWX, @@ -227,7 +225,6 @@ __kernel void kernel_ocl_filter_nlm_construct_gramian(int dx, dx, dy, difference_image, buffer, - color_pass, variance_pass, transform, rank, XtWX, XtWY, rect, filter_rect, diff --git a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl index db65c91baf7..dcea2630aef 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl @@ -18,10 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_buffer_update.h" -__kernel void kernel_ocl_path_trace_buffer_update( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local unsigned int local_queue_atomics; - kernel_buffer_update((KernelGlobals*)kg, &local_queue_atomics); -} +#define KERNEL_NAME buffer_update +#define LOCALS_TYPE unsigned int +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl index eb34f750881..ed64ae01aae 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl @@ -18,10 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_direct_lighting.h" -__kernel void kernel_ocl_path_trace_direct_lighting( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local unsigned int local_queue_atomics; - kernel_direct_lighting((KernelGlobals*)kg, &local_queue_atomics); -} +#define KERNEL_NAME direct_lighting +#define LOCALS_TYPE unsigned int +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl index 83ef5f5f3f2..8afaa686e28 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_do_volume.h" -__kernel void kernel_ocl_path_trace_do_volume( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_do_volume((KernelGlobals*)kg); -} +#define KERNEL_NAME do_volume +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl b/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl new file mode 100644 index 00000000000..e68d4104a91 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl @@ -0,0 +1,26 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_enqueue_inactive.h" + +#define KERNEL_NAME enqueue_inactive +#define LOCALS_TYPE unsigned int +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl index d071b39aa6f..9e1e57beba6 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl @@ -18,12 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h" -__kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local BackgroundAOLocals locals; - kernel_holdout_emission_blurring_pathtermination_ao( - (KernelGlobals*)kg, - &locals); -} +#define KERNEL_NAME holdout_emission_blurring_pathtermination_ao +#define LOCALS_TYPE BackgroundAOLocals +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl index 8c213ff5cb2..192d01444ba 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_indirect_background.h" -__kernel void kernel_ocl_path_trace_indirect_background( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_indirect_background((KernelGlobals*)kg); -} +#define KERNEL_NAME indirect_background +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl index 998ebc4c0c3..84938b889e5 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_indirect_subsurface.h" -__kernel void kernel_ocl_path_trace_indirect_subsurface( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_indirect_subsurface((KernelGlobals*)kg); -} +#define KERNEL_NAME indirect_subsurface +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl index 822d2287715..c314dc96c33 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_lamp_emission.h" -__kernel void kernel_ocl_path_trace_lamp_emission( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_lamp_emission((KernelGlobals*)kg); -} +#define KERNEL_NAME lamp_emission +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl index 6d207253a40..8b1332bf013 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl @@ -18,10 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_next_iteration_setup.h" -__kernel void kernel_ocl_path_trace_next_iteration_setup( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local unsigned int local_queue_atomics; - kernel_next_iteration_setup((KernelGlobals*)kg, &local_queue_atomics); -} +#define KERNEL_NAME next_iteration_setup +#define LOCALS_TYPE unsigned int +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl index bd9aa9538c8..fa210e747c0 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_path_init.h" -__kernel void kernel_ocl_path_trace_path_init( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_path_init((KernelGlobals*)kg); -} +#define KERNEL_NAME path_init +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl index 9be154e3d75..68ee6f1d536 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl @@ -18,10 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_queue_enqueue.h" -__kernel void kernel_ocl_path_trace_queue_enqueue( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local QueueEnqueueLocals locals; - kernel_queue_enqueue((KernelGlobals*)kg, &locals); -} +#define KERNEL_NAME queue_enqueue +#define LOCALS_TYPE QueueEnqueueLocals +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl index eb4fb4d153a..10d09377ba9 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_scene_intersect.h" -__kernel void kernel_ocl_path_trace_scene_intersect( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_scene_intersect((KernelGlobals*)kg); -} +#define KERNEL_NAME scene_intersect +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl index 5bfb31b193a..40eaa561863 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_shader_eval.h" -__kernel void kernel_ocl_path_trace_shader_eval( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_shader_eval((KernelGlobals*)kg); -} +#define KERNEL_NAME shader_eval +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl index 38bfd04ad4c..8c36100f762 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl @@ -18,10 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_shader_setup.h" -__kernel void kernel_ocl_path_trace_shader_setup( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local unsigned int local_queue_atomics; - kernel_shader_setup((KernelGlobals*)kg, &local_queue_atomics); -} +#define KERNEL_NAME shader_setup +#define LOCALS_TYPE unsigned int +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl index 6f722915d45..bcacaa4a054 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl @@ -19,10 +19,9 @@ #include "kernel/split/kernel_shader_sort.h" __attribute__((reqd_work_group_size(64, 1, 1))) -__kernel void kernel_ocl_path_trace_shader_sort( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local ShaderSortLocals locals; - kernel_shader_sort((KernelGlobals*)kg, &locals); -} +#define KERNEL_NAME shader_sort +#define LOCALS_TYPE ShaderSortLocals +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl index 6a8ef81b32a..8de250a375c 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_shadow_blocked_ao.h" -__kernel void kernel_ocl_path_trace_shadow_blocked_ao( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_shadow_blocked_ao((KernelGlobals*)kg); -} +#define KERNEL_NAME shadow_blocked_ao +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl index b255cc5ef8b..29da77022ed 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_shadow_blocked_dl.h" -__kernel void kernel_ocl_path_trace_shadow_blocked_dl( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_shadow_blocked_dl((KernelGlobals*)kg); -} +#define KERNEL_NAME shadow_blocked_dl +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split.cl b/intern/cycles/kernel/kernels/opencl/kernel_split.cl index 8de82db7afe..651addb02f4 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_split.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_split.cl @@ -31,6 +31,7 @@ #include "kernel/kernels/opencl/kernel_direct_lighting.cl" #include "kernel/kernels/opencl/kernel_shadow_blocked_ao.cl" #include "kernel/kernels/opencl/kernel_shadow_blocked_dl.cl" +#include "kernel/kernels/opencl/kernel_enqueue_inactive.cl" #include "kernel/kernels/opencl/kernel_next_iteration_setup.cl" #include "kernel/kernels/opencl/kernel_indirect_subsurface.cl" #include "kernel/kernels/opencl/kernel_buffer_update.cl" diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h new file mode 100644 index 00000000000..f1e914a70d4 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h @@ -0,0 +1,72 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define KERNEL_NAME_JOIN(a, b) a ## _ ## b +#define KERNEL_NAME_EVAL(a, b) KERNEL_NAME_JOIN(a, b) + +__kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, KERNEL_NAME)( + ccl_global char *kg_global, + ccl_constant KernelData *data, + + ccl_global void *split_data_buffer, + ccl_global char *ray_state, + ccl_global uint *rng_state, + +#define KERNEL_TEX(type, ttype, name) \ + ccl_global type *name, +#include "kernel/kernel_textures.h" + + ccl_global int *queue_index, + ccl_global char *use_queues_flag, + ccl_global unsigned int *work_pools, + ccl_global float *buffer + ) +{ +#ifdef LOCALS_TYPE + ccl_local LOCALS_TYPE locals; +#endif + + KernelGlobals *kg = (KernelGlobals*)kg_global; + + if(ccl_local_id(0) + ccl_local_id(1) == 0) { + kg->data = data; + + kernel_split_params.rng_state = rng_state; + kernel_split_params.queue_index = queue_index; + kernel_split_params.use_queues_flag = use_queues_flag; + kernel_split_params.work_pools = work_pools; + kernel_split_params.buffer = buffer; + + split_data_init(kg, &kernel_split_state, ccl_global_size(0)*ccl_global_size(1), split_data_buffer, ray_state); + +#define KERNEL_TEX(type, ttype, name) \ + kg->name = name; +#include "kernel/kernel_textures.h" + } + + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + KERNEL_NAME_EVAL(kernel, KERNEL_NAME)( + kg +#ifdef LOCALS_TYPE + , &locals +#endif + ); +} + +#undef KERNEL_NAME_JOIN +#undef KERNEL_NAME_EVAL + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl index 99b74a1802b..2b3be38df84 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_subsurface_scatter.h" -__kernel void kernel_ocl_path_trace_subsurface_scatter( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_subsurface_scatter((KernelGlobals*)kg); -} +#define KERNEL_NAME subsurface_scatter +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp b/intern/cycles/kernel/osl/osl_bssrdf.cpp index 188c3960a5f..27a96720c1e 100644 --- a/intern/cycles/kernel/osl/osl_bssrdf.cpp +++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp @@ -191,7 +191,7 @@ class PrincipledBSSRDFClosure : public CBSSRDFClosure { public: void setup(ShaderData *sd, int path_flag, float3 weight) { - alloc(sd, path_flag, weight * albedo, CLOSURE_BSSRDF_PRINCIPLED_ID); + alloc(sd, path_flag, weight, CLOSURE_BSSRDF_PRINCIPLED_ID); } }; diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp index 5b66793a05d..14c5c1c3db5 100644 --- a/intern/cycles/kernel/osl/osl_closures.cpp +++ b/intern/cycles/kernel/osl/osl_closures.cpp @@ -156,7 +156,7 @@ BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction, microfacet_beckmann_refra BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction) BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, HairBsdf, LABEL_GLOSSY) - CLOSURE_FLOAT3_PARAM(HairReflectionClosure, unused), + CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.N), CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness1), CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness2), CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T), @@ -164,7 +164,7 @@ BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, HairBsdf, LABEL_GLOSSY BSDF_CLOSURE_CLASS_END(HairReflection, hair_reflection) BSDF_CLOSURE_CLASS_BEGIN(HairTransmission, hair_transmission, HairBsdf, LABEL_GLOSSY) - CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, unused), + CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, params.N), CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness1), CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness2), CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T), @@ -191,7 +191,7 @@ BSDF_CLOSURE_CLASS_END(PrincipledSheen, principled_sheen) class PrincipledClearcoatClosure : public CBSDFClosure { public: MicrofacetBsdf params; - float clearcoat, clearcoat_gloss; + float clearcoat, clearcoat_roughness; MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight) { @@ -202,8 +202,8 @@ public: bsdf->ior = 1.5f; - bsdf->alpha_x = 0.1f * (1.0f - clearcoat_gloss) + 0.001f * clearcoat_gloss; - bsdf->alpha_y = 0.1f * (1.0f - clearcoat_gloss) + 0.001f * clearcoat_gloss; + bsdf->alpha_x = clearcoat_roughness; + bsdf->alpha_y = clearcoat_roughness; bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f); bsdf->extra->clearcoat = clearcoat; @@ -217,7 +217,7 @@ public: void setup(ShaderData *sd, int path_flag, float3 weight) { MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - sd->flag |= (bsdf) ? bsdf_microfacet_ggx_clearcoat_setup(bsdf) : 0; + sd->flag |= (bsdf) ? bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd) : 0; } }; @@ -226,7 +226,7 @@ ClosureParam *closure_bsdf_principled_clearcoat_params() static ClosureParam params[] = { CLOSURE_FLOAT3_PARAM(PrincipledClearcoatClosure, params.N), CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat), - CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat_gloss), + CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat_roughness), CLOSURE_STRING_KEYPARAM(PrincipledClearcoatClosure, label, "label"), CLOSURE_FINISH_PARAM(PrincipledClearcoatClosure) }; @@ -389,7 +389,7 @@ public: void setup(ShaderData *sd, int path_flag, float3 weight) { MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - sd->flag |= (bsdf) ? bsdf_microfacet_ggx_fresnel_setup(bsdf) : 0; + sd->flag |= (bsdf) ? bsdf_microfacet_ggx_fresnel_setup(bsdf, sd) : 0; } }; @@ -413,7 +413,7 @@ public: void setup(ShaderData *sd, int path_flag, float3 weight) { MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - sd->flag |= (bsdf) ? bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf) : 0; + sd->flag |= (bsdf) ? bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd) : 0; } }; @@ -566,7 +566,7 @@ public: void setup(ShaderData *sd, int path_flag, float3 weight) { MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_fresnel_setup(bsdf) : 0; + sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd) : 0; } }; @@ -590,7 +590,7 @@ public: void setup(ShaderData *sd, int path_flag, float3 weight) { MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf) : 0; + sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd) : 0; } }; @@ -618,7 +618,7 @@ public: void setup(ShaderData *sd, int path_flag, float3 weight) { MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf) : 0; + sd->flag |= (bsdf) ? bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd) : 0; } }; diff --git a/intern/cycles/kernel/shaders/node_principled_bsdf.osl b/intern/cycles/kernel/shaders/node_principled_bsdf.osl index 57f40789d49..2bb981c3918 100644 --- a/intern/cycles/kernel/shaders/node_principled_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_principled_bsdf.osl @@ -32,7 +32,7 @@ shader node_principled_bsdf( float Sheen = 0.0, float SheenTint = 0.5, float Clearcoat = 0.0, - float ClearcoatGloss = 1.0, + float ClearcoatRoughness = 0.03, float IOR = 1.45, float Transmission = 0.0, float TransmissionRoughness = 0.0, @@ -57,8 +57,8 @@ shader node_principled_bsdf( if (diffuse_weight > 1e-5) { if (Subsurface > 1e-5) { - color Albedo = SubsurfaceColor * Subsurface + BaseColor * (1.0 - Subsurface); - BSDF = bssrdf_principled(Normal, Subsurface * SubsurfaceRadius, 0.0, Albedo, Roughness); + color mixed_ss_base_color = SubsurfaceColor * Subsurface + BaseColor * (1.0 - Subsurface); + BSDF = mixed_ss_base_color * bssrdf_principled(Normal, Subsurface * SubsurfaceRadius, 0.0, SubsurfaceColor, Roughness); } else { BSDF = BaseColor * principled_diffuse(Normal, Roughness); } @@ -114,7 +114,7 @@ shader node_principled_bsdf( } if (Clearcoat > 1e-5) { - BSDF = BSDF + principled_clearcoat(ClearcoatNormal, Clearcoat, ClearcoatGloss); + BSDF = BSDF + principled_clearcoat(ClearcoatNormal, Clearcoat, ClearcoatRoughness * ClearcoatRoughness); } } diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h index 289d1091b0a..c91d2918687 100644 --- a/intern/cycles/kernel/shaders/stdosl.h +++ b/intern/cycles/kernel/shaders/stdosl.h @@ -546,7 +546,7 @@ closure color holdout() BUILTIN; closure color ambient_occlusion() BUILTIN; closure color principled_diffuse(normal N, float roughness) BUILTIN; closure color principled_sheen(normal N) BUILTIN; -closure color principled_clearcoat(normal N, float clearcoat, float clearcoat_gloss) BUILTIN; +closure color principled_clearcoat(normal N, float clearcoat, float clearcoat_roughness) BUILTIN; // BSSRDF closure color bssrdf_cubic(normal N, vector radius, float texture_blur, float sharpness) BUILTIN; diff --git a/intern/cycles/kernel/split/kernel_branched.h b/intern/cycles/kernel/split/kernel_branched.h index dc74a2ada53..e2762a85fc8 100644 --- a/intern/cycles/kernel/split/kernel_branched.h +++ b/intern/cycles/kernel/split/kernel_branched.h @@ -63,12 +63,49 @@ ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobal REMOVE_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT); } +ccl_device_inline bool kernel_split_branched_indirect_start_shared(KernelGlobals *kg, int ray_index) +{ + ccl_global char *ray_state = kernel_split_state.ray_state; + + int inactive_ray = dequeue_ray_index(QUEUE_INACTIVE_RAYS, + kernel_split_state.queue_data, kernel_split_params.queue_size, kernel_split_params.queue_index); + + if(!IS_STATE(ray_state, inactive_ray, RAY_INACTIVE)) { + return false; + } + +#define SPLIT_DATA_ENTRY(type, name, num) \ + kernel_split_state.name[inactive_ray] = kernel_split_state.name[ray_index]; + SPLIT_DATA_ENTRIES_BRANCHED_SHARED +#undef SPLIT_DATA_ENTRY + + kernel_split_state.branched_state[inactive_ray].shared_sample_count = 0; + kernel_split_state.branched_state[inactive_ray].original_ray = ray_index; + kernel_split_state.branched_state[inactive_ray].waiting_on_shared_samples = false; + + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + PathRadiance *inactive_L = &kernel_split_state.path_radiance[inactive_ray]; + + path_radiance_init(inactive_L, kernel_data.film.use_light_pass); + inactive_L->direct_throughput = L->direct_throughput; + path_radiance_copy_indirect(inactive_L, L); + + ray_state[inactive_ray] = RAY_REGENERATED; + ADD_RAY_FLAG(ray_state, inactive_ray, RAY_BRANCHED_INDIRECT_SHARED); + ADD_RAY_FLAG(ray_state, inactive_ray, IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)); + + atomic_fetch_and_inc_uint32((ccl_global uint*)&kernel_split_state.branched_state[ray_index].shared_sample_count); + + return true; +} + /* bounce off surface and integrate indirect light */ ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(KernelGlobals *kg, int ray_index, float num_samples_adjust, ShaderData *saved_sd, - bool reset_path_state) + bool reset_path_state, + bool wait_for_shared) { SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; @@ -155,12 +192,25 @@ ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter( /* start the indirect path */ *tp *= num_samples_inv; + if(kernel_split_branched_indirect_start_shared(kg, ray_index)) { + continue; + } + return true; } branched_state->next_sample = 0; } + branched_state->next_closure = sd->num_closure; + + if(wait_for_shared) { + branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0); + if(branched_state->waiting_on_shared_samples) { + return true; + } + } + return false; } diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h index 694b777f429..9f8dd2392d9 100644 --- a/intern/cycles/kernel/split/kernel_do_volume.h +++ b/intern/cycles/kernel/split/kernel_do_volume.h @@ -75,11 +75,30 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K branched_state->next_sample = j+1; branched_state->num_samples = num_samples; + /* Attempting to share too many samples is slow for volumes as it causes us to + * loop here more and have many calls to kernel_volume_integrate which evaluates + * shaders. The many expensive shader evaluations cause the work load to become + * unbalanced and many threads to become idle in this kernel. Limiting the + * number of shared samples here helps quite a lot. + */ + if(branched_state->shared_sample_count < 2) { + if(kernel_split_branched_indirect_start_shared(kg, ray_index)) { + continue; + } + } + return true; } # endif } + branched_state->next_sample = num_samples; + + branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0); + if(branched_state->waiting_on_shared_samples) { + return true; + } + kernel_split_branched_path_indirect_loop_end(kg, ray_index); /* todo: avoid this calculation using decoupled ray marching */ diff --git a/intern/cycles/kernel/split/kernel_enqueue_inactive.h b/intern/cycles/kernel/split/kernel_enqueue_inactive.h new file mode 100644 index 00000000000..496355bbc3a --- /dev/null +++ b/intern/cycles/kernel/split/kernel_enqueue_inactive.h @@ -0,0 +1,46 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + +ccl_device void kernel_enqueue_inactive(KernelGlobals *kg, + ccl_local_param unsigned int *local_queue_atomics) +{ +#ifdef __BRANCHED_PATH__ + /* Enqeueue RAY_INACTIVE rays into QUEUE_INACTIVE_RAYS queue. */ + if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + *local_queue_atomics = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + + char enqueue_flag = 0; + if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_INACTIVE)) { + enqueue_flag = 1; + } + + enqueue_ray_index_local(ray_index, + QUEUE_INACTIVE_RAYS, + enqueue_flag, + kernel_split_params.queue_size, + local_queue_atomics, + kernel_split_state.queue_data, + kernel_split_params.queue_index); +#endif /* __BRANCHED_PATH__ */ +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h index 71017fed19e..7758e35fd32 100644 --- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h +++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h @@ -147,6 +147,7 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg, ray_index, 1.0f, &kernel_split_state.branched_state[ray_index].sd, + true, true)) { ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); @@ -193,6 +194,7 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg, ray_index, 1.0f, &kernel_split_state.branched_state[ray_index].sd, + true, true)) { ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h index e2e841f36d3..66ce2dfb6f1 100644 --- a/intern/cycles/kernel/split/kernel_queue_enqueue.h +++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h @@ -51,7 +51,8 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg, int queue_number = -1; if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) || - IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER)) { + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER) || + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) { queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS; } else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) || diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h index 5dc94caec85..45984ca509b 100644 --- a/intern/cycles/kernel/split/kernel_scene_intersect.h +++ b/intern/cycles/kernel/split/kernel_scene_intersect.h @@ -43,11 +43,21 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg) } /* All regenerated rays become active here */ - if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) - ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE); + if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) { +#ifdef __BRANCHED_PATH__ + if(kernel_split_state.branched_state[ray_index].waiting_on_shared_samples) { + kernel_split_path_end(kg, ray_index); + } + else +#endif /* __BRANCHED_PATH__ */ + { + ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE); + } + } - if(!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) + if(!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) { return; + } #ifdef __KERNEL_DEBUG__ DebugData *debug_data = &kernel_split_state.debug_data[ray_index]; diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h index 386fbbc4d09..78e61709b01 100644 --- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h +++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h @@ -29,6 +29,14 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg) kernel_split_state.queue_data, kernel_split_params.queue_size, 1); } +#ifdef __BRANCHED_PATH__ + /* TODO(mai): move this somewhere else? */ + if(thread_index == 0) { + /* Clear QUEUE_INACTIVE_RAYS before next kernel. */ + kernel_split_params.queue_index[QUEUE_INACTIVE_RAYS] = 0; + } +#endif /* __BRANCHED_PATH__ */ + if(ray_index == QUEUE_EMPTY_SLOT) return; diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h index 57f070d51e0..08f0124b529 100644 --- a/intern/cycles/kernel/split/kernel_split_common.h +++ b/intern/cycles/kernel/split/kernel_split_common.h @@ -56,7 +56,20 @@ ccl_device_inline void kernel_split_path_end(KernelGlobals *kg, int ray_index) ccl_global char *ray_state = kernel_split_state.ray_state; #ifdef __BRANCHED_PATH__ - if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) { + if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT_SHARED)) { + int orig_ray = kernel_split_state.branched_state[ray_index].original_ray; + + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + PathRadiance *orig_ray_L = &kernel_split_state.path_radiance[orig_ray]; + + path_radiance_sum_indirect(L); + path_radiance_accum_sample(orig_ray_L, L, 1); + + atomic_fetch_and_dec_uint32((ccl_global uint*)&kernel_split_state.branched_state[orig_ray].shared_sample_count); + + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE); + } + else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) { ASSIGN_RAY_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER); } else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT)) { diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h index bb1aca2acbf..4bb2f0d3d80 100644 --- a/intern/cycles/kernel/split/kernel_split_data_types.h +++ b/intern/cycles/kernel/split/kernel_split_data_types.h @@ -95,6 +95,10 @@ typedef ccl_global struct SplitBranchedState { VolumeStack volume_stack[VOLUME_STACK_SIZE]; # endif /* __VOLUME__ */ #endif /*__SUBSURFACE__ */ + + int shared_sample_count; /* number of branched samples shared with other threads */ + int original_ray; /* index of original ray when sharing branched samples */ + bool waiting_on_shared_samples; } SplitBranchedState; #define SPLIT_DATA_BRANCHED_ENTRIES \ @@ -137,6 +141,25 @@ typedef ccl_global struct SplitBranchedState { SPLIT_DATA_BRANCHED_ENTRIES \ SPLIT_DATA_DEBUG_ENTRIES \ +/* entries to be copied to inactive rays when sharing branched samples (TODO: which are actually needed?) */ +#define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \ + SPLIT_DATA_ENTRY(ccl_global RNG, rng, 1) \ + SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \ + SPLIT_DATA_ENTRY(ccl_global float, L_transparent, 1) \ + SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \ + SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \ + SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \ + SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \ + SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \ + SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \ + SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \ + SPLIT_DATA_ENTRY(ShaderData, sd, 1) \ + SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 1) \ + SPLIT_DATA_SUBSURFACE_ENTRIES \ + SPLIT_DATA_VOLUME_ENTRIES \ + SPLIT_DATA_BRANCHED_ENTRIES \ + SPLIT_DATA_DEBUG_ENTRIES \ + /* struct that holds pointers to data in the shared state buffer */ typedef struct SplitData { #define SPLIT_DATA_ENTRY(type, name, num) type *name; diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h index 1dffe1b179e..d5083b23f80 100644 --- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h +++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h @@ -169,6 +169,7 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it ray_index, num_samples_inv, bssrdf_sd, + false, false)) { branched_state->ss_next_closure = i; @@ -187,6 +188,13 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it branched_state->ss_next_sample = 0; } + branched_state->ss_next_closure = sd->num_closure; + + branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0); + if(branched_state->waiting_on_shared_samples) { + return true; + } + kernel_split_branched_path_indirect_loop_end(kg, ray_index); return false; @@ -257,21 +265,20 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg) /* do bssrdf scatter step if we picked a bssrdf closure */ if(sc) { uint lcg_state = lcg_state_init(&rng, state->rng_offset, state->sample, 0x68bc21eb); - float bssrdf_u, bssrdf_v; path_state_rng_2D(kg, - &rng, - state, - PRNG_BSDF_U, - &bssrdf_u, &bssrdf_v); + &rng, + state, + PRNG_BSDF_U, + &bssrdf_u, &bssrdf_v); subsurface_scatter_step(kg, - sd, - state, - state->flag, - sc, - &lcg_state, - bssrdf_u, bssrdf_v, - false); + sd, + state, + state->flag, + sc, + &lcg_state, + bssrdf_u, bssrdf_v, + false); } } else { diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index f04f765686e..9578fcf2687 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++ b/intern/cycles/kernel/svm/svm_closure.h @@ -79,13 +79,13 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * #ifdef __PRINCIPLED__ case CLOSURE_BSDF_PRINCIPLED_ID: { uint specular_offset, roughness_offset, specular_tint_offset, anisotropic_offset, sheen_offset, - sheen_tint_offset, clearcoat_offset, clearcoat_gloss_offset, eta_offset, transmission_offset, + sheen_tint_offset, clearcoat_offset, clearcoat_roughness_offset, eta_offset, transmission_offset, anisotropic_rotation_offset, transmission_roughness_offset; uint4 data_node2 = read_node(kg, offset); float3 T = stack_load_float3(stack, data_node.y); decode_node_uchar4(data_node.z, &specular_offset, &roughness_offset, &specular_tint_offset, &anisotropic_offset); - decode_node_uchar4(data_node.w, &sheen_offset, &sheen_tint_offset, &clearcoat_offset, &clearcoat_gloss_offset); + decode_node_uchar4(data_node.w, &sheen_offset, &sheen_tint_offset, &clearcoat_offset, &clearcoat_roughness_offset); decode_node_uchar4(data_node2.x, &eta_offset, &transmission_offset, &anisotropic_rotation_offset, &transmission_roughness_offset); // get Disney principled parameters @@ -98,7 +98,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * float sheen = stack_load_float(stack, sheen_offset); float sheen_tint = stack_load_float(stack, sheen_tint_offset); float clearcoat = stack_load_float(stack, clearcoat_offset); - float clearcoat_gloss = stack_load_float(stack, clearcoat_gloss_offset); + float clearcoat_roughness = stack_load_float(stack, clearcoat_roughness_offset); float transmission = stack_load_float(stack, transmission_offset); float anisotropic_rotation = stack_load_float(stack, anisotropic_rotation_offset); float transmission_roughness = stack_load_float(stack, transmission_roughness_offset); @@ -141,8 +141,8 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * float3 weight = sd->svm_closure_weight * mix_weight; #ifdef __SUBSURFACE__ - float3 albedo = subsurface_color * subsurface + base_color * (1.0f - subsurface); - float3 subsurf_weight = weight * albedo * diffuse_weight; + float3 mixed_ss_base_color = subsurface_color * subsurface + base_color * (1.0f - subsurface); + float3 subsurf_weight = weight * mixed_ss_base_color * diffuse_weight; float subsurf_sample_weight = fabsf(average(subsurf_weight)); /* disable in case of diffuse ancestor, can't see it well then and @@ -154,11 +154,11 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * /* need to set the base color in this case such that the * rays get the correctly mixed color after transmitting * the object */ - base_color = albedo; + base_color = mixed_ss_base_color; } /* diffuse */ - if(fabsf(average(base_color)) > CLOSURE_WEIGHT_CUTOFF) { + if(fabsf(average(mixed_ss_base_color)) > CLOSURE_WEIGHT_CUTOFF) { if(subsurface < CLOSURE_WEIGHT_CUTOFF && diffuse_weight > CLOSURE_WEIGHT_CUTOFF) { float3 diff_weight = weight * base_color * diffuse_weight; @@ -186,7 +186,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * bssrdf->sample_weight = subsurf_sample_weight; bssrdf->radius = radius.x; bssrdf->texture_blur = texture_blur; - bssrdf->albedo = albedo.x; + bssrdf->albedo = subsurface_color.x; bssrdf->sharpness = sharpness; bssrdf->N = N; bssrdf->roughness = roughness; @@ -200,7 +200,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * bssrdf->sample_weight = subsurf_sample_weight; bssrdf->radius = radius.y; bssrdf->texture_blur = texture_blur; - bssrdf->albedo = albedo.y; + bssrdf->albedo = subsurface_color.y; bssrdf->sharpness = sharpness; bssrdf->N = N; bssrdf->roughness = roughness; @@ -214,7 +214,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * bssrdf->sample_weight = subsurf_sample_weight; bssrdf->radius = radius.z; bssrdf->texture_blur = texture_blur; - bssrdf->albedo = albedo.z; + bssrdf->albedo = subsurface_color.z; bssrdf->sharpness = sharpness; bssrdf->N = N; bssrdf->roughness = roughness; @@ -292,9 +292,9 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * /* setup bsdf */ if(distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID || roughness <= 0.075f) /* use single-scatter GGX */ - sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf); + sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd); else /* use multi-scatter GGX */ - sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf); + sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd); } } #ifdef __CAUSTICS_TRICKS__ @@ -332,7 +332,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * bsdf->extra->cspec0 = cspec0; /* setup bsdf */ - sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf); + sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd); } } @@ -377,7 +377,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * bsdf->extra->cspec0 = cspec0; /* setup bsdf */ - sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf); + sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd); } } } @@ -398,14 +398,14 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * bsdf->ior = 1.5f; bsdf->extra = extra; - bsdf->alpha_x = 0.1f * (1.0f - clearcoat_gloss) + 0.001f * clearcoat_gloss; - bsdf->alpha_y = 0.1f * (1.0f - clearcoat_gloss) + 0.001f * clearcoat_gloss; + bsdf->alpha_x = clearcoat_roughness * clearcoat_roughness; + bsdf->alpha_y = clearcoat_roughness * clearcoat_roughness; bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f); bsdf->extra->clearcoat = clearcoat; /* setup bsdf */ - sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf); + sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd); } } #ifdef __CAUSTICS_TRICKS__ @@ -725,6 +725,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * HairBsdf *bsdf = (HairBsdf*)bsdf_alloc(sd, sizeof(HairBsdf), weight); if(bsdf) { + bsdf->N = N; bsdf->roughness1 = param1; bsdf->roughness2 = param2; bsdf->offset = -stack_load_float(stack, data_node.z); diff --git a/intern/cycles/render/constant_fold.cpp b/intern/cycles/render/constant_fold.cpp index 2569d9eec27..943b218f0e4 100644 --- a/intern/cycles/render/constant_fold.cpp +++ b/intern/cycles/render/constant_fold.cpp @@ -160,6 +160,14 @@ bool ConstantFolder::try_bypass_or_make_constant(ShaderInput *input, bool clamp) bypass(input->link); return true; } + else { + /* disconnect other inputs if we can't fully bypass due to clamp */ + foreach(ShaderInput *other, node->inputs) { + if(other != input && other->link) { + graph->disconnect(other); + } + } + } return false; } diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index 625dd3ded39..93d88c5642c 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -224,6 +224,10 @@ void LightManager::disable_ineffective_light(Device *device, Scene *scene) bool LightManager::object_usable_as_light(Object *object) { Mesh *mesh = object->mesh; + /* Skip objects with NaNs */ + if (!object->bounds.valid()) { + return false; + } /* Skip if we are not visible for BSDFs. */ if(!(object->visibility & (PATH_RAY_DIFFUSE|PATH_RAY_GLOSSY|PATH_RAY_TRANSMIT))) { return false; diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index 57b475e5cd0..166156f7ac3 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -2308,13 +2308,13 @@ NODE_DEFINE(PrincipledBsdfNode) SOCKET_IN_FLOAT(subsurface, "Subsurface", 0.0f); SOCKET_IN_VECTOR(subsurface_radius, "Subsurface Radius", make_float3(0.1f, 0.1f, 0.1f)); SOCKET_IN_FLOAT(specular, "Specular", 0.0f); - SOCKET_IN_FLOAT(roughness, "Roughness", 0.0f); + SOCKET_IN_FLOAT(roughness, "Roughness", 0.5f); SOCKET_IN_FLOAT(specular_tint, "Specular Tint", 0.0f); SOCKET_IN_FLOAT(anisotropic, "Anisotropic", 0.0f); SOCKET_IN_FLOAT(sheen, "Sheen", 0.0f); SOCKET_IN_FLOAT(sheen_tint, "Sheen Tint", 0.0f); SOCKET_IN_FLOAT(clearcoat, "Clearcoat", 0.0f); - SOCKET_IN_FLOAT(clearcoat_gloss, "Clearcoat Gloss", 0.0f); + SOCKET_IN_FLOAT(clearcoat_roughness, "Clearcoat Roughness", 0.03f); SOCKET_IN_FLOAT(ior, "IOR", 0.0f); SOCKET_IN_FLOAT(transmission, "Transmission", 0.0f); SOCKET_IN_FLOAT(transmission_roughness, "Transmission Roughness", 0.0f); @@ -2351,7 +2351,7 @@ void PrincipledBsdfNode::attributes(Shader *shader, AttributeRequestSet *attribu void PrincipledBsdfNode::compile(SVMCompiler& compiler, ShaderInput *p_metallic, ShaderInput *p_subsurface, ShaderInput *p_subsurface_radius, ShaderInput *p_specular, ShaderInput *p_roughness, ShaderInput *p_specular_tint, ShaderInput *p_anisotropic, - ShaderInput *p_sheen, ShaderInput *p_sheen_tint, ShaderInput *p_clearcoat, ShaderInput *p_clearcoat_gloss, + ShaderInput *p_sheen, ShaderInput *p_sheen_tint, ShaderInput *p_clearcoat, ShaderInput *p_clearcoat_roughness, ShaderInput *p_ior, ShaderInput *p_transmission, ShaderInput *p_anisotropic_rotation, ShaderInput *p_transmission_roughness) { ShaderInput *base_color_in = input("Base Color"); @@ -2374,7 +2374,7 @@ void PrincipledBsdfNode::compile(SVMCompiler& compiler, ShaderInput *p_metallic, int sheen_offset = compiler.stack_assign(p_sheen); int sheen_tint_offset = compiler.stack_assign(p_sheen_tint); int clearcoat_offset = compiler.stack_assign(p_clearcoat); - int clearcoat_gloss_offset = compiler.stack_assign(p_clearcoat_gloss); + int clearcoat_roughness_offset = compiler.stack_assign(p_clearcoat_roughness); int ior_offset = compiler.stack_assign(p_ior); int transmission_offset = compiler.stack_assign(p_transmission); int transmission_roughness_offset = compiler.stack_assign(p_transmission_roughness); @@ -2391,7 +2391,7 @@ void PrincipledBsdfNode::compile(SVMCompiler& compiler, ShaderInput *p_metallic, compiler.add_node(normal_offset, tangent_offset, compiler.encode_uchar4(specular_offset, roughness_offset, specular_tint_offset, anisotropic_offset), - compiler.encode_uchar4(sheen_offset, sheen_tint_offset, clearcoat_offset, clearcoat_gloss_offset)); + compiler.encode_uchar4(sheen_offset, sheen_tint_offset, clearcoat_offset, clearcoat_roughness_offset)); compiler.add_node(compiler.encode_uchar4(ior_offset, transmission_offset, anisotropic_rotation_offset, transmission_roughness_offset), distribution, SVM_STACK_INVALID, SVM_STACK_INVALID); @@ -2419,7 +2419,7 @@ void PrincipledBsdfNode::compile(SVMCompiler& compiler) { compile(compiler, input("Metallic"), input("Subsurface"), input("Subsurface Radius"), input("Specular"), input("Roughness"), input("Specular Tint"), input("Anisotropic"), input("Sheen"), input("Sheen Tint"), - input("Clearcoat"), input("Clearcoat Gloss"), input("IOR"), input("Transmission"), + input("Clearcoat"), input("Clearcoat Roughness"), input("IOR"), input("Transmission"), input("Anisotropic Rotation"), input("Transmission Roughness")); } diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h index aac6ce2f375..c6ab47fcc84 100644 --- a/intern/cycles/render/nodes.h +++ b/intern/cycles/render/nodes.h @@ -252,6 +252,7 @@ public: class PointDensityTextureNode : public ShaderNode { public: SHADER_NODE_NO_CLONE_CLASS(PointDensityTextureNode) + virtual int get_group() { return NODE_GROUP_LEVEL_3; } ~PointDensityTextureNode(); ShaderNode *clone() const; @@ -377,13 +378,13 @@ public: bool has_bssrdf_bump(); void compile(SVMCompiler& compiler, ShaderInput *metallic, ShaderInput *subsurface, ShaderInput *subsurface_radius, ShaderInput *specular, ShaderInput *roughness, ShaderInput *specular_tint, ShaderInput *anisotropic, - ShaderInput *sheen, ShaderInput *sheen_tint, ShaderInput *clearcoat, ShaderInput *clearcoat_gloss, + ShaderInput *sheen, ShaderInput *sheen_tint, ShaderInput *clearcoat, ShaderInput *clearcoat_roughness, ShaderInput *ior, ShaderInput *transmission, ShaderInput *anisotropic_rotation, ShaderInput *transmission_roughness); float3 base_color; float3 subsurface_color, subsurface_radius; float metallic, subsurface, specular, roughness, specular_tint, anisotropic, - sheen, sheen_tint, clearcoat, clearcoat_gloss, ior, transmission, + sheen, sheen_tint, clearcoat, clearcoat_roughness, ior, transmission, anisotropic_rotation, transmission_roughness; float3 normal, clearcoat_normal, tangent; float surface_mix_weight; diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp index 6bff29d1c76..a794f233718 100644 --- a/intern/cycles/render/osl.cpp +++ b/intern/cycles/render/osl.cpp @@ -156,6 +156,7 @@ void OSLShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *s og->surface_state.clear(); og->volume_state.clear(); og->displacement_state.clear(); + og->bump_state.clear(); og->background_state.reset(); } diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 08909943c49..ae462a1084a 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -722,6 +722,7 @@ DeviceRequestedFeatures Session::get_requested_device_features() requested_features.use_baking = bake_manager->get_baking(); requested_features.use_integrator_branched = (scene->integrator->method == Integrator::BRANCHED_PATH); requested_features.use_transparent &= scene->integrator->transparent_shadows; + requested_features.use_denoising = params.use_denoising; return requested_features; } diff --git a/intern/cycles/test/util_string_test.cpp b/intern/cycles/test/util_string_test.cpp index 22ec8e0ee8e..6c059ba5d12 100644 --- a/intern/cycles/test/util_string_test.cpp +++ b/intern/cycles/test/util_string_test.cpp @@ -245,4 +245,41 @@ TEST(util_string_remove_trademark, both) EXPECT_EQ(str, "foo bar zzz"); } +TEST(util_string_remove_trademark, both_space) +{ + string str = string_remove_trademark("foo bar(TM) (R) zzz"); + EXPECT_EQ(str, "foo bar zzz"); +} + +TEST(util_string_remove_trademark, both_space_around) +{ + string str = string_remove_trademark("foo bar (TM) (R) zzz"); + EXPECT_EQ(str, "foo bar zzz"); +} + +TEST(util_string_remove_trademark, trademark_space_suffix) +{ + string str = string_remove_trademark("foo bar (TM)"); + EXPECT_EQ(str, "foo bar"); +} + +TEST(util_string_remove_trademark, trademark_space_middle) +{ + string str = string_remove_trademark("foo bar (TM) baz"); + EXPECT_EQ(str, "foo bar baz"); +} + + +TEST(util_string_remove_trademark, r_space_suffix) +{ + string str = string_remove_trademark("foo bar (R)"); + EXPECT_EQ(str, "foo bar"); +} + +TEST(util_string_remove_trademark, r_space_middle) +{ + string str = string_remove_trademark("foo bar (R) baz"); + EXPECT_EQ(str, "foo bar baz"); +} + CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h index 6c52117ef9a..643af87a65f 100644 --- a/intern/cycles/util/util_atomic.h +++ b/intern/cycles/util/util_atomic.h @@ -35,6 +35,7 @@ ATOMIC_INLINE void atomic_update_max_z(size_t *maximum_value, size_t value) #define atomic_add_and_fetch_float(p, x) atomic_add_and_fetch_fl((p), (x)) #define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1) +#define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_add_uint32((p), -1) #define CCL_LOCAL_MEM_FENCE 0 #define ccl_barrier(flags) (void)0 @@ -68,6 +69,7 @@ ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *so #define atomic_fetch_and_add_uint32(p, x) atomic_add((p), (x)) #define atomic_fetch_and_inc_uint32(p) atomic_inc((p)) +#define atomic_fetch_and_dec_uint32(p) atomic_dec((p)) #define CCL_LOCAL_MEM_FENCE CLK_LOCAL_MEM_FENCE #define ccl_barrier(flags) barrier(flags) @@ -79,7 +81,9 @@ ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *so #define atomic_add_and_fetch_float(p, x) (atomicAdd((float*)(p), (float)(x)) + (float)(x)) #define atomic_fetch_and_add_uint32(p, x) atomicAdd((unsigned int*)(p), (unsigned int)(x)) +#define atomic_fetch_and_sub_uint32(p, x) atomicSub((unsigned int*)(p), (unsigned int)(x)) #define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1) +#define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_sub_uint32((p), 1) #define CCL_LOCAL_MEM_FENCE #define ccl_barrier(flags) __syncthreads() diff --git a/intern/cycles/util/util_debug.cpp b/intern/cycles/util/util_debug.cpp index ab038d2b9fb..10895f2e918 100644 --- a/intern/cycles/util/util_debug.cpp +++ b/intern/cycles/util/util_debug.cpp @@ -184,8 +184,8 @@ std::ostream& operator <<(std::ostream &os, << " Device type : " << opencl_device_type << "\n" << " Kernel type : " << opencl_kernel_type << "\n" << " Debug : " << string_from_bool(debug_flags.opencl.debug) << "\n" - << " Single program : " << string_from_bool(debug_flags.opencl.single_program) - << "\n"; + << " Single program : " << string_from_bool(debug_flags.opencl.single_program) << "\n" + << " Memory limit : " << string_human_readable_size(debug_flags.opencl.mem_limit) << "\n"; return os; } diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h index 4505d584490..450cd900a9f 100644 --- a/intern/cycles/util/util_debug.h +++ b/intern/cycles/util/util_debug.h @@ -115,6 +115,10 @@ public: /* Use single program */ bool single_program; + + /* TODO(mai): Currently this is only for OpenCL, but we should have it implemented for all devices. */ + /* Artificial memory limit in bytes (0 if disabled). */ + size_t mem_limit; }; /* Get instance of debug flags registry. */ diff --git a/intern/cycles/util/util_logging.h b/intern/cycles/util/util_logging.h index ecf9c9cfee0..492f830e67c 100644 --- a/intern/cycles/util/util_logging.h +++ b/intern/cycles/util/util_logging.h @@ -19,28 +19,30 @@ #if defined(WITH_CYCLES_LOGGING) && !defined(__KERNEL_GPU__) # include <glog/logging.h> -#else -# include <iostream> #endif +#include <iostream> + CCL_NAMESPACE_BEGIN #if !defined(WITH_CYCLES_LOGGING) || defined(__KERNEL_GPU__) -class StubStream : public std::ostream { - public: - StubStream() : std::ostream(NULL) { } +class StubStream { +public: + template<class T> + StubStream& operator<<(const T&) { + return *this; + } }; class LogMessageVoidify { public: LogMessageVoidify() { } - void operator&(::std::ostream&) { } + void operator&(StubStream&) { } }; # define LOG_SUPPRESS() (true) ? (void) 0 : LogMessageVoidify() & StubStream() # define LOG(severity) LOG_SUPPRESS() # define VLOG(severity) LOG_SUPPRESS() - #endif #define VLOG_ONCE(level, flag) if(!flag) flag = true, VLOG(level) diff --git a/intern/cycles/util/util_math_float3.h b/intern/cycles/util/util_math_float3.h index 5327d9f7cc6..bb04c4aa2d9 100644 --- a/intern/cycles/util/util_math_float3.h +++ b/intern/cycles/util/util_math_float3.h @@ -374,9 +374,9 @@ ccl_device_inline bool isfinite3_safe(float3 v) ccl_device_inline float3 ensure_finite3(float3 v) { - if(!isfinite_safe(v.x)) v.x = 0.0; - if(!isfinite_safe(v.y)) v.y = 0.0; - if(!isfinite_safe(v.z)) v.z = 0.0; + if(!isfinite_safe(v.x)) v.x = 0.0f; + if(!isfinite_safe(v.y)) v.y = 0.0f; + if(!isfinite_safe(v.z)) v.z = 0.0f; return v; } diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h index bc672669e1f..cd4fe52fdc9 100644 --- a/intern/cycles/util/util_progress.h +++ b/intern/cycles/util/util_progress.h @@ -226,6 +226,7 @@ public: int get_current_sample() { + thread_scoped_lock lock(progress_mutex); /* Note that the value here always belongs to the last tile that updated, * so it's only useful if there is only one active tile. */ return current_tile_sample; @@ -233,11 +234,13 @@ public: int get_rendered_tiles() { + thread_scoped_lock lock(progress_mutex); return rendered_tiles; } int get_denoised_tiles() { + thread_scoped_lock lock(progress_mutex); return denoised_tiles; } diff --git a/intern/cycles/util/util_string.cpp b/intern/cycles/util/util_string.cpp index a1008d510d1..94ad512982c 100644 --- a/intern/cycles/util/util_string.cpp +++ b/intern/cycles/util/util_string.cpp @@ -148,6 +148,12 @@ void string_replace(string& haystack, const string& needle, const string& other) string string_remove_trademark(const string &s) { string result = s; + + /* Special case, so we don;t leave sequential spaces behind. */ + /* TODO(sergey): Consider using regex perhaps? */ + string_replace(result, " (TM)", ""); + string_replace(result, " (R)", ""); + string_replace(result, "(TM)", ""); string_replace(result, "(R)", ""); diff --git a/intern/elbeem/intern/isosurface.cpp b/intern/elbeem/intern/isosurface.cpp index fb61fb416b4..de7bfe8e687 100644 --- a/intern/elbeem/intern/isosurface.cpp +++ b/intern/elbeem/intern/isosurface.cpp @@ -15,6 +15,7 @@ #include "particletracer.h" #include <algorithm> #include <stdio.h> +#include <cmath> #ifdef sun #include "ieeefp.h" @@ -25,6 +26,8 @@ #define round(x) (x) #endif +using std::isfinite; + /****************************************************************************** * Constructor *****************************************************************************/ @@ -937,17 +940,10 @@ void IsoSurface::smoothSurface(float sigma, bool normSmooth) ew[(j+2)%3]); } - // NT important, check this... -#ifndef WIN32 - if(! finite(cornerareas[i][0]) ) cornerareas[i][0]=1e-6; - if(! finite(cornerareas[i][1]) ) cornerareas[i][1]=1e-6; - if(! finite(cornerareas[i][2]) ) cornerareas[i][2]=1e-6; -#else // WIN32 - // FIXME check as well... - if(! (cornerareas[i][0]>=0.0) ) cornerareas[i][0]=1e-6; - if(! (cornerareas[i][1]>=0.0) ) cornerareas[i][1]=1e-6; - if(! (cornerareas[i][2]>=0.0) ) cornerareas[i][2]=1e-6; -#endif // WIN32 + // FIX T50887: ensure pointareas are finite + if (!isfinite(cornerareas[i][0])) cornerareas[i][0] = 1e-6; + if (!isfinite(cornerareas[i][1])) cornerareas[i][1] = 1e-6; + if (!isfinite(cornerareas[i][2])) cornerareas[i][2] = 1e-6; pointareas[mIndices[i*3+0]] += cornerareas[i][0]; pointareas[mIndices[i*3+1]] += cornerareas[i][1]; @@ -1096,17 +1092,10 @@ void IsoSurface::smoothNormals(float sigma) { ew[(j+2)%3]); } - // NT important, check this... -#ifndef WIN32 - if(! finite(cornerareas[i][0]) ) cornerareas[i][0]=1e-6; - if(! finite(cornerareas[i][1]) ) cornerareas[i][1]=1e-6; - if(! finite(cornerareas[i][2]) ) cornerareas[i][2]=1e-6; -#else // WIN32 - // FIXME check as well... - if(! (cornerareas[i][0]>=0.0) ) cornerareas[i][0]=1e-6; - if(! (cornerareas[i][1]>=0.0) ) cornerareas[i][1]=1e-6; - if(! (cornerareas[i][2]>=0.0) ) cornerareas[i][2]=1e-6; -#endif // WIN32 + // FIX T50887: ensure pointareas are finite + if (!isfinite(cornerareas[i][0])) cornerareas[i][0] = 1e-6; + if (!isfinite(cornerareas[i][1])) cornerareas[i][1] = 1e-6; + if (!isfinite(cornerareas[i][2])) cornerareas[i][2] = 1e-6; pointareas[mIndices[i*3+0]] += cornerareas[i][0]; pointareas[mIndices[i*3+1]] += cornerareas[i][1]; diff --git a/intern/elbeem/intern/solver_util.cpp b/intern/elbeem/intern/solver_util.cpp index 6eca427c787..f0c7bce2b4e 100644 --- a/intern/elbeem/intern/solver_util.cpp +++ b/intern/elbeem/intern/solver_util.cpp @@ -855,6 +855,10 @@ void LbmFsgrSolver::advanceParticles() { if(k<=mSizez-1-cutval){ CellFlagType pflag = RFLAG(level, i,j,k, workSet); //errMsg("PIT move"," at "<<PRINT_IJK<<" flag"<<convertCellFlagType2String(pflag) ); + if (pflag & CFMbndOutflow) { + DEL_PART; + continue; + } if(pflag & (CFBnd)) { handleObstacleParticle(p); continue; diff --git a/intern/ghost/GHOST_C-api.h b/intern/ghost/GHOST_C-api.h index 6887063eae9..967d3f58143 100644 --- a/intern/ghost/GHOST_C-api.h +++ b/intern/ghost/GHOST_C-api.h @@ -43,7 +43,7 @@ extern "C" { * Creates a "handle" for a C++ GHOST object. * A handle is just an opaque pointer to an empty struct. * In the API the pointer is casted to the actual C++ class. - * \param name Name of the handle to create. + * The 'name' argument to the macro is the name of the handle to create. */ GHOST_DECLARE_HANDLE(GHOST_SystemHandle); diff --git a/intern/ghost/intern/GHOST_SystemWin32.cpp b/intern/ghost/intern/GHOST_SystemWin32.cpp index 9f03b5e9537..b0dae432643 100644 --- a/intern/ghost/intern/GHOST_SystemWin32.cpp +++ b/intern/ghost/intern/GHOST_SystemWin32.cpp @@ -941,6 +941,8 @@ LRESULT WINAPI GHOST_SystemWin32::s_wndProc(HWND hwnd, UINT msg, WPARAM wParam, GHOST_ASSERT(system, "GHOST_SystemWin32::s_wndProc(): system not initialized"); if (hwnd) { +#if 0 + // Disabled due to bug in Intel drivers, see T51959 if(msg == WM_NCCREATE) { // Tell Windows to automatically handle scaling of non-client areas // such as the caption bar. EnableNonClientDpiScaling was introduced in Windows 10 @@ -954,6 +956,7 @@ LRESULT WINAPI GHOST_SystemWin32::s_wndProc(HWND hwnd, UINT msg, WPARAM wParam, } } } +#endif GHOST_WindowWin32 *window = (GHOST_WindowWin32 *)::GetWindowLongPtr(hwnd, GWLP_USERDATA); if (window) { diff --git a/intern/ghost/intern/GHOST_WindowCocoa.h b/intern/ghost/intern/GHOST_WindowCocoa.h index b234291396b..5168c48ca2f 100644 --- a/intern/ghost/intern/GHOST_WindowCocoa.h +++ b/intern/ghost/intern/GHOST_WindowCocoa.h @@ -56,7 +56,7 @@ public: * \param systemCocoa The associated system class to forward events to * \param title The text shown in the title bar of the window. * \param left The coordinate of the left edge of the window. - * \param top The coordinate of the top edge of the window. + * \param bottom The coordinate of the bottom edge of the window. * \param width The width the window. * \param height The height the window. * \param state The state the window is initially opened with. diff --git a/intern/guardedalloc/intern/mallocn_lockfree_impl.c b/intern/guardedalloc/intern/mallocn_lockfree_impl.c index ce8a5b29ece..b4838cdca18 100644 --- a/intern/guardedalloc/intern/mallocn_lockfree_impl.c +++ b/intern/guardedalloc/intern/mallocn_lockfree_impl.c @@ -64,9 +64,9 @@ enum { MEMHEAD_ALIGN_FLAG = 2, }; -#define MEMHEAD_FROM_PTR(ptr) (((MemHead*) vmemh) - 1) +#define MEMHEAD_FROM_PTR(ptr) (((MemHead*) ptr) - 1) #define PTR_FROM_MEMHEAD(memhead) (memhead + 1) -#define MEMHEAD_ALIGNED_FROM_PTR(ptr) (((MemHeadAligned*) vmemh) - 1) +#define MEMHEAD_ALIGNED_FROM_PTR(ptr) (((MemHeadAligned*) ptr) - 1) #define MEMHEAD_IS_MMAP(memhead) ((memhead)->len & (size_t) MEMHEAD_MMAP_FLAG) #define MEMHEAD_IS_ALIGNED(memhead) ((memhead)->len & (size_t) MEMHEAD_ALIGN_FLAG) diff --git a/intern/libmv/ChangeLog b/intern/libmv/ChangeLog index 45be9c25afa..81096dd90c9 100644 --- a/intern/libmv/ChangeLog +++ b/intern/libmv/ChangeLog @@ -1,3 +1,156 @@ +commit efd7a93317e0278b99e66785f667823e451daef1 +Author: Sergey Sharybin <sergey.vfx@gmail.com> +Date: Tue May 9 10:16:42 2017 +0200 + + Fix strict compiler warnings, unused variables + +commit 8efd47e13dfdd3f7209bc96f26d0b13127dd6376 +Author: Sergey Sharybin <sergey.vfx@gmail.com> +Date: Wed Dec 14 10:44:57 2016 +0100 + + Fix T50243: libmv_panography_test is broken + + There was fully wrong logic in comparison: was actually accessing memory + past the array boundary. Run test manually and the figure seems correct + to me now. + + Spotted by @LazyDodo, thanks! + +commit 6dfb9cd1bd14669d84be789000ce234747fb00ff +Author: Sergey Sharybin <sergey.vfx@gmail.com> +Date: Thu Jul 14 11:49:38 2016 +0200 + + Fix some strict compiler warnings + + One of them was a real bug! + +commit f61adaecf7b29ebe6677be0e1c825f0a8d475e4b +Author: Sergey Sharybin <sergey.vfx@gmail.com> +Date: Wed May 31 11:22:34 2017 +0200 + + Enable explicit schur complement for BA step + + This is something we do in Blender and only reason it was not + enabled for standalone Libmv is because we did not have fresh + enough version of Ceres bundled. + +commit fc5d3a1d4880c6658aff693c1c1e8c10c96ce1a7 +Author: Sergey Sharybin <sergey.vfx@gmail.com> +Date: Wed Nov 2 15:32:11 2016 +0100 + + Update tests to make tests pass after recent Ceres update + + Just a precision issue, difference is around 1e-7. Should be fine to + simply update expected value. + +commit e1ac9f6124110c1a90d8e417bea47acfcbdcca42 +Author: Sergey Sharybin <sergey.vfx@gmail.com> +Date: Wed May 31 10:54:48 2017 +0200 + + Update Ceres to latest release 1.12.0 + +commit ac1571352b4962f110929b963f8616d7310ceea5 +Author: Sergey Sharybin <sergey.vfx@gmail.com> +Date: Fri Apr 7 17:10:44 2017 +0200 + + Fix crash of keyframe selection on 32bit linux + +commit 5f8df3da965686df39a6ae5c9f17482075017bf4 +Author: Sergey Sharybin <sergey.vfx@gmail.com> +Date: Tue Jan 19 14:00:53 2016 +0500 + + Solve some strict warnings in tests + +commit 8ea3a5d752a9ce3337ab7643897472a4d33747f1 +Author: Brecht Van Lommel <brechtvanlommel@gmail.com> +Date: Sat Feb 18 23:52:31 2017 +0100 + + Fix a few compiler warnings with macOS / clang. + +commit ffbe81461770e70736e80b8cab8e6eb1f8b27160 +Author: Mike Erwin <significant.bit@gmail.com> +Date: Wed May 31 10:43:08 2017 +0200 + + Fix comparison of identicals + + Some of these check that dimensions match before running code that + assumes they do match. + + Found with PVS-Studio T48917. + +commit 206c01999cde16c1c6c43a8e13ffa86020821d98 +Author: Sergey Sharybin <sergey.vfx@gmail.com> +Date: Wed May 31 10:39:16 2017 +0200 + + Add basic track masking API in place + + This brings back ability to mask non-interesting parts of + specific track (the feature got lost with new auto-track API). + + Added it back by extending frame accessor class. This isn't really + a frame thing, but we don't have other type of accessor here. + + Surely, we can use old-style API here and pass mask via region + tracker options for this particular case, but then it becomes much + less obvious how real auto-tracker will access this mask with old + style API. + + So seems we do need an accessor for such data, just matter of + finding better place than frame accessor. + +commit faa069cb826892780356477cc10602390fecf06b +Author: Sergey Sharybin <sergey.vfx@gmail.com> +Date: Wed May 31 10:36:26 2017 +0200 + + Tests: Tweak epsilon to avoid what looks a false-positive failure + +commit 7c84e45c1d330871477ba3516f57178e5b9d101f +Author: Sergey Sharybin <sergey.vfx@gmail.com> +Date: Wed May 31 10:15:43 2017 +0200 + + CMake: Fix mistake in closing branch + +commit cb769a0d319a8c95948153d78a4c3378a0142ece +Author: Sergey Sharybin <sergey.vfx@gmail.com> +Date: Thu Jul 21 12:52:33 2016 +0200 + + Set of fixes for MSVC215 + + - Move GLOG/GFLAGS defines to a more global scope, + this way ANY of our own libraries will use proper + declspec. + + - Compile png/zlib/openexif on Windows as well since + those are required for a correct linking. + +commit bb95c8654fd2cea72d66ed04cd825cc3712ea804 +Author: Sergey Sharybin <sergey.vfx@gmail.com> +Date: Wed Jul 20 18:14:46 2016 +0200 + + Disable unexisting Ceres option + + Explicit Schur complement requires having + newer Ceres than we currently have bundled. + +commit a2e12c959ef32cc9382244d1581992c2f7aa9c09 +Author: Sergey Sharybin <sergey.vfx@gmail.com> +Date: Wed Jul 20 18:04:57 2016 +0200 + + Various fixes for MSVC + + - Update Eigen to 3.2.7 since this brings crucial + fixes for MSVC 2015. + + - Switch to STATIC build by default. + + There are issues building current sources as dynamic + libraries with MSVC2015 and additionally building + dynamic Ceres is not recommended anyway, so let's + not do this for the time being. + + If anyone finds a way to make this all working -- + it'llsurely be a welcome addition. + commit 7a676106720fb126a27ff010abdd8bb65d7e0d9a Author: Sergey Sharybin <sergey.vfx@gmail.com> Date: Mon Jan 4 18:30:12 2016 +0500 @@ -365,239 +518,3 @@ Date: Thu May 8 15:50:26 2014 +0200 Reviewed By: sergey Differential Revision: https://developer.blender.org/D516 - -commit 4405dff60ea08d454b64da1a7c0595d9328cf8a3 -Author: Keir Mierle <mierle@gmail.com> -Date: Thu May 8 15:38:14 2014 +0200 - - Add public SetMarkers to AutoTrack - - Reviewers: sergey - - Reviewed By: sergey - - Differential Revision: https://developer.blender.org/D515 - -commit c90837f6db276a3b1f610eaad509155f6a43b24f -Author: Keir Mierle <mierle@gmail.com> -Date: Thu May 8 15:17:48 2014 +0200 - - Make autotrack skeleton compile - - Reviewers: sergey - - Reviewed By: sergey - - Differential Revision: https://developer.blender.org/D514 - -commit be01baa2e82e36f63e548f073157e68d2ff870c0 -Author: Keir Mierle <mierle@gmail.com> -Date: Wed May 7 18:48:55 2014 +0200 - - Add preliminary TrackMarkerToFrame in autotrack - - Reviewers: sergey - - Reviewed By: sergey - - Differential Revision: https://developer.blender.org/D509 - -commit 0cab028d591b3d08672ca86eb6c6e4ac1aacf1d0 -Author: Sergey Sharybin <sergey.vfx@gmail.com> -Date: Wed May 7 17:59:11 2014 +0200 - - Remove assert from ArrayND Resize - - That assert broke initialization of arrays which doesn't - own the data since constructor uses Resize to set shape - and strides. - - Strides are still to be fixed, but that's for later. - -commit 64f9c118029a9351e9023e96527c120e1d724d5b -Author: Sergey Sharybin <sergey.vfx@gmail.com> -Date: Wed May 7 17:42:21 2014 +0200 - - Fix ArrayND freeing the data it doesn't own - - Can't really guarantee it works fully correct now, - but at least this check is needed anyway and compilation - works just fine. - - Reviewers: keir - - Reviewed By: keir - - Differential Revision: https://developer.blender.org/D508 - -commit 0618f1c8e88dfc738cdde55784da80b889905e7c -Author: Keir Mierle <mierle@gmail.com> -Date: Wed May 7 12:03:32 2014 +0200 - - Minor changes - - Reviewers: sergey - - Reviewed By: sergey - - Differential Revision: https://developer.blender.org/D505 - -commit 5c34335e1bb90c4ed701ee830c718ed4e20dbffa -Author: Sergey Sharybin <sergey.vfx@gmail.com> -Date: Wed May 7 11:12:23 2014 +0200 - - Fix compilation error in frame accessor - - - int64 is not a standard type, we've got int64_t defined in - std int. We also have an msvc port of this header, so should - not be an issue. - - - Fixed inconsistency in usage of CacheKey and Key, used Key. - - - Some functions weren't marked as virtual. - - Additional change: added self to authors. - - Reviewers: keir - - Reviewed By: keir - - Differential Revision: https://developer.blender.org/D504 - -commit 06bc207614e262cd688e2c3ed820ade7c77bdb66 -Author: Keir Mierle <mierle@gmail.com> -Date: Tue May 6 22:30:59 2014 +0200 - - Start new Tracks implementation - - This adds the new Tracks implementation, as well as a - trivial test to show it compiles. - - Reviewers: sergey - - Reviewed By: sergey - - Differential Revision: https://developer.blender.org/D502 - -commit 25ce061e6da69881460ba7718bb0d660a2380a02 -Author: Keir Mierle <mierle@gmail.com> -Date: Tue May 6 19:10:51 2014 +0200 - - Add Reconstruction class for new API - - This starts the new Reconstruction class (with support for e.g. planes). This - also starts the new namespace "mv" which will eventually have all the symbols - we wish to export. - - Reviewers: sergey - - Reviewed By: sergey - - Differential Revision: https://developer.blender.org/D501 - -commit 0a6af3e29016048978aea607673340500e050339 -Author: Keir Mierle <mierle@gmail.com> -Date: Tue May 6 17:52:53 2014 +0200 - - Add a new Tracks implementation - - Reviewers: sergey - - Reviewed By: sergey - - Differential Revision: https://developer.blender.org/D500 - -commit 887b68d29c2b198f4939f9ab5153881aa2c1806e -Author: Keir Mierle <mierle@gmail.com> -Date: Tue May 6 17:01:39 2014 +0200 - - Initial commit of unfinished AutoTrack API - - This starts the creating the new AutoTrack API. The new API will - make it possible for libmv to do full autotracking, including - predictive tracking and also support multiple motion models (3D - planes etc). - - The first goal (not in this patch) is to convert Blender to use - the new API without adding any new functionality. - - Note: This does not add any of the API to the build system! - It likely does not compile. - - Reviewers: sergey - - Reviewed By: sergey - - Differential Revision: https://developer.blender.org/D499 - -commit 08cc227d431d257d27f300fbb8e6991e663302da -Author: Sergey Sharybin <sergey.vfx@gmail.com> -Date: Tue May 6 13:09:22 2014 +0200 - - Fix homography test failure - - It was caused by assuming that reconstructed homography matrix - should look exactly the same as the matrix used to generate a - test case. - - It's not actually valid assumption because different-looking - matrices could correspond to the same exact transform. - - In this change we make it so actual "re-projected" vectors - are being checked, not the values in matrix. This makes it - more predictable verification. - - Reviewers: keir - - Reviewed By: keir - - Differential Revision: https://developer.blender.org/D488 - -commit 0b7d83dc9627447dc7df64d7e3a468aefe9ddc13 -Author: Sergey Sharybin <sergey.vfx@gmail.com> -Date: Wed Apr 23 19:14:55 2014 +0600 - - Fix compilation on OSX after previous commit - - EXPECT_EQ wasn't defined in the scope. - -commit d14049e00dabf8fdf49056779f0a3718fbb39e8f -Author: Sergey Sharybin <sergey.vfx@gmail.com> -Date: Wed Apr 23 15:08:16 2014 +0600 - - Move aligned malloc implementation into own file - - It was rather stupid having it in brute region tracker, - now it is in own file in base library (which was also - added in this commit, before this it consist of header - files only). - - Reviewers: keir - - Reviewed By: keir - - Differential Revision: https://developer.blender.org/D479 - -commit 0ddf3851bfcb8de43660b119a25a77a25674200d -Author: Sergey Sharybin <sergey.vfx@gmail.com> -Date: Mon Apr 21 14:14:03 2014 +0600 - - Optimization of PearsonProductMomentCorrelation - - Pass the arrays by reference rather than by value, - should give some percent of speedup. - - Also don't pass the dimensions to the function but - get them from the images themselves. - - Hopefully this will give some %% of tracker speedup. - -commit f68fdbe5896a6c5bd8b500caeec61b876c5e44c6 -Author: Sergey Sharybin <sergey.vfx@gmail.com> -Date: Mon Apr 21 14:10:43 2014 +0600 - - Fix wrong assert in ResizeImage() - - The assert didn't make any sense because ComputeBoundingBox() - is intended to return bounding box in the following way: - (xmin, xmax, ymin, ymax). diff --git a/intern/libmv/bundle.sh b/intern/libmv/bundle.sh index 27e012f665f..d155d050782 100755 --- a/intern/libmv/bundle.sh +++ b/intern/libmv/bundle.sh @@ -120,6 +120,7 @@ if(WITH_LIBMV) add_definitions(\${GFLAGS_DEFINES}) add_definitions(\${GLOG_DEFINES}) add_definitions(\${CERES_DEFINES}) + add_definitions(-DLIBMV_GFLAGS_NAMESPACE=\${GFLAGS_NAMESPACE}) list(APPEND INC \${GFLAGS_INCLUDE_DIRS} diff --git a/intern/libmv/intern/track_region.cc b/intern/libmv/intern/track_region.cc index d395b6457d7..8989897e09f 100644 --- a/intern/libmv/intern/track_region.cc +++ b/intern/libmv/intern/track_region.cc @@ -36,7 +36,7 @@ /* define this to generate PNG images with content of search areas on every itteration of tracking */ -#define DUMP_ALWAYS +#undef DUMP_ALWAYS using libmv::FloatImage; using libmv::TrackRegionOptions; diff --git a/intern/memutil/MEM_CacheLimiterC-Api.h b/intern/memutil/MEM_CacheLimiterC-Api.h index 0fe5469a4d4..b5680890eb8 100644 --- a/intern/memutil/MEM_CacheLimiterC-Api.h +++ b/intern/memutil/MEM_CacheLimiterC-Api.h @@ -61,8 +61,8 @@ bool MEM_CacheLimiter_is_disabled(void); * Create new MEM_CacheLimiter object * managed objects are destructed with the data_destructor * - * @param data_destructor - * @return A new MEM_CacheLimter object + * \param data_destructor + * \return A new MEM_CacheLimter object */ MEM_CacheLimiterC *new_MEM_CacheLimiter(MEM_CacheLimiter_Destruct_Func data_destructor, @@ -73,7 +73,7 @@ MEM_CacheLimiterC *new_MEM_CacheLimiter(MEM_CacheLimiter_Destruct_Func data_dest * * Frees the memory of the CacheLimiter but does not touch managed objects! * - * @param This "This" pointer + * \param This "This" pointer */ void delete_MEM_CacheLimiter(MEM_CacheLimiterC *This); @@ -81,8 +81,8 @@ void delete_MEM_CacheLimiter(MEM_CacheLimiterC *This); /** * Manage object * - * @param This "This" pointer, data data object to manage - * @return CacheLimiterHandle to ref, unref, touch the managed object + * \param This "This" pointer, data data object to manage + * \return CacheLimiterHandle to ref, unref, touch the managed object */ MEM_CacheLimiterHandleC *MEM_CacheLimiter_insert(MEM_CacheLimiterC *This, void *data); @@ -90,7 +90,7 @@ MEM_CacheLimiterHandleC *MEM_CacheLimiter_insert(MEM_CacheLimiterC *This, void * /** * Free objects until memory constraints are satisfied * - * @param This "This" pointer + * \param This "This" pointer */ void MEM_CacheLimiter_enforce_limits(MEM_CacheLimiterC *This); @@ -99,7 +99,7 @@ void MEM_CacheLimiter_enforce_limits(MEM_CacheLimiterC *This); * Unmanage object previously inserted object. * Does _not_ delete managed object! * - * @param This "This" pointer, handle of object + * \param handle of object */ void MEM_CacheLimiter_unmanage(MEM_CacheLimiterHandleC *handle); @@ -108,7 +108,7 @@ void MEM_CacheLimiter_unmanage(MEM_CacheLimiterHandleC *handle); /** * Raise priority of object (put it at the tail of the deletion chain) * - * @param handle of object + * \param handle of object */ void MEM_CacheLimiter_touch(MEM_CacheLimiterHandleC *handle); @@ -117,7 +117,7 @@ void MEM_CacheLimiter_touch(MEM_CacheLimiterHandleC *handle); * Increment reference counter. Objects with reference counter != 0 are _not_ * deleted. * - * @param handle of object + * \param handle of object */ void MEM_CacheLimiter_ref(MEM_CacheLimiterHandleC *handle); @@ -126,7 +126,7 @@ void MEM_CacheLimiter_ref(MEM_CacheLimiterHandleC *handle); * Decrement reference counter. Objects with reference counter != 0 are _not_ * deleted. * - * @param handle of object + * \param handle of object */ void MEM_CacheLimiter_unref(MEM_CacheLimiterHandleC *handle); @@ -134,7 +134,7 @@ void MEM_CacheLimiter_unref(MEM_CacheLimiterHandleC *handle); /** * Get reference counter. * - * @param handle of object + * \param handle of object */ int MEM_CacheLimiter_get_refcount(MEM_CacheLimiterHandleC *handle); @@ -142,7 +142,7 @@ int MEM_CacheLimiter_get_refcount(MEM_CacheLimiterHandleC *handle); /** * Get pointer to managed object * - * @param handle of object + * \param handle of object */ void *MEM_CacheLimiter_get(MEM_CacheLimiterHandleC *handle); diff --git a/intern/opensubdiv/opensubdiv_capi.cc b/intern/opensubdiv/opensubdiv_capi.cc index 52ce98fe74b..91803551f12 100644 --- a/intern/opensubdiv/opensubdiv_capi.cc +++ b/intern/opensubdiv/opensubdiv_capi.cc @@ -33,6 +33,7 @@ #include <stdlib.h> #include <GL/glew.h> +#include <opensubdiv/version.h> #include <opensubdiv/osd/glMesh.h> /* CPU Backend */ @@ -381,3 +382,8 @@ int openSubdiv_supportGPUDisplay(void) (GLEW_ARB_texture_buffer_object || GLEW_EXT_texture_buffer_object))); /* also ARB_explicit_attrib_location? */ } + +int openSubdiv_getVersionHex(void) +{ + return OPENSUBDIV_VERSION_NUMBER; +} diff --git a/intern/opensubdiv/opensubdiv_capi.h b/intern/opensubdiv/opensubdiv_capi.h index c3a194813e6..281bd3f010d 100644 --- a/intern/opensubdiv/opensubdiv_capi.h +++ b/intern/opensubdiv/opensubdiv_capi.h @@ -152,6 +152,8 @@ void openSubdiv_init(bool gpu_legacy_support); void openSubdiv_cleanup(void); bool openSubdiv_gpu_legacy_support(void); +int openSubdiv_getVersionHex(void); + #ifdef __cplusplus } #endif |