From e11c899e715b01f65b0a3b9b99cd69cf460209b1 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Wed, 17 Aug 2022 18:31:52 +0200 Subject: Cycles: disable Metal inlining optimization on Apple GPUs This gave a 1.1x speedup, but also leads to very long compile times that make it seem like Blender has stopped working. This can be brought back in the future behind an option that users can explicitly enable. Fix T100102 Ref D14923, D14763, T92212 --- intern/cycles/kernel/device/metal/compat.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'intern') diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h index 80ee8ef5b57..674de554f61 100644 --- a/intern/cycles/kernel/device/metal/compat.h +++ b/intern/cycles/kernel/device/metal/compat.h @@ -29,11 +29,12 @@ using namespace metal::raytracing; /* Qualifiers */ -#if defined(__KERNEL_METAL_APPLE__) +/* Inline everything for Apple GPUs. This gives ~1.1x speedup and 10% spill + * reduction for integator_shade_surface. However it comes at the cost of + * longer compile times (~4.5 minutes on M1 Max) and is disabled for that + * reason, until there is a user option to manually enable it. */ -/* Inline everything for Apple GPUs. - * This gives ~1.1x speedup and 10% spill reduction for integator_shade_surface - * at the cost of longer compile times (~4.5 minutes on M1 Max). 
*/ +#if 0 // defined(__KERNEL_METAL_APPLE__) # define ccl_device __attribute__((always_inline)) # define ccl_device_inline __attribute__((always_inline)) -- cgit v1.2.3 From 6a4f4810f38b2efc49d55dad6960f610f166773f Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Thu, 18 Aug 2022 20:00:16 +0200 Subject: Fix T100246: Cycles GPU render error when adding AO node during viewport render --- intern/cycles/integrator/path_trace_work_gpu.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'intern') diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp index fa313f6460a..ee250a6916b 100644 --- a/intern/cycles/integrator/path_trace_work_gpu.cpp +++ b/intern/cycles/integrator/path_trace_work_gpu.cpp @@ -204,22 +204,26 @@ void PathTraceWorkGPU::alloc_integrator_sorting() integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE] = (int *)integrator_shader_sort_counter_.device_pointer; - if (device_scene_->data.kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) { + integrator_shader_sort_prefix_sum_.alloc(sort_buckets); + integrator_shader_sort_prefix_sum_.zero_to_device(); + } + + if (device_scene_->data.kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) { + if (integrator_shader_raytrace_sort_counter_.size() < sort_buckets) { integrator_shader_raytrace_sort_counter_.alloc(sort_buckets); integrator_shader_raytrace_sort_counter_.zero_to_device(); integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] = (int *)integrator_shader_raytrace_sort_counter_.device_pointer; } + } - if (device_scene_->data.kernel_features & KERNEL_FEATURE_MNEE) { + if (device_scene_->data.kernel_features & KERNEL_FEATURE_MNEE) { + if (integrator_shader_mnee_sort_counter_.size() < sort_buckets) { integrator_shader_mnee_sort_counter_.alloc(sort_buckets); integrator_shader_mnee_sort_counter_.zero_to_device(); 
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] = (int *)integrator_shader_mnee_sort_counter_.device_pointer; } - - integrator_shader_sort_prefix_sum_.alloc(sort_buckets); - integrator_shader_sort_prefix_sum_.zero_to_device(); } } -- cgit v1.2.3