Merge branch 'blender-v3.0-release'

author: Brecht Van Lommel <brecht@blender.org> 2021-11-10 22:19:09 +0300
committer: Brecht Van Lommel <brecht@blender.org> 2021-11-10 22:19:09 +0300
commit: 3fa86f4b280cbc6ccc18993c089b94dda45afa34 (patch)
tree: cd4608ca29636babed9dbc8ed9e7c37478422fce
parent: abf62d06d1c70c11fcb3a7f2b73eaa3fb03d4616 (diff)
parent: 7689f501e2c3beb3535e4a390971d7b0fde74ee9 (diff)
5 files changed, 36 insertions, 4 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7a5ac1918bc..d2e2d02dcde 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -440,7 +440,11 @@ mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
 mark_as_advanced(WITH_CUDA_DYNLOAD)
 
 # AMD HIP
-option(WITH_CYCLES_DEVICE_HIP        "Enable Cycles AMD HIP support" OFF)
+if(WIN32)
+  option(WITH_CYCLES_DEVICE_HIP        "Enable Cycles AMD HIP support" ON)
+else()
+  option(WITH_CYCLES_DEVICE_HIP        "Enable Cycles AMD HIP support" OFF)
+endif()
 option(WITH_CYCLES_HIP_BINARIES      "Build Cycles AMD HIP binaries" OFF)
 set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 CACHE STRING "AMD HIP architectures to build binaries for")
 mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
diff --git a/build_files/cmake/config/blender_release.cmake b/build_files/cmake/config/blender_release.cmake
index b8180d733de..04074db688d 100644
--- a/build_files/cmake/config/blender_release.cmake
+++ b/build_files/cmake/config/blender_release.cmake
@@ -81,4 +81,5 @@ if(NOT APPLE)
   set(WITH_CYCLES_DEVICE_OPTIX    ON  CACHE BOOL "" FORCE)
   set(WITH_CYCLES_CUDA_BINARIES   ON  CACHE BOOL "" FORCE)
   set(WITH_CYCLES_CUBIN_COMPILER  OFF CACHE BOOL "" FORCE)
+  set(WITH_CYCLES_HIP_BINARIES    ON  CACHE BOOL "" FORCE)
 endif()
diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp
index e8482772186..bb0573abf8d 100644
--- a/intern/cycles/device/hip/device_impl.cpp
+++ b/intern/cycles/device/hip/device_impl.cpp
@@ -1160,6 +1160,8 @@ bool HIPDevice::should_use_graphics_interop()
    * possible, but from the empiric measurements it can be considerably slower than using naive
    * pixels copy. */
 
+  /* Disable graphics interop for now, because of a driver bug in 21.40. See T92972. */
+#  if 0
   HIPContextScope scope(this);
 
   int num_all_devices = 0;
@@ -1178,6 +1180,7 @@ bool HIPDevice::should_use_graphics_interop()
       return true;
     }
   }
+#  endif
 
   return false;
 }
diff --git a/intern/cycles/integrator/denoiser.cpp b/intern/cycles/integrator/denoiser.cpp
index b89024e0c85..2a5f99f358b 100644
--- a/intern/cycles/integrator/denoiser.cpp
+++ b/intern/cycles/integrator/denoiser.cpp
@@ -33,7 +33,10 @@ unique_ptr<Denoiser> Denoiser::create(Device *path_trace_device, const DenoisePa
     return make_unique<OptiXDenoiser>(path_trace_device, params);
   }
 
-  return make_unique<OIDNDenoiser>(path_trace_device, params);
+  /* Always fall back to OIDN. */
+  DenoiseParams oidn_params = params;
+  oidn_params.type = DENOISER_OPENIMAGEDENOISE;
+  return make_unique<OIDNDenoiser>(path_trace_device, oidn_params);
 }
 
 Denoiser::Denoiser(Device *path_trace_device, const DenoiseParams &params)
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index e954178ec63..d63cd0e8262 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -523,6 +523,26 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
  * Film.
  */
 
+ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgba,
+                                                          const int rgba_offset,
+                                                          const int rgba_stride,
+                                                          const int x,
+                                                          const int y,
+                                                          const half4 half_pixel)
+{
+  /* Work around HIP issue with half float display (T92972): store each component separately. */
+#ifdef __KERNEL_HIP__
+  ccl_global half *out = ((ccl_global half *)rgba) + (rgba_offset + y * rgba_stride + x) * 4;
+  out[0] = half_pixel.x;
+  out[1] = half_pixel.y;
+  out[2] = half_pixel.z;
+  out[3] = half_pixel.w;
+#else
+  ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x;
+  *out = half_pixel;
+#endif
+}
+
 #define KERNEL_FILM_CONVERT_VARIANT(variant, input_channel_count) \
   ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \
       ccl_gpu_kernel_signature(film_convert_##variant, \
@@ -588,8 +608,9 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
 \
     film_apply_pass_pixel_overlays_rgba(&kfilm_convert, buffer, pixel); \
 \
-    ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x; \
-    *out = float4_to_half4_display(make_float4(pixel[0], pixel[1], pixel[2], pixel[3])); \
+    const half4 half_pixel = float4_to_half4_display( \
+        make_float4(pixel[0], pixel[1], pixel[2], pixel[3])); \
+    kernel_gpu_film_convert_half_write(rgba, rgba_offset, rgba_stride, x, y, half_pixel); \
   }
 
 /* 1 channel inputs */
author	Brecht Van Lommel <brecht@blender.org>	2021-11-10 22:19:09 +0300
committer	Brecht Van Lommel <brecht@blender.org>	2021-11-10 22:19:09 +0300
commit	3fa86f4b280cbc6ccc18993c089b94dda45afa34 (patch)
tree	cd4608ca29636babed9dbc8ed9e7c37478422fce
parent	abf62d06d1c70c11fcb3a7f2b73eaa3fb03d4616 (diff)
parent	7689f501e2c3beb3535e4a390971d7b0fde74ee9 (diff)