diff options
author | Brecht Van Lommel <brecht@blender.org> | 2021-11-10 22:19:09 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2021-11-10 22:19:09 +0300 |
commit | 3fa86f4b280cbc6ccc18993c089b94dda45afa34 (patch) | |
tree | cd4608ca29636babed9dbc8ed9e7c37478422fce | |
parent | abf62d06d1c70c11fcb3a7f2b73eaa3fb03d4616 (diff) | |
parent | 7689f501e2c3beb3535e4a390971d7b0fde74ee9 (diff) |
Merge branch 'blender-v3.0-release'
-rw-r--r-- | CMakeLists.txt | 6 | ||||
-rw-r--r-- | build_files/cmake/config/blender_release.cmake | 1 | ||||
-rw-r--r-- | intern/cycles/device/hip/device_impl.cpp | 3 | ||||
-rw-r--r-- | intern/cycles/integrator/denoiser.cpp | 5 | ||||
-rw-r--r-- | intern/cycles/kernel/device/gpu/kernel.h | 25 |
5 files changed, 36 insertions, 4 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a5ac1918bc..d2e2d02dcde 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -440,7 +440,11 @@ mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL) mark_as_advanced(WITH_CUDA_DYNLOAD) # AMD HIP -option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" OFF) +if(WIN32) + option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON) +else() + option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" OFF) +endif() option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF) set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 CACHE STRING "AMD HIP architectures to build binaries for") mark_as_advanced(WITH_CYCLES_DEVICE_HIP) diff --git a/build_files/cmake/config/blender_release.cmake b/build_files/cmake/config/blender_release.cmake index b8180d733de..04074db688d 100644 --- a/build_files/cmake/config/blender_release.cmake +++ b/build_files/cmake/config/blender_release.cmake @@ -81,4 +81,5 @@ if(NOT APPLE) set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE) set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE) set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE) + set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE) endif() diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp index e8482772186..bb0573abf8d 100644 --- a/intern/cycles/device/hip/device_impl.cpp +++ b/intern/cycles/device/hip/device_impl.cpp @@ -1160,6 +1160,8 @@ bool HIPDevice::should_use_graphics_interop() * possible, but from the empiric measurements it can be considerably slower than using naive * pixels copy. */ + /* Disable graphics interop for now, because of driver bug in 21.40. 
See T92972 */ +# if 0 HIPContextScope scope(this); int num_all_devices = 0; @@ -1178,6 +1180,7 @@ bool HIPDevice::should_use_graphics_interop() return true; } } +# endif return false; } diff --git a/intern/cycles/integrator/denoiser.cpp b/intern/cycles/integrator/denoiser.cpp index b89024e0c85..2a5f99f358b 100644 --- a/intern/cycles/integrator/denoiser.cpp +++ b/intern/cycles/integrator/denoiser.cpp @@ -33,7 +33,10 @@ unique_ptr<Denoiser> Denoiser::create(Device *path_trace_device, const DenoisePa return make_unique<OptiXDenoiser>(path_trace_device, params); } - return make_unique<OIDNDenoiser>(path_trace_device, params); + /* Always fallback to OIDN. */ + DenoiseParams oidn_params = params; + oidn_params.type = DENOISER_OPENIMAGEDENOISE; + return make_unique<OIDNDenoiser>(path_trace_device, oidn_params); } Denoiser::Denoiser(Device *path_trace_device, const DenoiseParams &params) diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index e954178ec63..d63cd0e8262 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -523,6 +523,26 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) * Film. */ +ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgba, + const int rgba_offset, + const int rgba_stride, + const int x, + const int y, + const half4 half_pixel) +{ + /* Work around HIP issue with half float display, see T92972. 
*/ +#ifdef __KERNEL_HIP__ + ccl_global half *out = ((ccl_global half *)rgba) + (rgba_offset + y * rgba_stride + x) * 4; + out[0] = half_pixel.x; + out[1] = half_pixel.y; + out[2] = half_pixel.z; + out[3] = half_pixel.w; +#else + ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x; + *out = half_pixel; +#endif +} + #define KERNEL_FILM_CONVERT_VARIANT(variant, input_channel_count) \ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \ ccl_gpu_kernel_signature(film_convert_##variant, \ @@ -588,8 +608,9 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \ film_apply_pass_pixel_overlays_rgba(&kfilm_convert, buffer, pixel); \ \ - ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x; \ - *out = float4_to_half4_display(make_float4(pixel[0], pixel[1], pixel[2], pixel[3])); \ + const half4 half_pixel = float4_to_half4_display( \ + make_float4(pixel[0], pixel[1], pixel[2], pixel[3])); \ + kernel_gpu_film_convert_half_write(rgba, rgba_offset, rgba_stride, x, y, half_pixel); \ } /* 1 channel inputs */ |