diff options
author | Hans-Kristian Arntzen <post@arntzen-software.no> | 2020-10-22 16:57:48 +0300 |
---|---|---|
committer | Hans-Kristian Arntzen <post@arntzen-software.no> | 2020-10-22 16:57:48 +0300 |
commit | b1114276e4bdb32668d6e2885e6c238cefcf037a (patch) | |
tree | 6faf871fd5e8d784b803682ab1e098dcb4155a54 | |
parent | 16f09a0ba0606390f410d0de45efa8cb40b26689 (diff) |
vkd3d: Ensure sane SSE rounding modes are set up before compiling. (ref: sse-flags)
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
-rw-r--r-- | include/private/vkd3d_common.h | 35 | ||||
-rw-r--r-- | libs/vkd3d/state.c | 7 |
2 files changed, 42 insertions, 0 deletions
diff --git a/include/private/vkd3d_common.h b/include/private/vkd3d_common.h index 78661c1d..28382e62 100644 --- a/include/private/vkd3d_common.h +++ b/include/private/vkd3d_common.h @@ -271,4 +271,39 @@ static inline size_t vkd3d_wcslen(const WCHAR *wstr, size_t wchar_size) } } +#ifdef __SSE__ +#include <emmintrin.h> +static inline uint32_t vkd3d_set_neutral_floating_point_mode_flags(void) +{ + /* This is a quite gross workaround, however, this works around a very particular compiler bug on ACO on + * older Mesa drivers with Death Stranding. The issue was fixed here (2020-10-22): + * https://gitlab.freedesktop.org/mesa/mesa/-/issues/3668 + * https://gitlab.freedesktop.org/mesa/mesa/-/commit/4e2fe34aa9944612f2224188317a4ad2aee8035b + * The game sets flush-to-zero SSE flags, and what happens is that ACO performs a wrong compare of uint bitcast to FP != 0.0. + * This results in a denormal compare, which would normally work, but not with flush to zero. + * This was not reproducible when running the game in a captured form, which further complicated things. + * To avoid any potential weirdness happening inside the compiler which will be near-impossible to track down, + * we ensure there is a "neutral" FP flag setup. No flush-to-zero and no weird rounding. + * CSR state is per-thread, so this is safe. 
*/ + uint32_t saved = _mm_getcsr(); + _mm_setcsr(_MM_MASK_MASK); + return saved; +} + +static inline void vkd3d_restore_floating_point_mode_flags(uint32_t flags) +{ + _mm_setcsr(flags); +} +#else +static inline uint32_t vkd3d_set_neutral_floating_point_mode_flags(void) +{ + return 0; +} + +static inline void vkd3d_restore_floating_point_mode_flags(uint32_t ignored) +{ + (void)ignored; +} +#endif + #endif /* __VKD3D_COMMON_H */ diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 3e61c4de..8f22f782 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -1645,6 +1645,7 @@ static HRESULT vkd3d_create_compute_pipeline(struct d3d12_device *device, struct vkd3d_shader_debug_ring_spec_info spec_info; struct vkd3d_shader_compile_arguments compile_args; VkComputePipelineCreateInfo pipeline_info; + uint32_t mode_flags; VkResult vr; HRESULT hr; @@ -1670,8 +1671,10 @@ static HRESULT vkd3d_create_compute_pipeline(struct d3d12_device *device, pipeline_info.stage.pSpecializationInfo = &spec_info.spec_info; } + mode_flags = vkd3d_set_neutral_floating_point_mode_flags(); vr = VK_CALL(vkCreateComputePipelines(device->vk_device, vk_cache, 1, &pipeline_info, NULL, vk_pipeline)); + vkd3d_restore_floating_point_mode_flags(mode_flags); VK_CALL(vkDestroyShaderModule(device->vk_device, pipeline_info.stage.module, NULL)); if (vr < 0) { @@ -3067,6 +3070,7 @@ VkPipeline d3d12_pipeline_state_create_pipeline_variant(struct d3d12_pipeline_st VkGraphicsPipelineCreateInfo pipeline_desc; VkPipelineViewportStateCreateInfo vp_desc; VkPipeline vk_pipeline; + uint32_t mode_flags; unsigned int i; VkResult vr; HRESULT hr; @@ -3157,12 +3161,15 @@ VkPipeline d3d12_pipeline_state_create_pipeline_variant(struct d3d12_pipeline_st *vk_render_pass = pipeline_desc.renderPass; + mode_flags = vkd3d_set_neutral_floating_point_mode_flags(); if ((vr = VK_CALL(vkCreateGraphicsPipelines(device->vk_device, vk_cache, 1, &pipeline_desc, NULL, &vk_pipeline))) < 0) { WARN("Failed to create Vulkan graphics 
pipeline, vr %d.\n", vr); + vkd3d_restore_floating_point_mode_flags(mode_flags); return VK_NULL_HANDLE; } + vkd3d_restore_floating_point_mode_flags(mode_flags); return vk_pipeline; } |