From d547f9d3d291b495a022e1e3bfbb5f4af25c02b1 Mon Sep 17 00:00:00 2001 From: Stefan Werner Date: Tue, 27 Aug 2019 14:03:49 +0200 Subject: Fix T68944: Added check for SSE4.1 to denoising node. Since OpenImageDenoise requires a CPU with SSE 4.1 or newer, let the node act as passthrough on unsupported CPUs and display a message in the node itself. --- source/blender/blenlib/BLI_system.h | 1 + source/blender/blenlib/intern/system.c | 13 +++ .../compositor/operations/COM_DenoiseOperation.cpp | 99 +++++++++++----------- source/blender/editors/space_node/drawnode.c | 4 + 4 files changed, 69 insertions(+), 48 deletions(-) (limited to 'source') diff --git a/source/blender/blenlib/BLI_system.h b/source/blender/blenlib/BLI_system.h index f4c0399e959..19797ba23bc 100644 --- a/source/blender/blenlib/BLI_system.h +++ b/source/blender/blenlib/BLI_system.h @@ -24,6 +24,7 @@ */ int BLI_cpu_support_sse2(void); +int BLI_cpu_support_sse41(void); void BLI_system_backtrace(FILE *fp); /* Get CPU brand, result is to be MEM_freeN()-ed. */ diff --git a/source/blender/blenlib/intern/system.c b/source/blender/blenlib/intern/system.c index 88f2e2625e8..941c2b608e6 100644 --- a/source/blender/blenlib/intern/system.c +++ b/source/blender/blenlib/intern/system.c @@ -179,6 +179,19 @@ char *BLI_cpu_brand_string(void) return NULL; } +int BLI_cpu_support_sse41(void) +{ + int result[4], num; + __cpuid(result, 0); + num = result[0]; + + if (num >= 1) { + __cpuid(result, 0x00000001); + return (result[2] & ((int)1 << 19)) != 0; + } + return 0; +} + void BLI_hostname_get(char *buffer, size_t bufsize) { #ifndef WIN32 diff --git a/source/blender/compositor/operations/COM_DenoiseOperation.cpp b/source/blender/compositor/operations/COM_DenoiseOperation.cpp index 233840aa5a8..82a529dc2ef 100644 --- a/source/blender/compositor/operations/COM_DenoiseOperation.cpp +++ b/source/blender/compositor/operations/COM_DenoiseOperation.cpp @@ -97,66 +97,69 @@ void DenoiseOperation::generateDenoise(float *data, return; } #ifdef WITH_OPENIMAGEDENOISE - oidn::DeviceRef device = oidn::newDevice(); - device.commit(); + if (BLI_cpu_support_sse41()) { + oidn::DeviceRef device = oidn::newDevice(); + device.commit(); - oidn::FilterRef filter = device.newFilter("RT"); - filter.setImage("color", - inputBufferColor, - oidn::Format::Float3, - inputTileColor->getWidth(), - inputTileColor->getHeight(), - 0, - 4 * sizeof(float)); - if (inputTileAlbedo && inputTileAlbedo->getBuffer()) { - filter.setImage("albedo", - inputTileAlbedo->getBuffer(), + oidn::FilterRef filter = device.newFilter("RT"); + filter.setImage("color", + inputBufferColor, oidn::Format::Float3, - inputTileAlbedo->getWidth(), - inputTileAlbedo->getHeight(), + inputTileColor->getWidth(), + inputTileColor->getHeight(), 0, 4 * sizeof(float)); - } - if (inputTileNormal && inputTileNormal->getBuffer()) { - filter.setImage("normal", - inputTileNormal->getBuffer(), + if (inputTileAlbedo && inputTileAlbedo->getBuffer()) { + filter.setImage("albedo", + inputTileAlbedo->getBuffer(), + oidn::Format::Float3, + inputTileAlbedo->getWidth(), + inputTileAlbedo->getHeight(), + 0, + 4 * sizeof(float)); + } + if (inputTileNormal && inputTileNormal->getBuffer()) { + filter.setImage("normal", + inputTileNormal->getBuffer(), + oidn::Format::Float3, + inputTileNormal->getWidth(), + inputTileNormal->getHeight(), + 0, + 3 * sizeof(float)); + } + filter.setImage("output", + data, oidn::Format::Float3, - inputTileNormal->getWidth(), - inputTileNormal->getHeight(), + inputTileColor->getWidth(), + inputTileColor->getHeight(), 0, - 3 * sizeof(float)); - } - filter.setImage("output", - data, - oidn::Format::Float3, - inputTileColor->getWidth(), - inputTileColor->getHeight(), - 0, - 4 * sizeof(float)); + 4 * sizeof(float)); - BLI_assert(settings); - if (settings) { - filter.set("hdr", settings->hdr); - filter.set("srgb", false); - } + BLI_assert(settings); + if (settings) { + filter.set("hdr", settings->hdr); + filter.set("srgb", false); + } - filter.commit(); - /* Since it's memory intensive, it's better to run only one instance of OIDN at a time. - * OpenImageDenoise is multithreaded internally and should use all available cores nonetheless. - */ - BLI_mutex_lock(&oidn_lock); - filter.execute(); - BLI_mutex_unlock(&oidn_lock); + filter.commit(); + /* Since it's memory intensive, it's better to run only one instance of OIDN at a time. + * OpenImageDenoise is multithreaded internally and should use all available cores nonetheless. + */ + BLI_mutex_lock(&oidn_lock); + filter.execute(); + BLI_mutex_unlock(&oidn_lock); - /* copy the alpha channel, OpenImageDenoise currently only supports RGB */ - size_t numPixels = inputTileColor->getWidth() * inputTileColor->getHeight(); - for (size_t i = 0; i < numPixels; ++i) { - data[i * 4 + 3] = inputBufferColor[i * 4 + 3]; + /* copy the alpha channel, OpenImageDenoise currently only supports RGB */ + size_t numPixels = inputTileColor->getWidth() * inputTileColor->getHeight(); + for (size_t i = 0; i < numPixels; ++i) { + data[i * 4 + 3] = inputBufferColor[i * 4 + 3]; + } + return; } -#else +#endif + /* If built without OIDN or running on an unsupported CPU, just pass through. */ UNUSED_VARS(inputTileAlbedo, inputTileNormal, settings); ::memcpy(data, inputBufferColor, inputTileColor->getWidth() * inputTileColor->getHeight() * sizeof(float) * 4); -#endif } diff --git a/source/blender/editors/space_node/drawnode.c b/source/blender/editors/space_node/drawnode.c index a69eb254621..72a73f89227 100644 --- a/source/blender/editors/space_node/drawnode.c +++ b/source/blender/editors/space_node/drawnode.c @@ -2703,6 +2703,10 @@ static void node_composit_buts_denoise(uiLayout *layout, bContext *UNUSED(C), Po { #ifndef WITH_OPENIMAGEDENOISE uiItemL(layout, IFACE_("Disabled, built without OpenImageDenoise"), ICON_ERROR); +#else + if (!BLI_cpu_support_sse41()) { + uiItemL(layout, IFACE_("Disabled, CPU with SSE4.1 is required"), ICON_ERROR); + } #endif uiItemR(layout, ptr, "use_hdr", 0, NULL, ICON_NONE); -- cgit v1.2.3