diff options
author | Antonio Vazquez <blendergit@gmail.com> | 2022-11-07 18:09:54 +0300 |
---|---|---|
committer | Antonio Vazquez <blendergit@gmail.com> | 2022-11-07 18:09:54 +0300 |
commit | b6b888f7fb706487d9e5ae6b0738201da5493b9f (patch) | |
tree | 852f79b4532a1b93e75c2541ca0d181b00de08a2 /intern | |
parent | a2377b60548f2fe5d46b14ecdf4cf83971432b5c (diff) | |
parent | 95631c94c4bd08f8a7e9c713f624e934eb7eb7ae (diff) |
Merge branch 'master' into gpencil-new-data-proposal
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/device/CMakeLists.txt | 3 | ||||
-rw-r--r-- | intern/cycles/device/metal/kernel.mm | 61 | ||||
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 7 | ||||
-rw-r--r-- | intern/cycles/test/CMakeLists.txt | 1 | ||||
-rw-r--r-- | intern/cycles/test/util_md5_test.cpp | 16 | ||||
-rw-r--r-- | intern/cycles/util/md5.cpp | 9 | ||||
-rw-r--r-- | intern/ffmpeg/tests/ffmpeg_codecs.cc | 2 | ||||
-rw-r--r-- | intern/ghost/intern/GHOST_ContextGLX.cpp | 2 | ||||
-rw-r--r-- | intern/ghost/intern/GHOST_ImeWin32.h | 2 | ||||
-rw-r--r-- | intern/ghost/intern/GHOST_SystemWayland.cpp | 5 | ||||
-rw-r--r-- | intern/guardedalloc/MEM_guardedalloc.h | 2 | ||||
-rw-r--r-- | intern/guardedalloc/intern/leak_detector.cc | 2 | ||||
-rw-r--r-- | intern/wayland_dynload/intern/wayland_dynload_client.c | 5 | ||||
-rw-r--r-- | intern/wayland_dynload/intern/wayland_dynload_cursor.c | 2 | ||||
-rw-r--r-- | intern/wayland_dynload/intern/wayland_dynload_egl.c | 2 | ||||
-rw-r--r-- | intern/wayland_dynload/intern/wayland_dynload_libdecor.c | 2 | ||||
-rw-r--r-- | intern/wayland_dynload/intern/wayland_dynload_utils.c | 9 | ||||
-rw-r--r-- | intern/wayland_dynload/intern/wayland_dynload_utils.h | 3 |
18 files changed, 102 insertions, 33 deletions
diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt index 5296d819e42..bfca3ab6aea 100644 --- a/intern/cycles/device/CMakeLists.txt +++ b/intern/cycles/device/CMakeLists.txt @@ -224,7 +224,8 @@ include_directories(SYSTEM ${INC_SYS}) cycles_add_library(cycles_device "${LIB}" ${SRC}) if(WITH_CYCLES_DEVICE_ONEAPI) - # Need to have proper rebuilding in case of changes in cycles_kernel_oneapi due external project behaviour + # Need to have proper rebuilding in case of changes + # in cycles_kernel_oneapi due external project behavior. add_dependencies(cycles_device cycles_kernel_oneapi) endif() diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm index 55938d1a03a..dc8af9a5358 100644 --- a/intern/cycles/device/metal/kernel.mm +++ b/intern/cycles/device/metal/kernel.mm @@ -45,6 +45,36 @@ bool kernel_has_intersection(DeviceKernel device_kernel) struct ShaderCache { ShaderCache(id<MTLDevice> _mtlDevice) : mtlDevice(_mtlDevice) { + /* Initialize occupancy tuning LUT. 
*/ + if (MetalInfo::get_device_vendor(mtlDevice) == METAL_GPU_APPLE) { + switch (MetalInfo::get_apple_gpu_architecture(mtlDevice)) { + default: + case APPLE_M2: + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES] = {32, 32}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA] = {832, 32}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST] = {64, 64}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW] = {64, 64}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE] = {704, 32}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY] = {1024, 256}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND] = {64, 32}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW] = {256, 256}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE] = {448, 384}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY] = {1024, 1024}; + break; + case APPLE_M1: + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES] = {256, 128}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA] = {768, 32}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST] = {512, 128}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW] = {384, 128}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE] = {512, 64}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY] = {512, 256}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND] = {512, 128}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW] = {384, 32}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE] = {576, 384}; + occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY] = {832, 832}; + break; + } + } } ~ShaderCache(); @@ -73,6 +103,11 @@ struct ShaderCache { std::function<void(MetalKernelPipeline *)> completionHandler; }; + struct OccupancyTuningParameters { + int threads_per_threadgroup = 0; + int num_threads_per_block = 0; + } occupancy_tuning[DEVICE_KERNEL_NUM]; + 
std::mutex cache_mutex; PipelineCollection pipelines[DEVICE_KERNEL_NUM]; @@ -230,6 +265,13 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel, request.pipeline->device_kernel = device_kernel; request.pipeline->threads_per_threadgroup = device->max_threads_per_threadgroup; + if (occupancy_tuning[device_kernel].threads_per_threadgroup) { + request.pipeline->threads_per_threadgroup = + occupancy_tuning[device_kernel].threads_per_threadgroup; + request.pipeline->num_threads_per_block = + occupancy_tuning[device_kernel].num_threads_per_block; + } + /* metalrt options */ request.pipeline->use_metalrt = device->use_metalrt; request.pipeline->metalrt_hair = device->use_metalrt && @@ -374,13 +416,6 @@ void MetalKernelPipeline::compile() const std::string function_name = std::string("cycles_metal_") + device_kernel_as_string(device_kernel); - int threads_per_threadgroup = this->threads_per_threadgroup; - if (device_kernel > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL && - device_kernel < DEVICE_KERNEL_INTEGRATOR_RESET) { - /* Always use 512 for the sorting kernels */ - threads_per_threadgroup = 512; - } - NSString *entryPoint = [@(function_name.c_str()) copy]; NSError *error = NULL; @@ -644,12 +679,14 @@ void MetalKernelPipeline::compile() return; } - int num_threads_per_block = round_down(computePipelineState.maxTotalThreadsPerThreadgroup, - computePipelineState.threadExecutionWidth); - num_threads_per_block = std::max(num_threads_per_block, - (int)computePipelineState.threadExecutionWidth); + if (!num_threads_per_block) { + num_threads_per_block = round_down(computePipelineState.maxTotalThreadsPerThreadgroup, + computePipelineState.threadExecutionWidth); + num_threads_per_block = std::max(num_threads_per_block, + (int)computePipelineState.threadExecutionWidth); + } + this->pipeline = computePipelineState; - this->num_threads_per_block = num_threads_per_block; if (@available(macOS 11.0, *)) { if (creating_new_archive || recreate_archive) { diff --git 
a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 81c5f593974..3779fdc697a 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -866,8 +866,8 @@ if(WITH_CYCLES_DEVICE_ONEAPI) else() list(APPEND sycl_compiler_flags -fPIC) - # We avoid getting __FAST_MATH__ to be defined when building on CentOS 7 until the compilation crash - # it triggers at either AoT or JIT stages gets fixed. + # We avoid getting __FAST_MATH__ to be defined when building on CentOS 7 until the compilation + # crash it triggers at either AoT or JIT stages gets fixed. list(APPEND sycl_compiler_flags -fhonor-nans) # add $ORIGIN to cycles_kernel_oneapi.so rpath so libsycl.so and @@ -881,7 +881,8 @@ if(WITH_CYCLES_DEVICE_ONEAPI) OUTPUT ${cycles_kernel_oneapi_lib} COMMAND ${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=${sycl_compiler_root}/../lib:${OCLOC_INSTALL_DIR}/lib:${IGC_INSTALL_DIR}/lib" - "PATH=${OCLOC_INSTALL_DIR}/bin:${sycl_compiler_root}:$ENV{PATH}" # env PATH is for compiler to find ld + # `$ENV{PATH}` is for compiler to find `ld`. 
+ "PATH=${OCLOC_INSTALL_DIR}/bin:${sycl_compiler_root}:$ENV{PATH}" ${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags} DEPENDS ${cycles_oneapi_kernel_sources}) endif() diff --git a/intern/cycles/test/CMakeLists.txt b/intern/cycles/test/CMakeLists.txt index b126247de5f..c3ae81ed1db 100644 --- a/intern/cycles/test/CMakeLists.txt +++ b/intern/cycles/test/CMakeLists.txt @@ -34,6 +34,7 @@ set(SRC render_graph_finalize_test.cpp util_aligned_malloc_test.cpp util_math_test.cpp + util_md5_test.cpp util_path_test.cpp util_string_test.cpp util_task_test.cpp diff --git a/intern/cycles/test/util_md5_test.cpp b/intern/cycles/test/util_md5_test.cpp new file mode 100644 index 00000000000..abc147b70a1 --- /dev/null +++ b/intern/cycles/test/util_md5_test.cpp @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#include "testing/testing.h" + +#include "util/md5.h" + +CCL_NAMESPACE_BEGIN + +TEST(util, util_md5_string) +{ + /* The hash is calculated using `echo -n "Hello, World\!" | md5 | tr '[:lower:]' '[:upper:]'`. 
*/ + EXPECT_EQ(util_md5_string("Hello, World!"), "65A8E27D8879283831B664BD8B7F0AD4"); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/md5.cpp b/intern/cycles/util/md5.cpp index 1c7e6b9bf3e..3342d7a509a 100644 --- a/intern/cycles/util/md5.cpp +++ b/intern/cycles/util/md5.cpp @@ -347,13 +347,18 @@ void MD5Hash::finish(uint8_t digest[16]) string MD5Hash::get_hex() { + constexpr char kHexDigits[] = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; + uint8_t digest[16]; char buf[16 * 2 + 1]; finish(digest); - for (int i = 0; i < 16; i++) - sprintf(buf + i * 2, "%02X", (unsigned int)digest[i]); + for (int i = 0; i < 16; i++) { + buf[i * 2 + 0] = kHexDigits[digest[i] / 0x10]; + buf[i * 2 + 1] = kHexDigits[digest[i] % 0x10]; + } buf[sizeof(buf) - 1] = '\0'; return string(buf); diff --git a/intern/ffmpeg/tests/ffmpeg_codecs.cc b/intern/ffmpeg/tests/ffmpeg_codecs.cc index e5c33202417..10cbe4b938b 100644 --- a/intern/ffmpeg/tests/ffmpeg_codecs.cc +++ b/intern/ffmpeg/tests/ffmpeg_codecs.cc @@ -40,7 +40,7 @@ bool test_acodec(const AVCodec *codec, AVSampleFormat fmt) if (ctx) { ctx->sample_fmt = fmt; ctx->sample_rate = 48000; - ctx->channel_layout = AV_CH_LAYOUT_MONO; + av_channel_layout_from_mask(&ctx->ch_layout, AV_CH_LAYOUT_MONO); ctx->bit_rate = 128000; int open = avcodec_open2(ctx, codec, NULL); if (open >= 0) { diff --git a/intern/ghost/intern/GHOST_ContextGLX.cpp b/intern/ghost/intern/GHOST_ContextGLX.cpp index 93708983f37..d9f2df21ee0 100644 --- a/intern/ghost/intern/GHOST_ContextGLX.cpp +++ b/intern/ghost/intern/GHOST_ContextGLX.cpp @@ -140,7 +140,7 @@ GHOST_TSuccess GHOST_ContextGLX::initializeDrawingContext() /* End Inline GLEW. */ /* -------------------------------------------------------------------- */ #else - /* Important to initialize only glxew (_not_ GLEW), + /* Important to initialize only GLXEW (_not_ GLEW), * since this breaks w/ Mesa's `swrast`, see: T46431. 
*/ glxewInit(); #endif /* USE_GLXEW_INIT_WORKAROUND */ diff --git a/intern/ghost/intern/GHOST_ImeWin32.h b/intern/ghost/intern/GHOST_ImeWin32.h index 85c8ed7b4bd..cb6d8a770cf 100644 --- a/intern/ghost/intern/GHOST_ImeWin32.h +++ b/intern/ghost/intern/GHOST_ImeWin32.h @@ -266,7 +266,7 @@ class GHOST_ImeWin32 { * Parameters * * window_handle [in] (HWND) * Represents the window handle of the caller. - * * caret_rect [in] (const gfx::Rect&) + * * caret_rect [in] (`const gfx::Rect&`) * Represent the rectangle of the input caret. * This rectangle is used for controlling the positions of IME windows. * * complete [in] (bool) diff --git a/intern/ghost/intern/GHOST_SystemWayland.cpp b/intern/ghost/intern/GHOST_SystemWayland.cpp index 5afbf5b0b5f..3a0ba5cd21a 100644 --- a/intern/ghost/intern/GHOST_SystemWayland.cpp +++ b/intern/ghost/intern/GHOST_SystemWayland.cpp @@ -101,7 +101,7 @@ static const struct GWL_RegistryHandler *gwl_registry_handler_from_interface_slo int interface_slot); /* -------------------------------------------------------------------- */ -/** \name Workaround Compositor Sprsific Bugs +/** \name Workaround Compositor Specific Bugs * \{ */ /** @@ -4013,7 +4013,8 @@ static void gwl_seat_capability_touch_disable(GWL_Seat *seat) } static void seat_handle_capabilities(void *data, - struct wl_seat *wl_seat, + /* Only used in an assert. */ + [[maybe_unused]] struct wl_seat *wl_seat, const uint32_t capabilities) { CLOG_INFO(LOG, diff --git a/intern/guardedalloc/MEM_guardedalloc.h b/intern/guardedalloc/MEM_guardedalloc.h index fdd77fb9eef..5ae33343949 100644 --- a/intern/guardedalloc/MEM_guardedalloc.h +++ b/intern/guardedalloc/MEM_guardedalloc.h @@ -271,7 +271,7 @@ void MEM_use_guarded_allocator(void); template<typename T, typename... 
Args> inline T *MEM_new(const char *allocation_name, Args &&...args) { - void *buffer = MEM_mallocN(sizeof(T), allocation_name); + void *buffer = MEM_mallocN_aligned(sizeof(T), alignof(T), allocation_name); return new (buffer) T(std::forward<Args>(args)...); } diff --git a/intern/guardedalloc/intern/leak_detector.cc b/intern/guardedalloc/intern/leak_detector.cc index 288d78fd206..5b565b15920 100644 --- a/intern/guardedalloc/intern/leak_detector.cc +++ b/intern/guardedalloc/intern/leak_detector.cc @@ -41,7 +41,7 @@ class MemLeakPrinter { if (fail_on_memleak) { /* There are many other ways to change the exit code to failure here: - * - Make the destructor noexcept(false) and throw an exception. + * - Make the destructor `noexcept(false)` and throw an exception. * - Call exit(EXIT_FAILURE). * - Call terminate(). */ diff --git a/intern/wayland_dynload/intern/wayland_dynload_client.c b/intern/wayland_dynload/intern/wayland_dynload_client.c index 68ba5374aba..617a8878199 100644 --- a/intern/wayland_dynload/intern/wayland_dynload_client.c +++ b/intern/wayland_dynload/intern/wayland_dynload_client.c @@ -44,7 +44,8 @@ bool wayland_dynload_client_init(const bool verbose) #define WAYLAND_DYNLOAD_IFACE(symbol) \ { \ const void *symbol_val; \ - if (!(symbol_val = dynamic_library_find_with_error(lib, #symbol, paths[path_found]))) { \ + if (!(symbol_val = dynamic_library_find_with_error( \ + lib, #symbol, paths[path_found], verbose))) { \ return false; \ } \ memcpy(&symbol, symbol_val, sizeof(symbol)); \ @@ -54,7 +55,7 @@ bool wayland_dynload_client_init(const bool verbose) #define WAYLAND_DYNLOAD_FN(symbol) \ if (!(wayland_dynload_client.symbol = dynamic_library_find_with_error( \ - lib, #symbol, paths[path_found]))) { \ + lib, #symbol, paths[path_found], verbose))) { \ return false; \ } #include "wayland_dynload_client.h" diff --git a/intern/wayland_dynload/intern/wayland_dynload_cursor.c b/intern/wayland_dynload/intern/wayland_dynload_cursor.c index 3d0526c7ba6..cc62a43bc01 
100644 --- a/intern/wayland_dynload/intern/wayland_dynload_cursor.c +++ b/intern/wayland_dynload/intern/wayland_dynload_cursor.c @@ -36,7 +36,7 @@ bool wayland_dynload_cursor_init(const bool verbose) #define WAYLAND_DYNLOAD_FN(symbol) \ if (!(wayland_dynload_cursor.symbol = dynamic_library_find_with_error( \ - lib, #symbol, paths[path_index]))) { \ + lib, #symbol, paths[path_index], verbose))) { \ return false; \ } #include "wayland_dynload_cursor.h" diff --git a/intern/wayland_dynload/intern/wayland_dynload_egl.c b/intern/wayland_dynload/intern/wayland_dynload_egl.c index cfc195c0408..d8e4dfe0dad 100644 --- a/intern/wayland_dynload/intern/wayland_dynload_egl.c +++ b/intern/wayland_dynload/intern/wayland_dynload_egl.c @@ -36,7 +36,7 @@ bool wayland_dynload_egl_init(const bool verbose) #define WAYLAND_DYNLOAD_FN(symbol) \ if (!(wayland_dynload_egl.symbol = dynamic_library_find_with_error( \ - lib, #symbol, paths[path_found]))) { \ + lib, #symbol, paths[path_found], verbose))) { \ return false; \ } #include "wayland_dynload_egl.h" diff --git a/intern/wayland_dynload/intern/wayland_dynload_libdecor.c b/intern/wayland_dynload/intern/wayland_dynload_libdecor.c index d8bdd27bb27..dafd1badfda 100644 --- a/intern/wayland_dynload/intern/wayland_dynload_libdecor.c +++ b/intern/wayland_dynload/intern/wayland_dynload_libdecor.c @@ -36,7 +36,7 @@ bool wayland_dynload_libdecor_init(const bool verbose) #define WAYLAND_DYNLOAD_FN(symbol) \ if (!(wayland_dynload_libdecor.symbol = dynamic_library_find_with_error( \ - lib, #symbol, paths[path_index]))) { \ + lib, #symbol, paths[path_index], verbose))) { \ return false; \ } #include "wayland_dynload_libdecor.h" diff --git a/intern/wayland_dynload/intern/wayland_dynload_utils.c b/intern/wayland_dynload/intern/wayland_dynload_utils.c index 743dac14eec..666de20c5d3 100644 --- a/intern/wayland_dynload/intern/wayland_dynload_utils.c +++ b/intern/wayland_dynload/intern/wayland_dynload_utils.c @@ -30,11 +30,16 @@ DynamicLibrary 
dynamic_library_open_array_with_error(const char **paths, return lib; } -void *dynamic_library_find_with_error(DynamicLibrary lib, const char *symbol, const char *path_lib) +void *dynamic_library_find_with_error(DynamicLibrary lib, + const char *symbol, + const char *path_lib, + const bool verbose) { void *symbol_var = dynamic_library_find(lib, symbol); if (symbol_var == NULL) { - fprintf(stderr, "Unable to find '%s' in '%s'.\n", symbol, path_lib); + if (verbose) { + fprintf(stderr, "Unable to find '%s' in '%s'.\n", symbol, path_lib); + } } return symbol_var; } diff --git a/intern/wayland_dynload/intern/wayland_dynload_utils.h b/intern/wayland_dynload/intern/wayland_dynload_utils.h index 785f32521e4..1814879615b 100644 --- a/intern/wayland_dynload/intern/wayland_dynload_utils.h +++ b/intern/wayland_dynload/intern/wayland_dynload_utils.h @@ -26,4 +26,5 @@ DynamicLibrary dynamic_library_open_array_with_error(const char **paths, /** Find a symbol, printing an error when the symbol isn't found. */ void *dynamic_library_find_with_error(DynamicLibrary lib, const char *symbol, - const char *path_lib); + const char *path_lib, + bool verbose); |