From d092933abbadb3a6d5ab53a0b2b3b865cd5c9079 Mon Sep 17 00:00:00 2001 From: Sayak Biswas Date: Thu, 21 Oct 2021 20:57:17 +0200 Subject: Cycles: various fixes for HIP and compilation of HIP binaries * Additional structs added to the hipew loader for device props * Adds hipRTC functions to the loader for future usage * Enables CPU+GPU usage for HIP * Cleanup to the adaptive kernel compilation process * Fix for kernel compilation failures with HIP with latest master Ref T92393, D12958 --- intern/cycles/blender/addon/properties.py | 2 +- intern/cycles/device/hip/device_impl.cpp | 24 ++++++++----- intern/cycles/kernel/CMakeLists.txt | 57 ++++++++++++++----------------- intern/cycles/kernel/device/hip/globals.h | 4 +-- 4 files changed, 44 insertions(+), 43 deletions(-) (limited to 'intern') diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 1d8ebe94694..2a51e0be2a4 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -1329,7 +1329,7 @@ class CyclesPreferences(bpy.types.AddonPreferences): elif entry.type == 'CPU': cpu_devices.append(entry) # Extend all GPU devices with CPU. - if compute_device_type != 'CPU' and compute_device_type != 'HIP': + if compute_device_type != 'CPU': devices.extend(cpu_devices) return devices diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp index 583ab8ae208..4ae714913ab 100644 --- a/intern/cycles/device/hip/device_impl.cpp +++ b/intern/cycles/device/hip/device_impl.cpp @@ -208,7 +208,7 @@ bool HIPDevice::use_adaptive_compilation() return DebugFlags().hip.adaptive_compile; } -/* Common NVCC flags which stays the same regardless of shading model, +/* Common HIPCC flags which stays the same regardless of shading model, * kernel sources md5 and only depends on compiler or compilation settings. */ string HIPDevice::compile_kernel_get_common_cflags(const uint kernel_features) @@ -239,11 +239,13 @@ string HIPDevice::compile_kernel(const uint kernel_features, int major, minor; hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId); hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId); + hipDeviceProp_t props; + hipGetDeviceProperties(&props, hipDevId); /* Attempt to use kernel provided with Blender. */ if (!use_adaptive_compilation()) { if (!force_ptx) { - const string fatbin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor)); + const string fatbin = path_get(string_printf("lib/%s_%s.fatbin", name, props.gcnArchName)); VLOG(1) << "Testing for pre-compiled kernel " << fatbin << "."; if (path_exists(fatbin)) { VLOG(1) << "Using precompiled kernel."; @@ -283,17 +285,21 @@ string HIPDevice::compile_kernel(const uint kernel_features, const string kernel_md5 = util_md5_string(source_md5 + common_cflags); const char *const kernel_ext = "genco"; + std::string options; # ifdef _WIN32 - const char *const options = - "save-temps -Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp"; + options.append("Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -ffast-math"); # else - const char *const options = - "save-temps -Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -O3 -ggdb"; + options.append("Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -O3 -ffast-math"); # endif +# ifdef _DEBUG + options.append(" -save-temps"); +# endif + options.append(" --amdgpu-target=").append(props.gcnArchName); + const string include_path = source_path; - const char *const kernel_arch = force_ptx ? "compute" : "sm"; + const char *const kernel_arch = props.gcnArchName; const string fatbin_file = string_printf( - "cycles_%s_%s_%d%d_%s", name, kernel_arch, major, minor, kernel_md5.c_str()); + "cycles_%s_%s_%s", name, kernel_arch, kernel_md5.c_str()); const string fatbin = path_cache_get(path_join("kernels", fatbin_file)); VLOG(1) << "Testing for locally compiled kernel " << fatbin << "."; if (path_exists(fatbin)) { @@ -350,7 +356,7 @@ string HIPDevice::compile_kernel(const uint kernel_features, string command = string_printf("%s -%s -I %s --%s %s -o \"%s\"", hipcc, - options, + options.c_str(), include_path.c_str(), kernel_ext, source_path.c_str(), diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 7357c5804ed..6c87c9c32f2 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -487,9 +487,6 @@ endif() # HIP module if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP) - # 64 bit only - set(HIP_BITS 64) - # build for each arch set(hip_sources device/hip/kernel.cpp ${SRC_HEADERS} @@ -504,32 +501,41 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP) set(hip_fatbins) macro(CYCLES_HIP_KERNEL_ADD arch prev_arch name flags sources experimental) - if(${arch} MATCHES "compute_.*") - set(format "ptx") - else() - set(format "fatbin") - endif() + set(format "fatbin") set(hip_file ${name}_${arch}.${format}) - set(kernel_sources ${sources}) if(NOT ${prev_arch} STREQUAL "none") - if(${prev_arch} MATCHES "compute_.*") - set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.ptx) - else() - set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.fatbin) - endif() + set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.fatbin) endif() set(hip_kernel_src "/device/hip/${name}.cpp") - set(hip_flags ${flags} + if(WIN32) + set(hip_command ${CMAKE_COMMAND}) + set(hip_flags + -E env "HIP_PATH=${HIP_ROOT_DIR}" "PATH=${HIP_PERL_PATH}" + ${HIP_HIPCC_EXECUTABLE}.bat) + else() + set(hip_command ${HIP_HIPCC_EXECUTABLE}) + set(hip_flags) + endif() + + set(hip_flags + ${hip_flags} + --amdgpu-target=${arch} + ${HIP_HIPCC_FLAGS} + --genco + ${CMAKE_CURRENT_SOURCE_DIR}${hip_kernel_src} + ${flags} -D CCL_NAMESPACE_BEGIN= -D CCL_NAMESPACE_END= -D HIPCC - -m ${HIP_BITS} -I ${CMAKE_CURRENT_SOURCE_DIR}/.. -I ${CMAKE_CURRENT_SOURCE_DIR}/device/hip - --use_fast_math + -Wno-parentheses-equality + -Wno-unused-value + --hipcc-func-supp + -ffast-math -o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file}) if(${experimental}) @@ -541,20 +547,9 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP) set(hip_flags ${hip_flags} -D __KERNEL_DEBUG__) endif() - if(WITH_NANOVDB) - set(hip_flags ${hip_flags} - -D WITH_NANOVDB - -I "${NANOVDB_INCLUDE_DIR}") - endif() - - add_custom_command( - OUTPUT ${hip_file} - COMMAND ${HIP_HIPCC_EXECUTABLE} - -arch=${arch} - ${HIP_HIPCC_FLAGS} - --${format} - ${CMAKE_CURRENT_SOURCE_DIR}${hip_kernel_src} - ${hip_flags} + add_custom_target( + ${hip_file} + COMMAND ${hip_command} ${hip_flags} DEPENDS ${kernel_sources}) delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hip_file}" ${CYCLES_INSTALL_PATH}/lib) list(APPEND hip_fatbins ${hip_file}) diff --git a/intern/cycles/kernel/device/hip/globals.h b/intern/cycles/kernel/device/hip/globals.h index 39978ae7899..28e1cc4282f 100644 --- a/intern/cycles/kernel/device/hip/globals.h +++ b/intern/cycles/kernel/device/hip/globals.h @@ -27,10 +27,10 @@ CCL_NAMESPACE_BEGIN /* Not actually used, just a NULL pointer that gets passed everywhere, which we * hope gets optimized out by the compiler. */ -struct KernelGlobals { - /* NOTE: Keep the size in sync with SHADOW_STACK_MAX_HITS. */ +struct KernelGlobalsGPU { int unused[1]; }; +typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals; /* Global scene data and textures */ __constant__ KernelData __data; -- cgit v1.2.3