Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/intern
diff options
context:
space:
mode:
author: Sayak Biswas <sayakAMD> 2021-10-21 21:57:17 +0300
committer: Brecht Van Lommel <brecht@blender.org> 2021-10-22 13:15:29 +0300
commit: d092933abbadb3a6d5ab53a0b2b3b865cd5c9079 (patch)
tree: 9d44286512efc1db894257fa9ed786a5049bb41b /intern
parent: d1fcf93f039b0546dfd01c33daf50bd135e34344 (diff)
Cycles: various fixes for HIP and compilation of HIP binaries
* Additional structs added to the hipew loader for device props
* Adds hipRTC functions to the loader for future usage
* Enables CPU+GPU usage for HIP
* Cleanup to the adaptive kernel compilation process
* Fix for kernel compilation failures with HIP with latest master

Ref T92393, D12958
Diffstat (limited to 'intern')
-rw-r--r-- intern/cycles/blender/addon/properties.py | 2
-rw-r--r-- intern/cycles/device/hip/device_impl.cpp | 24
-rw-r--r-- intern/cycles/kernel/CMakeLists.txt | 57
-rw-r--r-- intern/cycles/kernel/device/hip/globals.h | 4
4 files changed, 44 insertions, 43 deletions
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 1d8ebe94694..2a51e0be2a4 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -1329,7 +1329,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
elif entry.type == 'CPU':
cpu_devices.append(entry)
# Extend all GPU devices with CPU.
- if compute_device_type != 'CPU' and compute_device_type != 'HIP':
+ if compute_device_type != 'CPU':
devices.extend(cpu_devices)
return devices
diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp
index 583ab8ae208..4ae714913ab 100644
--- a/intern/cycles/device/hip/device_impl.cpp
+++ b/intern/cycles/device/hip/device_impl.cpp
@@ -208,7 +208,7 @@ bool HIPDevice::use_adaptive_compilation()
return DebugFlags().hip.adaptive_compile;
}
-/* Common NVCC flags which stays the same regardless of shading model,
+/* Common HIPCC flags which stays the same regardless of shading model,
* kernel sources md5 and only depends on compiler or compilation settings.
*/
string HIPDevice::compile_kernel_get_common_cflags(const uint kernel_features)
@@ -239,11 +239,13 @@ string HIPDevice::compile_kernel(const uint kernel_features,
int major, minor;
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
+ hipDeviceProp_t props;
+ hipGetDeviceProperties(&props, hipDevId);
/* Attempt to use kernel provided with Blender. */
if (!use_adaptive_compilation()) {
if (!force_ptx) {
- const string fatbin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor));
+ const string fatbin = path_get(string_printf("lib/%s_%s.fatbin", name, props.gcnArchName));
VLOG(1) << "Testing for pre-compiled kernel " << fatbin << ".";
if (path_exists(fatbin)) {
VLOG(1) << "Using precompiled kernel.";
@@ -283,17 +285,21 @@ string HIPDevice::compile_kernel(const uint kernel_features,
const string kernel_md5 = util_md5_string(source_md5 + common_cflags);
const char *const kernel_ext = "genco";
+ std::string options;
# ifdef _WIN32
- const char *const options =
- "save-temps -Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp";
+ options.append("Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -ffast-math");
# else
- const char *const options =
- "save-temps -Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -O3 -ggdb";
+ options.append("Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -O3 -ffast-math");
# endif
+# ifdef _DEBUG
+ options.append(" -save-temps");
+# endif
+ options.append(" --amdgpu-target=").append(props.gcnArchName);
+
const string include_path = source_path;
- const char *const kernel_arch = force_ptx ? "compute" : "sm";
+ const char *const kernel_arch = props.gcnArchName;
const string fatbin_file = string_printf(
- "cycles_%s_%s_%d%d_%s", name, kernel_arch, major, minor, kernel_md5.c_str());
+ "cycles_%s_%s_%s", name, kernel_arch, kernel_md5.c_str());
const string fatbin = path_cache_get(path_join("kernels", fatbin_file));
VLOG(1) << "Testing for locally compiled kernel " << fatbin << ".";
if (path_exists(fatbin)) {
@@ -350,7 +356,7 @@ string HIPDevice::compile_kernel(const uint kernel_features,
string command = string_printf("%s -%s -I %s --%s %s -o \"%s\"",
hipcc,
- options,
+ options.c_str(),
include_path.c_str(),
kernel_ext,
source_path.c_str(),
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 7357c5804ed..6c87c9c32f2 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -487,9 +487,6 @@ endif()
# HIP module
if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
- # 64 bit only
- set(HIP_BITS 64)
-
# build for each arch
set(hip_sources device/hip/kernel.cpp
${SRC_HEADERS}
@@ -504,32 +501,41 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
set(hip_fatbins)
macro(CYCLES_HIP_KERNEL_ADD arch prev_arch name flags sources experimental)
- if(${arch} MATCHES "compute_.*")
- set(format "ptx")
- else()
- set(format "fatbin")
- endif()
+ set(format "fatbin")
set(hip_file ${name}_${arch}.${format})
-
set(kernel_sources ${sources})
if(NOT ${prev_arch} STREQUAL "none")
- if(${prev_arch} MATCHES "compute_.*")
- set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.ptx)
- else()
- set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.fatbin)
- endif()
+ set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.fatbin)
endif()
set(hip_kernel_src "/device/hip/${name}.cpp")
- set(hip_flags ${flags}
+ if(WIN32)
+ set(hip_command ${CMAKE_COMMAND})
+ set(hip_flags
+ -E env "HIP_PATH=${HIP_ROOT_DIR}" "PATH=${HIP_PERL_PATH}"
+ ${HIP_HIPCC_EXECUTABLE}.bat)
+ else()
+ set(hip_command ${HIP_HIPCC_EXECUTABLE})
+ set(hip_flags)
+ endif()
+
+ set(hip_flags
+ ${hip_flags}
+ --amdgpu-target=${arch}
+ ${HIP_HIPCC_FLAGS}
+ --genco
+ ${CMAKE_CURRENT_SOURCE_DIR}${hip_kernel_src}
+ ${flags}
-D CCL_NAMESPACE_BEGIN=
-D CCL_NAMESPACE_END=
-D HIPCC
- -m ${HIP_BITS}
-I ${CMAKE_CURRENT_SOURCE_DIR}/..
-I ${CMAKE_CURRENT_SOURCE_DIR}/device/hip
- --use_fast_math
+ -Wno-parentheses-equality
+ -Wno-unused-value
+ --hipcc-func-supp
+ -ffast-math
-o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})
if(${experimental})
@@ -541,20 +547,9 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
set(hip_flags ${hip_flags} -D __KERNEL_DEBUG__)
endif()
- if(WITH_NANOVDB)
- set(hip_flags ${hip_flags}
- -D WITH_NANOVDB
- -I "${NANOVDB_INCLUDE_DIR}")
- endif()
-
- add_custom_command(
- OUTPUT ${hip_file}
- COMMAND ${HIP_HIPCC_EXECUTABLE}
- -arch=${arch}
- ${HIP_HIPCC_FLAGS}
- --${format}
- ${CMAKE_CURRENT_SOURCE_DIR}${hip_kernel_src}
- ${hip_flags}
+ add_custom_target(
+ ${hip_file}
+ COMMAND ${hip_command} ${hip_flags}
DEPENDS ${kernel_sources})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hip_file}" ${CYCLES_INSTALL_PATH}/lib)
list(APPEND hip_fatbins ${hip_file})
diff --git a/intern/cycles/kernel/device/hip/globals.h b/intern/cycles/kernel/device/hip/globals.h
index 39978ae7899..28e1cc4282f 100644
--- a/intern/cycles/kernel/device/hip/globals.h
+++ b/intern/cycles/kernel/device/hip/globals.h
@@ -27,10 +27,10 @@ CCL_NAMESPACE_BEGIN
/* Not actually used, just a NULL pointer that gets passed everywhere, which we
* hope gets optimized out by the compiler. */
-struct KernelGlobals {
- /* NOTE: Keep the size in sync with SHADOW_STACK_MAX_HITS. */
+struct KernelGlobalsGPU {
int unused[1];
};
+typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals;
/* Global scene data and textures */
__constant__ KernelData __data;