Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Sayak Biswas <sayakAMD>, 2021-10-21 21:57:17 +0300
committer: Brecht Van Lommel <brecht@blender.org>, 2021-10-22 13:15:29 +0300
commit: d092933abbadb3a6d5ab53a0b2b3b865cd5c9079 (patch)
tree: 9d44286512efc1db894257fa9ed786a5049bb41b
parent: d1fcf93f039b0546dfd01c33daf50bd135e34344 (diff)
Cycles: various fixes for HIP and compilation of HIP binaries
* Additional structs added to the hipew loader for device props
* Adds hipRTC functions to the loader for future usage
* Enables CPU+GPU usage for HIP
* Cleanup to the adaptive kernel compilation process
* Fix for kernel compilation failures with HIP with latest master

Ref T92393, D12958
-rw-r--r-- extern/hipew/include/hipew.h 141
-rw-r--r-- extern/hipew/src/hipew.c 23
-rw-r--r-- intern/cycles/blender/addon/properties.py 2
-rw-r--r-- intern/cycles/device/hip/device_impl.cpp 24
-rw-r--r-- intern/cycles/kernel/CMakeLists.txt 57
-rw-r--r-- intern/cycles/kernel/device/hip/globals.h 4
6 files changed, 208 insertions, 43 deletions
diff --git a/extern/hipew/include/hipew.h b/extern/hipew/include/hipew.h
index aa42fdf8ecd..d18cf67524d 100644
--- a/extern/hipew/include/hipew.h
+++ b/extern/hipew/include/hipew.h
@@ -425,6 +425,105 @@ typedef struct HIPdevprop_st {
int textureAlign;
} HIPdevprop;
+/* Architectural feature flags, one single-bit field per capability.
+ * Embedded as the `arch` member of hipDeviceProp_t. */
+typedef struct {
+ // 32-bit Atomics
+ unsigned hasGlobalInt32Atomics : 1; ///< 32-bit integer atomics for global memory.
+ unsigned hasGlobalFloatAtomicExch : 1; ///< 32-bit float atomic exch for global memory.
+ unsigned hasSharedInt32Atomics : 1; ///< 32-bit integer atomics for shared memory.
+ unsigned hasSharedFloatAtomicExch : 1; ///< 32-bit float atomic exch for shared memory.
+ unsigned hasFloatAtomicAdd : 1; ///< 32-bit float atomic add in global and shared memory.
+
+ // 64-bit Atomics
+ unsigned hasGlobalInt64Atomics : 1; ///< 64-bit integer atomics for global memory.
+ unsigned hasSharedInt64Atomics : 1; ///< 64-bit integer atomics for shared memory.
+
+ // Doubles
+ unsigned hasDoubles : 1; ///< Double-precision floating point.
+
+ // Warp cross-lane operations
+ unsigned hasWarpVote : 1; ///< Warp vote instructions (__any, __all).
+ unsigned hasWarpBallot : 1; ///< Warp ballot instructions (__ballot).
+ unsigned hasWarpShuffle : 1; ///< Warp shuffle operations. (__shfl_*).
+ unsigned hasFunnelShift : 1; ///< Funnel two words into one with shift&mask caps.
+
+ // Sync
+ unsigned hasThreadFenceSystem : 1; ///< __threadfence_system.
+ unsigned hasSyncThreadsExt : 1; ///< __syncthreads_count, __syncthreads_and, __syncthreads_or.
+
+ // Misc
+ unsigned hasSurfaceFuncs : 1; ///< Surface functions.
+ unsigned has3dGrid : 1; ///< Grid and group dims are 3D (rather than 2D).
+ unsigned hasDynamicParallelism : 1; ///< Dynamic parallelism.
+} hipDeviceArch_t;
+
+/* Device properties, filled in by hipGetDeviceProperties().
+ * NOTE(review): presumably has to stay layout-compatible with the HIP runtime's own
+ * hipDeviceProp_t, so fields should not be reordered or resized -- confirm against
+ * the HIP runtime headers before changing anything here. */
+typedef struct hipDeviceProp_t {
+ char name[256]; ///< Device name.
+ size_t totalGlobalMem; ///< Size of global memory region (in bytes).
+ size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes).
+ int regsPerBlock; ///< Registers per block.
+ int warpSize; ///< Warp size.
+ int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size.
+ int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block.
+ int maxGridSize[3]; ///< Max grid dimensions (XYZ).
+ int clockRate; ///< Max clock frequency of the multiProcessors in khz.
+ int memoryClockRate; ///< Max global memory clock frequency in khz.
+ int memoryBusWidth; ///< Global memory bus width in bits.
+ size_t totalConstMem; ///< Size of constant memory region (in bytes).
+ int major; ///< Major compute capability. On HCC, this is an approximation and features may
+ ///< differ from CUDA CC. See the arch feature flags for portable ways to query
+ ///< feature caps.
+ int minor; ///< Minor compute capability. On HCC, this is an approximation and features may
+ ///< differ from CUDA CC. See the arch feature flags for portable ways to query
+ ///< feature caps.
+ int multiProcessorCount; ///< Number of multi-processors (compute units).
+ int l2CacheSize; ///< L2 cache size.
+ int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor.
+ int computeMode; ///< Compute mode.
+ int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*"
+ ///< instructions. New for HIP.
+ hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP.
+ int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently.
+ int pciDomainID; ///< PCI Domain ID.
+ int pciBusID; ///< PCI Bus ID.
+ int pciDeviceID; ///< PCI Device ID.
+ size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor.
+ int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not.
+ int canMapHostMemory; ///< Check whether HIP can map host memory.
+ int gcnArch; ///< DEPRECATED: use gcnArchName instead.
+ char gcnArchName[256]; ///< AMD GCN Arch Name.
+ int integrated; ///< APU vs dGPU.
+ int cooperativeLaunch; ///< HIP device supports cooperative launch.
+ int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple devices.
+ int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory.
+ int maxTexture1D; ///< Maximum number of elements in 1D images.
+ int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements.
+ int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image elements.
+ unsigned int* hdpMemFlushCntl; ///< Address of HDP_MEM_COHERENCY_FLUSH_CNTL register.
+ unsigned int* hdpRegFlushCntl; ///< Address of HDP_REG_COHERENCY_FLUSH_CNTL register.
+ size_t memPitch; ///< Maximum pitch in bytes allowed by memory copies.
+ size_t textureAlignment; ///< Alignment requirement for textures.
+ size_t texturePitchAlignment; ///< Pitch alignment requirement for texture references bound to pitched memory.
+ int kernelExecTimeoutEnabled; ///< Run time limit for kernels executed on the device.
+ int ECCEnabled; ///< Device has ECC support enabled.
+ int tccDriver; ///< 1: if device is a Tesla device using the TCC driver, else 0.
+ int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on multiple
+ ///< devices with unmatched functions.
+ int cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on multiple
+ ///< devices with unmatched grid dimensions.
+ int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on multiple
+ ///< devices with unmatched block dimensions.
+ int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on multiple
+ ///< devices with unmatched shared memories.
+ int isLargeBar; ///< 1: if it is a large PCI bar device, else 0.
+ int asicRevision; ///< Revision of the GPU in this device.
+ int managedMemory; ///< Device supports allocating managed memory on this system.
+ int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device without migration.
+ int concurrentManagedAccess; ///< Device can coherently access managed memory concurrently with the CPU.
+ int pageableMemoryAccess; ///< Device supports coherently accessing pageable memory
+ ///< without calling hipHostRegister on it.
+ int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's page tables.
+} hipDeviceProp_t;
+
typedef enum HIPpointer_attribute_enum {
HIP_POINTER_ATTRIBUTE_CONTEXT = 1,
HIP_POINTER_ATTRIBUTE_MEMORY_TYPE = 2,
@@ -951,6 +1050,25 @@ typedef enum HIPGLmap_flags_enum {
HIP_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02,
} HIPGLmap_flags;
+/**
+* hipRTC related types: the opaque program handle passed to the hiprtc*
+* entry points, and the status codes those entry points use.
+*/
+typedef struct _hiprtcProgram* hiprtcProgram;
+
+typedef enum hiprtcResult {
+ HIPRTC_SUCCESS = 0,
+ HIPRTC_ERROR_OUT_OF_MEMORY = 1,
+ HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
+ HIPRTC_ERROR_INVALID_INPUT = 3,
+ HIPRTC_ERROR_INVALID_PROGRAM = 4,
+ HIPRTC_ERROR_INVALID_OPTION = 5,
+ HIPRTC_ERROR_COMPILATION = 6,
+ HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7,
+ HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8,
+ HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9,
+ HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10,
+ HIPRTC_ERROR_INTERNAL_ERROR = 11
+} hiprtcResult;
/* Function types. */
typedef hipError_t HIPAPI thipGetErrorName(hipError_t error, const char** pStr);
@@ -958,6 +1076,7 @@ typedef hipError_t HIPAPI thipInit(unsigned int Flags);
typedef hipError_t HIPAPI thipDriverGetVersion(int* driverVersion);
typedef hipError_t HIPAPI thipGetDevice(hipDevice_t* device, int ordinal);
typedef hipError_t HIPAPI thipGetDeviceCount(int* count);
+typedef hipError_t HIPAPI thipGetDeviceProperties(hipDeviceProp_t* props, int deviceId);
typedef hipError_t HIPAPI thipDeviceGetName(char* name, int len, hipDevice_t dev);
typedef hipError_t HIPAPI thipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attrib, hipDevice_t dev);
typedef hipError_t HIPAPI thipDeviceComputeCapability(int* major, int* minor, hipDevice_t dev);
@@ -1071,6 +1190,16 @@ typedef hipError_t HIPAPI thipGraphicsMapResources(unsigned int count, hipGraphi
typedef hipError_t HIPAPI thipGraphicsUnmapResources(unsigned int count, hipGraphicsResource* resources, hipStream_t hStream);
typedef hipError_t HIPAPI thipGraphicsGLRegisterBuffer(hipGraphicsResource* pCudaResource, GLuint buffer, unsigned int Flags);
typedef hipError_t HIPAPI thipGLGetDevices(unsigned int* pHipDeviceCount, int* pHipDevices, unsigned int hipDeviceCount, hipGLDeviceList deviceList);
+/* hipRTC function types. Every entry point reports a hiprtcResult status, except
+ * hiprtcGetErrorString() which returns the message text for a given status. */
+typedef const char* HIPAPI thiprtcGetErrorString(hiprtcResult result);
+typedef hiprtcResult HIPAPI thiprtcAddNameExpression(hiprtcProgram prog, const char* name_expression);
+typedef hiprtcResult HIPAPI thiprtcCompileProgram(hiprtcProgram prog, int numOptions, const char** options);
+typedef hiprtcResult HIPAPI thiprtcCreateProgram(hiprtcProgram* prog, const char* src, const char* name, int numHeaders, const char** headers, const char** includeNames);
+typedef hiprtcResult HIPAPI thiprtcDestroyProgram(hiprtcProgram* prog);
+typedef hiprtcResult HIPAPI thiprtcGetLoweredName(hiprtcProgram prog, const char* name_expression, const char** lowered_name);
+typedef hiprtcResult HIPAPI thiprtcGetProgramLog(hiprtcProgram prog, char* log);
+typedef hiprtcResult HIPAPI thiprtcGetProgramLogSize(hiprtcProgram prog, size_t* logSizeRet);
+typedef hiprtcResult HIPAPI thiprtcGetCode(hiprtcProgram prog, char* code);
+typedef hiprtcResult HIPAPI thiprtcGetCodeSize(hiprtcProgram prog, size_t* codeSizeRet);
/* Function declarations. */
@@ -1079,6 +1208,7 @@ extern thipInit *hipInit;
extern thipDriverGetVersion *hipDriverGetVersion;
extern thipGetDevice *hipGetDevice;
extern thipGetDeviceCount *hipGetDeviceCount;
+extern thipGetDeviceProperties *hipGetDeviceProperties;
extern thipDeviceGetName *hipDeviceGetName;
extern thipDeviceGetAttribute *hipDeviceGetAttribute;
extern thipDeviceComputeCapability *hipDeviceComputeCapability;
@@ -1187,6 +1317,17 @@ extern thipGraphicsUnmapResources *hipGraphicsUnmapResources;
extern thipGraphicsGLRegisterBuffer *hipGraphicsGLRegisterBuffer;
extern thipGLGetDevices *hipGLGetDevices;
+extern thiprtcGetErrorString* hiprtcGetErrorString;
+extern thiprtcAddNameExpression* hiprtcAddNameExpression;
+extern thiprtcCompileProgram* hiprtcCompileProgram;
+extern thiprtcCreateProgram* hiprtcCreateProgram;
+extern thiprtcDestroyProgram* hiprtcDestroyProgram;
+extern thiprtcGetLoweredName* hiprtcGetLoweredName;
+extern thiprtcGetProgramLog* hiprtcGetProgramLog;
+extern thiprtcGetProgramLogSize* hiprtcGetProgramLogSize;
+extern thiprtcGetCode* hiprtcGetCode;
+extern thiprtcGetCodeSize* hiprtcGetCodeSize;
+
enum {
HIPEW_SUCCESS = 0,
diff --git a/extern/hipew/src/hipew.c b/extern/hipew/src/hipew.c
index 9d5a63f869a..02cec1ba28f 100644
--- a/extern/hipew/src/hipew.c
+++ b/extern/hipew/src/hipew.c
@@ -70,6 +70,7 @@ thipInit *hipInit;
thipDriverGetVersion *hipDriverGetVersion;
thipGetDevice *hipGetDevice;
thipGetDeviceCount *hipGetDeviceCount;
+thipGetDeviceProperties *hipGetDeviceProperties;
thipDeviceGetName *hipDeviceGetName;
thipDeviceGetAttribute *hipDeviceGetAttribute;
thipDeviceComputeCapability *hipDeviceComputeCapability;
@@ -178,6 +179,17 @@ thipGraphicsResourceGetMappedPointer *hipGraphicsResourceGetMappedPointer;
thipGraphicsGLRegisterBuffer *hipGraphicsGLRegisterBuffer;
thipGLGetDevices *hipGLGetDevices;
+thiprtcGetErrorString* hiprtcGetErrorString;
+thiprtcAddNameExpression* hiprtcAddNameExpression;
+thiprtcCompileProgram* hiprtcCompileProgram;
+thiprtcCreateProgram* hiprtcCreateProgram;
+thiprtcDestroyProgram* hiprtcDestroyProgram;
+thiprtcGetLoweredName* hiprtcGetLoweredName;
+thiprtcGetProgramLog* hiprtcGetProgramLog;
+thiprtcGetProgramLogSize* hiprtcGetProgramLogSize;
+thiprtcGetCode* hiprtcGetCode;
+thiprtcGetCodeSize* hiprtcGetCodeSize;
+
static DynamicLibrary dynamic_library_open_find(const char **paths) {
@@ -242,6 +254,7 @@ static int hipewHipInit(void) {
HIP_LIBRARY_FIND_CHECKED(hipDriverGetVersion);
HIP_LIBRARY_FIND_CHECKED(hipGetDevice);
HIP_LIBRARY_FIND_CHECKED(hipGetDeviceCount);
+ HIP_LIBRARY_FIND_CHECKED(hipGetDeviceProperties);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetName);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetAttribute);
HIP_LIBRARY_FIND_CHECKED(hipDeviceComputeCapability);
@@ -346,6 +359,16 @@ static int hipewHipInit(void) {
HIP_LIBRARY_FIND_CHECKED(hipGraphicsGLRegisterBuffer);
HIP_LIBRARY_FIND_CHECKED(hipGLGetDevices);
#endif
+ HIP_LIBRARY_FIND_CHECKED(hiprtcGetErrorString);
+ HIP_LIBRARY_FIND_CHECKED(hiprtcAddNameExpression);
+ HIP_LIBRARY_FIND_CHECKED(hiprtcCompileProgram);
+ HIP_LIBRARY_FIND_CHECKED(hiprtcCreateProgram);
+ HIP_LIBRARY_FIND_CHECKED(hiprtcDestroyProgram);
+ HIP_LIBRARY_FIND_CHECKED(hiprtcGetLoweredName);
+ HIP_LIBRARY_FIND_CHECKED(hiprtcGetProgramLog);
+ HIP_LIBRARY_FIND_CHECKED(hiprtcGetProgramLogSize);
+ HIP_LIBRARY_FIND_CHECKED(hiprtcGetCode);
+ HIP_LIBRARY_FIND_CHECKED(hiprtcGetCodeSize);
result = HIPEW_SUCCESS;
return result;
}
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 1d8ebe94694..2a51e0be2a4 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -1329,7 +1329,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
elif entry.type == 'CPU':
cpu_devices.append(entry)
# Extend all GPU devices with CPU.
- if compute_device_type != 'CPU' and compute_device_type != 'HIP':
+ if compute_device_type != 'CPU':
devices.extend(cpu_devices)
return devices
diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp
index 583ab8ae208..4ae714913ab 100644
--- a/intern/cycles/device/hip/device_impl.cpp
+++ b/intern/cycles/device/hip/device_impl.cpp
@@ -208,7 +208,7 @@ bool HIPDevice::use_adaptive_compilation()
return DebugFlags().hip.adaptive_compile;
}
-/* Common NVCC flags which stays the same regardless of shading model,
+/* Common HIPCC flags which stays the same regardless of shading model,
* kernel sources md5 and only depends on compiler or compilation settings.
*/
string HIPDevice::compile_kernel_get_common_cflags(const uint kernel_features)
@@ -239,11 +239,13 @@ string HIPDevice::compile_kernel(const uint kernel_features,
int major, minor;
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
+ hipDeviceProp_t props;
+ hipGetDeviceProperties(&props, hipDevId);
/* Attempt to use kernel provided with Blender. */
if (!use_adaptive_compilation()) {
if (!force_ptx) {
- const string fatbin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor));
+ const string fatbin = path_get(string_printf("lib/%s_%s.fatbin", name, props.gcnArchName));
VLOG(1) << "Testing for pre-compiled kernel " << fatbin << ".";
if (path_exists(fatbin)) {
VLOG(1) << "Using precompiled kernel.";
@@ -283,17 +285,21 @@ string HIPDevice::compile_kernel(const uint kernel_features,
const string kernel_md5 = util_md5_string(source_md5 + common_cflags);
const char *const kernel_ext = "genco";
+ std::string options;
# ifdef _WIN32
- const char *const options =
- "save-temps -Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp";
+ options.append("Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -ffast-math");
# else
- const char *const options =
- "save-temps -Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -O3 -ggdb";
+ options.append("Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -O3 -ffast-math");
# endif
+# ifdef _DEBUG
+ options.append(" -save-temps");
+# endif
+ options.append(" --amdgpu-target=").append(props.gcnArchName);
+
const string include_path = source_path;
- const char *const kernel_arch = force_ptx ? "compute" : "sm";
+ const char *const kernel_arch = props.gcnArchName;
const string fatbin_file = string_printf(
- "cycles_%s_%s_%d%d_%s", name, kernel_arch, major, minor, kernel_md5.c_str());
+ "cycles_%s_%s_%s", name, kernel_arch, kernel_md5.c_str());
const string fatbin = path_cache_get(path_join("kernels", fatbin_file));
VLOG(1) << "Testing for locally compiled kernel " << fatbin << ".";
if (path_exists(fatbin)) {
@@ -350,7 +356,7 @@ string HIPDevice::compile_kernel(const uint kernel_features,
string command = string_printf("%s -%s -I %s --%s %s -o \"%s\"",
hipcc,
- options,
+ options.c_str(),
include_path.c_str(),
kernel_ext,
source_path.c_str(),
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 7357c5804ed..6c87c9c32f2 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -487,9 +487,6 @@ endif()
# HIP module
if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
- # 64 bit only
- set(HIP_BITS 64)
-
# build for each arch
set(hip_sources device/hip/kernel.cpp
${SRC_HEADERS}
@@ -504,32 +501,41 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
set(hip_fatbins)
macro(CYCLES_HIP_KERNEL_ADD arch prev_arch name flags sources experimental)
- if(${arch} MATCHES "compute_.*")
- set(format "ptx")
- else()
- set(format "fatbin")
- endif()
+ set(format "fatbin")
set(hip_file ${name}_${arch}.${format})
-
set(kernel_sources ${sources})
if(NOT ${prev_arch} STREQUAL "none")
- if(${prev_arch} MATCHES "compute_.*")
- set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.ptx)
- else()
- set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.fatbin)
- endif()
+ set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.fatbin)
endif()
set(hip_kernel_src "/device/hip/${name}.cpp")
- set(hip_flags ${flags}
+ if(WIN32)
+ set(hip_command ${CMAKE_COMMAND})
+ set(hip_flags
+ -E env "HIP_PATH=${HIP_ROOT_DIR}" "PATH=${HIP_PERL_PATH}"
+ ${HIP_HIPCC_EXECUTABLE}.bat)
+ else()
+ set(hip_command ${HIP_HIPCC_EXECUTABLE})
+ set(hip_flags)
+ endif()
+
+ set(hip_flags
+ ${hip_flags}
+ --amdgpu-target=${arch}
+ ${HIP_HIPCC_FLAGS}
+ --genco
+ ${CMAKE_CURRENT_SOURCE_DIR}${hip_kernel_src}
+ ${flags}
-D CCL_NAMESPACE_BEGIN=
-D CCL_NAMESPACE_END=
-D HIPCC
- -m ${HIP_BITS}
-I ${CMAKE_CURRENT_SOURCE_DIR}/..
-I ${CMAKE_CURRENT_SOURCE_DIR}/device/hip
- --use_fast_math
+ -Wno-parentheses-equality
+ -Wno-unused-value
+ --hipcc-func-supp
+ -ffast-math
-o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})
if(${experimental})
@@ -541,20 +547,9 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
set(hip_flags ${hip_flags} -D __KERNEL_DEBUG__)
endif()
- if(WITH_NANOVDB)
- set(hip_flags ${hip_flags}
- -D WITH_NANOVDB
- -I "${NANOVDB_INCLUDE_DIR}")
- endif()
-
- add_custom_command(
- OUTPUT ${hip_file}
- COMMAND ${HIP_HIPCC_EXECUTABLE}
- -arch=${arch}
- ${HIP_HIPCC_FLAGS}
- --${format}
- ${CMAKE_CURRENT_SOURCE_DIR}${hip_kernel_src}
- ${hip_flags}
+ # Declare the fatbin as the OUTPUT of a build rule rather than as a custom
+ # target: a custom target is always considered out of date, and it gives
+ # nothing for a file-level DEPENDS on ${hip_file} to resolve to.
+ add_custom_command(
+ OUTPUT ${hip_file}
+ COMMAND ${hip_command} ${hip_flags}
DEPENDS ${kernel_sources})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hip_file}" ${CYCLES_INSTALL_PATH}/lib)
list(APPEND hip_fatbins ${hip_file})
diff --git a/intern/cycles/kernel/device/hip/globals.h b/intern/cycles/kernel/device/hip/globals.h
index 39978ae7899..28e1cc4282f 100644
--- a/intern/cycles/kernel/device/hip/globals.h
+++ b/intern/cycles/kernel/device/hip/globals.h
@@ -27,10 +27,10 @@ CCL_NAMESPACE_BEGIN
/* Not actually used, just a NULL pointer that gets passed everywhere, which we
* hope gets optimized out by the compiler. */
-struct KernelGlobals {
- /* NOTE: Keep the size in sync with SHADOW_STACK_MAX_HITS. */
+struct KernelGlobalsGPU {
int unused[1];
};
+typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals;
/* Global scene data and textures */
__constant__ KernelData __data;