diff options
author | Sayak Biswas <sayakAMD> | 2021-10-21 21:57:17 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2021-10-22 13:15:29 +0300 |
commit | d092933abbadb3a6d5ab53a0b2b3b865cd5c9079 (patch) | |
tree | 9d44286512efc1db894257fa9ed786a5049bb41b | |
parent | d1fcf93f039b0546dfd01c33daf50bd135e34344 (diff) |
Cycles: various fixes for HIP and compilation of HIP binaries
* Additional structs added to the hipew loader for device props
* Adds hipRTC functions to the loader for future usage
* Enables CPU+GPU usage for HIP
* Cleanup to the adaptive kernel compilation process
* Fix for kernel compilation failures with HIP with latest master
Ref T92393, D12958
-rw-r--r-- | extern/hipew/include/hipew.h | 141 | ||||
-rw-r--r-- | extern/hipew/src/hipew.c | 23 | ||||
-rw-r--r-- | intern/cycles/blender/addon/properties.py | 2 | ||||
-rw-r--r-- | intern/cycles/device/hip/device_impl.cpp | 24 | ||||
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 57 | ||||
-rw-r--r-- | intern/cycles/kernel/device/hip/globals.h | 4 |
6 files changed, 208 insertions, 43 deletions
diff --git a/extern/hipew/include/hipew.h b/extern/hipew/include/hipew.h index aa42fdf8ecd..d18cf67524d 100644 --- a/extern/hipew/include/hipew.h +++ b/extern/hipew/include/hipew.h @@ -425,6 +425,105 @@ typedef struct HIPdevprop_st { int textureAlign; } HIPdevprop; +typedef struct { + // 32-bit Atomics + unsigned hasGlobalInt32Atomics : 1; ///< 32-bit integer atomics for global memory. + unsigned hasGlobalFloatAtomicExch : 1; ///< 32-bit float atomic exch for global memory. + unsigned hasSharedInt32Atomics : 1; ///< 32-bit integer atomics for shared memory. + unsigned hasSharedFloatAtomicExch : 1; ///< 32-bit float atomic exch for shared memory. + unsigned hasFloatAtomicAdd : 1; ///< 32-bit float atomic add in global and shared memory. + + // 64-bit Atomics + unsigned hasGlobalInt64Atomics : 1; ///< 64-bit integer atomics for global memory. + unsigned hasSharedInt64Atomics : 1; ///< 64-bit integer atomics for shared memory. + + // Doubles + unsigned hasDoubles : 1; ///< Double-precision floating point. + + // Warp cross-lane operations + unsigned hasWarpVote : 1; ///< Warp vote instructions (__any, __all). + unsigned hasWarpBallot : 1; ///< Warp ballot instructions (__ballot). + unsigned hasWarpShuffle : 1; ///< Warp shuffle operations. (__shfl_*). + unsigned hasFunnelShift : 1; ///< Funnel two words into one with shift&mask caps. + + // Sync + unsigned hasThreadFenceSystem : 1; ///< __threadfence_system. + unsigned hasSyncThreadsExt : 1; ///< __syncthreads_count, syncthreads_and, syncthreads_or. + + // Misc + unsigned hasSurfaceFuncs : 1; ///< Surface functions. + unsigned has3dGrid : 1; ///< Grid and group dims are 3D (rather than 2D). + unsigned hasDynamicParallelism : 1; ///< Dynamic parallelism. +} hipDeviceArch_t; + +typedef struct hipDeviceProp_t { + char name[256]; ///< Device name. + size_t totalGlobalMem; ///< Size of global memory region (in bytes). + size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes). + int regsPerBlock; ///< Registers per block. + int warpSize; ///< Warp size. + int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size. + int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block. + int maxGridSize[3]; ///< Max grid dimensions (XYZ). + int clockRate; ///< Max clock frequency of the multiProcessors in khz. + int memoryClockRate; ///< Max global memory clock frequency in khz. + int memoryBusWidth; ///< Global memory bus width in bits. + size_t totalConstMem; ///< Size of shared memory region (in bytes). + int major; ///< Major compute capability. On HCC, this is an approximation and features may + ///< differ from CUDA CC. See the arch feature flags for portable ways to query + ///< feature caps. + int minor; ///< Minor compute capability. On HCC, this is an approximation and features may + ///< differ from CUDA CC. See the arch feature flags for portable ways to query + ///< feature caps. + int multiProcessorCount; ///< Number of multi-processors (compute units). + int l2CacheSize; ///< L2 cache size. + int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor. + int computeMode; ///< Compute mode. + int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*" + ///< instructions. New for HIP. + hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP. + int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently. + int pciDomainID; ///< PCI Domain ID + int pciBusID; ///< PCI Bus ID. + int pciDeviceID; ///< PCI Device ID. + size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor. + int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not. + int canMapHostMemory; ///< Check whether HIP can map host memory + int gcnArch; ///< DEPRECATED: use gcnArchName instead + char gcnArchName[256]; ///< AMD GCN Arch Name. + int integrated; ///< APU vs dGPU + int cooperativeLaunch; ///< HIP device supports cooperative launch + int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple devices + int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory + int maxTexture1D; ///< Maximum number of elements in 1D images + int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements + int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image elements + unsigned int* hdpMemFlushCntl; ///< Addres of HDP_MEM_COHERENCY_FLUSH_CNTL register + unsigned int* hdpRegFlushCntl; ///< Addres of HDP_REG_COHERENCY_FLUSH_CNTL register + size_t memPitch; ///<Maximum pitch in bytes allowed by memory copies + size_t textureAlignment; ///<Alignment requirement for textures + size_t texturePitchAlignment; ///<Pitch alignment requirement for texture references bound to pitched memory + int kernelExecTimeoutEnabled; ///<Run time limit for kernels executed on the device + int ECCEnabled; ///<Device has ECC support enabled + int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0 + int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on multiple + ///devices with unmatched functions + int cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on multiple + ///devices with unmatched grid dimensions + int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on multiple + ///devices with unmatched block dimensions + int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on multiple + ///devices with unmatched shared memories + int isLargeBar; ///< 1: if it is a large PCI bar device, else 0 + int asicRevision; ///< Revision of the GPU in this device + int managedMemory; ///< Device supports allocating managed memory on this system + int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device without migration + int concurrentManagedAccess; ///< Device can coherently access managed memory concurrently with the CPU + int pageableMemoryAccess; ///< Device supports coherently accessing pageable memory + ///< without calling hipHostRegister on it + int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's page tables +} hipDeviceProp_t; + typedef enum HIPpointer_attribute_enum { HIP_POINTER_ATTRIBUTE_CONTEXT = 1, HIP_POINTER_ATTRIBUTE_MEMORY_TYPE = 2, @@ -951,6 +1050,25 @@ typedef enum HIPGLmap_flags_enum { HIP_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02, } HIPGLmap_flags; +/** +* hipRTC related +*/ +typedef struct _hiprtcProgram* hiprtcProgram; + +typedef enum hiprtcResult { + HIPRTC_SUCCESS = 0, + HIPRTC_ERROR_OUT_OF_MEMORY = 1, + HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2, + HIPRTC_ERROR_INVALID_INPUT = 3, + HIPRTC_ERROR_INVALID_PROGRAM = 4, + HIPRTC_ERROR_INVALID_OPTION = 5, + HIPRTC_ERROR_COMPILATION = 6, + HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7, + HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8, + HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9, + HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10, + HIPRTC_ERROR_INTERNAL_ERROR = 11 +} hiprtcResult; /* Function types. */ typedef hipError_t HIPAPI thipGetErrorName(hipError_t error, const char** pStr); @@ -958,6 +1076,7 @@ typedef hipError_t HIPAPI thipInit(unsigned int Flags); typedef hipError_t HIPAPI thipDriverGetVersion(int* driverVersion); typedef hipError_t HIPAPI thipGetDevice(hipDevice_t* device, int ordinal); typedef hipError_t HIPAPI thipGetDeviceCount(int* count); +typedef hipError_t HIPAPI thipGetDeviceProperties(hipDeviceProp_t* props, int deviceId); typedef hipError_t HIPAPI thipDeviceGetName(char* name, int len, hipDevice_t dev); typedef hipError_t HIPAPI thipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attrib, hipDevice_t dev); typedef hipError_t HIPAPI thipDeviceComputeCapability(int* major, int* minor, hipDevice_t dev); @@ -1071,6 +1190,16 @@ typedef hipError_t HIPAPI thipGraphicsMapResources(unsigned int count, hipGraphi typedef hipError_t HIPAPI thipGraphicsUnmapResources(unsigned int count, hipGraphicsResource* resources, hipStream_t hStream); typedef hipError_t HIPAPI thipGraphicsGLRegisterBuffer(hipGraphicsResource* pCudaResource, GLuint buffer, unsigned int Flags); typedef hipError_t HIPAPI thipGLGetDevices(unsigned int* pHipDeviceCount, int* pHipDevices, unsigned int hipDeviceCount, hipGLDeviceList deviceList); +typedef hiprtcResult HIPAPI thiprtcGetErrorString(hiprtcResult result); +typedef hiprtcResult HIPAPI thiprtcAddNameExpression(hiprtcProgram prog, const char* name_expression); +typedef hiprtcResult HIPAPI thiprtcCompileProgram(hiprtcProgram prog, int numOptions, const char** options); +typedef hiprtcResult HIPAPI thiprtcCreateProgram(hiprtcProgram* prog, const char* src, const char* name, int numHeaders, const char** headers, const char** includeNames); +typedef hiprtcResult HIPAPI thiprtcDestroyProgram(hiprtcProgram* prog); +typedef hiprtcResult HIPAPI thiprtcGetLoweredName(hiprtcProgram prog, const char* name_expression, const char** lowered_name); +typedef hiprtcResult HIPAPI thiprtcGetProgramLog(hiprtcProgram prog, char* log); +typedef hiprtcResult HIPAPI thiprtcGetProgramLogSize(hiprtcProgram prog, size_t* logSizeRet); +typedef hiprtcResult HIPAPI thiprtcGetCode(hiprtcProgram prog, char* code); +typedef hiprtcResult HIPAPI thiprtcGetCodeSize(hiprtcProgram prog, size_t* codeSizeRet); /* Function declarations. */ @@ -1079,6 +1208,7 @@ extern thipInit *hipInit; extern thipDriverGetVersion *hipDriverGetVersion; extern thipGetDevice *hipGetDevice; extern thipGetDeviceCount *hipGetDeviceCount; +extern thipGetDeviceProperties *hipGetDeviceProperties; extern thipDeviceGetName *hipDeviceGetName; extern thipDeviceGetAttribute *hipDeviceGetAttribute; extern thipDeviceComputeCapability *hipDeviceComputeCapability; @@ -1187,6 +1317,17 @@ extern thipGraphicsUnmapResources *hipGraphicsUnmapResources; extern thipGraphicsGLRegisterBuffer *hipGraphicsGLRegisterBuffer; extern thipGLGetDevices *hipGLGetDevices; +extern thiprtcGetErrorString* hiprtcGetErrorString; +extern thiprtcAddNameExpression* hiprtcAddNameExpression; +extern thiprtcCompileProgram* hiprtcCompileProgram; +extern thiprtcCreateProgram* hiprtcCreateProgram; +extern thiprtcDestroyProgram* hiprtcDestroyProgram; +extern thiprtcGetLoweredName* hiprtcGetLoweredName; +extern thiprtcGetProgramLog* hiprtcGetProgramLog; +extern thiprtcGetProgramLogSize* hiprtcGetProgramLogSize; +extern thiprtcGetCode* hiprtcGetCode; +extern thiprtcGetCodeSize* hiprtcGetCodeSize; + enum { HIPEW_SUCCESS = 0, diff --git a/extern/hipew/src/hipew.c b/extern/hipew/src/hipew.c index 9d5a63f869a..02cec1ba28f 100644 --- a/extern/hipew/src/hipew.c +++ b/extern/hipew/src/hipew.c @@ -70,6 +70,7 @@ thipInit *hipInit; thipDriverGetVersion *hipDriverGetVersion; thipGetDevice *hipGetDevice; thipGetDeviceCount *hipGetDeviceCount; +thipGetDeviceProperties *hipGetDeviceProperties; thipDeviceGetName *hipDeviceGetName; thipDeviceGetAttribute *hipDeviceGetAttribute; thipDeviceComputeCapability *hipDeviceComputeCapability; @@ -178,6 +179,17 @@ thipGraphicsResourceGetMappedPointer *hipGraphicsResourceGetMappedPointer; thipGraphicsGLRegisterBuffer *hipGraphicsGLRegisterBuffer; thipGLGetDevices *hipGLGetDevices; +thiprtcGetErrorString* hiprtcGetErrorString; +thiprtcAddNameExpression* hiprtcAddNameExpression; +thiprtcCompileProgram* hiprtcCompileProgram; +thiprtcCreateProgram* hiprtcCreateProgram; +thiprtcDestroyProgram* hiprtcDestroyProgram; +thiprtcGetLoweredName* hiprtcGetLoweredName; +thiprtcGetProgramLog* hiprtcGetProgramLog; +thiprtcGetProgramLogSize* hiprtcGetProgramLogSize; +thiprtcGetCode* hiprtcGetCode; +thiprtcGetCodeSize* hiprtcGetCodeSize; + static DynamicLibrary dynamic_library_open_find(const char **paths) { @@ -242,6 +254,7 @@ static int hipewHipInit(void) { HIP_LIBRARY_FIND_CHECKED(hipDriverGetVersion); HIP_LIBRARY_FIND_CHECKED(hipGetDevice); HIP_LIBRARY_FIND_CHECKED(hipGetDeviceCount); + HIP_LIBRARY_FIND_CHECKED(hipGetDeviceProperties); HIP_LIBRARY_FIND_CHECKED(hipDeviceGetName); HIP_LIBRARY_FIND_CHECKED(hipDeviceGetAttribute); HIP_LIBRARY_FIND_CHECKED(hipDeviceComputeCapability); @@ -346,6 +359,16 @@ static int hipewHipInit(void) { HIP_LIBRARY_FIND_CHECKED(hipGraphicsGLRegisterBuffer); HIP_LIBRARY_FIND_CHECKED(hipGLGetDevices); #endif + HIP_LIBRARY_FIND_CHECKED(hiprtcGetErrorString); + HIP_LIBRARY_FIND_CHECKED(hiprtcAddNameExpression); + HIP_LIBRARY_FIND_CHECKED(hiprtcCompileProgram); + HIP_LIBRARY_FIND_CHECKED(hiprtcCreateProgram); + HIP_LIBRARY_FIND_CHECKED(hiprtcDestroyProgram); + HIP_LIBRARY_FIND_CHECKED(hiprtcGetLoweredName); + HIP_LIBRARY_FIND_CHECKED(hiprtcGetProgramLog); + HIP_LIBRARY_FIND_CHECKED(hiprtcGetProgramLogSize); + HIP_LIBRARY_FIND_CHECKED(hiprtcGetCode); + HIP_LIBRARY_FIND_CHECKED(hiprtcGetCodeSize); result = HIPEW_SUCCESS; return result; } diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 1d8ebe94694..2a51e0be2a4 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -1329,7 +1329,7 @@ class CyclesPreferences(bpy.types.AddonPreferences): elif entry.type == 'CPU': cpu_devices.append(entry) # Extend all GPU devices with CPU. - if compute_device_type != 'CPU' and compute_device_type != 'HIP': + if compute_device_type != 'CPU': devices.extend(cpu_devices) return devices diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp index 583ab8ae208..4ae714913ab 100644 --- a/intern/cycles/device/hip/device_impl.cpp +++ b/intern/cycles/device/hip/device_impl.cpp @@ -208,7 +208,7 @@ bool HIPDevice::use_adaptive_compilation() return DebugFlags().hip.adaptive_compile; } -/* Common NVCC flags which stays the same regardless of shading model, +/* Common HIPCC flags which stays the same regardless of shading model, * kernel sources md5 and only depends on compiler or compilation settings. */ string HIPDevice::compile_kernel_get_common_cflags(const uint kernel_features) @@ -239,11 +239,13 @@ string HIPDevice::compile_kernel(const uint kernel_features, int major, minor; hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId); hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId); + hipDeviceProp_t props; + hipGetDeviceProperties(&props, hipDevId); /* Attempt to use kernel provided with Blender. */ if (!use_adaptive_compilation()) { if (!force_ptx) { - const string fatbin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor)); + const string fatbin = path_get(string_printf("lib/%s_%s.fatbin", name, props.gcnArchName)); VLOG(1) << "Testing for pre-compiled kernel " << fatbin << "."; if (path_exists(fatbin)) { VLOG(1) << "Using precompiled kernel."; @@ -283,17 +285,21 @@ string HIPDevice::compile_kernel(const uint kernel_features, const string kernel_md5 = util_md5_string(source_md5 + common_cflags); const char *const kernel_ext = "genco"; + std::string options; # ifdef _WIN32 - const char *const options = - "save-temps -Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp"; + options.append("Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -ffast-math"); # else - const char *const options = - "save-temps -Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -O3 -ggdb"; + options.append("Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -O3 -ffast-math"); # endif +# ifdef _DEBUG + options.append(" -save-temps"); +# endif + options.append(" --amdgpu-target=").append(props.gcnArchName); + const string include_path = source_path; - const char *const kernel_arch = force_ptx ? "compute" : "sm"; + const char *const kernel_arch = props.gcnArchName; const string fatbin_file = string_printf( - "cycles_%s_%s_%d%d_%s", name, kernel_arch, major, minor, kernel_md5.c_str()); + "cycles_%s_%s_%s", name, kernel_arch, kernel_md5.c_str()); const string fatbin = path_cache_get(path_join("kernels", fatbin_file)); VLOG(1) << "Testing for locally compiled kernel " << fatbin << "."; if (path_exists(fatbin)) { @@ -350,7 +356,7 @@ string HIPDevice::compile_kernel(const uint kernel_features, string command = string_printf("%s -%s -I %s --%s %s -o \"%s\"", hipcc, - options, + options.c_str(), include_path.c_str(), kernel_ext, source_path.c_str(), diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 7357c5804ed..6c87c9c32f2 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -487,9 +487,6 @@ endif() # HIP module if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP) - # 64 bit only - set(HIP_BITS 64) - # build for each arch set(hip_sources device/hip/kernel.cpp ${SRC_HEADERS} @@ -504,32 +501,41 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP) set(hip_fatbins) macro(CYCLES_HIP_KERNEL_ADD arch prev_arch name flags sources experimental) - if(${arch} MATCHES "compute_.*") - set(format "ptx") - else() - set(format "fatbin") - endif() + set(format "fatbin") set(hip_file ${name}_${arch}.${format}) - set(kernel_sources ${sources}) if(NOT ${prev_arch} STREQUAL "none") - if(${prev_arch} MATCHES "compute_.*") - set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.ptx) - else() - set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.fatbin) - endif() + set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.fatbin) endif() set(hip_kernel_src "/device/hip/${name}.cpp") - set(hip_flags ${flags} + if(WIN32) + set(hip_command ${CMAKE_COMMAND}) + set(hip_flags + -E env "HIP_PATH=${HIP_ROOT_DIR}" "PATH=${HIP_PERL_PATH}" + ${HIP_HIPCC_EXECUTABLE}.bat) + else() + set(hip_command ${HIP_HIPCC_EXECUTABLE}) + set(hip_flags) + endif() + + set(hip_flags + ${hip_flags} + --amdgpu-target=${arch} + ${HIP_HIPCC_FLAGS} + --genco + ${CMAKE_CURRENT_SOURCE_DIR}${hip_kernel_src} + ${flags} -D CCL_NAMESPACE_BEGIN= -D CCL_NAMESPACE_END= -D HIPCC - -m ${HIP_BITS} -I ${CMAKE_CURRENT_SOURCE_DIR}/.. -I ${CMAKE_CURRENT_SOURCE_DIR}/device/hip - --use_fast_math + -Wno-parentheses-equality + -Wno-unused-value + --hipcc-func-supp + -ffast-math -o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file}) if(${experimental}) @@ -541,20 +547,9 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP) set(hip_flags ${hip_flags} -D __KERNEL_DEBUG__) endif() - if(WITH_NANOVDB) - set(hip_flags ${hip_flags} - -D WITH_NANOVDB - -I "${NANOVDB_INCLUDE_DIR}") - endif() - - add_custom_command( - OUTPUT ${hip_file} - COMMAND ${HIP_HIPCC_EXECUTABLE} - -arch=${arch} - ${HIP_HIPCC_FLAGS} - --${format} - ${CMAKE_CURRENT_SOURCE_DIR}${hip_kernel_src} - ${hip_flags} + add_custom_target( + ${hip_file} + COMMAND ${hip_command} ${hip_flags} DEPENDS ${kernel_sources}) delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hip_file}" ${CYCLES_INSTALL_PATH}/lib) list(APPEND hip_fatbins ${hip_file}) diff --git a/intern/cycles/kernel/device/hip/globals.h b/intern/cycles/kernel/device/hip/globals.h index 39978ae7899..28e1cc4282f 100644 --- a/intern/cycles/kernel/device/hip/globals.h +++ b/intern/cycles/kernel/device/hip/globals.h @@ -27,10 +27,10 @@ CCL_NAMESPACE_BEGIN /* Not actually used, just a NULL pointer that gets passed everywhere, which we * hope gets optimized out by the compiler. */ -struct KernelGlobals { - /* NOTE: Keep the size in sync with SHADOW_STACK_MAX_HITS. */ +struct KernelGlobalsGPU { int unused[1]; }; +typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals; /* Global scene data and textures */ __constant__ KernelData __data; |