/* * Copyright 2011-2021 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License */ #ifndef __HIPEW_H__ #define __HIPEW_H__ #ifdef __cplusplus extern "C" { #endif #include #define HIP_IPC_HANDLE_SIZE 64 #define hipHostMallocDefault 0x00 #define hipHostMallocPortable 0x01 #define hipHostMallocMapped 0x02 #define hipHostMallocWriteCombined 0x04 #define hipHostMallocNumaUser 0x20000000 #define hipHostMallocCoherent 0x40000000 #define hipHostMallocNonCoherent 0x80000000 #define hipHostRegisterPortable 0x01 #define hipHostRegisterMapped 0x02 #define hipHostRegisterIoMemory 0x04 #define hipCooperativeLaunchMultiDeviceNoPreSync 0x01 #define hipCooperativeLaunchMultiDeviceNoPostSync 0x02 #define hipArrayLayered 0x01 #define hipArraySurfaceLoadStore 0x02 #define hipArrayCubemap 0x04 #define hipArrayTextureGather 0x08 #define HIP_TRSA_OVERRIDE_FORMAT 0x01 #define HIP_TRSF_READ_AS_INTEGER 0x01 #define HIP_TRSF_NORMALIZED_COORDINATES 0x02 #define HIP_LAUNCH_PARAM_END ((void*)0x00) #define HIP_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01) #define HIP_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02) /* Functions which changed 3.1 -> 3.2 for 64 bit stuff, * the cuda library has both the old ones for compatibility and new * ones with _v2 postfix, */ #define hipModuleGetGlobal hipModuleGetGlobal #define hipMemGetInfo hipMemGetInfo #define hipMemAllocPitch hipMemAllocPitch #define hipMemGetAddressRange hipMemGetAddressRange #define hipMemcpyHtoD hipMemcpyHtoD #define hipMemcpyDtoH hipMemcpyDtoH #define hipMemcpyDtoD hipMemcpyDtoD #define hipMemcpyHtoA hipMemcpyHtoA #define hipMemcpyAtoH hipMemcpyAtoH #define hipMemcpyHtoDAsync hipMemcpyHtoDAsync #define hipMemcpyDtoHAsync hipMemcpyDtoHAsync #define hipMemcpyDtoDAsync hipMemcpyDtoDAsync #define hipMemsetD8 hipMemsetD8 #define hipMemsetD16 hipMemsetD16 #define hipMemsetD32 hipMemsetD32 #define hipArrayCreate hipArrayCreate #define hipArray3DCreate hipArray3DCreate #define hipTexRefSetAddress hipTexRefSetAddress #define hipTexRefGetAddress hipTexRefGetAddress #define hipStreamDestroy hipStreamDestroy #define hipEventDestroy hipEventDestroy #define hipTexRefSetAddress2D hipTexRefSetAddress2D /* Types. */ #ifdef _MSC_VER typedef unsigned __int32 hipuint32_t; typedef unsigned __int64 hipuint64_t; #else #include typedef uint32_t hipuint32_t; typedef uint64_t hipuint64_t; #endif #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined (__aarch64__) typedef unsigned long long hipDeviceptr_t; #else typedef unsigned int hipDeviceptr_t; #endif #ifdef _WIN32 # define HIPAPI __stdcall # define HIP_CB __stdcall #else # define HIPAPI # define HIP_CB #endif typedef int hipDevice_t; typedef struct ihipCtx_t* hipCtx_t; typedef struct ihipModule_t* hipModule_t; typedef struct ihipModuleSymbol_t* hipFunction_t; typedef struct hipArray* hArray; typedef struct hipMipmappedArray_st* hipMipmappedArray_t; typedef struct ihipEvent_t* hipEvent_t; typedef struct ihipStream_t* hipStream_t; typedef unsigned long long hipTextureObject_t; typedef struct HIPuuid_st { char bytes[16]; } HIPuuid; typedef enum hipChannelFormatKind { hipChannelFormatKindSigned = 0, hipChannelFormatKindUnsigned = 1, hipChannelFormatKindFloat = 2, hipChannelFormatKindNone = 3, }hipChannelFormatKind; typedef struct hipChannelFormatDesc { int x; int y; int z; int w; enum hipChannelFormatKind f; }hipChannelFormatDesc; typedef enum hipTextureFilterMode { hipFilterModePoint = 0, hipFilterModeLinear = 1, } hipTextureFilterMode; typedef enum hipArray_Format { HIP_AD_FORMAT_UNSIGNED_INT8 = 0x01, HIP_AD_FORMAT_SIGNED_INT8 = 0x08, HIP_AD_FORMAT_UNSIGNED_INT16 = 0x02, HIP_AD_FORMAT_SIGNED_INT16 = 0x09, HIP_AD_FORMAT_UNSIGNED_INT32 = 0x03, HIP_AD_FORMAT_SIGNED_INT32 = 0x0a, HIP_AD_FORMAT_HALF = 0x10, HIP_AD_FORMAT_FLOAT = 0x20, } hipArray_Format; typedef enum hipTextureAddressMode { hipAddressModeWrap = 0, hipAddressModeClamp = 1, hipAddressModeMirror = 2, hipAddressModeBorder = 3, } hipTextureAddressMode; /** * hip texture reference */ typedef struct textureReference { int normalized; //enum hipTextureReadMode readMode;// used only for driver API's enum hipTextureFilterMode filterMode; enum hipTextureAddressMode addressMode[3]; // Texture address mode for up to 3 dimensions struct hipChannelFormatDesc channelDesc; int sRGB; // Perform sRGB->linear conversion during texture read unsigned int maxAnisotropy; // Limit to the anisotropy ratio enum hipTextureFilterMode mipmapFilterMode; float mipmapLevelBias; float minMipmapLevelClamp; float maxMipmapLevelClamp; hipTextureObject_t textureObject; int numChannels; enum hipArray_Format format; }textureReference; typedef textureReference* hipTexRef; typedef enum hipMemoryType { hipMemoryTypeHost = 0x00, hipMemoryTypeDevice = 0x01, hipMemoryTypeArray = 0x02, hipMemoryTypeUnified = 0x03, } hipMemoryType; /** * Pointer attributes */ typedef struct hipPointerAttribute_t { enum hipMemoryType memoryType; int device; void* devicePointer; void* hostPointer; int isManaged; unsigned allocationFlags; /* flags specified when memory was allocated*/ /* peers? */ } hipPointerAttribute_t; typedef struct ihipIpcEventHandle_t { char reserved[HIP_IPC_HANDLE_SIZE]; } ihipIpcEventHandle_t; typedef struct hipIpcMemHandle_st { char reserved[HIP_IPC_HANDLE_SIZE]; } hipIpcMemHandle_t; typedef enum HIPipcMem_flags_enum { hipIpcMemLazyEnablePeerAccess = 0x1, } HIPipcMem_flags; typedef enum HIPmemAttach_flags_enum { hipMemAttachGlobal = 0x1, hipMemAttachHost = 0x2, HIP_MEM_ATTACH_SINGLE = 0x4, } HIPmemAttach_flags; typedef enum HIPctx_flags_enum { hipDeviceScheduleAuto = 0x00, hipDeviceScheduleSpin = 0x01, hipDeviceScheduleYield = 0x02, hipDeviceScheduleBlockingSync = 0x04, hipDeviceScheduleMask = 0x07, hipDeviceMapHost = 0x08, hipDeviceLmemResizeToMax = 0x10, } HIPctx_flags; typedef enum HIPstream_flags_enum { hipStreamDefault = 0x0, hipStreamNonBlocking = 0x1, } HIPstream_flags; typedef enum HIPevent_flags_enum { hipEventDefault = 0x0, hipEventBlockingSync = 0x1, hipEventDisableTiming = 0x2, hipEventInterprocess = 0x4, } HIPevent_flags; typedef enum HIPstreamWaitValue_flags_enum { HIP_STREAM_WAIT_VALUE_GEQ = 0x0, HIP_STREAM_WAIT_VALUE_EQ = 0x1, HIP_STREAM_WAIT_VALUE_AND = 0x2, HIP_STREAM_WAIT_VALUE_NOR = 0x3, HIP_STREAM_WAIT_VALUE_FLUSH = (1 << 30), } HIPstreamWaitValue_flags; typedef enum HIPstreamWriteValue_flags_enum { HIP_STREAM_WRITE_VALUE_DEFAULT = 0x0, HIP_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER = 0x1, } HIPstreamWriteValue_flags; typedef enum HIPstreamBatchMemOpType_enum { HIP_STREAM_MEM_OP_WAIT_VALUE_32 = 1, HIP_STREAM_MEM_OP_WRITE_VALUE_32 = 2, HIP_STREAM_MEM_OP_WAIT_VALUE_64 = 4, HIP_STREAM_MEM_OP_WRITE_VALUE_64 = 5, HIP_STREAM_MEM_OP_FLUSH_REMOTE_WRITES = 3, } HIPstreamBatchMemOpType; typedef union HIPstreamBatchMemOpParams_union { HIPstreamBatchMemOpType operation; struct HIPstreamMemOpWaitValueParams_st { HIPstreamBatchMemOpType operation; hipDeviceptr_t address; union { hipuint32_t value; hipuint64_t value64; }; unsigned int flags; hipDeviceptr_t alias; } waitValue; struct HIPstreamMemOpWriteValueParams_st { HIPstreamBatchMemOpType operation; hipDeviceptr_t address; union { hipuint32_t value; hipuint64_t value64; }; unsigned int flags; hipDeviceptr_t alias; } writeValue; struct HIPstreamMemOpFlushRemoteWritesParams_st { HIPstreamBatchMemOpType operation; unsigned int flags; } flushRemoteWrites; hipuint64_t pad[6]; } HIPstreamBatchMemOpParams; typedef enum HIPoccupancy_flags_enum { hipOccupancyDefault = 0x0, HIP_OCCUPANCY_DISABLE_CACHING_OVERRIDE = 0x1, } HIPoccupancy_flags; typedef enum hipDeviceAttribute_t { hipDeviceAttributeCudaCompatibleBegin = 0, hipDeviceAttributeEccEnabled = hipDeviceAttributeCudaCompatibleBegin, ///< Whether ECC support is enabled. hipDeviceAttributeAccessPolicyMaxWindowSize, ///< Cuda only. The maximum size of the window policy in bytes. hipDeviceAttributeAsyncEngineCount, ///< Cuda only. Asynchronous engines number. hipDeviceAttributeCanMapHostMemory, ///< Whether host memory can be mapped into device address space hipDeviceAttributeCanUseHostPointerForRegisteredMem,///< Cuda only. Device can access host registered memory ///< at the same virtual address as the CPU hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz. hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in. hipDeviceAttributeComputePreemptionSupported, ///< Cuda only. Device supports Compute Preemption. hipDeviceAttributeConcurrentKernels, ///< Device can possibly execute multiple kernels concurrently. hipDeviceAttributeConcurrentManagedAccess, ///< Device can coherently access managed memory concurrently with the CPU hipDeviceAttributeCooperativeLaunch, ///< Support cooperative launch hipDeviceAttributeCooperativeMultiDeviceLaunch, ///< Support cooperative launch on multiple devices hipDeviceAttributeDeviceOverlap, ///< Cuda only. Device can concurrently copy memory and execute a kernel. ///< Deprecated. Use instead asyncEngineCount. hipDeviceAttributeDirectManagedMemAccessFromHost, ///< Host can directly access managed memory on ///< the device without migration hipDeviceAttributeGlobalL1CacheSupported, ///< Cuda only. Device supports caching globals in L1 hipDeviceAttributeHostNativeAtomicSupported, ///< Cuda only. Link between the device and the host supports native atomic operations hipDeviceAttributeIntegrated, ///< Device is integrated GPU hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices. hipDeviceAttributeKernelExecTimeout, ///< Run time limit for kernels executed on the device hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache. hipDeviceAttributeLocalL1CacheSupported, ///< caching locals in L1 is supported hipDeviceAttributeLuid, ///< Cuda only. 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms hipDeviceAttributeLuidDeviceNodeMask, ///< Cuda only. Luid device node mask. Undefined on TCC and non-Windows platforms hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number. hipDeviceAttributeManagedMemory, ///< Device supports allocating managed memory on this system hipDeviceAttributeMaxBlocksPerMultiProcessor, ///< Cuda only. Max block size per multiprocessor hipDeviceAttributeMaxBlockDimX, ///< Max block size in width. hipDeviceAttributeMaxBlockDimY, ///< Max block size in height. hipDeviceAttributeMaxBlockDimZ, ///< Max block size in depth. hipDeviceAttributeMaxGridDimX, ///< Max grid size in width. hipDeviceAttributeMaxGridDimY, ///< Max grid size in height. hipDeviceAttributeMaxGridDimZ, ///< Max grid size in depth. hipDeviceAttributeMaxSurface1D, ///< Maximum size of 1D surface. hipDeviceAttributeMaxSurface1DLayered, ///< Cuda only. Maximum dimensions of 1D layered surface. hipDeviceAttributeMaxSurface2D, ///< Maximum dimension (width, height) of 2D surface. hipDeviceAttributeMaxSurface2DLayered, ///< Cuda only. Maximum dimensions of 2D layered surface. hipDeviceAttributeMaxSurface3D, ///< Maximum dimension (width, height, depth) of 3D surface. hipDeviceAttributeMaxSurfaceCubemap, ///< Cuda only. Maximum dimensions of Cubemap surface. hipDeviceAttributeMaxSurfaceCubemapLayered, ///< Cuda only. Maximum dimension of Cubemap layered surface. hipDeviceAttributeMaxTexture1DWidth, ///< Maximum size of 1D texture. hipDeviceAttributeMaxTexture1DLayered, ///< Cuda only. Maximum dimensions of 1D layered texture. hipDeviceAttributeMaxTexture1DLinear, ///< Maximum number of elements allocatable in a 1D linear texture. ///< Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda. hipDeviceAttributeMaxTexture1DMipmap, ///< Cuda only. Maximum size of 1D mipmapped texture. hipDeviceAttributeMaxTexture2DWidth, ///< Maximum dimension width of 2D texture. hipDeviceAttributeMaxTexture2DHeight, ///< Maximum dimension hight of 2D texture. hipDeviceAttributeMaxTexture2DGather, ///< Cuda only. Maximum dimensions of 2D texture if gather operations performed. hipDeviceAttributeMaxTexture2DLayered, ///< Cuda only. Maximum dimensions of 2D layered texture. hipDeviceAttributeMaxTexture2DLinear, ///< Cuda only. Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory. hipDeviceAttributeMaxTexture2DMipmap, ///< Cuda only. Maximum dimensions of 2D mipmapped texture. hipDeviceAttributeMaxTexture3DWidth, ///< Maximum dimension width of 3D texture. hipDeviceAttributeMaxTexture3DHeight, ///< Maximum dimension height of 3D texture. hipDeviceAttributeMaxTexture3DDepth, ///< Maximum dimension depth of 3D texture. hipDeviceAttributeMaxTexture3DAlt, ///< Cuda only. Maximum dimensions of alternate 3D texture. hipDeviceAttributeMaxTextureCubemap, ///< Cuda only. Maximum dimensions of Cubemap texture hipDeviceAttributeMaxTextureCubemapLayered, ///< Cuda only. Maximum dimensions of Cubemap layered texture. hipDeviceAttributeMaxThreadsDim, ///< Maximum dimension of a block hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block. hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor. hipDeviceAttributeMaxPitch, ///< Maximum pitch in bytes allowed by memory copies hipDeviceAttributeMemoryBusWidth, ///< Global memory bus width in bits. hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz. hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number. hipDeviceAttributeMultiGpuBoardGroupID, ///< Cuda only. Unique ID of device group on the same multi-GPU board hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device. hipDeviceAttributeName, ///< Device name. hipDeviceAttributePageableMemoryAccess, ///< Device supports coherently accessing pageable memory ///< without calling hipHostRegister on it hipDeviceAttributePageableMemoryAccessUsesHostPageTables, ///< Device accesses pageable memory via the host's page tables hipDeviceAttributePciBusId, ///< PCI Bus ID. hipDeviceAttributePciDeviceId, ///< PCI Device ID. hipDeviceAttributePciDomainID, ///< PCI Domain ID. hipDeviceAttributePersistingL2CacheMaxSize, ///< Cuda11 only. Maximum l2 persisting lines capacity in bytes hipDeviceAttributeMaxRegistersPerBlock, ///< 32-bit registers available to a thread block. This number is shared ///< by all thread blocks simultaneously resident on a multiprocessor. hipDeviceAttributeMaxRegistersPerMultiprocessor, ///< 32-bit registers available per block. hipDeviceAttributeReservedSharedMemPerBlock, ///< Cuda11 only. Shared memory reserved by CUDA driver per block. hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in bytes. hipDeviceAttributeSharedMemPerBlockOptin, ///< Cuda only. Maximum shared memory per block usable by special opt in. hipDeviceAttributeSharedMemPerMultiprocessor, ///< Cuda only. Shared memory available per multiprocessor. hipDeviceAttributeSingleToDoublePrecisionPerfRatio, ///< Cuda only. Performance ratio of single precision to double precision. hipDeviceAttributeStreamPrioritiesSupported, ///< Cuda only. Whether to support stream priorities. hipDeviceAttributeSurfaceAlignment, ///< Cuda only. Alignment requirement for surfaces hipDeviceAttributeTccDriver, ///< Cuda only. Whether device is a Tesla device using TCC driver hipDeviceAttributeTextureAlignment, ///< Alignment requirement for textures hipDeviceAttributeTexturePitchAlignment, ///< Pitch alignment requirement for 2D texture references bound to pitched memory; hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes. hipDeviceAttributeTotalGlobalMem, ///< Global memory available on devicice. hipDeviceAttributeUnifiedAddressing, ///< Cuda only. An unified address space shared with the host. hipDeviceAttributeUuid, ///< Cuda only. Unique ID in 16 byte. hipDeviceAttributeWarpSize, ///< Warp size in threads. hipDeviceAttributeCudaCompatibleEnd = 9999, hipDeviceAttributeAmdSpecificBegin = 10000, hipDeviceAttributeClockInstructionRate = hipDeviceAttributeAmdSpecificBegin, ///< Frequency in khz of the timer used by the device-side "clock*" hipDeviceAttributeArch, ///< Device architecture hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum Shared Memory PerMultiprocessor. hipDeviceAttributeGcnArch, ///< Device gcn architecture hipDeviceAttributeGcnArchName, ///< Device gcnArch name in 256 bytes hipDeviceAttributeHdpMemFlushCntl, ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register hipDeviceAttributeHdpRegFlushCntl, ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, ///< Supports cooperative launch on multiple ///< devices with unmatched functions hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, ///< Supports cooperative launch on multiple ///< devices with unmatched grid dimensions hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, ///< Supports cooperative launch on multiple ///< devices with unmatched block dimensions hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, ///< Supports cooperative launch on multiple ///< devices with unmatched shared memories hipDeviceAttributeIsLargeBar, ///< Whether it is LargeBar hipDeviceAttributeAsicRevision, ///< Revision of the GPU in this device hipDeviceAttributeCanUseStreamWaitValue, ///< '1' if Device supports hipStreamWaitValue32() and ///< hipStreamWaitValue64() , '0' otherwise. hipDeviceAttributeAmdSpecificEnd = 19999, hipDeviceAttributeVendorSpecificBegin = 20000, // Extended attributes for vendors } hipDeviceAttribute_t; typedef struct HIPdevprop_st { int maxThreadsPerBlock; int maxThreadsDim[3]; int maxGridSize[3]; int sharedMemPerBlock; int totalConstantMemory; int SIMDWidth; int memPitch; int regsPerBlock; int clockRate; int textureAlign; } HIPdevprop; typedef struct { // 32-bit Atomics unsigned hasGlobalInt32Atomics : 1; ///< 32-bit integer atomics for global memory. unsigned hasGlobalFloatAtomicExch : 1; ///< 32-bit float atomic exch for global memory. unsigned hasSharedInt32Atomics : 1; ///< 32-bit integer atomics for shared memory. unsigned hasSharedFloatAtomicExch : 1; ///< 32-bit float atomic exch for shared memory. unsigned hasFloatAtomicAdd : 1; ///< 32-bit float atomic add in global and shared memory. // 64-bit Atomics unsigned hasGlobalInt64Atomics : 1; ///< 64-bit integer atomics for global memory. unsigned hasSharedInt64Atomics : 1; ///< 64-bit integer atomics for shared memory. // Doubles unsigned hasDoubles : 1; ///< Double-precision floating point. // Warp cross-lane operations unsigned hasWarpVote : 1; ///< Warp vote instructions (__any, __all). unsigned hasWarpBallot : 1; ///< Warp ballot instructions (__ballot). unsigned hasWarpShuffle : 1; ///< Warp shuffle operations. (__shfl_*). unsigned hasFunnelShift : 1; ///< Funnel two words into one with shift&mask caps. // Sync unsigned hasThreadFenceSystem : 1; ///< __threadfence_system. unsigned hasSyncThreadsExt : 1; ///< __syncthreads_count, syncthreads_and, syncthreads_or. // Misc unsigned hasSurfaceFuncs : 1; ///< Surface functions. unsigned has3dGrid : 1; ///< Grid and group dims are 3D (rather than 2D). unsigned hasDynamicParallelism : 1; ///< Dynamic parallelism. } hipDeviceArch_t; typedef struct hipDeviceProp_t { char name[256]; ///< Device name. size_t totalGlobalMem; ///< Size of global memory region (in bytes). size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes). int regsPerBlock; ///< Registers per block. int warpSize; ///< Warp size. int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size. int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block. int maxGridSize[3]; ///< Max grid dimensions (XYZ). int clockRate; ///< Max clock frequency of the multiProcessors in khz. int memoryClockRate; ///< Max global memory clock frequency in khz. int memoryBusWidth; ///< Global memory bus width in bits. size_t totalConstMem; ///< Size of shared memory region (in bytes). int major; ///< Major compute capability. On HCC, this is an approximation and features may ///< differ from CUDA CC. See the arch feature flags for portable ways to query ///< feature caps. int minor; ///< Minor compute capability. On HCC, this is an approximation and features may ///< differ from CUDA CC. See the arch feature flags for portable ways to query ///< feature caps. int multiProcessorCount; ///< Number of multi-processors (compute units). int l2CacheSize; ///< L2 cache size. int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor. int computeMode; ///< Compute mode. int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*" ///< instructions. New for HIP. hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP. int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently. int pciDomainID; ///< PCI Domain ID int pciBusID; ///< PCI Bus ID. int pciDeviceID; ///< PCI Device ID. size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor. int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not. int canMapHostMemory; ///< Check whether HIP can map host memory int gcnArch; ///< DEPRECATED: use gcnArchName instead char gcnArchName[256]; ///< AMD GCN Arch Name. int integrated; ///< APU vs dGPU int cooperativeLaunch; ///< HIP device supports cooperative launch int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple devices int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory int maxTexture1D; ///< Maximum number of elements in 1D images int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image elements unsigned int* hdpMemFlushCntl; ///< Addres of HDP_MEM_COHERENCY_FLUSH_CNTL register unsigned int* hdpRegFlushCntl; ///< Addres of HDP_REG_COHERENCY_FLUSH_CNTL register size_t memPitch; ///