Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/extern
diff options
context:
space:
mode:
authorBrian Savery <bsavery>2021-09-28 17:51:14 +0300
committerBrecht Van Lommel <brecht@blender.org>2021-09-28 20:18:55 +0300
commit044a77352f8a8a0e1f60190369d69ef26587b65f (patch)
tree22096da4d5214cbd7419d1a5e0dadc70e6cacea3 /extern
parent262b2118565826177133013c324212c66d882456 (diff)
Cycles: add HIP device support for AMD GPUs
NOTE: this feature is not ready for user testing, and not yet enabled in daily builds. It is being merged now for easier collaboration on development. HIP is a heterogenous compute interface allowing C++ code to be executed on GPUs similar to CUDA. It is intended to bring back AMD GPU rendering support on Windows and Linux. https://github.com/ROCm-Developer-Tools/HIP. As of the time of writing, it should compile and run on Linux with existing HIP compilers and driver runtimes. Publicly available compilers and drivers for Windows will come later. See task T91571 for more details on the current status and work remaining to be done. Credits: Sayak Biswas (AMD) Arya Rafii (AMD) Brian Savery (AMD) Differential Revision: https://developer.blender.org/D12578
Diffstat (limited to 'extern')
-rw-r--r--extern/CMakeLists.txt3
-rw-r--r--extern/hipew/CMakeLists.txt39
-rw-r--r--extern/hipew/include/hipew.h1207
-rw-r--r--extern/hipew/src/hipew.c533
4 files changed, 1782 insertions, 0 deletions
diff --git a/extern/CMakeLists.txt b/extern/CMakeLists.txt
index 7f7d91f0765..2b2cca04503 100644
--- a/extern/CMakeLists.txt
+++ b/extern/CMakeLists.txt
@@ -70,6 +70,9 @@ if(WITH_CYCLES OR WITH_COMPOSITOR OR WITH_OPENSUBDIV)
if(WITH_CUDA_DYNLOAD)
add_subdirectory(cuew)
endif()
+ if(WITH_HIP_DYNLOAD)
+ add_subdirectory(hipew)
+ endif()
endif()
if(WITH_GHOST_X11 AND WITH_GHOST_XDND)
diff --git a/extern/hipew/CMakeLists.txt b/extern/hipew/CMakeLists.txt
new file mode 100644
index 00000000000..d215ea8c691
--- /dev/null
+++ b/extern/hipew/CMakeLists.txt
@@ -0,0 +1,39 @@
+# ***** BEGIN GPL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# The Original Code is Copyright (C) 2021, Blender Foundation
+# All rights reserved.
+# ***** END GPL LICENSE BLOCK *****
+
+set(INC
+ .
+ include
+)
+
+set(INC_SYS
+
+)
+
+set(SRC
+ src/hipew.c
+
+ include/hipew.h
+)
+
+set(LIB
+)
+
+blender_add_lib(extern_hipew "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
diff --git a/extern/hipew/include/hipew.h b/extern/hipew/include/hipew.h
new file mode 100644
index 00000000000..02fffc331bf
--- /dev/null
+++ b/extern/hipew/include/hipew.h
@@ -0,0 +1,1207 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+#ifndef __HIPEW_H__
+#define __HIPEW_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdlib.h>
+
+#define HIP_IPC_HANDLE_SIZE 64
+#define hipHostMallocPortable 0x01
+#define hipHostMallocMapped 0x02
+#define hipHostMallocWriteCombined 0x04
+#define hipHostRegisterPortable 0x01
+#define hipHostRegisterMapped 0x02
+#define hipHostRegisterIoMemory 0x04
+#define hipCooperativeLaunchMultiDeviceNoPreSync 0x01
+#define hipCooperativeLaunchMultiDeviceNoPostSync 0x02
+#define hipArrayLayered 0x01
+#define hipArraySurfaceLoadStore 0x02
+#define hipArrayCubemap 0x04
+#define hipArrayTextureGather 0x08
+#define HIP_TRSA_OVERRIDE_FORMAT 0x01
+#define HIP_TRSF_READ_AS_INTEGER 0x01
+#define HIP_TRSF_NORMALIZED_COORDINATES 0x02
+#define HIP_LAUNCH_PARAM_END ((void*)0x00)
+#define HIP_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01)
+#define HIP_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02)
+
+/* Functions which changed 3.1 -> 3.2 for 64 bit stuff,
+ * the cuda library has both the old ones for compatibility and new
+ * ones with _v2 postfix,
+ */
+#define hipModuleGetGlobal hipModuleGetGlobal
+#define hipMemGetInfo hipMemGetInfo
+#define hipMemAllocPitch hipMemAllocPitch
+#define hipMemGetAddressRange hipMemGetAddressRange
+#define hipMemcpyHtoD hipMemcpyHtoD
+#define hipMemcpyDtoH hipMemcpyDtoH
+#define hipMemcpyDtoD hipMemcpyDtoD
+#define hipMemcpyHtoA hipMemcpyHtoA
+#define hipMemcpyAtoH hipMemcpyAtoH
+#define hipMemcpyHtoDAsync hipMemcpyHtoDAsync
+#define hipMemcpyDtoHAsync hipMemcpyDtoHAsync
+#define hipMemcpyDtoDAsync hipMemcpyDtoDAsync
+#define hipMemsetD8 hipMemsetD8
+#define hipMemsetD16 hipMemsetD16
+#define hipMemsetD32 hipMemsetD32
+#define hipArrayCreate hipArrayCreate
+#define hipArray3DCreate hipArray3DCreate
+#define hipTexRefSetAddress hipTexRefSetAddress
+#define hipTexRefGetAddress hipTexRefGetAddress
+#define hipStreamDestroy hipStreamDestroy
+#define hipEventDestroy hipEventDestroy
+#define hipTexRefSetAddress2D hipTexRefSetAddress2D
+
+/* Types. */
+#ifdef _MSC_VER
+typedef unsigned __int32 hipuint32_t;
+typedef unsigned __int64 hipuint64_t;
+#else
+#include <stdint.h>
+typedef uint32_t hipuint32_t;
+typedef uint64_t hipuint64_t;
+#endif
+
+#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined (__aarch64__)
+typedef unsigned long long hipDeviceptr_t;
+#else
+typedef unsigned int hipDeviceptr_t;
+#endif
+
+
+#ifdef _WIN32
+# define HIPAPI __stdcall
+# define HIP_CB __stdcall
+#else
+# define HIPAPI
+# define HIP_CB
+#endif
+
+typedef int hipDevice_t;
+typedef struct ihipCtx_t* hipCtx_t;
+typedef struct ihipModule_t* hipModule_t;
+typedef struct ihipModuleSymbol_t* hipFunction_t;
+typedef struct hipArray* hArray;
+typedef struct hipMipmappedArray_st* hipMipmappedArray_t;
+typedef struct ihipEvent_t* hipEvent_t;
+typedef struct ihipStream_t* hipStream_t;
+typedef unsigned long long hipTextureObject_t;
+
+typedef struct HIPuuid_st {
+ char bytes[16];
+} HIPuuid;
+
+typedef enum hipChannelFormatKind {
+ hipChannelFormatKindSigned = 0,
+ hipChannelFormatKindUnsigned = 1,
+ hipChannelFormatKindFloat = 2,
+ hipChannelFormatKindNone = 3,
+}hipChannelFormatKind;
+
+typedef struct hipChannelFormatDesc {
+ int x;
+ int y;
+ int z;
+ int w;
+ enum hipChannelFormatKind f;
+}hipChannelFormatDesc;
+
+typedef enum hipTextureFilterMode {
+ hipFilterModePoint = 0,
+ hipFilterModeLinear = 1,
+} hipTextureFilterMode;
+
+typedef enum hipArray_Format {
+ HIP_AD_FORMAT_UNSIGNED_INT8 = 0x01,
+ HIP_AD_FORMAT_SIGNED_INT8 = 0x08,
+ HIP_AD_FORMAT_UNSIGNED_INT16 = 0x02,
+ HIP_AD_FORMAT_SIGNED_INT16 = 0x09,
+ HIP_AD_FORMAT_UNSIGNED_INT32 = 0x03,
+ HIP_AD_FORMAT_SIGNED_INT32 = 0x0a,
+ HIP_AD_FORMAT_HALF = 0x10,
+ HIP_AD_FORMAT_FLOAT = 0x20,
+} hipArray_Format;
+
+typedef enum hipTextureAddressMode {
+ hipAddressModeWrap = 0,
+ hipAddressModeClamp = 1,
+ hipAddressModeMirror = 2,
+ hipAddressModeBorder = 3,
+} hipTextureAddressMode;
+
+/**
+ * hip texture reference
+ */
+typedef struct textureReference {
+ int normalized;
+ //enum hipTextureReadMode readMode;// used only for driver API's
+ enum hipTextureFilterMode filterMode;
+ enum hipTextureAddressMode addressMode[3]; // Texture address mode for up to 3 dimensions
+ struct hipChannelFormatDesc channelDesc;
+ int sRGB; // Perform sRGB->linear conversion during texture read
+ unsigned int maxAnisotropy; // Limit to the anisotropy ratio
+ enum hipTextureFilterMode mipmapFilterMode;
+ float mipmapLevelBias;
+ float minMipmapLevelClamp;
+ float maxMipmapLevelClamp;
+
+ hipTextureObject_t textureObject;
+ int numChannels;
+ enum hipArray_Format format;
+}textureReference;
+
+typedef textureReference* hipTexRef;
+
+typedef enum hipMemoryType {
+ hipMemoryTypeHost = 0x00,
+ hipMemoryTypeDevice = 0x01,
+ hipMemoryTypeArray = 0x02,
+ hipMemoryTypeUnified = 0x03,
+} hipMemoryType;
+
+/**
+ * Pointer attributes
+ */
+typedef struct hipPointerAttribute_t {
+ enum hipMemoryType memoryType;
+ int device;
+ void* devicePointer;
+ void* hostPointer;
+ int isManaged;
+ unsigned allocationFlags; /* flags specified when memory was allocated*/
+ /* peers? */
+} hipPointerAttribute_t;
+
+typedef struct ihipIpcEventHandle_t {
+ char reserved[HIP_IPC_HANDLE_SIZE];
+} ihipIpcEventHandle_t;
+
+typedef struct hipIpcMemHandle_st {
+ char reserved[HIP_IPC_HANDLE_SIZE];
+} hipIpcMemHandle_t;
+
+typedef enum HIPipcMem_flags_enum {
+ hipIpcMemLazyEnablePeerAccess = 0x1,
+} HIPipcMem_flags;
+
+typedef enum HIPmemAttach_flags_enum {
+ hipMemAttachGlobal = 0x1,
+ hipMemAttachHost = 0x2,
+ HIP_MEM_ATTACH_SINGLE = 0x4,
+} HIPmemAttach_flags;
+
+typedef enum HIPctx_flags_enum {
+ hipDeviceScheduleAuto = 0x00,
+ hipDeviceScheduleSpin = 0x01,
+ hipDeviceScheduleYield = 0x02,
+ hipDeviceScheduleBlockingSync = 0x04,
+ hipDeviceScheduleMask = 0x07,
+ hipDeviceMapHost = 0x08,
+ hipDeviceLmemResizeToMax = 0x10,
+} HIPctx_flags;
+
+typedef enum HIPstream_flags_enum {
+ hipStreamDefault = 0x0,
+ hipStreamNonBlocking = 0x1,
+} HIPstream_flags;
+
+typedef enum HIPevent_flags_enum {
+ hipEventDefault = 0x0,
+ hipEventBlockingSync = 0x1,
+ hipEventDisableTiming = 0x2,
+ hipEventInterprocess = 0x4,
+} HIPevent_flags;
+
+typedef enum HIPstreamWaitValue_flags_enum {
+ HIP_STREAM_WAIT_VALUE_GEQ = 0x0,
+ HIP_STREAM_WAIT_VALUE_EQ = 0x1,
+ HIP_STREAM_WAIT_VALUE_AND = 0x2,
+ HIP_STREAM_WAIT_VALUE_NOR = 0x3,
+ HIP_STREAM_WAIT_VALUE_FLUSH = (1 << 30),
+} HIPstreamWaitValue_flags;
+
+typedef enum HIPstreamWriteValue_flags_enum {
+ HIP_STREAM_WRITE_VALUE_DEFAULT = 0x0,
+ HIP_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER = 0x1,
+} HIPstreamWriteValue_flags;
+
+typedef enum HIPstreamBatchMemOpType_enum {
+ HIP_STREAM_MEM_OP_WAIT_VALUE_32 = 1,
+ HIP_STREAM_MEM_OP_WRITE_VALUE_32 = 2,
+ HIP_STREAM_MEM_OP_WAIT_VALUE_64 = 4,
+ HIP_STREAM_MEM_OP_WRITE_VALUE_64 = 5,
+ HIP_STREAM_MEM_OP_FLUSH_REMOTE_WRITES = 3,
+} HIPstreamBatchMemOpType;
+
+
+typedef union HIPstreamBatchMemOpParams_union {
+ HIPstreamBatchMemOpType operation;
+ struct HIPstreamMemOpWaitValueParams_st {
+ HIPstreamBatchMemOpType operation;
+ hipDeviceptr_t address;
+ union {
+ hipuint32_t value;
+ hipuint64_t value64;
+ };
+ unsigned int flags;
+ hipDeviceptr_t alias;
+ } waitValue;
+ struct HIPstreamMemOpWriteValueParams_st {
+ HIPstreamBatchMemOpType operation;
+ hipDeviceptr_t address;
+ union {
+ hipuint32_t value;
+ hipuint64_t value64;
+ };
+ unsigned int flags;
+ hipDeviceptr_t alias;
+ } writeValue;
+ struct HIPstreamMemOpFlushRemoteWritesParams_st {
+ HIPstreamBatchMemOpType operation;
+ unsigned int flags;
+ } flushRemoteWrites;
+ hipuint64_t pad[6];
+} HIPstreamBatchMemOpParams;
+
+typedef enum HIPoccupancy_flags_enum {
+ hipOccupancyDefault = 0x0,
+ HIP_OCCUPANCY_DISABLE_CACHING_OVERRIDE = 0x1,
+} HIPoccupancy_flags;
+
+typedef enum hipDeviceAttribute_t {
+ hipDeviceAttributeCudaCompatibleBegin = 0,
+ hipDeviceAttributeEccEnabled = hipDeviceAttributeCudaCompatibleBegin, ///< Whether ECC support is enabled.
+ hipDeviceAttributeAccessPolicyMaxWindowSize, ///< Cuda only. The maximum size of the window policy in bytes.
+ hipDeviceAttributeAsyncEngineCount, ///< Cuda only. Asynchronous engines number.
+ hipDeviceAttributeCanMapHostMemory, ///< Whether host memory can be mapped into device address space
+ hipDeviceAttributeCanUseHostPointerForRegisteredMem,///< Cuda only. Device can access host registered memory
+ ///< at the same virtual address as the CPU
+ hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz.
+ hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in.
+ hipDeviceAttributeComputePreemptionSupported, ///< Cuda only. Device supports Compute Preemption.
+ hipDeviceAttributeConcurrentKernels, ///< Device can possibly execute multiple kernels concurrently.
+ hipDeviceAttributeConcurrentManagedAccess, ///< Device can coherently access managed memory concurrently with the CPU
+ hipDeviceAttributeCooperativeLaunch, ///< Support cooperative launch
+ hipDeviceAttributeCooperativeMultiDeviceLaunch, ///< Support cooperative launch on multiple devices
+ hipDeviceAttributeDeviceOverlap, ///< Cuda only. Device can concurrently copy memory and execute a kernel.
+ ///< Deprecated. Use instead asyncEngineCount.
+ hipDeviceAttributeDirectManagedMemAccessFromHost, ///< Host can directly access managed memory on
+ ///< the device without migration
+ hipDeviceAttributeGlobalL1CacheSupported, ///< Cuda only. Device supports caching globals in L1
+ hipDeviceAttributeHostNativeAtomicSupported, ///< Cuda only. Link between the device and the host supports native atomic operations
+ hipDeviceAttributeIntegrated, ///< Device is integrated GPU
+ hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices.
+ hipDeviceAttributeKernelExecTimeout, ///< Run time limit for kernels executed on the device
+ hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
+ hipDeviceAttributeLocalL1CacheSupported, ///< caching locals in L1 is supported
+ hipDeviceAttributeLuid, ///< Cuda only. 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms
+ hipDeviceAttributeLuidDeviceNodeMask, ///< Cuda only. Luid device node mask. Undefined on TCC and non-Windows platforms
+ hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number.
+ hipDeviceAttributeManagedMemory, ///< Device supports allocating managed memory on this system
+ hipDeviceAttributeMaxBlocksPerMultiProcessor, ///< Cuda only. Max block size per multiprocessor
+ hipDeviceAttributeMaxBlockDimX, ///< Max block size in width.
+ hipDeviceAttributeMaxBlockDimY, ///< Max block size in height.
+ hipDeviceAttributeMaxBlockDimZ, ///< Max block size in depth.
+ hipDeviceAttributeMaxGridDimX, ///< Max grid size in width.
+ hipDeviceAttributeMaxGridDimY, ///< Max grid size in height.
+ hipDeviceAttributeMaxGridDimZ, ///< Max grid size in depth.
+ hipDeviceAttributeMaxSurface1D, ///< Maximum size of 1D surface.
+ hipDeviceAttributeMaxSurface1DLayered, ///< Cuda only. Maximum dimensions of 1D layered surface.
+ hipDeviceAttributeMaxSurface2D, ///< Maximum dimension (width, height) of 2D surface.
+ hipDeviceAttributeMaxSurface2DLayered, ///< Cuda only. Maximum dimensions of 2D layered surface.
+ hipDeviceAttributeMaxSurface3D, ///< Maximum dimension (width, height, depth) of 3D surface.
+ hipDeviceAttributeMaxSurfaceCubemap, ///< Cuda only. Maximum dimensions of Cubemap surface.
+ hipDeviceAttributeMaxSurfaceCubemapLayered, ///< Cuda only. Maximum dimension of Cubemap layered surface.
+ hipDeviceAttributeMaxTexture1DWidth, ///< Maximum size of 1D texture.
+ hipDeviceAttributeMaxTexture1DLayered, ///< Cuda only. Maximum dimensions of 1D layered texture.
+ hipDeviceAttributeMaxTexture1DLinear, ///< Maximum number of elements allocatable in a 1D linear texture.
+ ///< Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda.
+ hipDeviceAttributeMaxTexture1DMipmap, ///< Cuda only. Maximum size of 1D mipmapped texture.
+ hipDeviceAttributeMaxTexture2DWidth, ///< Maximum dimension width of 2D texture.
+ hipDeviceAttributeMaxTexture2DHeight, ///< Maximum dimension hight of 2D texture.
+ hipDeviceAttributeMaxTexture2DGather, ///< Cuda only. Maximum dimensions of 2D texture if gather operations performed.
+ hipDeviceAttributeMaxTexture2DLayered, ///< Cuda only. Maximum dimensions of 2D layered texture.
+ hipDeviceAttributeMaxTexture2DLinear, ///< Cuda only. Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory.
+ hipDeviceAttributeMaxTexture2DMipmap, ///< Cuda only. Maximum dimensions of 2D mipmapped texture.
+ hipDeviceAttributeMaxTexture3DWidth, ///< Maximum dimension width of 3D texture.
+ hipDeviceAttributeMaxTexture3DHeight, ///< Maximum dimension height of 3D texture.
+ hipDeviceAttributeMaxTexture3DDepth, ///< Maximum dimension depth of 3D texture.
+ hipDeviceAttributeMaxTexture3DAlt, ///< Cuda only. Maximum dimensions of alternate 3D texture.
+ hipDeviceAttributeMaxTextureCubemap, ///< Cuda only. Maximum dimensions of Cubemap texture
+ hipDeviceAttributeMaxTextureCubemapLayered, ///< Cuda only. Maximum dimensions of Cubemap layered texture.
+ hipDeviceAttributeMaxThreadsDim, ///< Maximum dimension of a block
+ hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block.
+ hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor.
+ hipDeviceAttributeMaxPitch, ///< Maximum pitch in bytes allowed by memory copies
+ hipDeviceAttributeMemoryBusWidth, ///< Global memory bus width in bits.
+ hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz.
+ hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number.
+ hipDeviceAttributeMultiGpuBoardGroupID, ///< Cuda only. Unique ID of device group on the same multi-GPU board
+ hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device.
+ hipDeviceAttributeName, ///< Device name.
+ hipDeviceAttributePageableMemoryAccess, ///< Device supports coherently accessing pageable memory
+ ///< without calling hipHostRegister on it
+ hipDeviceAttributePageableMemoryAccessUsesHostPageTables, ///< Device accesses pageable memory via the host's page tables
+ hipDeviceAttributePciBusId, ///< PCI Bus ID.
+ hipDeviceAttributePciDeviceId, ///< PCI Device ID.
+ hipDeviceAttributePciDomainID, ///< PCI Domain ID.
+ hipDeviceAttributePersistingL2CacheMaxSize, ///< Cuda11 only. Maximum l2 persisting lines capacity in bytes
+ hipDeviceAttributeMaxRegistersPerBlock, ///< 32-bit registers available to a thread block. This number is shared
+ ///< by all thread blocks simultaneously resident on a multiprocessor.
+ hipDeviceAttributeMaxRegistersPerMultiprocessor, ///< 32-bit registers available per block.
+ hipDeviceAttributeReservedSharedMemPerBlock, ///< Cuda11 only. Shared memory reserved by CUDA driver per block.
+ hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in bytes.
+ hipDeviceAttributeSharedMemPerBlockOptin, ///< Cuda only. Maximum shared memory per block usable by special opt in.
+ hipDeviceAttributeSharedMemPerMultiprocessor, ///< Cuda only. Shared memory available per multiprocessor.
+ hipDeviceAttributeSingleToDoublePrecisionPerfRatio, ///< Cuda only. Performance ratio of single precision to double precision.
+ hipDeviceAttributeStreamPrioritiesSupported, ///< Cuda only. Whether to support stream priorities.
+ hipDeviceAttributeSurfaceAlignment, ///< Cuda only. Alignment requirement for surfaces
+ hipDeviceAttributeTccDriver, ///< Cuda only. Whether device is a Tesla device using TCC driver
+ hipDeviceAttributeTextureAlignment, ///< Alignment requirement for textures
+ hipDeviceAttributeTexturePitchAlignment, ///< Pitch alignment requirement for 2D texture references bound to pitched memory;
+ hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes.
+ hipDeviceAttributeTotalGlobalMem, ///< Global memory available on devicice.
+ hipDeviceAttributeUnifiedAddressing, ///< Cuda only. An unified address space shared with the host.
+ hipDeviceAttributeUuid, ///< Cuda only. Unique ID in 16 byte.
+ hipDeviceAttributeWarpSize, ///< Warp size in threads.
+ hipDeviceAttributeCudaCompatibleEnd = 9999,
+ hipDeviceAttributeAmdSpecificBegin = 10000,
+ hipDeviceAttributeClockInstructionRate = hipDeviceAttributeAmdSpecificBegin, ///< Frequency in khz of the timer used by the device-side "clock*"
+ hipDeviceAttributeArch, ///< Device architecture
+ hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum Shared Memory PerMultiprocessor.
+ hipDeviceAttributeGcnArch, ///< Device gcn architecture
+ hipDeviceAttributeGcnArchName, ///< Device gcnArch name in 256 bytes
+ hipDeviceAttributeHdpMemFlushCntl, ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
+ hipDeviceAttributeHdpRegFlushCntl, ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register
+ hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, ///< Supports cooperative launch on multiple
+ ///< devices with unmatched functions
+ hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, ///< Supports cooperative launch on multiple
+ ///< devices with unmatched grid dimensions
+ hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, ///< Supports cooperative launch on multiple
+ ///< devices with unmatched block dimensions
+ hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, ///< Supports cooperative launch on multiple
+ ///< devices with unmatched shared memories
+ hipDeviceAttributeIsLargeBar, ///< Whether it is LargeBar
+ hipDeviceAttributeAsicRevision, ///< Revision of the GPU in this device
+ hipDeviceAttributeCanUseStreamWaitValue, ///< '1' if Device supports hipStreamWaitValue32() and
+ ///< hipStreamWaitValue64() , '0' otherwise.
+ hipDeviceAttributeAmdSpecificEnd = 19999,
+ hipDeviceAttributeVendorSpecificBegin = 20000,
+ // Extended attributes for vendors
+} hipDeviceAttribute_t;
+
+typedef struct HIPdevprop_st {
+ int maxThreadsPerBlock;
+ int maxThreadsDim[3];
+ int maxGridSize[3];
+ int sharedMemPerBlock;
+ int totalConstantMemory;
+ int SIMDWidth;
+ int memPitch;
+ int regsPerBlock;
+ int clockRate;
+ int textureAlign;
+} HIPdevprop;
+
+typedef enum HIPpointer_attribute_enum {
+ HIP_POINTER_ATTRIBUTE_CONTEXT = 1,
+ HIP_POINTER_ATTRIBUTE_MEMORY_TYPE = 2,
+ HIP_POINTER_ATTRIBUTE_DEVICE_POINTER = 3,
+ HIP_POINTER_ATTRIBUTE_HOST_POINTER = 4,
+ HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS = 6,
+ HIP_POINTER_ATTRIBUTE_BUFFER_ID = 7,
+ HIP_POINTER_ATTRIBUTE_IS_MANAGED = 8,
+ HIP_POINTER_ATTRIBUTE_DEVICE_ORDINAL = 9,
+} HIPpointer_attribute;
+
+typedef enum hipFunction_attribute {
+ HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
+ HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,
+ HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,
+ HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,
+ HIP_FUNC_ATTRIBUTE_NUM_REGS = 4,
+ HIP_FUNC_ATTRIBUTE_PTX_VERSION = 5,
+ HIP_FUNC_ATTRIBUTE_BINARY_VERSION = 6,
+ HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA = 7,
+ HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = 8,
+ HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 9,
+ HIP_FUNC_ATTRIBUTE_MAX,
+} hipFunction_attribute;
+
+typedef enum hipFuncCache_t {
+ hipFuncCachePreferNone = 0x00,
+ hipFuncCachePreferShared = 0x01,
+ hipFuncCachePreferL1 = 0x02,
+ hipFuncCachePreferEqual = 0x03,
+} hipFuncCache_t;
+
+typedef enum hipSharedMemConfig {
+ hipSharedMemBankSizeDefault = 0x00,
+ hipSharedMemBankSizeFourByte = 0x01,
+ hipSharedMemBankSizeEightByte = 0x02,
+} hipSharedMemConfig;
+
+typedef enum HIPshared_carveout_enum {
+ HIP_SHAREDMEM_CARVEOUT_DEFAULT,
+ HIP_SHAREDMEM_CARVEOUT_MAX_SHARED = 100,
+ HIP_SHAREDMEM_CARVEOUT_MAX_L1 = 0,
+} HIPshared_carveout;
+
+
+
+typedef enum hipComputeMode {
+ hipComputeModeDefault = 0,
+ hipComputeModeProhibited = 2,
+ hipComputeModeExclusiveProcess = 3,
+} hipComputeMode;
+
+typedef enum HIPmem_advise_enum {
+ HIP_MEM_ADVISE_SET_READ_MOSTLY = 1,
+ HIP_MEM_ADVISE_UNSET_READ_MOSTLY = 2,
+ HIP_MEM_ADVISE_SET_PREFERRED_LOCATION = 3,
+ HIP_MEM_ADVISE_UNSET_PREFERRED_LOCATION = 4,
+ HIP_MEM_ADVISE_SET_ACCESSED_BY = 5,
+ HIP_MEM_ADVISE_UNSET_ACCESSED_BY = 6,
+} HIPmem_advise;
+
+typedef enum HIPmem_range_attribute_enum {
+ HIP_MEM_RANGE_ATTRIBUTE_READ_MOSTLY = 1,
+ HIP_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION = 2,
+ HIP_MEM_RANGE_ATTRIBUTE_ACCESSED_BY = 3,
+ HIP_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION = 4,
+} HIPmem_range_attribute;
+
+typedef enum hipJitOption {
+ hipJitOptionMaxRegisters = 0,
+ hipJitOptionThreadsPerBlock,
+ hipJitOptionWallTime,
+ hipJitOptionInfoLogBuffer,
+ hipJitOptionInfoLogBufferSizeBytes,
+ hipJitOptionErrorLogBuffer,
+ hipJitOptionErrorLogBufferSizeBytes,
+ hipJitOptionOptimizationLevel,
+ hipJitOptionTargetFromContext,
+ hipJitOptionTarget,
+ hipJitOptionFallbackStrategy,
+ hipJitOptionGenerateDebugInfo,
+ hipJitOptionLogVerbose,
+ hipJitOptionGenerateLineInfo,
+ hipJitOptionCacheMode,
+ hipJitOptionSm3xOpt,
+ hipJitOptionFastCompile,
+ hipJitOptionNumOptions,
+} hipJitOption;
+
+typedef enum HIPjit_target_enum {
+ HIP_TARGET_COMPUTE_20 = 20,
+ HIP_TARGET_COMPUTE_21 = 21,
+ HIP_TARGET_COMPUTE_30 = 30,
+ HIP_TARGET_COMPUTE_32 = 32,
+ HIP_TARGET_COMPUTE_35 = 35,
+ HIP_TARGET_COMPUTE_37 = 37,
+ HIP_TARGET_COMPUTE_50 = 50,
+ HIP_TARGET_COMPUTE_52 = 52,
+ HIP_TARGET_COMPUTE_53 = 53,
+ HIP_TARGET_COMPUTE_60 = 60,
+ HIP_TARGET_COMPUTE_61 = 61,
+ HIP_TARGET_COMPUTE_62 = 62,
+ HIP_TARGET_COMPUTE_70 = 70,
+ HIP_TARGET_COMPUTE_73 = 73,
+ HIP_TARGET_COMPUTE_75 = 75,
+} HIPjit_target;
+
+typedef enum HIPjit_fallback_enum {
+ HIP_PREFER_PTX = 0,
+ HIP_PREFER_BINARY,
+} HIPjit_fallback;
+
+typedef enum HIPjit_cacheMode_enum {
+ HIP_JIT_CACHE_OPTION_NONE = 0,
+ HIP_JIT_CACHE_OPTION_CG,
+ HIP_JIT_CACHE_OPTION_CA,
+} HIPjit_cacheMode;
+
+typedef enum HIPjitInputType_enum {
+ HIP_JIT_INPUT_HIPBIN = 0,
+ HIP_JIT_INPUT_PTX,
+ HIP_JIT_INPUT_FATBINARY,
+ HIP_JIT_INPUT_OBJECT,
+ HIP_JIT_INPUT_LIBRARY,
+ HIP_JIT_NUM_INPUT_TYPES,
+} HIPjitInputType;
+
+typedef struct HIPlinkState_st* HIPlinkState;
+
+typedef enum hipGLDeviceList {
+ hipGLDeviceListAll = 1, ///< All hip devices used by current OpenGL context.
+ hipGLDeviceListCurrentFrame = 2, ///< Hip devices used by current OpenGL context in current
+ ///< frame
+ hipGLDeviceListNextFrame = 3 ///< Hip devices used by current OpenGL context in next
+ ///< frame.
+} hipGLDeviceList;
+
+typedef enum hipGraphicsRegisterFlags {
+ hipGraphicsRegisterFlagsNone = 0,
+ hipGraphicsRegisterFlagsReadOnly = 1, ///< HIP will not write to this registered resource
+ hipGraphicsRegisterFlagsWriteDiscard =
+ 2, ///< HIP will only write and will not read from this registered resource
+ hipGraphicsRegisterFlagsSurfaceLoadStore = 4, ///< HIP will bind this resource to a surface
+ hipGraphicsRegisterFlagsTextureGather =
+ 8 ///< HIP will perform texture gather operations on this registered resource
+} hipGraphicsRegisterFlags;
+
+typedef enum HIPgraphicsRegisterFlags_enum {
+ HIP_GRAPHICS_REGISTER_FLAGS_NONE = 0x00,
+ HIP_GRAPHICS_REGISTER_FLAGS_READ_ONLY = 0x01,
+ HIP_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 0x02,
+ HIP_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST = 0x04,
+ HIP_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = 0x08,
+} HIPgraphicsRegisterFlags;
+
+typedef enum HIPgraphicsMapResourceFlags_enum {
+ HIP_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00,
+ HIP_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01,
+ HIP_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02,
+} HIPgraphicsMapResourceFlags;
+
+typedef enum HIParray_cubemap_face_enum {
+ HIP_HIPBEMAP_FACE_POSITIVE_X = 0x00,
+ HIP_HIPBEMAP_FACE_NEGATIVE_X = 0x01,
+ HIP_HIPBEMAP_FACE_POSITIVE_Y = 0x02,
+ HIP_HIPBEMAP_FACE_NEGATIVE_Y = 0x03,
+ HIP_HIPBEMAP_FACE_POSITIVE_Z = 0x04,
+ HIP_HIPBEMAP_FACE_NEGATIVE_Z = 0x05,
+} HIParray_cubemap_face;
+
+typedef enum hipLimit_t {
+ HIP_LIMIT_STACK_SIZE = 0x00,
+ HIP_LIMIT_PRINTF_FIFO_SIZE = 0x01,
+ hipLimitMallocHeapSize = 0x02,
+ HIP_LIMIT_DEV_RUNTIME_SYNC_DEPTH = 0x03,
+ HIP_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 0x04,
+ HIP_LIMIT_MAX,
+} hipLimit_t;
+
+typedef enum hipResourceType {
+ hipResourceTypeArray = 0x00,
+ hipResourceTypeMipmappedArray = 0x01,
+ hipResourceTypeLinear = 0x02,
+ hipResourceTypePitch2D = 0x03,
+} hipResourceType;
+
+typedef enum hipError_t {
+ hipSuccess = 0,
+ hipErrorInvalidValue = 1,
+ hipErrorOutOfMemory = 2,
+ hipErrorNotInitialized = 3,
+ hipErrorDeinitialized = 4,
+ hipErrorProfilerDisabled = 5,
+ hipErrorProfilerNotInitialized = 6,
+ hipErrorProfilerAlreadyStarted = 7,
+ hipErrorProfilerAlreadyStopped = 8,
+ hipErrorNoDevice = 100,
+ hipErrorInvalidDevice = 101,
+ hipErrorInvalidImage = 200,
+ hipErrorInvalidContext = 201,
+ hipErrorContextAlreadyCurrent = 202,
+ hipErrorMapFailed = 205,
+ hipErrorUnmapFailed = 206,
+ hipErrorArrayIsMapped = 207,
+ hipErrorAlreadyMapped = 208,
+ hipErrorNoBinaryForGpu = 209,
+ hipErrorAlreadyAcquired = 210,
+ hipErrorNotMapped = 211,
+ hipErrorNotMappedAsArray = 212,
+ hipErrorNotMappedAsPointer = 213,
+ hipErrorECCNotCorrectable = 214,
+ hipErrorUnsupportedLimit = 215,
+ hipErrorContextAlreadyInUse = 216,
+ hipErrorPeerAccessUnsupported = 217,
+ hipErrorInvalidKernelFile = 218,
+ hipErrorInvalidGraphicsContext = 219,
+ hipErrorInvalidSource = 300,
+ hipErrorFileNotFound = 301,
+ hipErrorSharedObjectSymbolNotFound = 302,
+ hipErrorSharedObjectInitFailed = 303,
+ hipErrorOperatingSystem = 304,
+ hipErrorInvalidHandle = 400,
+ hipErrorNotFound = 500,
+ hipErrorNotReady = 600,
+ hipErrorIllegalAddress = 700,
+ hipErrorLaunchOutOfResources = 701,
+ hipErrorLaunchTimeOut = 702,
+ hipErrorPeerAccessAlreadyEnabled = 704,
+ hipErrorPeerAccessNotEnabled = 705,
+ hipErrorSetOnActiveProcess = 708,
+ hipErrorAssert = 710,
+ hipErrorHostMemoryAlreadyRegistered = 712,
+ hipErrorHostMemoryNotRegistered = 713,
+ hipErrorLaunchFailure = 719,
+ hipErrorCooperativeLaunchTooLarge = 720,
+ hipErrorNotSupported = 801,
+ hipErrorUnknown = 999,
+} hipError_t;
+
+/**
+ * Stream CallBack struct
+ */
+typedef void (*hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData);
+
+typedef enum HIPdevice_P2PAttribute_enum {
+ HIP_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK = 0x01,
+ HIP_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = 0x02,
+ HIP_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = 0x03,
+ HIP_DEVICE_P2P_ATTRIBUTE_ARRAY_ACCESS_ACCESS_SUPPORTED = 0x04,
+} HIPdevice_P2PAttribute;
+
+typedef struct hipGraphicsResource_st* hipGraphicsResource;
+
+typedef struct hip_Memcpy2D {
+ size_t srcXInBytes;
+ size_t srcY;
+ hipMemoryType srcMemoryType;
+ const void* srcHost;
+ hipDeviceptr_t srcDevice;
+ hArray * srcArray;
+ size_t srcPitch;
+ size_t dstXInBytes;
+ size_t dstY;
+ hipMemoryType dstMemoryType;
+ void* dstHost;
+ hipDeviceptr_t dstDevice;
+ hArray * dstArray;
+ size_t dstPitch;
+ size_t WidthInBytes;
+ size_t Height;
+} hip_Memcpy2D;
+
+typedef enum hipDeviceP2PAttr {
+ hipDevP2PAttrPerformanceRank = 0,
+ hipDevP2PAttrAccessSupported,
+ hipDevP2PAttrNativeAtomicSupported,
+ hipDevP2PAttrHipArrayAccessSupported
+} hipDeviceP2PAttr;
+
+typedef struct HIP_MEMCPY3D {
+ size_t srcXInBytes;
+ size_t srcY;
+ size_t srcZ;
+ size_t srcLOD;
+ hipMemoryType srcMemoryType;
+ const void* srcHost;
+ hipDeviceptr_t srcDevice;
+ hArray * srcArray;
+ void* reserved0;
+ size_t srcPitch;
+ size_t srcHeight;
+ size_t dstXInBytes;
+ size_t dstY;
+ size_t dstZ;
+ size_t dstLOD;
+ hipMemoryType dstMemoryType;
+ void* dstHost;
+ hipDeviceptr_t dstDevice;
+ hArray * dstArray;
+ void* reserved1;
+ size_t dstPitch;
+ size_t dstHeight;
+ size_t WidthInBytes;
+ size_t Height;
+ size_t Depth;
+} HIP_MEMCPY3D;
+
+typedef struct HIP_MEMCPY3D_PEER_st {
+ size_t srcXInBytes;
+ size_t srcY;
+ size_t srcZ;
+ size_t srcLOD;
+ hipMemoryType srcMemoryType;
+ const void* srcHost;
+ hipDeviceptr_t srcDevice;
+ hArray * srcArray;
+ hipCtx_t srcContext;
+ size_t srcPitch;
+ size_t srcHeight;
+ size_t dstXInBytes;
+ size_t dstY;
+ size_t dstZ;
+ size_t dstLOD;
+ hipMemoryType dstMemoryType;
+ void* dstHost;
+ hipDeviceptr_t dstDevice;
+ hArray * dstArray;
+ hipCtx_t dstContext;
+ size_t dstPitch;
+ size_t dstHeight;
+ size_t WidthInBytes;
+ size_t Height;
+ size_t Depth;
+} HIP_MEMCPY3D_PEER;
+
+typedef struct HIP_ARRAY_DESCRIPTOR {
+ size_t Width;
+ size_t Height;
+ hipArray_Format Format;
+ unsigned int NumChannels;
+} HIP_ARRAY_DESCRIPTOR;
+
+typedef struct HIP_ARRAY3D_DESCRIPTOR {
+ size_t Width;
+ size_t Height;
+ size_t Depth;
+ hipArray_Format Format;
+ unsigned int NumChannels;
+ unsigned int Flags;
+} HIP_ARRAY3D_DESCRIPTOR;
+
+typedef struct HIP_RESOURCE_DESC_st {
+ hipResourceType resType;
+ union {
+ struct {
+ hArray * h_Array;
+ } array;
+ struct {
+ hipMipmappedArray_t hMipmappedArray;
+ } mipmap;
+ struct {
+ hipDeviceptr_t devPtr;
+ hipArray_Format format;
+ unsigned int numChannels;
+ size_t sizeInBytes;
+ } linear;
+ struct {
+ hipDeviceptr_t devPtr;
+ hipArray_Format format;
+ unsigned int numChannels;
+ size_t width;
+ size_t height;
+ size_t pitchInBytes;
+ } pitch2D;
+ struct {
+ int reserved[32];
+ } reserved;
+ } res;
+ unsigned int flags;
+} hipResourceDesc;
+
+/**
+ * hip texture resource view formats
+ */
+typedef enum hipResourceViewFormat {
+ hipResViewFormatNone = 0x00,
+ hipResViewFormatUnsignedChar1 = 0x01,
+ hipResViewFormatUnsignedChar2 = 0x02,
+ hipResViewFormatUnsignedChar4 = 0x03,
+ hipResViewFormatSignedChar1 = 0x04,
+ hipResViewFormatSignedChar2 = 0x05,
+ hipResViewFormatSignedChar4 = 0x06,
+ hipResViewFormatUnsignedShort1 = 0x07,
+ hipResViewFormatUnsignedShort2 = 0x08,
+ hipResViewFormatUnsignedShort4 = 0x09,
+ hipResViewFormatSignedShort1 = 0x0a,
+ hipResViewFormatSignedShort2 = 0x0b,
+ hipResViewFormatSignedShort4 = 0x0c,
+ hipResViewFormatUnsignedInt1 = 0x0d,
+ hipResViewFormatUnsignedInt2 = 0x0e,
+ hipResViewFormatUnsignedInt4 = 0x0f,
+ hipResViewFormatSignedInt1 = 0x10,
+ hipResViewFormatSignedInt2 = 0x11,
+ hipResViewFormatSignedInt4 = 0x12,
+ hipResViewFormatHalf1 = 0x13,
+ hipResViewFormatHalf2 = 0x14,
+ hipResViewFormatHalf4 = 0x15,
+ hipResViewFormatFloat1 = 0x16,
+ hipResViewFormatFloat2 = 0x17,
+ hipResViewFormatFloat4 = 0x18,
+ hipResViewFormatUnsignedBlockCompressed1 = 0x19,
+ hipResViewFormatUnsignedBlockCompressed2 = 0x1a,
+ hipResViewFormatUnsignedBlockCompressed3 = 0x1b,
+ hipResViewFormatUnsignedBlockCompressed4 = 0x1c,
+ hipResViewFormatSignedBlockCompressed4 = 0x1d,
+ hipResViewFormatUnsignedBlockCompressed5 = 0x1e,
+ hipResViewFormatSignedBlockCompressed5 = 0x1f,
+ hipResViewFormatUnsignedBlockCompressed6H = 0x20,
+ hipResViewFormatSignedBlockCompressed6H = 0x21,
+ hipResViewFormatUnsignedBlockCompressed7 = 0x22
+}hipResourceViewFormat;
+
+typedef enum HIPresourceViewFormat_enum
+{
+ HIP_RES_VIEW_FORMAT_NONE = 0x00, /**< No resource view format (use underlying resource format) */
+ HIP_RES_VIEW_FORMAT_UINT_1X8 = 0x01, /**< 1 channel unsigned 8-bit integers */
+ HIP_RES_VIEW_FORMAT_UINT_2X8 = 0x02, /**< 2 channel unsigned 8-bit integers */
+ HIP_RES_VIEW_FORMAT_UINT_4X8 = 0x03, /**< 4 channel unsigned 8-bit integers */
+ HIP_RES_VIEW_FORMAT_SINT_1X8 = 0x04, /**< 1 channel signed 8-bit integers */
+ HIP_RES_VIEW_FORMAT_SINT_2X8 = 0x05, /**< 2 channel signed 8-bit integers */
+ HIP_RES_VIEW_FORMAT_SINT_4X8 = 0x06, /**< 4 channel signed 8-bit integers */
+ HIP_RES_VIEW_FORMAT_UINT_1X16 = 0x07, /**< 1 channel unsigned 16-bit integers */
+ HIP_RES_VIEW_FORMAT_UINT_2X16 = 0x08, /**< 2 channel unsigned 16-bit integers */
+ HIP_RES_VIEW_FORMAT_UINT_4X16 = 0x09, /**< 4 channel unsigned 16-bit integers */
+ HIP_RES_VIEW_FORMAT_SINT_1X16 = 0x0a, /**< 1 channel signed 16-bit integers */
+ HIP_RES_VIEW_FORMAT_SINT_2X16 = 0x0b, /**< 2 channel signed 16-bit integers */
+ HIP_RES_VIEW_FORMAT_SINT_4X16 = 0x0c, /**< 4 channel signed 16-bit integers */
+ HIP_RES_VIEW_FORMAT_UINT_1X32 = 0x0d, /**< 1 channel unsigned 32-bit integers */
+ HIP_RES_VIEW_FORMAT_UINT_2X32 = 0x0e, /**< 2 channel unsigned 32-bit integers */
+ HIP_RES_VIEW_FORMAT_UINT_4X32 = 0x0f, /**< 4 channel unsigned 32-bit integers */
+ HIP_RES_VIEW_FORMAT_SINT_1X32 = 0x10, /**< 1 channel signed 32-bit integers */
+ HIP_RES_VIEW_FORMAT_SINT_2X32 = 0x11, /**< 2 channel signed 32-bit integers */
+ HIP_RES_VIEW_FORMAT_SINT_4X32 = 0x12, /**< 4 channel signed 32-bit integers */
+ HIP_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13, /**< 1 channel 16-bit floating point */
+ HIP_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14, /**< 2 channel 16-bit floating point */
+ HIP_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15, /**< 4 channel 16-bit floating point */
+ HIP_RES_VIEW_FORMAT_FLOAT_1X32 = 0x16, /**< 1 channel 32-bit floating point */
+ HIP_RES_VIEW_FORMAT_FLOAT_2X32 = 0x17, /**< 2 channel 32-bit floating point */
+ HIP_RES_VIEW_FORMAT_FLOAT_4X32 = 0x18, /**< 4 channel 32-bit floating point */
+ HIP_RES_VIEW_FORMAT_UNSIGNED_BC1 = 0x19, /**< Block compressed 1 */
+ HIP_RES_VIEW_FORMAT_UNSIGNED_BC2 = 0x1a, /**< Block compressed 2 */
+ HIP_RES_VIEW_FORMAT_UNSIGNED_BC3 = 0x1b, /**< Block compressed 3 */
+ HIP_RES_VIEW_FORMAT_UNSIGNED_BC4 = 0x1c, /**< Block compressed 4 unsigned */
+ HIP_RES_VIEW_FORMAT_SIGNED_BC4 = 0x1d, /**< Block compressed 4 signed */
+ HIP_RES_VIEW_FORMAT_UNSIGNED_BC5 = 0x1e, /**< Block compressed 5 unsigned */
+ HIP_RES_VIEW_FORMAT_SIGNED_BC5 = 0x1f, /**< Block compressed 5 signed */
+ HIP_RES_VIEW_FORMAT_UNSIGNED_BC6H = 0x20, /**< Block compressed 6 unsigned half-float */
+ HIP_RES_VIEW_FORMAT_SIGNED_BC6H = 0x21, /**< Block compressed 6 signed half-float */
+ HIP_RES_VIEW_FORMAT_UNSIGNED_BC7 = 0x22 /**< Block compressed 7 */
+} HIPresourceViewFormat;
+
+/**
+ * hip resource view descriptor
+ */
+struct hipResourceViewDesc {
+ enum hipResourceViewFormat format;
+ size_t width;
+ size_t height;
+ size_t depth;
+ unsigned int firstMipmapLevel;
+ unsigned int lastMipmapLevel;
+ unsigned int firstLayer;
+ unsigned int lastLayer;
+};
+
+
+typedef struct hipTextureDesc_st {
+ hipTextureAddressMode addressMode[3];
+ hipTextureFilterMode filterMode;
+ unsigned int flags;
+ unsigned int maxAnisotropy;
+ hipTextureFilterMode mipmapFilterMode;
+ float mipmapLevelBias;
+ float minMipmapLevelClamp;
+ float maxMipmapLevelClamp;
+ float borderColor[4];
+ int reserved[12];
+} hipTextureDesc;
+
+/**
+ * Resource view descriptor
+ */
+typedef struct HIP_RESOURCE_VIEW_DESC_st {
+ hipResourceViewFormat format;
+ size_t width;
+ size_t height;
+ size_t depth;
+ unsigned int firstMipmapLevel;
+ unsigned int lastMipmapLevel;
+ unsigned int firstLayer;
+ unsigned int lastLayer;
+ unsigned int reserved[16];
+} HIP_RESOURCE_VIEW_DESC;
+
+typedef struct HIP_POINTER_ATTRIBUTE_P2P_TOKENS_st {
+ unsigned long long p2pToken;
+ unsigned int vaSpaceToken;
+} HIP_POINTER_ATTRIBUTE_P2P_TOKENS;
+
+
+typedef unsigned int GLenum;
+typedef unsigned int GLuint;
+typedef int GLint;
+
+typedef enum HIPGLDeviceList_enum {
+ HIP_GL_DEVICE_LIST_ALL = 0x01,
+ HIP_GL_DEVICE_LIST_CURRENT_FRAME = 0x02,
+ HIP_GL_DEVICE_LIST_NEXT_FRAME = 0x03,
+} HIPGLDeviceList;
+
+typedef enum HIPGLmap_flags_enum {
+ HIP_GL_MAP_RESOURCE_FLAGS_NONE = 0x00,
+ HIP_GL_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01,
+ HIP_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02,
+} HIPGLmap_flags;
+
+
+/* Function types. */
+typedef hipError_t HIPAPI thipGetErrorName(hipError_t error, const char** pStr);
+typedef hipError_t HIPAPI thipInit(unsigned int Flags);
+typedef hipError_t HIPAPI thipDriverGetVersion(int* driverVersion);
+typedef hipError_t HIPAPI thipGetDevice(hipDevice_t* device, int ordinal);
+typedef hipError_t HIPAPI thipGetDeviceCount(int* count);
+typedef hipError_t HIPAPI thipDeviceGetName(char* name, int len, hipDevice_t dev);
+typedef hipError_t HIPAPI thipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attrib, hipDevice_t dev);
+typedef hipError_t HIPAPI thipDeviceComputeCapability(int* major, int* minor, hipDevice_t dev);
+typedef hipError_t HIPAPI thipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev);
+typedef hipError_t HIPAPI thipDevicePrimaryCtxRelease(hipDevice_t dev);
+typedef hipError_t HIPAPI thipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags);
+typedef hipError_t HIPAPI thipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int* active);
+typedef hipError_t HIPAPI thipDevicePrimaryCtxReset(hipDevice_t dev);
+typedef hipError_t HIPAPI thipCtxCreate(hipCtx_t* pctx, unsigned int flags, hipDevice_t dev);
+typedef hipError_t HIPAPI thipCtxDestroy(hipCtx_t ctx);
+typedef hipError_t HIPAPI thipCtxPushCurrent(hipCtx_t ctx);
+typedef hipError_t HIPAPI thipCtxPopCurrent(hipCtx_t* pctx);
+typedef hipError_t HIPAPI thipCtxSetCurrent(hipCtx_t ctx);
+typedef hipError_t HIPAPI thipCtxGetCurrent(hipCtx_t* pctx);
+typedef hipError_t HIPAPI thipCtxGetDevice(hipDevice_t* device);
+typedef hipError_t HIPAPI thipCtxGetFlags(unsigned int* flags);
+typedef hipError_t HIPAPI thipCtxSynchronize(void);
+typedef hipError_t HIPAPI thipDeviceSynchronize(void);
+typedef hipError_t HIPAPI thipCtxGetCacheConfig(hipFuncCache_t* pconfig);
+typedef hipError_t HIPAPI thipCtxSetCacheConfig(hipFuncCache_t config);
+typedef hipError_t HIPAPI thipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig);
+typedef hipError_t HIPAPI thipCtxSetSharedMemConfig(hipSharedMemConfig config);
+typedef hipError_t HIPAPI thipCtxGetApiVersion(hipCtx_t ctx, unsigned int* version);
+typedef hipError_t HIPAPI thipModuleLoad(hipModule_t* module, const char* fname);
+typedef hipError_t HIPAPI thipModuleLoadData(hipModule_t* module, const void* image);
+typedef hipError_t HIPAPI thipModuleLoadDataEx(hipModule_t* module, const void* image, unsigned int numOptions, hipJitOption* options, void** optionValues);
+typedef hipError_t HIPAPI thipModuleUnload(hipModule_t hmod);
+typedef hipError_t HIPAPI thipModuleGetFunction(hipFunction_t* hfunc, hipModule_t hmod, const char* name);
+typedef hipError_t HIPAPI thipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, const char* name);
+typedef hipError_t HIPAPI thipModuleGetTexRef(textureReference** pTexRef, hipModule_t hmod, const char* name);
+typedef hipError_t HIPAPI thipMemGetInfo(size_t* free, size_t* total);
+typedef hipError_t HIPAPI thipMalloc(hipDeviceptr_t* dptr, size_t bytesize);
+typedef hipError_t HIPAPI thipMemAllocPitch(hipDeviceptr_t* dptr, size_t* pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
+typedef hipError_t HIPAPI thipFree(hipDeviceptr_t dptr);
+typedef hipError_t HIPAPI thipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDeviceptr_t dptr);
+typedef hipError_t HIPAPI thipHostMalloc(void** pp, size_t bytesize);
+typedef hipError_t HIPAPI thipHostFree(void* p);
+typedef hipError_t HIPAPI thipMemHostAlloc(void** pp, size_t bytesize, unsigned int Flags);
+typedef hipError_t HIPAPI thipHostGetDevicePointer(hipDeviceptr_t* pdptr, void* p, unsigned int Flags);
+typedef hipError_t HIPAPI thipHostGetFlags(unsigned int* pFlags, void* p);
+typedef hipError_t HIPAPI thipMallocManaged(hipDeviceptr_t* dptr, size_t bytesize, unsigned int flags);
+typedef hipError_t HIPAPI thipDeviceGetByPCIBusId(hipDevice_t* dev, const char* pciBusId);
+typedef hipError_t HIPAPI thipDeviceGetPCIBusId(char* pciBusId, int len, hipDevice_t dev);
+typedef hipError_t HIPAPI thipMemHostUnregister(void* p);
+typedef hipError_t HIPAPI thipMemcpy(hipDeviceptr_t dst, hipDeviceptr_t src, size_t ByteCount);
+typedef hipError_t HIPAPI thipMemcpyPeer(hipDeviceptr_t dstDevice, hipCtx_t dstContext, hipDeviceptr_t srcDevice, hipCtx_t srcContext, size_t ByteCount);
+typedef hipError_t HIPAPI thipMemcpyHtoD(hipDeviceptr_t dstDevice, void* srcHost, size_t ByteCount);
+typedef hipError_t HIPAPI thipMemcpyDtoH(void* dstHost, hipDeviceptr_t srcDevice, size_t ByteCount);
+typedef hipError_t HIPAPI thipMemcpyDtoD(hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount);
+typedef hipError_t HIPAPI thipDrvMemcpy2DUnaligned(const hip_Memcpy2D* pCopy);
+typedef hipError_t HIPAPI thipMemcpyParam2D(const hip_Memcpy2D* pCopy);
+typedef hipError_t HIPAPI thipDrvMemcpy3D(const HIP_MEMCPY3D* pCopy);
+typedef hipError_t HIPAPI thipMemcpyHtoDAsync(hipDeviceptr_t dstDevice, const void* srcHost, size_t ByteCount, hipStream_t hStream);
+typedef hipError_t HIPAPI thipMemcpyDtoHAsync(void* dstHost, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream);
+typedef hipError_t HIPAPI thipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t hStream);
+typedef hipError_t HIPAPI thipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t hStream);
+typedef hipError_t HIPAPI thipMemsetD8(hipDeviceptr_t dstDevice, unsigned char uc, size_t N);
+typedef hipError_t HIPAPI thipMemsetD16(hipDeviceptr_t dstDevice, unsigned short us, size_t N);
+typedef hipError_t HIPAPI thipMemsetD32(hipDeviceptr_t dstDevice, unsigned int ui, size_t N);
+typedef hipError_t HIPAPI thipMemsetD8Async(hipDeviceptr_t dstDevice, unsigned char uc, size_t N, hipStream_t hStream);
+typedef hipError_t HIPAPI thipMemsetD16Async(hipDeviceptr_t dstDevice, unsigned short us, size_t N, hipStream_t hStream);
+typedef hipError_t HIPAPI thipMemsetD32Async(hipDeviceptr_t dstDevice, unsigned int ui, size_t N, hipStream_t hStream);
+typedef hipError_t HIPAPI thipMemsetD2D8Async(hipDeviceptr_t dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, hipStream_t hStream);
+typedef hipError_t HIPAPI thipMemsetD2D16Async(hipDeviceptr_t dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, hipStream_t hStream);
+typedef hipError_t HIPAPI thipMemsetD2D32Async(hipDeviceptr_t dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height, hipStream_t hStream);
+typedef hipError_t HIPAPI thipArrayCreate(hArray ** pHandle, const HIP_ARRAY_DESCRIPTOR* pAllocateArray);
+typedef hipError_t HIPAPI thipArrayDestroy(hArray hArray);
+typedef hipError_t HIPAPI thipArray3DCreate(hArray * pHandle, const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray);
+typedef hipError_t HIPAPI hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr);
+typedef hipError_t HIPAPI thipStreamCreateWithFlags(hipStream_t* phStream, unsigned int Flags);
+typedef hipError_t HIPAPI thipStreamCreateWithPriority(hipStream_t* phStream, unsigned int flags, int priority);
+typedef hipError_t HIPAPI thipStreamGetPriority(hipStream_t hStream, int* priority);
+typedef hipError_t HIPAPI thipStreamGetFlags(hipStream_t hStream, unsigned int* flags);
+typedef hipError_t HIPAPI thipStreamWaitEvent(hipStream_t hStream, hipEvent_t hEvent, unsigned int Flags);
+typedef hipError_t HIPAPI thipStreamAddCallback(hipStream_t hStream, hipStreamCallback_t callback, void* userData, unsigned int flags);
+typedef hipError_t HIPAPI thipStreamQuery(hipStream_t hStream);
+typedef hipError_t HIPAPI thipStreamSynchronize(hipStream_t hStream);
+typedef hipError_t HIPAPI thipStreamDestroy(hipStream_t hStream);
+typedef hipError_t HIPAPI thipEventCreateWithFlags(hipEvent_t* phEvent, unsigned int Flags);
+typedef hipError_t HIPAPI thipEventRecord(hipEvent_t hEvent, hipStream_t hStream);
+typedef hipError_t HIPAPI thipEventQuery(hipEvent_t hEvent);
+typedef hipError_t HIPAPI thipEventSynchronize(hipEvent_t hEvent);
+typedef hipError_t HIPAPI thipEventDestroy(hipEvent_t hEvent);
+typedef hipError_t HIPAPI thipEventElapsedTime(float* pMilliseconds, hipEvent_t hStart, hipEvent_t hEnd);
+typedef hipError_t HIPAPI thipFuncGetAttribute(int* pi, hipFunction_attribute attrib, hipFunction_t hfunc);
+typedef hipError_t HIPAPI thipFuncSetCacheConfig(hipFunction_t hfunc, hipFuncCache_t config);
+typedef hipError_t HIPAPI thipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hipStream_t hStream, void** kernelParams, void** extra);
+typedef hipError_t HIPAPI thipDrvOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, hipFunction_t func, int blockSize, size_t dynamicSMemSize);
+typedef hipError_t HIPAPI thipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, hipFunction_t func, int blockSize, size_t dynamicSMemSize, unsigned int flags);
+typedef hipError_t HIPAPI thipModuleOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, hipFunction_t func, size_t dynamicSMemSize, int blockSizeLimit);
+typedef hipError_t HIPAPI thipTexRefSetArray(hipTexRef hTexRef, hArray * hArray, unsigned int Flags);
+typedef hipError_t HIPAPI thipTexRefSetAddress(size_t* ByteOffset, hipTexRef hTexRef, hipDeviceptr_t dptr, size_t bytes);
+typedef hipError_t HIPAPI thipTexRefSetAddress2D(hipTexRef hTexRef, const HIP_ARRAY_DESCRIPTOR* desc, hipDeviceptr_t dptr, size_t Pitch);
+typedef hipError_t HIPAPI thipTexRefSetFormat(hipTexRef hTexRef, hipArray_Format fmt, int NumPackedComponents);
+typedef hipError_t HIPAPI thipTexRefSetAddressMode(hipTexRef hTexRef, int dim, hipTextureAddressMode am);
+typedef hipError_t HIPAPI thipTexRefSetFilterMode(hipTexRef hTexRef, hipTextureFilterMode fm);
+typedef hipError_t HIPAPI thipTexRefSetFlags(hipTexRef hTexRef, unsigned int Flags);
+typedef hipError_t HIPAPI thipTexRefGetAddress(hipDeviceptr_t* pdptr, hipTexRef hTexRef);
+typedef hipError_t HIPAPI thipTexRefGetArray(hArray ** phArray, hipTexRef hTexRef);
+typedef hipError_t HIPAPI thipTexRefGetAddressMode(hipTextureAddressMode* pam, hipTexRef hTexRef, int dim);
+typedef hipError_t HIPAPI thipTexObjectCreate(hipTextureObject_t* pTexObject, const hipResourceDesc* pResDesc, const hipTextureDesc* pTexDesc, const HIP_RESOURCE_VIEW_DESC* pResViewDesc);
+typedef hipError_t HIPAPI thipTexObjectDestroy(hipTextureObject_t texObject);
+typedef hipError_t HIPAPI thipDeviceCanAccessPeer(int* canAccessPeer, hipDevice_t dev, hipDevice_t peerDev);
+typedef hipError_t HIPAPI thipCtxEnablePeerAccess(hipCtx_t peerContext, unsigned int Flags);
+typedef hipError_t HIPAPI thipCtxDisablePeerAccess(hipCtx_t peerContext);
+typedef hipError_t HIPAPI thipDeviceGetP2PAttribute(int* value, hipDeviceP2PAttr attrib, hipDevice_t srcDevice, hipDevice_t dstDevice);
+typedef hipError_t HIPAPI thipGraphicsUnregisterResource(hipGraphicsResource resource);
+typedef hipError_t HIPAPI thipGraphicsResourceGetMappedMipmappedArray(hipMipmappedArray_t* pMipmappedArray, hipGraphicsResource resource);
+typedef hipError_t HIPAPI thipGraphicsResourceGetMappedPointer(hipDeviceptr_t* pDevPtr, size_t* pSize, hipGraphicsResource resource);
+typedef hipError_t HIPAPI thipGraphicsMapResources(unsigned int count, hipGraphicsResource* resources, hipStream_t hStream);
+typedef hipError_t HIPAPI thipGraphicsUnmapResources(unsigned int count, hipGraphicsResource* resources, hipStream_t hStream);
+typedef hipError_t HIPAPI thipGraphicsGLRegisterBuffer(hipGraphicsResource* pCudaResource, GLuint buffer, unsigned int Flags);
+typedef hipError_t HIPAPI thipGLGetDevices(unsigned int* pHipDeviceCount, int* pHipDevices, unsigned int hipDeviceCount, hipGLDeviceList deviceList);
+
+
+/* Function declarations. */
+extern thipGetErrorName *hipGetErrorName;
+extern thipInit *hipInit;
+extern thipDriverGetVersion *hipDriverGetVersion;
+extern thipGetDevice *hipGetDevice;
+extern thipGetDeviceCount *hipGetDeviceCount;
+extern thipDeviceGetName *hipDeviceGetName;
+extern thipDeviceGetAttribute *hipDeviceGetAttribute;
+extern thipDeviceComputeCapability *hipDeviceComputeCapability;
+extern thipDevicePrimaryCtxRetain *hipDevicePrimaryCtxRetain;
+extern thipDevicePrimaryCtxRelease *hipDevicePrimaryCtxRelease;
+extern thipDevicePrimaryCtxSetFlags *hipDevicePrimaryCtxSetFlags;
+extern thipDevicePrimaryCtxGetState *hipDevicePrimaryCtxGetState;
+extern thipDevicePrimaryCtxReset *hipDevicePrimaryCtxReset;
+extern thipCtxCreate *hipCtxCreate;
+extern thipCtxDestroy *hipCtxDestroy;
+extern thipCtxPushCurrent *hipCtxPushCurrent;
+extern thipCtxPopCurrent *hipCtxPopCurrent;
+extern thipCtxSetCurrent *hipCtxSetCurrent;
+extern thipCtxGetCurrent *hipCtxGetCurrent;
+extern thipCtxGetDevice *hipCtxGetDevice;
+extern thipCtxGetFlags *hipCtxGetFlags;
+extern thipCtxSynchronize *hipCtxSynchronize;
+extern thipDeviceSynchronize *hipDeviceSynchronize;
+extern thipCtxGetCacheConfig *hipCtxGetCacheConfig;
+extern thipCtxSetCacheConfig *hipCtxSetCacheConfig;
+extern thipCtxGetSharedMemConfig *hipCtxGetSharedMemConfig;
+extern thipCtxSetSharedMemConfig *hipCtxSetSharedMemConfig;
+extern thipCtxGetApiVersion *hipCtxGetApiVersion;
+extern thipModuleLoad *hipModuleLoad;
+extern thipModuleLoadData *hipModuleLoadData;
+extern thipModuleLoadDataEx *hipModuleLoadDataEx;
+extern thipModuleUnload *hipModuleUnload;
+extern thipModuleGetFunction *hipModuleGetFunction;
+extern thipModuleGetGlobal *hipModuleGetGlobal;
+extern thipModuleGetTexRef *hipModuleGetTexRef;
+extern thipMemGetInfo *hipMemGetInfo;
+extern thipMalloc *hipMalloc;
+extern thipMemAllocPitch *hipMemAllocPitch;
+extern thipFree *hipFree;
+extern thipMemGetAddressRange *hipMemGetAddressRange;
+extern thipHostMalloc *hipHostMalloc;
+extern thipHostFree *hipHostFree;
+extern thipHostGetDevicePointer *hipHostGetDevicePointer;
+extern thipHostGetFlags *hipHostGetFlags;
+extern thipMallocManaged *hipMallocManaged;
+extern thipDeviceGetByPCIBusId *hipDeviceGetByPCIBusId;
+extern thipDeviceGetPCIBusId *hipDeviceGetPCIBusId;
+extern thipMemcpyPeer *hipMemcpyPeer;
+extern thipMemcpyHtoD *hipMemcpyHtoD;
+extern thipMemcpyDtoH *hipMemcpyDtoH;
+extern thipMemcpyDtoD *hipMemcpyDtoD;
+extern thipDrvMemcpy2DUnaligned *hipDrvMemcpy2DUnaligned;
+extern thipMemcpyParam2D *hipMemcpyParam2D;
+extern thipDrvMemcpy3D *hipDrvMemcpy3D;
+extern thipMemcpyHtoDAsync *hipMemcpyHtoDAsync;
+extern thipMemcpyDtoHAsync *hipMemcpyDtoHAsync;
+extern thipMemcpyParam2DAsync *hipMemcpyParam2DAsync;
+extern thipDrvMemcpy3DAsync *hipDrvMemcpy3DAsync;
+extern thipMemsetD8 *hipMemsetD8;
+extern thipMemsetD16 *hipMemsetD16;
+extern thipMemsetD32 *hipMemsetD32;
+extern thipMemsetD8Async *hipMemsetD8Async;
+extern thipMemsetD16Async *hipMemsetD16Async;
+extern thipMemsetD32Async *hipMemsetD32Async;
+extern thipArrayCreate *hipArrayCreate;
+extern thipArrayDestroy *hipArrayDestroy;
+extern thipArray3DCreate *hipArray3DCreate;
+extern thipStreamCreateWithFlags *hipStreamCreateWithFlags;
+extern thipStreamCreateWithPriority *hipStreamCreateWithPriority;
+extern thipStreamGetPriority *hipStreamGetPriority;
+extern thipStreamGetFlags *hipStreamGetFlags;
+extern thipStreamWaitEvent *hipStreamWaitEvent;
+extern thipStreamAddCallback *hipStreamAddCallback;
+extern thipStreamQuery *hipStreamQuery;
+extern thipStreamSynchronize *hipStreamSynchronize;
+extern thipStreamDestroy *hipStreamDestroy;
+extern thipEventCreateWithFlags *hipEventCreateWithFlags;
+extern thipEventRecord *hipEventRecord;
+extern thipEventQuery *hipEventQuery;
+extern thipEventSynchronize *hipEventSynchronize;
+extern thipEventDestroy *hipEventDestroy;
+extern thipEventElapsedTime *hipEventElapsedTime;
+extern thipFuncGetAttribute *hipFuncGetAttribute;
+extern thipFuncSetCacheConfig *hipFuncSetCacheConfig;
+extern thipModuleLaunchKernel *hipModuleLaunchKernel;
+extern thipDrvOccupancyMaxActiveBlocksPerMultiprocessor *hipDrvOccupancyMaxActiveBlocksPerMultiprocessor;
+extern thipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags *hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
+extern thipModuleOccupancyMaxPotentialBlockSize *hipModuleOccupancyMaxPotentialBlockSize;
+extern thipTexRefSetArray *hipTexRefSetArray;
+extern thipTexRefSetAddress *hipTexRefSetAddress;
+extern thipTexRefSetAddress2D *hipTexRefSetAddress2D;
+extern thipTexRefSetFormat *hipTexRefSetFormat;
+extern thipTexRefSetAddressMode *hipTexRefSetAddressMode;
+extern thipTexRefSetFilterMode *hipTexRefSetFilterMode;
+extern thipTexRefSetFlags *hipTexRefSetFlags;
+extern thipTexRefGetAddress *hipTexRefGetAddress;
+extern thipTexRefGetArray *hipTexRefGetArray;
+extern thipTexRefGetAddressMode *hipTexRefGetAddressMode;
+extern thipTexObjectCreate *hipTexObjectCreate;
+extern thipTexObjectDestroy *hipTexObjectDestroy;
+extern thipDeviceCanAccessPeer *hipDeviceCanAccessPeer;
+extern thipCtxEnablePeerAccess *hipCtxEnablePeerAccess;
+extern thipCtxDisablePeerAccess *hipCtxDisablePeerAccess;
+extern thipDeviceGetP2PAttribute *hipDeviceGetP2PAttribute;
+extern thipGraphicsUnregisterResource *hipGraphicsUnregisterResource;
+extern thipGraphicsResourceGetMappedMipmappedArray *hipGraphicsResourceGetMappedMipmappedArray;
+extern thipGraphicsResourceGetMappedPointer *hipGraphicsResourceGetMappedPointer;
+extern thipGraphicsMapResources *hipGraphicsMapResources;
+extern thipGraphicsUnmapResources *hipGraphicsUnmapResources;
+
+extern thipGraphicsGLRegisterBuffer *hipGraphicsGLRegisterBuffer;
+extern thipGLGetDevices *hipGLGetDevices;
+
+
+enum {
+ HIPEW_SUCCESS = 0,
+ HIPEW_ERROR_OPEN_FAILED = -1,
+ HIPEW_ERROR_ATEXIT_FAILED = -2,
+};
+
+enum {
+ HIPEW_INIT_HIP = 1,
+};
+
+int hipewInit(hipuint32_t flags);
+const char *hipewErrorString(hipError_t result);
+const char *hipewCompilerPath(void);
+int hipewCompilerVersion(void);
+int hipewNvrtcVersion(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __HIPEW_H__ */
diff --git a/extern/hipew/src/hipew.c b/extern/hipew/src/hipew.c
new file mode 100644
index 00000000000..9d5a63f869a
--- /dev/null
+++ b/extern/hipew/src/hipew.c
@@ -0,0 +1,533 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+#ifdef _MSC_VER
+# if _MSC_VER < 1900
+# define snprintf _snprintf
+# endif
+# define popen _popen
+# define pclose _pclose
+# define _CRT_SECURE_NO_WARNINGS
+#endif
+
+#include <hipew.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#ifdef _WIN32
+# define WIN32_LEAN_AND_MEAN
+# define VC_EXTRALEAN
+# include <windows.h>
+
+/* Utility macros. */
+
+typedef HMODULE DynamicLibrary;
+
+# define dynamic_library_open(path) LoadLibraryA(path)
+# define dynamic_library_close(lib) FreeLibrary(lib)
+# define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol)
+#else
+# include <dlfcn.h>
+
+typedef void* DynamicLibrary;
+
+# define dynamic_library_open(path) dlopen(path, RTLD_NOW)
+# define dynamic_library_close(lib) dlclose(lib)
+# define dynamic_library_find(lib, symbol) dlsym(lib, symbol)
+#endif
+
+#define _LIBRARY_FIND_CHECKED(lib, name) \
+ name = (t##name *)dynamic_library_find(lib, #name); \
+ assert(name);
+
+#define _LIBRARY_FIND(lib, name) \
+ name = (t##name *)dynamic_library_find(lib, #name);
+
+#define HIP_LIBRARY_FIND_CHECKED(name) \
+ _LIBRARY_FIND_CHECKED(hip_lib, name)
+#define HIP_LIBRARY_FIND(name) _LIBRARY_FIND(hip_lib, name)
+
+
+static DynamicLibrary hip_lib;
+
+/* Function definitions. */
+thipGetErrorName *hipGetErrorName;
+thipInit *hipInit;
+thipDriverGetVersion *hipDriverGetVersion;
+thipGetDevice *hipGetDevice;
+thipGetDeviceCount *hipGetDeviceCount;
+thipDeviceGetName *hipDeviceGetName;
+thipDeviceGetAttribute *hipDeviceGetAttribute;
+thipDeviceComputeCapability *hipDeviceComputeCapability;
+thipDevicePrimaryCtxRetain *hipDevicePrimaryCtxRetain;
+thipDevicePrimaryCtxRelease *hipDevicePrimaryCtxRelease;
+thipDevicePrimaryCtxSetFlags *hipDevicePrimaryCtxSetFlags;
+thipDevicePrimaryCtxGetState *hipDevicePrimaryCtxGetState;
+thipDevicePrimaryCtxReset *hipDevicePrimaryCtxReset;
+thipCtxCreate *hipCtxCreate;
+thipCtxDestroy *hipCtxDestroy;
+thipCtxPushCurrent *hipCtxPushCurrent;
+thipCtxPopCurrent *hipCtxPopCurrent;
+thipCtxSetCurrent *hipCtxSetCurrent;
+thipCtxGetCurrent *hipCtxGetCurrent;
+thipCtxGetDevice *hipCtxGetDevice;
+thipCtxGetFlags *hipCtxGetFlags;
+thipCtxSynchronize *hipCtxSynchronize;
+thipDeviceSynchronize *hipDeviceSynchronize;
+thipCtxGetCacheConfig *hipCtxGetCacheConfig;
+thipCtxSetCacheConfig *hipCtxSetCacheConfig;
+thipCtxGetSharedMemConfig *hipCtxGetSharedMemConfig;
+thipCtxSetSharedMemConfig *hipCtxSetSharedMemConfig;
+thipCtxGetApiVersion *hipCtxGetApiVersion;
+thipModuleLoad *hipModuleLoad;
+thipModuleLoadData *hipModuleLoadData;
+thipModuleLoadDataEx *hipModuleLoadDataEx;
+thipModuleUnload *hipModuleUnload;
+thipModuleGetFunction *hipModuleGetFunction;
+thipModuleGetGlobal *hipModuleGetGlobal;
+thipModuleGetTexRef *hipModuleGetTexRef;
+thipMemGetInfo *hipMemGetInfo;
+thipMalloc *hipMalloc;
+thipMemAllocPitch *hipMemAllocPitch;
+thipFree *hipFree;
+thipMemGetAddressRange *hipMemGetAddressRange;
+thipHostMalloc *hipHostMalloc;
+thipHostFree *hipHostFree;
+thipHostGetDevicePointer *hipHostGetDevicePointer;
+thipHostGetFlags *hipHostGetFlags;
+thipMallocManaged *hipMallocManaged;
+thipDeviceGetByPCIBusId *hipDeviceGetByPCIBusId;
+thipDeviceGetPCIBusId *hipDeviceGetPCIBusId;
+thipMemcpyPeer *hipMemcpyPeer;
+thipMemcpyHtoD *hipMemcpyHtoD;
+thipMemcpyDtoH *hipMemcpyDtoH;
+thipMemcpyDtoD *hipMemcpyDtoD;
+thipDrvMemcpy2DUnaligned *hipDrvMemcpy2DUnaligned;
+thipMemcpyParam2D *hipMemcpyParam2D;
+thipDrvMemcpy3D *hipDrvMemcpy3D;
+thipMemcpyHtoDAsync *hipMemcpyHtoDAsync;
+thipMemcpyDtoHAsync *hipMemcpyDtoHAsync;
+thipMemcpyParam2DAsync *hipMemcpyParam2DAsync;
+thipDrvMemcpy3DAsync *hipDrvMemcpy3DAsync;
+thipMemsetD8 *hipMemsetD8;
+thipMemsetD16 *hipMemsetD16;
+thipMemsetD32 *hipMemsetD32;
+thipMemsetD8Async *hipMemsetD8Async;
+thipMemsetD16Async *hipMemsetD16Async;
+thipMemsetD32Async *hipMemsetD32Async;
+thipArrayCreate *hipArrayCreate;
+thipArrayDestroy *hipArrayDestroy;
+thipArray3DCreate *hipArray3DCreate;
+thipStreamCreateWithFlags *hipStreamCreateWithFlags;
+thipStreamCreateWithPriority *hipStreamCreateWithPriority;
+thipStreamGetPriority *hipStreamGetPriority;
+thipStreamGetFlags *hipStreamGetFlags;
+thipStreamWaitEvent *hipStreamWaitEvent;
+thipStreamAddCallback *hipStreamAddCallback;
+thipStreamQuery *hipStreamQuery;
+thipStreamSynchronize *hipStreamSynchronize;
+thipStreamDestroy *hipStreamDestroy;
+thipEventCreateWithFlags *hipEventCreateWithFlags;
+thipEventRecord *hipEventRecord;
+thipEventQuery *hipEventQuery;
+thipEventSynchronize *hipEventSynchronize;
+thipEventDestroy *hipEventDestroy;
+thipEventElapsedTime *hipEventElapsedTime;
+thipFuncGetAttribute *hipFuncGetAttribute;
+thipFuncSetCacheConfig *hipFuncSetCacheConfig;
+thipModuleLaunchKernel *hipModuleLaunchKernel;
+thipDrvOccupancyMaxActiveBlocksPerMultiprocessor *hipDrvOccupancyMaxActiveBlocksPerMultiprocessor;
+thipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags *hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
+thipModuleOccupancyMaxPotentialBlockSize *hipModuleOccupancyMaxPotentialBlockSize;
+thipTexRefSetArray *hipTexRefSetArray;
+thipTexRefSetAddress *hipTexRefSetAddress;
+thipTexRefSetAddress2D *hipTexRefSetAddress2D;
+thipTexRefSetFormat *hipTexRefSetFormat;
+thipTexRefSetAddressMode *hipTexRefSetAddressMode;
+thipTexRefSetFilterMode *hipTexRefSetFilterMode;
+thipTexRefSetFlags *hipTexRefSetFlags;
+thipTexRefGetAddress *hipTexRefGetAddress;
+thipTexRefGetArray *hipTexRefGetArray;
+thipTexRefGetAddressMode *hipTexRefGetAddressMode;
+thipTexObjectCreate *hipTexObjectCreate;
+thipTexObjectDestroy *hipTexObjectDestroy;
+thipDeviceCanAccessPeer *hipDeviceCanAccessPeer;
+
+thipCtxEnablePeerAccess *hipCtxEnablePeerAccess;
+thipCtxDisablePeerAccess *hipCtxDisablePeerAccess;
+thipDeviceGetP2PAttribute *hipDeviceGetP2PAttribute;
+thipGraphicsUnregisterResource *hipGraphicsUnregisterResource;
+thipGraphicsMapResources *hipGraphicsMapResources;
+thipGraphicsUnmapResources *hipGraphicsUnmapResources;
+thipGraphicsResourceGetMappedPointer *hipGraphicsResourceGetMappedPointer;
+
+thipGraphicsGLRegisterBuffer *hipGraphicsGLRegisterBuffer;
+thipGLGetDevices *hipGLGetDevices;
+
+
+
+static DynamicLibrary dynamic_library_open_find(const char **paths) {
+ int i = 0;
+ while (paths[i] != NULL) {
+ DynamicLibrary lib = dynamic_library_open(paths[i]);
+ if (lib != NULL) {
+ return lib;
+ }
+ ++i;
+ }
+ return NULL;
+}
+
+/* Implementation function. */
+static void hipewHipExit(void) {
+ if (hip_lib != NULL) {
+ /* Ignore errors. */
+ dynamic_library_close(hip_lib);
+ hip_lib = NULL;
+ }
+}
+
+static int hipewHipInit(void) {
+ /* Library paths. */
+#ifdef _WIN32
+ /* Expected in c:/windows/system or similar, no path needed. */
+ const char *hip_paths[] = {"amdhip64.dll", NULL};
+#elif defined(__APPLE__)
+ /* Default installation path. */
+ const char *hip_paths[] = {"", NULL};
+#else
+ const char *hip_paths[] = {"/opt/rocm/hip/lib/libamdhip64.so", NULL};
+#endif
+ static int initialized = 0;
+ static int result = 0;
+ int error, driver_version;
+
+ if (initialized) {
+ return result;
+ }
+
+ initialized = 1;
+
+ error = atexit(hipewHipExit);
+ if (error) {
+ result = HIPEW_ERROR_ATEXIT_FAILED;
+ return result;
+ }
+
+ /* Load library. */
+ hip_lib = dynamic_library_open_find(hip_paths);
+
+ if (hip_lib == NULL) {
+ result = HIPEW_ERROR_OPEN_FAILED;
+ return result;
+ }
+
+ /* Fetch all function pointers. */
+ HIP_LIBRARY_FIND_CHECKED(hipGetErrorName);
+ HIP_LIBRARY_FIND_CHECKED(hipInit);
+ HIP_LIBRARY_FIND_CHECKED(hipDriverGetVersion);
+ HIP_LIBRARY_FIND_CHECKED(hipGetDevice);
+ HIP_LIBRARY_FIND_CHECKED(hipGetDeviceCount);
+ HIP_LIBRARY_FIND_CHECKED(hipDeviceGetName);
+ HIP_LIBRARY_FIND_CHECKED(hipDeviceGetAttribute);
+ HIP_LIBRARY_FIND_CHECKED(hipDeviceComputeCapability);
+ HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxRetain);
+ HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxRelease);
+ HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxSetFlags);
+ HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxGetState);
+ HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxReset);
+ HIP_LIBRARY_FIND_CHECKED(hipCtxCreate);
+ HIP_LIBRARY_FIND_CHECKED(hipCtxDestroy);
+ HIP_LIBRARY_FIND_CHECKED(hipCtxPushCurrent);
+ HIP_LIBRARY_FIND_CHECKED(hipCtxPopCurrent);
+ HIP_LIBRARY_FIND_CHECKED(hipCtxSetCurrent);
+ HIP_LIBRARY_FIND_CHECKED(hipCtxGetCurrent);
+ HIP_LIBRARY_FIND_CHECKED(hipCtxGetDevice);
+ HIP_LIBRARY_FIND_CHECKED(hipCtxGetFlags);
+ HIP_LIBRARY_FIND_CHECKED(hipCtxSynchronize);
+ HIP_LIBRARY_FIND_CHECKED(hipDeviceSynchronize);
+ HIP_LIBRARY_FIND_CHECKED(hipCtxGetCacheConfig);
+ HIP_LIBRARY_FIND_CHECKED(hipCtxSetCacheConfig);
+ HIP_LIBRARY_FIND_CHECKED(hipCtxGetSharedMemConfig);
+ HIP_LIBRARY_FIND_CHECKED(hipCtxSetSharedMemConfig);
+ HIP_LIBRARY_FIND_CHECKED(hipCtxGetApiVersion);
+ HIP_LIBRARY_FIND_CHECKED(hipModuleLoad);
+ HIP_LIBRARY_FIND_CHECKED(hipModuleLoadData);
+ HIP_LIBRARY_FIND_CHECKED(hipModuleLoadDataEx);
+ HIP_LIBRARY_FIND_CHECKED(hipModuleUnload);
+ HIP_LIBRARY_FIND_CHECKED(hipModuleGetFunction);
+ HIP_LIBRARY_FIND_CHECKED(hipModuleGetGlobal);
+ HIP_LIBRARY_FIND_CHECKED(hipModuleGetTexRef);
+ HIP_LIBRARY_FIND_CHECKED(hipMemGetInfo);
+ HIP_LIBRARY_FIND_CHECKED(hipMalloc);
+ HIP_LIBRARY_FIND_CHECKED(hipMemAllocPitch);
+ HIP_LIBRARY_FIND_CHECKED(hipFree);
+ HIP_LIBRARY_FIND_CHECKED(hipMemGetAddressRange);
+ HIP_LIBRARY_FIND_CHECKED(hipHostMalloc);
+ HIP_LIBRARY_FIND_CHECKED(hipHostFree);
+ HIP_LIBRARY_FIND_CHECKED(hipHostGetDevicePointer);
+ HIP_LIBRARY_FIND_CHECKED(hipHostGetFlags);
+ HIP_LIBRARY_FIND_CHECKED(hipMallocManaged);
+ HIP_LIBRARY_FIND_CHECKED(hipDeviceGetByPCIBusId);
+ HIP_LIBRARY_FIND_CHECKED(hipDeviceGetPCIBusId);
+ HIP_LIBRARY_FIND_CHECKED(hipMemcpyPeer);
+ HIP_LIBRARY_FIND_CHECKED(hipMemcpyHtoD);
+ HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoH);
+ HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoD);
+ HIP_LIBRARY_FIND_CHECKED(hipMemcpyParam2D);
+ HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy3D);
+ HIP_LIBRARY_FIND_CHECKED(hipMemcpyHtoDAsync);
+ HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoHAsync);
+ HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy2DUnaligned);
+ HIP_LIBRARY_FIND_CHECKED(hipMemcpyParam2DAsync);
+ HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy3DAsync);
+ HIP_LIBRARY_FIND_CHECKED(hipMemsetD8);
+ HIP_LIBRARY_FIND_CHECKED(hipMemsetD16);
+ HIP_LIBRARY_FIND_CHECKED(hipMemsetD32);
+ HIP_LIBRARY_FIND_CHECKED(hipMemsetD8Async);
+ HIP_LIBRARY_FIND_CHECKED(hipMemsetD16Async);
+ HIP_LIBRARY_FIND_CHECKED(hipMemsetD32Async);
+ HIP_LIBRARY_FIND_CHECKED(hipArrayCreate);
+ HIP_LIBRARY_FIND_CHECKED(hipArrayDestroy);
+ HIP_LIBRARY_FIND_CHECKED(hipArray3DCreate);
+ HIP_LIBRARY_FIND_CHECKED(hipStreamCreateWithFlags);
+ HIP_LIBRARY_FIND_CHECKED(hipStreamCreateWithPriority);
+ HIP_LIBRARY_FIND_CHECKED(hipStreamGetPriority);
+ HIP_LIBRARY_FIND_CHECKED(hipStreamGetFlags);
+ HIP_LIBRARY_FIND_CHECKED(hipStreamWaitEvent);
+ HIP_LIBRARY_FIND_CHECKED(hipStreamAddCallback);
+ HIP_LIBRARY_FIND_CHECKED(hipStreamQuery);
+ HIP_LIBRARY_FIND_CHECKED(hipStreamSynchronize);
+ HIP_LIBRARY_FIND_CHECKED(hipStreamDestroy);
+ HIP_LIBRARY_FIND_CHECKED(hipEventCreateWithFlags);
+ HIP_LIBRARY_FIND_CHECKED(hipEventRecord);
+ HIP_LIBRARY_FIND_CHECKED(hipEventQuery);
+ HIP_LIBRARY_FIND_CHECKED(hipEventSynchronize);
+ HIP_LIBRARY_FIND_CHECKED(hipEventDestroy);
+ HIP_LIBRARY_FIND_CHECKED(hipEventElapsedTime);
+ HIP_LIBRARY_FIND_CHECKED(hipFuncGetAttribute);
+ HIP_LIBRARY_FIND_CHECKED(hipFuncSetCacheConfig);
+ HIP_LIBRARY_FIND_CHECKED(hipModuleLaunchKernel);
+ HIP_LIBRARY_FIND_CHECKED(hipModuleOccupancyMaxPotentialBlockSize);
+ HIP_LIBRARY_FIND_CHECKED(hipTexRefSetArray);
+ HIP_LIBRARY_FIND_CHECKED(hipTexRefSetAddress);
+ HIP_LIBRARY_FIND_CHECKED(hipTexRefSetAddress2D);
+ HIP_LIBRARY_FIND_CHECKED(hipTexRefSetFormat);
+ HIP_LIBRARY_FIND_CHECKED(hipTexRefSetAddressMode);
+ HIP_LIBRARY_FIND_CHECKED(hipTexRefSetFilterMode);
+ HIP_LIBRARY_FIND_CHECKED(hipTexRefSetFlags);
+ HIP_LIBRARY_FIND_CHECKED(hipTexRefGetAddress);
+ HIP_LIBRARY_FIND_CHECKED(hipTexRefGetAddressMode);
+ HIP_LIBRARY_FIND_CHECKED(hipTexObjectCreate);
+ HIP_LIBRARY_FIND_CHECKED(hipTexObjectDestroy);
+ HIP_LIBRARY_FIND_CHECKED(hipDeviceCanAccessPeer);
+ HIP_LIBRARY_FIND_CHECKED(hipCtxEnablePeerAccess);
+ HIP_LIBRARY_FIND_CHECKED(hipCtxDisablePeerAccess);
+ HIP_LIBRARY_FIND_CHECKED(hipDeviceGetP2PAttribute);
+#ifdef _WIN32
+ HIP_LIBRARY_FIND_CHECKED(hipGraphicsUnregisterResource);
+ HIP_LIBRARY_FIND_CHECKED(hipGraphicsMapResources);
+ HIP_LIBRARY_FIND_CHECKED(hipGraphicsUnmapResources);
+ HIP_LIBRARY_FIND_CHECKED(hipGraphicsResourceGetMappedPointer);
+ HIP_LIBRARY_FIND_CHECKED(hipGraphicsGLRegisterBuffer);
+ HIP_LIBRARY_FIND_CHECKED(hipGLGetDevices);
+#endif
+ result = HIPEW_SUCCESS;
+ return result;
+}
+
+
+
+int hipewInit(hipuint32_t flags) {
+ int result = HIPEW_SUCCESS;
+
+ if (flags & HIPEW_INIT_HIP) {
+ result = hipewHipInit();
+ if (result != HIPEW_SUCCESS) {
+ return result;
+ }
+ }
+
+ return result;
+}
+
+
+const char *hipewErrorString(hipError_t result) {
+ switch (result) {
+ case hipSuccess: return "No errors";
+ case hipErrorInvalidValue: return "Invalid value";
+ case hipErrorOutOfMemory: return "Out of memory";
+ case hipErrorNotInitialized: return "Driver not initialized";
+ case hipErrorDeinitialized: return "Driver deinitialized";
+ case hipErrorProfilerDisabled: return "Profiler disabled";
+ case hipErrorProfilerNotInitialized: return "Profiler not initialized";
+ case hipErrorProfilerAlreadyStarted: return "Profiler already started";
+ case hipErrorProfilerAlreadyStopped: return "Profiler already stopped";
+ case hipErrorNoDevice: return "No HIP-capable device available";
+ case hipErrorInvalidDevice: return "Invalid device";
+ case hipErrorInvalidImage: return "Invalid kernel image";
+ case hipErrorInvalidContext: return "Invalid context";
+ case hipErrorContextAlreadyCurrent: return "Context already current";
+ case hipErrorMapFailed: return "Map failed";
+ case hipErrorUnmapFailed: return "Unmap failed";
+ case hipErrorArrayIsMapped: return "Array is mapped";
+ case hipErrorAlreadyMapped: return "Already mapped";
+ case hipErrorNoBinaryForGpu: return "No binary for GPU";
+ case hipErrorAlreadyAcquired: return "Already acquired";
+ case hipErrorNotMapped: return "Not mapped";
+ case hipErrorNotMappedAsArray: return "Mapped resource not available for access as an array";
+ case hipErrorNotMappedAsPointer: return "Mapped resource not available for access as a pointer";
+ case hipErrorECCNotCorrectable: return "Uncorrectable ECC error detected";
+ case hipErrorUnsupportedLimit: return "hipLimit_t not supported by device";
+ case hipErrorContextAlreadyInUse: return "Context already in use";
+ case hipErrorPeerAccessUnsupported: return "Peer access unsupported";
+ case hipErrorInvalidKernelFile: return "Invalid ptx";
+ case hipErrorInvalidGraphicsContext: return "Invalid graphics context";
+ case hipErrorInvalidSource: return "Invalid source";
+ case hipErrorFileNotFound: return "File not found";
+ case hipErrorSharedObjectSymbolNotFound: return "Link to a shared object failed to resolve";
+ case hipErrorSharedObjectInitFailed: return "Shared object initialization failed";
+ case hipErrorOperatingSystem: return "Operating system";
+ case hipErrorInvalidHandle: return "Invalid handle";
+ case hipErrorNotFound: return "Not found";
+ case hipErrorNotReady: return "HIP not ready";
+ case hipErrorIllegalAddress: return "Illegal address";
+ case hipErrorLaunchOutOfResources: return "Launch exceeded resources";
+ case hipErrorLaunchTimeOut: return "Launch exceeded timeout";
+ case hipErrorPeerAccessAlreadyEnabled: return "Peer access already enabled";
+ case hipErrorPeerAccessNotEnabled: return "Peer access not enabled";
+ case hipErrorSetOnActiveProcess: return "Primary context active";
+ case hipErrorAssert: return "Assert";
+ case hipErrorHostMemoryAlreadyRegistered: return "Host memory already registered";
+ case hipErrorHostMemoryNotRegistered: return "Host memory not registered";
+ case hipErrorLaunchFailure: return "Launch failed";
+ case hipErrorCooperativeLaunchTooLarge: return "Cooperative launch too large";
+ case hipErrorNotSupported: return "Not supported";
+ case hipErrorUnknown: return "Unknown error";
+ default: return "Unknown HIP error value";
+ }
+}
+
+static void path_join(const char *path1,
+ const char *path2,
+ int maxlen,
+ char *result) {
+#if defined(WIN32) || defined(_WIN32)
+ const char separator = '\\';
+#else
+ const char separator = '/';
+#endif
+ int n = snprintf(result, maxlen, "%s%c%s", path1, separator, path2);
+ if (n != -1 && n < maxlen) {
+ result[n] = '\0';
+ }
+ else {
+ result[maxlen - 1] = '\0';
+ }
+}
+
+static int path_exists(const char *path) {
+ struct stat st;
+ if (stat(path, &st)) {
+ return 0;
+ }
+ return 1;
+}
+
+const char *hipewCompilerPath(void) {
+ #ifdef _WIN32
+ const char *hipPath = getenv("HIP_ROCCLR_HOME");
+ const char *windowsCommand = "perl ";
+ const char *executable = "bin/hipcc";
+
+ static char hipcc[65536];
+ static char finalCommand[65536];
+ if(hipPath) {
+ path_join(hipPath, executable, sizeof(hipcc), hipcc);
+ if(path_exists(hipcc)) {
+ snprintf(finalCommand, sizeof(hipcc), "%s %s", windowsCommand, hipcc);
+ return finalCommand;
+ } else {
+ printf("Could not find hipcc. Make sure HIP_ROCCLR_HOME points to the directory holding /bin/hipcc");
+ }
+ }
+ #else
+ const char *hipPath = "opt/rocm/hip/bin";
+ const char *executable = "hipcc";
+
+ static char hipcc[65536];
+ if(hipPath) {
+ path_join(hipPath, executable, sizeof(hipcc), hipcc);
+ if(path_exists(hipcc)){
+ return hipcc;
+ }
+ }
+ #endif
+
+ {
+#ifdef _WIN32
+ FILE *handle = popen("where hipcc", "r");
+#else
+ FILE *handle = popen("which hipcc", "r");
+#endif
+ if (handle) {
+ char buffer[4096] = {0};
+ int len = fread(buffer, 1, sizeof(buffer) - 1, handle);
+ buffer[len] = '\0';
+ pclose(handle);
+ if (buffer[0]) {
+ return "hipcc";
+ }
+ }
+ }
+
+ return NULL;
+}
+
+int hipewCompilerVersion(void) {
+ const char *path = hipewCompilerPath();
+ const char *marker = "Hip compilation tools, release ";
+ FILE *pipe;
+ int major, minor;
+ char *versionstr;
+ char buf[128];
+ char output[65536] = "\0";
+ char command[65536] = "\0";
+
+ if (path == NULL) {
+ return 0;
+ }
+
+ /* get --version output */
+ strcat(command, "\"");
+ strncat(command, path, sizeof(command) - 1);
+ strncat(command, "\" --version", sizeof(command) - strlen(path) - 1);
+ pipe = popen(command, "r");
+ if (!pipe) {
+ fprintf(stderr, "HIP: failed to run compiler to retrieve version");
+ return 0;
+ }
+
+ while (!feof(pipe)) {
+ if (fgets(buf, sizeof(buf), pipe) != NULL) {
+ strncat(output, buf, sizeof(output) - strlen(output) - 1);
+ }
+ }
+
+ pclose(pipe);
+ return 40;
+}