From d2a822fe07eb845180d8b51f8b4c45e51ee5075b Mon Sep 17 00:00:00 2001 From: Sergey Sharybin Date: Wed, 25 Nov 2015 23:27:40 +0500 Subject: CUEW: Update to latest version It is now updated against CUDA Toolkit 7.5. There should currently be no functional changes; this just begins some groundwork for the future. --- extern/cuew/auto/cuda_extra.py | 2 +- extern/cuew/auto/cuew_gen.py | 9 +++- extern/cuew/include/cuew.h | 66 ++++++++++++++++++++++------ extern/cuew/src/cuew.c | 99 ++++++++++++++++++++++++++---------------- 4 files changed, 122 insertions(+), 54 deletions(-) (limited to 'extern') diff --git a/extern/cuew/auto/cuda_extra.py b/extern/cuew/auto/cuda_extra.py index fd4f466df83..5fd2c179339 100644 --- a/extern/cuew/auto/cuda_extra.py +++ b/extern/cuew/auto/cuda_extra.py @@ -101,7 +101,7 @@ int cuewCompilerVersion(void) { while (!feof(pipe)) { if (fgets(buf, sizeof(buf), pipe) != NULL) { - strncat(output, buf, sizeof(output) - strlen(output)); + strncat(output, buf, sizeof(output) - strlen(output) - 1); } } diff --git a/extern/cuew/auto/cuew_gen.py b/extern/cuew/auto/cuew_gen.py index a94525c52b1..75e5bf876f4 100644 --- a/extern/cuew/auto/cuew_gen.py +++ b/extern/cuew/auto/cuew_gen.py @@ -276,7 +276,11 @@ def parse_files(): if line[0].isspace() and line.lstrip().startswith("#define"): line = line[12:-1] token = line.split() - if len(token) == 2 and token[1].endswith("_v2"): + if len(token) == 2 and (token[1].endswith("_v2") or + token[1].endswith("_v2)")): + if token[1].startswith('__CUDA_API_PTDS') or \ + token[1].startswith('__CUDA_API_PTSZ'): + token[1] = token[1][16:-1] DEFINES_V2.append(token) v = FuncDefVisitor() @@ -560,7 +564,8 @@ def print_implementation(): if error in CUDA_ERRORS: str = CUDA_ERRORS[error] else: - str = error[11:] + temp = error[11:].replace('_', ' ') + str = temp[0] + temp[1:].lower() print(" case %s: return \"%s\";" % (error, str)) print(" default: return \"Unknown CUDA error value\";") diff --git a/extern/cuew/include/cuew.h 
b/extern/cuew/include/cuew.h index fd03311ad41..1b12e5b4463 100644 --- a/extern/cuew/include/cuew.h +++ b/extern/cuew/include/cuew.h @@ -27,13 +27,16 @@ extern "C" { #define CUEW_VERSION_MAJOR 1 #define CUEW_VERSION_MINOR 2 -#define CUDA_VERSION 6000 +#define CUDA_VERSION 7050 #define CU_IPC_HANDLE_SIZE 64 +#define CU_STREAM_LEGACY ((CUstream)0x1) +#define CU_STREAM_PER_THREAD ((CUstream)0x2) #define CU_MEMHOSTALLOC_PORTABLE 0x01 #define CU_MEMHOSTALLOC_DEVICEMAP 0x02 #define CU_MEMHOSTALLOC_WRITECOMBINED 0x04 #define CU_MEMHOSTREGISTER_PORTABLE 0x01 #define CU_MEMHOSTREGISTER_DEVICEMAP 0x02 +#define CU_MEMHOSTREGISTER_IOMEMORY 0x04 #define CUDA_ARRAY3D_LAYERED 0x01 #define CUDA_ARRAY3D_2DARRAY 0x01 #define CUDA_ARRAY3D_SURFACE_LDST 0x02 @@ -100,10 +103,16 @@ extern "C" { #define cuCtxPushCurrent cuCtxPushCurrent_v2 #define cuStreamDestroy cuStreamDestroy_v2 #define cuEventDestroy cuEventDestroy_v2 +#define cuLinkCreate cuLinkCreate_v2 +#define cuLinkAddData cuLinkAddData_v2 +#define cuLinkAddFile cuLinkAddFile_v2 +#define cuMemHostRegister cuMemHostRegister_v2 +#define cuGraphicsResourceSetMapFlags cuGraphicsResourceSetMapFlags_v2 #define cuTexRefSetAddress2D cuTexRefSetAddress2D_v2 #define cuGLCtxCreate cuGLCtxCreate_v2 #define cuGLMapBufferObject cuGLMapBufferObject_v2 #define cuGLMapBufferObjectAsync cuGLMapBufferObjectAsync_v2 +#define cuGLGetDevices cuGLGetDevices_v2 /* Types. 
*/ #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) @@ -172,6 +181,11 @@ typedef enum CUevent_flags_enum { CU_EVENT_INTERPROCESS = 0x4, } CUevent_flags; +typedef enum CUoccupancy_flags_enum { + CU_OCCUPANCY_DEFAULT = 0x0, + CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE = 0x1, +} CUoccupancy_flags; + typedef enum CUarray_format_enum { CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, @@ -381,7 +395,9 @@ typedef enum CUjit_target_enum { CU_TARGET_COMPUTE_30 = 30, CU_TARGET_COMPUTE_32 = 32, CU_TARGET_COMPUTE_35 = 35, + CU_TARGET_COMPUTE_37 = 37, CU_TARGET_COMPUTE_50 = 50, + CU_TARGET_COMPUTE_52 = 52, } CUjit_target; typedef enum CUjit_fallback_enum { @@ -474,6 +490,7 @@ typedef enum cudaError_enum { CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216, CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217, CUDA_ERROR_INVALID_PTX = 218, + CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219, CUDA_ERROR_INVALID_SOURCE = 300, CUDA_ERROR_FILE_NOT_FOUND = 301, CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302, @@ -506,6 +523,7 @@ typedef enum cudaError_enum { } CUresult; typedef void* CUstreamCallback; +typedef size_t* CUoccupancyB2DSize; typedef struct CUDA_MEMCPY2D_st { size_t srcXInBytes; @@ -730,6 +748,11 @@ typedef CUresult CUDAAPI tcuDeviceTotalMem_v2(size_t* bytes, CUdevice dev); typedef CUresult CUDAAPI tcuDeviceGetAttribute(int* pi, CUdevice_attribute attrib, CUdevice dev); typedef CUresult CUDAAPI tcuDeviceGetProperties(CUdevprop* prop, CUdevice dev); typedef CUresult CUDAAPI tcuDeviceComputeCapability(int* major, int* minor, CUdevice dev); +typedef CUresult CUDAAPI tcuDevicePrimaryCtxRetain(CUcontext* pctx, CUdevice dev); +typedef CUresult CUDAAPI tcuDevicePrimaryCtxRelease(CUdevice dev); +typedef CUresult CUDAAPI tcuDevicePrimaryCtxSetFlags(CUdevice dev, unsigned flags); +typedef CUresult CUDAAPI tcuDevicePrimaryCtxGetState(CUdevice dev, unsigned* flags, int* active); +typedef CUresult CUDAAPI tcuDevicePrimaryCtxReset(CUdevice dev); typedef CUresult CUDAAPI 
tcuCtxCreate_v2(CUcontext* pctx, unsigned flags, CUdevice dev); typedef CUresult CUDAAPI tcuCtxDestroy_v2(CUcontext ctx); typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext ctx); @@ -737,6 +760,7 @@ typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext* pctx); typedef CUresult CUDAAPI tcuCtxSetCurrent(CUcontext ctx); typedef CUresult CUDAAPI tcuCtxGetCurrent(CUcontext* pctx); typedef CUresult CUDAAPI tcuCtxGetDevice(CUdevice* device); +typedef CUresult CUDAAPI tcuCtxGetFlags(unsigned* flags); typedef CUresult CUDAAPI tcuCtxSynchronize(void); typedef CUresult CUDAAPI tcuCtxSetLimit(CUlimit limit, size_t value); typedef CUresult CUDAAPI tcuCtxGetLimit(size_t* pvalue, CUlimit limit); @@ -757,9 +781,9 @@ typedef CUresult CUDAAPI tcuModuleGetFunction(CUfunction* hfunc, CUmodule hmod, typedef CUresult CUDAAPI tcuModuleGetGlobal_v2(CUdeviceptr* dptr, size_t* bytes, CUmodule hmod, const char* name); typedef CUresult CUDAAPI tcuModuleGetTexRef(CUtexref* pTexRef, CUmodule hmod, const char* name); typedef CUresult CUDAAPI tcuModuleGetSurfRef(CUsurfref* pSurfRef, CUmodule hmod, const char* name); -typedef CUresult CUDAAPI tcuLinkCreate(unsigned numOptions, CUjit_option* options, void* optionValues, CUlinkState* stateOut); -typedef CUresult CUDAAPI tcuLinkAddData(CUlinkState state, CUjitInputType type, void* data, size_t size, const char* name, unsigned numOptions, CUjit_option* options, void* optionValues); -typedef CUresult CUDAAPI tcuLinkAddFile(CUlinkState state, CUjitInputType type, const char* path, unsigned numOptions, CUjit_option* options, void* optionValues); +typedef CUresult CUDAAPI tcuLinkCreate_v2(unsigned numOptions, CUjit_option* options, void* optionValues, CUlinkState* stateOut); +typedef CUresult CUDAAPI tcuLinkAddData_v2(CUlinkState state, CUjitInputType type, void* data, size_t size, const char* name, unsigned numOptions, CUjit_option* options, void* optionValues); +typedef CUresult CUDAAPI tcuLinkAddFile_v2(CUlinkState state, CUjitInputType type, 
const char* path, unsigned numOptions, CUjit_option* options, void* optionValues); typedef CUresult CUDAAPI tcuLinkComplete(CUlinkState state, void* cubinOut, size_t* sizeOut); typedef CUresult CUDAAPI tcuLinkDestroy(CUlinkState state); typedef CUresult CUDAAPI tcuMemGetInfo_v2(size_t* free, size_t* total); @@ -780,7 +804,7 @@ typedef CUresult CUDAAPI tcuIpcOpenEventHandle(CUevent* phEvent, CUipcEventHandl typedef CUresult CUDAAPI tcuIpcGetMemHandle(CUipcMemHandle* pHandle, CUdeviceptr dptr); typedef CUresult CUDAAPI tcuIpcOpenMemHandle(CUdeviceptr* pdptr, CUipcMemHandle handle, unsigned Flags); typedef CUresult CUDAAPI tcuIpcCloseMemHandle(CUdeviceptr dptr); -typedef CUresult CUDAAPI tcuMemHostRegister(void* p, size_t bytesize, unsigned Flags); +typedef CUresult CUDAAPI tcuMemHostRegister_v2(void* p, size_t bytesize, unsigned Flags); typedef CUresult CUDAAPI tcuMemHostUnregister(void* p); typedef CUresult CUDAAPI tcuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount); typedef CUresult CUDAAPI tcuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount); @@ -828,6 +852,7 @@ typedef CUresult CUDAAPI tcuMipmappedArrayGetLevel(CUarray* pLevelArray, CUmipma typedef CUresult CUDAAPI tcuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray); typedef CUresult CUDAAPI tcuPointerGetAttribute(void* data, CUpointer_attribute attribute, CUdeviceptr ptr); typedef CUresult CUDAAPI tcuPointerSetAttribute(const void* value, CUpointer_attribute attribute, CUdeviceptr ptr); +typedef CUresult CUDAAPI tcuPointerGetAttributes(unsigned numAttributes, CUpointer_attribute* attributes, void* data, CUdeviceptr ptr); typedef CUresult CUDAAPI tcuStreamCreate(CUstream* phStream, unsigned Flags); typedef CUresult CUDAAPI tcuStreamCreateWithPriority(CUstream* phStream, unsigned flags, int priority); typedef CUresult CUDAAPI tcuStreamGetPriority(CUstream hStream, int* priority); @@ -858,6 +883,10 @@ typedef CUresult CUDAAPI 
tcuLaunch(CUfunction f); typedef CUresult CUDAAPI tcuLaunchGrid(CUfunction f, int grid_width, int grid_height); typedef CUresult CUDAAPI tcuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream); typedef CUresult CUDAAPI tcuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef); +typedef CUresult CUDAAPI tcuOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize); +typedef CUresult CUDAAPI tcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize, unsigned flags); +typedef CUresult CUDAAPI tcuOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit); +typedef CUresult CUDAAPI tcuOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit, unsigned flags); typedef CUresult CUDAAPI tcuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned Flags); typedef CUresult CUDAAPI tcuTexRefSetMipmappedArray(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, unsigned Flags); typedef CUresult CUDAAPI tcuTexRefSetAddress_v2(size_t* ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes); @@ -900,14 +929,14 @@ typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resour typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray* pArray, CUgraphicsResource resource, unsigned arrayIndex, unsigned mipLevel); typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedMipmappedArray(CUmipmappedArray* pMipmappedArray, CUgraphicsResource resource); typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer_v2(CUdeviceptr* pDevPtr, size_t* pSize, CUgraphicsResource resource); -typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags(CUgraphicsResource resource, 
unsigned flags); +typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags_v2(CUgraphicsResource resource, unsigned flags); typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned count, CUgraphicsResource* resources, CUstream hStream); typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned count, CUgraphicsResource* resources, CUstream hStream); typedef CUresult CUDAAPI tcuGetExportTable(const void* ppExportTable, const CUuuid* pExportTableId); typedef CUresult CUDAAPI tcuGraphicsGLRegisterBuffer(CUgraphicsResource* pCudaResource, GLuint buffer, unsigned Flags); typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource* pCudaResource, GLuint image, GLenum target, unsigned Flags); -typedef CUresult CUDAAPI tcuGLGetDevices(unsigned* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned cudaDeviceCount, CUGLDeviceList deviceList); +typedef CUresult CUDAAPI tcuGLGetDevices_v2(unsigned* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned cudaDeviceCount, CUGLDeviceList deviceList); typedef CUresult CUDAAPI tcuGLCtxCreate_v2(CUcontext* pCtx, unsigned Flags, CUdevice device); typedef CUresult CUDAAPI tcuGLInit(void); typedef CUresult CUDAAPI tcuGLRegisterBufferObject(GLuint buffer); @@ -931,6 +960,11 @@ extern tcuDeviceTotalMem_v2 *cuDeviceTotalMem_v2; extern tcuDeviceGetAttribute *cuDeviceGetAttribute; extern tcuDeviceGetProperties *cuDeviceGetProperties; extern tcuDeviceComputeCapability *cuDeviceComputeCapability; +extern tcuDevicePrimaryCtxRetain *cuDevicePrimaryCtxRetain; +extern tcuDevicePrimaryCtxRelease *cuDevicePrimaryCtxRelease; +extern tcuDevicePrimaryCtxSetFlags *cuDevicePrimaryCtxSetFlags; +extern tcuDevicePrimaryCtxGetState *cuDevicePrimaryCtxGetState; +extern tcuDevicePrimaryCtxReset *cuDevicePrimaryCtxReset; extern tcuCtxCreate_v2 *cuCtxCreate_v2; extern tcuCtxDestroy_v2 *cuCtxDestroy_v2; extern tcuCtxPushCurrent_v2 *cuCtxPushCurrent_v2; @@ -938,6 +972,7 @@ extern tcuCtxPopCurrent_v2 *cuCtxPopCurrent_v2; extern tcuCtxSetCurrent 
*cuCtxSetCurrent; extern tcuCtxGetCurrent *cuCtxGetCurrent; extern tcuCtxGetDevice *cuCtxGetDevice; +extern tcuCtxGetFlags *cuCtxGetFlags; extern tcuCtxSynchronize *cuCtxSynchronize; extern tcuCtxSetLimit *cuCtxSetLimit; extern tcuCtxGetLimit *cuCtxGetLimit; @@ -958,9 +993,9 @@ extern tcuModuleGetFunction *cuModuleGetFunction; extern tcuModuleGetGlobal_v2 *cuModuleGetGlobal_v2; extern tcuModuleGetTexRef *cuModuleGetTexRef; extern tcuModuleGetSurfRef *cuModuleGetSurfRef; -extern tcuLinkCreate *cuLinkCreate; -extern tcuLinkAddData *cuLinkAddData; -extern tcuLinkAddFile *cuLinkAddFile; +extern tcuLinkCreate_v2 *cuLinkCreate_v2; +extern tcuLinkAddData_v2 *cuLinkAddData_v2; +extern tcuLinkAddFile_v2 *cuLinkAddFile_v2; extern tcuLinkComplete *cuLinkComplete; extern tcuLinkDestroy *cuLinkDestroy; extern tcuMemGetInfo_v2 *cuMemGetInfo_v2; @@ -981,7 +1016,7 @@ extern tcuIpcOpenEventHandle *cuIpcOpenEventHandle; extern tcuIpcGetMemHandle *cuIpcGetMemHandle; extern tcuIpcOpenMemHandle *cuIpcOpenMemHandle; extern tcuIpcCloseMemHandle *cuIpcCloseMemHandle; -extern tcuMemHostRegister *cuMemHostRegister; +extern tcuMemHostRegister_v2 *cuMemHostRegister_v2; extern tcuMemHostUnregister *cuMemHostUnregister; extern tcuMemcpy *cuMemcpy; extern tcuMemcpyPeer *cuMemcpyPeer; @@ -1029,6 +1064,7 @@ extern tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel; extern tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy; extern tcuPointerGetAttribute *cuPointerGetAttribute; extern tcuPointerSetAttribute *cuPointerSetAttribute; +extern tcuPointerGetAttributes *cuPointerGetAttributes; extern tcuStreamCreate *cuStreamCreate; extern tcuStreamCreateWithPriority *cuStreamCreateWithPriority; extern tcuStreamGetPriority *cuStreamGetPriority; @@ -1059,6 +1095,10 @@ extern tcuLaunch *cuLaunch; extern tcuLaunchGrid *cuLaunchGrid; extern tcuLaunchGridAsync *cuLaunchGridAsync; extern tcuParamSetTexRef *cuParamSetTexRef; +extern tcuOccupancyMaxActiveBlocksPerMultiprocessor 
*cuOccupancyMaxActiveBlocksPerMultiprocessor; +extern tcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags *cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags; +extern tcuOccupancyMaxPotentialBlockSize *cuOccupancyMaxPotentialBlockSize; +extern tcuOccupancyMaxPotentialBlockSizeWithFlags *cuOccupancyMaxPotentialBlockSizeWithFlags; extern tcuTexRefSetArray *cuTexRefSetArray; extern tcuTexRefSetMipmappedArray *cuTexRefSetMipmappedArray; extern tcuTexRefSetAddress_v2 *cuTexRefSetAddress_v2; @@ -1101,14 +1141,14 @@ extern tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource; extern tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray; extern tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray; extern tcuGraphicsResourceGetMappedPointer_v2 *cuGraphicsResourceGetMappedPointer_v2; -extern tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags; +extern tcuGraphicsResourceSetMapFlags_v2 *cuGraphicsResourceSetMapFlags_v2; extern tcuGraphicsMapResources *cuGraphicsMapResources; extern tcuGraphicsUnmapResources *cuGraphicsUnmapResources; extern tcuGetExportTable *cuGetExportTable; extern tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer; extern tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage; -extern tcuGLGetDevices *cuGLGetDevices; +extern tcuGLGetDevices_v2 *cuGLGetDevices_v2; extern tcuGLCtxCreate_v2 *cuGLCtxCreate_v2; extern tcuGLInit *cuGLInit; extern tcuGLRegisterBufferObject *cuGLRegisterBufferObject; diff --git a/extern/cuew/src/cuew.c b/extern/cuew/src/cuew.c index da892efc0f4..3058e29d89f 100644 --- a/extern/cuew/src/cuew.c +++ b/extern/cuew/src/cuew.c @@ -36,7 +36,7 @@ typedef HMODULE DynamicLibrary; -# define dynamic_library_open(path) LoadLibrary(path) +# define dynamic_library_open(path) LoadLibraryA(path) # define dynamic_library_close(lib) FreeLibrary(lib) # define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol) #else @@ -70,6 +70,11 @@ tcuDeviceTotalMem_v2 
*cuDeviceTotalMem_v2; tcuDeviceGetAttribute *cuDeviceGetAttribute; tcuDeviceGetProperties *cuDeviceGetProperties; tcuDeviceComputeCapability *cuDeviceComputeCapability; +tcuDevicePrimaryCtxRetain *cuDevicePrimaryCtxRetain; +tcuDevicePrimaryCtxRelease *cuDevicePrimaryCtxRelease; +tcuDevicePrimaryCtxSetFlags *cuDevicePrimaryCtxSetFlags; +tcuDevicePrimaryCtxGetState *cuDevicePrimaryCtxGetState; +tcuDevicePrimaryCtxReset *cuDevicePrimaryCtxReset; tcuCtxCreate_v2 *cuCtxCreate_v2; tcuCtxDestroy_v2 *cuCtxDestroy_v2; tcuCtxPushCurrent_v2 *cuCtxPushCurrent_v2; @@ -77,6 +82,7 @@ tcuCtxPopCurrent_v2 *cuCtxPopCurrent_v2; tcuCtxSetCurrent *cuCtxSetCurrent; tcuCtxGetCurrent *cuCtxGetCurrent; tcuCtxGetDevice *cuCtxGetDevice; +tcuCtxGetFlags *cuCtxGetFlags; tcuCtxSynchronize *cuCtxSynchronize; tcuCtxSetLimit *cuCtxSetLimit; tcuCtxGetLimit *cuCtxGetLimit; @@ -97,9 +103,9 @@ tcuModuleGetFunction *cuModuleGetFunction; tcuModuleGetGlobal_v2 *cuModuleGetGlobal_v2; tcuModuleGetTexRef *cuModuleGetTexRef; tcuModuleGetSurfRef *cuModuleGetSurfRef; -tcuLinkCreate *cuLinkCreate; -tcuLinkAddData *cuLinkAddData; -tcuLinkAddFile *cuLinkAddFile; +tcuLinkCreate_v2 *cuLinkCreate_v2; +tcuLinkAddData_v2 *cuLinkAddData_v2; +tcuLinkAddFile_v2 *cuLinkAddFile_v2; tcuLinkComplete *cuLinkComplete; tcuLinkDestroy *cuLinkDestroy; tcuMemGetInfo_v2 *cuMemGetInfo_v2; @@ -120,7 +126,7 @@ tcuIpcOpenEventHandle *cuIpcOpenEventHandle; tcuIpcGetMemHandle *cuIpcGetMemHandle; tcuIpcOpenMemHandle *cuIpcOpenMemHandle; tcuIpcCloseMemHandle *cuIpcCloseMemHandle; -tcuMemHostRegister *cuMemHostRegister; +tcuMemHostRegister_v2 *cuMemHostRegister_v2; tcuMemHostUnregister *cuMemHostUnregister; tcuMemcpy *cuMemcpy; tcuMemcpyPeer *cuMemcpyPeer; @@ -168,6 +174,7 @@ tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel; tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy; tcuPointerGetAttribute *cuPointerGetAttribute; tcuPointerSetAttribute *cuPointerSetAttribute; +tcuPointerGetAttributes *cuPointerGetAttributes; tcuStreamCreate 
*cuStreamCreate; tcuStreamCreateWithPriority *cuStreamCreateWithPriority; tcuStreamGetPriority *cuStreamGetPriority; @@ -198,6 +205,10 @@ tcuLaunch *cuLaunch; tcuLaunchGrid *cuLaunchGrid; tcuLaunchGridAsync *cuLaunchGridAsync; tcuParamSetTexRef *cuParamSetTexRef; +tcuOccupancyMaxActiveBlocksPerMultiprocessor *cuOccupancyMaxActiveBlocksPerMultiprocessor; +tcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags *cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags; +tcuOccupancyMaxPotentialBlockSize *cuOccupancyMaxPotentialBlockSize; +tcuOccupancyMaxPotentialBlockSizeWithFlags *cuOccupancyMaxPotentialBlockSizeWithFlags; tcuTexRefSetArray *cuTexRefSetArray; tcuTexRefSetMipmappedArray *cuTexRefSetMipmappedArray; tcuTexRefSetAddress_v2 *cuTexRefSetAddress_v2; @@ -240,14 +251,14 @@ tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource; tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray; tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray; tcuGraphicsResourceGetMappedPointer_v2 *cuGraphicsResourceGetMappedPointer_v2; -tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags; +tcuGraphicsResourceSetMapFlags_v2 *cuGraphicsResourceSetMapFlags_v2; tcuGraphicsMapResources *cuGraphicsMapResources; tcuGraphicsUnmapResources *cuGraphicsUnmapResources; tcuGetExportTable *cuGetExportTable; tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer; tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage; -tcuGLGetDevices *cuGLGetDevices; +tcuGLGetDevices_v2 *cuGLGetDevices_v2; tcuGLCtxCreate_v2 *cuGLCtxCreate_v2; tcuGLInit *cuGLInit; tcuGLRegisterBufferObject *cuGLRegisterBufferObject; @@ -328,6 +339,11 @@ int cuewInit(void) { CUDA_LIBRARY_FIND(cuDeviceGetAttribute); CUDA_LIBRARY_FIND(cuDeviceGetProperties); CUDA_LIBRARY_FIND(cuDeviceComputeCapability); + CUDA_LIBRARY_FIND(cuDevicePrimaryCtxRetain); + CUDA_LIBRARY_FIND(cuDevicePrimaryCtxRelease); + CUDA_LIBRARY_FIND(cuDevicePrimaryCtxSetFlags); + 
CUDA_LIBRARY_FIND(cuDevicePrimaryCtxGetState); + CUDA_LIBRARY_FIND(cuDevicePrimaryCtxReset); CUDA_LIBRARY_FIND(cuCtxCreate_v2); CUDA_LIBRARY_FIND(cuCtxDestroy_v2); CUDA_LIBRARY_FIND(cuCtxPushCurrent_v2); @@ -335,6 +351,7 @@ int cuewInit(void) { CUDA_LIBRARY_FIND(cuCtxSetCurrent); CUDA_LIBRARY_FIND(cuCtxGetCurrent); CUDA_LIBRARY_FIND(cuCtxGetDevice); + CUDA_LIBRARY_FIND(cuCtxGetFlags); CUDA_LIBRARY_FIND(cuCtxSynchronize); CUDA_LIBRARY_FIND(cuCtxSetLimit); CUDA_LIBRARY_FIND(cuCtxGetLimit); @@ -355,9 +372,9 @@ int cuewInit(void) { CUDA_LIBRARY_FIND(cuModuleGetGlobal_v2); CUDA_LIBRARY_FIND(cuModuleGetTexRef); CUDA_LIBRARY_FIND(cuModuleGetSurfRef); - CUDA_LIBRARY_FIND(cuLinkCreate); - CUDA_LIBRARY_FIND(cuLinkAddData); - CUDA_LIBRARY_FIND(cuLinkAddFile); + CUDA_LIBRARY_FIND(cuLinkCreate_v2); + CUDA_LIBRARY_FIND(cuLinkAddData_v2); + CUDA_LIBRARY_FIND(cuLinkAddFile_v2); CUDA_LIBRARY_FIND(cuLinkComplete); CUDA_LIBRARY_FIND(cuLinkDestroy); CUDA_LIBRARY_FIND(cuMemGetInfo_v2); @@ -378,7 +395,7 @@ int cuewInit(void) { CUDA_LIBRARY_FIND(cuIpcGetMemHandle); CUDA_LIBRARY_FIND(cuIpcOpenMemHandle); CUDA_LIBRARY_FIND(cuIpcCloseMemHandle); - CUDA_LIBRARY_FIND(cuMemHostRegister); + CUDA_LIBRARY_FIND(cuMemHostRegister_v2); CUDA_LIBRARY_FIND(cuMemHostUnregister); CUDA_LIBRARY_FIND(cuMemcpy); CUDA_LIBRARY_FIND(cuMemcpyPeer); @@ -426,6 +443,7 @@ int cuewInit(void) { CUDA_LIBRARY_FIND(cuMipmappedArrayDestroy); CUDA_LIBRARY_FIND(cuPointerGetAttribute); CUDA_LIBRARY_FIND(cuPointerSetAttribute); + CUDA_LIBRARY_FIND(cuPointerGetAttributes); CUDA_LIBRARY_FIND(cuStreamCreate); CUDA_LIBRARY_FIND(cuStreamCreateWithPriority); CUDA_LIBRARY_FIND(cuStreamGetPriority); @@ -456,6 +474,10 @@ int cuewInit(void) { CUDA_LIBRARY_FIND(cuLaunchGrid); CUDA_LIBRARY_FIND(cuLaunchGridAsync); CUDA_LIBRARY_FIND(cuParamSetTexRef); + CUDA_LIBRARY_FIND(cuOccupancyMaxActiveBlocksPerMultiprocessor); + CUDA_LIBRARY_FIND(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags); + 
CUDA_LIBRARY_FIND(cuOccupancyMaxPotentialBlockSize); + CUDA_LIBRARY_FIND(cuOccupancyMaxPotentialBlockSizeWithFlags); CUDA_LIBRARY_FIND(cuTexRefSetArray); CUDA_LIBRARY_FIND(cuTexRefSetMipmappedArray); CUDA_LIBRARY_FIND(cuTexRefSetAddress_v2); @@ -498,14 +520,14 @@ int cuewInit(void) { CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray); CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedMipmappedArray); CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer_v2); - CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags); + CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags_v2); CUDA_LIBRARY_FIND(cuGraphicsMapResources); CUDA_LIBRARY_FIND(cuGraphicsUnmapResources); CUDA_LIBRARY_FIND(cuGetExportTable); CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer); CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage); - CUDA_LIBRARY_FIND(cuGLGetDevices); + CUDA_LIBRARY_FIND(cuGLGetDevices_v2); CUDA_LIBRARY_FIND(cuGLCtxCreate_v2); CUDA_LIBRARY_FIND(cuGLInit); CUDA_LIBRARY_FIND(cuGLRegisterBufferObject); @@ -528,10 +550,10 @@ const char *cuewErrorString(CUresult result) { case CUDA_ERROR_OUT_OF_MEMORY: return "Out of memory"; case CUDA_ERROR_NOT_INITIALIZED: return "Driver not initialized"; case CUDA_ERROR_DEINITIALIZED: return "Driver deinitialized"; - case CUDA_ERROR_PROFILER_DISABLED: return "PROFILER_DISABLED"; - case CUDA_ERROR_PROFILER_NOT_INITIALIZED: return "PROFILER_NOT_INITIALIZED"; - case CUDA_ERROR_PROFILER_ALREADY_STARTED: return "PROFILER_ALREADY_STARTED"; - case CUDA_ERROR_PROFILER_ALREADY_STOPPED: return "PROFILER_ALREADY_STOPPED"; + case CUDA_ERROR_PROFILER_DISABLED: return "Profiler disabled"; + case CUDA_ERROR_PROFILER_NOT_INITIALIZED: return "Profiler not initialized"; + case CUDA_ERROR_PROFILER_ALREADY_STARTED: return "Profiler already started"; + case CUDA_ERROR_PROFILER_ALREADY_STOPPED: return "Profiler already stopped"; case CUDA_ERROR_NO_DEVICE: return "No CUDA-capable device available"; case CUDA_ERROR_INVALID_DEVICE: return "Invalid device"; case CUDA_ERROR_INVALID_IMAGE: return 
"Invalid kernel image"; @@ -548,37 +570,38 @@ const char *cuewErrorString(CUresult result) { case CUDA_ERROR_NOT_MAPPED_AS_POINTER: return "Mapped resource not available for access as a pointer"; case CUDA_ERROR_ECC_UNCORRECTABLE: return "Uncorrectable ECC error detected"; case CUDA_ERROR_UNSUPPORTED_LIMIT: return "CUlimit not supported by device"; - case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: return "CONTEXT_ALREADY_IN_USE"; - case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: return "PEER_ACCESS_UNSUPPORTED"; - case CUDA_ERROR_INVALID_PTX: return "INVALID_PTX"; + case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: return "Context already in use"; + case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: return "Peer access unsupported"; + case CUDA_ERROR_INVALID_PTX: return "Invalid ptx"; + case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: return "Invalid graphics context"; case CUDA_ERROR_INVALID_SOURCE: return "Invalid source"; case CUDA_ERROR_FILE_NOT_FOUND: return "File not found"; case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared object failed to resolve"; case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: return "Shared object initialization failed"; - case CUDA_ERROR_OPERATING_SYSTEM: return "OPERATING_SYSTEM"; + case CUDA_ERROR_OPERATING_SYSTEM: return "Operating system"; case CUDA_ERROR_INVALID_HANDLE: return "Invalid handle"; case CUDA_ERROR_NOT_FOUND: return "Not found"; case CUDA_ERROR_NOT_READY: return "CUDA not ready"; - case CUDA_ERROR_ILLEGAL_ADDRESS: return "ILLEGAL_ADDRESS"; + case CUDA_ERROR_ILLEGAL_ADDRESS: return "Illegal address"; case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: return "Launch exceeded resources"; case CUDA_ERROR_LAUNCH_TIMEOUT: return "Launch exceeded timeout"; case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: return "Launch with incompatible texturing"; - case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: return "PEER_ACCESS_ALREADY_ENABLED"; - case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: return "PEER_ACCESS_NOT_ENABLED"; - case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: return 
"PRIMARY_CONTEXT_ACTIVE"; - case CUDA_ERROR_CONTEXT_IS_DESTROYED: return "CONTEXT_IS_DESTROYED"; - case CUDA_ERROR_ASSERT: return "ASSERT"; - case CUDA_ERROR_TOO_MANY_PEERS: return "TOO_MANY_PEERS"; - case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: return "HOST_MEMORY_ALREADY_REGISTERED"; - case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: return "HOST_MEMORY_NOT_REGISTERED"; - case CUDA_ERROR_HARDWARE_STACK_ERROR: return "HARDWARE_STACK_ERROR"; - case CUDA_ERROR_ILLEGAL_INSTRUCTION: return "ILLEGAL_INSTRUCTION"; - case CUDA_ERROR_MISALIGNED_ADDRESS: return "MISALIGNED_ADDRESS"; - case CUDA_ERROR_INVALID_ADDRESS_SPACE: return "INVALID_ADDRESS_SPACE"; - case CUDA_ERROR_INVALID_PC: return "INVALID_PC"; + case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: return "Peer access already enabled"; + case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: return "Peer access not enabled"; + case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: return "Primary context active"; + case CUDA_ERROR_CONTEXT_IS_DESTROYED: return "Context is destroyed"; + case CUDA_ERROR_ASSERT: return "Assert"; + case CUDA_ERROR_TOO_MANY_PEERS: return "Too many peers"; + case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: return "Host memory already registered"; + case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: return "Host memory not registered"; + case CUDA_ERROR_HARDWARE_STACK_ERROR: return "Hardware stack error"; + case CUDA_ERROR_ILLEGAL_INSTRUCTION: return "Illegal instruction"; + case CUDA_ERROR_MISALIGNED_ADDRESS: return "Misaligned address"; + case CUDA_ERROR_INVALID_ADDRESS_SPACE: return "Invalid address space"; + case CUDA_ERROR_INVALID_PC: return "Invalid pc"; case CUDA_ERROR_LAUNCH_FAILED: return "Launch failed"; - case CUDA_ERROR_NOT_PERMITTED: return "NOT_PERMITTED"; - case CUDA_ERROR_NOT_SUPPORTED: return "NOT_SUPPORTED"; + case CUDA_ERROR_NOT_PERMITTED: return "Not permitted"; + case CUDA_ERROR_NOT_SUPPORTED: return "Not supported"; case CUDA_ERROR_UNKNOWN: return "Unknown error"; default: return "Unknown CUDA error value"; } @@ 
-686,7 +709,7 @@ int cuewCompilerVersion(void) { while (!feof(pipe)) { if (fgets(buf, sizeof(buf), pipe) != NULL) { - strncat(output, buf, sizeof(output) - strlen(output) - 1 ); + strncat(output, buf, sizeof(output) - strlen(output) - 1); } } -- cgit v1.2.3