Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Brecht Van Lommel <brechtvanlommel@gmail.com> 2018-02-06 16:54:18 +0300
committer: Brecht Van Lommel <brechtvanlommel@gmail.com> 2018-02-07 13:53:01 +0300
commit: 1dafe759edc3bc9a89f99a894d1c1fb3a14f44a9 (patch)
tree: d4862fceb7c38873de5ce581d9cdbfdc0cb59423
parent: 38d35603f224e92c7f4dfe1802b0ebfceebec46a (diff)
Update CUEW to latest version
This brings separate initialization for libcuda and libnvrtc, which fixes Cycles nvrtc compilation not working on build machines without CUDA hardware available.

Differential Revision: https://developer.blender.org/D3045
-rw-r--r-- extern/cuew/include/cuew.h 74
-rw-r--r-- extern/cuew/src/cuew.c 146
-rw-r--r-- intern/cycles/app/cycles_cubin_cc.cpp 4
-rw-r--r-- intern/cycles/device/device_cuda.cpp 2
-rw-r--r-- intern/opensubdiv/opensubdiv_device_context_cuda.cc 2
5 files changed, 177 insertions, 51 deletions
diff --git a/extern/cuew/include/cuew.h b/extern/cuew/include/cuew.h
index f5009d4f2c7..fa334678e54 100644
--- a/extern/cuew/include/cuew.h
+++ b/extern/cuew/include/cuew.h
@@ -24,10 +24,10 @@ extern "C" {
#include <stdlib.h>
/* Defines. */
-#define CUEW_VERSION_MAJOR 1
-#define CUEW_VERSION_MINOR 2
+#define CUEW_VERSION_MAJOR 2
+#define CUEW_VERSION_MINOR 0
-#define CUDA_VERSION 8000
+#define CUDA_VERSION 9010
#define CU_IPC_HANDLE_SIZE 64
#define CU_STREAM_LEGACY ((CUstream)0x1)
#define CU_STREAM_PER_THREAD ((CUstream)0x2)
@@ -37,6 +37,8 @@ extern "C" {
#define CU_MEMHOSTREGISTER_PORTABLE 0x01
#define CU_MEMHOSTREGISTER_DEVICEMAP 0x02
#define CU_MEMHOSTREGISTER_IOMEMORY 0x04
+#define CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC 0x01
+#define CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC 0x02
#define CUDA_ARRAY3D_LAYERED 0x01
#define CUDA_ARRAY3D_2DARRAY 0x01
#define CUDA_ARRAY3D_SURFACE_LDST 0x02
@@ -204,6 +206,7 @@ typedef enum CUstreamWaitValue_flags_enum {
CU_STREAM_WAIT_VALUE_GEQ = 0x0,
CU_STREAM_WAIT_VALUE_EQ = 0x1,
CU_STREAM_WAIT_VALUE_AND = 0x2,
+ CU_STREAM_WAIT_VALUE_NOR = 0x3,
CU_STREAM_WAIT_VALUE_FLUSH = (1 << 30),
} CUstreamWaitValue_flags;
@@ -215,6 +218,8 @@ typedef enum CUstreamWriteValue_flags_enum {
typedef enum CUstreamBatchMemOpType_enum {
CU_STREAM_MEM_OP_WAIT_VALUE_32 = 1,
CU_STREAM_MEM_OP_WRITE_VALUE_32 = 2,
+ CU_STREAM_MEM_OP_WAIT_VALUE_64 = 4,
+ CU_STREAM_MEM_OP_WRITE_VALUE_64 = 5,
CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES = 3,
} CUstreamBatchMemOpType;
@@ -225,7 +230,7 @@ typedef union CUstreamBatchMemOpParams_union {
CUdeviceptr address;
union {
cuuint32_t value;
- cuuint64_t pad;
+ cuuint64_t value64;
};
unsigned int flags;
CUdeviceptr alias;
@@ -235,7 +240,7 @@ typedef union CUstreamBatchMemOpParams_union {
CUdeviceptr address;
union {
cuuint32_t value;
- cuuint64_t pad;
+ cuuint64_t value64;
};
unsigned int flags;
CUdeviceptr alias;
@@ -372,6 +377,12 @@ typedef enum CUdevice_attribute_enum {
CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89,
CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90,
CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91,
+ CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = 92,
+ CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 93,
+ CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 94,
+ CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95,
+ CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96,
+ CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97,
CU_DEVICE_ATTRIBUTE_MAX,
} CUdevice_attribute;
@@ -408,6 +419,8 @@ typedef enum CUfunction_attribute_enum {
CU_FUNC_ATTRIBUTE_PTX_VERSION = 5,
CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6,
CU_FUNC_ATTRIBUTE_CACHE_MODE_CA = 7,
+ CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = 8,
+ CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 9,
CU_FUNC_ATTRIBUTE_MAX,
} CUfunction_attribute;
@@ -424,6 +437,12 @@ typedef enum CUsharedconfig_enum {
CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 0x02,
} CUsharedconfig;
+typedef enum CUshared_carveout_enum {
+ CU_SHAREDMEM_CARVEOUT_DEFAULT,
+ CU_SHAREDMEM_CARVEOUT_MAX_SHARED = 100,
+ CU_SHAREDMEM_CARVEOUT_MAX_L1 = 0,
+} CUshared_carveout;
+
typedef enum CUmemorytype_enum {
CU_MEMORYTYPE_HOST = 0x01,
CU_MEMORYTYPE_DEVICE = 0x02,
@@ -475,10 +494,6 @@ typedef enum CUjit_option_enum {
} CUjit_option;
typedef enum CUjit_target_enum {
- CU_TARGET_COMPUTE_10 = 10,
- CU_TARGET_COMPUTE_11 = 11,
- CU_TARGET_COMPUTE_12 = 12,
- CU_TARGET_COMPUTE_13 = 13,
CU_TARGET_COMPUTE_20 = 20,
CU_TARGET_COMPUTE_21 = 21,
CU_TARGET_COMPUTE_30 = 30,
@@ -491,6 +506,9 @@ typedef enum CUjit_target_enum {
CU_TARGET_COMPUTE_60 = 60,
CU_TARGET_COMPUTE_61 = 61,
CU_TARGET_COMPUTE_62 = 62,
+ CU_TARGET_COMPUTE_70 = 70,
+ CU_TARGET_COMPUTE_73 = 73,
+ CU_TARGET_COMPUTE_75 = 75,
} CUjit_target;
typedef enum CUjit_fallback_enum {
@@ -585,6 +603,7 @@ typedef enum cudaError_enum {
CUDA_ERROR_INVALID_PTX = 218,
CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219,
CUDA_ERROR_NVLINK_UNCORRECTABLE = 220,
+ CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221,
CUDA_ERROR_INVALID_SOURCE = 300,
CUDA_ERROR_FILE_NOT_FOUND = 301,
CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
@@ -611,6 +630,7 @@ typedef enum cudaError_enum {
CUDA_ERROR_INVALID_ADDRESS_SPACE = 717,
CUDA_ERROR_INVALID_PC = 718,
CUDA_ERROR_LAUNCH_FAILED = 719,
+ CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720,
CUDA_ERROR_NOT_PERMITTED = 800,
CUDA_ERROR_NOT_SUPPORTED = 801,
CUDA_ERROR_UNKNOWN = 999,
@@ -813,6 +833,19 @@ typedef struct CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st {
unsigned long long p2pToken;
unsigned int vaSpaceToken;
} CUDA_POINTER_ATTRIBUTE_P2P_TOKENS;
+
+typedef struct CUDA_LAUNCH_PARAMS_st {
+ CUfunction function;
+ unsigned int gridDimX;
+ unsigned int gridDimY;
+ unsigned int gridDimZ;
+ unsigned int blockDimX;
+ unsigned int blockDimY;
+ unsigned int blockDimZ;
+ unsigned int sharedMemBytes;
+ CUstream hStream;
+ void** kernelParams;
+} CUDA_LAUNCH_PARAMS;
typedef unsigned int GLenum;
typedef unsigned int GLuint;
typedef int GLint;
@@ -845,6 +878,8 @@ typedef enum {
} nvrtcResult;
typedef struct _nvrtcProgram* nvrtcProgram;
+
+
/* Function types. */
typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char** pStr);
typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char** pStr);
@@ -983,12 +1018,17 @@ typedef CUresult CUDAAPI tcuEventSynchronize(CUevent hEvent);
typedef CUresult CUDAAPI tcuEventDestroy_v2(CUevent hEvent);
typedef CUresult CUDAAPI tcuEventElapsedTime(float* pMilliseconds, CUevent hStart, CUevent hEnd);
typedef CUresult CUDAAPI tcuStreamWaitValue32(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags);
+typedef CUresult CUDAAPI tcuStreamWaitValue64(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags);
typedef CUresult CUDAAPI tcuStreamWriteValue32(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags);
+typedef CUresult CUDAAPI tcuStreamWriteValue64(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags);
typedef CUresult CUDAAPI tcuStreamBatchMemOp(CUstream stream, unsigned int count, CUstreamBatchMemOpParams* paramArray, unsigned int flags);
typedef CUresult CUDAAPI tcuFuncGetAttribute(int* pi, CUfunction_attribute attrib, CUfunction hfunc);
+typedef CUresult CUDAAPI tcuFuncSetAttribute(CUfunction hfunc, CUfunction_attribute attrib, int value);
typedef CUresult CUDAAPI tcuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
typedef CUresult CUDAAPI tcuFuncSetSharedMemConfig(CUfunction hfunc, CUsharedconfig config);
typedef CUresult CUDAAPI tcuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void** kernelParams, void** extra);
+typedef CUresult CUDAAPI tcuLaunchCooperativeKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void** kernelParams);
+typedef CUresult CUDAAPI tcuLaunchCooperativeKernelMultiDevice(CUDA_LAUNCH_PARAMS* launchParamsList, unsigned int numDevices, unsigned int flags);
typedef CUresult CUDAAPI tcuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z);
typedef CUresult CUDAAPI tcuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes);
typedef CUresult CUDAAPI tcuParamSetSize(CUfunction hfunc, unsigned int numbytes);
@@ -1041,9 +1081,9 @@ typedef CUresult CUDAAPI tcuSurfObjectCreate(CUsurfObject* pSurfObject, const CU
typedef CUresult CUDAAPI tcuSurfObjectDestroy(CUsurfObject surfObject);
typedef CUresult CUDAAPI tcuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC* pResDesc, CUsurfObject surfObject);
typedef CUresult CUDAAPI tcuDeviceCanAccessPeer(int* canAccessPeer, CUdevice dev, CUdevice peerDev);
-typedef CUresult CUDAAPI tcuDeviceGetP2PAttribute(int* value, CUdevice_P2PAttribute attrib, CUdevice srcDevice, CUdevice dstDevice);
typedef CUresult CUDAAPI tcuCtxEnablePeerAccess(CUcontext peerContext, unsigned int Flags);
typedef CUresult CUDAAPI tcuCtxDisablePeerAccess(CUcontext peerContext);
+typedef CUresult CUDAAPI tcuDeviceGetP2PAttribute(int* value, CUdevice_P2PAttribute attrib, CUdevice srcDevice, CUdevice dstDevice);
typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource);
typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray* pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel);
typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedMipmappedArray(CUmipmappedArray* pMipmappedArray, CUgraphicsResource resource);
@@ -1217,12 +1257,17 @@ extern tcuEventSynchronize *cuEventSynchronize;
extern tcuEventDestroy_v2 *cuEventDestroy_v2;
extern tcuEventElapsedTime *cuEventElapsedTime;
extern tcuStreamWaitValue32 *cuStreamWaitValue32;
+extern tcuStreamWaitValue64 *cuStreamWaitValue64;
extern tcuStreamWriteValue32 *cuStreamWriteValue32;
+extern tcuStreamWriteValue64 *cuStreamWriteValue64;
extern tcuStreamBatchMemOp *cuStreamBatchMemOp;
extern tcuFuncGetAttribute *cuFuncGetAttribute;
+extern tcuFuncSetAttribute *cuFuncSetAttribute;
extern tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
extern tcuFuncSetSharedMemConfig *cuFuncSetSharedMemConfig;
extern tcuLaunchKernel *cuLaunchKernel;
+extern tcuLaunchCooperativeKernel *cuLaunchCooperativeKernel;
+extern tcuLaunchCooperativeKernelMultiDevice *cuLaunchCooperativeKernelMultiDevice;
extern tcuFuncSetBlockShape *cuFuncSetBlockShape;
extern tcuFuncSetSharedSize *cuFuncSetSharedSize;
extern tcuParamSetSize *cuParamSetSize;
@@ -1275,9 +1320,9 @@ extern tcuSurfObjectCreate *cuSurfObjectCreate;
extern tcuSurfObjectDestroy *cuSurfObjectDestroy;
extern tcuSurfObjectGetResourceDesc *cuSurfObjectGetResourceDesc;
extern tcuDeviceCanAccessPeer *cuDeviceCanAccessPeer;
-extern tcuDeviceGetP2PAttribute *cuDeviceGetP2PAttribute;
extern tcuCtxEnablePeerAccess *cuCtxEnablePeerAccess;
extern tcuCtxDisablePeerAccess *cuCtxDisablePeerAccess;
+extern tcuDeviceGetP2PAttribute *cuDeviceGetP2PAttribute;
extern tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
extern tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
extern tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray;
@@ -1319,7 +1364,12 @@ enum {
CUEW_ERROR_ATEXIT_FAILED = -2,
};
-int cuewInit(void);
+enum {
+ CUEW_INIT_CUDA = 1,
+ CUEW_INIT_NVRTC = 2
+};
+
+int cuewInit(cuuint32_t flags);
const char *cuewErrorString(CUresult result);
const char *cuewCompilerPath(void);
int cuewCompilerVersion(void);
diff --git a/extern/cuew/src/cuew.c b/extern/cuew/src/cuew.c
index b68dc597049..329dfbad3aa 100644
--- a/extern/cuew/src/cuew.c
+++ b/extern/cuew/src/cuew.c
@@ -207,12 +207,17 @@ tcuEventSynchronize *cuEventSynchronize;
tcuEventDestroy_v2 *cuEventDestroy_v2;
tcuEventElapsedTime *cuEventElapsedTime;
tcuStreamWaitValue32 *cuStreamWaitValue32;
+tcuStreamWaitValue64 *cuStreamWaitValue64;
tcuStreamWriteValue32 *cuStreamWriteValue32;
+tcuStreamWriteValue64 *cuStreamWriteValue64;
tcuStreamBatchMemOp *cuStreamBatchMemOp;
tcuFuncGetAttribute *cuFuncGetAttribute;
+tcuFuncSetAttribute *cuFuncSetAttribute;
tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
tcuFuncSetSharedMemConfig *cuFuncSetSharedMemConfig;
tcuLaunchKernel *cuLaunchKernel;
+tcuLaunchCooperativeKernel *cuLaunchCooperativeKernel;
+tcuLaunchCooperativeKernelMultiDevice *cuLaunchCooperativeKernelMultiDevice;
tcuFuncSetBlockShape *cuFuncSetBlockShape;
tcuFuncSetSharedSize *cuFuncSetSharedSize;
tcuParamSetSize *cuParamSetSize;
@@ -265,9 +270,9 @@ tcuSurfObjectCreate *cuSurfObjectCreate;
tcuSurfObjectDestroy *cuSurfObjectDestroy;
tcuSurfObjectGetResourceDesc *cuSurfObjectGetResourceDesc;
tcuDeviceCanAccessPeer *cuDeviceCanAccessPeer;
-tcuDeviceGetP2PAttribute *cuDeviceGetP2PAttribute;
tcuCtxEnablePeerAccess *cuCtxEnablePeerAccess;
tcuCtxDisablePeerAccess *cuCtxDisablePeerAccess;
+tcuDeviceGetP2PAttribute *cuDeviceGetP2PAttribute;
tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray;
@@ -315,34 +320,25 @@ static DynamicLibrary dynamic_library_open_find(const char **paths) {
return NULL;
}
-static void cuewExit(void) {
- if(cuda_lib != NULL) {
+/* Implementation function. */
+static void cuewCudaExit(void) {
+ if (cuda_lib != NULL) {
/* Ignore errors. */
dynamic_library_close(cuda_lib);
cuda_lib = NULL;
}
}
-/* Implementation function. */
-int cuewInit(void) {
+static int cuewCudaInit(void) {
/* Library paths. */
#ifdef _WIN32
/* Expected in c:/windows/system or similar, no path needed. */
const char *cuda_paths[] = {"nvcuda.dll", NULL};
- const char *nvrtc_paths[] = {"nvrtc64_80.dll", "nvrtc64_90.dll", "nvrtc64_91.dll", NULL};
#elif defined(__APPLE__)
/* Default installation path. */
const char *cuda_paths[] = {"/usr/local/cuda/lib/libcuda.dylib", NULL};
- const char *nvrtc_paths[] = {"/usr/local/cuda/lib/libnvrtc.dylib", NULL};
#else
const char *cuda_paths[] = {"libcuda.so", NULL};
- const char *nvrtc_paths[] = {"libnvrtc.so",
-# if defined(__x86_64__) || defined(_M_X64)
- "/usr/local/cuda/lib64/libnvrtc.so",
-#else
- "/usr/local/cuda/lib/libnvrtc.so",
-#endif
- NULL};
#endif
static int initialized = 0;
static int result = 0;
@@ -354,7 +350,7 @@ int cuewInit(void) {
initialized = 1;
- error = atexit(cuewExit);
+ error = atexit(cuewCudaExit);
if (error) {
result = CUEW_ERROR_ATEXIT_FAILED;
return result;
@@ -362,9 +358,7 @@ int cuewInit(void) {
/* Load library. */
cuda_lib = dynamic_library_open_find(cuda_paths);
- nvrtc_lib = dynamic_library_open_find(nvrtc_paths);
- /* CUDA library is mandatory to have, while nvrtc might be missing. */
if (cuda_lib == NULL) {
result = CUEW_ERROR_OPEN_FAILED;
return result;
@@ -521,12 +515,17 @@ int cuewInit(void) {
CUDA_LIBRARY_FIND(cuEventDestroy_v2);
CUDA_LIBRARY_FIND(cuEventElapsedTime);
CUDA_LIBRARY_FIND(cuStreamWaitValue32);
+ CUDA_LIBRARY_FIND(cuStreamWaitValue64);
CUDA_LIBRARY_FIND(cuStreamWriteValue32);
+ CUDA_LIBRARY_FIND(cuStreamWriteValue64);
CUDA_LIBRARY_FIND(cuStreamBatchMemOp);
CUDA_LIBRARY_FIND(cuFuncGetAttribute);
+ CUDA_LIBRARY_FIND(cuFuncSetAttribute);
CUDA_LIBRARY_FIND(cuFuncSetCacheConfig);
CUDA_LIBRARY_FIND(cuFuncSetSharedMemConfig);
CUDA_LIBRARY_FIND(cuLaunchKernel);
+ CUDA_LIBRARY_FIND(cuLaunchCooperativeKernel);
+ CUDA_LIBRARY_FIND(cuLaunchCooperativeKernelMultiDevice);
CUDA_LIBRARY_FIND(cuFuncSetBlockShape);
CUDA_LIBRARY_FIND(cuFuncSetSharedSize);
CUDA_LIBRARY_FIND(cuParamSetSize);
@@ -579,9 +578,9 @@ int cuewInit(void) {
CUDA_LIBRARY_FIND(cuSurfObjectDestroy);
CUDA_LIBRARY_FIND(cuSurfObjectGetResourceDesc);
CUDA_LIBRARY_FIND(cuDeviceCanAccessPeer);
- CUDA_LIBRARY_FIND(cuDeviceGetP2PAttribute);
CUDA_LIBRARY_FIND(cuCtxEnablePeerAccess);
CUDA_LIBRARY_FIND(cuCtxDisablePeerAccess);
+ CUDA_LIBRARY_FIND(cuDeviceGetP2PAttribute);
CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource);
CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray);
CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedMipmappedArray);
@@ -604,27 +603,99 @@ int cuewInit(void) {
CUDA_LIBRARY_FIND(cuGLMapBufferObjectAsync_v2);
CUDA_LIBRARY_FIND(cuGLUnmapBufferObjectAsync);
+ result = CUEW_SUCCESS;
+ return result;
+}
+static void cuewExitNvrtc(void) {
if (nvrtc_lib != NULL) {
- NVRTC_LIBRARY_FIND(nvrtcGetErrorString);
- NVRTC_LIBRARY_FIND(nvrtcVersion);
- NVRTC_LIBRARY_FIND(nvrtcCreateProgram);
- NVRTC_LIBRARY_FIND(nvrtcDestroyProgram);
- NVRTC_LIBRARY_FIND(nvrtcCompileProgram);
- NVRTC_LIBRARY_FIND(nvrtcGetPTXSize);
- NVRTC_LIBRARY_FIND(nvrtcGetPTX);
- NVRTC_LIBRARY_FIND(nvrtcGetProgramLogSize);
- NVRTC_LIBRARY_FIND(nvrtcGetProgramLog);
- NVRTC_LIBRARY_FIND(nvrtcAddNameExpression);
- NVRTC_LIBRARY_FIND(nvrtcGetLoweredName);
+ /* Ignore errors. */
+ dynamic_library_close(nvrtc_lib);
+ nvrtc_lib = NULL;
}
+}
+
+static int cuewNvrtcInit(void) {
+ /* Library paths. */
+#ifdef _WIN32
+ /* Expected in c:/windows/system or similar, no path needed. */
+ const char *nvrtc_paths[] = {"nvrtc64_80.dll", "nvrtc64_90.dll", "nvrtc64_91.dll", NULL};
+#elif defined(__APPLE__)
+ /* Default installation path. */
+ const char *nvrtc_paths[] = {"/usr/local/cuda/lib/libnvrtc.dylib", NULL};
+#else
+ const char *nvrtc_paths[] = {"libnvrtc.so",
+# if defined(__x86_64__) || defined(_M_X64)
+ "/usr/local/cuda/lib64/libnvrtc.so",
+#else
+ "/usr/local/cuda/lib/libnvrtc.so",
+#endif
+ NULL};
+#endif
+ static int initialized = 0;
+ static int result = 0;
+ int error;
+
+ if (initialized) {
+ return result;
+ }
+
+ initialized = 1;
+
+ error = atexit(cuewExitNvrtc);
+ if (error) {
+ result = CUEW_ERROR_ATEXIT_FAILED;
+ return result;
+ }
+
+ /* Load library. */
+ nvrtc_lib = dynamic_library_open_find(nvrtc_paths);
+
+ if (nvrtc_lib == NULL) {
+ result = CUEW_ERROR_OPEN_FAILED;
+ return result;
+ }
+
+ NVRTC_LIBRARY_FIND(nvrtcGetErrorString);
+ NVRTC_LIBRARY_FIND(nvrtcVersion);
+ NVRTC_LIBRARY_FIND(nvrtcCreateProgram);
+ NVRTC_LIBRARY_FIND(nvrtcDestroyProgram);
+ NVRTC_LIBRARY_FIND(nvrtcCompileProgram);
+ NVRTC_LIBRARY_FIND(nvrtcGetPTXSize);
+ NVRTC_LIBRARY_FIND(nvrtcGetPTX);
+ NVRTC_LIBRARY_FIND(nvrtcGetProgramLogSize);
+ NVRTC_LIBRARY_FIND(nvrtcGetProgramLog);
+ NVRTC_LIBRARY_FIND(nvrtcAddNameExpression);
+ NVRTC_LIBRARY_FIND(nvrtcGetLoweredName);
result = CUEW_SUCCESS;
return result;
}
+
+int cuewInit(cuuint32_t flags) {
+ int result = CUEW_SUCCESS;
+
+ if (flags & CUEW_INIT_CUDA) {
+ result = cuewCudaInit();
+ if (result != CUEW_SUCCESS) {
+ return result;
+ }
+ }
+
+ if (flags & CUEW_INIT_NVRTC) {
+ result = cuewNvrtcInit();
+ if (result != CUEW_SUCCESS) {
+ return result;
+ }
+ }
+
+ return result;
+}
+
+
const char *cuewErrorString(CUresult result) {
- switch(result) {
+ switch (result) {
case CUDA_SUCCESS: return "No errors";
case CUDA_ERROR_INVALID_VALUE: return "Invalid value";
case CUDA_ERROR_OUT_OF_MEMORY: return "Out of memory";
@@ -655,6 +726,7 @@ const char *cuewErrorString(CUresult result) {
case CUDA_ERROR_INVALID_PTX: return "Invalid ptx";
case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: return "Invalid graphics context";
case CUDA_ERROR_NVLINK_UNCORRECTABLE: return "Nvlink uncorrectable";
+ case CUDA_ERROR_JIT_COMPILER_NOT_FOUND: return "Jit compiler not found";
case CUDA_ERROR_INVALID_SOURCE: return "Invalid source";
case CUDA_ERROR_FILE_NOT_FOUND: return "File not found";
case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared object failed to resolve";
@@ -681,6 +753,7 @@ const char *cuewErrorString(CUresult result) {
case CUDA_ERROR_INVALID_ADDRESS_SPACE: return "Invalid address space";
case CUDA_ERROR_INVALID_PC: return "Invalid pc";
case CUDA_ERROR_LAUNCH_FAILED: return "Launch failed";
+ case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE: return "Cooperative launch too large";
case CUDA_ERROR_NOT_PERMITTED: return "Not permitted";
case CUDA_ERROR_NOT_SUPPORTED: return "Not supported";
case CUDA_ERROR_UNKNOWN: return "Unknown error";
@@ -738,14 +811,16 @@ const char *cuewCompilerPath(void) {
if (binpath) {
path_join(binpath, executable, sizeof(nvcc), nvcc);
- if (path_exists(nvcc))
+ if (path_exists(nvcc)) {
return nvcc;
+ }
}
for (i = 0; defaultpaths[i]; ++i) {
path_join(defaultpaths[i], executable, sizeof(nvcc), nvcc);
- if (path_exists(nvcc))
+ if (path_exists(nvcc)) {
return nvcc;
+ }
}
#ifndef _WIN32
@@ -756,9 +831,9 @@ const char *cuewCompilerPath(void) {
int len = fread(buffer, 1, sizeof(buffer) - 1, handle);
buffer[len] = '\0';
pclose(handle);
-
- if (buffer[0])
+ if (buffer[0]) {
return "nvcc";
+ }
}
}
#endif
@@ -785,8 +860,9 @@ int cuewCompilerVersion(void) {
char output[65536] = "\0";
char command[65536] = "\0";
- if (path == NULL)
+ if (path == NULL) {
return 0;
+ }
/* get --version output */
strncpy(command, path, sizeof(command));
diff --git a/intern/cycles/app/cycles_cubin_cc.cpp b/intern/cycles/app/cycles_cubin_cc.cpp
index 73d0cd5130c..3c83bf2dae3 100644
--- a/intern/cycles/app/cycles_cubin_cc.cpp
+++ b/intern/cycles/app/cycles_cubin_cc.cpp
@@ -162,7 +162,7 @@ bool link_ptxas(CompilationSettings &settings)
if (settings.verbose)
{
ptx += " --verbose";
- printf(ptx.c_str());
+ printf("%s\n", ptx.c_str());
}
int pxresult = system(ptx.c_str());
@@ -186,7 +186,7 @@ bool init(CompilationSettings &settings)
}
#endif
- int cuewresult = cuewInit();
+ int cuewresult = cuewInit(CUEW_INIT_NVRTC);
if(cuewresult != CUEW_SUCCESS) {
fprintf(stderr, "Error: cuew init fialed (0x%x)\n\n", cuewresult);
return false;
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 18a83672a6d..dfedf922ca9 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -2449,7 +2449,7 @@ bool device_cuda_init(void)
return result;
initialized = true;
- int cuew_result = cuewInit();
+ int cuew_result = cuewInit(CUEW_INIT_CUDA);
if(cuew_result == CUEW_SUCCESS) {
VLOG(1) << "CUEW initialization succeeded";
if(CUDADevice::have_precompiled_kernels()) {
diff --git a/intern/opensubdiv/opensubdiv_device_context_cuda.cc b/intern/opensubdiv/opensubdiv_device_context_cuda.cc
index 81c52f5d6cf..46b66a6b35e 100644
--- a/intern/opensubdiv/opensubdiv_device_context_cuda.cc
+++ b/intern/opensubdiv/opensubdiv_device_context_cuda.cc
@@ -187,7 +187,7 @@ bool CudaDeviceContext::HAS_CUDA_VERSION_4_0()
cudaInitialized = true;
# ifdef OPENSUBDIV_HAS_CUEW
- cudaLoadSuccess = cuewInit() == CUEW_SUCCESS;
+ cudaLoadSuccess = cuewInit(CUEW_INIT_CUDA) == CUEW_SUCCESS;
if (!cudaLoadSuccess) {
fprintf(stderr, "Loading CUDA failed.\n");
}