diff options
Diffstat (limited to 'intern/cycles/util/util_cuda.cpp')
-rw-r--r-- | intern/cycles/util/util_cuda.cpp | 379 |
1 files changed, 379 insertions, 0 deletions
diff --git a/intern/cycles/util/util_cuda.cpp b/intern/cycles/util/util_cuda.cpp new file mode 100644 index 00000000000..15ce7efd9ee --- /dev/null +++ b/intern/cycles/util/util_cuda.cpp @@ -0,0 +1,379 @@ +/* + * Copyright 2011, Blender Foundation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include "util_cuda.h" +#include "util_debug.h" +#include "util_dynlib.h" + +/* function defininitions */ + +tcuInit *cuInit; +tcuDriverGetVersion *cuDriverGetVersion; +tcuDeviceGet *cuDeviceGet; +tcuDeviceGetCount *cuDeviceGetCount; +tcuDeviceGetName *cuDeviceGetName; +tcuDeviceComputeCapability *cuDeviceComputeCapability; +tcuDeviceTotalMem *cuDeviceTotalMem; +tcuDeviceGetProperties *cuDeviceGetProperties; +tcuDeviceGetAttribute *cuDeviceGetAttribute; +tcuCtxCreate *cuCtxCreate; +tcuCtxDestroy *cuCtxDestroy; +tcuCtxAttach *cuCtxAttach; +tcuCtxDetach *cuCtxDetach; +tcuCtxPushCurrent *cuCtxPushCurrent; +tcuCtxPopCurrent *cuCtxPopCurrent; +tcuCtxGetDevice *cuCtxGetDevice; +tcuCtxSynchronize *cuCtxSynchronize; +tcuModuleLoad *cuModuleLoad; +tcuModuleLoadData *cuModuleLoadData; +tcuModuleLoadDataEx *cuModuleLoadDataEx; +tcuModuleLoadFatBinary *cuModuleLoadFatBinary; +tcuModuleUnload *cuModuleUnload; +tcuModuleGetFunction *cuModuleGetFunction; +tcuModuleGetGlobal *cuModuleGetGlobal; +tcuModuleGetTexRef *cuModuleGetTexRef; +tcuModuleGetSurfRef *cuModuleGetSurfRef; +tcuMemGetInfo *cuMemGetInfo; +tcuMemAlloc *cuMemAlloc; +tcuMemAllocPitch *cuMemAllocPitch; +tcuMemFree *cuMemFree; +tcuMemGetAddressRange *cuMemGetAddressRange; +tcuMemAllocHost *cuMemAllocHost; +tcuMemFreeHost *cuMemFreeHost; +tcuMemHostAlloc *cuMemHostAlloc; +tcuMemHostGetDevicePointer *cuMemHostGetDevicePointer; +tcuMemHostGetFlags *cuMemHostGetFlags; +tcuMemcpyHtoD *cuMemcpyHtoD; +tcuMemcpyDtoH *cuMemcpyDtoH; +tcuMemcpyDtoD *cuMemcpyDtoD; +tcuMemcpyDtoA *cuMemcpyDtoA; +tcuMemcpyAtoD *cuMemcpyAtoD; +tcuMemcpyHtoA *cuMemcpyHtoA; +tcuMemcpyAtoH *cuMemcpyAtoH; +tcuMemcpyAtoA *cuMemcpyAtoA; +tcuMemcpy2D *cuMemcpy2D; +tcuMemcpy2DUnaligned *cuMemcpy2DUnaligned; +tcuMemcpy3D *cuMemcpy3D; +tcuMemcpyHtoDAsync *cuMemcpyHtoDAsync; +tcuMemcpyDtoHAsync *cuMemcpyDtoHAsync; +tcuMemcpyDtoDAsync *cuMemcpyDtoDAsync; +tcuMemcpyHtoAAsync *cuMemcpyHtoAAsync; +tcuMemcpyAtoHAsync *cuMemcpyAtoHAsync; +tcuMemcpy2DAsync *cuMemcpy2DAsync; +tcuMemcpy3DAsync *cuMemcpy3DAsync; +tcuMemsetD8 *cuMemsetD8; +tcuMemsetD16 *cuMemsetD16; +tcuMemsetD32 *cuMemsetD32; +tcuMemsetD2D8 *cuMemsetD2D8; +tcuMemsetD2D16 *cuMemsetD2D16; +tcuMemsetD2D32 *cuMemsetD2D32; +tcuFuncSetBlockShape *cuFuncSetBlockShape; +tcuFuncSetSharedSize *cuFuncSetSharedSize; +tcuFuncGetAttribute *cuFuncGetAttribute; +tcuFuncSetCacheConfig *cuFuncSetCacheConfig; +tcuArrayCreate *cuArrayCreate; +tcuArrayGetDescriptor *cuArrayGetDescriptor; +tcuArrayDestroy *cuArrayDestroy; +tcuArray3DCreate *cuArray3DCreate; +tcuArray3DGetDescriptor *cuArray3DGetDescriptor; +tcuTexRefCreate *cuTexRefCreate; +tcuTexRefDestroy *cuTexRefDestroy; +tcuTexRefSetArray *cuTexRefSetArray; +tcuTexRefSetAddress *cuTexRefSetAddress; +tcuTexRefSetAddress2D *cuTexRefSetAddress2D; +tcuTexRefSetFormat *cuTexRefSetFormat; +tcuTexRefSetAddressMode *cuTexRefSetAddressMode; +tcuTexRefSetFilterMode *cuTexRefSetFilterMode; +tcuTexRefSetFlags *cuTexRefSetFlags; +tcuTexRefGetAddress *cuTexRefGetAddress; +tcuTexRefGetArray *cuTexRefGetArray; +tcuTexRefGetAddressMode *cuTexRefGetAddressMode; +tcuTexRefGetFilterMode *cuTexRefGetFilterMode; +tcuTexRefGetFormat *cuTexRefGetFormat; +tcuTexRefGetFlags *cuTexRefGetFlags; +tcuSurfRefSetArray *cuSurfRefSetArray; +tcuSurfRefGetArray *cuSurfRefGetArray; +tcuParamSetSize *cuParamSetSize; +tcuParamSeti *cuParamSeti; +tcuParamSetf *cuParamSetf; +tcuParamSetv *cuParamSetv; +tcuParamSetTexRef *cuParamSetTexRef; +tcuLaunch *cuLaunch; +tcuLaunchGrid *cuLaunchGrid; +tcuLaunchGridAsync *cuLaunchGridAsync; +tcuEventCreate *cuEventCreate; +tcuEventRecord *cuEventRecord; +tcuEventQuery *cuEventQuery; +tcuEventSynchronize *cuEventSynchronize; +tcuEventDestroy *cuEventDestroy; +tcuEventElapsedTime *cuEventElapsedTime; +tcuStreamCreate *cuStreamCreate; +tcuStreamQuery *cuStreamQuery; +tcuStreamSynchronize *cuStreamSynchronize; +tcuStreamDestroy *cuStreamDestroy; +tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource; +tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray; +tcuGraphicsResourceGetMappedPointer *cuGraphicsResourceGetMappedPointer; +tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags; +tcuGraphicsMapResources *cuGraphicsMapResources; +tcuGraphicsUnmapResources *cuGraphicsUnmapResources; +tcuGetExportTable *cuGetExportTable; +tcuCtxSetLimit *cuCtxSetLimit; +tcuCtxGetLimit *cuCtxGetLimit; +tcuGLCtxCreate *cuGLCtxCreate; +tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer; +tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage; +tcuCtxSetCurrent *cuCtxSetCurrent; + +CCL_NAMESPACE_BEGIN + +/* utility macros */ + +#define CUDA_LIBRARY_FIND(name) \ + name = (t##name*)dynamic_library_find(lib, #name); \ + assert(name); + +#define CUDA_LIBRARY_FIND_V2(name) \ + name = (t##name*)dynamic_library_find(lib, #name "_v2"); \ + assert(name); + +/* initialization function */ + +bool cuLibraryInit() +{ + static bool initialized = false; + static bool result = false; + + if(initialized) + return result; + + initialized = true; + + /* library paths */ +#ifdef _WIN32 + /* expected in c:/windows/system or similar, no path needed */ + const char *path = "nvcuda.dll"; +#elif defined(__APPLE__) + /* default installation path */ + const char *path = "/usr/local/cuda/lib/libcuda.dylib"; +#else + const char *path = "libcuda.so"; +#endif + + /* load library */ + DynamicLibrary *lib = dynamic_library_open(path); + + if(lib == NULL) + return false; + + /* detect driver version */ + int driver_version = 1000; + + CUDA_LIBRARY_FIND(cuDriverGetVersion); + if(cuDriverGetVersion) + cuDriverGetVersion(&driver_version); + + /* we require version 4.0 */ + if(driver_version < 4000) + return false; + + /* fetch all function pointers */ + CUDA_LIBRARY_FIND(cuInit); + CUDA_LIBRARY_FIND(cuDeviceGet); + CUDA_LIBRARY_FIND(cuDeviceGetCount); + CUDA_LIBRARY_FIND(cuDeviceGetName); + CUDA_LIBRARY_FIND(cuDeviceComputeCapability); + CUDA_LIBRARY_FIND(cuDeviceTotalMem); + CUDA_LIBRARY_FIND(cuDeviceGetProperties); + CUDA_LIBRARY_FIND(cuDeviceGetAttribute); + CUDA_LIBRARY_FIND(cuCtxCreate); + CUDA_LIBRARY_FIND(cuCtxDestroy); + CUDA_LIBRARY_FIND(cuCtxAttach); + CUDA_LIBRARY_FIND(cuCtxDetach); + CUDA_LIBRARY_FIND(cuCtxPushCurrent); + CUDA_LIBRARY_FIND(cuCtxPopCurrent); + CUDA_LIBRARY_FIND(cuCtxGetDevice); + CUDA_LIBRARY_FIND(cuCtxSynchronize); + CUDA_LIBRARY_FIND(cuModuleLoad); + CUDA_LIBRARY_FIND(cuModuleLoadData); + CUDA_LIBRARY_FIND(cuModuleUnload); + CUDA_LIBRARY_FIND(cuModuleGetFunction); + CUDA_LIBRARY_FIND(cuModuleGetGlobal); + CUDA_LIBRARY_FIND(cuModuleGetTexRef); + CUDA_LIBRARY_FIND(cuMemGetInfo); + CUDA_LIBRARY_FIND(cuMemAlloc); + CUDA_LIBRARY_FIND(cuMemAllocPitch); + CUDA_LIBRARY_FIND(cuMemFree); + CUDA_LIBRARY_FIND(cuMemGetAddressRange); + CUDA_LIBRARY_FIND(cuMemAllocHost); + CUDA_LIBRARY_FIND(cuMemFreeHost); + CUDA_LIBRARY_FIND(cuMemHostAlloc); + CUDA_LIBRARY_FIND(cuMemHostGetDevicePointer); + CUDA_LIBRARY_FIND(cuMemcpyHtoD); + CUDA_LIBRARY_FIND(cuMemcpyDtoH); + CUDA_LIBRARY_FIND(cuMemcpyDtoD); + CUDA_LIBRARY_FIND(cuMemcpyDtoA); + CUDA_LIBRARY_FIND(cuMemcpyAtoD); + CUDA_LIBRARY_FIND(cuMemcpyHtoA); + CUDA_LIBRARY_FIND(cuMemcpyAtoH); + CUDA_LIBRARY_FIND(cuMemcpyAtoA); + CUDA_LIBRARY_FIND(cuMemcpy2D); + CUDA_LIBRARY_FIND(cuMemcpy2DUnaligned); + CUDA_LIBRARY_FIND(cuMemcpy3D); + CUDA_LIBRARY_FIND(cuMemcpyHtoDAsync); + CUDA_LIBRARY_FIND(cuMemcpyDtoHAsync); + CUDA_LIBRARY_FIND(cuMemcpyHtoAAsync); + CUDA_LIBRARY_FIND(cuMemcpyAtoHAsync); + CUDA_LIBRARY_FIND(cuMemcpy2DAsync); + CUDA_LIBRARY_FIND(cuMemcpy3DAsync); + CUDA_LIBRARY_FIND(cuMemsetD8); + CUDA_LIBRARY_FIND(cuMemsetD16); + CUDA_LIBRARY_FIND(cuMemsetD32); + CUDA_LIBRARY_FIND(cuMemsetD2D8); + CUDA_LIBRARY_FIND(cuMemsetD2D16); + CUDA_LIBRARY_FIND(cuMemsetD2D32); + CUDA_LIBRARY_FIND(cuFuncSetBlockShape); + CUDA_LIBRARY_FIND(cuFuncSetSharedSize); + CUDA_LIBRARY_FIND(cuFuncGetAttribute); + CUDA_LIBRARY_FIND(cuArrayCreate); + CUDA_LIBRARY_FIND(cuArrayGetDescriptor); + CUDA_LIBRARY_FIND(cuArrayDestroy); + CUDA_LIBRARY_FIND(cuArray3DCreate); + CUDA_LIBRARY_FIND(cuArray3DGetDescriptor); + CUDA_LIBRARY_FIND(cuTexRefCreate); + CUDA_LIBRARY_FIND(cuTexRefDestroy); + CUDA_LIBRARY_FIND(cuTexRefSetArray); + CUDA_LIBRARY_FIND(cuTexRefSetAddress); + CUDA_LIBRARY_FIND(cuTexRefSetAddress2D); + CUDA_LIBRARY_FIND(cuTexRefSetFormat); + CUDA_LIBRARY_FIND(cuTexRefSetAddressMode); + CUDA_LIBRARY_FIND(cuTexRefSetFilterMode); + CUDA_LIBRARY_FIND(cuTexRefSetFlags); + CUDA_LIBRARY_FIND(cuTexRefGetAddress); + CUDA_LIBRARY_FIND(cuTexRefGetArray); + CUDA_LIBRARY_FIND(cuTexRefGetAddressMode); + CUDA_LIBRARY_FIND(cuTexRefGetFilterMode); + CUDA_LIBRARY_FIND(cuTexRefGetFormat); + CUDA_LIBRARY_FIND(cuTexRefGetFlags); + CUDA_LIBRARY_FIND(cuParamSetSize); + CUDA_LIBRARY_FIND(cuParamSeti); + CUDA_LIBRARY_FIND(cuParamSetf); + CUDA_LIBRARY_FIND(cuParamSetv); + CUDA_LIBRARY_FIND(cuParamSetTexRef); + CUDA_LIBRARY_FIND(cuLaunch); + CUDA_LIBRARY_FIND(cuLaunchGrid); + CUDA_LIBRARY_FIND(cuLaunchGridAsync); + CUDA_LIBRARY_FIND(cuEventCreate); + CUDA_LIBRARY_FIND(cuEventRecord); + CUDA_LIBRARY_FIND(cuEventQuery); + CUDA_LIBRARY_FIND(cuEventSynchronize); + CUDA_LIBRARY_FIND(cuEventDestroy); + CUDA_LIBRARY_FIND(cuEventElapsedTime); + CUDA_LIBRARY_FIND(cuStreamCreate); + CUDA_LIBRARY_FIND(cuStreamQuery); + CUDA_LIBRARY_FIND(cuStreamSynchronize); + CUDA_LIBRARY_FIND(cuStreamDestroy); + + /* cuda 2.1 */ + CUDA_LIBRARY_FIND(cuModuleLoadDataEx); + CUDA_LIBRARY_FIND(cuModuleLoadFatBinary); + CUDA_LIBRARY_FIND(cuGLCtxCreate); + CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer); + CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage); + + /* cuda 2.3 */ + CUDA_LIBRARY_FIND(cuMemHostGetFlags); + CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer); + CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage); + + /* cuda 3.0 */ + CUDA_LIBRARY_FIND(cuMemcpyDtoDAsync); + CUDA_LIBRARY_FIND(cuFuncSetCacheConfig); + CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource); + CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray); + CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer); + CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags); + CUDA_LIBRARY_FIND(cuGraphicsMapResources); + CUDA_LIBRARY_FIND(cuGraphicsUnmapResources); + CUDA_LIBRARY_FIND(cuGetExportTable); + + /* cuda 3.1 */ + CUDA_LIBRARY_FIND(cuModuleGetSurfRef); + CUDA_LIBRARY_FIND(cuSurfRefSetArray); + CUDA_LIBRARY_FIND(cuSurfRefGetArray); + CUDA_LIBRARY_FIND(cuCtxSetLimit); + CUDA_LIBRARY_FIND(cuCtxGetLimit); + + /* functions which changed 3.1 -> 3.2 for 64 bit stuff, the cuda library + has both the old ones for compatibility and new ones with _v2 postfix, + we load the _v2 ones here. */ + CUDA_LIBRARY_FIND_V2(cuDeviceTotalMem); + CUDA_LIBRARY_FIND_V2(cuCtxCreate); + CUDA_LIBRARY_FIND_V2(cuModuleGetGlobal); + CUDA_LIBRARY_FIND_V2(cuMemGetInfo); + CUDA_LIBRARY_FIND_V2(cuMemAlloc); + CUDA_LIBRARY_FIND_V2(cuMemAllocPitch); + CUDA_LIBRARY_FIND_V2(cuMemFree); + CUDA_LIBRARY_FIND_V2(cuMemGetAddressRange); + CUDA_LIBRARY_FIND_V2(cuMemAllocHost); + CUDA_LIBRARY_FIND_V2(cuMemHostGetDevicePointer); + CUDA_LIBRARY_FIND_V2(cuMemcpyHtoD); + CUDA_LIBRARY_FIND_V2(cuMemcpyDtoH); + CUDA_LIBRARY_FIND_V2(cuMemcpyDtoD); + CUDA_LIBRARY_FIND_V2(cuMemcpyDtoA); + CUDA_LIBRARY_FIND_V2(cuMemcpyAtoD); + CUDA_LIBRARY_FIND_V2(cuMemcpyHtoA); + CUDA_LIBRARY_FIND_V2(cuMemcpyAtoH); + CUDA_LIBRARY_FIND_V2(cuMemcpyAtoA); + CUDA_LIBRARY_FIND_V2(cuMemcpyHtoAAsync); + CUDA_LIBRARY_FIND_V2(cuMemcpyAtoHAsync); + CUDA_LIBRARY_FIND_V2(cuMemcpy2D); + CUDA_LIBRARY_FIND_V2(cuMemcpy2DUnaligned); + CUDA_LIBRARY_FIND_V2(cuMemcpy3D); + CUDA_LIBRARY_FIND_V2(cuMemcpyHtoDAsync); + CUDA_LIBRARY_FIND_V2(cuMemcpyDtoHAsync); + CUDA_LIBRARY_FIND_V2(cuMemcpyDtoDAsync); + CUDA_LIBRARY_FIND_V2(cuMemcpy2DAsync); + CUDA_LIBRARY_FIND_V2(cuMemcpy3DAsync); + CUDA_LIBRARY_FIND_V2(cuMemsetD8); + CUDA_LIBRARY_FIND_V2(cuMemsetD16); + CUDA_LIBRARY_FIND_V2(cuMemsetD32); + CUDA_LIBRARY_FIND_V2(cuMemsetD2D8); + CUDA_LIBRARY_FIND_V2(cuMemsetD2D16); + CUDA_LIBRARY_FIND_V2(cuMemsetD2D32); + CUDA_LIBRARY_FIND_V2(cuArrayCreate); + CUDA_LIBRARY_FIND_V2(cuArrayGetDescriptor); + CUDA_LIBRARY_FIND_V2(cuArray3DCreate); + CUDA_LIBRARY_FIND_V2(cuArray3DGetDescriptor); + CUDA_LIBRARY_FIND_V2(cuTexRefSetAddress); + CUDA_LIBRARY_FIND_V2(cuTexRefSetAddress2D); + CUDA_LIBRARY_FIND_V2(cuTexRefGetAddress); + CUDA_LIBRARY_FIND_V2(cuGraphicsResourceGetMappedPointer); + CUDA_LIBRARY_FIND_V2(cuGLCtxCreate); + + /* cuda 4.0 */ + CUDA_LIBRARY_FIND(cuCtxSetCurrent); + + /* success */ + result = true; + + return result; +} + +CCL_NAMESPACE_END + |