Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ton Roosendaal <ton@blender.org> 2011-04-27 15:58:34 +0400
committer: Ton Roosendaal <ton@blender.org> 2011-04-27 15:58:34 +0400
commit: da376e0237517543aa21740ee2363234ee1c20ae (patch)
tree: 014a513ed8d0eccc5e54fef42347781e85bae56a /intern/cycles/util/util_cuda.cpp
parent: 693780074388111e7b9ef1c3825e462f398dc6c4 (diff)
Cycles render engine, initial commit. This is the engine itself; Blender modifications and build instructions will follow later.
Cycles uses code from some great open source projects, many thanks to them: * BVH building and traversal code from NVidia's "Understanding the Efficiency of Ray Traversal on GPUs": http://code.google.com/p/understanding-the-efficiency-of-ray-traversal-on-gpus/ * Open Shading Language for a large part of the shading system: http://code.google.com/p/openshadinglanguage/ * Blender for procedural textures and a few other nodes. * Approximate Catmull Clark subdivision from NVidia Mesh tools: http://code.google.com/p/nvidia-mesh-tools/ * Sobol direction vectors from: http://web.maths.unsw.edu.au/~fkuo/sobol/ * Film response functions from: http://www.cs.columbia.edu/CAVE/software/softlib/dorf.php
Diffstat (limited to 'intern/cycles/util/util_cuda.cpp')
-rw-r--r-- intern/cycles/util/util_cuda.cpp 379
1 files changed, 379 insertions, 0 deletions
diff --git a/intern/cycles/util/util_cuda.cpp b/intern/cycles/util/util_cuda.cpp
new file mode 100644
index 00000000000..15ce7efd9ee
--- /dev/null
+++ b/intern/cycles/util/util_cuda.cpp
@@ -0,0 +1,379 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "util_cuda.h"
+#include "util_debug.h"
+#include "util_dynlib.h"
+
+/* function definitions
+ *
+ * One global function pointer per CUDA driver API entry point. Each pointer
+ * is named exactly like the driver function it stands for and has type
+ * t<name>* (those types are declared outside this file, presumably in
+ * util_cuda.h). The pointers are defined outside the CCL namespace and start
+ * out null, as all objects with static storage do; cuLibraryInit() below
+ * assigns them by looking the symbols up in the dynamically loaded library,
+ * so none of them may be called before that function has run. */
+
+tcuInit *cuInit;
+tcuDriverGetVersion *cuDriverGetVersion;
+tcuDeviceGet *cuDeviceGet;
+tcuDeviceGetCount *cuDeviceGetCount;
+tcuDeviceGetName *cuDeviceGetName;
+tcuDeviceComputeCapability *cuDeviceComputeCapability;
+tcuDeviceTotalMem *cuDeviceTotalMem;
+tcuDeviceGetProperties *cuDeviceGetProperties;
+tcuDeviceGetAttribute *cuDeviceGetAttribute;
+tcuCtxCreate *cuCtxCreate;
+tcuCtxDestroy *cuCtxDestroy;
+tcuCtxAttach *cuCtxAttach;
+tcuCtxDetach *cuCtxDetach;
+tcuCtxPushCurrent *cuCtxPushCurrent;
+tcuCtxPopCurrent *cuCtxPopCurrent;
+tcuCtxGetDevice *cuCtxGetDevice;
+tcuCtxSynchronize *cuCtxSynchronize;
+tcuModuleLoad *cuModuleLoad;
+tcuModuleLoadData *cuModuleLoadData;
+tcuModuleLoadDataEx *cuModuleLoadDataEx;
+tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
+tcuModuleUnload *cuModuleUnload;
+tcuModuleGetFunction *cuModuleGetFunction;
+tcuModuleGetGlobal *cuModuleGetGlobal;
+tcuModuleGetTexRef *cuModuleGetTexRef;
+tcuModuleGetSurfRef *cuModuleGetSurfRef;
+tcuMemGetInfo *cuMemGetInfo;
+tcuMemAlloc *cuMemAlloc;
+tcuMemAllocPitch *cuMemAllocPitch;
+tcuMemFree *cuMemFree;
+tcuMemGetAddressRange *cuMemGetAddressRange;
+tcuMemAllocHost *cuMemAllocHost;
+tcuMemFreeHost *cuMemFreeHost;
+tcuMemHostAlloc *cuMemHostAlloc;
+tcuMemHostGetDevicePointer *cuMemHostGetDevicePointer;
+tcuMemHostGetFlags *cuMemHostGetFlags;
+tcuMemcpyHtoD *cuMemcpyHtoD;
+tcuMemcpyDtoH *cuMemcpyDtoH;
+tcuMemcpyDtoD *cuMemcpyDtoD;
+tcuMemcpyDtoA *cuMemcpyDtoA;
+tcuMemcpyAtoD *cuMemcpyAtoD;
+tcuMemcpyHtoA *cuMemcpyHtoA;
+tcuMemcpyAtoH *cuMemcpyAtoH;
+tcuMemcpyAtoA *cuMemcpyAtoA;
+tcuMemcpy2D *cuMemcpy2D;
+tcuMemcpy2DUnaligned *cuMemcpy2DUnaligned;
+tcuMemcpy3D *cuMemcpy3D;
+tcuMemcpyHtoDAsync *cuMemcpyHtoDAsync;
+tcuMemcpyDtoHAsync *cuMemcpyDtoHAsync;
+tcuMemcpyDtoDAsync *cuMemcpyDtoDAsync;
+tcuMemcpyHtoAAsync *cuMemcpyHtoAAsync;
+tcuMemcpyAtoHAsync *cuMemcpyAtoHAsync;
+tcuMemcpy2DAsync *cuMemcpy2DAsync;
+tcuMemcpy3DAsync *cuMemcpy3DAsync;
+tcuMemsetD8 *cuMemsetD8;
+tcuMemsetD16 *cuMemsetD16;
+tcuMemsetD32 *cuMemsetD32;
+tcuMemsetD2D8 *cuMemsetD2D8;
+tcuMemsetD2D16 *cuMemsetD2D16;
+tcuMemsetD2D32 *cuMemsetD2D32;
+tcuFuncSetBlockShape *cuFuncSetBlockShape;
+tcuFuncSetSharedSize *cuFuncSetSharedSize;
+tcuFuncGetAttribute *cuFuncGetAttribute;
+tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
+tcuArrayCreate *cuArrayCreate;
+tcuArrayGetDescriptor *cuArrayGetDescriptor;
+tcuArrayDestroy *cuArrayDestroy;
+tcuArray3DCreate *cuArray3DCreate;
+tcuArray3DGetDescriptor *cuArray3DGetDescriptor;
+tcuTexRefCreate *cuTexRefCreate;
+tcuTexRefDestroy *cuTexRefDestroy;
+tcuTexRefSetArray *cuTexRefSetArray;
+tcuTexRefSetAddress *cuTexRefSetAddress;
+tcuTexRefSetAddress2D *cuTexRefSetAddress2D;
+tcuTexRefSetFormat *cuTexRefSetFormat;
+tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
+tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
+tcuTexRefSetFlags *cuTexRefSetFlags;
+tcuTexRefGetAddress *cuTexRefGetAddress;
+tcuTexRefGetArray *cuTexRefGetArray;
+tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
+tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
+tcuTexRefGetFormat *cuTexRefGetFormat;
+tcuTexRefGetFlags *cuTexRefGetFlags;
+tcuSurfRefSetArray *cuSurfRefSetArray;
+tcuSurfRefGetArray *cuSurfRefGetArray;
+tcuParamSetSize *cuParamSetSize;
+tcuParamSeti *cuParamSeti;
+tcuParamSetf *cuParamSetf;
+tcuParamSetv *cuParamSetv;
+tcuParamSetTexRef *cuParamSetTexRef;
+tcuLaunch *cuLaunch;
+tcuLaunchGrid *cuLaunchGrid;
+tcuLaunchGridAsync *cuLaunchGridAsync;
+tcuEventCreate *cuEventCreate;
+tcuEventRecord *cuEventRecord;
+tcuEventQuery *cuEventQuery;
+tcuEventSynchronize *cuEventSynchronize;
+tcuEventDestroy *cuEventDestroy;
+tcuEventElapsedTime *cuEventElapsedTime;
+tcuStreamCreate *cuStreamCreate;
+tcuStreamQuery *cuStreamQuery;
+tcuStreamSynchronize *cuStreamSynchronize;
+tcuStreamDestroy *cuStreamDestroy;
+tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
+tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
+tcuGraphicsResourceGetMappedPointer *cuGraphicsResourceGetMappedPointer;
+tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
+tcuGraphicsMapResources *cuGraphicsMapResources;
+tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
+tcuGetExportTable *cuGetExportTable;
+tcuCtxSetLimit *cuCtxSetLimit;
+tcuCtxGetLimit *cuCtxGetLimit;
+tcuGLCtxCreate *cuGLCtxCreate;
+tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
+tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
+tcuCtxSetCurrent *cuCtxSetCurrent;
+
+CCL_NAMESPACE_BEGIN
+
+/* utility macros */
+
+#define CUDA_LIBRARY_FIND(name) \
+ name = (t##name*)dynamic_library_find(lib, #name); \
+ assert(name);
+
+#define CUDA_LIBRARY_FIND_V2(name) \
+ name = (t##name*)dynamic_library_find(lib, #name "_v2"); \
+ assert(name);
+
+/* initialization function
+ *
+ * Opens the CUDA driver library at runtime and resolves the driver API entry
+ * points into the global function pointers defined earlier in this file.
+ *
+ * Returns true when the library could be opened and reports a driver version
+ * of at least 4000; returns false when the library cannot be opened or the
+ * version is lower (or cannot be queried). Only the first call does any
+ * work: its outcome is stored in function-local statics and handed back by
+ * every later call, so a failed attempt is never retried.
+ *
+ * Apart from cuDriverGetVersion, every symbol is resolved through the
+ * CUDA_LIBRARY_FIND / CUDA_LIBRARY_FIND_V2 macros, which assert that the
+ * lookup succeeded. With assert() compiled out a missing symbol leaves its
+ * pointer null while this function still returns true. */
+
+bool cuLibraryInit()
+{
+	static bool initialized = false;
+	static bool result = false;
+
+	if(initialized)
+		return result;
+
+	/* mark as attempted up front, the early "return false" exits below
+	 * rely on result still being false */
+	initialized = true;
+
+	/* library paths */
+#ifdef _WIN32
+	/* expected in c:/windows/system or similar, no path needed */
+	const char *path = "nvcuda.dll";
+#elif defined(__APPLE__)
+	/* default installation path */
+	const char *path = "/usr/local/cuda/lib/libcuda.dylib";
+#else
+	const char *path = "libcuda.so";
+#endif
+
+	/* load library */
+	DynamicLibrary *lib = dynamic_library_open(path);
+
+	if(lib == NULL)
+		return false;
+
+	/* detect driver version, the default of 1000 is kept when the query
+	 * function is unavailable and then fails the version test below.
+	 *
+	 * this symbol is deliberately resolved without the asserting macro: a
+	 * library that lacks it must take the regular "unsupported driver"
+	 * exit instead of tripping an assert before the null check is reached */
+	int driver_version = 1000;
+
+	cuDriverGetVersion = (tcuDriverGetVersion*)dynamic_library_find(lib, "cuDriverGetVersion");
+	if(cuDriverGetVersion)
+		cuDriverGetVersion(&driver_version);
+
+	/* we require version 4.0 */
+	/* NOTE(review): lib stays open on this exit; closing it would need a
+	 * close routine from util_dynlib.h, which is not visible from this
+	 * file - TODO confirm and release the handle here */
+	if(driver_version < 4000)
+		return false;
+
+	/* fetch all function pointers */
+	CUDA_LIBRARY_FIND(cuInit);
+	CUDA_LIBRARY_FIND(cuDeviceGet);
+	CUDA_LIBRARY_FIND(cuDeviceGetCount);
+	CUDA_LIBRARY_FIND(cuDeviceGetName);
+	CUDA_LIBRARY_FIND(cuDeviceComputeCapability);
+	CUDA_LIBRARY_FIND(cuDeviceTotalMem);
+	CUDA_LIBRARY_FIND(cuDeviceGetProperties);
+	CUDA_LIBRARY_FIND(cuDeviceGetAttribute);
+	CUDA_LIBRARY_FIND(cuCtxCreate);
+	CUDA_LIBRARY_FIND(cuCtxDestroy);
+	CUDA_LIBRARY_FIND(cuCtxAttach);
+	CUDA_LIBRARY_FIND(cuCtxDetach);
+	CUDA_LIBRARY_FIND(cuCtxPushCurrent);
+	CUDA_LIBRARY_FIND(cuCtxPopCurrent);
+	CUDA_LIBRARY_FIND(cuCtxGetDevice);
+	CUDA_LIBRARY_FIND(cuCtxSynchronize);
+	CUDA_LIBRARY_FIND(cuModuleLoad);
+	CUDA_LIBRARY_FIND(cuModuleLoadData);
+	CUDA_LIBRARY_FIND(cuModuleUnload);
+	CUDA_LIBRARY_FIND(cuModuleGetFunction);
+	CUDA_LIBRARY_FIND(cuModuleGetGlobal);
+	CUDA_LIBRARY_FIND(cuModuleGetTexRef);
+	CUDA_LIBRARY_FIND(cuMemGetInfo);
+	CUDA_LIBRARY_FIND(cuMemAlloc);
+	CUDA_LIBRARY_FIND(cuMemAllocPitch);
+	CUDA_LIBRARY_FIND(cuMemFree);
+	CUDA_LIBRARY_FIND(cuMemGetAddressRange);
+	CUDA_LIBRARY_FIND(cuMemAllocHost);
+	CUDA_LIBRARY_FIND(cuMemFreeHost);
+	CUDA_LIBRARY_FIND(cuMemHostAlloc);
+	CUDA_LIBRARY_FIND(cuMemHostGetDevicePointer);
+	CUDA_LIBRARY_FIND(cuMemcpyHtoD);
+	CUDA_LIBRARY_FIND(cuMemcpyDtoH);
+	CUDA_LIBRARY_FIND(cuMemcpyDtoD);
+	CUDA_LIBRARY_FIND(cuMemcpyDtoA);
+	CUDA_LIBRARY_FIND(cuMemcpyAtoD);
+	CUDA_LIBRARY_FIND(cuMemcpyHtoA);
+	CUDA_LIBRARY_FIND(cuMemcpyAtoH);
+	CUDA_LIBRARY_FIND(cuMemcpyAtoA);
+	CUDA_LIBRARY_FIND(cuMemcpy2D);
+	CUDA_LIBRARY_FIND(cuMemcpy2DUnaligned);
+	CUDA_LIBRARY_FIND(cuMemcpy3D);
+	CUDA_LIBRARY_FIND(cuMemcpyHtoDAsync);
+	CUDA_LIBRARY_FIND(cuMemcpyDtoHAsync);
+	CUDA_LIBRARY_FIND(cuMemcpyHtoAAsync);
+	CUDA_LIBRARY_FIND(cuMemcpyAtoHAsync);
+	CUDA_LIBRARY_FIND(cuMemcpy2DAsync);
+	CUDA_LIBRARY_FIND(cuMemcpy3DAsync);
+	CUDA_LIBRARY_FIND(cuMemsetD8);
+	CUDA_LIBRARY_FIND(cuMemsetD16);
+	CUDA_LIBRARY_FIND(cuMemsetD32);
+	CUDA_LIBRARY_FIND(cuMemsetD2D8);
+	CUDA_LIBRARY_FIND(cuMemsetD2D16);
+	CUDA_LIBRARY_FIND(cuMemsetD2D32);
+	CUDA_LIBRARY_FIND(cuFuncSetBlockShape);
+	CUDA_LIBRARY_FIND(cuFuncSetSharedSize);
+	CUDA_LIBRARY_FIND(cuFuncGetAttribute);
+	CUDA_LIBRARY_FIND(cuArrayCreate);
+	CUDA_LIBRARY_FIND(cuArrayGetDescriptor);
+	CUDA_LIBRARY_FIND(cuArrayDestroy);
+	CUDA_LIBRARY_FIND(cuArray3DCreate);
+	CUDA_LIBRARY_FIND(cuArray3DGetDescriptor);
+	CUDA_LIBRARY_FIND(cuTexRefCreate);
+	CUDA_LIBRARY_FIND(cuTexRefDestroy);
+	CUDA_LIBRARY_FIND(cuTexRefSetArray);
+	CUDA_LIBRARY_FIND(cuTexRefSetAddress);
+	CUDA_LIBRARY_FIND(cuTexRefSetAddress2D);
+	CUDA_LIBRARY_FIND(cuTexRefSetFormat);
+	CUDA_LIBRARY_FIND(cuTexRefSetAddressMode);
+	CUDA_LIBRARY_FIND(cuTexRefSetFilterMode);
+	CUDA_LIBRARY_FIND(cuTexRefSetFlags);
+	CUDA_LIBRARY_FIND(cuTexRefGetAddress);
+	CUDA_LIBRARY_FIND(cuTexRefGetArray);
+	CUDA_LIBRARY_FIND(cuTexRefGetAddressMode);
+	CUDA_LIBRARY_FIND(cuTexRefGetFilterMode);
+	CUDA_LIBRARY_FIND(cuTexRefGetFormat);
+	CUDA_LIBRARY_FIND(cuTexRefGetFlags);
+	CUDA_LIBRARY_FIND(cuParamSetSize);
+	CUDA_LIBRARY_FIND(cuParamSeti);
+	CUDA_LIBRARY_FIND(cuParamSetf);
+	CUDA_LIBRARY_FIND(cuParamSetv);
+	CUDA_LIBRARY_FIND(cuParamSetTexRef);
+	CUDA_LIBRARY_FIND(cuLaunch);
+	CUDA_LIBRARY_FIND(cuLaunchGrid);
+	CUDA_LIBRARY_FIND(cuLaunchGridAsync);
+	CUDA_LIBRARY_FIND(cuEventCreate);
+	CUDA_LIBRARY_FIND(cuEventRecord);
+	CUDA_LIBRARY_FIND(cuEventQuery);
+	CUDA_LIBRARY_FIND(cuEventSynchronize);
+	CUDA_LIBRARY_FIND(cuEventDestroy);
+	CUDA_LIBRARY_FIND(cuEventElapsedTime);
+	CUDA_LIBRARY_FIND(cuStreamCreate);
+	CUDA_LIBRARY_FIND(cuStreamQuery);
+	CUDA_LIBRARY_FIND(cuStreamSynchronize);
+	CUDA_LIBRARY_FIND(cuStreamDestroy);
+
+	/* cuda 2.1 */
+	CUDA_LIBRARY_FIND(cuModuleLoadDataEx);
+	CUDA_LIBRARY_FIND(cuModuleLoadFatBinary);
+	CUDA_LIBRARY_FIND(cuGLCtxCreate);
+	CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
+	CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);
+
+	/* cuda 2.3, the two cuGraphicsGLRegister* lookups that used to be
+	 * repeated here are already done in the group above */
+	CUDA_LIBRARY_FIND(cuMemHostGetFlags);
+
+	/* cuda 3.0 */
+	CUDA_LIBRARY_FIND(cuMemcpyDtoDAsync);
+	CUDA_LIBRARY_FIND(cuFuncSetCacheConfig);
+	CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource);
+	CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray);
+	CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer);
+	CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags);
+	CUDA_LIBRARY_FIND(cuGraphicsMapResources);
+	CUDA_LIBRARY_FIND(cuGraphicsUnmapResources);
+	CUDA_LIBRARY_FIND(cuGetExportTable);
+
+	/* cuda 3.1 */
+	CUDA_LIBRARY_FIND(cuModuleGetSurfRef);
+	CUDA_LIBRARY_FIND(cuSurfRefSetArray);
+	CUDA_LIBRARY_FIND(cuSurfRefGetArray);
+	CUDA_LIBRARY_FIND(cuCtxSetLimit);
+	CUDA_LIBRARY_FIND(cuCtxGetLimit);
+
+	/* functions which changed 3.1 -> 3.2 for 64 bit stuff, the cuda library
+	   has both the old ones for compatibility and new ones with _v2 postfix,
+	   we load the _v2 ones here. these overwrite the pointers assigned by
+	   the plain lookups of the same names above. */
+	CUDA_LIBRARY_FIND_V2(cuDeviceTotalMem);
+	CUDA_LIBRARY_FIND_V2(cuCtxCreate);
+	CUDA_LIBRARY_FIND_V2(cuModuleGetGlobal);
+	CUDA_LIBRARY_FIND_V2(cuMemGetInfo);
+	CUDA_LIBRARY_FIND_V2(cuMemAlloc);
+	CUDA_LIBRARY_FIND_V2(cuMemAllocPitch);
+	CUDA_LIBRARY_FIND_V2(cuMemFree);
+	CUDA_LIBRARY_FIND_V2(cuMemGetAddressRange);
+	CUDA_LIBRARY_FIND_V2(cuMemAllocHost);
+	CUDA_LIBRARY_FIND_V2(cuMemHostGetDevicePointer);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyHtoD);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyDtoH);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyDtoD);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyDtoA);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyAtoD);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyHtoA);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyAtoH);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyAtoA);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyHtoAAsync);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyAtoHAsync);
+	CUDA_LIBRARY_FIND_V2(cuMemcpy2D);
+	CUDA_LIBRARY_FIND_V2(cuMemcpy2DUnaligned);
+	CUDA_LIBRARY_FIND_V2(cuMemcpy3D);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyHtoDAsync);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyDtoHAsync);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyDtoDAsync);
+	CUDA_LIBRARY_FIND_V2(cuMemcpy2DAsync);
+	CUDA_LIBRARY_FIND_V2(cuMemcpy3DAsync);
+	CUDA_LIBRARY_FIND_V2(cuMemsetD8);
+	CUDA_LIBRARY_FIND_V2(cuMemsetD16);
+	CUDA_LIBRARY_FIND_V2(cuMemsetD32);
+	CUDA_LIBRARY_FIND_V2(cuMemsetD2D8);
+	CUDA_LIBRARY_FIND_V2(cuMemsetD2D16);
+	CUDA_LIBRARY_FIND_V2(cuMemsetD2D32);
+	CUDA_LIBRARY_FIND_V2(cuArrayCreate);
+	CUDA_LIBRARY_FIND_V2(cuArrayGetDescriptor);
+	CUDA_LIBRARY_FIND_V2(cuArray3DCreate);
+	CUDA_LIBRARY_FIND_V2(cuArray3DGetDescriptor);
+	CUDA_LIBRARY_FIND_V2(cuTexRefSetAddress);
+	CUDA_LIBRARY_FIND_V2(cuTexRefSetAddress2D);
+	CUDA_LIBRARY_FIND_V2(cuTexRefGetAddress);
+	CUDA_LIBRARY_FIND_V2(cuGraphicsResourceGetMappedPointer);
+	CUDA_LIBRARY_FIND_V2(cuGLCtxCreate);
+
+	/* cuda 4.0 */
+	CUDA_LIBRARY_FIND(cuCtxSetCurrent);
+
+	/* success */
+	result = true;
+
+	return result;
+}
+
+CCL_NAMESPACE_END
+