diff options
-rw-r--r-- | intern/cycles/CMakeLists.txt | 3 | ||||
-rw-r--r-- | intern/cycles/blender/addon/engine.py | 6 | ||||
-rw-r--r-- | intern/cycles/blender/blender_python.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 103 | ||||
-rw-r--r-- | intern/cycles/device/device_opencl.cpp | 149 | ||||
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_compat_cuda.h | 6 | ||||
-rw-r--r-- | intern/cycles/render/session.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/util/util_cuda.cpp | 23 | ||||
-rw-r--r-- | intern/cycles/util/util_path.cpp | 57 | ||||
-rw-r--r-- | intern/cycles/util/util_path.h | 7 |
11 files changed, 317 insertions, 46 deletions
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index 5d76f57fa73..05b9d4ad3bb 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -46,7 +46,7 @@ if(WITH_CYCLES_MULTI) endif() if(WITH_CYCLES_CUDA) - add_definitions(-DWITH_CUDA) + add_definitions(-DWITH_CUDA_BINARIES) endif() if(WITH_CYCLES_OSL) @@ -58,6 +58,7 @@ if(WITH_CYCLES_PARTIO) endif() add_definitions(-DWITH_OPENCL) +add_definitions(-DWITH_CUDA) include_directories( ${BOOST_INCLUDE_DIR} diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py index d6ea15a435f..8cd7be567e6 100644 --- a/intern/cycles/blender/addon/engine.py +++ b/intern/cycles/blender/addon/engine.py @@ -21,7 +21,11 @@ import bpy def init(): import libcycles_blender as lib import os.path - lib.init(os.path.dirname(__file__)) + + path = os.path.dirname(__file__) + user_path = os.path.dirname(os.path.abspath(bpy.utils.user_resource('CONFIG', ''))) + + lib.init(path, user_path) def create(engine, data, scene, region = 0, v3d = 0, rv3d = 0): import libcycles_blender as lib diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp index b40f5206fea..1026d420c02 100644 --- a/intern/cycles/blender/blender_python.cpp +++ b/intern/cycles/blender/blender_python.cpp @@ -28,12 +28,12 @@ CCL_NAMESPACE_BEGIN static PyObject *init_func(PyObject *self, PyObject *args) { - const char *path; + const char *path, *user_path; - if(!PyArg_ParseTuple(args, "s", &path)) + if(!PyArg_ParseTuple(args, "ss", &path, &user_path)) return NULL; - path_init(path); + path_init(path, user_path); Py_INCREF(Py_None); return Py_None; diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index a6a66873b92..49ffd3d0834 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -28,7 +28,9 @@ #include "util_map.h" #include "util_opengl.h" #include "util_path.h" +#include "util_system.h" #include "util_types.h" +#include "util_time.h" CCL_NAMESPACE_BEGIN @@ -125,6 +127,15 @@ public: } \ } + bool cuda_error(CUresult result) + { + if(result == CUDA_SUCCESS) + return false; + + fprintf(stderr, "CUDA error: %s\n", cuda_error_string(result)); + return true; + } + void cuda_push_context() { cuda_assert(cuCtxSetCurrent(cuContext)) @@ -140,17 +151,26 @@ public: background = background_; cuDevId = 0; + cuDevice = 0; + cuContext = 0; /* intialize */ - cuda_assert(cuInit(0)) + if(cuda_error(cuInit(0))) + return; /* setup device and context */ - cuda_assert(cuDeviceGet(&cuDevice, cuDevId)) + if(cuda_error(cuDeviceGet(&cuDevice, cuDevId))) + return; + + CUresult result; if(background) - cuda_assert(cuCtxCreate(&cuContext, 0, cuDevice)) + result = cuCtxCreate(&cuContext, 0, cuDevice); else - cuda_assert(cuGLCtxCreate(&cuContext, 0, cuDevice)) + result = cuGLCtxCreate(&cuContext, 0, cuDevice); + + if(cuda_error(result)) + return; cuda_pop_context(); } @@ -173,21 +193,80 @@ public: return string("CUDA ") + deviceName; } + string compile_kernel() + { + /* compute cubin name */ + int major, minor; + cuDeviceComputeCapability(&major, &minor, cuDevId); + + /* attempt to use kernel provided with blender */ + string cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor)); + if(path_exists(cubin)) + return cubin; + + /* not found, try to use locally compiled kernel */ + string kernel_path = path_get("kernel"); + string md5 = path_files_md5_hash(kernel_path); + + cubin = string_printf("cycles_kernel_sm%d%d_%s.cubin", major, minor, md5.c_str());; + cubin = path_user_get(path_join("cache", cubin)); + + /* if exists already, use it */ + if(path_exists(cubin)) + return cubin; + + /* if not, find CUDA compiler */ + string nvcc = cuCompilerPath(); + + if(nvcc == "") { + fprintf(stderr, "CUDA nvcc compiler not found. Install CUDA toolkit in default location.\n"); + return ""; + } + + /* compile */ + string kernel = path_join(kernel_path, "kernel.cu"); + string include = kernel_path; + const int machine = system_cpu_bits(); + const int maxreg = 24; + + double starttime = time_dt(); + printf("Compiling CUDA kernel ...\n"); + + string command = string_printf("%s -arch=sm_%d%d -m%d --cubin \"%s\" --use_fast_math " + "-o \"%s\" --ptxas-options=\"-v\" --maxrregcount=%d --opencc-options -OPT:Olimit=0 -I\"%s\" -DNVCC", + nvcc.c_str(), major, minor, machine, kernel.c_str(), cubin.c_str(), maxreg, include.c_str()); + + system(command.c_str()); + + /* verify if compilation succeeded */ + if(!path_exists(cubin)) { + fprintf(stderr, "CUDA kernel compilation failed.\n"); + return ""; + } + + printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime); + + return cubin; + } bool load_kernels() { - CUresult result; - int major, minor; + /* check if cuda init succeeded */ + if(cuContext == 0) + return false; - cuda_push_context(); + /* get kernel */ + string cubin = compile_kernel(); + + if(cubin == "") + return false; /* open module */ - cuDeviceComputeCapability(&major, &minor, cuDevId); - string cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor)); + cuda_push_context(); - result = cuModuleLoad(&cuModule, cubin.c_str()); - if(result != CUDA_SUCCESS) - fprintf(stderr, "Failed loading CUDA kernel %s (%s).\n", cubin.c_str(), cuda_error_string(result)); + CUresult result = cuModuleLoad(&cuModule, cubin.c_str()); + if(cuda_error(result)) + fprintf(stderr, "Failed loading CUDA kernel %s.\n", cubin.c_str()); cuda_pop_context(); diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index c142701c873..a41238ffe09 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -27,6 +27,7 @@ #include "util_map.h" #include "util_math.h" +#include "util_md5.h" #include "util_opencl.h" #include "util_opengl.h" #include "util_path.h" @@ -118,7 +119,7 @@ public: void opencl_assert(cl_int err) { if(err != CL_SUCCESS) { - printf("error (%d): %s\n", err, opencl_error_string(err)); + fprintf(stderr, "OpenCL error (%d): %s\n", err, opencl_error_string(err)); #ifndef NDEBUG abort(); #endif @@ -157,7 +158,7 @@ public: cpPlatform = platform_ids[0]; /* todo: pick specified platform && device */ - ciErr = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_ALL, 1, &cdDevice, NULL); + ciErr = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR, 1, &cdDevice, NULL); if(opencl_error(ciErr)) return; @@ -208,38 +209,67 @@ public: return true; } - bool load_kernels() + bool load_binary(const string& kernel_path, const string& clbin) { - /* verify if device was initialized */ - if(!device_initialized) + /* read binary into memory */ + vector<uint8_t> binary; + + if(!path_read_binary(clbin, binary)) { + fprintf(stderr, "OpenCL failed to read cached binary %s.\n", clbin.c_str()); return false; + } - /* verify we have right opencl version */ - if(!opencl_version_check()) + /* create program */ + cl_int status; + size_t size = binary.size(); + const uint8_t *bytes = &binary[0]; + + cpProgram = clCreateProgramWithBinary(cxContext, 1, &cdDevice, + &size, &bytes, &status, &ciErr); + + if(opencl_error(status) || opencl_error(ciErr)) { + fprintf(stderr, "OpenCL failed create program from cached binary %s.\n", clbin.c_str()); return false; + } - /* we compile kernels consisting of many files. unfortunately opencl - kernel caches do not seem to recognize changes in included files. - so we force recompile on changes by adding the md5 hash of all files */ - string kernel_path = path_get("kernel"); - string kernel_md5 = path_files_md5_hash(kernel_path); + if(!build_kernel(kernel_path)) + return false; - string source = "#include \"kernel.cl\" // " + kernel_md5 + "\n"; - size_t source_len = source.size(); - const char *source_str = source.c_str(); + return true; + } + + bool save_binary(const string& clbin) + { + size_t size = 0; + clGetProgramInfo(cpProgram, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL); + + if(!size) + return false; + + vector<uint8_t> binary(size); + uint8_t *bytes = &binary[0]; + + clGetProgramInfo(cpProgram, CL_PROGRAM_BINARIES, sizeof(uint8_t*), &bytes, NULL); + + if(!path_write_binary(clbin, binary)) { + fprintf(stderr, "OpenCL failed to write cached binary %s.\n", clbin.c_str()); + return false; + } + + return true; + } + bool build_kernel(const string& kernel_path) + { string build_options = ""; build_options += "-I " + kernel_path + ""; /* todo: escape path */ build_options += " -cl-fast-relaxed-math -cl-strict-aliasing"; - cpProgram = clCreateProgramWithSource(cxContext, 1, &source_str, &source_len, &ciErr); - if(opencl_error(ciErr)) - return false; - ciErr = clBuildProgram(cpProgram, 0, NULL, build_options.c_str(), NULL, NULL); if(ciErr != CL_SUCCESS) { + /* show build errors */ char *build_log; size_t ret_val_size; @@ -256,6 +286,87 @@ public: return false; } + return true; + } + + bool compile_kernel(const string& kernel_path, const string& kernel_md5) + { + /* we compile kernels consisting of many files. unfortunately opencl + kernel caches do not seem to recognize changes in included files. + so we force recompile on changes by adding the md5 hash of all files */ + string source = "#include \"kernel.cl\" // " + kernel_md5 + "\n"; + size_t source_len = source.size(); + const char *source_str = source.c_str(); + + cpProgram = clCreateProgramWithSource(cxContext, 1, &source_str, &source_len, &ciErr); + + if(opencl_error(ciErr)) + return false; + + double starttime = time_dt(); + printf("Compiling OpenCL kernel ...\n"); + + if(!build_kernel(kernel_path)) + return false; + + printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime); + + return true; + } + + string device_md5_hash() + { + MD5Hash md5; + char version[256], driver[256], name[256], vendor[256]; + + clGetPlatformInfo(cpPlatform, CL_PLATFORM_VENDOR, sizeof(vendor), &vendor, NULL); + clGetDeviceInfo(cdDevice, CL_DEVICE_VERSION, sizeof(version), &version, NULL); + clGetDeviceInfo(cdDevice, CL_DEVICE_NAME, sizeof(name), &name, NULL); + clGetDeviceInfo(cdDevice, CL_DRIVER_VERSION, sizeof(driver), &driver, NULL); + + md5.append((uint8_t*)vendor, strlen(vendor)); + md5.append((uint8_t*)version, strlen(version)); + md5.append((uint8_t*)name, strlen(name)); + md5.append((uint8_t*)driver, strlen(driver)); + + return md5.get_hex(); + } + + bool load_kernels() + { + /* verify if device was initialized */ + if(!device_initialized) { + fprintf(stderr, "OpenCL: failed to initialize device.\n"); + return false; + } + + /* verify we have right opencl version */ + if(!opencl_version_check()) + return false; + + /* md5 hash to detect changes */ + string kernel_path = path_get("kernel"); + string kernel_md5 = path_files_md5_hash(kernel_path); + string device_md5 = device_md5_hash(); + + /* try to use cache binary */ + string clbin = string_printf("cycles_kernel_%s_%s.clbin", device_md5.c_str(), kernel_md5.c_str());; + clbin = path_user_get(path_join("cache", clbin)); + + if(path_exists(clbin)) { + /* if exists already, try use it */ + if(!load_binary(kernel_path, clbin)) + return false; + } + else { + /* compile kernel */ + if(!compile_kernel(kernel_path, kernel_md5)) + return false; + + /* save binary for reuse */ + save_binary(clbin); + } + /* find kernels */ ckPathTraceKernel = clCreateKernel(cpProgram, "kernel_ocl_path_trace", &ciErr); if(opencl_error(ciErr)) diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 31fe6994a1a..9714f941082 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -132,6 +132,7 @@ endif() #delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${kernel_preprocessed}" ${CYCLES_INSTALL_PATH}/kernel) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel.cl" ${CYCLES_INSTALL_PATH}/kernel) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel.cu" ${CYCLES_INSTALL_PATH}/kernel) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${headers}" ${CYCLES_INSTALL_PATH}/kernel) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${svm_headers}" ${CYCLES_INSTALL_PATH}/kernel/svm) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${util_headers}" ${CYCLES_INSTALL_PATH}/kernel) diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index 3a0eff5210c..72aef463cab 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -21,14 +21,14 @@ #define __KERNEL_GPU__ #define __KERNEL_CUDA__ +#define CCL_NAMESPACE_BEGIN +#define CCL_NAMESPACE_END #include <cuda.h> #include <float.h> #include "util_types.h" -CCL_NAMESPACE_BEGIN - /* Qualifier wrappers for different names on different devices */ #define __device __device__ __inline__ @@ -60,7 +60,5 @@ typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4; #define kernel_data __data -CCL_NAMESPACE_END - #endif /* __KERNEL_COMPAT_CUDA_H__ */ diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index f66c4a64b0b..5fb687971ef 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -384,7 +384,7 @@ void Session::run_cpu() void Session::run() { /* load kernels */ - progress.set_status("Loading render kernels (may take a few minutes)"); + progress.set_status("Loading render kernels (may take a few minutes the first time)"); if(!device->load_kernels()) { progress.set_status("Failed loading render kernel, see console for errors"); diff --git a/intern/cycles/util/util_cuda.cpp b/intern/cycles/util/util_cuda.cpp index b371f4e9499..14e3f9a159b 100644 --- a/intern/cycles/util/util_cuda.cpp +++ b/intern/cycles/util/util_cuda.cpp @@ -16,6 +16,8 @@ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include <stdlib.h> + #include "util_cuda.h" #include "util_debug.h" #include "util_dynlib.h" @@ -371,6 +373,11 @@ bool cuLibraryInit() /* cuda 4.0 */ CUDA_LIBRARY_FIND(cuCtxSetCurrent); +#ifndef WITH_CUDA_BINARIES + if(cuCompilerPath() == "") + return false; +#endif + /* success */ result = true; @@ -379,13 +386,23 @@ bool cuLibraryInit() string cuCompilerPath() { - /* todo: better nvcc detection */ #ifdef _WIN32 - string nvcc = "C:/CUDA/bin/nvcc.exe"; + const char *defaultpath = "C:/CUDA/bin"; + const char *executable = "nvcc.exe"; #else - string nvcc = "/usr/local/cuda/bin/nvcc"; + const char *defaultpath = "/usr/local/cuda/bin"; + const char *executable = "nvcc"; #endif + const char *binpath = getenv("CUDA_BIN_PATH"); + + string nvcc; + + if(binpath) + nvcc = path_join(binpath, executable); + else + nvcc = path_join(defaultpath, executable); + return (path_exists(nvcc))? nvcc: ""; } diff --git a/intern/cycles/util/util_path.cpp b/intern/cycles/util/util_path.cpp index 086063bcb81..90093e32d58 100644 --- a/intern/cycles/util/util_path.cpp +++ b/intern/cycles/util/util_path.cpp @@ -24,6 +24,8 @@ #include <OpenImageIO/sysutil.h> OIIO_NAMESPACE_USING +#include <stdio.h> + #define BOOST_FILESYSTEM_VERSION 2 #include <boost/filesystem.hpp> @@ -32,10 +34,12 @@ OIIO_NAMESPACE_USING CCL_NAMESPACE_BEGIN static string cached_path = ""; +static string cached_user_path = ""; -void path_init(const string& path) +void path_init(const string& path, const string& user_path) { cached_path = path; + cached_user_path = user_path; } string path_get(const string& sub) @@ -46,6 +50,14 @@ string path_get(const string& sub) return path_join(cached_path, sub); } +string path_user_get(const string& sub) +{ + if(cached_user_path == "") + cached_user_path = path_dirname(Sysutil::this_program_path()); + + return path_join(cached_user_path, sub); +} + string path_filename(const string& path) { return boost::filesystem::path(path).filename(); @@ -97,5 +109,48 @@ string path_files_md5_hash(const string& dir) return hash.get_hex(); } +bool path_write_binary(const string& path, const vector<uint8_t>& binary) +{ + /* write binary file from memory */ + boost::filesystem::create_directories(path_dirname(path)); + + FILE *f = fopen(path.c_str(), "wb"); + + if(!f) + return false; + + if(binary.size() > 0) + fwrite(&binary[0], sizeof(uint8_t), binary.size(), f); + + fclose(f); + + return true; +} + +bool path_read_binary(const string& path, vector<uint8_t>& binary) +{ + binary.resize(boost::filesystem::file_size(path)); + + /* read binary file into memory */ + FILE *f = fopen(path.c_str(), "rb"); + + if(!f) + return false; + + if(binary.size() == 0) { + fclose(f); + return false; + } + + if(fread(&binary[0], sizeof(uint8_t), binary.size(), f) != binary.size()) { + fclose(f); + return false; + } + + fclose(f); + + return true; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_path.h b/intern/cycles/util/util_path.h index 3e07f9524bf..ec3524d362a 100644 --- a/intern/cycles/util/util_path.h +++ b/intern/cycles/util/util_path.h @@ -25,11 +25,13 @@ * then makes all paths relative to that. */ #include "util_string.h" +#include "util_vector.h" CCL_NAMESPACE_BEGIN -void path_init(const string& path = ""); +void path_init(const string& path = "", const string& user_path = ""); string path_get(const string& sub = ""); +string path_user_get(const string& sub = ""); string path_filename(const string& path); string path_dirname(const string& path); @@ -39,6 +41,9 @@ string path_escape(const string& path); bool path_exists(const string& path); string path_files_md5_hash(const string& dir); +bool path_write_binary(const string& path, const vector<uint8_t>& binary); +bool path_read_binary(const string& path, vector<uint8_t>& binary); + CCL_NAMESPACE_END #endif |