diff options
-rw-r--r-- | SConstruct | 5 | ||||
-rw-r--r-- | intern/cycles/blender/addon/ui.py | 2 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 18 | ||||
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 67 | ||||
-rw-r--r-- | intern/cycles/kernel/SConscript | 49 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_types.h | 7 | ||||
-rw-r--r-- | source/blender/editors/space_node/drawnode.c | 7 |
7 files changed, 94 insertions, 61 deletions
diff --git a/SConstruct b/SConstruct index 12f80b66552..7263a5ddfa1 100644 --- a/SConstruct +++ b/SConstruct @@ -984,8 +984,9 @@ if env['OURPLATFORM']!='darwin': dir=os.path.join(env['BF_INSTALLDIR'], VERSION, 'scripts', 'addons','cycles', 'lib') for arch in env['BF_CYCLES_CUDA_BINARIES_ARCH']: kernel_build_dir = os.path.join(B.root_build_dir, 'intern/cycles/kernel') - cubin_file = os.path.join(kernel_build_dir, "kernel_%s.cubin" % arch) - cubininstall.append(env.Install(dir=dir,source=cubin_file)) + for suffix in ('', '_experimental'): + cubin_file = os.path.join(kernel_build_dir, "kernel%s_%s.cubin" % (suffix, arch)) + cubininstall.append(env.Install(dir=dir,source=cubin_file)) # osl shaders if env['WITH_BF_CYCLES_OSL']: diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index d2d2a2038d7..fa827c3b1dc 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -154,7 +154,7 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel): sub.prop(cscene, "subsurface_samples", text="Subsurface") sub.prop(cscene, "volume_samples", text="Volume") - if use_cpu(context): + if use_cpu(context) or cscene.feature_set == 'EXPERIMENTAL': layout.row().prop(cscene, "sampling_pattern", text="Pattern") for rl in scene.render.layers: diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index f2e470c21d5..1ed26717f4b 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -197,14 +197,18 @@ public: return true; } - string compile_kernel() + string compile_kernel(bool experimental) { /* compute cubin name */ int major, minor; cuDeviceComputeCapability(&major, &minor, cuDevId); /* attempt to use kernel provided with blender */ - string cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor)); + string cubin; + if(experimental) + cubin = path_get(string_printf("lib/kernel_experimental_sm_%d%d.cubin", major, minor)); + else + cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor)); if(path_exists(cubin)) return cubin; @@ -212,7 +216,10 @@ public: string kernel_path = path_get("kernel"); string md5 = path_files_md5_hash(kernel_path); - cubin = string_printf("cycles_kernel_sm%d%d_%s.cubin", major, minor, md5.c_str()); + if(experimental) + cubin = string_printf("cycles_kernel_experimental_sm%d%d_%s.cubin", major, minor, md5.c_str()); + else + cubin = string_printf("cycles_kernel_sm%d%d_%s.cubin", major, minor, md5.c_str()); cubin = path_user_get(path_join("cache", cubin)); /* if exists already, use it */ @@ -263,6 +270,9 @@ public: string command = string_printf("\"%s\" -arch=sm_%d%d -m%d --cubin \"%s\" " "-o \"%s\" --ptxas-options=\"-v\" -I\"%s\" -DNVCC -D__KERNEL_CUDA_VERSION__=%d", nvcc, major, minor, machine, kernel.c_str(), cubin.c_str(), include.c_str(), cuda_version); + + if(experimental) + command += " -D__KERNEL_CUDA_EXPERIMENTAL__"; printf("%s\n", command.c_str()); @@ -293,7 +303,7 @@ public: return false; /* get kernel */ - string cubin = compile_kernel(); + string cubin = compile_kernel(experimental); if(cubin == "") return false; diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 34ef9e5a387..8857f86890c 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -160,37 +160,50 @@ if(WITH_CYCLES_CUDA_BINARIES) set(cuda_sources kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS}) set(cuda_cubins) - foreach(arch ${CYCLES_CUDA_BINARIES_ARCH}) - set(cuda_cubin kernel_${arch}.cubin) + macro(CYCLES_CUDA_KERNEL_ADD arch experimental) + if(${experimental}) + set(cuda_extra_flags "-D__KERNEL_CUDA_EXPERIMENTAL__") + set(cuda_cubin kernel_experimental_${arch}.cubin) + else() + set(cuda_extra_flags "") + set(cuda_cubin kernel_${arch}.cubin) + endif() set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}") set(cuda_math_flags "--use_fast_math") - if(CUDA_VERSION LESS 60 AND ${arch} MATCHES "sm_50") - message(WARNING "Can't build kernel for CUDA sm_50 architecture, skipping") - else() - add_custom_command( - OUTPUT ${cuda_cubin} - COMMAND ${CUDA_NVCC_EXECUTABLE} - -arch=${arch} - -m${CUDA_BITS} - --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu - -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} - --ptxas-options="-v" - ${cuda_arch_flags} - ${cuda_version_flags} - ${cuda_math_flags} - -I${CMAKE_CURRENT_SOURCE_DIR}/../util - -I${CMAKE_CURRENT_SOURCE_DIR}/svm - -DCCL_NAMESPACE_BEGIN= - -DCCL_NAMESPACE_END= - -DNVCC - - DEPENDS ${cuda_sources}) - - delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib) - list(APPEND cuda_cubins ${cuda_cubin}) - endif() + add_custom_command( + OUTPUT ${cuda_cubin} + COMMAND ${CUDA_NVCC_EXECUTABLE} + -arch=${arch} + -m${CUDA_BITS} + --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu + -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} + --ptxas-options="-v" + ${cuda_arch_flags} + ${cuda_version_flags} + ${cuda_math_flags} + ${cuda_extra_flags} + -I${CMAKE_CURRENT_SOURCE_DIR}/../util + -I${CMAKE_CURRENT_SOURCE_DIR}/svm + -DCCL_NAMESPACE_BEGIN= + -DCCL_NAMESPACE_END= + -DNVCC + + DEPENDS ${cuda_sources}) + + delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib) + list(APPEND cuda_cubins ${cuda_cubin}) + + unset(cuda_extra_flags) + endmacro() + + foreach(arch ${CYCLES_CUDA_BINARIES_ARCH}) + # Compile regular kernel + CYCLES_CUDA_KERNEL_ADD(${arch} FALSE) + + # Compile experimental kernel + CYCLES_CUDA_KERNEL_ADD(${arch} TRUE) endforeach() add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins}) diff --git a/intern/cycles/kernel/SConscript b/intern/cycles/kernel/SConscript index 39f0dd44774..5a9e57c5342 100644 --- a/intern/cycles/kernel/SConscript +++ b/intern/cycles/kernel/SConscript @@ -83,30 +83,35 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']: dependencies = ['kernel.cu'] + kernel.Glob('*.h') + kernel.Glob('../util/*.h') + kernel.Glob('svm/*.h') + kernel.Glob('geom/*.h') + kernel.Glob('closure/*.h') last_cubin_file = None + configs = (("kernel_%s.cubin", ''), + ("kernel_experimental_%s.cubin", ' -D__KERNEL_CUDA_EXPERIMENTAL__')) + # add command for each cuda architecture for arch in cuda_archs: - if cuda_version < 60 and arch == "sm_50": - print("Can't build kernel for CUDA sm_50 architecture, skipping") - continue - - cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch) - - if env['BF_CYCLES_CUDA_ENV']: - MS_SDK = "C:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\Bin\\SetEnv.cmd" - command = "\"%s\" & \"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (MS_SDK, nvcc, arch, nvcc_flags, kernel_file, cubin_file) - else: - command = "\"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (nvcc, arch, nvcc_flags, kernel_file, cubin_file) - - kernel.Command(cubin_file, 'kernel.cu', command) - kernel.Depends(cubin_file, dependencies) - - kernel_binaries.append(cubin_file) - - if not env['WITH_BF_CYCLES_CUDA_THREADED_COMPILE']: - # trick to compile one kernel at a time to reduce memory usage - if last_cubin_file: - kernel.Depends(cubin_file, last_cubin_file) - last_cubin_file = cubin_file + for config in configs: + # TODO(sergey): Use dict instead ocouple in order to increase readability? + name = config[0] + extra_flags = config[1] + + cubin_file = os.path.join(build_dir, name % arch) + current_flags = nvcc_flags + extra_flags + + if env['BF_CYCLES_CUDA_ENV']: + MS_SDK = "C:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\Bin\\SetEnv.cmd" + command = "\"%s\" & \"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (MS_SDK, nvcc, arch, current_flags, kernel_file, cubin_file) + else: + command = "\"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (nvcc, arch, current_flags, kernel_file, cubin_file) + + kernel.Command(cubin_file, 'kernel.cu', command) + kernel.Depends(cubin_file, dependencies) + + kernel_binaries.append(cubin_file) + + if not env['WITH_BF_CYCLES_CUDA_THREADED_COMPILE']: + # trick to compile one kernel at a time to reduce memory usage + if last_cubin_file: + kernel.Depends(cubin_file, last_cubin_file) + last_cubin_file = cubin_file Return('kernel_binaries') diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 1ed4281ea75..292283cbbfd 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -79,8 +79,11 @@ CCL_NAMESPACE_BEGIN #define __VOLUME_SCATTER__ /* Experimental on GPU */ -//#define __VOLUME_DECOUPLED__ -//#define __SUBSURFACE__ +#ifdef __KERNEL_CUDA_EXPERIMENTAL__ +#define __SUBSURFACE__ +#define __CMJ__ +#endif + #endif #ifdef __KERNEL_OPENCL__ diff --git a/source/blender/editors/space_node/drawnode.c b/source/blender/editors/space_node/drawnode.c index 89cbbb59835..286585f5f1d 100644 --- a/source/blender/editors/space_node/drawnode.c +++ b/source/blender/editors/space_node/drawnode.c @@ -942,12 +942,13 @@ static void node_shader_buts_anisotropic(uiLayout *layout, bContext *UNUSED(C), static void node_shader_buts_subsurface(uiLayout *layout, bContext *C, PointerRNA *ptr) { - /* SSS does not work on GPU yet */ + /* SSS only enabled in Experimental Kernel */ PointerRNA scene = CTX_data_pointer_get(C, "scene"); if (scene.data) { PointerRNA cscene = RNA_pointer_get(&scene, "cycles"); - if (cscene.data && (RNA_enum_get(&cscene, "device") == 1 && U.compute_device_type != 0)) - uiItemL(layout, IFACE_("SSS not supported on GPU"), ICON_ERROR); + if (cscene.data && (RNA_enum_get(&cscene, "device") == 1 && U.compute_device_type != 0 + && RNA_enum_get(&cscene, "feature_set") == 0)) + uiItemL(layout, IFACE_("Only enabled in experimental GPU kernel"), ICON_ERROR); } uiItemR(layout, ptr, "falloff", 0, "", ICON_NONE); |