Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/intern
diff options
context:
space:
mode:
author: Thomas Dinges <blender@dingto.org>, 2014-08-26 19:02:03 +0400
committer: Thomas Dinges <blender@dingto.org>, 2014-08-26 19:02:26 +0400
commit: fb3f32760d68134aadb7978922360857f0ecccb7 (patch)
tree: 95e7d3e1a284fce4fc319f8affde3320f8ba26cb /intern
parent: f6e049cd5a61f00328c68eb9cb62b59f2bf7d451 (diff)
Cycles: Add an experimental CUDA kernel.
Now we build 2 .cubins per architecture (e.g. kernel_sm_21.cubin, kernel_experimental_sm_21.cubin).
The experimental kernel can be used by switching to the Experimental Feature Set:
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Experimental_Features
This enables Subsurface Scattering and Correlated Multi Jitter Sampling on GPU, while keeping the stability and performance of the regular kernel.

Differential Revision: https://developer.blender.org/D762

Patch by Sergey and myself.

Developer / Builder Note: CUDA Toolkit 6.5 is highly recommended for this. Also note that building the experimental kernel requires a lot of system memory (~7-8GB).
Diffstat (limited to 'intern')
-rw-r--r--  intern/cycles/blender/addon/ui.py     2
-rw-r--r--  intern/cycles/device/device_cuda.cpp  18
-rw-r--r--  intern/cycles/kernel/CMakeLists.txt   67
-rw-r--r--  intern/cycles/kernel/SConscript       49
-rw-r--r--  intern/cycles/kernel/kernel_types.h   7
5 files changed, 87 insertions, 56 deletions
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index d2d2a2038d7..fa827c3b1dc 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -154,7 +154,7 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
sub.prop(cscene, "subsurface_samples", text="Subsurface")
sub.prop(cscene, "volume_samples", text="Volume")
- if use_cpu(context):
+ if use_cpu(context) or cscene.feature_set == 'EXPERIMENTAL':
layout.row().prop(cscene, "sampling_pattern", text="Pattern")
for rl in scene.render.layers:
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index f2e470c21d5..1ed26717f4b 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -197,14 +197,18 @@ public:
return true;
}
- string compile_kernel()
+ string compile_kernel(bool experimental)
{
/* compute cubin name */
int major, minor;
cuDeviceComputeCapability(&major, &minor, cuDevId);
/* attempt to use kernel provided with blender */
- string cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor));
+ string cubin;
+ if(experimental)
+ cubin = path_get(string_printf("lib/kernel_experimental_sm_%d%d.cubin", major, minor));
+ else
+ cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor));
if(path_exists(cubin))
return cubin;
@@ -212,7 +216,10 @@ public:
string kernel_path = path_get("kernel");
string md5 = path_files_md5_hash(kernel_path);
- cubin = string_printf("cycles_kernel_sm%d%d_%s.cubin", major, minor, md5.c_str());
+ if(experimental)
+ cubin = string_printf("cycles_kernel_experimental_sm%d%d_%s.cubin", major, minor, md5.c_str());
+ else
+ cubin = string_printf("cycles_kernel_sm%d%d_%s.cubin", major, minor, md5.c_str());
cubin = path_user_get(path_join("cache", cubin));
/* if exists already, use it */
@@ -263,6 +270,9 @@ public:
string command = string_printf("\"%s\" -arch=sm_%d%d -m%d --cubin \"%s\" "
"-o \"%s\" --ptxas-options=\"-v\" -I\"%s\" -DNVCC -D__KERNEL_CUDA_VERSION__=%d",
nvcc, major, minor, machine, kernel.c_str(), cubin.c_str(), include.c_str(), cuda_version);
+
+ if(experimental)
+ command += " -D__KERNEL_CUDA_EXPERIMENTAL__";
printf("%s\n", command.c_str());
@@ -293,7 +303,7 @@ public:
return false;
/* get kernel */
- string cubin = compile_kernel();
+ string cubin = compile_kernel(experimental);
if(cubin == "")
return false;
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 34ef9e5a387..8857f86890c 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -160,37 +160,50 @@ if(WITH_CYCLES_CUDA_BINARIES)
set(cuda_sources kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS})
set(cuda_cubins)
- foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
- set(cuda_cubin kernel_${arch}.cubin)
+ macro(CYCLES_CUDA_KERNEL_ADD arch experimental)
+ if(${experimental})
+ set(cuda_extra_flags "-D__KERNEL_CUDA_EXPERIMENTAL__")
+ set(cuda_cubin kernel_experimental_${arch}.cubin)
+ else()
+ set(cuda_extra_flags "")
+ set(cuda_cubin kernel_${arch}.cubin)
+ endif()
set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}")
set(cuda_math_flags "--use_fast_math")
- if(CUDA_VERSION LESS 60 AND ${arch} MATCHES "sm_50")
- message(WARNING "Can't build kernel for CUDA sm_50 architecture, skipping")
- else()
- add_custom_command(
- OUTPUT ${cuda_cubin}
- COMMAND ${CUDA_NVCC_EXECUTABLE}
- -arch=${arch}
- -m${CUDA_BITS}
- --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu
- -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}
- --ptxas-options="-v"
- ${cuda_arch_flags}
- ${cuda_version_flags}
- ${cuda_math_flags}
- -I${CMAKE_CURRENT_SOURCE_DIR}/../util
- -I${CMAKE_CURRENT_SOURCE_DIR}/svm
- -DCCL_NAMESPACE_BEGIN=
- -DCCL_NAMESPACE_END=
- -DNVCC
-
- DEPENDS ${cuda_sources})
-
- delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
- list(APPEND cuda_cubins ${cuda_cubin})
- endif()
+ add_custom_command(
+ OUTPUT ${cuda_cubin}
+ COMMAND ${CUDA_NVCC_EXECUTABLE}
+ -arch=${arch}
+ -m${CUDA_BITS}
+ --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu
+ -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}
+ --ptxas-options="-v"
+ ${cuda_arch_flags}
+ ${cuda_version_flags}
+ ${cuda_math_flags}
+ ${cuda_extra_flags}
+ -I${CMAKE_CURRENT_SOURCE_DIR}/../util
+ -I${CMAKE_CURRENT_SOURCE_DIR}/svm
+ -DCCL_NAMESPACE_BEGIN=
+ -DCCL_NAMESPACE_END=
+ -DNVCC
+
+ DEPENDS ${cuda_sources})
+
+ delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
+ list(APPEND cuda_cubins ${cuda_cubin})
+
+ unset(cuda_extra_flags)
+ endmacro()
+
+ foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
+ # Compile regular kernel
+ CYCLES_CUDA_KERNEL_ADD(${arch} FALSE)
+
+ # Compile experimental kernel
+ CYCLES_CUDA_KERNEL_ADD(${arch} TRUE)
endforeach()
add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins})
diff --git a/intern/cycles/kernel/SConscript b/intern/cycles/kernel/SConscript
index 39f0dd44774..5a9e57c5342 100644
--- a/intern/cycles/kernel/SConscript
+++ b/intern/cycles/kernel/SConscript
@@ -83,30 +83,35 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']:
dependencies = ['kernel.cu'] + kernel.Glob('*.h') + kernel.Glob('../util/*.h') + kernel.Glob('svm/*.h') + kernel.Glob('geom/*.h') + kernel.Glob('closure/*.h')
last_cubin_file = None
+ configs = (("kernel_%s.cubin", ''),
+ ("kernel_experimental_%s.cubin", ' -D__KERNEL_CUDA_EXPERIMENTAL__'))
+
# add command for each cuda architecture
for arch in cuda_archs:
- if cuda_version < 60 and arch == "sm_50":
- print("Can't build kernel for CUDA sm_50 architecture, skipping")
- continue
-
- cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch)
-
- if env['BF_CYCLES_CUDA_ENV']:
- MS_SDK = "C:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\Bin\\SetEnv.cmd"
- command = "\"%s\" & \"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (MS_SDK, nvcc, arch, nvcc_flags, kernel_file, cubin_file)
- else:
- command = "\"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (nvcc, arch, nvcc_flags, kernel_file, cubin_file)
-
- kernel.Command(cubin_file, 'kernel.cu', command)
- kernel.Depends(cubin_file, dependencies)
-
- kernel_binaries.append(cubin_file)
-
- if not env['WITH_BF_CYCLES_CUDA_THREADED_COMPILE']:
- # trick to compile one kernel at a time to reduce memory usage
- if last_cubin_file:
- kernel.Depends(cubin_file, last_cubin_file)
- last_cubin_file = cubin_file
+ for config in configs:
+ # TODO(sergey): Use dict instead of a tuple in order to increase readability?
+ name = config[0]
+ extra_flags = config[1]
+
+ cubin_file = os.path.join(build_dir, name % arch)
+ current_flags = nvcc_flags + extra_flags
+
+ if env['BF_CYCLES_CUDA_ENV']:
+ MS_SDK = "C:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\Bin\\SetEnv.cmd"
+ command = "\"%s\" & \"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (MS_SDK, nvcc, arch, current_flags, kernel_file, cubin_file)
+ else:
+ command = "\"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (nvcc, arch, current_flags, kernel_file, cubin_file)
+
+ kernel.Command(cubin_file, 'kernel.cu', command)
+ kernel.Depends(cubin_file, dependencies)
+
+ kernel_binaries.append(cubin_file)
+
+ if not env['WITH_BF_CYCLES_CUDA_THREADED_COMPILE']:
+ # trick to compile one kernel at a time to reduce memory usage
+ if last_cubin_file:
+ kernel.Depends(cubin_file, last_cubin_file)
+ last_cubin_file = cubin_file
Return('kernel_binaries')
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 1ed4281ea75..292283cbbfd 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -79,8 +79,11 @@ CCL_NAMESPACE_BEGIN
#define __VOLUME_SCATTER__
/* Experimental on GPU */
-//#define __VOLUME_DECOUPLED__
-//#define __SUBSURFACE__
+#ifdef __KERNEL_CUDA_EXPERIMENTAL__
+#define __SUBSURFACE__
+#define __CMJ__
+#endif
+
#endif
#ifdef __KERNEL_OPENCL__