Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/intern
diff options
context:
space:
mode:
author: Thomas Dinges <blender@dingto.org> 2013-10-08 19:29:28 +0400
committer: Thomas Dinges <blender@dingto.org> 2013-10-08 19:29:28 +0400
commit: b5a5773fa98b5ddf18dc68bc77df15cc79211ef5 (patch)
tree: 8fef2a5fb5e6e89ce7d6d966b0e6acd74b3d9f06 /intern
parent: dfe16105041292a1fc7ee29d825c25135a4f6a3c (diff)
Cycles / CUDA:
* Remove support for CUDA Toolkit 4.x; only Toolkit 5.0 and above are supported now.
* Remove support for sm_1x cards (< Fermi) for good. We haven't officially supported those cards for a few releases already; this now removes some special-case code that was still there.
Diffstat (limited to 'intern')
-rw-r--r-- intern/cycles/device/device_cuda.cpp | 54
-rw-r--r-- intern/cycles/kernel/CMakeLists.txt | 38
-rw-r--r-- intern/cycles/kernel/SConscript | 34
-rw-r--r-- intern/cycles/kernel/kernel_shader.h | 23
4 files changed, 36 insertions, 113 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 5440bd91987..4ce7f6fd729 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -227,14 +227,12 @@ public:
bool support_device(bool experimental)
{
- if(!experimental) {
- int major, minor;
- cuDeviceComputeCapability(&major, &minor, cuDevId);
+ int major, minor;
+ cuDeviceComputeCapability(&major, &minor, cuDevId);
- if(major < 2) {
- cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor));
- return false;
- }
+ if(major < 2) {
+ cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor));
+ return false;
}
return true;
@@ -286,8 +284,12 @@ public:
cuda_error_message("CUDA nvcc compiler version could not be parsed.");
return "";
}
+ if(cuda_version < 50) {
+ printf("Unsupported CUDA version %d.%d detected, you need CUDA 5.0.\n", cuda_version/10, cuda_version%10);
+ return "";
+ }
- if(cuda_version != 50)
+ else if(cuda_version > 50)
printf("CUDA version %d.%d detected, build may succeed but only CUDA 5.0 is officially supported.\n", cuda_version/10, cuda_version%10);
/* compile */
@@ -296,36 +298,14 @@ public:
const int machine = system_cpu_bits();
string arch_flags;
- /* build flags depending on CUDA version and arch */
- if(cuda_version < 50) {
- /* CUDA 4.x */
- if(major == 1) {
- /* sm_1x */
- arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0";
- }
- else if(major == 2) {
- /* sm_2x */
- arch_flags = "--maxrregcount=24";
- }
- else {
- /* sm_3x */
- arch_flags = "--maxrregcount=32";
- }
+ /* CUDA 5.x build flags for different archs */
+ if(major == 2) {
+ /* sm_2x */
+ arch_flags = "--maxrregcount=32 --use_fast_math";
}
- else {
- /* CUDA 5.x */
- if(major == 1) {
- /* sm_1x */
- arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0 --use_fast_math";
- }
- else if(major == 2) {
- /* sm_2x */
- arch_flags = "--maxrregcount=32 --use_fast_math";
- }
- else {
- /* sm_3x */
- arch_flags = "--maxrregcount=32 --use_fast_math";
- }
+ else if(major == 3) {
+ /* sm_3x */
+ arch_flags = "--maxrregcount=32 --use_fast_math";
}
double starttime = time_dt();
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index eaa4e304ebb..56ba0e08743 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -151,36 +151,16 @@ if(WITH_CYCLES_CUDA_BINARIES)
set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}")
- # build flags depending on CUDA version and arch
- if(CUDA_VERSION LESS 50)
- # CUDA 4.x
- if(${arch} MATCHES "sm_1[0-9]")
- # sm_1x
- set(cuda_arch_flags "--maxrregcount=24 --opencc-options -OPT:Olimit=0")
- elseif(${arch} MATCHES "sm_2[0-9]")
- # sm_2x
- set(cuda_arch_flags "--maxrregcount=24")
- else()
- # sm_3x
- set(cuda_arch_flags "--maxrregcount=32")
- endif()
-
- set(cuda_math_flags "")
- else()
- # CUDA 5.x
- if(${arch} MATCHES "sm_1[0-9]")
- # sm_1x
- set(cuda_arch_flags "--maxrregcount=24 --opencc-options -OPT:Olimit=0")
- elseif(${arch} MATCHES "sm_2[0-9]")
- # sm_2x
- set(cuda_arch_flags "--maxrregcount=32")
- else()
- # sm_3x
- set(cuda_arch_flags "--maxrregcount=32")
- endif()
-
- set(cuda_math_flags "--use_fast_math")
+ # CUDA 5.x build flags for different archs
+ if(${arch} MATCHES "sm_2[0-9]")
+ # sm_2x
+ set(cuda_arch_flags "--maxrregcount=32")
+ elseif(${arch} MATCHES "sm_3[0-9]")
+ # sm_3x
+ set(cuda_arch_flags "--maxrregcount=32")
endif()
+
+ set(cuda_math_flags "--use_fast_math")
if(CUDA_VERSION LESS 50 AND ${arch} MATCHES "sm_35")
message(WARNING "Can't build kernel for CUDA sm_35 architecture, skipping")
diff --git a/intern/cycles/kernel/SConscript b/intern/cycles/kernel/SConscript
index 6459c3ed183..a0522d9ba8e 100644
--- a/intern/cycles/kernel/SConscript
+++ b/intern/cycles/kernel/SConscript
@@ -86,33 +86,13 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']:
for arch in cuda_archs:
cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch)
- # build flags depending on CUDA version and arch
- if cuda_version < 50:
- if arch == "sm_35":
- print("Can't build kernel for CUDA sm_35 architecture, skipping")
- continue
-
- # CUDA 4.x
- if arch.startswith("sm_1"):
- # sm_1x
- cuda_arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0"
- elif arch.startswith("sm_2"):
- # sm_2x
- cuda_arch_flags = "--maxrregcount=24"
- else:
- # sm_3x
- cuda_arch_flags = "--maxrregcount=32"
- else:
- # CUDA 5.x
- if arch.startswith("sm_1"):
- # sm_1x
- cuda_arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0 --use_fast_math"
- elif arch.startswith("sm_2"):
- # sm_2x
- cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
- else:
- # sm_3x
- cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
+ # CUDA 5.x build flags for different archs
+ if arch.startswith("sm_2"):
+ # sm_2x
+ cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
+ elif arch.startswith("sm_3"):
+ # sm_3x
+ cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
command = "\"%s\" -arch=%s %s %s \"%s\" -o \"%s\"" % (nvcc, arch, nvcc_flags, cuda_arch_flags, kernel_file, cubin_file)
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index ee71a0cfcf4..81630caed9a 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -36,15 +36,8 @@ CCL_NAMESPACE_BEGIN
/* ShaderData setup from incoming ray */
#ifdef __OBJECT_MOTION__
-#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42
-__device_noinline
-#else
-__device
-#endif
-void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
+__device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
{
- /* note that this is a separate non-inlined function to work around crash
- * on CUDA sm 2.0, otherwise kernel execution crashes (compiler bug?) */
if(sd->flag & SD_OBJECT_MOTION) {
sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time);
sd->ob_itfm= transform_quick_inverse(sd->ob_tfm);
@@ -56,12 +49,7 @@ void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float tim
}
#endif
-#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42
-__device_noinline
-#else
-__device
-#endif
-void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
+__device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
const Intersection *isect, const Ray *ray, int bounce)
{
#ifdef __INSTANCING__
@@ -249,12 +237,7 @@ __device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData
/* ShaderData setup from position sampled on mesh */
-#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42
-__device_noinline
-#else
-__device
-#endif
-void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
+__device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
const float3 P, const float3 Ng, const float3 I,
int shader, int object, int prim, float u, float v, float t, float time, int bounce, int segment)
{