Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Thomas Dinges <blender@dingto.org> 2014-06-14 00:23:58 +0400
committer Thomas Dinges <blender@dingto.org> 2014-06-14 00:26:20 +0400
commit 866c7fb6e63d128fa4800e28e0a091f874112344 (patch)
tree 096daad79ca3eb7c47e339e7b1c568caf47a5733 /intern/cycles/util
parent b4aa51f8d736f5431799fdf1df5f678a732ef6b9 (diff)
Cycles: Add an AVX2 CPU kernel.
This kernel is compiled with the AVX2, FMA3, and BMI compiler flags. At the moment only Intel Haswell benefits from this, but future AMD CPUs will support these instructions as well. It makes rendering on Haswell CPUs a few percent faster, although this has only been benchmarked with clang on OS X. Part of my GSoC 2014.
Diffstat (limited to 'intern/cycles/util')
-rw-r--r-- intern/cycles/util/util_optimization.h 5
-rw-r--r-- intern/cycles/util/util_system.cpp 17
-rw-r--r-- intern/cycles/util/util_system.h 1
3 files changed, 23 insertions, 0 deletions
diff --git a/intern/cycles/util/util_optimization.h b/intern/cycles/util/util_optimization.h
index 0a6013cddd4..5d0fea34761 100644
--- a/intern/cycles/util/util_optimization.h
+++ b/intern/cycles/util/util_optimization.h
@@ -65,10 +65,15 @@
#define WITH_CYCLES_OPTIMIZED_KERNEL_AVX
#endif
+#ifdef WITH_KERNEL_AVX2
+#define WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
+#endif
+
/* MSVC 2008, no SSE41 (broken blendv intrinsic) and no AVX support */
#if defined(_MSC_VER) && (_MSC_VER < 1700)
#undef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
#undef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
+#undef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
#endif
#endif
diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp
index 0764f7d9345..7c0445577e2 100644
--- a/intern/cycles/util/util_system.cpp
+++ b/intern/cycles/util/util_system.cpp
@@ -127,9 +127,12 @@ struct CPUCapabilities {
bool sse42;
bool sse4a;
bool avx;
+ bool avx2;
bool xop;
bool fma3;
bool fma4;
+ bool bmi1;
+ bool bmi2;
};
static CPUCapabilities& system_cpu_capabilities()
@@ -180,6 +183,11 @@ static CPUCapabilities& system_cpu_capabilities()
#endif
caps.avx = (xcr_feature_mask & 0x6) == 0x6;
}
+
+ __cpuid(result, 0x00000007);
+ caps.bmi1 = (result[1] & ((int)1 << 3)) != 0;
+ caps.bmi2 = (result[1] & ((int)1 << 8)) != 0;
+ caps.avx2 = (result[1] & ((int)1 << 5)) != 0;
}
#if 0
@@ -221,6 +229,11 @@ bool system_cpu_support_avx()
CPUCapabilities& caps = system_cpu_capabilities();
return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx;
}
+bool system_cpu_support_avx2()
+{
+ CPUCapabilities& caps = system_cpu_capabilities();
+ return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx && caps.avx2 && caps.fma3 && caps.bmi1 && caps.bmi2;
+}
#else
bool system_cpu_support_sse2()
@@ -242,6 +255,10 @@ bool system_cpu_support_avx()
{
return false;
}
+bool system_cpu_support_avx2()
+{
+ return false;
+}
#endif
diff --git a/intern/cycles/util/util_system.h b/intern/cycles/util/util_system.h
index 4409ea752cd..0e8868c7dfc 100644
--- a/intern/cycles/util/util_system.h
+++ b/intern/cycles/util/util_system.h
@@ -28,6 +28,7 @@ bool system_cpu_support_sse2();
bool system_cpu_support_sse3();
bool system_cpu_support_sse41();
bool system_cpu_support_avx();
+bool system_cpu_support_avx2();
CCL_NAMESPACE_END