Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Dinges <blender@dingto.org>2014-01-16 20:04:11 +0400
committerThomas Dinges <blender@dingto.org>2014-01-16 20:04:11 +0400
commitde28a4d4b2c9397c5233a5ee1dbf1400f450a15c (patch)
tree1fe23de963e206af3fb2ff2d9e2e3393cd89149c /intern/cycles/device
parent7c6d52eb07c4bd8142a95eca1dbdc794063859b8 (diff)
Cycles: Add an AVX kernel for CPU rendering.
* AVX is available on Intel Sandy Bridge and newer and AMD Bulldozer and newer. * We don't use dedicated AVX intrinsics yet, but gcc auto vectorization gives a 3% performance improvement for Caminandes. Tested on an i5-3570, Linux x64. * No change for Windows yet, MSVC 2008 does not support AVX. Reviewed by: brecht Differential Revision: https://developer.blender.org/D216
Diffstat (limited to 'intern/cycles/device')
-rw-r--r--intern/cycles/device/device_cpu.cpp52
1 files changed, 52 insertions, 0 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index b29d64eb454..76123fe44d2 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -61,6 +61,7 @@ public:
system_cpu_support_sse2();
system_cpu_support_sse3();
system_cpu_support_sse41();
+ system_cpu_support_avx();
}
~CPUDevice()
@@ -166,6 +167,28 @@ public:
int start_sample = tile.start_sample;
int end_sample = tile.start_sample + tile.num_samples;
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
+ if(system_cpu_support_avx()) {
+ for(int sample = start_sample; sample < end_sample; sample++) {
+ if (task.get_cancel() || task_pool.canceled()) {
+ if(task.need_finish_queue == false)
+ break;
+ }
+
+ for(int y = tile.y; y < tile.y + tile.h; y++) {
+ for(int x = tile.x; x < tile.x + tile.w; x++) {
+ kernel_cpu_avx_path_trace(&kg, render_buffer, rng_state,
+ sample, x, y, tile.offset, tile.stride);
+ }
+ }
+
+ tile.sample = sample + 1;
+
+ task.update_progress(tile);
+ }
+ }
+ else
+#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
if(system_cpu_support_sse41()) {
for(int sample = start_sample; sample < end_sample; sample++) {
@@ -270,6 +293,15 @@ public:
float sample_scale = 1.0f/(task.sample + 1);
if(task.rgba_half) {
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
+ if(system_cpu_support_avx()) {
+ for(int y = task.y; y < task.y + task.h; y++)
+ for(int x = task.x; x < task.x + task.w; x++)
+ kernel_cpu_avx_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer,
+ sample_scale, x, y, task.offset, task.stride);
+ }
+ else
+#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
if(system_cpu_support_sse41()) {
for(int y = task.y; y < task.y + task.h; y++)
@@ -305,6 +337,15 @@ public:
}
}
else {
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
+ if(system_cpu_support_avx()) {
+ for(int y = task.y; y < task.y + task.h; y++)
+ for(int x = task.x; x < task.x + task.w; x++)
+ kernel_cpu_avx_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer,
+ sample_scale, x, y, task.offset, task.stride);
+ }
+ else
+#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
if(system_cpu_support_sse41()) {
for(int y = task.y; y < task.y + task.h; y++)
@@ -349,6 +390,17 @@ public:
OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
+ if(system_cpu_support_avx()) {
+ for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
+ kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
+
+ if(task_pool.canceled())
+ break;
+ }
+ }
+ else
+#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
if(system_cpu_support_sse41()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {