From 73f20560529457ea177cb93e8e8eaaf44a589643 Mon Sep 17 00:00:00 2001 From: Sergey Sharybin Date: Wed, 14 Feb 2018 11:23:30 +0100 Subject: Cycles: Add BVH8 and packeted triangle intersection This is an initial implementation of the BVH8 optimization structure and packeted triangle intersection. The aim is to get faster ray to scene intersection checks. Scene BVH4 BVH8 barbershop_interior 10:24.94 10:10.74 bmw27 02:41.25 02:38.83 classroom 08:16.49 07:56.15 fishy_cat 04:24.56 04:17.29 koro 06:03.06 06:01.45 pavillon_barcelona 09:21.26 09:02.98 victor 23:39.65 22:53.71 As for memory, peak usage rises by about 4.7% in complex scenes. Note that BVH8 is disabled when using OSL, this is because the OSL kernel does not get per-microarchitecture optimizations and hence always considers BVH3 is used. Original BVH8 patch from Anton Gavrikov. Batched triangles intersection from Victoria Zhislina. Extra work and tests and fixes from Maxym Dmytrychenko. --- intern/cycles/kernel/kernel_compat_cpu.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'intern/cycles/kernel/kernel_compat_cpu.h') diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index 2f955741797..aa7a16afa1d 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -71,15 +71,13 @@ CCL_NAMESPACE_BEGIN /* Texture types to be compatible with CUDA textures. These are really just * simple arrays and after inlining fetch hopefully revert to being a simple * pointer lookup. */ - template<typename T> struct texture { ccl_always_inline const T& fetch(int index) { kernel_assert(index >= 0 && index < width); return data[index]; } - -#ifdef __KERNEL_AVX__ +#if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__) /* Reads 256 bytes but indexes in blocks of 128 bytes to maintain * compatibility with existing indicies and data structures. 
*/ @@ -90,7 +88,6 @@ template<typename T> struct texture { ssef *ssef_node_data = &ssef_data[index]; return _mm256_loadu_ps((float *)ssef_node_data); } - #endif #ifdef __KERNEL_SSE2__ @@ -148,6 +145,10 @@ ccl_device_inline void print_sse3i(const char *label, sse3i& a) print_ssei(label, a.z); } +#if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__) +typedef vector3<avxf> avx3f; +#endif + #endif CCL_NAMESPACE_END -- cgit v1.2.3