diff options
author | Brecht Van Lommel <brechtvanlommel@pandora.be> | 2013-06-27 03:29:33 +0400 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@pandora.be> | 2013-06-27 03:29:33 +0400 |
commit | 7902fa57b6553c8c6e5ae19d1a25ce2200ea4a95 (patch) | |
tree | 7e07f248cd7aaff6630bedcceff55f35bae2e49b /intern | |
parent | 372c4f80f4e8bdec79c9ae6aa0a29c97c9d8797c (diff) |
Code cleanup: cycles
* Reshuffle SSE #ifdefs to try to avoid compilation errors when enabling SSE on 32-bit builds.
* Remove the CUDA kernel launch size exception on Mac; it is not needed.
* Make OSL file compilation quiet, like C/C++ file compilation.
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 14 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_compat_opencl.h | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_sse2.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/shaders/CMakeLists.txt | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/shaders/SConscript | 2 | ||||
-rw-r--r-- | intern/cycles/util/util_types.h | 68 | ||||
-rw-r--r-- | intern/cycles/util/util_vector.h | 20 |
7 files changed, 40 insertions, 69 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 6b0c9120bd9..4441ff1c69f 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -607,13 +607,8 @@ public: cuda_assert(cuParamSetSize(cuPathTrace, offset)) /* launch kernel: todo find optimal size, cache config for fermi */ -#ifndef __APPLE__ int xthreads = 16; int ythreads = 16; -#else - int xthreads = 8; - int ythreads = 8; -#endif int xblocks = (rtile.w + xthreads - 1)/xthreads; int yblocks = (rtile.h + ythreads - 1)/ythreads; @@ -676,13 +671,8 @@ public: cuda_assert(cuParamSetSize(cuFilmConvert, offset)) /* launch kernel: todo find optimal size, cache config for fermi */ -#ifndef __APPLE__ int xthreads = 16; int ythreads = 16; -#else - int xthreads = 8; - int ythreads = 8; -#endif int xblocks = (task.w + xthreads - 1)/xthreads; int yblocks = (task.h + ythreads - 1)/ythreads; @@ -730,11 +720,7 @@ public: cuda_assert(cuParamSetSize(cuDisplace, offset)) /* launch kernel: todo find optimal size, cache config for fermi */ -#ifndef __APPLE__ int xthreads = 16; -#else - int xthreads = 8; -#endif int xblocks = (task.shader_w + xthreads - 1)/xthreads; cuda_assert(cuFuncSetCacheConfig(cuDisplace, CU_FUNC_CACHE_PREFER_L1)) diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h index 2bf3b58ce99..c4cb7a516c2 100644 --- a/intern/cycles/kernel/kernel_compat_opencl.h +++ b/intern/cycles/kernel/kernel_compat_opencl.h @@ -108,7 +108,6 @@ /* data lookup defines */ #define kernel_data (*kg->data) -#define kernel_tex_lookup(t, x, offset, size) kernel_tex_lookup_(kg->t, offset, size, x) #define kernel_tex_fetch(t, index) kg->t[index] /* define NULL */ diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp index 59852a31047..cfadb02b93b 100644 --- a/intern/cycles/kernel/kernel_sse2.cpp +++ b/intern/cycles/kernel/kernel_sse2.cpp @@ -22,7 +22,7 @@ #ifdef 
WITH_OPTIMIZED_KERNEL -//#define __KERNEL_SSE2__ +#define __KERNEL_SSE2__ #include "kernel.h" #include "kernel_compat_cpu.h" diff --git a/intern/cycles/kernel/shaders/CMakeLists.txt b/intern/cycles/kernel/shaders/CMakeLists.txt index f2beb3da9c0..23112f56536 100644 --- a/intern/cycles/kernel/shaders/CMakeLists.txt +++ b/intern/cycles/kernel/shaders/CMakeLists.txt @@ -91,7 +91,7 @@ foreach(_file ${SRC_OSL}) string(REPLACE ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR} _OSO_FILE ${_OSO_FILE}) add_custom_command( OUTPUT ${_OSO_FILE} - COMMAND ${OSL_COMPILER} -O2 -I"${CMAKE_CURRENT_SOURCE_DIR}" ${_OSL_FILE} + COMMAND ${OSL_COMPILER} -q -O2 -I"${CMAKE_CURRENT_SOURCE_DIR}" ${_OSL_FILE} DEPENDS ${_OSL_FILE} ${SRC_OSL_HEADERS}) list(APPEND SRC_OSO ${_OSO_FILE} diff --git a/intern/cycles/kernel/shaders/SConscript b/intern/cycles/kernel/shaders/SConscript index daf2b67ad2b..aad6e23e4d4 100644 --- a/intern/cycles/kernel/shaders/SConscript +++ b/intern/cycles/kernel/shaders/SConscript @@ -57,7 +57,7 @@ if env['WITH_BF_CYCLES_OSL']: osl_file = os.path.join(source_dir, f) oso_file = os.path.join(build_dir, f.replace('.osl', '.oso')) - command = "%s -O2 -I%s -o %s %s" % (osl_compiler, source_dir, oso_file, osl_file) + command = "%s -q -O2 -I%s -o %s %s" % (osl_compiler, source_dir, oso_file, osl_file) shaders.Command(oso_file, f, command) shaders.Depends(oso_file, [f] + dependencies) diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index 868502bc0ac..b2d842edccd 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -61,59 +61,49 @@ #ifndef __KERNEL_GPU__ -/* not enabled, globally applying it just gives slowdown, - * but useful for testing. 
*/ -//#define __KERNEL_SSE__ -#ifdef __KERNEL_SSE__ - -#include <xmmintrin.h> /* SSE 1 */ -#include <emmintrin.h> /* SSE 2 */ -#include <pmmintrin.h> /* SSE 3 */ -#include <tmmintrin.h> /* SSSE 3 */ -#include <smmintrin.h> /* SSE 4 */ - +/* not enabled, globally applying it gives slowdown, only for testing. */ +#if 0 +#define __KERNEL_SSE__ #ifndef __KERNEL_SSE2__ #define __KERNEL_SSE2__ #endif - #ifndef __KERNEL_SSE3__ #define __KERNEL_SSE3__ #endif - #ifndef __KERNEL_SSSE3__ #define __KERNEL_SSSE3__ #endif - #ifndef __KERNEL_SSE4__ #define __KERNEL_SSE4__ #endif +#endif -#else +/* SSE2 is always available on x86_64 CPUs, so auto enable */ +#if defined(__x86_64__) && !defined(__KERNEL_SSE2__) +#define __KERNEL_SSE2__ +#endif -#if defined(__x86_64__) || defined(__KERNEL_SSSE3__) +/* SSE intrinsics headers */ +#ifndef FREE_WINDOWS64 -/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>. - * Since we can't avoid including <windows.h>, better only include that */ -#ifdef FREE_WINDOWS64 -#include <windows.h> -#else +#ifdef __KERNEL_SSE2__ #include <xmmintrin.h> /* SSE 1 */ #include <emmintrin.h> /* SSE 2 */ +#endif #ifdef __KERNEL_SSE3__ #include <pmmintrin.h> /* SSE 3 */ #endif + #ifdef __KERNEL_SSSE3__ #include <tmmintrin.h> /* SSSE 3 */ #endif -#endif -/* SSE2 is available on x64 and SSE3 CPUs, so enable here as well */ -#ifndef __KERNEL_SSE2__ -#define __KERNEL_SSE2__ -#endif +#else -#endif +/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>. 
+ * Since we can't avoid including <windows.h>, better only include that */ +#include <windows.h> #endif @@ -553,30 +543,6 @@ template<size_t i0, size_t i1, size_t i2, size_t i3> __device_inline const __m12 } #endif -#ifndef __KERNEL_GPU__ - -static inline void *malloc_aligned(size_t size, size_t alignment) -{ - void *data = (void*)malloc(size + sizeof(void*) + alignment - 1); - - union { void *ptr; size_t offset; } u; - u.ptr = (char*)data + sizeof(void*); - u.offset = (u.offset + alignment - 1) & ~(alignment - 1); - *(((void**)u.ptr) - 1) = data; - - return u.ptr; -} - -static inline void free_aligned(void *ptr) -{ - if(ptr) { - void *data = *(((void**)ptr) - 1); - free(data); - } -} - -#endif - CCL_NAMESPACE_END #endif /* __UTIL_TYPES_H__ */ diff --git a/intern/cycles/util/util_vector.h b/intern/cycles/util/util_vector.h index 0fd4488d91d..65aad37c88c 100644 --- a/intern/cycles/util/util_vector.h +++ b/intern/cycles/util/util_vector.h @@ -30,6 +30,26 @@ CCL_NAMESPACE_BEGIN using std::vector; +static inline void *malloc_aligned(size_t size, size_t alignment) +{ + void *data = (void*)malloc(size + sizeof(void*) + alignment - 1); + + union { void *ptr; size_t offset; } u; + u.ptr = (char*)data + sizeof(void*); + u.offset = (u.offset + alignment - 1) & ~(alignment - 1); + *(((void**)u.ptr) - 1) = data; + + return u.ptr; +} + +static inline void free_aligned(void *ptr) +{ + if(ptr) { + void *data = *(((void**)ptr) - 1); + free(data); + } +} + /* Array * * Simplified version of vector, serving multiple purposes: |