Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Brecht Van Lommel <brechtvanlommel@pandora.be> 2013-06-27 03:29:33 +0400
committer: Brecht Van Lommel <brechtvanlommel@pandora.be> 2013-06-27 03:29:33 +0400
commit: 7902fa57b6553c8c6e5ae19d1a25ce2200ea4a95 (patch)
tree: 7e07f248cd7aaff6630bedcceff55f35bae2e49b
parent: 372c4f80f4e8bdec79c9ae6aa0a29c97c9d8797c (diff)
Code cleanup: cycles
* Reshuffle SSE #ifdefs to try to avoid compilation errors when enabling SSE on 32 bit.
* Remove the CUDA kernel launch size exception on Mac; it is not needed.
* Make OSL file compilation quiet, like c/cpp files.
-rw-r--r-- intern/cycles/device/device_cuda.cpp 14
-rw-r--r-- intern/cycles/kernel/kernel_compat_opencl.h 1
-rw-r--r-- intern/cycles/kernel/kernel_sse2.cpp 2
-rw-r--r-- intern/cycles/kernel/shaders/CMakeLists.txt 2
-rw-r--r-- intern/cycles/kernel/shaders/SConscript 2
-rw-r--r-- intern/cycles/util/util_types.h 68
-rw-r--r-- intern/cycles/util/util_vector.h 20
7 files changed, 40 insertions, 69 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 6b0c9120bd9..4441ff1c69f 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -607,13 +607,8 @@ public:
cuda_assert(cuParamSetSize(cuPathTrace, offset))
/* launch kernel: todo find optimal size, cache config for fermi */
-#ifndef __APPLE__
int xthreads = 16;
int ythreads = 16;
-#else
- int xthreads = 8;
- int ythreads = 8;
-#endif
int xblocks = (rtile.w + xthreads - 1)/xthreads;
int yblocks = (rtile.h + ythreads - 1)/ythreads;
@@ -676,13 +671,8 @@ public:
cuda_assert(cuParamSetSize(cuFilmConvert, offset))
/* launch kernel: todo find optimal size, cache config for fermi */
-#ifndef __APPLE__
int xthreads = 16;
int ythreads = 16;
-#else
- int xthreads = 8;
- int ythreads = 8;
-#endif
int xblocks = (task.w + xthreads - 1)/xthreads;
int yblocks = (task.h + ythreads - 1)/ythreads;
@@ -730,11 +720,7 @@ public:
cuda_assert(cuParamSetSize(cuDisplace, offset))
/* launch kernel: todo find optimal size, cache config for fermi */
-#ifndef __APPLE__
int xthreads = 16;
-#else
- int xthreads = 8;
-#endif
int xblocks = (task.shader_w + xthreads - 1)/xthreads;
cuda_assert(cuFuncSetCacheConfig(cuDisplace, CU_FUNC_CACHE_PREFER_L1))
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index 2bf3b58ce99..c4cb7a516c2 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -108,7 +108,6 @@
/* data lookup defines */
#define kernel_data (*kg->data)
-#define kernel_tex_lookup(t, x, offset, size) kernel_tex_lookup_(kg->t, offset, size, x)
#define kernel_tex_fetch(t, index) kg->t[index]
/* define NULL */
diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp
index 59852a31047..cfadb02b93b 100644
--- a/intern/cycles/kernel/kernel_sse2.cpp
+++ b/intern/cycles/kernel/kernel_sse2.cpp
@@ -22,7 +22,7 @@
#ifdef WITH_OPTIMIZED_KERNEL
-//#define __KERNEL_SSE2__
+#define __KERNEL_SSE2__
#include "kernel.h"
#include "kernel_compat_cpu.h"
diff --git a/intern/cycles/kernel/shaders/CMakeLists.txt b/intern/cycles/kernel/shaders/CMakeLists.txt
index f2beb3da9c0..23112f56536 100644
--- a/intern/cycles/kernel/shaders/CMakeLists.txt
+++ b/intern/cycles/kernel/shaders/CMakeLists.txt
@@ -91,7 +91,7 @@ foreach(_file ${SRC_OSL})
string(REPLACE ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR} _OSO_FILE ${_OSO_FILE})
add_custom_command(
OUTPUT ${_OSO_FILE}
- COMMAND ${OSL_COMPILER} -O2 -I"${CMAKE_CURRENT_SOURCE_DIR}" ${_OSL_FILE}
+ COMMAND ${OSL_COMPILER} -q -O2 -I"${CMAKE_CURRENT_SOURCE_DIR}" ${_OSL_FILE}
DEPENDS ${_OSL_FILE} ${SRC_OSL_HEADERS})
list(APPEND SRC_OSO
${_OSO_FILE}
diff --git a/intern/cycles/kernel/shaders/SConscript b/intern/cycles/kernel/shaders/SConscript
index daf2b67ad2b..aad6e23e4d4 100644
--- a/intern/cycles/kernel/shaders/SConscript
+++ b/intern/cycles/kernel/shaders/SConscript
@@ -57,7 +57,7 @@ if env['WITH_BF_CYCLES_OSL']:
osl_file = os.path.join(source_dir, f)
oso_file = os.path.join(build_dir, f.replace('.osl', '.oso'))
- command = "%s -O2 -I%s -o %s %s" % (osl_compiler, source_dir, oso_file, osl_file)
+ command = "%s -q -O2 -I%s -o %s %s" % (osl_compiler, source_dir, oso_file, osl_file)
shaders.Command(oso_file, f, command)
shaders.Depends(oso_file, [f] + dependencies)
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index 868502bc0ac..b2d842edccd 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -61,59 +61,49 @@
#ifndef __KERNEL_GPU__
-/* not enabled, globally applying it just gives slowdown,
- * but useful for testing. */
-//#define __KERNEL_SSE__
-#ifdef __KERNEL_SSE__
-
-#include <xmmintrin.h> /* SSE 1 */
-#include <emmintrin.h> /* SSE 2 */
-#include <pmmintrin.h> /* SSE 3 */
-#include <tmmintrin.h> /* SSSE 3 */
-#include <smmintrin.h> /* SSE 4 */
-
+/* not enabled, globally applying it gives slowdown, only for testing. */
+#if 0
+#define __KERNEL_SSE__
#ifndef __KERNEL_SSE2__
#define __KERNEL_SSE2__
#endif
-
#ifndef __KERNEL_SSE3__
#define __KERNEL_SSE3__
#endif
-
#ifndef __KERNEL_SSSE3__
#define __KERNEL_SSSE3__
#endif
-
#ifndef __KERNEL_SSE4__
#define __KERNEL_SSE4__
#endif
+#endif
-#else
+/* SSE2 is always available on x86_64 CPUs, so auto enable */
+#if defined(__x86_64__) && !defined(__KERNEL_SSE2__)
+#define __KERNEL_SSE2__
+#endif
-#if defined(__x86_64__) || defined(__KERNEL_SSSE3__)
+/* SSE intrinsics headers */
+#ifndef FREE_WINDOWS64
-/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>.
- * Since we can't avoid including <windows.h>, better only include that */
-#ifdef FREE_WINDOWS64
-#include <windows.h>
-#else
+#ifdef __KERNEL_SSE2__
#include <xmmintrin.h> /* SSE 1 */
#include <emmintrin.h> /* SSE 2 */
+#endif
#ifdef __KERNEL_SSE3__
#include <pmmintrin.h> /* SSE 3 */
#endif
+
#ifdef __KERNEL_SSSE3__
#include <tmmintrin.h> /* SSSE 3 */
#endif
-#endif
-/* SSE2 is available on x64 and SSE3 CPUs, so enable here as well */
-#ifndef __KERNEL_SSE2__
-#define __KERNEL_SSE2__
-#endif
+#else
-#endif
+/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>.
+ * Since we can't avoid including <windows.h>, better only include that */
+#include <windows.h>
#endif
@@ -553,30 +543,6 @@ template<size_t i0, size_t i1, size_t i2, size_t i3> __device_inline const __m12
}
#endif
-#ifndef __KERNEL_GPU__
-
-static inline void *malloc_aligned(size_t size, size_t alignment)
-{
- void *data = (void*)malloc(size + sizeof(void*) + alignment - 1);
-
- union { void *ptr; size_t offset; } u;
- u.ptr = (char*)data + sizeof(void*);
- u.offset = (u.offset + alignment - 1) & ~(alignment - 1);
- *(((void**)u.ptr) - 1) = data;
-
- return u.ptr;
-}
-
-static inline void free_aligned(void *ptr)
-{
- if(ptr) {
- void *data = *(((void**)ptr) - 1);
- free(data);
- }
-}
-
-#endif
-
CCL_NAMESPACE_END
#endif /* __UTIL_TYPES_H__ */
diff --git a/intern/cycles/util/util_vector.h b/intern/cycles/util/util_vector.h
index 0fd4488d91d..65aad37c88c 100644
--- a/intern/cycles/util/util_vector.h
+++ b/intern/cycles/util/util_vector.h
@@ -30,6 +30,26 @@ CCL_NAMESPACE_BEGIN
using std::vector;
+static inline void *malloc_aligned(size_t size, size_t alignment)
+{
+ void *data = (void*)malloc(size + sizeof(void*) + alignment - 1);
+
+ union { void *ptr; size_t offset; } u;
+ u.ptr = (char*)data + sizeof(void*);
+ u.offset = (u.offset + alignment - 1) & ~(alignment - 1);
+ *(((void**)u.ptr) - 1) = data;
+
+ return u.ptr;
+}
+
+static inline void free_aligned(void *ptr)
+{
+ if(ptr) {
+ void *data = *(((void**)ptr) - 1);
+ free(data);
+ }
+}
+
/* Array
*
* Simplified version of vector, serving multiple purposes: