Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
author Brecht Van Lommel <brechtvanlommel@pandora.be> 2013-06-22 18:35:09 +0400
committer Brecht Van Lommel <brechtvanlommel@pandora.be> 2013-06-22 18:35:09 +0400
commit 240fb6fa26674b8fefeed2a226352ab4c31cfe74 (patch)
tree 7ed01846c1b394af495dbf1d8c3a60b2ea9793da /intern
parent 5da48f425fcfeb7b3eae67df4ac0d29e698809f6 (diff)
Cycles: ensure any SSE data is allocated 16-byte aligned. This happens
automatically on many platforms but is not guaranteed everywhere.
Diffstat (limited to 'intern')
-rw-r--r-- intern/cycles/util/util_types.h 24
-rw-r--r-- intern/cycles/util/util_vector.h 31
2 files changed, 43 insertions, 12 deletions
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index a67c55acf3d..891fb82d06b 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -552,6 +552,30 @@ template<size_t i0, size_t i1, size_t i2, size_t i3> __device_inline const __m12
}
#endif
+#ifndef __KERNEL_GPU__
+
+/* Allocate `size` bytes at an address that is a multiple of `alignment`.
+ *
+ * `alignment` must be a non-zero power of two, because the rounding below
+ * masks with ~(alignment - 1). The pointer obtained from malloc() is stored
+ * in the void* slot directly before the returned address, which is where
+ * free_aligned() reads it back; release the memory with free_aligned(),
+ * never with free().
+ *
+ * Returns NULL when the padded size would overflow size_t or when malloc()
+ * fails. */
+static inline void *malloc_aligned(size_t size, size_t alignment)
+{
+ /* room for the stored malloc() pointer plus worst case alignment padding */
+ const size_t overhead = sizeof(void*) + alignment - 1;
+
+ /* refuse requests whose padded size would wrap and under-allocate */
+ if(size > ((size_t)-1) - overhead) {
+ return NULL;
+ }
+
+ void *data = malloc(size + overhead);
+
+ /* without this check the header write below would go through NULL */
+ if(data == NULL) {
+ return NULL;
+ }
+
+ /* skip past the header slot, then round up to the requested alignment */
+ union { void *ptr; size_t offset; } u;
+ u.ptr = (char*)data + sizeof(void*);
+ u.offset = (u.offset + alignment - 1) & ~(alignment - 1);
+
+ /* remember the malloc() pointer for free_aligned() */
+ *(((void**)u.ptr) - 1) = data;
+
+ return u.ptr;
+}
+
+/* Release memory obtained from malloc_aligned().
+ *
+ * The pointer that malloc() originally returned is read from the void* slot
+ * directly before `ptr` and passed to free(). A NULL `ptr` is ignored. */
+static inline void free_aligned(void *ptr)
+{
+ /* nothing to release, and there is no header slot to read */
+ if(!ptr) {
+ return;
+ }
+
+ void **header = ((void**)ptr) - 1;
+ free(*header);
+}
+
+#endif
+
CCL_NAMESPACE_END
#endif /* __UTIL_TYPES_H__ */
diff --git a/intern/cycles/util/util_vector.h b/intern/cycles/util/util_vector.h
index 931ad272612..0fd4488d91d 100644
--- a/intern/cycles/util/util_vector.h
+++ b/intern/cycles/util/util_vector.h
@@ -24,18 +24,22 @@
#include <string.h>
#include <vector>
+#include "util_types.h"
+
CCL_NAMESPACE_BEGIN
using std::vector;
/* Array
*
- * Simplified version of vector, serving two purposes:
+ * Simplified version of vector, serving multiple purposes:
* - somewhat faster in that it does not clear memory on resize/alloc,
- * this was actually showing up in profiles quite significantly
- * - if this is used, we are not tempted to use inefficient operations */
+ * this was actually showing up in profiles quite significantly. it
+ * also does not run any constructors/destructors
+ * - if this is used, we are not tempted to use inefficient operations
+ * - aligned allocation for SSE data types */
-template<typename T>
+template<typename T, size_t alignment = 16>
class array
{
public:
@@ -52,7 +56,7 @@ public:
datasize = 0;
}
else {
- data = new T[newsize];
+ data = (T*)malloc_aligned(sizeof(T)*newsize, alignment);
datasize = newsize;
}
}
@@ -69,7 +73,7 @@ public:
datasize = 0;
}
else {
- data = new T[from.datasize];
+ data = (T*)malloc_aligned(sizeof(T)*from.datasize, alignment);
memcpy(data, from.data, from.datasize*sizeof(T));
datasize = from.datasize;
}
@@ -83,7 +87,10 @@ public:
data = NULL;
if(datasize > 0) {
- data = new T[datasize];
+ data = (T*)malloc_aligned(sizeof(T)*datasize, alignment);
+ memcpy(data, &from[0], datasize*sizeof(T));
+ free_aligned(data);
+ data = (T*)malloc_aligned(sizeof(T)*datasize, alignment);
memcpy(data, &from[0], datasize*sizeof(T));
}
@@ -92,7 +99,7 @@ public:
~array()
{
- delete [] data;
+ free_aligned(data);
}
void resize(size_t newsize)
@@ -100,10 +107,10 @@ public:
if(newsize == 0) {
clear();
}
- else {
- T *newdata = new T[newsize];
+ else if(newsize != datasize) {
+ T *newdata = (T*)malloc_aligned(sizeof(T)*newsize, alignment);
memcpy(newdata, data, ((datasize < newsize)? datasize: newsize)*sizeof(T));
- delete [] data;
+ free_aligned(data);
data = newdata;
datasize = newsize;
@@ -112,7 +119,7 @@ public:
void clear()
{
- delete [] data;
+ free_aligned(data);
data = NULL;
datasize = 0;
}