diff options
author | Brecht Van Lommel <brecht@blender.org> | 2021-11-16 16:03:59 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2021-11-17 19:29:41 +0300 |
commit | 9937d5379ca936b4ba93534185477fa7e529181c (patch) | |
tree | 69fc56fad9dbb8c40f8e08f14355b75584a9220b /intern/cycles/device | |
parent | 89d5714d8f233b4bbb83f6a7b33237e2ec04ee79 (diff) |
Cycles: add packed_float3 type for storage
Introduce a packed_float3 type for compact storage that is exactly 3
floats in size, instead of the 4 floats that float3 occupies on the CPU.
float3 is still used for computation, since it can use SIMD instructions.
Ref T92212
Differential Revision: https://developer.blender.org/D13243
Diffstat (limited to 'intern/cycles/device')
-rw-r--r-- | intern/cycles/device/memory.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/device/memory.h | 125 |
2 files changed, 53 insertions, 74 deletions
diff --git a/intern/cycles/device/memory.cpp b/intern/cycles/device/memory.cpp index f162b00d9f7..259bc2e5334 100644 --- a/intern/cycles/device/memory.cpp +++ b/intern/cycles/device/memory.cpp @@ -23,7 +23,7 @@ CCL_NAMESPACE_BEGIN device_memory::device_memory(Device *device, const char *name, MemoryType type) : data_type(device_type_traits<uchar>::data_type), - data_elements(device_type_traits<uchar>::num_elements_cpu), + data_elements(device_type_traits<uchar>::num_elements), data_size(0), device_size(0), data_width(0), diff --git a/intern/cycles/device/memory.h b/intern/cycles/device/memory.h index 281c54cc6a5..b2aa88b4e97 100644 --- a/intern/cycles/device/memory.h +++ b/intern/cycles/device/memory.h @@ -81,155 +81,140 @@ static constexpr size_t datatype_size(DataType datatype) template<typename T> struct device_type_traits { static const DataType data_type = TYPE_UNKNOWN; - static const size_t num_elements_cpu = sizeof(T); - static const size_t num_elements_gpu = sizeof(T); + static const size_t num_elements = sizeof(T); }; template<> struct device_type_traits<uchar> { static const DataType data_type = TYPE_UCHAR; - static const size_t num_elements_cpu = 1; - static const size_t num_elements_gpu = 1; - static_assert(sizeof(uchar) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 1; + static_assert(sizeof(uchar) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<uchar2> { static const DataType data_type = TYPE_UCHAR; - static const size_t num_elements_cpu = 2; - static const size_t num_elements_gpu = 2; - static_assert(sizeof(uchar2) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 2; + static_assert(sizeof(uchar2) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<uchar3> { static const DataType data_type = TYPE_UCHAR; - static const size_t num_elements_cpu = 3; - static const size_t num_elements_gpu = 3; - 
static_assert(sizeof(uchar3) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 3; + static_assert(sizeof(uchar3) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<uchar4> { static const DataType data_type = TYPE_UCHAR; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 4; - static_assert(sizeof(uchar4) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 4; + static_assert(sizeof(uchar4) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<uint> { static const DataType data_type = TYPE_UINT; - static const size_t num_elements_cpu = 1; - static const size_t num_elements_gpu = 1; - static_assert(sizeof(uint) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 1; + static_assert(sizeof(uint) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<uint2> { static const DataType data_type = TYPE_UINT; - static const size_t num_elements_cpu = 2; - static const size_t num_elements_gpu = 2; - static_assert(sizeof(uint2) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 2; + static_assert(sizeof(uint2) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<uint3> { static const DataType data_type = TYPE_UINT; - static const size_t num_elements_cpu = 3; - static const size_t num_elements_gpu = 3; - static_assert(sizeof(uint3) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 3; + static_assert(sizeof(uint3) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<uint4> { static const DataType data_type = TYPE_UINT; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 4; - static_assert(sizeof(uint4) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 4; 
+ static_assert(sizeof(uint4) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<int> { static const DataType data_type = TYPE_INT; - static const size_t num_elements_cpu = 1; - static const size_t num_elements_gpu = 1; - static_assert(sizeof(int) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 1; + static_assert(sizeof(int) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<int2> { static const DataType data_type = TYPE_INT; - static const size_t num_elements_cpu = 2; - static const size_t num_elements_gpu = 2; - static_assert(sizeof(int2) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 2; + static_assert(sizeof(int2) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<int3> { static const DataType data_type = TYPE_INT; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 3; - static_assert(sizeof(int3) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 4; + static_assert(sizeof(int3) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<int4> { static const DataType data_type = TYPE_INT; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 4; - static_assert(sizeof(int4) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 4; + static_assert(sizeof(int4) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<float> { static const DataType data_type = TYPE_FLOAT; - static const size_t num_elements_cpu = 1; - static const size_t num_elements_gpu = 1; - static_assert(sizeof(float) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 1; + static_assert(sizeof(float) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<float2> { static const 
DataType data_type = TYPE_FLOAT; - static const size_t num_elements_cpu = 2; - static const size_t num_elements_gpu = 2; - static_assert(sizeof(float2) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 2; + static_assert(sizeof(float2) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<float3> { + /* float3 has different size depending on the device, can't use it for interchanging + * memory between CPU and GPU. + * + * Leave body empty to trigger a compile error if used. */ +}; + +template<> struct device_type_traits<packed_float3> { static const DataType data_type = TYPE_FLOAT; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 3; - static_assert(sizeof(float3) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 3; + static_assert(sizeof(packed_float3) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<float4> { static const DataType data_type = TYPE_FLOAT; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 4; - static_assert(sizeof(float4) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 4; + static_assert(sizeof(float4) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<half> { static const DataType data_type = TYPE_HALF; - static const size_t num_elements_cpu = 1; - static const size_t num_elements_gpu = 1; - static_assert(sizeof(half) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 1; + static_assert(sizeof(half) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<ushort4> { static const DataType data_type = TYPE_UINT16; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 4; - static_assert(sizeof(ushort4) == num_elements_cpu * datatype_size(data_type)); + static const size_t 
num_elements = 4; + static_assert(sizeof(ushort4) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<uint16_t> { static const DataType data_type = TYPE_UINT16; - static const size_t num_elements_cpu = 1; - static const size_t num_elements_gpu = 1; - static_assert(sizeof(uint16_t) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 1; + static_assert(sizeof(uint16_t) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<half4> { static const DataType data_type = TYPE_HALF; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 4; - static_assert(sizeof(half4) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 4; + static_assert(sizeof(half4) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<uint64_t> { static const DataType data_type = TYPE_UINT64; - static const size_t num_elements_cpu = 1; - static const size_t num_elements_gpu = 1; - static_assert(sizeof(uint64_t) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 1; + static_assert(sizeof(uint64_t) == num_elements * datatype_size(data_type)); }; /* Device Memory @@ -320,9 +305,7 @@ template<typename T> class device_only_memory : public device_memory { : device_memory(device, name, allow_host_memory_fallback ? MEM_READ_WRITE : MEM_DEVICE_ONLY) { data_type = device_type_traits<T>::data_type; - data_elements = max(device_is_cpu() ? 
device_type_traits<T>::num_elements_cpu : - device_type_traits<T>::num_elements_gpu, - 1); + data_elements = max(device_type_traits<T>::num_elements, 1); } device_only_memory(device_only_memory &&other) noexcept : device_memory(std::move(other)) @@ -378,15 +361,11 @@ template<typename T> class device_only_memory : public device_memory { template<typename T> class device_vector : public device_memory { public: - /* Can only use this for types that have the same size on CPU and GPU. */ - static_assert(device_type_traits<T>::num_elements_cpu == - device_type_traits<T>::num_elements_gpu); - device_vector(Device *device, const char *name, MemoryType type) : device_memory(device, name, type) { data_type = device_type_traits<T>::data_type; - data_elements = device_type_traits<T>::num_elements_cpu; + data_elements = device_type_traits<T>::num_elements; modified = true; need_realloc_ = true; |