From 9937d5379ca936b4ba93534185477fa7e529181c Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Tue, 16 Nov 2021 14:03:59 +0100 Subject: Cycles: add packed_float3 type for storage Introduce a packed_float3 type for smaller storage that is exactly 3 floats, instead of 4. For computation float3 is still used since it can use SIMD instructions. Ref T92212 Differential Revision: https://developer.blender.org/D13243 --- intern/cycles/device/memory.cpp | 2 +- intern/cycles/device/memory.h | 125 +++++++++++++++++----------------------- 2 files changed, 53 insertions(+), 74 deletions(-) (limited to 'intern/cycles/device') diff --git a/intern/cycles/device/memory.cpp b/intern/cycles/device/memory.cpp index f162b00d9f7..259bc2e5334 100644 --- a/intern/cycles/device/memory.cpp +++ b/intern/cycles/device/memory.cpp @@ -23,7 +23,7 @@ CCL_NAMESPACE_BEGIN device_memory::device_memory(Device *device, const char *name, MemoryType type) : data_type(device_type_traits<uchar>::data_type), - data_elements(device_type_traits<uchar>::num_elements_cpu), + data_elements(device_type_traits<uchar>::num_elements), data_size(0), device_size(0), data_width(0), diff --git a/intern/cycles/device/memory.h b/intern/cycles/device/memory.h index 281c54cc6a5..b2aa88b4e97 100644 --- a/intern/cycles/device/memory.h +++ b/intern/cycles/device/memory.h @@ -81,155 +81,140 @@ static constexpr size_t datatype_size(DataType datatype) template<typename T> struct device_type_traits { static const DataType data_type = TYPE_UNKNOWN; - static const size_t num_elements_cpu = sizeof(T); - static const size_t num_elements_gpu = sizeof(T); + static const size_t num_elements = sizeof(T); }; template<> struct device_type_traits<uchar> { static const DataType data_type = TYPE_UCHAR; - static const size_t num_elements_cpu = 1; - static const size_t num_elements_gpu = 1; - static_assert(sizeof(uchar) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 1; + static_assert(sizeof(uchar) == num_elements *
datatype_size(data_type)); }; template<> struct device_type_traits<uchar2> { static const DataType data_type = TYPE_UCHAR; - static const size_t num_elements_cpu = 2; - static const size_t num_elements_gpu = 2; - static_assert(sizeof(uchar2) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 2; + static_assert(sizeof(uchar2) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<uchar3> { static const DataType data_type = TYPE_UCHAR; - static const size_t num_elements_cpu = 3; - static const size_t num_elements_gpu = 3; - static_assert(sizeof(uchar3) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 3; + static_assert(sizeof(uchar3) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<uchar4> { static const DataType data_type = TYPE_UCHAR; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 4; - static_assert(sizeof(uchar4) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 4; + static_assert(sizeof(uchar4) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<uint> { static const DataType data_type = TYPE_UINT; - static const size_t num_elements_cpu = 1; - static const size_t num_elements_gpu = 1; - static_assert(sizeof(uint) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 1; + static_assert(sizeof(uint) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<uint2> { static const DataType data_type = TYPE_UINT; - static const size_t num_elements_cpu = 2; - static const size_t num_elements_gpu = 2; - static_assert(sizeof(uint2) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 2; + static_assert(sizeof(uint2) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<uint3> { static const DataType data_type = TYPE_UINT; - static const size_t
num_elements_cpu = 3; - static const size_t num_elements_gpu = 3; - static_assert(sizeof(uint3) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 3; + static_assert(sizeof(uint3) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<uint4> { static const DataType data_type = TYPE_UINT; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 4; - static_assert(sizeof(uint4) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 4; + static_assert(sizeof(uint4) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<int> { static const DataType data_type = TYPE_INT; - static const size_t num_elements_cpu = 1; - static const size_t num_elements_gpu = 1; - static_assert(sizeof(int) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 1; + static_assert(sizeof(int) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<int2> { static const DataType data_type = TYPE_INT; - static const size_t num_elements_cpu = 2; - static const size_t num_elements_gpu = 2; - static_assert(sizeof(int2) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 2; + static_assert(sizeof(int2) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<int3> { static const DataType data_type = TYPE_INT; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 3; - static_assert(sizeof(int3) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 4; + static_assert(sizeof(int3) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<int4> { static const DataType data_type = TYPE_INT; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 4; - static_assert(sizeof(int4) == num_elements_cpu * datatype_size(data_type)); + static const size_t
num_elements = 4; + static_assert(sizeof(int4) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<float> { static const DataType data_type = TYPE_FLOAT; - static const size_t num_elements_cpu = 1; - static const size_t num_elements_gpu = 1; - static_assert(sizeof(float) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 1; + static_assert(sizeof(float) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<float2> { static const DataType data_type = TYPE_FLOAT; - static const size_t num_elements_cpu = 2; - static const size_t num_elements_gpu = 2; - static_assert(sizeof(float2) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 2; + static_assert(sizeof(float2) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<float3> { + /* float3 has different size depending on the device, can't use it for interchanging + * memory between CPU and GPU. + * + * Leave body empty to trigger a compile error if used.
*/ +}; + +template<> struct device_type_traits<packed_float3> { static const DataType data_type = TYPE_FLOAT; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 3; - static_assert(sizeof(float3) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 3; + static_assert(sizeof(packed_float3) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<float4> { static const DataType data_type = TYPE_FLOAT; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 4; - static_assert(sizeof(float4) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 4; + static_assert(sizeof(float4) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<half> { static const DataType data_type = TYPE_HALF; - static const size_t num_elements_cpu = 1; - static const size_t num_elements_gpu = 1; - static_assert(sizeof(half) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 1; + static_assert(sizeof(half) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<ushort4> { static const DataType data_type = TYPE_UINT16; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 4; - static_assert(sizeof(ushort4) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 4; + static_assert(sizeof(ushort4) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<uint16_t> { static const DataType data_type = TYPE_UINT16; - static const size_t num_elements_cpu = 1; - static const size_t num_elements_gpu = 1; - static_assert(sizeof(uint16_t) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 1; + static_assert(sizeof(uint16_t) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<half4> { static const DataType data_type = TYPE_HALF; - static const size_t
num_elements_cpu = 4; - static const size_t num_elements_gpu = 4; - static_assert(sizeof(half4) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 4; + static_assert(sizeof(half4) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits<uint64_t> { static const DataType data_type = TYPE_UINT64; - static const size_t num_elements_cpu = 1; - static const size_t num_elements_gpu = 1; - static_assert(sizeof(uint64_t) == num_elements_cpu * datatype_size(data_type)); + static const size_t num_elements = 1; + static_assert(sizeof(uint64_t) == num_elements * datatype_size(data_type)); }; /* Device Memory @@ -320,9 +305,7 @@ template<typename T> class device_only_memory : public device_memory { : device_memory(device, name, allow_host_memory_fallback ? MEM_READ_WRITE : MEM_DEVICE_ONLY) { data_type = device_type_traits<T>::data_type; - data_elements = max(device_is_cpu() ? device_type_traits<T>::num_elements_cpu : - device_type_traits<T>::num_elements_gpu, - 1); + data_elements = max(device_type_traits<T>::num_elements, 1); } device_only_memory(device_only_memory &&other) noexcept : device_memory(std::move(other)) @@ -378,15 +361,11 @@ template<typename T> class device_only_memory : public device_memory { template<typename T> class device_vector : public device_memory { public: - /* Can only use this for types that have the same size on CPU and GPU. */ - static_assert(device_type_traits<T>::num_elements_cpu == - device_type_traits<T>::num_elements_gpu); - device_vector(Device *device, const char *name, MemoryType type) : device_memory(device, name, type) { data_type = device_type_traits<T>::data_type; - data_elements = device_type_traits<T>::num_elements_cpu; + data_elements = device_type_traits<T>::num_elements; modified = true; need_realloc_ = true; -- cgit v1.2.3