/* SPDX-License-Identifier: Apache-2.0 * Copyright 2011-2022 Blender Foundation */ #ifndef __DEVICE_MEMORY_H__ #define __DEVICE_MEMORY_H__ /* Device Memory * * Data types for allocating, copying and freeing device memory. */ #include "util/array.h" #include "util/half.h" #include "util/string.h" #include "util/texture.h" #include "util/types.h" #include "util/vector.h" CCL_NAMESPACE_BEGIN class Device; enum MemoryType { MEM_READ_ONLY, MEM_READ_WRITE, MEM_DEVICE_ONLY, MEM_GLOBAL, MEM_TEXTURE, }; /* Supported Data Types */ enum DataType { TYPE_UNKNOWN, TYPE_UCHAR, TYPE_UINT16, TYPE_UINT, TYPE_INT, TYPE_FLOAT, TYPE_HALF, TYPE_UINT64, }; static constexpr size_t datatype_size(DataType datatype) { switch (datatype) { case TYPE_UNKNOWN: return 1; case TYPE_UCHAR: return sizeof(uchar); case TYPE_FLOAT: return sizeof(float); case TYPE_UINT: return sizeof(uint); case TYPE_UINT16: return sizeof(uint16_t); case TYPE_INT: return sizeof(int); case TYPE_HALF: return sizeof(half); case TYPE_UINT64: return sizeof(uint64_t); default: return 0; } } /* Traits for data types */ template struct device_type_traits { static const DataType data_type = TYPE_UNKNOWN; static const size_t num_elements = sizeof(T); }; template<> struct device_type_traits { static const DataType data_type = TYPE_UCHAR; static const size_t num_elements = 1; static_assert(sizeof(uchar) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits { static const DataType data_type = TYPE_UCHAR; static const size_t num_elements = 2; static_assert(sizeof(uchar2) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits { static const DataType data_type = TYPE_UCHAR; static const size_t num_elements = 3; static_assert(sizeof(uchar3) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits { static const DataType data_type = TYPE_UCHAR; static const size_t num_elements = 4; static_assert(sizeof(uchar4) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits { static const DataType data_type = TYPE_UINT; static const size_t num_elements = 1; static_assert(sizeof(uint) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits { static const DataType data_type = TYPE_UINT; static const size_t num_elements = 2; static_assert(sizeof(uint2) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits { static const DataType data_type = TYPE_UINT; static const size_t num_elements = 3; static_assert(sizeof(uint3) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits { static const DataType data_type = TYPE_UINT; static const size_t num_elements = 4; static_assert(sizeof(uint4) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits { static const DataType data_type = TYPE_INT; static const size_t num_elements = 1; static_assert(sizeof(int) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits { static const DataType data_type = TYPE_INT; static const size_t num_elements = 2; static_assert(sizeof(int2) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits { static const DataType data_type = TYPE_INT; static const size_t num_elements = 4; static_assert(sizeof(int3) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits { static const DataType data_type = TYPE_INT; static const size_t num_elements = 4; static_assert(sizeof(int4) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits { static const DataType data_type = TYPE_FLOAT; static const size_t num_elements = 1; static_assert(sizeof(float) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits { static const DataType data_type = TYPE_FLOAT; static const size_t num_elements = 2; static_assert(sizeof(float2) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits { /* float3 has different size depending on the device, can't use it for interchanging * memory between CPU and GPU. * * Leave body empty to trigger a compile error if used. */ }; template<> struct device_type_traits { static const DataType data_type = TYPE_FLOAT; static const size_t num_elements = 3; static_assert(sizeof(packed_float3) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits { static const DataType data_type = TYPE_FLOAT; static const size_t num_elements = 4; static_assert(sizeof(float4) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits { static const DataType data_type = TYPE_HALF; static const size_t num_elements = 1; static_assert(sizeof(half) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits { static const DataType data_type = TYPE_UINT16; static const size_t num_elements = 4; static_assert(sizeof(ushort4) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits { static const DataType data_type = TYPE_UINT16; static const size_t num_elements = 1; static_assert(sizeof(uint16_t) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits { static const DataType data_type = TYPE_HALF; static const size_t num_elements = 4; static_assert(sizeof(half4) == num_elements * datatype_size(data_type)); }; template<> struct device_type_traits { static const DataType data_type = TYPE_UINT64; static const size_t num_elements = 1; static_assert(sizeof(uint64_t) == num_elements * datatype_size(data_type)); }; /* Device Memory * * Base class for all device memory. This should not be allocated directly, * instead the appropriate subclass can be used. */ class device_memory { public: size_t memory_size() { return data_size * data_elements * datatype_size(data_type); } size_t memory_elements_size(int elements) { return elements * data_elements * datatype_size(data_type); } /* Data information. */ DataType data_type; int data_elements; size_t data_size; size_t device_size; size_t data_width; size_t data_height; size_t data_depth; MemoryType type; const char *name; /* Pointers. */ Device *device; device_ptr device_pointer; void *host_pointer; void *shared_pointer; /* reference counter for shared_pointer */ int shared_counter; virtual ~device_memory(); void swap_device(Device *new_device, size_t new_device_size, device_ptr new_device_ptr); void restore_device(); bool is_resident(Device *sub_device) const; protected: friend class CUDADevice; friend class OptiXDevice; friend class HIPDevice; friend class MetalDevice; /* Only create through subclasses. */ device_memory(Device *device, const char *name, MemoryType type); /* No copying and allowed. * * This is because device implementation might need to register device memory in an allocation * map of some sort and use pointer as a key to identify blocks. Moving data from one place to * another bypassing device allocation routines will make those maps hard to maintain. */ device_memory(const device_memory &) = delete; device_memory(device_memory &&other) noexcept = delete; device_memory &operator=(const device_memory &) = delete; device_memory &operator=(device_memory &&) = delete; /* Host allocation on the device. All host_pointer memory should be * allocated with these functions, for devices that support using * the same pointer for host and device. */ void *host_alloc(size_t size); void host_free(); /* Device memory allocation and copying. */ void device_alloc(); void device_free(); void device_copy_to(); void device_copy_from(size_t y, size_t w, size_t h, size_t elem); void device_zero(); bool device_is_cpu(); device_ptr original_device_ptr; size_t original_device_size; Device *original_device; bool need_realloc_; bool modified; }; /* Device Only Memory * * Working memory only needed by the device, with no corresponding allocation * on the host. Only used internally in the device implementations. */ template class device_only_memory : public device_memory { public: device_only_memory(Device *device, const char *name, bool allow_host_memory_fallback = false) : device_memory(device, name, allow_host_memory_fallback ? MEM_READ_WRITE : MEM_DEVICE_ONLY) { data_type = device_type_traits::data_type; data_elements = max(device_type_traits::num_elements, size_t(1)); } device_only_memory(device_only_memory &&other) noexcept : device_memory(std::move(other)) { } virtual ~device_only_memory() { free(); } void alloc_to_device(size_t num, bool shrink_to_fit = true) { size_t new_size = num; bool reallocate; if (shrink_to_fit) { reallocate = (data_size != new_size); } else { reallocate = (data_size < new_size); } if (reallocate) { device_free(); data_size = new_size; device_alloc(); } } void free() { device_free(); data_size = 0; } void zero_to_device() { device_zero(); } }; /* Device Vector * * Data vector to exchange data between host and device. Memory will be * allocated on the host first with alloc() and resize, and then filled * in and copied to the device with copy_to_device(). Or alternatively * allocated and set to zero on the device with zero_to_device(). * * When using memory type MEM_GLOBAL, a pointer to this memory will be * automatically attached to kernel globals, using the provided name * matching an entry in kernel/data_arrays.h. */ template class device_vector : public device_memory { public: device_vector(Device *device, const char *name, MemoryType type) : device_memory(device, name, type) { data_type = device_type_traits::data_type; data_elements = device_type_traits::num_elements; modified = true; need_realloc_ = true; assert(data_elements > 0); } virtual ~device_vector() { free(); } /* Host memory allocation. */ T *alloc(size_t width, size_t height = 0, size_t depth = 0) { size_t new_size = size(width, height, depth); if (new_size != data_size) { device_free(); host_free(); host_pointer = host_alloc(sizeof(T) * new_size); modified = true; assert(device_pointer == 0); } data_size = new_size; data_width = width; data_height = height; data_depth = depth; return data(); } /* Host memory resize. Only use this if the original data needs to be * preserved, it is faster to call alloc() if it can be discarded. */ T *resize(size_t width, size_t height = 0, size_t depth = 0) { size_t new_size = size(width, height, depth); if (new_size != data_size) { void *new_ptr = host_alloc(sizeof(T) * new_size); if (new_size && data_size) { size_t min_size = ((new_size < data_size) ? new_size : data_size); memcpy((T *)new_ptr, (T *)host_pointer, sizeof(T) * min_size); } device_free(); host_free(); host_pointer = new_ptr; assert(device_pointer == 0); } data_size = new_size; data_width = width; data_height = height; data_depth = depth; return data(); } /* Take over data from an existing array. */ void steal_data(array &from) { device_free(); host_free(); data_size = from.size(); data_width = 0; data_height = 0; data_depth = 0; host_pointer = from.steal_pointer(); assert(device_pointer == 0); } void give_data(array &to) { device_free(); to.set_data((T *)host_pointer, data_size); data_size = 0; data_width = 0; data_height = 0; data_depth = 0; host_pointer = 0; assert(device_pointer == 0); } /* Free device and host memory. */ void free() { device_free(); host_free(); data_size = 0; data_width = 0; data_height = 0; data_depth = 0; host_pointer = 0; modified = true; need_realloc_ = true; assert(device_pointer == 0); } void free_if_need_realloc(bool force_free) { if (need_realloc_ || force_free) { free(); } } bool is_modified() const { return modified; } bool need_realloc() { return need_realloc_; } void tag_modified() { modified = true; } void tag_realloc() { need_realloc_ = true; tag_modified(); } size_t size() const { return data_size; } T *data() { return (T *)host_pointer; } const T *data() const { return (T *)host_pointer; } T &operator[](size_t i) { assert(i < data_size); return data()[i]; } void copy_to_device() { if (data_size != 0) { device_copy_to(); } } void copy_to_device_if_modified() { if (!modified) { return; } copy_to_device(); } void clear_modified() { modified = false; need_realloc_ = false; } void copy_from_device() { device_copy_from(0, data_width, (data_height == 0) ? 1 : data_height, sizeof(T)); } void copy_from_device(size_t y, size_t w, size_t h) { device_copy_from(y, w, h, sizeof(T)); } void zero_to_device() { device_zero(); } void move_device(Device *new_device) { copy_from_device(); device_free(); device = new_device; copy_to_device(); } protected: size_t size(size_t width, size_t height, size_t depth) { return width * ((height == 0) ? 1 : height) * ((depth == 0) ? 1 : depth); } }; /* Device Sub Memory * * Pointer into existing memory. It is not allocated separately, but created * from an already allocated base memory. It is freed automatically when it * goes out of scope, which should happen before base memory is freed. * * NOTE: some devices require offset and size of the sub_ptr to be properly * aligned to device->mem_address_alingment(). */ class device_sub_ptr { public: device_sub_ptr(device_memory &mem, size_t offset, size_t size); ~device_sub_ptr(); device_ptr operator*() const { return ptr; } protected: /* No copying. */ device_sub_ptr &operator=(const device_sub_ptr &); Device *device; device_ptr ptr; }; /* Device Texture * * 2D or 3D image texture memory. */ class device_texture : public device_memory { public: device_texture(Device *device, const char *name, const uint slot, ImageDataType image_data_type, InterpolationType interpolation, ExtensionType extension); ~device_texture(); void *alloc(const size_t width, const size_t height, const size_t depth = 0); void copy_to_device(); uint slot; TextureInfo info; protected: size_t size(const size_t width, const size_t height, const size_t depth) { return width * ((height == 0) ? 1 : height) * ((depth == 0) ? 1 : depth); } }; CCL_NAMESPACE_END #endif /* __DEVICE_MEMORY_H__ */