diff options
author | Brecht Van Lommel <brecht@blender.org> | 2022-06-17 18:16:37 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2022-06-20 13:30:48 +0300 |
commit | ff1883307f12a8b734bfcf87b01743dc73afae75 (patch) | |
tree | 95fbecc1e681e89f6a5d030cb5f5f96879dc7fa7 /intern/cycles/device | |
parent | b73a52302edcd99f086fc26fc62a8ed4db29d562 (diff) |
Cleanup: renaming and consistency for kernel data
* Rename "texture" to "data array". This has not used textures for a long time,
there are just global memory arrays now. (On old CUDA GPUs there was a cache
for textures but not global memory, so we used to put all data in textures.)
* For CUDA and HIP, put globals in KernelParams struct like other devices.
* Drop __ prefix for data array names, no possibility for naming conflict now that
these are in a struct.
Diffstat (limited to 'intern/cycles/device')
-rw-r--r-- | intern/cycles/device/cpu/device_impl.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/device/cuda/device_impl.cpp | 21 | ||||
-rw-r--r-- | intern/cycles/device/hip/device_impl.cpp | 20 | ||||
-rw-r--r-- | intern/cycles/device/memory.h | 2 | ||||
-rw-r--r-- | intern/cycles/device/metal/device_impl.mm | 14 | ||||
-rw-r--r-- | intern/cycles/device/metal/queue.mm | 4 | ||||
-rw-r--r-- | intern/cycles/device/optix/device_impl.cpp | 18 |
7 files changed, 53 insertions, 30 deletions
diff --git a/intern/cycles/device/cpu/device_impl.cpp b/intern/cycles/device/cpu/device_impl.cpp index 0a4eb089037..d4f0532aa5e 100644 --- a/intern/cycles/device/cpu/device_impl.cpp +++ b/intern/cycles/device/cpu/device_impl.cpp @@ -51,7 +51,7 @@ CCL_NAMESPACE_BEGIN CPUDevice::CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_) - : Device(info_, stats_, profiler_), texture_info(this, "__texture_info", MEM_GLOBAL) + : Device(info_, stats_, profiler_), texture_info(this, "texture_info", MEM_GLOBAL) { /* Pick any kernel, all of them are supposed to have same level of microarchitecture * optimization. */ @@ -192,7 +192,7 @@ device_ptr CPUDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_ void CPUDevice::const_copy_to(const char *name, void *host, size_t size) { #ifdef WITH_EMBREE - if (strcmp(name, "__data") == 0) { + if (strcmp(name, "data") == 0) { assert(size <= sizeof(KernelData)); // Update scene handle (since it is different for each device on multi devices) diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp index e75224abe90..00851a8e91c 100644 --- a/intern/cycles/device/cuda/device_impl.cpp +++ b/intern/cycles/device/cuda/device_impl.cpp @@ -23,6 +23,8 @@ # include "util/types.h" # include "util/windows.h" +# include "kernel/device/cuda/globals.h" + CCL_NAMESPACE_BEGIN class CUDADevice; @@ -51,7 +53,7 @@ void CUDADevice::set_error(const string &error) } CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler) - : Device(info, stats, profiler), texture_info(this, "__texture_info", MEM_GLOBAL) + : Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL) { first_error = true; @@ -900,9 +902,19 @@ void CUDADevice::const_copy_to(const char *name, void *host, size_t size) CUdeviceptr mem; size_t bytes; - cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name)); - // assert(bytes == size); - cuda_assert(cuMemcpyHtoD(mem, host, size)); + cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, "kernel_params")); + assert(bytes == sizeof(KernelParamsCUDA)); + + /* Update data storage pointers in launch parameters. */ +# define KERNEL_DATA_ARRAY(data_type, data_name) \ + if (strcmp(name, #data_name) == 0) { \ + cuda_assert(cuMemcpyHtoD(mem + offsetof(KernelParamsCUDA, data_name), host, size)); \ + return; \ + } + KERNEL_DATA_ARRAY(KernelData, data) + KERNEL_DATA_ARRAY(IntegratorStateGPU, integrator_state) +# include "kernel/data_arrays.h" +# undef KERNEL_DATA_ARRAY } void CUDADevice::global_alloc(device_memory &mem) @@ -926,7 +938,6 @@ void CUDADevice::tex_alloc(device_texture &mem) { CUDAContextScope scope(this); - string bind_name = mem.name; size_t dsize = datatype_size(mem.data_type); size_t size = mem.memory_size(); diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp index 652c1001f85..82db55ea715 100644 --- a/intern/cycles/device/hip/device_impl.cpp +++ b/intern/cycles/device/hip/device_impl.cpp @@ -24,6 +24,8 @@ # include "util/types.h" # include "util/windows.h" +# include "kernel/device/hip/globals.h" + CCL_NAMESPACE_BEGIN class HIPDevice; @@ -52,7 +54,7 @@ void HIPDevice::set_error(const string &error) } HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler) - : Device(info, stats, profiler), texture_info(this, "__texture_info", MEM_GLOBAL) + : Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL) { first_error = true; @@ -856,8 +858,19 @@ void HIPDevice::const_copy_to(const char *name, void *host, size_t size) hipDeviceptr_t mem; size_t bytes; - hip_assert(hipModuleGetGlobal(&mem, &bytes, hipModule, name)); - hip_assert(hipMemcpyHtoD(mem, host, size)); + hip_assert(hipModuleGetGlobal(&mem, &bytes, hipModule, "kernel_params")); + assert(bytes == sizeof(KernelParamsHIP)); + + /* Update data storage pointers in launch parameters. */ +# define KERNEL_DATA_ARRAY(data_type, data_name) \ + if (strcmp(name, #data_name) == 0) { \ + hip_assert(hipMemcpyHtoD(mem + offsetof(KernelParamsHIP, data_name), host, size)); \ + return; \ + } + KERNEL_DATA_ARRAY(KernelData, data) + KERNEL_DATA_ARRAY(IntegratorStateGPU, integrator_state) +# include "kernel/data_arrays.h" +# undef KERNEL_DATA_ARRAY } void HIPDevice::global_alloc(device_memory &mem) @@ -881,7 +894,6 @@ void HIPDevice::tex_alloc(device_texture &mem) { HIPContextScope scope(this); - string bind_name = mem.name; size_t dsize = datatype_size(mem.data_type); size_t size = mem.memory_size(); diff --git a/intern/cycles/device/memory.h b/intern/cycles/device/memory.h index 55d6d39cef8..5f44475077e 100644 --- a/intern/cycles/device/memory.h +++ b/intern/cycles/device/memory.h @@ -350,7 +350,7 @@ template<typename T> class device_only_memory : public device_memory { * * When using memory type MEM_GLOBAL, a pointer to this memory will be * automatically attached to kernel globals, using the provided name - * matching an entry in kernel_textures.h. */ + * matching an entry in kernel/data_arrays.h. */ template<typename T> class device_vector : public device_memory { public: diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm index a0ac677beda..0a89055af34 100644 --- a/intern/cycles/device/metal/device_impl.mm +++ b/intern/cycles/device/metal/device_impl.mm @@ -35,7 +35,7 @@ void MetalDevice::set_error(const string &error) } MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler) - : Device(info, stats, profiler), texture_info(this, "__texture_info", MEM_GLOBAL) + : Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL) { mtlDevId = info.num; @@ -625,7 +625,7 @@ device_ptr MetalDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, siz void MetalDevice::const_copy_to(const char *name, void *host, size_t size) { - if (strcmp(name, "__data") == 0) { + if (strcmp(name, "data") == 0) { assert(size == sizeof(KernelData)); memcpy((uint8_t *)&launch_params + offsetof(KernelParamsMetal, data), host, size); return; @@ -646,19 +646,19 @@ void MetalDevice::const_copy_to(const char *name, void *host, size_t size) }; /* Update data storage pointers in launch parameters. */ - if (strcmp(name, "__integrator_state") == 0) { + if (strcmp(name, "integrator_state") == 0) { /* IntegratorStateGPU is contiguous pointers */ const size_t pointer_block_size = sizeof(IntegratorStateGPU); update_launch_pointers( - offsetof(KernelParamsMetal, __integrator_state), host, size, pointer_block_size); + offsetof(KernelParamsMetal, integrator_state), host, size, pointer_block_size); } -# define KERNEL_TEX(data_type, tex_name) \ +# define KERNEL_DATA_ARRAY(data_type, tex_name) \ else if (strcmp(name, #tex_name) == 0) \ { \ update_launch_pointers(offsetof(KernelParamsMetal, tex_name), host, size, size); \ } -# include "kernel/textures.h" -# undef KERNEL_TEX +# include "kernel/data_arrays.h" +# undef KERNEL_DATA_ARRAY } void MetalDevice::global_alloc(device_memory &mem) diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm index 55db7c5afce..da5408373bb 100644 --- a/intern/cycles/device/metal/queue.mm +++ b/intern/cycles/device/metal/queue.mm @@ -358,7 +358,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, /* Prepare any non-pointer (i.e. plain-old-data) KernelParamsMetal data */ /* The plain-old-data is contiguous, continuing to the end of KernelParamsMetal */ - size_t plain_old_launch_data_offset = offsetof(KernelParamsMetal, __integrator_state) + + size_t plain_old_launch_data_offset = offsetof(KernelParamsMetal, integrator_state) + sizeof(IntegratorStateGPU); size_t plain_old_launch_data_size = sizeof(KernelParamsMetal) - plain_old_launch_data_offset; memcpy(init_arg_buffer + globals_offsets + plain_old_launch_data_offset, @@ -415,7 +415,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, } /* this relies on IntegratorStateGPU layout being contiguous device_ptrs */ - const size_t pointer_block_end = offsetof(KernelParamsMetal, __integrator_state) + + const size_t pointer_block_end = offsetof(KernelParamsMetal, integrator_state) + sizeof(IntegratorStateGPU); for (size_t offset = 0; offset < pointer_block_end; offset += sizeof(device_ptr)) { int pointer_index = int(offset / sizeof(device_ptr)); diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index 53697db5c04..e7dcc29a2da 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -246,7 +246,7 @@ OptiXDevice::Denoiser::Denoiser(OptiXDevice *device) OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler) : CUDADevice(info, stats, profiler), sbt_data(this, "__sbt", MEM_READ_ONLY), - launch_params(this, "__params", false), + launch_params(this, "kernel_params", false), denoiser_(this) { /* Make the CUDA context current. */ @@ -421,7 +421,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features) pipeline_options.numPayloadValues = 8; pipeline_options.numAttributeValues = 2; /* u, v */ pipeline_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE; - pipeline_options.pipelineLaunchParamsVariableName = "__params"; /* See globals.h */ + pipeline_options.pipelineLaunchParamsVariableName = "kernel_params"; /* See globals.h */ pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE; if (kernel_features & KERNEL_FEATURE_HAIR) { @@ -2042,7 +2042,7 @@ void OptiXDevice::const_copy_to(const char *name, void *host, size_t size) /* Set constant memory for CUDA module. */ CUDADevice::const_copy_to(name, host, size); - if (strcmp(name, "__data") == 0) { + if (strcmp(name, "data") == 0) { assert(size <= sizeof(KernelData)); /* Update traversable handle (since it is different for each device on multi devices). */ @@ -2054,14 +2054,14 @@ void OptiXDevice::const_copy_to(const char *name, void *host, size_t size) } /* Update data storage pointers in launch parameters. */ -# define KERNEL_TEX(data_type, tex_name) \ - if (strcmp(name, #tex_name) == 0) { \ - update_launch_params(offsetof(KernelParamsOptiX, tex_name), host, size); \ +# define KERNEL_DATA_ARRAY(data_type, data_name) \ + if (strcmp(name, #data_name) == 0) { \ + update_launch_params(offsetof(KernelParamsOptiX, data_name), host, size); \ return; \ } - KERNEL_TEX(IntegratorStateGPU, __integrator_state) -# include "kernel/textures.h" -# undef KERNEL_TEX + KERNEL_DATA_ARRAY(IntegratorStateGPU, integrator_state) +# include "kernel/data_arrays.h" +# undef KERNEL_DATA_ARRAY } void OptiXDevice::update_launch_params(size_t offset, void *data, size_t data_size) |