Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/device/hip/queue.cpp')
-rw-r--r--intern/cycles/device/hip/queue.cpp209
1 files changed, 209 insertions, 0 deletions
diff --git a/intern/cycles/device/hip/queue.cpp b/intern/cycles/device/hip/queue.cpp
new file mode 100644
index 00000000000..78c77e5fdae
--- /dev/null
+++ b/intern/cycles/device/hip/queue.cpp
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef WITH_HIP
+
+# include "device/hip/queue.h"
+
+# include "device/hip/device_impl.h"
+# include "device/hip/graphics_interop.h"
+# include "device/hip/kernel.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* HIPDeviceQueue */
+
/* Construct a queue on the given device, backed by a dedicated HIP stream.
 *
 * The stream is created with hipStreamNonBlocking so work on this queue does
 * not implicitly synchronize with the device's default (NULL) stream. */
HIPDeviceQueue::HIPDeviceQueue(HIPDevice *device)
    : DeviceQueue(device), hip_device_(device), hip_stream_(nullptr)
{
  /* All HIP API calls must happen with the device context active. */
  const HIPContextScope scope(hip_device_);
  hip_device_assert(hip_device_, hipStreamCreateWithFlags(&hip_stream_, hipStreamNonBlocking));
}
+
HIPDeviceQueue::~HIPDeviceQueue()
{
  const HIPContextScope scope(hip_device_);
  /* Return value intentionally not checked: errors are not actionable during
   * destruction, and asserting here could mask the original failure. */
  hipStreamDestroy(hip_stream_);
}
+
+int HIPDeviceQueue::num_concurrent_states(const size_t /*state_size*/) const
+{
+ /* TODO: compute automatically. */
+ /* TODO: must have at least num_threads_per_block. */
+ return 14416128;
+}
+
+int HIPDeviceQueue::num_concurrent_busy_states() const
+{
+ const int max_num_threads = hip_device_->get_num_multiprocessors() *
+ hip_device_->get_max_num_threads_per_multiprocessor();
+
+ if (max_num_threads == 0) {
+ return 65536;
+ }
+
+ return 4 * max_num_threads;
+}
+
/* Prepare the device for executing kernels on this queue. */
void HIPDeviceQueue::init_execution()
{
  /* Synchronize all textures and memory copies before executing task. */
  HIPContextScope scope(hip_device_);
  hip_device_->load_texture_info();
  /* Full device sync so texture uploads are visible to subsequent kernels. */
  hip_device_assert(hip_device_, hipDeviceSynchronize());

  debug_init_execution();
}
+
/* True when the given kernel was found in the loaded HIP module. */
bool HIPDeviceQueue::kernel_available(DeviceKernel kernel) const
{
  return hip_device_->kernels.available(kernel);
}
+
/* Launch a kernel asynchronously on this queue's stream.
 *
 * work_size is the total number of work items; it is split into 1D blocks of
 * the kernel's preferred block size. Returns false if the device was already
 * in an error state, or if the launch itself raised an error. */
bool HIPDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[])
{
  /* Do not pile more work onto a device that already failed. */
  if (hip_device_->have_error()) {
    return false;
  }

  debug_enqueue(kernel, work_size);

  const HIPContextScope scope(hip_device_);
  const HIPDeviceKernel &hip_kernel = hip_device_->kernels.get(kernel);

  /* Compute kernel launch parameters. */
  const int num_threads_per_block = hip_kernel.num_threads_per_block;
  const int num_blocks = divide_up(work_size, num_threads_per_block);

  int shared_mem_bytes = 0;

  switch (kernel) {
    case DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY:
    case DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY:
    case DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY:
    case DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY:
    case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY:
    case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY:
      /* See parallel_active_index.h for why this amount of shared memory is needed. */
      shared_mem_bytes = (num_threads_per_block + 1) * sizeof(int);
      break;
    default:
      break;
  }

  /* Launch kernel. Grid and block are 1D; the trailing 0 is the (unused)
   * extra launch-parameter list. */
  hip_device_assert(hip_device_,
                    hipModuleLaunchKernel(hip_kernel.function,
                                          num_blocks,
                                          1,
                                          1,
                                          num_threads_per_block,
                                          1,
                                          1,
                                          shared_mem_bytes,
                                          hip_stream_,
                                          args,
                                          0));
  return !(hip_device_->have_error());
}
+
/* Block until all work previously enqueued on this queue's stream completes.
 * Returns false if the device is, or ends up, in an error state. */
bool HIPDeviceQueue::synchronize()
{
  if (hip_device_->have_error()) {
    return false;
  }

  const HIPContextScope scope(hip_device_);
  hip_device_assert(hip_device_, hipStreamSynchronize(hip_stream_));
  debug_synchronize();

  return !(hip_device_->have_error());
}
+
/* Asynchronously fill the device allocation of `mem` with zero bytes,
 * allocating it on demand first. Global/texture memory is handled elsewhere. */
void HIPDeviceQueue::zero_to_device(device_memory &mem)
{
  assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);

  /* Nothing to do for empty allocations. */
  if (mem.memory_size() == 0) {
    return;
  }

  /* Allocate on demand. */
  if (mem.device_pointer == 0) {
    hip_device_->mem_alloc(mem);
  }

  /* Zero memory on device. */
  assert(mem.device_pointer != 0);

  const HIPContextScope scope(hip_device_);
  hip_device_assert(
      hip_device_,
      hipMemsetD8Async((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size(), hip_stream_));
}
+
/* Asynchronously copy `mem` from host to device, allocating device memory on
 * demand. Requires a valid host pointer; global/texture memory is handled
 * elsewhere. */
void HIPDeviceQueue::copy_to_device(device_memory &mem)
{
  assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);

  /* Nothing to copy for empty allocations. */
  if (mem.memory_size() == 0) {
    return;
  }

  /* Allocate on demand. */
  if (mem.device_pointer == 0) {
    hip_device_->mem_alloc(mem);
  }

  assert(mem.device_pointer != 0);
  assert(mem.host_pointer != nullptr);

  /* Copy memory to device. */
  const HIPContextScope scope(hip_device_);
  hip_device_assert(
      hip_device_,
      hipMemcpyHtoDAsync(
          (hipDeviceptr_t)mem.device_pointer, mem.host_pointer, mem.memory_size(), hip_stream_));
}
+
/* Asynchronously copy `mem` from device back to host. Unlike the to-device
 * path, this does not allocate on demand: both pointers must already exist. */
void HIPDeviceQueue::copy_from_device(device_memory &mem)
{
  assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);

  /* Nothing to copy for empty allocations. */
  if (mem.memory_size() == 0) {
    return;
  }

  assert(mem.device_pointer != 0);
  assert(mem.host_pointer != nullptr);

  /* Copy memory from device. */
  const HIPContextScope scope(hip_device_);
  hip_device_assert(
      hip_device_,
      hipMemcpyDtoHAsync(
          mem.host_pointer, (hipDeviceptr_t)mem.device_pointer, mem.memory_size(), hip_stream_));
}
+
/* Create a graphics interop object bound to this queue.
 * TODO(Arya): Enable this after stabilizing dev branch. */
unique_ptr<DeviceGraphicsInterop> HIPDeviceQueue::graphics_interop_create()
{
  return make_unique<HIPDeviceGraphicsInterop>(this);
}
+
+CCL_NAMESPACE_END
+
+#endif /* WITH_HIP */