intern/cycles/device/device_queue.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115

/*
 * Copyright 2011-2021 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include "device/device_kernel.h"

#include "device/device_graphics_interop.h"
#include "util/util_logging.h"
#include "util/util_map.h"
#include "util/util_string.h"
#include "util/util_unique_ptr.h"

CCL_NAMESPACE_BEGIN

class Device;
class device_memory;

struct KernelWorkTile;

/* Abstraction of a command queue for a device.
 * Provides API to schedule kernel execution in a specific queue with minimal possible overhead
 * from driver side.
 *
 * This class encapsulates all properties needed for commands execution. */
class DeviceQueue {
 public:
  virtual ~DeviceQueue();

  /* Number of concurrent states to process for integrator,
   * based on number of cores and/or available memory. */
  virtual int num_concurrent_states(const size_t state_size) const = 0;

  /* Number of states which keeps the device occupied with work without loosing performance.
   * The renderer will add more work (when available) when number of active paths falls below this
   * value. */
  virtual int num_concurrent_busy_states() const = 0;

  /* Initialize execution of kernels on this queue.
   *
   * Will, for example, load all data required by the kernels from Device to global or path state.
   *
   * Use this method after device synchronization has finished before enqueueing any kernels. */
  virtual void init_execution() = 0;

  /* Test if an optional device kernel is available. */
  virtual bool kernel_available(DeviceKernel kernel) const = 0;

  /* Enqueue kernel execution.
   *
   * Execute the kernel work_size times on the device.
   * Supported arguments types:
   * - int: pass pointer to the int
   * - device memory: pass pointer to device_memory.device_pointer
   * Return false if there was an error executing this or a previous kernel. */
  virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) = 0;

  /* Wait unit all enqueued kernels have finished execution.
   * Return false if there was an error executing any of the enqueued kernels. */
  virtual bool synchronize() = 0;

  /* Copy memory to/from device as part of the command queue, to ensure
   * operations are done in order without having to synchronize. */
  virtual void zero_to_device(device_memory &mem) = 0;
  virtual void copy_to_device(device_memory &mem) = 0;
  virtual void copy_from_device(device_memory &mem) = 0;

  /* Graphics resources interoperability.
   *
   * The interoperability comes here by the meaning that the device is capable of computing result
   * directly into an OpenGL (or other graphics library) buffer. */

  /* Create graphics interoperability context which will be taking care of mapping graphics
   * resource as a buffer writable by kernels of this device. */
  virtual unique_ptr<DeviceGraphicsInterop> graphics_interop_create()
  {
    LOG(FATAL) << "Request of GPU interop of a device which does not support it.";
    return nullptr;
  }

  /* Device this queue has been created for. */
  Device *device;

 protected:
  /* Hide construction so that allocation via `Device` API is enforced. */
  explicit DeviceQueue(Device *device);

  /* Implementations call these from the corresponding methods to generate debugging logs. */
  void debug_init_execution();
  void debug_enqueue(DeviceKernel kernel, const int work_size);
  void debug_synchronize();
  string debug_active_kernels();

  /* Combination of kernels enqueued together sync last synchronize. */
  DeviceKernelMask last_kernels_enqueued_;
  /* Time of synchronize call. */
  double last_sync_time_;
  /* Accumulated execution time for combinations of kernels launched together. */
  map<DeviceKernelMask, double> stats_kernel_time_;
};

CCL_NAMESPACE_END