Cycles: merge of cycles-x branch, a major update to the renderer

This includes much improved GPU rendering performance, viewport interactivity, new shadow catcher, revamped sampling settings, subsurface scattering anisotropy, new GPU volume sampling, improved PMJ sampling pattern, and more. Some features have also been removed or changed, breaking backwards compatibility. Including the removal of the OpenCL backend, for which alternatives are under development. Release notes and code docs: https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles https://wiki.blender.org/wiki/Source/Render/Cycles Credits: * Sergey Sharybin * Brecht Van Lommel * Patrick Mours (OptiX backend) * Christophe Hery (subsurface scattering anisotropy) * William Leeson (PMJ sampling pattern) * Alaska (various fixes and tweaks) * Thomas Dinges (various fixes) For the full commit history, see the cycles-x branch. This squashes together all the changes since intermediate changes would often fail building or tests. Ref T87839, T87837, T87836 Fixes T90734, T89353, T80267, T80267, T77185, T69800
author: Brecht Van Lommel <brecht@blender.org> 2021-09-20 18:59:20 +0300
committer: Brecht Van Lommel <brecht@blender.org> 2021-09-21 15:55:54 +0300
commit: 08031197250aeecbaca3803254e6f25b8c7b7b37 (patch)
tree: 6fe7ab045f0dc0a423d6557c4073f34309ef4740 /intern/cycles/device/cpu
parent: fa6b1007bad065440950cd67deb16a04f368856f (diff)
9 files changed, 1117 insertions, 0 deletions
diff --git a/intern/cycles/device/cpu/device.cpp b/intern/cycles/device/cpu/device.cpp
new file mode 100644
index 00000000000..68ca8e8bb22
--- /dev/null
+++ b/intern/cycles/device/cpu/device.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "device/cpu/device.h"
+#include "device/cpu/device_impl.h"
+
+/* Used for `info.denoisers`. */
+/* TODO(sergey): The denoisers are probably to be moved completely out of the device into their
+ * own class. But until then keep API consistent with how it used to work before. */
+#include "util/util_openimagedenoise.h"
+
+CCL_NAMESPACE_BEGIN
+
+Device *device_cpu_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
+{
+  return new CPUDevice(info, stats, profiler);
+}
+
+void device_cpu_info(vector<DeviceInfo> &devices)
+{
+  DeviceInfo info;
+
+  info.type = DEVICE_CPU;
+  info.description = system_cpu_brand_string();
+  info.id = "CPU";
+  info.num = 0;
+  info.has_osl = true;
+  info.has_half_images = true;
+  info.has_nanovdb = true;
+  info.has_profiling = true;
+  if (openimagedenoise_supported()) {
+    info.denoisers |= DENOISER_OPENIMAGEDENOISE;
+  }
+
+  devices.insert(devices.begin(), info);
+}
+
+string device_cpu_capabilities()
+{
+  string capabilities = "";
+  capabilities += system_cpu_support_sse2() ? "SSE2 " : "";
+  capabilities += system_cpu_support_sse3() ? "SSE3 " : "";
+  capabilities += system_cpu_support_sse41() ? "SSE41 " : "";
+  capabilities += system_cpu_support_avx() ? "AVX " : "";
+  capabilities += system_cpu_support_avx2() ? "AVX2" : "";
+  if (capabilities[capabilities.size() - 1] == ' ')
+    capabilities.resize(capabilities.size() - 1);
+  return capabilities;
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/device/cpu/device.h b/intern/cycles/device/cpu/device.h
new file mode 100644
index 00000000000..9cb2e80068d
--- /dev/null
+++ b/intern/cycles/device/cpu/device.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "util/util_string.h"
+#include "util/util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+class Device;
+class DeviceInfo;
+class Profiler;
+class Stats;
+
+Device *device_cpu_create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
+
+void device_cpu_info(vector<DeviceInfo> &devices);
+
+string device_cpu_capabilities();
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/device/cpu/device_impl.cpp b/intern/cycles/device/cpu/device_impl.cpp
new file mode 100644
index 00000000000..3b0db6bdd0e
--- /dev/null
+++ b/intern/cycles/device/cpu/device_impl.cpp
@@ -0,0 +1,481 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "device/cpu/device_impl.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/* So ImathMath is included before our kernel_cpu_compat. */
+#ifdef WITH_OSL
+/* So no context pollution happens from indirectly included windows.h */
+#  include "util/util_windows.h"
+#  include <OSL/oslexec.h>
+#endif
+
+#ifdef WITH_EMBREE
+#  include <embree3/rtcore.h>
+#endif
+
+#include "device/cpu/kernel.h"
+#include "device/cpu/kernel_thread_globals.h"
+
+#include "device/device.h"
+
+// clang-format off
+#include "kernel/device/cpu/compat.h"
+#include "kernel/device/cpu/globals.h"
+#include "kernel/device/cpu/kernel.h"
+#include "kernel/kernel_types.h"
+
+#include "kernel/osl/osl_shader.h"
+#include "kernel/osl/osl_globals.h"
+// clang-format on
+
+#include "bvh/bvh_embree.h"
+
+#include "render/buffers.h"
+
+#include "util/util_debug.h"
+#include "util/util_foreach.h"
+#include "util/util_function.h"
+#include "util/util_logging.h"
+#include "util/util_map.h"
+#include "util/util_opengl.h"
+#include "util/util_openimagedenoise.h"
+#include "util/util_optimization.h"
+#include "util/util_progress.h"
+#include "util/util_system.h"
+#include "util/util_task.h"
+#include "util/util_thread.h"
+
+CCL_NAMESPACE_BEGIN
+
+CPUDevice::CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
+    : Device(info_, stats_, profiler_), texture_info(this, "__texture_info", MEM_GLOBAL)
+{
+  /* Pick any kernel, all of them are supposed to have same level of microarchitecture
+   * optimization. */
+  VLOG(1) << "Will be using " << kernels.integrator_init_from_camera.get_uarch_name()
+          << " kernels.";
+
+  if (info.cpu_threads == 0) {
+    info.cpu_threads = TaskScheduler::num_threads();
+  }
+
+#ifdef WITH_OSL
+  kernel_globals.osl = &osl_globals;
+#endif
+#ifdef WITH_EMBREE
+  embree_device = rtcNewDevice("verbose=0");
+#endif
+  need_texture_info = false;
+}
+
+CPUDevice::~CPUDevice()
+{
+#ifdef WITH_EMBREE
+  rtcReleaseDevice(embree_device);
+#endif
+
+  texture_info.free();
+}
+
+bool CPUDevice::show_samples() const
+{
+  return (info.cpu_threads == 1);
+}
+
+BVHLayoutMask CPUDevice::get_bvh_layout_mask() const
+{
+  BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
+#ifdef WITH_EMBREE
+  bvh_layout_mask |= BVH_LAYOUT_EMBREE;
+#endif /* WITH_EMBREE */
+  return bvh_layout_mask;
+}
+
+bool CPUDevice::load_texture_info()
+{
+  if (!need_texture_info) {
+    return false;
+  }
+
+  texture_info.copy_to_device();
+  need_texture_info = false;
+
+  return true;
+}
+
+void CPUDevice::mem_alloc(device_memory &mem)
+{
+  if (mem.type == MEM_TEXTURE) {
+    assert(!"mem_alloc not supported for textures.");
+  }
+  else if (mem.type == MEM_GLOBAL) {
+    assert(!"mem_alloc not supported for global memory.");
+  }
+  else {
+    if (mem.name) {
+      VLOG(1) << "Buffer allocate: " << mem.name << ", "
+              << string_human_readable_number(mem.memory_size()) << " bytes. ("
+              << string_human_readable_size(mem.memory_size()) << ")";
+    }
+
+    if (mem.type == MEM_DEVICE_ONLY) {
+      assert(!mem.host_pointer);
+      size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES;
+      void *data = util_aligned_malloc(mem.memory_size(), alignment);
+      mem.device_pointer = (device_ptr)data;
+    }
+    else {
+      mem.device_pointer = (device_ptr)mem.host_pointer;
+    }
+
+    mem.device_size = mem.memory_size();
+    stats.mem_alloc(mem.device_size);
+  }
+}
+
+void CPUDevice::mem_copy_to(device_memory &mem)
+{
+  if (mem.type == MEM_GLOBAL) {
+    global_free(mem);
+    global_alloc(mem);
+  }
+  else if (mem.type == MEM_TEXTURE) {
+    tex_free((device_texture &)mem);
+    tex_alloc((device_texture &)mem);
+  }
+  else {
+    if (!mem.device_pointer) {
+      mem_alloc(mem);
+    }
+
+    /* copy is no-op */
+  }
+}
+
+void CPUDevice::mem_copy_from(
+    device_memory & /*mem*/, int /*y*/, int /*w*/, int /*h*/, int /*elem*/)
+{
+  /* no-op */
+}
+
+void CPUDevice::mem_zero(device_memory &mem)
+{
+  if (!mem.device_pointer) {
+    mem_alloc(mem);
+  }
+
+  if (mem.device_pointer) {
+    memset((void *)mem.device_pointer, 0, mem.memory_size());
+  }
+}
+
+void CPUDevice::mem_free(device_memory &mem)
+{
+  if (mem.type == MEM_GLOBAL) {
+    global_free(mem);
+  }
+  else if (mem.type == MEM_TEXTURE) {
+    tex_free((device_texture &)mem);
+  }
+  else if (mem.device_pointer) {
+    if (mem.type == MEM_DEVICE_ONLY) {
+      util_aligned_free((void *)mem.device_pointer);
+    }
+    mem.device_pointer = 0;
+    stats.mem_free(mem.device_size);
+    mem.device_size = 0;
+  }
+}
+
+device_ptr CPUDevice::mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/)
+{
+  return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
+}
+
+void CPUDevice::const_copy_to(const char *name, void *host, size_t size)
+{
+#if WITH_EMBREE
+  if (strcmp(name, "__data") == 0) {
+    assert(size <= sizeof(KernelData));
+
+    // Update scene handle (since it is different for each device on multi devices)
+    KernelData *const data = (KernelData *)host;
+    data->bvh.scene = embree_scene;
+  }
+#endif
+  kernel_const_copy(&kernel_globals, name, host, size);
+}
+
+void CPUDevice::global_alloc(device_memory &mem)
+{
+  VLOG(1) << "Global memory allocate: " << mem.name << ", "
+          << string_human_readable_number(mem.memory_size()) << " bytes. ("
+          << string_human_readable_size(mem.memory_size()) << ")";
+
+  kernel_global_memory_copy(&kernel_globals, mem.name, mem.host_pointer, mem.data_size);
+
+  mem.device_pointer = (device_ptr)mem.host_pointer;
+  mem.device_size = mem.memory_size();
+  stats.mem_alloc(mem.device_size);
+}
+
+void CPUDevice::global_free(device_memory &mem)
+{
+  if (mem.device_pointer) {
+    mem.device_pointer = 0;
+    stats.mem_free(mem.device_size);
+    mem.device_size = 0;
+  }
+}
+
+void CPUDevice::tex_alloc(device_texture &mem)
+{
+  VLOG(1) << "Texture allocate: " << mem.name << ", "
+          << string_human_readable_number(mem.memory_size()) << " bytes. ("
+          << string_human_readable_size(mem.memory_size()) << ")";
+
+  mem.device_pointer = (device_ptr)mem.host_pointer;
+  mem.device_size = mem.memory_size();
+  stats.mem_alloc(mem.device_size);
+
+  const uint slot = mem.slot;
+  if (slot >= texture_info.size()) {
+    /* Allocate some slots in advance, to reduce amount of re-allocations. */
+    texture_info.resize(slot + 128);
+  }
+
+  texture_info[slot] = mem.info;
+  texture_info[slot].data = (uint64_t)mem.host_pointer;
+  need_texture_info = true;
+}
+
+void CPUDevice::tex_free(device_texture &mem)
+{
+  if (mem.device_pointer) {
+    mem.device_pointer = 0;
+    stats.mem_free(mem.device_size);
+    mem.device_size = 0;
+    need_texture_info = true;
+  }
+}
+
+void CPUDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
+{
+#ifdef WITH_EMBREE
+  if (bvh->params.bvh_layout == BVH_LAYOUT_EMBREE ||
+      bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE) {
+    BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh);
+    if (refit) {
+      bvh_embree->refit(progress);
+    }
+    else {
+      bvh_embree->build(progress, &stats, embree_device);
+    }
+
+    if (bvh->params.top_level) {
+      embree_scene = bvh_embree->scene;
+    }
+  }
+  else
+#endif
+    Device::build_bvh(bvh, progress, refit);
+}
+
+#if 0
+void CPUDevice::render(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
+{
+  const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE;
+
+  scoped_timer timer(&tile.buffers->render_time);
+
+  Coverage coverage(kg, tile);
+  if (use_coverage) {
+    coverage.init_path_trace();
+  }
+
+  float *render_buffer = (float *)tile.buffer;
+  int start_sample = tile.start_sample;
+  int end_sample = tile.start_sample + tile.num_samples;
+
+  /* Needed for Embree. */
+  SIMD_SET_FLUSH_TO_ZERO;
+
+  for (int sample = start_sample; sample < end_sample; sample++) {
+    if (task.get_cancel() || TaskPool::canceled()) {
+      if (task.need_finish_queue == false)
+        break;
+    }
+
+    if (tile.stealing_state == RenderTile::CAN_BE_STOLEN && task.get_tile_stolen()) {
+      tile.stealing_state = RenderTile::WAS_STOLEN;
+      break;
+    }
+
+    if (tile.task == RenderTile::PATH_TRACE) {
+      for (int y = tile.y; y < tile.y + tile.h; y++) {
+        for (int x = tile.x; x < tile.x + tile.w; x++) {
+          if (use_coverage) {
+            coverage.init_pixel(x, y);
+          }
+          kernels.path_trace(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
+        }
+      }
+    }
+    else {
+      for (int y = tile.y; y < tile.y + tile.h; y++) {
+        for (int x = tile.x; x < tile.x + tile.w; x++) {
+          kernels.bake(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
+        }
+      }
+    }
+    tile.sample = sample + 1;
+
+    if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(sample)) {
+      const bool stop = adaptive_sampling_filter(kg, tile, sample);
+      if (stop) {
+        const int num_progress_samples = end_sample - sample;
+        tile.sample = end_sample;
+        task.update_progress(&tile, tile.w * tile.h * num_progress_samples);
+        break;
+      }
+    }
+
+    task.update_progress(&tile, tile.w * tile.h);
+  }
+  if (use_coverage) {
+    coverage.finalize();
+  }
+
+  if (task.adaptive_sampling.use && (tile.stealing_state != RenderTile::WAS_STOLEN)) {
+    adaptive_sampling_post(tile, kg);
+  }
+}
+
+void CPUDevice::thread_render(DeviceTask &task)
+{
+  if (TaskPool::canceled()) {
+    if (task.need_finish_queue == false)
+      return;
+  }
+
+  /* allocate buffer for kernel globals */
+  CPUKernelThreadGlobals kg(kernel_globals, get_cpu_osl_memory());
+
+  profiler.add_state(&kg.profiler);
+
+  /* NLM denoiser. */
+  DenoisingTask *denoising = NULL;
+
+  /* OpenImageDenoise: we can only denoise with one thread at a time, so to
+   * avoid waiting with mutex locks in the denoiser, we let only a single
+   * thread acquire denoising tiles. */
+  uint tile_types = task.tile_types;
+  bool hold_denoise_lock = false;
+  if ((tile_types & RenderTile::DENOISE) && task.denoising.type == DENOISER_OPENIMAGEDENOISE) {
+    if (!oidn_task_lock.try_lock()) {
+      tile_types &= ~RenderTile::DENOISE;
+      hold_denoise_lock = true;
+    }
+  }
+
+  RenderTile tile;
+  while (task.acquire_tile(this, tile, tile_types)) {
+    if (tile.task == RenderTile::PATH_TRACE) {
+      render(task, tile, &kg);
+    }
+    else if (tile.task == RenderTile::BAKE) {
+      render(task, tile, &kg);
+    }
+    else if (tile.task == RenderTile::DENOISE) {
+      denoise_openimagedenoise(task, tile);
+      task.update_progress(&tile, tile.w * tile.h);
+    }
+
+    task.release_tile(tile);
+
+    if (TaskPool::canceled()) {
+      if (task.need_finish_queue == false)
+        break;
+    }
+  }
+
+  if (hold_denoise_lock) {
+    oidn_task_lock.unlock();
+  }
+
+  profiler.remove_state(&kg.profiler);
+
+  delete denoising;
+}
+
+void CPUDevice::thread_denoise(DeviceTask &task)
+{
+  RenderTile tile;
+  tile.x = task.x;
+  tile.y = task.y;
+  tile.w = task.w;
+  tile.h = task.h;
+  tile.buffer = task.buffer;
+  tile.sample = task.sample + task.num_samples;
+  tile.num_samples = task.num_samples;
+  tile.start_sample = task.sample;
+  tile.offset = task.offset;
+  tile.stride = task.stride;
+  tile.buffers = task.buffers;
+
+  denoise_openimagedenoise(task, tile);
+
+  task.update_progress(&tile, tile.w * tile.h);
+}
+#endif
+
+const CPUKernels *CPUDevice::get_cpu_kernels() const
+{
+  return &kernels;
+}
+
+void CPUDevice::get_cpu_kernel_thread_globals(
+    vector<CPUKernelThreadGlobals> &kernel_thread_globals)
+{
+  /* Ensure latest texture info is loaded into kernel globals before returning. */
+  load_texture_info();
+
+  kernel_thread_globals.clear();
+  void *osl_memory = get_cpu_osl_memory();
+  for (int i = 0; i < info.cpu_threads; i++) {
+    kernel_thread_globals.emplace_back(kernel_globals, osl_memory, profiler);
+  }
+}
+
+void *CPUDevice::get_cpu_osl_memory()
+{
+#ifdef WITH_OSL
+  return &osl_globals;
+#else
+  return NULL;
+#endif
+}
+
+bool CPUDevice::load_kernels(const uint /*kernel_features*/)
+{
+  return true;
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/device/cpu/device_impl.h b/intern/cycles/device/cpu/device_impl.h
new file mode 100644
index 00000000000..7d222808652
--- /dev/null
+++ b/intern/cycles/device/cpu/device_impl.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+/* So ImathMath is included before our kernel_cpu_compat. */
+#ifdef WITH_OSL
+/* So no context pollution happens from indirectly included windows.h */
+#  include "util/util_windows.h"
+#  include <OSL/oslexec.h>
+#endif
+
+#ifdef WITH_EMBREE
+#  include <embree3/rtcore.h>
+#endif
+
+#include "device/cpu/kernel.h"
+#include "device/device.h"
+#include "device/device_memory.h"
+
+// clang-format off
+#include "kernel/device/cpu/compat.h"
+#include "kernel/device/cpu/kernel.h"
+#include "kernel/device/cpu/globals.h"
+
+#include "kernel/osl/osl_shader.h"
+#include "kernel/osl/osl_globals.h"
+// clang-format on
+
+CCL_NAMESPACE_BEGIN
+
+class CPUDevice : public Device {
+ public:
+  KernelGlobals kernel_globals;
+
+  device_vector<TextureInfo> texture_info;
+  bool need_texture_info;
+
+#ifdef WITH_OSL
+  OSLGlobals osl_globals;
+#endif
+#ifdef WITH_EMBREE
+  RTCScene embree_scene = NULL;
+  RTCDevice embree_device;
+#endif
+
+  CPUKernels kernels;
+
+  CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_);
+  ~CPUDevice();
+
+  virtual bool show_samples() const override;
+
+  virtual BVHLayoutMask get_bvh_layout_mask() const override;
+
+  /* Returns true if the texture info was copied to the device (meaning, some more
+   * re-initialization might be needed). */
+  bool load_texture_info();
+
+  virtual void mem_alloc(device_memory &mem) override;
+  virtual void mem_copy_to(device_memory &mem) override;
+  virtual void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override;
+  virtual void mem_zero(device_memory &mem) override;
+  virtual void mem_free(device_memory &mem) override;
+  virtual device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/) override;
+
+  virtual void const_copy_to(const char *name, void *host, size_t size) override;
+
+  void global_alloc(device_memory &mem);
+  void global_free(device_memory &mem);
+
+  void tex_alloc(device_texture &mem);
+  void tex_free(device_texture &mem);
+
+  void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
+
+  virtual const CPUKernels *get_cpu_kernels() const override;
+  virtual void get_cpu_kernel_thread_globals(
+      vector<CPUKernelThreadGlobals> &kernel_thread_globals) override;
+  virtual void *get_cpu_osl_memory() override;
+
+ protected:
+  virtual bool load_kernels(uint /*kernel_features*/) override;
+};
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/device/cpu/kernel.cpp b/intern/cycles/device/cpu/kernel.cpp
new file mode 100644
index 00000000000..0ab58ff8600
--- /dev/null
+++ b/intern/cycles/device/cpu/kernel.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "device/cpu/kernel.h"
+
+#include "kernel/device/cpu/kernel.h"
+
+CCL_NAMESPACE_BEGIN
+
+#define KERNEL_FUNCTIONS(name) \
+  KERNEL_NAME_EVAL(cpu, name), KERNEL_NAME_EVAL(cpu_sse2, name), \
+      KERNEL_NAME_EVAL(cpu_sse3, name), KERNEL_NAME_EVAL(cpu_sse41, name), \
+      KERNEL_NAME_EVAL(cpu_avx, name), KERNEL_NAME_EVAL(cpu_avx2, name)
+
+#define REGISTER_KERNEL(name) name(KERNEL_FUNCTIONS(name))
+
+CPUKernels::CPUKernels()
+    : /* Integrator. */
+      REGISTER_KERNEL(integrator_init_from_camera),
+      REGISTER_KERNEL(integrator_init_from_bake),
+      REGISTER_KERNEL(integrator_intersect_closest),
+      REGISTER_KERNEL(integrator_intersect_shadow),
+      REGISTER_KERNEL(integrator_intersect_subsurface),
+      REGISTER_KERNEL(integrator_intersect_volume_stack),
+      REGISTER_KERNEL(integrator_shade_background),
+      REGISTER_KERNEL(integrator_shade_light),
+      REGISTER_KERNEL(integrator_shade_shadow),
+      REGISTER_KERNEL(integrator_shade_surface),
+      REGISTER_KERNEL(integrator_shade_volume),
+      REGISTER_KERNEL(integrator_megakernel),
+      /* Shader evaluation. */
+      REGISTER_KERNEL(shader_eval_displace),
+      REGISTER_KERNEL(shader_eval_background),
+      /* Adaptive campling. */
+      REGISTER_KERNEL(adaptive_sampling_convergence_check),
+      REGISTER_KERNEL(adaptive_sampling_filter_x),
+      REGISTER_KERNEL(adaptive_sampling_filter_y),
+      /* Cryptomatte. */
+      REGISTER_KERNEL(cryptomatte_postprocess),
+      /* Bake. */
+      REGISTER_KERNEL(bake)
+{
+}
+
+#undef REGISTER_KERNEL
+#undef KERNEL_FUNCTIONS
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/device/cpu/kernel.h b/intern/cycles/device/cpu/kernel.h
new file mode 100644
index 00000000000..54b18308544
--- /dev/null
+++ b/intern/cycles/device/cpu/kernel.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "device/cpu/kernel_function.h"
+#include "util/util_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+struct KernelGlobals;
+struct IntegratorStateCPU;
+struct TileInfo;
+
+class CPUKernels {
+ public:
+  /* Integrator. */
+
+  using IntegratorFunction =
+      CPUKernelFunction<void (*)(const KernelGlobals *kg, IntegratorStateCPU *state)>;
+  using IntegratorShadeFunction = CPUKernelFunction<void (*)(
+      const KernelGlobals *kg, IntegratorStateCPU *state, ccl_global float *render_buffer)>;
+  using IntegratorInitFunction = CPUKernelFunction<bool (*)(const KernelGlobals *kg,
+                                                            IntegratorStateCPU *state,
+                                                            KernelWorkTile *tile,
+                                                            ccl_global float *render_buffer)>;
+
+  IntegratorInitFunction integrator_init_from_camera;
+  IntegratorInitFunction integrator_init_from_bake;
+  IntegratorFunction integrator_intersect_closest;
+  IntegratorFunction integrator_intersect_shadow;
+  IntegratorFunction integrator_intersect_subsurface;
+  IntegratorFunction integrator_intersect_volume_stack;
+  IntegratorShadeFunction integrator_shade_background;
+  IntegratorShadeFunction integrator_shade_light;
+  IntegratorShadeFunction integrator_shade_shadow;
+  IntegratorShadeFunction integrator_shade_surface;
+  IntegratorShadeFunction integrator_shade_volume;
+  IntegratorShadeFunction integrator_megakernel;
+
+  /* Shader evaluation. */
+
+  using ShaderEvalFunction = CPUKernelFunction<void (*)(
+      const KernelGlobals *kg, const KernelShaderEvalInput *, float4 *, const int)>;
+
+  ShaderEvalFunction shader_eval_displace;
+  ShaderEvalFunction shader_eval_background;
+
+  /* Adaptive stopping. */
+
+  using AdaptiveSamplingConvergenceCheckFunction =
+      CPUKernelFunction<bool (*)(const KernelGlobals *kg,
+                                 ccl_global float *render_buffer,
+                                 int x,
+                                 int y,
+                                 float threshold,
+                                 bool reset,
+                                 int offset,
+                                 int stride)>;
+
+  using AdaptiveSamplingFilterXFunction =
+      CPUKernelFunction<void (*)(const KernelGlobals *kg,
+                                 ccl_global float *render_buffer,
+                                 int y,
+                                 int start_x,
+                                 int width,
+                                 int offset,
+                                 int stride)>;
+
+  using AdaptiveSamplingFilterYFunction =
+      CPUKernelFunction<void (*)(const KernelGlobals *kg,
+                                 ccl_global float *render_buffer,
+                                 int x,
+                                 int start_y,
+                                 int height,
+                                 int offset,
+                                 int stride)>;
+
+  AdaptiveSamplingConvergenceCheckFunction adaptive_sampling_convergence_check;
+
+  AdaptiveSamplingFilterXFunction adaptive_sampling_filter_x;
+  AdaptiveSamplingFilterYFunction adaptive_sampling_filter_y;
+
+  /* Cryptomatte. */
+
+  using CryptomattePostprocessFunction = CPUKernelFunction<void (*)(
+      const KernelGlobals *kg, ccl_global float *render_buffer, int pixel_index)>;
+
+  CryptomattePostprocessFunction cryptomatte_postprocess;
+
+  /* Bake. */
+
+  CPUKernelFunction<void (*)(const KernelGlobals *, float *, int, int, int, int, int)> bake;
+
+  CPUKernels();
+};
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/device/cpu/kernel_function.h b/intern/cycles/device/cpu/kernel_function.h
new file mode 100644
index 00000000000..aa18720cc24
--- /dev/null
+++ b/intern/cycles/device/cpu/kernel_function.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "util/util_debug.h"
+#include "util/util_system.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* A wrapper around per-microarchitecture variant of a kernel function.
+ *
+ * Provides a function-call-like API which gets routed to the most suitable implementation.
+ *
+ * For example, on a computer which only has SSE4.1 the kernel_sse41 will be used. */
+template<typename FunctionType> class CPUKernelFunction {
+ public:
+  CPUKernelFunction(FunctionType kernel_default,
+                    FunctionType kernel_sse2,
+                    FunctionType kernel_sse3,
+                    FunctionType kernel_sse41,
+                    FunctionType kernel_avx,
+                    FunctionType kernel_avx2)
+  {
+    kernel_info_ = get_best_kernel_info(
+        kernel_default, kernel_sse2, kernel_sse3, kernel_sse41, kernel_avx, kernel_avx2);
+  }
+
+  template<typename... Args> inline auto operator()(Args... args) const
+  {
+    assert(kernel_info_.kernel);
+
+    return kernel_info_.kernel(args...);
+  }
+
+  const char *get_uarch_name() const
+  {
+    return kernel_info_.uarch_name;
+  }
+
+ protected:
+  /* Helper class which allows to pass human-readable microarchitecture name together with function
+   * pointer. */
+  class KernelInfo {
+   public:
+    KernelInfo() : KernelInfo("", nullptr)
+    {
+    }
+
+    /* TODO(sergey): Use string view, to have higher-level functionality (i.e. comparison) without
+     * memory allocation. */
+    KernelInfo(const char *uarch_name, FunctionType kernel)
+        : uarch_name(uarch_name), kernel(kernel)
+    {
+    }
+
+    const char *uarch_name;
+    FunctionType kernel;
+  };
+
+  KernelInfo get_best_kernel_info(FunctionType kernel_default,
+                                  FunctionType kernel_sse2,
+                                  FunctionType kernel_sse3,
+                                  FunctionType kernel_sse41,
+                                  FunctionType kernel_avx,
+                                  FunctionType kernel_avx2)
+  {
+    /* Silence warnings about unused variables when compiling without some architectures. */
+    (void)kernel_sse2;
+    (void)kernel_sse3;
+    (void)kernel_sse41;
+    (void)kernel_avx;
+    (void)kernel_avx2;
+
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
+    if (DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) {
+      return KernelInfo("AVX2", kernel_avx2);
+    }
+#endif
+
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
+    if (DebugFlags().cpu.has_avx() && system_cpu_support_avx()) {
+      return KernelInfo("AVX", kernel_avx);
+    }
+#endif
+
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
+    if (DebugFlags().cpu.has_sse41() && system_cpu_support_sse41()) {
+      return KernelInfo("SSE4.1", kernel_sse41);
+    }
+#endif
+
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
+    if (DebugFlags().cpu.has_sse3() && system_cpu_support_sse3()) {
+      return KernelInfo("SSE3", kernel_sse3);
+    }
+#endif
+
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
+    if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) {
+      return KernelInfo("SSE2", kernel_sse2);
+    }
+#endif
+
+    return KernelInfo("default", kernel_default);
+  }
+
+  KernelInfo kernel_info_;
+};
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/device/cpu/kernel_thread_globals.cpp b/intern/cycles/device/cpu/kernel_thread_globals.cpp
new file mode 100644
index 00000000000..988b00cd1f0
--- /dev/null
+++ b/intern/cycles/device/cpu/kernel_thread_globals.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "device/cpu/kernel_thread_globals.h"
+
+// clang-format off
+#include "kernel/osl/osl_shader.h"
+#include "kernel/osl/osl_globals.h"
+// clang-format on
+
+#include "util/util_profiling.h"
+
+CCL_NAMESPACE_BEGIN
+
+CPUKernelThreadGlobals::CPUKernelThreadGlobals(const KernelGlobals &kernel_globals,
+                                               void *osl_globals_memory,
+                                               Profiler &cpu_profiler)
+    : KernelGlobals(kernel_globals), cpu_profiler_(cpu_profiler)
+{
+  reset_runtime_memory();
+
+#ifdef WITH_OSL
+  OSLShader::thread_init(this, reinterpret_cast<OSLGlobals *>(osl_globals_memory));
+#else
+  (void)osl_globals_memory;
+#endif
+}
+
+CPUKernelThreadGlobals::CPUKernelThreadGlobals(CPUKernelThreadGlobals &&other) noexcept
+    : KernelGlobals(std::move(other)), cpu_profiler_(other.cpu_profiler_)
+{
+  other.reset_runtime_memory();
+}
+
+CPUKernelThreadGlobals::~CPUKernelThreadGlobals()
+{
+#ifdef WITH_OSL
+  OSLShader::thread_free(this);
+#endif
+}
+
+CPUKernelThreadGlobals &CPUKernelThreadGlobals::operator=(CPUKernelThreadGlobals &&other)
+{
+  if (this == &other) {
+    return *this;
+  }
+
+  *static_cast<KernelGlobals *>(this) = *static_cast<KernelGlobals *>(&other);
+
+  other.reset_runtime_memory();
+
+  return *this;
+}
+
+void CPUKernelThreadGlobals::reset_runtime_memory()
+{
+#ifdef WITH_OSL
+  osl = nullptr;
+#endif
+}
+
+void CPUKernelThreadGlobals::start_profiling()
+{
+  cpu_profiler_.add_state(&profiler);
+}
+
+void CPUKernelThreadGlobals::stop_profiling()
+{
+  cpu_profiler_.remove_state(&profiler);
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/device/cpu/kernel_thread_globals.h b/intern/cycles/device/cpu/kernel_thread_globals.h
new file mode 100644
index 00000000000..d005c3bb56c
--- /dev/null
+++ b/intern/cycles/device/cpu/kernel_thread_globals.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "kernel/device/cpu/compat.h"
+#include "kernel/device/cpu/globals.h"
+
+CCL_NAMESPACE_BEGIN
+
+class Profiler;
+
+/* A special class which extends memory ownership of the `KernelGlobals` decoupling any resource
+ * which is not thread-safe for access. Every worker thread which needs to operate on
+ * `KernelGlobals` needs to initialize its own copy of this object.
+ *
+ * NOTE: Only minimal subset of objects are copied: `KernelData` is never copied. This means that
+ * there is no unnecessary data duplication happening when using this object. */
+class CPUKernelThreadGlobals : public KernelGlobals {
+ public:
+  /* TODO(sergey): Would be nice to have properly typed OSLGlobals even in the case when building
+   * without OSL support. Will avoid need to those unnamed pointers and casts. */
+  CPUKernelThreadGlobals(const KernelGlobals &kernel_globals,
+                         void *osl_globals_memory,
+                         Profiler &cpu_profiler);
+
+  ~CPUKernelThreadGlobals();
+
+  CPUKernelThreadGlobals(const CPUKernelThreadGlobals &other) = delete;
+  CPUKernelThreadGlobals(CPUKernelThreadGlobals &&other) noexcept;
+
+  CPUKernelThreadGlobals &operator=(const CPUKernelThreadGlobals &other) = delete;
+  CPUKernelThreadGlobals &operator=(CPUKernelThreadGlobals &&other);
+
+  void start_profiling();
+  void stop_profiling();
+
+ protected:
+  void reset_runtime_memory();
+
+  Profiler &cpu_profiler_;
+};
+
+CCL_NAMESPACE_END
author	Brecht Van Lommel <brecht@blender.org>	2021-09-20 18:59:20 +0300
committer	Brecht Van Lommel <brecht@blender.org>	2021-09-21 15:55:54 +0300
commit	08031197250aeecbaca3803254e6f25b8c7b7b37 (patch)
tree	6fe7ab045f0dc0a423d6557c4073f34309ef4740 /intern/cycles/device/cpu
parent	fa6b1007bad065440950cd67deb16a04f368856f (diff)