Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/device/cpu')
-rw-r--r-- intern/cycles/device/cpu/device.cpp 64
-rw-r--r-- intern/cycles/device/cpu/device.h 35
-rw-r--r-- intern/cycles/device/cpu/device_impl.cpp 481
-rw-r--r-- intern/cycles/device/cpu/device_impl.h 99
-rw-r--r-- intern/cycles/device/cpu/kernel.cpp 61
-rw-r--r-- intern/cycles/device/cpu/kernel.h 111
-rw-r--r-- intern/cycles/device/cpu/kernel_function.h 124
-rw-r--r-- intern/cycles/device/cpu/kernel_thread_globals.cpp 85
-rw-r--r-- intern/cycles/device/cpu/kernel_thread_globals.h 57
9 files changed, 1117 insertions, 0 deletions
diff --git a/intern/cycles/device/cpu/device.cpp b/intern/cycles/device/cpu/device.cpp
new file mode 100644
index 00000000000..68ca8e8bb22
--- /dev/null
+++ b/intern/cycles/device/cpu/device.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "device/cpu/device.h"
+#include "device/cpu/device_impl.h"
+
+/* Used for `info.denoisers`. */
+/* TODO(sergey): The denoisers are probably to be moved completely out of the device into their
+ * own class. But until then keep API consistent with how it used to work before. */
+#include "util/util_openimagedenoise.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Create the CPU render device for the given device description.
+ *
+ * The device is allocated with `new` and nothing in this function keeps the pointer, so
+ * releasing it is left to the caller. */
+Device *device_cpu_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
+{
+  Device *const cpu_device = new CPUDevice(info, stats, profiler);
+  return cpu_device;
+}
+
+/* Describe the host CPU as a render device and put that description at the front of
+ * `devices`. */
+void device_cpu_info(vector<DeviceInfo> &devices)
+{
+  DeviceInfo cpu_info;
+
+  /* Identification. */
+  cpu_info.type = DEVICE_CPU;
+  cpu_info.description = system_cpu_brand_string();
+  cpu_info.id = "CPU";
+  cpu_info.num = 0;
+
+  /* Features flagged as available on the CPU device. */
+  cpu_info.has_osl = true;
+  cpu_info.has_half_images = true;
+  cpu_info.has_nanovdb = true;
+  cpu_info.has_profiling = true;
+
+  /* The OpenImageDenoise denoiser is advertised only when the support query succeeds. */
+  if (openimagedenoise_supported()) {
+    cpu_info.denoisers |= DENOISER_OPENIMAGEDENOISE;
+  }
+
+  /* Insert at the beginning of the list rather than appending. */
+  devices.insert(devices.begin(), cpu_info);
+}
+
+/* Build a space-separated list of the SIMD instruction sets the host CPU reports as supported.
+ *
+ * The names are appended in the fixed order SSE2, SSE3, SSE41, AVX, AVX2. The result has no
+ * trailing space, and is an empty string when none of the queries succeeds. */
+string device_cpu_capabilities()
+{
+  string capabilities = "";
+  capabilities += system_cpu_support_sse2() ? "SSE2 " : "";
+  capabilities += system_cpu_support_sse3() ? "SSE3 " : "";
+  capabilities += system_cpu_support_sse41() ? "SSE41 " : "";
+  capabilities += system_cpu_support_avx() ? "AVX " : "";
+  capabilities += system_cpu_support_avx2() ? "AVX2" : "";
+
+  /* Drop the separator left behind when AVX2 is not the last entry. The emptiness check matters:
+   * on an empty string `size() - 1` wraps around and the indexing would be out of range. */
+  if (!capabilities.empty() && capabilities.back() == ' ') {
+    capabilities.pop_back();
+  }
+  return capabilities;
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/device/cpu/device.h b/intern/cycles/device/cpu/device.h
new file mode 100644
index 00000000000..9cb2e80068d
--- /dev/null
+++ b/intern/cycles/device/cpu/device.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "util/util_string.h"
+#include "util/util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+class Device;
+class DeviceInfo;
+class Profiler;
+class Stats;
+
+/* Allocate a new CPU render device for `info`. The definition in `device/cpu/device.cpp`
+ * creates the object with `new`. */
+Device *device_cpu_create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
+
+/* Insert a `DeviceInfo` describing the host CPU at the front of `devices`. */
+void device_cpu_info(vector<DeviceInfo> &devices);
+
+/* Return a space-separated list of the SIMD instruction sets (SSE2 .. AVX2) which the host CPU
+ * reports as supported. */
+string device_cpu_capabilities();
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/device/cpu/device_impl.cpp b/intern/cycles/device/cpu/device_impl.cpp
new file mode 100644
index 00000000000..3b0db6bdd0e
--- /dev/null
+++ b/intern/cycles/device/cpu/device_impl.cpp
@@ -0,0 +1,481 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "device/cpu/device_impl.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/* So ImathMath is included before our kernel_cpu_compat. */
+#ifdef WITH_OSL
+/* So no context pollution happens from indirectly included windows.h */
+# include "util/util_windows.h"
+# include <OSL/oslexec.h>
+#endif
+
+#ifdef WITH_EMBREE
+# include <embree3/rtcore.h>
+#endif
+
+#include "device/cpu/kernel.h"
+#include "device/cpu/kernel_thread_globals.h"
+
+#include "device/device.h"
+
+// clang-format off
+#include "kernel/device/cpu/compat.h"
+#include "kernel/device/cpu/globals.h"
+#include "kernel/device/cpu/kernel.h"
+#include "kernel/kernel_types.h"
+
+#include "kernel/osl/osl_shader.h"
+#include "kernel/osl/osl_globals.h"
+// clang-format on
+
+#include "bvh/bvh_embree.h"
+
+#include "render/buffers.h"
+
+#include "util/util_debug.h"
+#include "util/util_foreach.h"
+#include "util/util_function.h"
+#include "util/util_logging.h"
+#include "util/util_map.h"
+#include "util/util_opengl.h"
+#include "util/util_openimagedenoise.h"
+#include "util/util_optimization.h"
+#include "util/util_progress.h"
+#include "util/util_system.h"
+#include "util/util_task.h"
+#include "util/util_thread.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Set up the CPU device: log the selected kernel flavor, resolve the thread count, and hook up
+ * the optional OSL and Embree state. */
+CPUDevice::CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
+    : Device(info_, stats_, profiler_), texture_info(this, "__texture_info", MEM_GLOBAL)
+{
+  /* Nothing to upload until the first texture is allocated. */
+  need_texture_info = false;
+
+  /* All kernels are supposed to share the same level of microarchitecture optimization, so any
+   * one of them can be asked for the name. */
+  const char *const uarch_name = kernels.integrator_init_from_camera.get_uarch_name();
+  VLOG(1) << "Will be using " << uarch_name << " kernels.";
+
+  /* A thread count of zero is replaced with the task scheduler's thread count. */
+  if (info.cpu_threads == 0) {
+    info.cpu_threads = TaskScheduler::num_threads();
+  }
+
+#ifdef WITH_OSL
+  kernel_globals.osl = &osl_globals;
+#endif
+#ifdef WITH_EMBREE
+  embree_device = rtcNewDevice("verbose=0");
+#endif
+}
+
+/* Release the Embree device (when built with Embree) and then the texture info storage. */
+CPUDevice::~CPUDevice()
+{
+#ifdef WITH_EMBREE
+  /* Counterpart of the `rtcNewDevice()` call in the constructor. */
+  rtcReleaseDevice(embree_device);
+#endif
+
+  texture_info.free();
+}
+
+/* Returns true only when the device is configured to use exactly one CPU thread. */
+bool CPUDevice::show_samples() const
+{
+  const bool is_single_threaded = (info.cpu_threads == 1);
+  return is_single_threaded;
+}
+
+/* Mask of the BVH layouts this device accepts: BVH2 always, plus Embree when compiled in. */
+BVHLayoutMask CPUDevice::get_bvh_layout_mask() const
+{
+#ifdef WITH_EMBREE
+  return BVH_LAYOUT_BVH2 | BVH_LAYOUT_EMBREE;
+#else
+  return BVH_LAYOUT_BVH2;
+#endif /* WITH_EMBREE */
+}
+
+/* Copy the texture info table to the device if it was flagged as modified.
+ *
+ * Returns true when a copy happened, false when there was nothing to do. */
+bool CPUDevice::load_texture_info()
+{
+  if (need_texture_info) {
+    texture_info.copy_to_device();
+    need_texture_info = false;
+    return true;
+  }
+
+  return false;
+}
+
+/* Allocate generic device memory.
+ *
+ * Textures and global memory are not handled here (asserts in debug builds, does nothing
+ * otherwise). `MEM_DEVICE_ONLY` buffers get their own aligned allocation; every other type
+ * simply aliases the host pointer, as host and device share the same memory on the CPU. */
+void CPUDevice::mem_alloc(device_memory &mem)
+{
+  if (mem.type == MEM_TEXTURE) {
+    assert(!"mem_alloc not supported for textures.");
+    return;
+  }
+
+  if (mem.type == MEM_GLOBAL) {
+    assert(!"mem_alloc not supported for global memory.");
+    return;
+  }
+
+  if (mem.name) {
+    VLOG(1) << "Buffer allocate: " << mem.name << ", "
+            << string_human_readable_number(mem.memory_size()) << " bytes. ("
+            << string_human_readable_size(mem.memory_size()) << ")";
+  }
+
+  if (mem.type != MEM_DEVICE_ONLY) {
+    mem.device_pointer = (device_ptr)mem.host_pointer;
+  }
+  else {
+    /* Device-only memory is not expected to have a host side counterpart. */
+    assert(!mem.host_pointer);
+    const size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES;
+    void *const allocation = util_aligned_malloc(mem.memory_size(), alignment);
+    mem.device_pointer = (device_ptr)allocation;
+  }
+
+  /* Account for the allocation in the statistics. */
+  mem.device_size = mem.memory_size();
+  stats.mem_alloc(mem.device_size);
+}
+
+/* Make the host data of `mem` available to the device.
+ *
+ * Global and texture memory are re-registered (free followed by alloc). For every other type
+ * the memory is allocated on first use and no actual copy is needed. */
+void CPUDevice::mem_copy_to(device_memory &mem)
+{
+  switch (mem.type) {
+    case MEM_GLOBAL:
+      global_free(mem);
+      global_alloc(mem);
+      break;
+    case MEM_TEXTURE:
+      tex_free((device_texture &)mem);
+      tex_alloc((device_texture &)mem);
+      break;
+    default:
+      if (!mem.device_pointer) {
+        mem_alloc(mem);
+      }
+      /* copy is no-op */
+      break;
+  }
+}
+
+/* Copying back from the device does nothing on the CPU; all arguments are ignored. */
+void CPUDevice::mem_copy_from(
+    device_memory & /*mem*/, int /*y*/, int /*w*/, int /*h*/, int /*elem*/)
+{
+  /* Intentionally empty. */
+}
+
+/* Fill the memory of `mem` with zero bytes, allocating it first when needed. */
+void CPUDevice::mem_zero(device_memory &mem)
+{
+  if (!mem.device_pointer) {
+    mem_alloc(mem);
+  }
+
+  /* The allocation above may have left the pointer unset, nothing to clear in that case. */
+  if (!mem.device_pointer) {
+    return;
+  }
+
+  memset((void *)mem.device_pointer, 0, mem.memory_size());
+}
+
+/* Release device memory.
+ *
+ * Global and texture memory are forwarded to their dedicated functions. For the other types only
+ * `MEM_DEVICE_ONLY` buffers own an allocation which has to be freed; the rest alias the host
+ * pointer and only need their bookkeeping reset. */
+void CPUDevice::mem_free(device_memory &mem)
+{
+  if (mem.type == MEM_GLOBAL) {
+    global_free(mem);
+    return;
+  }
+
+  if (mem.type == MEM_TEXTURE) {
+    tex_free((device_texture &)mem);
+    return;
+  }
+
+  if (!mem.device_pointer) {
+    return;
+  }
+
+  if (mem.type == MEM_DEVICE_ONLY) {
+    util_aligned_free((void *)mem.device_pointer);
+  }
+
+  mem.device_pointer = 0;
+  stats.mem_free(mem.device_size);
+  mem.device_size = 0;
+}
+
+/* Return the address inside `mem` which lies `offset` elements past its beginning. The byte
+ * distance is obtained from `mem.memory_elements_size(offset)`; the size argument is unused. */
+device_ptr CPUDevice::mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/)
+{
+  char *const base = (char *)mem.device_pointer;
+  return (device_ptr)(base + mem.memory_elements_size(offset));
+}
+
+/* Copy the named block of constant data from `host` into the kernel globals.
+ *
+ * When built with Embree and the block is "__data", the host data is interpreted as `KernelData`
+ * and its BVH scene handle is overwritten with this device's Embree scene before the copy. Note
+ * that this modifies the caller's buffer. */
+void CPUDevice::const_copy_to(const char *name, void *host, size_t size)
+{
+/* Use `#ifdef` like every other Embree guard in this file; `#if WITH_EMBREE` fails to preprocess
+ * when the macro is defined without a value. */
+#ifdef WITH_EMBREE
+  if (strcmp(name, "__data") == 0) {
+    assert(size <= sizeof(KernelData));
+
+    /* Update scene handle (since it is different for each device on multi devices). */
+    KernelData *const data = (KernelData *)host;
+    data->bvh.scene = embree_scene;
+  }
+#endif
+  kernel_const_copy(&kernel_globals, name, host, size);
+}
+
+/* Register the host data of `mem` as global memory in the kernel globals (looked up by name),
+ * and account for its size in the statistics. The device pointer aliases the host pointer. */
+void CPUDevice::global_alloc(device_memory &mem)
+{
+  VLOG(1) << "Global memory allocate: " << mem.name << ", "
+          << string_human_readable_number(mem.memory_size()) << " bytes. ("
+          << string_human_readable_size(mem.memory_size()) << ")";
+
+  void *const host_data = mem.host_pointer;
+  kernel_global_memory_copy(&kernel_globals, mem.name, host_data, mem.data_size);
+  mem.device_pointer = (device_ptr)host_data;
+
+  mem.device_size = mem.memory_size();
+  stats.mem_alloc(mem.device_size);
+}
+
+/* Forget a global memory registration: reset the device pointer and size, and subtract the size
+ * from the statistics. Does nothing when the memory was never registered. */
+void CPUDevice::global_free(device_memory &mem)
+{
+  if (!mem.device_pointer) {
+    return;
+  }
+
+  mem.device_pointer = 0;
+  stats.mem_free(mem.device_size);
+  mem.device_size = 0;
+}
+
+/* Register a texture: alias the host pointer as the device pointer, account for the memory, and
+ * store the texture description in the slot of the texture info table given by `mem.slot`. */
+void CPUDevice::tex_alloc(device_texture &mem)
+{
+  VLOG(1) << "Texture allocate: " << mem.name << ", "
+          << string_human_readable_number(mem.memory_size()) << " bytes. ("
+          << string_human_readable_size(mem.memory_size()) << ")";
+
+  mem.device_pointer = (device_ptr)mem.host_pointer;
+  mem.device_size = mem.memory_size();
+  stats.mem_alloc(mem.device_size);
+
+  const uint slot = mem.slot;
+  const bool slot_is_out_of_range = (slot >= texture_info.size());
+  if (slot_is_out_of_range) {
+    /* Grow past the requested slot, so that following textures do not each trigger a
+     * re-allocation. */
+    texture_info.resize(slot + 128);
+  }
+
+  texture_info[slot] = mem.info;
+  texture_info[slot].data = (uint64_t)mem.host_pointer;
+
+  /* The table changed, so it has to be uploaded again by `load_texture_info()`. */
+  need_texture_info = true;
+}
+
+/* Unregister a texture: reset the device pointer and size, subtract the size from the
+ * statistics and flag the texture info for re-upload. Does nothing for unallocated textures.
+ * The entry in the texture info table itself is left as it is. */
+void CPUDevice::tex_free(device_texture &mem)
+{
+  if (!mem.device_pointer) {
+    return;
+  }
+
+  mem.device_pointer = 0;
+  stats.mem_free(mem.device_size);
+  mem.device_size = 0;
+  need_texture_info = true;
+}
+
+/* Build or refit a BVH.
+ *
+ * Embree layouts (when built with Embree) are handled here, and the scene of a top-level BVH is
+ * remembered in `embree_scene`. Every other layout goes to the generic `Device::build_bvh()`. */
+void CPUDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
+{
+#ifdef WITH_EMBREE
+  const bool is_embree_layout = (bvh->params.bvh_layout == BVH_LAYOUT_EMBREE ||
+                                 bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE);
+  if (is_embree_layout) {
+    BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh);
+    if (!refit) {
+      bvh_embree->build(progress, &stats, embree_device);
+    }
+    else {
+      bvh_embree->refit(progress);
+    }
+
+    if (bvh->params.top_level) {
+      embree_scene = bvh_embree->scene;
+    }
+    return;
+  }
+#endif
+
+  Device::build_bvh(bvh, progress, refit);
+}
+
+#if 0
+/* Tile-based render loop. This function sits inside an `#if 0` section and is not compiled.
+ *
+ * Renders the samples `[tile.start_sample, tile.start_sample + tile.num_samples)` of the tile,
+ * one full pass over the tile's pixels per sample, using either the path trace or the bake
+ * kernel depending on `tile.task`. The loop stops early on cancel, when the tile was stolen, or
+ * when the adaptive sampling filter reports that it can stop. */
+void CPUDevice::render(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
+{
+ const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE;
+
+ scoped_timer timer(&tile.buffers->render_time);
+
+ Coverage coverage(kg, tile);
+ if (use_coverage) {
+ coverage.init_path_trace();
+ }
+
+ float *render_buffer = (float *)tile.buffer;
+ int start_sample = tile.start_sample;
+ int end_sample = tile.start_sample + tile.num_samples;
+
+ /* Needed for Embree. */
+ SIMD_SET_FLUSH_TO_ZERO;
+
+ for (int sample = start_sample; sample < end_sample; sample++) {
+ /* A cancel request is ignored while the task asks for its queue to be finished. */
+ if (task.get_cancel() || TaskPool::canceled()) {
+ if (task.need_finish_queue == false)
+ break;
+ }
+
+ if (tile.stealing_state == RenderTile::CAN_BE_STOLEN && task.get_tile_stolen()) {
+ tile.stealing_state = RenderTile::WAS_STOLEN;
+ break;
+ }
+
+ if (tile.task == RenderTile::PATH_TRACE) {
+ for (int y = tile.y; y < tile.y + tile.h; y++) {
+ for (int x = tile.x; x < tile.x + tile.w; x++) {
+ if (use_coverage) {
+ coverage.init_pixel(x, y);
+ }
+ kernels.path_trace(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
+ }
+ }
+ }
+ else {
+ for (int y = tile.y; y < tile.y + tile.h; y++) {
+ for (int x = tile.x; x < tile.x + tile.w; x++) {
+ kernels.bake(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
+ }
+ }
+ }
+ tile.sample = sample + 1;
+
+ if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(sample)) {
+ const bool stop = adaptive_sampling_filter(kg, tile, sample);
+ if (stop) {
+ /* Report all remaining samples as done at once, and mark the tile as fully sampled. */
+ const int num_progress_samples = end_sample - sample;
+ tile.sample = end_sample;
+ task.update_progress(&tile, tile.w * tile.h * num_progress_samples);
+ break;
+ }
+ }
+
+ task.update_progress(&tile, tile.w * tile.h);
+ }
+ if (use_coverage) {
+ coverage.finalize();
+ }
+
+ /* Post-processing of adaptive sampling is skipped for tiles which were stolen. */
+ if (task.adaptive_sampling.use && (tile.stealing_state != RenderTile::WAS_STOLEN)) {
+ adaptive_sampling_post(tile, kg);
+ }
+}
+
+/* Worker entry point: acquire tiles from the task and render or denoise them until no tile is
+ * left or the task is canceled. This function sits inside an `#if 0` section and is not
+ * compiled.
+ *
+ * NOTE(review): the `CPUKernelThreadGlobals` construction below uses two arguments, while the
+ * constructor declared in `kernel_thread_globals.h` takes three. Update it before re-enabling
+ * this code. */
+void CPUDevice::thread_render(DeviceTask &task)
+{
+  if (TaskPool::canceled()) {
+    if (task.need_finish_queue == false)
+      return;
+  }
+
+  /* allocate buffer for kernel globals */
+  CPUKernelThreadGlobals kg(kernel_globals, get_cpu_osl_memory());
+
+  profiler.add_state(&kg.profiler);
+
+  /* OpenImageDenoise: we can only denoise with one thread at a time, so to
+   * avoid waiting with mutex locks in the denoiser, we let only a single
+   * thread acquire denoising tiles.
+   *
+   * The thread which wins the lock keeps denoising tiles enabled and releases the lock when it
+   * is done. All other threads skip denoising tiles and must not touch the lock again. */
+  uint tile_types = task.tile_types;
+  bool hold_denoise_lock = false;
+  if ((tile_types & RenderTile::DENOISE) && task.denoising.type == DENOISER_OPENIMAGEDENOISE) {
+    if (oidn_task_lock.try_lock()) {
+      hold_denoise_lock = true;
+    }
+    else {
+      tile_types &= ~RenderTile::DENOISE;
+    }
+  }
+
+  RenderTile tile;
+  while (task.acquire_tile(this, tile, tile_types)) {
+    if (tile.task == RenderTile::PATH_TRACE) {
+      render(task, tile, &kg);
+    }
+    else if (tile.task == RenderTile::BAKE) {
+      render(task, tile, &kg);
+    }
+    else if (tile.task == RenderTile::DENOISE) {
+      denoise_openimagedenoise(task, tile);
+      task.update_progress(&tile, tile.w * tile.h);
+    }
+
+    task.release_tile(tile);
+
+    if (TaskPool::canceled()) {
+      if (task.need_finish_queue == false)
+        break;
+    }
+  }
+
+  if (hold_denoise_lock) {
+    oidn_task_lock.unlock();
+  }
+
+  profiler.remove_state(&kg.profiler);
+}
+
+/* Denoise the area described by the task in one go. This function sits inside an `#if 0`
+ * section and is not compiled.
+ *
+ * A temporary tile is filled in from the task's area, buffer and sample range, handed to
+ * `denoise_openimagedenoise()`, and reported as progress afterwards. */
+void CPUDevice::thread_denoise(DeviceTask &task)
+{
+ RenderTile tile;
+ tile.x = task.x;
+ tile.y = task.y;
+ tile.w = task.w;
+ tile.h = task.h;
+ tile.buffer = task.buffer;
+ /* The tile counts as fully sampled: its sample is the end of the task's sample range. */
+ tile.sample = task.sample + task.num_samples;
+ tile.num_samples = task.num_samples;
+ tile.start_sample = task.sample;
+ tile.offset = task.offset;
+ tile.stride = task.stride;
+ tile.buffers = task.buffers;
+
+ denoise_openimagedenoise(task, tile);
+
+ task.update_progress(&tile, tile.w * tile.h);
+}
+#endif
+
+/* Access the kernel function table of this device. The returned pointer refers to a member of
+ * the device, so it is only valid for as long as the device exists. */
+const CPUKernels *CPUDevice::get_cpu_kernels() const
+{
+  const CPUKernels *const device_kernels = &kernels;
+  return device_kernels;
+}
+
+/* Replace the content of `kernel_thread_globals` with one freshly created thread-local copy of
+ * the kernel globals for each of the `info.cpu_threads` threads. */
+void CPUDevice::get_cpu_kernel_thread_globals(
+    vector<CPUKernelThreadGlobals> &kernel_thread_globals)
+{
+  /* The copies below are taken from `kernel_globals`, so make sure any pending texture info
+   * update is applied first. */
+  load_texture_info();
+
+  kernel_thread_globals.clear();
+
+  void *const osl_memory = get_cpu_osl_memory();
+  const int num_threads = info.cpu_threads;
+  for (int thread_index = 0; thread_index < num_threads; ++thread_index) {
+    kernel_thread_globals.emplace_back(kernel_globals, osl_memory, profiler);
+  }
+}
+
+/* Untyped pointer to the OSL globals of this device, or a null pointer when built without OSL
+ * support. */
+void *CPUDevice::get_cpu_osl_memory()
+{
+#ifndef WITH_OSL
+  return nullptr;
+#else
+  return &osl_globals;
+#endif
+}
+
+/* Nothing is loaded here for the CPU device: the requested features are ignored and success is
+ * always reported. */
+bool CPUDevice::load_kernels(const uint /*kernel_features*/)
+{
+  const bool is_loaded = true;
+  return is_loaded;
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/device/cpu/device_impl.h b/intern/cycles/device/cpu/device_impl.h
new file mode 100644
index 00000000000..7d222808652
--- /dev/null
+++ b/intern/cycles/device/cpu/device_impl.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+/* So ImathMath is included before our kernel_cpu_compat. */
+#ifdef WITH_OSL
+/* So no context pollution happens from indirectly included windows.h */
+# include "util/util_windows.h"
+# include <OSL/oslexec.h>
+#endif
+
+#ifdef WITH_EMBREE
+# include <embree3/rtcore.h>
+#endif
+
+#include "device/cpu/kernel.h"
+#include "device/device.h"
+#include "device/device_memory.h"
+
+// clang-format off
+#include "kernel/device/cpu/compat.h"
+#include "kernel/device/cpu/kernel.h"
+#include "kernel/device/cpu/globals.h"
+
+#include "kernel/osl/osl_shader.h"
+#include "kernel/osl/osl_globals.h"
+// clang-format on
+
+CCL_NAMESPACE_BEGIN
+
+/* Implementation of the `Device` interface which runs on the host CPU. */
+class CPUDevice : public Device {
+ public:
+  /* Device-wide kernel globals. */
+  KernelGlobals kernel_globals;
+
+  /* Table of texture descriptions, and a flag telling that it was modified since it was last
+   * copied to the device. */
+  device_vector<TextureInfo> texture_info;
+  bool need_texture_info;
+
+#ifdef WITH_OSL
+  OSLGlobals osl_globals;
+#endif
+#ifdef WITH_EMBREE
+  RTCScene embree_scene = nullptr;
+  RTCDevice embree_device;
+#endif
+
+  /* Kernel entry points, one wrapper per kernel. */
+  CPUKernels kernels;
+
+  CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_);
+  ~CPUDevice();
+
+  bool show_samples() const override;
+
+  BVHLayoutMask get_bvh_layout_mask() const override;
+
+  /* Returns true if the texture info was copied to the device (meaning, some more
+   * re-initialization might be needed). */
+  bool load_texture_info();
+
+  /* Generic memory management. */
+  void mem_alloc(device_memory &mem) override;
+  void mem_copy_to(device_memory &mem) override;
+  void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override;
+  void mem_zero(device_memory &mem) override;
+  void mem_free(device_memory &mem) override;
+  device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/) override;
+
+  void const_copy_to(const char *name, void *host, size_t size) override;
+
+  /* Global memory. */
+  void global_alloc(device_memory &mem);
+  void global_free(device_memory &mem);
+
+  /* Textures. */
+  void tex_alloc(device_texture &mem);
+  void tex_free(device_texture &mem);
+
+  void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
+
+  /* Access to CPU-specific state of the device. */
+  const CPUKernels *get_cpu_kernels() const override;
+  void get_cpu_kernel_thread_globals(
+      vector<CPUKernelThreadGlobals> &kernel_thread_globals) override;
+  void *get_cpu_osl_memory() override;
+
+ protected:
+  bool load_kernels(uint /*kernel_features*/) override;
+};
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/device/cpu/kernel.cpp b/intern/cycles/device/cpu/kernel.cpp
new file mode 100644
index 00000000000..0ab58ff8600
--- /dev/null
+++ b/intern/cycles/device/cpu/kernel.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "device/cpu/kernel.h"
+
+#include "kernel/device/cpu/kernel.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Expands to the comma-separated list of all per-microarchitecture variants of the kernel
+ * `name`, in the order default, SSE2, SSE3, SSE4.1, AVX, AVX2. */
+#define KERNEL_FUNCTIONS(name) \
+ KERNEL_NAME_EVAL(cpu, name), KERNEL_NAME_EVAL(cpu_sse2, name), \
+ KERNEL_NAME_EVAL(cpu_sse3, name), KERNEL_NAME_EVAL(cpu_sse41, name), \
+ KERNEL_NAME_EVAL(cpu_avx, name), KERNEL_NAME_EVAL(cpu_avx2, name)
+
+/* Expands to the member initializer which passes all variants of the kernel `name` to the
+ * member of the same name. */
+#define REGISTER_KERNEL(name) name(KERNEL_FUNCTIONS(name))
+
+/* Initialize every kernel wrapper with all of its per-microarchitecture variants. */
+CPUKernels::CPUKernels()
+ : /* Integrator. */
+ REGISTER_KERNEL(integrator_init_from_camera),
+ REGISTER_KERNEL(integrator_init_from_bake),
+ REGISTER_KERNEL(integrator_intersect_closest),
+ REGISTER_KERNEL(integrator_intersect_shadow),
+ REGISTER_KERNEL(integrator_intersect_subsurface),
+ REGISTER_KERNEL(integrator_intersect_volume_stack),
+ REGISTER_KERNEL(integrator_shade_background),
+ REGISTER_KERNEL(integrator_shade_light),
+ REGISTER_KERNEL(integrator_shade_shadow),
+ REGISTER_KERNEL(integrator_shade_surface),
+ REGISTER_KERNEL(integrator_shade_volume),
+ REGISTER_KERNEL(integrator_megakernel),
+ /* Shader evaluation. */
+ REGISTER_KERNEL(shader_eval_displace),
+ REGISTER_KERNEL(shader_eval_background),
+ /* Adaptive sampling. */
+ REGISTER_KERNEL(adaptive_sampling_convergence_check),
+ REGISTER_KERNEL(adaptive_sampling_filter_x),
+ REGISTER_KERNEL(adaptive_sampling_filter_y),
+ /* Cryptomatte. */
+ REGISTER_KERNEL(cryptomatte_postprocess),
+ /* Bake. */
+ REGISTER_KERNEL(bake)
+{
+}
+
+#undef REGISTER_KERNEL
+#undef KERNEL_FUNCTIONS
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/device/cpu/kernel.h b/intern/cycles/device/cpu/kernel.h
new file mode 100644
index 00000000000..54b18308544
--- /dev/null
+++ b/intern/cycles/device/cpu/kernel.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "device/cpu/kernel_function.h"
+#include "util/util_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+struct KernelGlobals;
+struct IntegratorStateCPU;
+struct TileInfo;
+
+/* Collection of all CPU kernel entry points. Every member wraps the per-microarchitecture
+ * variants of one kernel; see `CPUKernelFunction`. */
+class CPUKernels {
+ public:
+  /* Function types. */
+
+  using IntegratorFunction =
+      CPUKernelFunction<void (*)(const KernelGlobals *kg, IntegratorStateCPU *state)>;
+
+  using IntegratorShadeFunction =
+      CPUKernelFunction<void (*)(const KernelGlobals *kg,
+                                 IntegratorStateCPU *state,
+                                 ccl_global float *render_buffer)>;
+
+  using IntegratorInitFunction =
+      CPUKernelFunction<bool (*)(const KernelGlobals *kg,
+                                 IntegratorStateCPU *state,
+                                 KernelWorkTile *tile,
+                                 ccl_global float *render_buffer)>;
+
+  using ShaderEvalFunction =
+      CPUKernelFunction<void (*)(const KernelGlobals *kg,
+                                 const KernelShaderEvalInput *,
+                                 float4 *,
+                                 const int)>;
+
+  using AdaptiveSamplingConvergenceCheckFunction =
+      CPUKernelFunction<bool (*)(const KernelGlobals *kg,
+                                 ccl_global float *render_buffer,
+                                 int x,
+                                 int y,
+                                 float threshold,
+                                 bool reset,
+                                 int offset,
+                                 int stride)>;
+
+  using AdaptiveSamplingFilterXFunction =
+      CPUKernelFunction<void (*)(const KernelGlobals *kg,
+                                 ccl_global float *render_buffer,
+                                 int y,
+                                 int start_x,
+                                 int width,
+                                 int offset,
+                                 int stride)>;
+
+  using AdaptiveSamplingFilterYFunction =
+      CPUKernelFunction<void (*)(const KernelGlobals *kg,
+                                 ccl_global float *render_buffer,
+                                 int x,
+                                 int start_y,
+                                 int height,
+                                 int offset,
+                                 int stride)>;
+
+  using CryptomattePostprocessFunction =
+      CPUKernelFunction<void (*)(const KernelGlobals *kg,
+                                 ccl_global float *render_buffer,
+                                 int pixel_index)>;
+
+  using BakeFunction =
+      CPUKernelFunction<void (*)(const KernelGlobals *, float *, int, int, int, int, int)>;
+
+  /* Kernels. The declaration order matches the initialization order in the constructor. */
+
+  /* Integrator. */
+  IntegratorInitFunction integrator_init_from_camera;
+  IntegratorInitFunction integrator_init_from_bake;
+  IntegratorFunction integrator_intersect_closest;
+  IntegratorFunction integrator_intersect_shadow;
+  IntegratorFunction integrator_intersect_subsurface;
+  IntegratorFunction integrator_intersect_volume_stack;
+  IntegratorShadeFunction integrator_shade_background;
+  IntegratorShadeFunction integrator_shade_light;
+  IntegratorShadeFunction integrator_shade_shadow;
+  IntegratorShadeFunction integrator_shade_surface;
+  IntegratorShadeFunction integrator_shade_volume;
+  IntegratorShadeFunction integrator_megakernel;
+
+  /* Shader evaluation. */
+  ShaderEvalFunction shader_eval_displace;
+  ShaderEvalFunction shader_eval_background;
+
+  /* Adaptive sampling. */
+  AdaptiveSamplingConvergenceCheckFunction adaptive_sampling_convergence_check;
+  AdaptiveSamplingFilterXFunction adaptive_sampling_filter_x;
+  AdaptiveSamplingFilterYFunction adaptive_sampling_filter_y;
+
+  /* Cryptomatte. */
+  CryptomattePostprocessFunction cryptomatte_postprocess;
+
+  /* Bake. */
+  BakeFunction bake;
+
+  CPUKernels();
+};
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/device/cpu/kernel_function.h b/intern/cycles/device/cpu/kernel_function.h
new file mode 100644
index 00000000000..aa18720cc24
--- /dev/null
+++ b/intern/cycles/device/cpu/kernel_function.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "util/util_debug.h"
+#include "util/util_system.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Callable wrapper around the per-microarchitecture variants of one kernel function.
+ *
+ * All variants are passed to the constructor, which picks the most capable one that is compiled
+ * in, enabled in the debug flags and supported by the CPU. Calls are then routed to this variant.
+ *
+ * For example, on a computer which only has SSE4.1 the kernel_sse41 will be used. */
+template<typename FunctionType> class CPUKernelFunction {
+ public:
+  CPUKernelFunction(FunctionType kernel_default,
+                    FunctionType kernel_sse2,
+                    FunctionType kernel_sse3,
+                    FunctionType kernel_sse41,
+                    FunctionType kernel_avx,
+                    FunctionType kernel_avx2)
+      : kernel_info_(get_best_kernel_info(
+            kernel_default, kernel_sse2, kernel_sse3, kernel_sse41, kernel_avx, kernel_avx2))
+  {
+  }
+
+  /* Invoke the selected kernel. Arguments are taken by value and passed on as they are. */
+  template<typename... Args> inline auto operator()(Args... args) const
+  {
+    const FunctionType selected_kernel = kernel_info_.kernel;
+    assert(selected_kernel);
+
+    return selected_kernel(args...);
+  }
+
+  /* Human-readable name of the microarchitecture of the selected kernel. */
+  const char *get_uarch_name() const
+  {
+    return kernel_info_.uarch_name;
+  }
+
+ protected:
+  /* Pairs a kernel function pointer with the human-readable name of the microarchitecture it is
+   * compiled for. */
+  class KernelInfo {
+   public:
+    /* Empty name, no kernel. */
+    KernelInfo() : uarch_name(""), kernel(nullptr)
+    {
+    }
+
+    /* TODO(sergey): Use string view, to have higher-level functionality (i.e. comparison) without
+     * memory allocation. */
+    KernelInfo(const char *uarch_name, FunctionType kernel)
+        : uarch_name(uarch_name), kernel(kernel)
+    {
+    }
+
+    const char *uarch_name;
+    FunctionType kernel;
+  };
+
+  /* Walk the microarchitectures from the most to the least capable one, and return the first
+   * variant which is compiled in, enabled in the debug flags and supported by the CPU. Falls
+   * back to the default variant. */
+  KernelInfo get_best_kernel_info(FunctionType kernel_default,
+                                  FunctionType kernel_sse2,
+                                  FunctionType kernel_sse3,
+                                  FunctionType kernel_sse41,
+                                  FunctionType kernel_avx,
+                                  FunctionType kernel_avx2)
+  {
+    /* Any of the optimized variants might end up unreferenced, depending on which ones are
+     * compiled in. Avoid warnings about unused arguments. */
+    (void)kernel_avx2;
+    (void)kernel_avx;
+    (void)kernel_sse41;
+    (void)kernel_sse3;
+    (void)kernel_sse2;
+
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
+    if (DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) {
+      return KernelInfo("AVX2", kernel_avx2);
+    }
+#endif
+
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
+    if (DebugFlags().cpu.has_avx() && system_cpu_support_avx()) {
+      return KernelInfo("AVX", kernel_avx);
+    }
+#endif
+
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
+    if (DebugFlags().cpu.has_sse41() && system_cpu_support_sse41()) {
+      return KernelInfo("SSE4.1", kernel_sse41);
+    }
+#endif
+
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
+    if (DebugFlags().cpu.has_sse3() && system_cpu_support_sse3()) {
+      return KernelInfo("SSE3", kernel_sse3);
+    }
+#endif
+
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
+    if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) {
+      return KernelInfo("SSE2", kernel_sse2);
+    }
+#endif
+
+    return KernelInfo("default", kernel_default);
+  }
+
+  KernelInfo kernel_info_;
+};
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/device/cpu/kernel_thread_globals.cpp b/intern/cycles/device/cpu/kernel_thread_globals.cpp
new file mode 100644
index 00000000000..988b00cd1f0
--- /dev/null
+++ b/intern/cycles/device/cpu/kernel_thread_globals.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "device/cpu/kernel_thread_globals.h"
+
+// clang-format off
+#include "kernel/osl/osl_shader.h"
+#include "kernel/osl/osl_globals.h"
+// clang-format on
+
+#include "util/util_profiling.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Create a per-thread copy of `kernel_globals`.
+ *
+ * The OSL pointer inherited by the copy is cleared, and when built with OSL the thread state is
+ * then initialized from `osl_globals_memory`, which is interpreted as `OSLGlobals`. */
+CPUKernelThreadGlobals::CPUKernelThreadGlobals(const KernelGlobals &kernel_globals,
+                                               void *osl_globals_memory,
+                                               Profiler &cpu_profiler)
+    : KernelGlobals(kernel_globals), cpu_profiler_(cpu_profiler)
+{
+  reset_runtime_memory();
+
+#ifndef WITH_OSL
+  (void)osl_globals_memory;
+#else
+  OSLGlobals *const osl_globals = reinterpret_cast<OSLGlobals *>(osl_globals_memory);
+  OSLShader::thread_init(this, osl_globals);
+#endif
+}
+
+/* Move construction: take over the kernel globals of `other` and refer to the same profiler.
+ * The runtime memory of `other` is reset afterwards, so that its destructor does not free what
+ * now belongs to this object. */
+CPUKernelThreadGlobals::CPUKernelThreadGlobals(CPUKernelThreadGlobals &&other) noexcept
+    : KernelGlobals(std::move(other)), cpu_profiler_(other.cpu_profiler_)
+{
+  CPUKernelThreadGlobals &moved_from = other;
+  moved_from.reset_runtime_memory();
+}
+
+/* Free the OSL thread state of this copy (only when built with OSL). */
+CPUKernelThreadGlobals::~CPUKernelThreadGlobals()
+{
+#ifdef WITH_OSL
+  /* Counterpart of `OSLShader::thread_init()` in the constructor. */
+  OSLShader::thread_free(this);
+#endif
+}
+
+/* Move assignment: release the OSL thread state owned by this object, take over the kernel
+ * globals of `other`, and reset the runtime memory of `other` so that its destructor does not
+ * free what now belongs to this object.
+ *
+ * NOTE(review): `cpu_profiler_` is a reference and can not be re-bound, so this object keeps
+ * referring to its original profiler. */
+CPUKernelThreadGlobals &CPUKernelThreadGlobals::operator=(CPUKernelThreadGlobals &&other)
+{
+  if (this == &other) {
+    return *this;
+  }
+
+#ifdef WITH_OSL
+  /* Without this the state created for this object would be overwritten below without ever
+   * being freed. The same call is made by the destructor, including for moved-from objects whose
+   * `osl` pointer has been reset. */
+  OSLShader::thread_free(this);
+#endif
+
+  *static_cast<KernelGlobals *>(this) = *static_cast<KernelGlobals *>(&other);
+
+  other.reset_runtime_memory();
+
+  return *this;
+}
+
+/* Clear the OSL pointer without freeing anything (only present when built with OSL). Used on
+ * fresh copies before their own state is initialized, and on moved-from objects. */
+void CPUKernelThreadGlobals::reset_runtime_memory()
+{
+#ifdef WITH_OSL
+  this->osl = nullptr;
+#endif
+}
+
+/* Add the profiler state of this thread's kernel globals to the profiler given at construction
+ * time. */
+void CPUKernelThreadGlobals::start_profiling()
+{
+  Profiler &device_profiler = cpu_profiler_;
+  device_profiler.add_state(&profiler);
+}
+
+/* Remove the profiler state of this thread's kernel globals from the profiler given at
+ * construction time. */
+void CPUKernelThreadGlobals::stop_profiling()
+{
+  Profiler &device_profiler = cpu_profiler_;
+  device_profiler.remove_state(&profiler);
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/device/cpu/kernel_thread_globals.h b/intern/cycles/device/cpu/kernel_thread_globals.h
new file mode 100644
index 00000000000..d005c3bb56c
--- /dev/null
+++ b/intern/cycles/device/cpu/kernel_thread_globals.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "kernel/device/cpu/compat.h"
+#include "kernel/device/cpu/globals.h"
+
+CCL_NAMESPACE_BEGIN
+
+class Profiler;
+
+/* Per-thread extension of `KernelGlobals` which owns the resources that are not safe to share
+ * between threads. Every worker thread which needs to operate on `KernelGlobals` needs to
+ * initialize its own copy of this object.
+ *
+ * NOTE: Only a minimal subset of objects is copied: `KernelData` is never copied. This means
+ * that there is no unnecessary data duplication happening when using this object. */
+class CPUKernelThreadGlobals : public KernelGlobals {
+ public:
+  /* TODO(sergey): Would be nice to have properly typed OSLGlobals even in the case when building
+   * without OSL support. Will avoid the need for those untyped pointers and casts. */
+  CPUKernelThreadGlobals(const KernelGlobals &kernel_globals,
+                         void *osl_globals_memory,
+                         Profiler &cpu_profiler);
+
+  /* Movable, but not copyable. */
+  CPUKernelThreadGlobals(CPUKernelThreadGlobals &&other) noexcept;
+  CPUKernelThreadGlobals &operator=(CPUKernelThreadGlobals &&other);
+
+  CPUKernelThreadGlobals(const CPUKernelThreadGlobals &other) = delete;
+  CPUKernelThreadGlobals &operator=(const CPUKernelThreadGlobals &other) = delete;
+
+  ~CPUKernelThreadGlobals();
+
+  /* Register / unregister the profiler state of these globals with the profiler passed to the
+   * constructor. */
+  void start_profiling();
+  void stop_profiling();
+
+ protected:
+  /* Clear pointers to runtime memory without freeing it. */
+  void reset_runtime_memory();
+
+  Profiler &cpu_profiler_;
+};
+
+CCL_NAMESPACE_END