git.blender.org/blender.git
author    Sergey Sharybin <sergey@blender.org>  2022-06-08 11:58:35 +0300
committer Sergey Sharybin <sergey@blender.org>  2022-06-08 11:58:35 +0300
commit    f31cef6248fd12039a4d2dfb76b26f3426477a70 (patch)
tree      4ffb531519115c06766354c4fa4acaebeeb5161e /intern/cycles
parent    15ee45fa029de175b5e366b0b1e9243a89dae543 (diff)
parent    5b0e9bd97504cd89f57dbbaa4814b7d0dd0f2ccd (diff)

Merge branch 'master' into cycles_oneapi
Diffstat (limited to 'intern/cycles')
-rw-r--r-- intern/cycles/blender/addon/__init__.py 2
-rw-r--r-- intern/cycles/blender/addon/camera.py 2
-rw-r--r-- intern/cycles/blender/addon/engine.py 2
-rw-r--r-- intern/cycles/blender/addon/operators.py 2
-rw-r--r-- intern/cycles/blender/addon/osl.py 2
-rw-r--r-- intern/cycles/blender/addon/presets.py 2
-rw-r--r-- intern/cycles/blender/addon/properties.py 2
-rw-r--r-- intern/cycles/blender/addon/ui.py 2
-rw-r--r-- intern/cycles/blender/addon/version_update.py 2
-rw-r--r-- intern/cycles/blender/image.cpp 165
-rw-r--r-- intern/cycles/blender/mesh.cpp 234
-rw-r--r-- intern/cycles/blender/volume.cpp 24
-rw-r--r-- intern/cycles/bvh/build.cpp 2
-rw-r--r-- intern/cycles/cmake/external_libs.cmake 4
-rw-r--r-- intern/cycles/device/cuda/device_impl.cpp 6
-rw-r--r-- intern/cycles/device/hip/device_impl.cpp 6
-rw-r--r-- intern/cycles/device/kernel.cpp 2
-rw-r--r-- intern/cycles/device/memory.cpp 2
-rw-r--r-- intern/cycles/device/metal/bvh.mm 2
-rw-r--r-- intern/cycles/device/metal/device_impl.h 2
-rw-r--r-- intern/cycles/device/metal/device_impl.mm 9
-rw-r--r-- intern/cycles/device/metal/kernel.mm 3
-rw-r--r-- intern/cycles/device/metal/queue.h 34
-rw-r--r-- intern/cycles/device/metal/queue.mm 227
-rw-r--r-- intern/cycles/device/metal/util.h 2
-rw-r--r-- intern/cycles/device/optix/device_impl.cpp 71
-rw-r--r-- intern/cycles/device/optix/device_impl.h 3
-rw-r--r-- intern/cycles/device/optix/queue.cpp 8
-rw-r--r-- intern/cycles/graph/node_enum.h 6
-rw-r--r-- intern/cycles/hydra/display_driver.cpp 85
-rw-r--r-- intern/cycles/hydra/display_driver.h 9
-rw-r--r-- intern/cycles/hydra/output_driver.cpp 8
-rw-r--r-- intern/cycles/hydra/render_buffer.cpp 18
-rw-r--r-- intern/cycles/hydra/render_buffer.h 4
-rw-r--r-- intern/cycles/integrator/path_trace_work_gpu.cpp 16
-rw-r--r-- intern/cycles/integrator/path_trace_work_gpu.h 1
-rw-r--r-- intern/cycles/kernel/device/cpu/image.h 10
-rw-r--r-- intern/cycles/kernel/device/gpu/image.h 13
-rw-r--r-- intern/cycles/kernel/device/gpu/kernel.h 29
-rw-r--r-- intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu 8
-rw-r--r-- intern/cycles/kernel/film/adaptive_sampling.h 8
-rw-r--r-- intern/cycles/kernel/integrator/init_from_bake.h 9
-rw-r--r-- intern/cycles/kernel/integrator/intersect_closest.h 31
-rw-r--r-- intern/cycles/kernel/integrator/megakernel.h 3
-rw-r--r-- intern/cycles/kernel/integrator/mnee.h 32
-rw-r--r-- intern/cycles/kernel/integrator/shade_background.h 2
-rw-r--r-- intern/cycles/kernel/integrator/shade_surface.h 14
-rw-r--r-- intern/cycles/kernel/integrator/subsurface.h 9
-rw-r--r-- intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp 3
-rw-r--r-- intern/cycles/kernel/osl/bsdf_phong_ramp.cpp 3
-rw-r--r-- intern/cycles/kernel/osl/bssrdf.cpp 2
-rw-r--r-- intern/cycles/kernel/osl/closures.cpp 23
-rw-r--r-- intern/cycles/kernel/osl/closures.h 1
-rw-r--r-- intern/cycles/kernel/svm/blackbody.h 1
-rw-r--r-- intern/cycles/kernel/svm/displace.h 1
-rw-r--r-- intern/cycles/kernel/svm/math_util.h 30
-rw-r--r-- intern/cycles/kernel/tables.h 39
-rw-r--r-- intern/cycles/kernel/types.h 9
-rw-r--r-- intern/cycles/scene/attribute.cpp 52
-rw-r--r-- intern/cycles/scene/attribute.h 1
-rw-r--r-- intern/cycles/scene/image.cpp 20
-rw-r--r-- intern/cycles/scene/image_oiio.cpp 45
-rw-r--r-- intern/cycles/scene/image_vdb.cpp 37
-rw-r--r-- intern/cycles/scene/image_vdb.h 1
-rw-r--r-- intern/cycles/scene/object.cpp 6
-rw-r--r-- intern/cycles/scene/scene.cpp 3
-rw-r--r-- intern/cycles/scene/shader_graph.cpp 12
-rw-r--r-- intern/cycles/scene/shader_nodes.cpp 2
-rw-r--r-- intern/cycles/session/denoising.cpp 2
-rw-r--r-- intern/cycles/test/render_graph_finalize_test.cpp 2
-rw-r--r-- intern/cycles/util/half.h 8
-rw-r--r-- intern/cycles/util/image.h 20
-rw-r--r-- intern/cycles/util/texture.h 2
73 files changed, 1066 insertions, 398 deletions
diff --git a/intern/cycles/blender/addon/__init__.py b/intern/cycles/blender/addon/__init__.py
index 74b28b8ea21..05f27bdbd4d 100644
--- a/intern/cycles/blender/addon/__init__.py
+++ b/intern/cycles/blender/addon/__init__.py
@@ -1,7 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2011-2022 Blender Foundation
-
-# <pep8 compliant>
from __future__ import annotations
bl_info = {
diff --git a/intern/cycles/blender/addon/camera.py b/intern/cycles/blender/addon/camera.py
index 0e78112699e..3c821c98128 100644
--- a/intern/cycles/blender/addon/camera.py
+++ b/intern/cycles/blender/addon/camera.py
@@ -1,8 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2011-2022 Blender Foundation
-# <pep8 compliant>
-
# Fit to match default projective camera with focal_length 50 and sensor_width 36.
default_fisheye_polynomial = [
-1.1735143712967577e-05,
diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py
index 724e1b8f727..e211f53cf31 100644
--- a/intern/cycles/blender/addon/engine.py
+++ b/intern/cycles/blender/addon/engine.py
@@ -1,7 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2011-2022 Blender Foundation
-
-# <pep8 compliant>
from __future__ import annotations
diff --git a/intern/cycles/blender/addon/operators.py b/intern/cycles/blender/addon/operators.py
index e5d7f00a381..ab474cda0ab 100644
--- a/intern/cycles/blender/addon/operators.py
+++ b/intern/cycles/blender/addon/operators.py
@@ -1,7 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2011-2022 Blender Foundation
-
-# <pep8 compliant>
from __future__ import annotations
import bpy
diff --git a/intern/cycles/blender/addon/osl.py b/intern/cycles/blender/addon/osl.py
index 9430dc5d115..1ee7ae421e3 100644
--- a/intern/cycles/blender/addon/osl.py
+++ b/intern/cycles/blender/addon/osl.py
@@ -1,7 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2011-2022 Blender Foundation
-
-# <pep8 compliant>
from __future__ import annotations
import bpy
diff --git a/intern/cycles/blender/addon/presets.py b/intern/cycles/blender/addon/presets.py
index 5eaa592a9de..cc6d574da99 100644
--- a/intern/cycles/blender/addon/presets.py
+++ b/intern/cycles/blender/addon/presets.py
@@ -1,7 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2011-2022 Blender Foundation
-
-# <pep8 compliant>
from __future__ import annotations
from bl_operators.presets import AddPresetBase
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 021b61d4751..5835d29c088 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -1,7 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2011-2022 Blender Foundation
-
-# <pep8 compliant>
from __future__ import annotations
import bpy
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 71f64d7d360..8565ad7b263 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -1,7 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2011-2022 Blender Foundation
-
-# <pep8 compliant>
from __future__ import annotations
import bpy
diff --git a/intern/cycles/blender/addon/version_update.py b/intern/cycles/blender/addon/version_update.py
index 531ecc177da..12880496dfd 100644
--- a/intern/cycles/blender/addon/version_update.py
+++ b/intern/cycles/blender/addon/version_update.py
@@ -1,7 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2011-2022 Blender Foundation
-
-# <pep8 compliant>
from __future__ import annotations
import bpy
diff --git a/intern/cycles/blender/image.cpp b/intern/cycles/blender/image.cpp
index e01b72c1653..aea79ad60ad 100644
--- a/intern/cycles/blender/image.cpp
+++ b/intern/cycles/blender/image.cpp
@@ -7,6 +7,8 @@
#include "blender/session.h"
#include "blender/util.h"
+#include "util/half.h"
+
CCL_NAMESPACE_BEGIN
/* Packed Images */
@@ -19,17 +21,40 @@ BlenderImageLoader::BlenderImageLoader(BL::Image b_image,
frame(frame),
tile_number(tile_number),
/* Don't free cache for preview render to avoid race condition from T93560, to be fixed
- properly later as we are close to release. */
+ * properly later as we are close to release. */
free_cache(!is_preview_render && !b_image.has_data())
{
}
bool BlenderImageLoader::load_metadata(const ImageDeviceFeatures &, ImageMetaData &metadata)
{
- metadata.width = b_image.size()[0];
- metadata.height = b_image.size()[1];
+ if (b_image.source() != BL::Image::source_TILED) {
+ /* Image sequence might have different dimensions, and hence needs to be handled in a special
+ * manner.
+ * NOTE: Currently the sequences are not handled by this image loader. */
+ assert(b_image.source() != BL::Image::source_SEQUENCE);
+
+ metadata.width = b_image.size()[0];
+ metadata.height = b_image.size()[1];
+ metadata.channels = b_image.channels();
+ }
+ else {
+ /* Different UDIM tiles might have different resolutions, so get resolution from the actual
+ * tile. */
+ BL::UDIMTile b_udim_tile = b_image.tiles.get(tile_number);
+ if (b_udim_tile) {
+ metadata.width = b_udim_tile.size()[0];
+ metadata.height = b_udim_tile.size()[1];
+ metadata.channels = b_udim_tile.channels();
+ }
+ else {
+ metadata.width = 0;
+ metadata.height = 0;
+ metadata.channels = 0;
+ }
+ }
+
metadata.depth = 1;
- metadata.channels = b_image.channels();
if (b_image.is_float()) {
if (metadata.channels == 1) {
@@ -62,80 +87,134 @@ bool BlenderImageLoader::load_metadata(const ImageDeviceFeatures &, ImageMetaDat
}
bool BlenderImageLoader::load_pixels(const ImageMetaData &metadata,
- void *pixels,
- const size_t pixels_size,
+ void *out_pixels,
+ const size_t out_pixels_size,
const bool associate_alpha)
{
const size_t num_pixels = ((size_t)metadata.width) * metadata.height;
const int channels = metadata.channels;
- if (b_image.is_float()) {
- /* image data */
- float *image_pixels;
- image_pixels = image_get_float_pixels_for_frame(b_image, frame, tile_number);
+ if (metadata.type == IMAGE_DATA_TYPE_FLOAT || metadata.type == IMAGE_DATA_TYPE_FLOAT4) {
+ /* Float. */
+ float *in_pixels = image_get_float_pixels_for_frame(b_image, frame, tile_number);
- if (image_pixels && num_pixels * channels == pixels_size) {
- memcpy(pixels, image_pixels, pixels_size * sizeof(float));
+ if (in_pixels && num_pixels * channels == out_pixels_size) {
+ /* Straight copy pixel data. */
+ memcpy(out_pixels, in_pixels, out_pixels_size * sizeof(float));
}
else {
+ /* Missing or invalid pixel data. */
if (channels == 1) {
- memset(pixels, 0, num_pixels * sizeof(float));
+ memset(out_pixels, 0, num_pixels * sizeof(float));
}
else {
- const size_t num_pixels_safe = pixels_size / channels;
- float *fp = (float *)pixels;
- for (int i = 0; i < num_pixels_safe; i++, fp += channels) {
- fp[0] = 1.0f;
- fp[1] = 0.0f;
- fp[2] = 1.0f;
+ const size_t num_pixels_safe = out_pixels_size / channels;
+ float *out_pixel = (float *)out_pixels;
+ for (int i = 0; i < num_pixels_safe; i++, out_pixel += channels) {
+ out_pixel[0] = 1.0f;
+ out_pixel[1] = 0.0f;
+ out_pixel[2] = 1.0f;
if (channels == 4) {
- fp[3] = 1.0f;
+ out_pixel[3] = 1.0f;
}
}
}
}
- if (image_pixels) {
- MEM_freeN(image_pixels);
+ if (in_pixels) {
+ MEM_freeN(in_pixels);
}
}
- else {
- unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame, tile_number);
+ else if (metadata.type == IMAGE_DATA_TYPE_HALF || metadata.type == IMAGE_DATA_TYPE_HALF4) {
+ /* Half float. Blender does not have a half type, but in some cases
+ * we up-sample byte to half to avoid precision loss for colorspace
+ * conversion. */
+ unsigned char *in_pixels = image_get_pixels_for_frame(b_image, frame, tile_number);
- if (image_pixels && num_pixels * channels == pixels_size) {
- memcpy(pixels, image_pixels, pixels_size * sizeof(unsigned char));
+ if (in_pixels && num_pixels * channels == out_pixels_size) {
+ /* Convert uchar to half. */
+ const uchar *in_pixel = in_pixels;
+ half *out_pixel = (half *)out_pixels;
+ if (associate_alpha && channels == 4) {
+ for (size_t i = 0; i < num_pixels; i++, in_pixel += 4, out_pixel += 4) {
+ const float alpha = util_image_cast_to_float(in_pixel[3]);
+ out_pixel[0] = float_to_half_image(util_image_cast_to_float(in_pixel[0]) * alpha);
+ out_pixel[1] = float_to_half_image(util_image_cast_to_float(in_pixel[1]) * alpha);
+ out_pixel[2] = float_to_half_image(util_image_cast_to_float(in_pixel[2]) * alpha);
+ out_pixel[3] = float_to_half_image(alpha);
+ }
+ }
+ else {
+ for (size_t i = 0; i < num_pixels; i++) {
+ for (int c = 0; c < channels; c++, in_pixel++, out_pixel++) {
+ *out_pixel = float_to_half_image(util_image_cast_to_float(*in_pixel));
+ }
+ }
+ }
}
else {
+ /* Missing or invalid pixel data. */
if (channels == 1) {
- memset(pixels, 0, pixels_size * sizeof(unsigned char));
+ memset(out_pixels, 0, num_pixels * sizeof(half));
}
else {
- const size_t num_pixels_safe = pixels_size / channels;
- unsigned char *cp = (unsigned char *)pixels;
- for (size_t i = 0; i < num_pixels_safe; i++, cp += channels) {
- cp[0] = 255;
- cp[1] = 0;
- cp[2] = 255;
+ const size_t num_pixels_safe = out_pixels_size / channels;
+ half *out_pixel = (half *)out_pixels;
+ for (int i = 0; i < num_pixels_safe; i++, out_pixel += channels) {
+ out_pixel[0] = float_to_half_image(1.0f);
+ out_pixel[1] = float_to_half_image(0.0f);
+ out_pixel[2] = float_to_half_image(1.0f);
if (channels == 4) {
- cp[3] = 255;
+ out_pixel[3] = float_to_half_image(1.0f);
}
}
}
}
- if (image_pixels) {
- MEM_freeN(image_pixels);
+ if (in_pixels) {
+ MEM_freeN(in_pixels);
}
+ }
+ else {
+ /* Byte. */
+ unsigned char *in_pixels = image_get_pixels_for_frame(b_image, frame, tile_number);
+
+ if (in_pixels && num_pixels * channels == out_pixels_size) {
+ /* Straight copy pixel data. */
+ memcpy(out_pixels, in_pixels, out_pixels_size * sizeof(unsigned char));
- if (associate_alpha) {
- /* Premultiply, byte images are always straight for Blender. */
- unsigned char *cp = (unsigned char *)pixels;
- for (size_t i = 0; i < num_pixels; i++, cp += channels) {
- cp[0] = (cp[0] * cp[3]) / 255;
- cp[1] = (cp[1] * cp[3]) / 255;
- cp[2] = (cp[2] * cp[3]) / 255;
+ if (associate_alpha && channels == 4) {
+ /* Premultiply, byte images are always straight for Blender. */
+ unsigned char *out_pixel = (unsigned char *)out_pixels;
+ for (size_t i = 0; i < num_pixels; i++, out_pixel += 4) {
+ out_pixel[0] = (out_pixel[0] * out_pixel[3]) / 255;
+ out_pixel[1] = (out_pixel[1] * out_pixel[3]) / 255;
+ out_pixel[2] = (out_pixel[2] * out_pixel[3]) / 255;
+ }
+ }
+ }
+ else {
+ /* Missing or invalid pixel data. */
+ if (channels == 1) {
+ memset(out_pixels, 0, out_pixels_size * sizeof(unsigned char));
+ }
+ else {
+ const size_t num_pixels_safe = out_pixels_size / channels;
+ unsigned char *out_pixel = (unsigned char *)out_pixels;
+ for (size_t i = 0; i < num_pixels_safe; i++, out_pixel += channels) {
+ out_pixel[0] = 255;
+ out_pixel[1] = 0;
+ out_pixel[2] = 255;
+ if (channels == 4) {
+ out_pixel[3] = 255;
+ }
+ }
}
}
+
+ if (in_pixels) {
+ MEM_freeN(in_pixels);
+ }
}
/* Free image buffers to save memory during render. */
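Note on the byte-to-half path added above: Blender itself has no half-float image buffers, so byte images that would lose precision in a colorspace conversion are up-sampled to half, with alpha associated (premultiplied) before quantization. A minimal standalone sketch of that conversion, where byte_to_float() and float_to_half() are hypothetical stand-ins for Cycles' util_image_cast_to_float() and float_to_half_image():

  #include <cstddef>
  #include <cstdint>

  static inline float byte_to_float(uint8_t b) { return b * (1.0f / 255.0f); }
  uint16_t float_to_half(float f); /* assumed IEEE half-float conversion helper */

  /* Straight-alpha RGBA bytes -> premultiplied half floats. */
  void bytes_to_premultiplied_half(const uint8_t *in, uint16_t *out, size_t num_pixels)
  {
    for (size_t i = 0; i < num_pixels; i++, in += 4, out += 4) {
      const float alpha = byte_to_float(in[3]);
      out[0] = float_to_half(byte_to_float(in[0]) * alpha);
      out[1] = float_to_half(byte_to_float(in[1]) * alpha);
      out[2] = float_to_half(byte_to_float(in[2]) * alpha);
      out[3] = float_to_half(alpha);
    }
  }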
diff --git a/intern/cycles/blender/mesh.cpp b/intern/cycles/blender/mesh.cpp
index c76ce3801d4..e2db52cc5c1 100644
--- a/intern/cycles/blender/mesh.cpp
+++ b/intern/cycles/blender/mesh.cpp
@@ -267,75 +267,62 @@ static void mikk_compute_tangents(
genTangSpaceDefault(&context);
}
-/* Create sculpt vertex color attributes. */
-static void attr_create_sculpt_vertex_color(Scene *scene,
- Mesh *mesh,
- BL::Mesh &b_mesh,
- bool subdivision)
-{
- for (BL::MeshVertColorLayer &l : b_mesh.sculpt_vertex_colors) {
- const bool active_render = l.active_render();
- AttributeStandard vcol_std = (active_render) ? ATTR_STD_VERTEX_COLOR : ATTR_STD_NONE;
- ustring vcol_name = ustring(l.name().c_str());
-
- const bool need_vcol = mesh->need_attribute(scene, vcol_name) ||
- mesh->need_attribute(scene, vcol_std);
-
- if (!need_vcol) {
- continue;
- }
-
- AttributeSet &attributes = (subdivision) ? mesh->subd_attributes : mesh->attributes;
- Attribute *vcol_attr = attributes.add(vcol_name, TypeRGBA, ATTR_ELEMENT_VERTEX);
- vcol_attr->std = vcol_std;
-
- float4 *cdata = vcol_attr->data_float4();
- int numverts = b_mesh.vertices.length();
-
- for (int i = 0; i < numverts; i++) {
- *(cdata++) = get_float4(l.data[i].color());
- }
- }
-}
-
template<typename TypeInCycles, typename GetValueAtIndex>
static void fill_generic_attribute(BL::Mesh &b_mesh,
TypeInCycles *data,
const BL::Attribute::domain_enum b_domain,
+ const bool subdivision,
const GetValueAtIndex &get_value_at_index)
{
switch (b_domain) {
case BL::Attribute::domain_CORNER: {
- for (BL::MeshLoopTriangle &t : b_mesh.loop_triangles) {
- const int index = t.index() * 3;
- BL::Array<int, 3> loops = t.loops();
- data[index] = get_value_at_index(loops[0]);
- data[index + 1] = get_value_at_index(loops[1]);
- data[index + 2] = get_value_at_index(loops[2]);
+ if (subdivision) {
+ for (BL::MeshPolygon &p : b_mesh.polygons) {
+ int n = p.loop_total();
+ for (int i = 0; i < n; i++) {
+ *data = get_value_at_index(p.loop_start() + i);
+ data++;
+ }
+ }
+ }
+ else {
+ for (BL::MeshLoopTriangle &t : b_mesh.loop_triangles) {
+ const int index = t.index() * 3;
+ BL::Array<int, 3> loops = t.loops();
+ data[index] = get_value_at_index(loops[0]);
+ data[index + 1] = get_value_at_index(loops[1]);
+ data[index + 2] = get_value_at_index(loops[2]);
+ }
}
break;
}
case BL::Attribute::domain_EDGE: {
- /* Average edge attributes at vertices. */
- const size_t num_verts = b_mesh.vertices.length();
- vector<int> count(num_verts, 0);
-
- for (BL::MeshEdge &e : b_mesh.edges) {
- BL::Array<int, 2> vertices = e.vertices();
- TypeInCycles value = get_value_at_index(e.index());
-
- data[vertices[0]] += value;
- data[vertices[1]] += value;
- count[vertices[0]]++;
- count[vertices[1]]++;
+ if constexpr (std::is_same_v<TypeInCycles, uchar4>) {
+ /* uchar4 edge attributes do not exist, and averaging in place
+ * would not work. */
+ assert(0);
}
+ else {
+ /* Average edge attributes at vertices. */
+ const size_t num_verts = b_mesh.vertices.length();
+ vector<int> count(num_verts, 0);
+
+ for (BL::MeshEdge &e : b_mesh.edges) {
+ BL::Array<int, 2> vertices = e.vertices();
+ TypeInCycles value = get_value_at_index(e.index());
+
+ data[vertices[0]] += value;
+ data[vertices[1]] += value;
+ count[vertices[0]]++;
+ count[vertices[1]]++;
+ }
- for (size_t i = 0; i < num_verts; i++) {
- if (count[i] > 1) {
- data[i] /= (float)count[i];
+ for (size_t i = 0; i < num_verts; i++) {
+ if (count[i] > 1) {
+ data[i] /= (float)count[i];
+ }
}
}
-
break;
}
case BL::Attribute::domain_POINT: {
@@ -346,8 +333,16 @@ static void fill_generic_attribute(BL::Mesh &b_mesh,
break;
}
case BL::Attribute::domain_FACE: {
- for (BL::MeshLoopTriangle &t : b_mesh.loop_triangles) {
- data[t.index()] = get_value_at_index(t.polygon_index());
+ if (subdivision) {
+ const int num_polygons = b_mesh.polygons.length();
+ for (int i = 0; i < num_polygons; i++) {
+ data[i] = get_value_at_index(i);
+ }
+ }
+ else {
+ for (BL::MeshLoopTriangle &t : b_mesh.loop_triangles) {
+ data[t.index()] = get_value_at_index(t.polygon_index());
+ }
}
break;
}
@@ -395,21 +390,22 @@ static void attr_create_generic(Scene *scene,
const bool need_motion,
const float motion_scale)
{
- if (subdivision) {
- /* TODO: Handle subdivision correctly. */
- return;
- }
- AttributeSet &attributes = mesh->attributes;
+ AttributeSet &attributes = (subdivision) ? mesh->subd_attributes : mesh->attributes;
static const ustring u_velocity("velocity");
+ int attribute_index = 0;
+ int render_color_index = b_mesh.attributes.render_color_index();
+
for (BL::Attribute &b_attribute : b_mesh.attributes) {
const ustring name{b_attribute.name().c_str()};
+ const bool is_render_color = (attribute_index++ == render_color_index);
if (need_motion && name == u_velocity) {
attr_create_motion(mesh, b_attribute, motion_scale);
}
- if (!mesh->need_attribute(scene, name)) {
+ if (!(mesh->need_attribute(scene, name) ||
+ (is_render_color && mesh->need_attribute(scene, ATTR_STD_VERTEX_COLOR)))) {
continue;
}
if (attributes.find(name)) {
@@ -445,15 +441,16 @@ static void attr_create_generic(Scene *scene,
BL::FloatAttribute b_float_attribute{b_attribute};
Attribute *attr = attributes.add(name, TypeFloat, element);
float *data = attr->data_float();
- fill_generic_attribute(
- b_mesh, data, b_domain, [&](int i) { return b_float_attribute.data[i].value(); });
+ fill_generic_attribute(b_mesh, data, b_domain, subdivision, [&](int i) {
+ return b_float_attribute.data[i].value();
+ });
break;
}
case BL::Attribute::data_type_BOOLEAN: {
BL::BoolAttribute b_bool_attribute{b_attribute};
Attribute *attr = attributes.add(name, TypeFloat, element);
float *data = attr->data_float();
- fill_generic_attribute(b_mesh, data, b_domain, [&](int i) {
+ fill_generic_attribute(b_mesh, data, b_domain, subdivision, [&](int i) {
return (float)b_bool_attribute.data[i].value();
});
break;
@@ -462,25 +459,59 @@ static void attr_create_generic(Scene *scene,
BL::IntAttribute b_int_attribute{b_attribute};
Attribute *attr = attributes.add(name, TypeFloat, element);
float *data = attr->data_float();
- fill_generic_attribute(
- b_mesh, data, b_domain, [&](int i) { return (float)b_int_attribute.data[i].value(); });
+ fill_generic_attribute(b_mesh, data, b_domain, subdivision, [&](int i) {
+ return (float)b_int_attribute.data[i].value();
+ });
break;
}
case BL::Attribute::data_type_FLOAT_VECTOR: {
BL::FloatVectorAttribute b_vector_attribute{b_attribute};
Attribute *attr = attributes.add(name, TypeVector, element);
float3 *data = attr->data_float3();
- fill_generic_attribute(b_mesh, data, b_domain, [&](int i) {
+ fill_generic_attribute(b_mesh, data, b_domain, subdivision, [&](int i) {
BL::Array<float, 3> v = b_vector_attribute.data[i].vector();
return make_float3(v[0], v[1], v[2]);
});
break;
}
+ case BL::Attribute::data_type_BYTE_COLOR: {
+ BL::ByteColorAttribute b_color_attribute{b_attribute};
+
+ if (element == ATTR_ELEMENT_CORNER) {
+ element = ATTR_ELEMENT_CORNER_BYTE;
+ }
+ Attribute *attr = attributes.add(name, TypeRGBA, element);
+ if (is_render_color) {
+ attr->std = ATTR_STD_VERTEX_COLOR;
+ }
+
+ if (element == ATTR_ELEMENT_CORNER_BYTE) {
+ uchar4 *data = attr->data_uchar4();
+ fill_generic_attribute(b_mesh, data, b_domain, subdivision, [&](int i) {
+ /* Compress/encode vertex color using the sRGB curve. */
+ const float4 c = get_float4(b_color_attribute.data[i].color());
+ return color_float4_to_uchar4(color_linear_to_srgb_v4(c));
+ });
+ }
+ else {
+ float4 *data = attr->data_float4();
+ fill_generic_attribute(b_mesh, data, b_domain, subdivision, [&](int i) {
+ BL::Array<float, 4> v = b_color_attribute.data[i].color();
+ return make_float4(v[0], v[1], v[2], v[3]);
+ });
+ }
+ break;
+ }
case BL::Attribute::data_type_FLOAT_COLOR: {
BL::FloatColorAttribute b_color_attribute{b_attribute};
+
Attribute *attr = attributes.add(name, TypeRGBA, element);
+ if (is_render_color) {
+ attr->std = ATTR_STD_VERTEX_COLOR;
+ }
+
float4 *data = attr->data_float4();
- fill_generic_attribute(b_mesh, data, b_domain, [&](int i) {
+ fill_generic_attribute(b_mesh, data, b_domain, subdivision, [&](int i) {
BL::Array<float, 4> v = b_color_attribute.data[i].color();
return make_float4(v[0], v[1], v[2], v[3]);
});
@@ -490,7 +521,7 @@ static void attr_create_generic(Scene *scene,
BL::Float2Attribute b_float2_attribute{b_attribute};
Attribute *attr = attributes.add(name, TypeFloat2, element);
float2 *data = attr->data_float2();
- fill_generic_attribute(b_mesh, data, b_domain, [&](int i) {
+ fill_generic_attribute(b_mesh, data, b_domain, subdivision, [&](int i) {
BL::Array<float, 2> v = b_float2_attribute.data[i].vector();
return make_float2(v[0], v[1]);
});
@@ -503,69 +534,6 @@ static void attr_create_generic(Scene *scene,
}
}
-/* Create vertex color attributes. */
-static void attr_create_vertex_color(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, bool subdivision)
-{
- for (BL::MeshLoopColorLayer &l : b_mesh.vertex_colors) {
- const bool active_render = l.active_render();
- AttributeStandard vcol_std = (active_render) ? ATTR_STD_VERTEX_COLOR : ATTR_STD_NONE;
- ustring vcol_name = ustring(l.name().c_str());
-
- const bool need_vcol = mesh->need_attribute(scene, vcol_name) ||
- mesh->need_attribute(scene, vcol_std);
-
- if (!need_vcol) {
- continue;
- }
-
- Attribute *vcol_attr = NULL;
-
- if (subdivision) {
- if (active_render) {
- vcol_attr = mesh->subd_attributes.add(vcol_std, vcol_name);
- }
- else {
- vcol_attr = mesh->subd_attributes.add(vcol_name, TypeRGBA, ATTR_ELEMENT_CORNER_BYTE);
- }
-
- uchar4 *cdata = vcol_attr->data_uchar4();
-
- for (BL::MeshPolygon &p : b_mesh.polygons) {
- int n = p.loop_total();
- for (int i = 0; i < n; i++) {
- float4 color = get_float4(l.data[p.loop_start() + i].color());
- /* Compress/encode vertex color using the sRGB curve. */
- *(cdata++) = color_float4_to_uchar4(color);
- }
- }
- }
- else {
- if (active_render) {
- vcol_attr = mesh->attributes.add(vcol_std, vcol_name);
- }
- else {
- vcol_attr = mesh->attributes.add(vcol_name, TypeRGBA, ATTR_ELEMENT_CORNER_BYTE);
- }
-
- uchar4 *cdata = vcol_attr->data_uchar4();
-
- for (BL::MeshLoopTriangle &t : b_mesh.loop_triangles) {
- int3 li = get_int3(t.loops());
- float4 c1 = get_float4(l.data[li[0]].color());
- float4 c2 = get_float4(l.data[li[1]].color());
- float4 c3 = get_float4(l.data[li[2]].color());
-
- /* Compress/encode vertex color using the sRGB curve. */
- cdata[0] = color_float4_to_uchar4(c1);
- cdata[1] = color_float4_to_uchar4(c2);
- cdata[2] = color_float4_to_uchar4(c3);
-
- cdata += 3;
- }
- }
- }
-}
-
/* Create uv map attributes. */
static void attr_create_uv_map(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh)
{
@@ -1029,8 +997,6 @@ static void create_mesh(Scene *scene,
* The calculate functions will check whether they're needed or not.
*/
attr_create_pointiness(scene, mesh, b_mesh, subdivision);
- attr_create_vertex_color(scene, mesh, b_mesh, subdivision);
- attr_create_sculpt_vertex_color(scene, mesh, b_mesh, subdivision);
attr_create_random_per_island(scene, mesh, b_mesh, subdivision);
attr_create_generic(scene, mesh, b_mesh, subdivision, need_motion, motion_scale);
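Since Cycles has no per-edge attribute storage, the EDGE domain branch of fill_generic_attribute() scatters each edge value onto its two endpoint vertices and divides by the incidence count (the uchar4 case is asserted out because, as the comment notes, averaging in place would not work for it). A simplified sketch of that averaging, with a generic value type T standing in for the float/float3/float4 cases:

  #include <cstddef>
  #include <vector>

  struct Edge { int v[2]; };

  /* Average per-edge values onto the vertices they connect.
   * vertex_values must be zero-initialized and sized to the vertex count. */
  template<typename T>
  void average_edge_to_vertex(const std::vector<Edge> &edges,
                              const std::vector<T> &edge_values,
                              std::vector<T> &vertex_values)
  {
    std::vector<int> count(vertex_values.size(), 0);
    for (size_t e = 0; e < edges.size(); e++) {
      for (int k = 0; k < 2; k++) {
        vertex_values[edges[e].v[k]] += edge_values[e];
        count[edges[e].v[k]]++;
      }
    }
    for (size_t i = 0; i < vertex_values.size(); i++) {
      if (count[i] > 1) {
        vertex_values[i] /= (float)count[i];
      }
    }
  }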
diff --git a/intern/cycles/blender/volume.cpp b/intern/cycles/blender/volume.cpp
index 8dd2d45c0b6..61b2f9ee276 100644
--- a/intern/cycles/blender/volume.cpp
+++ b/intern/cycles/blender/volume.cpp
@@ -219,7 +219,10 @@ static void sync_smoke_volume(
class BlenderVolumeLoader : public VDBImageLoader {
public:
- BlenderVolumeLoader(BL::BlendData &b_data, BL::Volume &b_volume, const string &grid_name)
+ BlenderVolumeLoader(BL::BlendData &b_data,
+ BL::Volume &b_volume,
+ const string &grid_name,
+ BL::VolumeRender::precision_enum precision_)
: VDBImageLoader(grid_name), b_volume(b_volume)
{
b_volume.grids.load(b_data.ptr.data);
@@ -241,6 +244,22 @@ class BlenderVolumeLoader : public VDBImageLoader {
}
}
#endif
+#ifdef WITH_NANOVDB
+ switch (precision_) {
+ case BL::VolumeRender::precision_FULL:
+ precision = 32;
+ break;
+ case BL::VolumeRender::precision_HALF:
+ precision = 16;
+ break;
+ default:
+ case BL::VolumeRender::precision_VARIABLE:
+ precision = 0;
+ break;
+ }
+#else
+ (void)precision_;
+#endif
}
BL::Volume b_volume;
@@ -318,7 +337,8 @@ static void sync_volume_object(BL::BlendData &b_data,
volume->attributes.add(std) :
volume->attributes.add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_VOXEL);
- ImageLoader *loader = new BlenderVolumeLoader(b_data, b_volume, name.string());
+ ImageLoader *loader = new BlenderVolumeLoader(
+ b_data, b_volume, name.string(), b_render.precision());
ImageParams params;
params.frame = b_volume.grids.frame();
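The new precision argument threads Blender's per-volume render setting through to the NanoVDB conversion: 32 for full floats, 16 for half precision, and 0 for the variable setting, letting the loader choose the encoding. A hypothetical standalone mirror of the mapping above, without the Blender RNA types:

  /* Hypothetical mirror of the BL::VolumeRender precision mapping. */
  enum class VolumePrecision { Full, Half, Variable };

  int precision_bits(VolumePrecision p)
  {
    switch (p) {
      case VolumePrecision::Full:
        return 32; /* full-float grid */
      case VolumePrecision::Half:
        return 16; /* 16-bit quantized grid */
      default:
        return 0; /* variable: loader picks the encoding */
    }
  }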
diff --git a/intern/cycles/bvh/build.cpp b/intern/cycles/bvh/build.cpp
index 79e9b800690..1df3517673e 100644
--- a/intern/cycles/bvh/build.cpp
+++ b/intern/cycles/bvh/build.cpp
@@ -811,7 +811,7 @@ BVHNode *BVHBuild::build_node(const BVHRange &range,
/* unalignedLeafSAH = params.sah_primitive_cost * split.leafSAH; */
unalignedSplitSAH = params.sah_node_cost * unaligned_split.bounds.half_area() +
params.sah_primitive_cost * unaligned_split.nodeSAH;
- /* TOOD(sergey): Check we can create leaf already. */
+ /* TODO(sergey): Check we can create leaf already. */
/* Check whether unaligned split is better than the regular one. */
if (unalignedSplitSAH < splitSAH) {
do_unalinged_split = true;
diff --git a/intern/cycles/cmake/external_libs.cmake b/intern/cycles/cmake/external_libs.cmake
index 5b875565c97..6bcd8152874 100644
--- a/intern/cycles/cmake/external_libs.cmake
+++ b/intern/cycles/cmake/external_libs.cmake
@@ -146,8 +146,8 @@ if(CYCLES_STANDALONE_REPOSITORY)
-DOIIO_STATIC_DEFINE
)
- set(OPENIMAGEIO_INCLUDE_DIR ${OPENIMAGEIO_ROOT_DIR}/include)
- set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO_INCLUDE_DIR} ${OPENIMAGEIO_INCLUDE_DIR}/OpenImageIO)
+ set(OPENIMAGEIO_INCLUDE_DIR ${OPENIMAGEIO_ROOT_DIR}/include)
+ set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO_INCLUDE_DIR} ${OPENIMAGEIO_INCLUDE_DIR}/OpenImageIO)
# Special exceptions for libraries which needs explicit debug version
set(OPENIMAGEIO_LIBRARIES
optimized ${OPENIMAGEIO_ROOT_DIR}/lib/OpenImageIO.lib
diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp
index 6908ae5ead3..cb7e909a2d5 100644
--- a/intern/cycles/device/cuda/device_impl.cpp
+++ b/intern/cycles/device/cuda/device_impl.cpp
@@ -457,6 +457,8 @@ void CUDADevice::reserve_local_memory(const uint kernel_features)
/* Use the biggest kernel for estimation. */
const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE :
+ (kernel_features & KERNEL_FEATURE_MNEE) ?
+ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE :
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE;
/* Launch kernel, using just 1 block appears sufficient to reserve memory for all
@@ -1084,7 +1086,9 @@ void CUDADevice::tex_alloc(device_texture &mem)
need_texture_info = true;
if (mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT &&
- mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
+ mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3 &&
+ mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FPN &&
+ mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FP16) {
CUDA_RESOURCE_DESC resDesc;
memset(&resDesc, 0, sizeof(resDesc));
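reserve_local_memory() estimates stack usage with the biggest kernel that can actually be launched, so the chained conditional now also considers MNEE. The same selection, lifted into a hypothetical helper for readability (feature flags and kernel names as in the diff); the identical change lands in the HIP backend below:

  DeviceKernel biggest_shade_surface_kernel(const unsigned int kernel_features)
  {
    if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE)
      return DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE;
    if (kernel_features & KERNEL_FEATURE_MNEE)
      return DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE;
    return DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE;
  }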
diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp
index 7159277b325..ea68c821166 100644
--- a/intern/cycles/device/hip/device_impl.cpp
+++ b/intern/cycles/device/hip/device_impl.cpp
@@ -420,6 +420,8 @@ void HIPDevice::reserve_local_memory(const uint kernel_features)
/* Use the biggest kernel for estimation. */
const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE :
+ (kernel_features & KERNEL_FEATURE_MNEE) ?
+ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE :
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE;
/* Launch kernel, using just 1 block appears sufficient to reserve memory for all
@@ -1042,7 +1044,9 @@ void HIPDevice::tex_alloc(device_texture &mem)
need_texture_info = true;
if (mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT &&
- mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
+ mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3 &&
+ mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FPN &&
+ mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FP16) {
/* Bindless textures. */
hipResourceDesc resDesc;
memset(&resDesc, 0, sizeof(resDesc));
diff --git a/intern/cycles/device/kernel.cpp b/intern/cycles/device/kernel.cpp
index 072731a2af5..96a99cd62cd 100644
--- a/intern/cycles/device/kernel.cpp
+++ b/intern/cycles/device/kernel.cpp
@@ -33,6 +33,8 @@ const char *device_kernel_as_string(DeviceKernel kernel)
return "integrator_shade_surface";
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
return "integrator_shade_surface_raytrace";
+ case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
+ return "integrator_shade_surface_mnee";
case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME:
return "integrator_shade_volume";
case DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL:
diff --git a/intern/cycles/device/memory.cpp b/intern/cycles/device/memory.cpp
index 4c068dbdd3e..40cf2573cfb 100644
--- a/intern/cycles/device/memory.cpp
+++ b/intern/cycles/device/memory.cpp
@@ -165,6 +165,8 @@ device_texture::device_texture(Device *device,
case IMAGE_DATA_TYPE_BYTE:
case IMAGE_DATA_TYPE_NANOVDB_FLOAT:
case IMAGE_DATA_TYPE_NANOVDB_FLOAT3:
+ case IMAGE_DATA_TYPE_NANOVDB_FPN:
+ case IMAGE_DATA_TYPE_NANOVDB_FP16:
data_type = TYPE_UCHAR;
data_elements = 1;
break;
diff --git a/intern/cycles/device/metal/bvh.mm b/intern/cycles/device/metal/bvh.mm
index 086fbb093ba..09c4ace081e 100644
--- a/intern/cycles/device/metal/bvh.mm
+++ b/intern/cycles/device/metal/bvh.mm
@@ -11,6 +11,7 @@
# include "util/progress.h"
# include "device/metal/bvh.h"
+# include "device/metal/util.h"
CCL_NAMESPACE_BEGIN
@@ -18,6 +19,7 @@ CCL_NAMESPACE_BEGIN
{ \
string str = string_printf(__VA_ARGS__); \
progress.set_substatus(str); \
+ metal_printf("%s\n", str.c_str()); \
}
BVHMetal::BVHMetal(const BVHParams &params_,
diff --git a/intern/cycles/device/metal/device_impl.h b/intern/cycles/device/metal/device_impl.h
index 7506b9b069f..0e6817d94f8 100644
--- a/intern/cycles/device/metal/device_impl.h
+++ b/intern/cycles/device/metal/device_impl.h
@@ -31,6 +31,8 @@ class MetalDevice : public Device {
string source[PSO_NUM];
string source_md5[PSO_NUM];
+ bool capture_enabled = false;
+
KernelParamsMetal launch_params = {0};
/* MetalRT members ----------------------------------*/
diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm
index e1438a9d6e2..086bf0af979 100644
--- a/intern/cycles/device/metal/device_impl.mm
+++ b/intern/cycles/device/metal/device_impl.mm
@@ -86,6 +86,10 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
use_metalrt = (atoi(metalrt) != 0);
}
+ if (getenv("CYCLES_DEBUG_METAL_CAPTURE_KERNEL")) {
+ capture_enabled = true;
+ }
+
MTLArgumentDescriptor *arg_desc_params = [[MTLArgumentDescriptor alloc] init];
arg_desc_params.dataType = MTLDataTypePointer;
arg_desc_params.access = MTLArgumentAccessReadOnly;
@@ -394,7 +398,7 @@ MetalDevice::MetalMem *MetalDevice::generic_alloc(device_memory &mem)
}
if (size > 0) {
- if (mem.type == MEM_DEVICE_ONLY) {
+ if (mem.type == MEM_DEVICE_ONLY && !capture_enabled) {
options = MTLResourceStorageModePrivate;
}
@@ -697,8 +701,7 @@ void MetalDevice::tex_alloc_as_buffer(device_texture &mem)
void MetalDevice::tex_alloc(device_texture &mem)
{
/* Check that dimensions fit within maximum allowable size.
- See https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
- */
+ * See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf */
if (mem.data_width > 16384 || mem.data_height > 16384) {
set_error(string_printf(
"Texture exceeds maximum allowed size of 16384 x 16384 (requested: %zu x %zu)",
diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index 304efc813ec..fec4cd80466 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -35,7 +35,8 @@ bool kernel_has_intersection(DeviceKernel device_kernel)
device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK ||
- device_kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE);
+ device_kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
+ device_kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE);
}
struct ShaderCache {
diff --git a/intern/cycles/device/metal/queue.h b/intern/cycles/device/metal/queue.h
index 6cc84a20787..de20514de0b 100644
--- a/intern/cycles/device/metal/queue.h
+++ b/intern/cycles/device/metal/queue.h
@@ -12,8 +12,6 @@
# include "device/metal/util.h"
# include "kernel/device/metal/globals.h"
-# define metal_printf VLOG(4) << string_printf
-
CCL_NAMESPACE_BEGIN
class MetalDevice;
@@ -77,6 +75,38 @@ class MetalDeviceQueue : public DeviceQueue {
void close_compute_encoder();
void close_blit_encoder();
+
+ bool verbose_tracing = false;
+
+ /* Per-kernel profiling (see CYCLES_METAL_PROFILING). */
+
+ struct TimingData {
+ DeviceKernel kernel;
+ int work_size;
+ uint64_t timing_id;
+ };
+ std::vector<TimingData> command_encoder_labels;
+ id<MTLSharedEvent> timing_shared_event = nil;
+ uint64_t timing_shared_event_id;
+ uint64_t command_buffer_start_timing_id;
+
+ struct TimingStats {
+ double total_time = 0.0;
+ uint64_t total_work_size = 0;
+ uint64_t num_dispatches = 0;
+ };
+ TimingStats timing_stats[DEVICE_KERNEL_NUM];
+ double last_completion_time = 0.0;
+
+ /* .gputrace capture (see CYCLES_DEBUG_METAL_CAPTURE_...). */
+
+ id<MTLCaptureScope> mtlCaptureScope = nil;
+ DeviceKernel capture_kernel;
+ int capture_dispatch = 0;
+ int capture_dispatch_counter = 0;
+ bool is_capturing = false;
+ bool is_capturing_to_disk = false;
+ bool has_captured_to_disk = false;
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm
index ec10e091b25..8b2d5d81859 100644
--- a/intern/cycles/device/metal/queue.mm
+++ b/intern/cycles/device/metal/queue.mm
@@ -37,6 +37,61 @@ MetalDeviceQueue::MetalDeviceQueue(MetalDevice *device)
}
wait_semaphore = dispatch_semaphore_create(0);
+
+ if (@available(macos 10.14, *)) {
+ if (getenv("CYCLES_METAL_PROFILING")) {
+ /* Enable per-kernel timing breakdown (shown at end of render). */
+ timing_shared_event = [mtlDevice newSharedEvent];
+ }
+ if (getenv("CYCLES_METAL_DEBUG")) {
+ /* Enable very verbose tracing (shows every dispatch). */
+ verbose_tracing = true;
+ }
+ timing_shared_event_id = 1;
+ }
+
+ capture_kernel = DeviceKernel(-1);
+ if (auto capture_kernel_str = getenv("CYCLES_DEBUG_METAL_CAPTURE_KERNEL")) {
+ /* Enable .gputrace capture for the specified DeviceKernel. */
+ MTLCaptureManager *captureManager = [MTLCaptureManager sharedCaptureManager];
+ mtlCaptureScope = [captureManager newCaptureScopeWithDevice:mtlDevice];
+ mtlCaptureScope.label = [NSString stringWithFormat:@"Cycles kernel dispatch"];
+ [captureManager setDefaultCaptureScope:mtlCaptureScope];
+
+ capture_dispatch = -1;
+ if (auto capture_dispatch_str = getenv("CYCLES_DEBUG_METAL_CAPTURE_DISPATCH")) {
+ capture_dispatch = atoi(capture_dispatch_str);
+ capture_dispatch_counter = 0;
+ }
+
+ capture_kernel = DeviceKernel(atoi(capture_kernel_str));
+ printf("Capture kernel: %d = %s\n", capture_kernel, device_kernel_as_string(capture_kernel));
+
+ if (auto capture_url = getenv("CYCLES_DEBUG_METAL_CAPTURE_URL")) {
+ if (@available(macos 10.15, *)) {
+ if ([captureManager supportsDestination:MTLCaptureDestinationGPUTraceDocument]) {
+
+ MTLCaptureDescriptor *captureDescriptor = [[MTLCaptureDescriptor alloc] init];
+ captureDescriptor.captureObject = mtlCaptureScope;
+ captureDescriptor.destination = MTLCaptureDestinationGPUTraceDocument;
+ captureDescriptor.outputURL = [NSURL fileURLWithPath:@(capture_url)];
+
+ NSError *error;
+ if (![captureManager startCaptureWithDescriptor:captureDescriptor error:&error]) {
+ NSString *err = [error localizedDescription];
+ printf("Start capture failed: %s\n", [err UTF8String]);
+ }
+ else {
+ printf("Capture started (URL: %s)\n", capture_url);
+ is_capturing_to_disk = true;
+ }
+ }
+ else {
+ printf("Capture to file is not supported\n");
+ }
+ }
+ }
+ }
}
MetalDeviceQueue::~MetalDeviceQueue()
@@ -58,6 +113,56 @@ MetalDeviceQueue::~MetalDeviceQueue()
[mtlCommandQueue release];
mtlCommandQueue = nil;
}
+
+ if (mtlCaptureScope) {
+ [mtlCaptureScope release];
+ }
+
+ double total_time = 0.0;
+
+ /* Show per-kernel timings, if gathered (see CYCLES_METAL_PROFILING). */
+ int64_t total_work_size = 0;
+ int64_t num_dispatches = 0;
+ for (auto &stat : timing_stats) {
+ total_time += stat.total_time;
+ total_work_size += stat.total_work_size;
+ num_dispatches += stat.num_dispatches;
+ }
+
+ if (num_dispatches) {
+ printf("\nMetal dispatch stats:\n\n");
+ auto header = string_printf("%-40s %16s %12s %12s %7s %7s",
+ "Kernel name",
+ "Total threads",
+ "Dispatches",
+ "Avg. T/D",
+ "Time",
+ "Time%");
+ auto divider = string(header.length(), '-');
+ printf("%s\n%s\n%s\n", divider.c_str(), header.c_str(), divider.c_str());
+
+ for (size_t i = 0; i < DEVICE_KERNEL_NUM; i++) {
+ auto &stat = timing_stats[i];
+ if (stat.num_dispatches > 0) {
+ printf("%-40s %16s %12s %12s %6.2fs %6.2f%%\n",
+ device_kernel_as_string(DeviceKernel(i)),
+ string_human_readable_number(stat.total_work_size).c_str(),
+ string_human_readable_number(stat.num_dispatches).c_str(),
+ string_human_readable_number(stat.total_work_size / stat.num_dispatches).c_str(),
+ stat.total_time,
+ stat.total_time * 100.0 / total_time);
+ }
+ }
+ printf("%s\n", divider.c_str());
+ printf("%-40s %16s %12s %12s %6.2fs %6.2f%%\n",
+ "",
+ "",
+ string_human_readable_number(num_dispatches).c_str(),
+ "",
+ total_time,
+ 100.0);
+ printf("%s\n\n", divider.c_str());
+ }
}
int MetalDeviceQueue::num_concurrent_states(const size_t /*state_size*/) const
@@ -101,6 +206,19 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
const int work_size,
DeviceKernelArguments const &args)
{
+ if (kernel == capture_kernel) {
+ if (capture_dispatch < 0 || capture_dispatch == capture_dispatch_counter) {
+ /* Start gputrace capture. */
+ if (mtlCommandBuffer) {
+ synchronize();
+ }
+ [mtlCaptureScope beginScope];
+ printf("[mtlCaptureScope beginScope]\n");
+ is_capturing = true;
+ }
+ capture_dispatch_counter += 1;
+ }
+
if (metal_device->have_error()) {
return false;
}
@@ -110,6 +228,10 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
id<MTLComputeCommandEncoder> mtlComputeCommandEncoder = get_compute_encoder(kernel);
+ if (timing_shared_event) {
+ command_encoder_labels.push_back({kernel, work_size, timing_shared_event_id});
+ }
+
/* Determine size requirement for argument buffer. */
size_t arg_buffer_length = 0;
for (size_t i = 0; i < args.count; i++) {
@@ -189,6 +311,14 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
/* Encode KernelParamsMetal buffers */
[metal_device->mtlBufferKernelParamsEncoder setArgumentBuffer:arg_buffer offset:globals_offsets];
+ if (verbose_tracing || timing_shared_event || is_capturing) {
+ /* Add human-readable labels if we're doing any form of debugging / profiling. */
+ mtlComputeCommandEncoder.label = [[NSString alloc]
+ initWithFormat:@"Metal queue launch %s, work_size %d",
+ device_kernel_as_string(kernel),
+ work_size];
+ }
+
/* this relies on IntegratorStateGPU layout being contiguous device_ptrs */
const size_t pointer_block_end = offsetof(KernelParamsMetal, __integrator_state) +
sizeof(IntegratorStateGPU);
@@ -196,7 +326,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
int pointer_index = offset / sizeof(device_ptr);
MetalDevice::MetalMem *mmem = *(
MetalDevice::MetalMem **)((uint8_t *)&metal_device->launch_params + offset);
- if (mmem && (mmem->mtlBuffer || mmem->mtlTexture)) {
+ if (mmem && mmem->mem && (mmem->mtlBuffer || mmem->mtlTexture)) {
[metal_device->mtlBufferKernelParamsEncoder setBuffer:mmem->mtlBuffer
offset:0
atIndex:pointer_index];
@@ -270,6 +400,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK:
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
+ case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
break;
default:
bvhMetalRT = nil;
@@ -343,12 +474,53 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
}
}];
+ if (verbose_tracing || is_capturing) {
+ /* Force a sync if we've enabled step-by-step verbose tracing or if we're capturing. */
+ synchronize();
+
+ /* Show queue counters and dispatch timing. */
+ if (verbose_tracing) {
+ if (kernel == DEVICE_KERNEL_INTEGRATOR_RESET) {
+ printf(
+ "_____________________________________.____________________.______________.___________"
+ "______________________________________\n");
+ }
+
+ printf("%-40s| %7d threads |%5.2fms | buckets [",
+ device_kernel_as_string(kernel),
+ work_size,
+ last_completion_time * 1000.0);
+ std::lock_guard<std::recursive_mutex> lock(metal_device->metal_mem_map_mutex);
+ for (auto &it : metal_device->metal_mem_map) {
+ const string c_integrator_queue_counter = "integrator_queue_counter";
+ if (it.first->name == c_integrator_queue_counter) {
+ /* Workaround "device_copy_from" being protected. */
+ struct MyDeviceMemory : device_memory {
+ void device_copy_from__IntegratorQueueCounter()
+ {
+ device_copy_from(0, data_width, 1, sizeof(IntegratorQueueCounter));
+ }
+ };
+ ((MyDeviceMemory *)it.first)->device_copy_from__IntegratorQueueCounter();
+
+ if (IntegratorQueueCounter *queue_counter = (IntegratorQueueCounter *)
+ it.first->host_pointer) {
+ for (int i = 0; i < DEVICE_KERNEL_INTEGRATOR_NUM; i++)
+ printf("%s%d", i == 0 ? "" : ",", int(queue_counter->num_queued[i]));
+ }
+ break;
+ }
+ }
+ printf("]\n");
+ }
+ }
+
return !(metal_device->have_error());
}
bool MetalDeviceQueue::synchronize()
{
- if (metal_device->have_error()) {
+ if (has_captured_to_disk || metal_device->have_error()) {
return false;
}
@@ -358,6 +530,28 @@ bool MetalDeviceQueue::synchronize()
close_blit_encoder();
if (mtlCommandBuffer) {
+ scoped_timer timer;
+ if (timing_shared_event) {
+ /* For per-kernel timing, add event handlers to measure & accumulate dispatch times. */
+ __block double completion_time = 0;
+ for (uint64_t i = command_buffer_start_timing_id; i < timing_shared_event_id; i++) {
+ [timing_shared_event notifyListener:shared_event_listener
+ atValue:i
+ block:^(id<MTLSharedEvent> sharedEvent, uint64_t value) {
+ completion_time = timer.get_time() - completion_time;
+ last_completion_time = completion_time;
+ for (auto label : command_encoder_labels) {
+ if (label.timing_id == value) {
+ TimingStats &stat = timing_stats[label.kernel];
+ stat.num_dispatches++;
+ stat.total_time += completion_time;
+ stat.total_work_size += label.work_size;
+ }
+ }
+ }];
+ }
+ }
+
uint64_t shared_event_id = this->shared_event_id++;
if (@available(macos 10.14, *)) {
@@ -373,6 +567,22 @@ bool MetalDeviceQueue::synchronize()
dispatch_semaphore_wait(wait_semaphore, DISPATCH_TIME_FOREVER);
}
+ if (is_capturing) {
+ [mtlCaptureScope endScope];
+ is_capturing = false;
+ printf("[mtlCaptureScope endScope]\n");
+
+ if (is_capturing_to_disk) {
+ if (@available(macos 10.15, *)) {
+ [[MTLCaptureManager sharedCaptureManager] stopCapture];
+ has_captured_to_disk = true;
+ is_capturing_to_disk = false;
+ is_capturing = false;
+ printf("Capture stopped\n");
+ }
+ }
+ }
+
[mtlCommandBuffer release];
for (const CopyBack &mmem : copy_back_mem) {
@@ -384,6 +594,7 @@ bool MetalDeviceQueue::synchronize()
metal_device->flush_delayed_free_list();
mtlCommandBuffer = nil;
+ command_encoder_labels.clear();
}
return !(metal_device->have_error());
@@ -529,6 +740,13 @@ id<MTLComputeCommandEncoder> MetalDeviceQueue::get_compute_encoder(DeviceKernel
{
bool concurrent = (kernel < DEVICE_KERNEL_INTEGRATOR_NUM);
+ if (timing_shared_event) {
+ /* Close the current encoder to ensure we're able to capture per-encoder timing data. */
+ if (mtlComputeEncoder) {
+ close_compute_encoder();
+ }
+ }
+
if (@available(macos 10.14, *)) {
if (mtlComputeEncoder) {
if (mtlComputeEncoder.dispatchType == concurrent ? MTLDispatchTypeConcurrent :
@@ -574,6 +792,7 @@ id<MTLBlitCommandEncoder> MetalDeviceQueue::get_blit_encoder()
if (!mtlCommandBuffer) {
mtlCommandBuffer = [mtlCommandQueue commandBuffer];
[mtlCommandBuffer retain];
+ command_buffer_start_timing_id = timing_shared_event_id;
}
mtlBlitEncoder = [mtlCommandBuffer blitCommandEncoder];
@@ -584,6 +803,10 @@ void MetalDeviceQueue::close_compute_encoder()
{
[mtlComputeEncoder endEncoding];
mtlComputeEncoder = nil;
+
+ if (timing_shared_event) {
+ [mtlCommandBuffer encodeSignalEvent:timing_shared_event value:timing_shared_event_id++];
+ }
}
void MetalDeviceQueue::close_blit_encoder()
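The per-kernel profiling added here works in two halves: at enqueue time each compute encoder is tagged with a monotonically increasing MTLSharedEvent value (together with the kernel and work size), and at synchronize() time a listener per value attributes the measured completion time back to that dispatch. The value-to-label lookup is essentially the linear scan below, a reduced sketch reusing the TimingData layout from queue.h:

  #include <cstdint>
  #include <vector>

  struct TimingData {
    int kernel;         /* DeviceKernel index */
    int work_size;
    uint64_t timing_id; /* MTLSharedEvent value signalled for this dispatch */
  };

  /* Find the dispatch that signalled a given shared-event value. */
  const TimingData *find_dispatch(const std::vector<TimingData> &labels, uint64_t value)
  {
    for (const TimingData &label : labels) {
      if (label.timing_id == value)
        return &label;
    }
    return nullptr;
  }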
diff --git a/intern/cycles/device/metal/util.h b/intern/cycles/device/metal/util.h
index cc653ab7e12..f728967835d 100644
--- a/intern/cycles/device/metal/util.h
+++ b/intern/cycles/device/metal/util.h
@@ -14,6 +14,8 @@
# include "util/thread.h"
+# define metal_printf VLOG(4) << string_printf
+
CCL_NAMESPACE_BEGIN
enum MetalGPUVendor {
diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp
index 9fc265bc327..9ab9bbb59c5 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -452,9 +452,10 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
}
{ /* Load and compile PTX module with OptiX kernels. */
- string ptx_data, ptx_filename = path_get((kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
- "lib/kernel_optix_shader_raytrace.ptx" :
- "lib/kernel_optix.ptx");
+ string ptx_data, ptx_filename = path_get(
+ (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ?
+ "lib/kernel_optix_shader_raytrace.ptx" :
+ "lib/kernel_optix.ptx");
if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) {
if (!getenv("OPTIX_ROOT_DIR")) {
set_error(
@@ -464,7 +465,9 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
}
ptx_filename = compile_kernel(
kernel_features,
- (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ? "kernel_shader_raytrace" : "kernel",
+ (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ?
+ "kernel_shader_raytrace" :
+ "kernel",
"optix",
true);
}
@@ -550,7 +553,8 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
OptixBuiltinISOptions builtin_options = {};
# if OPTIX_ABI_VERSION >= 55
builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
- builtin_options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE;
+ builtin_options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE |
+ OPTIX_BUILD_FLAG_ALLOW_COMPACTION;
builtin_options.curveEndcapFlags = OPTIX_CURVE_ENDCAP_DEFAULT; /* Disable end-caps. */
# else
builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
@@ -620,6 +624,14 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
"__direct_callable__svm_node_bevel";
}
+ /* MNEE. */
+ if (kernel_features & KERNEL_FEATURE_MNEE) {
+ group_descs[PG_RGEN_SHADE_SURFACE_MNEE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
+ group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.module = optix_module;
+ group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.entryFunctionName =
+ "__raygen__kernel_optix_integrator_shade_surface_mnee";
+ }
+
optix_assert(optixProgramGroupCreate(
context, group_descs, NUM_PROGRAM_GROUPS, &group_options, nullptr, 0, groups));
@@ -701,6 +713,46 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipelines[PIP_SHADE_RAYTRACE], 0, dss, css, motion_blur ? 3 : 2));
}
+ if (kernel_features & KERNEL_FEATURE_MNEE) {
+ /* Create MNEE pipeline. */
+ vector<OptixProgramGroup> pipeline_groups;
+ pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
+ pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_MNEE]);
+ pipeline_groups.push_back(groups[PG_MISS]);
+ pipeline_groups.push_back(groups[PG_HITD]);
+ pipeline_groups.push_back(groups[PG_HITS]);
+ pipeline_groups.push_back(groups[PG_HITL]);
+ pipeline_groups.push_back(groups[PG_HITV]);
+ if (motion_blur) {
+ pipeline_groups.push_back(groups[PG_HITD_MOTION]);
+ pipeline_groups.push_back(groups[PG_HITS_MOTION]);
+ }
+ if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
+ pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
+ pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
+ }
+ pipeline_groups.push_back(groups[PG_CALL_SVM_AO]);
+ pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]);
+
+ optix_assert(optixPipelineCreate(context,
+ &pipeline_options,
+ &link_options,
+ pipeline_groups.data(),
+ pipeline_groups.size(),
+ nullptr,
+ 0,
+ &pipelines[PIP_SHADE_MNEE]));
+
+ /* Combine ray generation and trace continuation stack size. */
+ const unsigned int css = stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG +
+ link_options.maxTraceDepth * trace_css;
+ const unsigned int dss = 0;
+
+ /* Set stack size depending on pipeline options. */
+ optix_assert(
+ optixPipelineSetStackSize(pipelines[PIP_SHADE_MNEE], 0, dss, css, motion_blur ? 3 : 2));
+ }
+
{ /* Create intersection-only pipeline. */
vector<OptixProgramGroup> pipeline_groups;
pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
@@ -1186,7 +1238,7 @@ bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context)
const OptixResult result = optixDenoiserSetup(
denoiser_.optix_denoiser,
0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
- on a stream that is not the default stream */
+ * on a stream that is not the default stream. */
tile_size.x + denoiser_.sizes.overlapWindowSizeInPixels * 2,
tile_size.y + denoiser_.sizes.overlapWindowSizeInPixels * 2,
denoiser_.state.device_pointer,
@@ -1336,7 +1388,10 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
OptixAccelBufferSizes sizes = {};
OptixAccelBuildOptions options = {};
options.operation = operation;
- if (use_fast_trace_bvh) {
+ if (use_fast_trace_bvh ||
+ /* The build flags have to match the ones used to query the built-in curve intersection
+ program (see optixBuiltinISModuleGet above) */
+ build_input.type == OPTIX_BUILD_INPUT_TYPE_CURVES) {
VLOG(2) << "Using fast to trace OptiX BVH";
options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE | OPTIX_BUILD_FLAG_ALLOW_COMPACTION;
}
@@ -1861,7 +1916,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
{
/* Can disable __anyhit__kernel_optix_visibility_test by default (except for thick curves,
* since it needs to filter out end-caps there).
-
+ *
* It is enabled where necessary (visibility mask exceeds 8 bits or the other any-hit
* programs like __anyhit__kernel_optix_shadow_all_hit) via OPTIX_RAY_FLAG_ENFORCE_ANYHIT.
*/
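Stack sizing for the new MNEE pipeline follows the same rule as the shade-raytrace pipeline: the continuation stack is the raygen program's own continuation stack plus one trace continuation per level of trace depth, with no direct-callable stack reserved (dss = 0). As a formula, using the names from the surrounding code:

  /* Continuation stack size for a raygen-driven pipeline:
   *   css = cssRG + maxTraceDepth * trace_css, with dss = 0. */
  unsigned int continuation_stack_size(unsigned int cssRG,
                                       unsigned int max_trace_depth,
                                       unsigned int trace_css)
  {
    return cssRG + max_trace_depth * trace_css;
  }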
diff --git a/intern/cycles/device/optix/device_impl.h b/intern/cycles/device/optix/device_impl.h
index 1f53c729c3f..817afdc8384 100644
--- a/intern/cycles/device/optix/device_impl.h
+++ b/intern/cycles/device/optix/device_impl.h
@@ -24,6 +24,7 @@ enum {
PG_RGEN_INTERSECT_SUBSURFACE,
PG_RGEN_INTERSECT_VOLUME_STACK,
PG_RGEN_SHADE_SURFACE_RAYTRACE,
+ PG_RGEN_SHADE_SURFACE_MNEE,
PG_MISS,
PG_HITD, /* Default hit group. */
PG_HITS, /* __SHADOW_RECORD_ALL__ hit group. */
@@ -46,7 +47,7 @@ static const int CALLABLE_PROGRAM_GROUPS_BASE = PG_CALL_SVM_AO;
static const int NUM_CALLABLE_PROGRAM_GROUPS = 2;
/* List of OptiX pipelines. */
-enum { PIP_SHADE_RAYTRACE, PIP_INTERSECT, NUM_PIPELINES };
+enum { PIP_SHADE_RAYTRACE, PIP_SHADE_MNEE, PIP_INTERSECT, NUM_PIPELINES };
/* A single shader binding table entry. */
struct SbtRecord {
diff --git a/intern/cycles/device/optix/queue.cpp b/intern/cycles/device/optix/queue.cpp
index d635512c58a..366bf95269d 100644
--- a/intern/cycles/device/optix/queue.cpp
+++ b/intern/cycles/device/optix/queue.cpp
@@ -28,6 +28,7 @@ void OptiXDeviceQueue::init_execution()
static bool is_optix_specific_kernel(DeviceKernel kernel)
{
return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
+ kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE ||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
@@ -63,7 +64,8 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
cuda_stream_));
if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
- kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
+ kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
+ kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE) {
cuda_device_assert(
cuda_device_,
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
@@ -82,6 +84,10 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
pipeline = optix_device->pipelines[PIP_SHADE_RAYTRACE];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_RAYTRACE * sizeof(SbtRecord);
break;
+ case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
+ pipeline = optix_device->pipelines[PIP_SHADE_MNEE];
+ sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_MNEE * sizeof(SbtRecord);
+ break;
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
pipeline = optix_device->pipelines[PIP_INTERSECT];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_CLOSEST * sizeof(SbtRecord);
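Dispatching the MNEE kernel through OptiX only needs two things: the dedicated PIP_SHADE_MNEE pipeline and a raygen SBT record pointing at the matching program group. Because all SBT records here share one stride, the record address is plain pointer arithmetic, roughly:

  /* Hypothetical helper: device address of a program group's SBT record,
   * assuming the contiguous fixed-stride record array used above. */
  CUdeviceptr raygen_record_address(CUdeviceptr sbt_data_ptr, int program_group_index)
  {
    return sbt_data_ptr + program_group_index * sizeof(SbtRecord);
  }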
diff --git a/intern/cycles/graph/node_enum.h b/intern/cycles/graph/node_enum.h
index b3a55efb327..6c8bfed7ec8 100644
--- a/intern/cycles/graph/node_enum.h
+++ b/intern/cycles/graph/node_enum.h
@@ -19,8 +19,10 @@ struct NodeEnum {
}
void insert(const char *x, int y)
{
- left[ustring(x)] = y;
- right[y] = ustring(x);
+ ustring ustr_x(x);
+
+ left[ustr_x] = y;
+ right[y] = ustr_x;
}
bool exists(ustring x) const
diff --git a/intern/cycles/hydra/display_driver.cpp b/intern/cycles/hydra/display_driver.cpp
index a809ace63e2..0c0b577c358 100644
--- a/intern/cycles/hydra/display_driver.cpp
+++ b/intern/cycles/hydra/display_driver.cpp
@@ -23,10 +23,18 @@ HdCyclesDisplayDriver::HdCyclesDisplayDriver(HdCyclesSession *renderParam, Hgi *
HdCyclesDisplayDriver::~HdCyclesDisplayDriver()
{
- deinit();
+ if (texture_) {
+ _hgi->DestroyTexture(&texture_);
+ }
+
+ if (gl_pbo_id_) {
+ glDeleteBuffers(1, &gl_pbo_id_);
+ }
+
+ gl_context_dispose();
}
-void HdCyclesDisplayDriver::init()
+void HdCyclesDisplayDriver::gl_context_create()
{
#ifdef _WIN32
if (!gl_context_) {
@@ -64,16 +72,42 @@ void HdCyclesDisplayDriver::init()
}
}
-void HdCyclesDisplayDriver::deinit()
+bool HdCyclesDisplayDriver::gl_context_enable()
{
- if (texture_) {
- _hgi->DestroyTexture(&texture_);
+#ifdef _WIN32
+ if (!hdc_ || !gl_context_) {
+ return false;
}
- if (gl_pbo_id_) {
- glDeleteBuffers(1, &gl_pbo_id_);
+ mutex_.lock();
+
+ // Do not change context if this is called in the main thread
+ if (wglGetCurrentContext() == nullptr) {
+ if (!TF_VERIFY(wglMakeCurrent((HDC)hdc_, (HGLRC)gl_context_))) {
+ mutex_.unlock();
+ return false;
+ }
+ }
+
+ return true;
+#else
+ return false;
+#endif
+}
+
+void HdCyclesDisplayDriver::gl_context_disable()
+{
+#ifdef _WIN32
+ if (wglGetCurrentContext() == gl_context_) {
+ TF_VERIFY(wglMakeCurrent(nullptr, nullptr));
}
+ mutex_.unlock();
+#endif
+}
+
+void HdCyclesDisplayDriver::gl_context_dispose()
+{
#ifdef _WIN32
if (gl_context_) {
TF_VERIFY(wglDeleteContext((HGLRC)gl_context_));
@@ -90,13 +124,9 @@ bool HdCyclesDisplayDriver::update_begin(const Params &params,
int texture_width,
int texture_height)
{
-#ifdef _WIN32
- if (!hdc_ || !gl_context_) {
+ if (!gl_context_enable()) {
return false;
}
-#endif
-
- graphics_interop_activate();
if (gl_render_sync_) {
glWaitSync((GLsync)gl_render_sync_, 0, GL_TIMEOUT_IGNORED);
@@ -121,15 +151,14 @@ bool HdCyclesDisplayDriver::update_begin(const Params &params,
void HdCyclesDisplayDriver::update_end()
{
gl_upload_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
-
glFlush();
- graphics_interop_deactivate();
+ gl_context_disable();
}
void HdCyclesDisplayDriver::flush()
{
- graphics_interop_activate();
+ gl_context_enable();
if (gl_upload_sync_) {
glWaitSync((GLsync)gl_upload_sync_, 0, GL_TIMEOUT_IGNORED);
@@ -139,7 +168,7 @@ void HdCyclesDisplayDriver::flush()
glWaitSync((GLsync)gl_render_sync_, 0, GL_TIMEOUT_IGNORED);
}
- graphics_interop_deactivate();
+ gl_context_disable();
}
half4 *HdCyclesDisplayDriver::map_texture_buffer()
@@ -179,25 +208,12 @@ DisplayDriver::GraphicsInterop HdCyclesDisplayDriver::graphics_interop_get()
void HdCyclesDisplayDriver::graphics_interop_activate()
{
- mutex_.lock();
-
-#ifdef _WIN32
- // Do not change context if this is called in the main thread
- if (wglGetCurrentContext() == nullptr) {
- TF_VERIFY(wglMakeCurrent((HDC)hdc_, (HGLRC)gl_context_));
- }
-#endif
+ gl_context_enable();
}
void HdCyclesDisplayDriver::graphics_interop_deactivate()
{
-#ifdef _WIN32
- if (wglGetCurrentContext() == gl_context_) {
- TF_VERIFY(wglMakeCurrent(nullptr, nullptr));
- }
-#endif
-
- mutex_.unlock();
+ gl_context_disable();
}
void HdCyclesDisplayDriver::clear()
@@ -214,7 +230,11 @@ void HdCyclesDisplayDriver::draw(const Params &params)
return;
}
- init();
+ if (!renderBuffer->IsResourceUsed()) {
+ return;
+ }
+
+ gl_context_create();
// Cycles 'DisplayDriver' only supports 'half4' format
TF_VERIFY(renderBuffer->GetFormat() == HdFormatFloat16Vec4);
@@ -255,7 +275,6 @@ void HdCyclesDisplayDriver::draw(const Params &params)
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
gl_render_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
-
glFlush();
need_update_ = false;
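The refactor replaces the old init()/deinit() and interop activate/deactivate pairing with explicit gl_context_* calls that have a clear contract: gl_context_enable() locks mutex_ and makes the shared context current, returning false when no context is available (e.g. outside Windows), while gl_context_disable() restores the context and unlocks. A sketch of the call pattern the patch establishes (the methods are private in the actual class; this is illustrative only):

    if (driver.gl_context_enable()) { /* locks mutex_, binds the GL context */
      /* ... PBO upload, fence syncs, draw ... */
      driver.gl_context_disable();    /* restores the context, unlocks mutex_ */
    }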
diff --git a/intern/cycles/hydra/display_driver.h b/intern/cycles/hydra/display_driver.h
index 20086830e6a..2a05397c325 100644
--- a/intern/cycles/hydra/display_driver.h
+++ b/intern/cycles/hydra/display_driver.h
@@ -19,9 +19,6 @@ class HdCyclesDisplayDriver final : public CCL_NS::DisplayDriver {
~HdCyclesDisplayDriver();
private:
- void init();
- void deinit();
-
void next_tile_begin() override;
bool update_begin(const Params &params, int texture_width, int texture_height) override;
@@ -41,6 +38,11 @@ class HdCyclesDisplayDriver final : public CCL_NS::DisplayDriver {
void draw(const Params &params) override;
+ void gl_context_create();
+ bool gl_context_enable();
+ void gl_context_disable();
+ void gl_context_dispose();
+
HdCyclesSession *const _renderParam;
Hgi *const _hgi;
@@ -48,7 +50,6 @@ class HdCyclesDisplayDriver final : public CCL_NS::DisplayDriver {
void *hdc_ = nullptr;
void *gl_context_ = nullptr;
#endif
-
CCL_NS::thread_mutex mutex_;
PXR_NS::HgiTextureHandle texture_;
diff --git a/intern/cycles/hydra/output_driver.cpp b/intern/cycles/hydra/output_driver.cpp
index c5f64ac1c18..f4ea853f243 100644
--- a/intern/cycles/hydra/output_driver.cpp
+++ b/intern/cycles/hydra/output_driver.cpp
@@ -30,11 +30,11 @@ bool HdCyclesOutputDriver::update_render_tile(const Tile &tile)
std::vector<float> pixels;
for (const HdRenderPassAovBinding &aovBinding : _renderParam->GetAovBindings()) {
- if (aovBinding == _renderParam->GetDisplayAovBinding()) {
- continue; // Display AOV binding is already updated by Cycles display driver
- }
-
if (const auto renderBuffer = static_cast<HdCyclesRenderBuffer *>(aovBinding.renderBuffer)) {
+ if (aovBinding == _renderParam->GetDisplayAovBinding() && renderBuffer->IsResourceUsed()) {
+ continue; // Display AOV binding is already updated by Cycles display driver
+ }
+
const HdFormat format = renderBuffer->GetFormat();
if (format == HdFormatInvalid) {
continue; // Skip invalid AOV bindings
diff --git a/intern/cycles/hydra/render_buffer.cpp b/intern/cycles/hydra/render_buffer.cpp
index 4867def0624..4d8b21d1e61 100644
--- a/intern/cycles/hydra/render_buffer.cpp
+++ b/intern/cycles/hydra/render_buffer.cpp
@@ -35,7 +35,7 @@ bool HdCyclesRenderBuffer::Allocate(const GfVec3i &dimensions, HdFormat format,
return false;
}
- const size_t oldSize = _data.size();
+ const size_t oldSize = _dataSize;
const size_t newSize = dimensions[0] * dimensions[1] * HdDataSizeOfFormat(format);
if (oldSize == newSize) {
return true;
@@ -49,8 +49,8 @@ bool HdCyclesRenderBuffer::Allocate(const GfVec3i &dimensions, HdFormat format,
_width = dimensions[0];
_height = dimensions[1];
_format = format;
-
- _data.resize(newSize);
+ _dataSize = newSize;
+ _resourceUsed = false;
return true;
}
@@ -63,6 +63,7 @@ void HdCyclesRenderBuffer::_Deallocate()
_data.clear();
_data.shrink_to_fit();
+ _dataSize = 0;
_resource = VtValue();
}
@@ -74,6 +75,10 @@ void *HdCyclesRenderBuffer::Map()
return nullptr;
}
+ if (_data.size() != _dataSize) {
+ _data.resize(_dataSize);
+ }
+
++_mapped;
return _data.data();
@@ -103,10 +108,17 @@ void HdCyclesRenderBuffer::SetConverged(bool converged)
_converged = converged;
}
+bool HdCyclesRenderBuffer::IsResourceUsed() const
+{
+ return _resourceUsed;
+}
+
VtValue HdCyclesRenderBuffer::GetResource(bool multiSampled) const
{
TF_UNUSED(multiSampled);
+ _resourceUsed = true;
+
return _resource;
}
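Taken together, the render-buffer changes make allocation lazy and observable: Allocate() now only records the requested size, Map() resizes the CPU vector on first use, and GetResource() flips a mutable atomic flag so the output driver can skip readback for AOVs consumed as a GPU resource. A self-contained model of the scheme:

    #include <atomic>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    class LazyBuffer { /* simplified HdCyclesRenderBuffer analogue */
     public:
      void Allocate(size_t new_size) { _dataSize = new_size; _resourceUsed = false; }
      void *Map() {
        if (_data.size() != _dataSize)
          _data.resize(_dataSize); /* pay for CPU storage only when mapped */
        return _data.data();
      }
      bool IsResourceUsed() const { return _resourceUsed; }
      void GetResource() const { _resourceUsed = true; } /* mark as consumed */

     private:
      size_t _dataSize = 0;
      std::vector<uint8_t> _data;
      mutable std::atomic_bool _resourceUsed{false};
    };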
diff --git a/intern/cycles/hydra/render_buffer.h b/intern/cycles/hydra/render_buffer.h
index 8eb874f0068..90629d4aee0 100644
--- a/intern/cycles/hydra/render_buffer.h
+++ b/intern/cycles/hydra/render_buffer.h
@@ -58,6 +58,8 @@ class HdCyclesRenderBuffer final : public PXR_NS::HdRenderBuffer {
void SetConverged(bool converged);
+ bool IsResourceUsed() const;
+
PXR_NS::VtValue GetResource(bool multiSampled = false) const override;
void SetResource(const PXR_NS::VtValue &resource);
@@ -74,9 +76,11 @@ class HdCyclesRenderBuffer final : public PXR_NS::HdRenderBuffer {
unsigned int _width = 0u;
unsigned int _height = 0u;
PXR_NS::HdFormat _format = PXR_NS::HdFormatInvalid;
+ size_t _dataSize = 0;
std::vector<uint8_t> _data;
PXR_NS::VtValue _resource;
+ mutable std::atomic_bool _resourceUsed = false;
std::atomic_int _mapped = 0;
std::atomic_bool _converged = false;
diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp
index 8306460d607..ede81705ae8 100644
--- a/intern/cycles/integrator/path_trace_work_gpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_gpu.cpp
@@ -65,6 +65,8 @@ PathTraceWorkGPU::PathTraceWorkGPU(Device *device,
integrator_shader_sort_counter_(device, "integrator_shader_sort_counter", MEM_READ_WRITE),
integrator_shader_raytrace_sort_counter_(
device, "integrator_shader_raytrace_sort_counter", MEM_READ_WRITE),
+ integrator_shader_mnee_sort_counter_(
+ device, "integrator_shader_mnee_sort_counter", MEM_READ_WRITE),
integrator_shader_sort_prefix_sum_(
device, "integrator_shader_sort_prefix_sum", MEM_READ_WRITE),
integrator_next_main_path_index_(device, "integrator_next_main_path_index", MEM_READ_WRITE),
@@ -188,6 +190,9 @@ void PathTraceWorkGPU::alloc_integrator_sorting()
integrator_shader_raytrace_sort_counter_.alloc(max_shaders);
integrator_shader_raytrace_sort_counter_.zero_to_device();
+ integrator_shader_mnee_sort_counter_.alloc(max_shaders);
+ integrator_shader_mnee_sort_counter_.zero_to_device();
+
integrator_shader_sort_prefix_sum_.alloc(max_shaders);
integrator_shader_sort_prefix_sum_.zero_to_device();
@@ -195,6 +200,8 @@ void PathTraceWorkGPU::alloc_integrator_sorting()
(int *)integrator_shader_sort_counter_.device_pointer;
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] =
(int *)integrator_shader_raytrace_sort_counter_.device_pointer;
+ integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] =
+ (int *)integrator_shader_mnee_sort_counter_.device_pointer;
}
}
@@ -327,6 +334,7 @@ void PathTraceWorkGPU::enqueue_reset()
queue_->zero_to_device(integrator_queue_counter_);
queue_->zero_to_device(integrator_shader_sort_counter_);
queue_->zero_to_device(integrator_shader_raytrace_sort_counter_);
+ queue_->zero_to_device(integrator_shader_mnee_sort_counter_);
/* Tiles enqueue need to know number of active paths, which is based on this counter. Zero the
* counter on the host side because `zero_to_device()` is not doing it. */
@@ -450,6 +458,7 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW:
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE:
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
+ case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME: {
/* Shading kernels with integrator state and render buffer. */
DeviceKernelArguments args(&d_path_index, &buffers_->buffer.device_pointer, &work_size);
@@ -1080,13 +1089,15 @@ int PathTraceWorkGPU::shadow_catcher_count_possible_splits()
bool PathTraceWorkGPU::kernel_uses_sorting(DeviceKernel kernel)
{
return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE ||
- kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE);
+ kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
+ kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE);
}
bool PathTraceWorkGPU::kernel_creates_shadow_paths(DeviceKernel kernel)
{
return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
+ kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
}
@@ -1094,7 +1105,8 @@ bool PathTraceWorkGPU::kernel_creates_ao_paths(DeviceKernel kernel)
{
return (device_scene_->data.kernel_features & KERNEL_FEATURE_AO) &&
(kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE ||
- kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE);
+ kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
+ kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE);
}
bool PathTraceWorkGPU::kernel_is_shadow_path(DeviceKernel kernel)
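The scheduling picture behind these hunks: every kernel that sorts its work by shader owns a dedicated counter buffer, and the GPU-side sort_key_counter table is indexed by the kernel enum, so the MNEE kernel slots in without touching the raytrace path. A simplified host-side model:

    #include <array>
    #include <vector>

    enum DeviceKernel { SHADE_SURFACE, SHADE_SURFACE_RAYTRACE, SHADE_SURFACE_MNEE, NUM_KERNELS };

    struct SortState {
      std::array<std::vector<int>, NUM_KERNELS> counters; /* one per sorted kernel */
      void alloc(int max_shaders) {
        for (auto &c : counters)
          c.assign(max_shaders, 0); /* zeroed, like zero_to_device() */
      }
      int *sort_key_counter(DeviceKernel k) { return counters[k].data(); }
    };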
diff --git a/intern/cycles/integrator/path_trace_work_gpu.h b/intern/cycles/integrator/path_trace_work_gpu.h
index 90f8b8a4509..4c10a221a30 100644
--- a/intern/cycles/integrator/path_trace_work_gpu.h
+++ b/intern/cycles/integrator/path_trace_work_gpu.h
@@ -133,6 +133,7 @@ class PathTraceWorkGPU : public PathTraceWork {
/* Shader sorting. */
device_vector<int> integrator_shader_sort_counter_;
device_vector<int> integrator_shader_raytrace_sort_counter_;
+ device_vector<int> integrator_shader_mnee_sort_counter_;
device_vector<int> integrator_shader_sort_prefix_sum_;
/* Path split. */
device_vector<int> integrator_next_main_path_index_;
diff --git a/intern/cycles/kernel/device/cpu/image.h b/intern/cycles/kernel/device/cpu/image.h
index 3b714a3e580..7809ec5f4a7 100644
--- a/intern/cycles/kernel/device/cpu/image.h
+++ b/intern/cycles/kernel/device/cpu/image.h
@@ -817,6 +817,16 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
}
case IMAGE_DATA_TYPE_NANOVDB_FLOAT3:
return NanoVDBInterpolator<nanovdb::Vec3f>::interp_3d(info, P.x, P.y, P.z, interp);
+ case IMAGE_DATA_TYPE_NANOVDB_FPN: {
+ const float f = NanoVDBInterpolator<nanovdb::FpN, float>::interp_3d(
+ info, P.x, P.y, P.z, interp);
+ return make_float4(f, f, f, 1.0f);
+ }
+ case IMAGE_DATA_TYPE_NANOVDB_FP16: {
+ const float f = NanoVDBInterpolator<nanovdb::Fp16, float>::interp_3d(
+ info, P.x, P.y, P.z, interp);
+ return make_float4(f, f, f, 1.0f);
+ }
#endif
default:
assert(0);
diff --git a/intern/cycles/kernel/device/gpu/image.h b/intern/cycles/kernel/device/gpu/image.h
index c5bc7d88e02..29d851ae478 100644
--- a/intern/cycles/kernel/device/gpu/image.h
+++ b/intern/cycles/kernel/device/gpu/image.h
@@ -125,7 +125,8 @@ kernel_tex_image_interp_tricubic(ccl_global const TextureInfo &info, float x, fl
#ifdef WITH_NANOVDB
template<typename T, typename S>
-ccl_device T kernel_tex_image_interp_tricubic_nanovdb(S &s, float x, float y, float z)
+ccl_device typename nanovdb::NanoGrid<T>::ValueType kernel_tex_image_interp_tricubic_nanovdb(
+ S &s, float x, float y, float z)
{
float px = floorf(x);
float py = floorf(y);
@@ -157,7 +158,7 @@ ccl_device T kernel_tex_image_interp_tricubic_nanovdb(S &s, float x, float y, fl
}
template<typename T>
-ccl_device_noinline T kernel_tex_image_interp_nanovdb(
+ccl_device_noinline typename nanovdb::NanoGrid<T>::ValueType kernel_tex_image_interp_nanovdb(
ccl_global const TextureInfo &info, float x, float y, float z, uint interpolation)
{
using namespace nanovdb;
@@ -238,6 +239,14 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
info, x, y, z, interpolation);
return make_float4(f[0], f[1], f[2], 1.0f);
}
+ if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FPN) {
+ float f = kernel_tex_image_interp_nanovdb<nanovdb::FpN>(info, x, y, z, interpolation);
+ return make_float4(f, f, f, 1.0f);
+ }
+ if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FP16) {
+ float f = kernel_tex_image_interp_nanovdb<nanovdb::Fp16>(info, x, y, z, interpolation);
+ return make_float4(f, f, f, 1.0f);
+ }
#endif
if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) {
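The return-type change matters because NanoVDB's quantized grids (FpN, Fp16) do not hand back their storage type when sampled; the grid's ValueType is float. Deriving the interpolator's result from NanoGrid<T>::ValueType lets one template cover both plain and quantized grids. A sketch, assuming the NanoVDB headers:

    #include <nanovdb/NanoVDB.h>
    #include <type_traits>

    template<typename T>
    using grid_value_t = typename nanovdb::NanoGrid<T>::ValueType;

    static_assert(std::is_same_v<grid_value_t<nanovdb::Fp16>, float>,
                  "Fp16 grids store 16-bit data but sample back as float");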
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index 39f0d3c9cd0..b9a44ccad02 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -245,7 +245,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
ccl_gpu_kernel_postfix
-#ifdef __KERNEL_METAL__
+#if defined(__KERNEL_METAL_APPLE__) && defined(__METALRT__)
constant int __dummy_constant [[function_constant(0)]];
#endif
@@ -260,7 +260,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
if (global_index < work_size) {
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
-#ifdef __KERNEL_METAL__
+#if defined(__KERNEL_METAL_APPLE__) && defined(__METALRT__)
KernelGlobals kg = NULL;
/* Workaround Ambient Occlusion and Bevel nodes not working with Metal.
* Dummy offset should not affect result, but somehow fixes bug! */
@@ -274,6 +274,21 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_postfix
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
+ ccl_gpu_kernel_signature(integrator_shade_surface_mnee,
+ ccl_global const int *path_index_array,
+ ccl_global float *render_buffer,
+ const int work_size)
+{
+ const int global_index = ccl_gpu_global_id_x();
+
+ if (global_index < work_size) {
+ const int state = (path_index_array) ? path_index_array[global_index] : global_index;
+ ccl_gpu_kernel_call(integrator_shade_surface_mnee(NULL, state, render_buffer));
+ }
+}
+ccl_gpu_kernel_postfix
+
+ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_shade_volume,
ccl_global const int *path_index_array,
ccl_global float *render_buffer,
@@ -651,8 +666,9 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb
const int x = render_pixel_index % width; \
const int y = render_pixel_index / width; \
\
- ccl_global const float *buffer = render_buffer + offset + x * kfilm_convert.pass_stride + \
- y * stride * kfilm_convert.pass_stride; \
+ const uint64_t buffer_pixel_index = x + y * stride; \
+ ccl_global const float *buffer = render_buffer + offset + \
+ buffer_pixel_index * kfilm_convert.pass_stride; \
\
ccl_global float *pixel = pixels + \
(render_pixel_index + rgba_offset) * kfilm_convert.pixel_stride; \
@@ -681,8 +697,9 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb
const int x = render_pixel_index % width; \
const int y = render_pixel_index / width; \
\
- ccl_global const float *buffer = render_buffer + offset + x * kfilm_convert.pass_stride + \
- y * stride * kfilm_convert.pass_stride; \
+ const uint64_t buffer_pixel_index = x + y * stride; \
+ ccl_global const float *buffer = render_buffer + offset + \
+ buffer_pixel_index * kfilm_convert.pass_stride; \
\
float pixel[4]; \
film_get_pass_pixel_##variant(&kfilm_convert, buffer, pixel); \
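The film-convert change is a 64-bit overflow fix: the buffer offset is pixel_index * pass_stride, and with 32-bit arithmetic that product wraps for large renders with many passes. A standalone demonstration with plausible numbers:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const int64_t x = 4000, y = 20000, stride = 8192, pass_stride = 64;
      const int64_t offset = (x + y * stride) * pass_stride; /* ~1.05e10 floats */
      /* What 32-bit index arithmetic would have kept: */
      printf("full: %lld  truncated: %d\n", (long long)offset, (int32_t)offset);
      return 0;
    }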
diff --git a/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu b/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu
index e2c5d2ff024..3bd57bc0f1a 100644
--- a/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu
+++ b/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu
@@ -15,3 +15,11 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface_raytr
global_index;
integrator_shade_surface_raytrace(nullptr, path_index, __params.render_buffer);
}
+
+extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface_mnee()
+{
+ const int global_index = optixGetLaunchIndex().x;
+ const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] :
+ global_index;
+ integrator_shade_surface_mnee(nullptr, path_index, __params.render_buffer);
+}
diff --git a/intern/cycles/kernel/film/adaptive_sampling.h b/intern/cycles/kernel/film/adaptive_sampling.h
index ad9b3b08ac5..16867c39d99 100644
--- a/intern/cycles/kernel/film/adaptive_sampling.h
+++ b/intern/cycles/kernel/film/adaptive_sampling.h
@@ -91,13 +91,13 @@ ccl_device void kernel_adaptive_sampling_filter_x(KernelGlobals kg,
bool prev = false;
for (int x = start_x; x < start_x + width; ++x) {
int index = offset + x + y * stride;
- ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride;
+ ccl_global float *buffer = render_buffer + (uint64_t)index * kernel_data.film.pass_stride;
const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3;
if (buffer[aux_w_offset] == 0.0f) {
if (x > start_x && !prev) {
index = index - 1;
- buffer = render_buffer + index * kernel_data.film.pass_stride;
+ buffer = render_buffer + (uint64_t)index * kernel_data.film.pass_stride;
buffer[aux_w_offset] = 0.0f;
}
prev = true;
@@ -124,13 +124,13 @@ ccl_device void kernel_adaptive_sampling_filter_y(KernelGlobals kg,
bool prev = false;
for (int y = start_y; y < start_y + height; ++y) {
int index = offset + x + y * stride;
- ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride;
+ ccl_global float *buffer = render_buffer + (uint64_t)index * kernel_data.film.pass_stride;
const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3;
if (buffer[aux_w_offset] == 0.0f) {
if (y > start_y && !prev) {
index = index - stride;
- buffer = render_buffer + index * kernel_data.film.pass_stride;
+ buffer = render_buffer + (uint64_t)index * kernel_data.film.pass_stride;
buffer[aux_w_offset] = 0.0f;
}
prev = true;
diff --git a/intern/cycles/kernel/integrator/init_from_bake.h b/intern/cycles/kernel/integrator/init_from_bake.h
index 3772db845a8..0db4241b6e3 100644
--- a/intern/cycles/kernel/integrator/init_from_bake.h
+++ b/intern/cycles/kernel/integrator/init_from_bake.h
@@ -102,7 +102,7 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
/* Setup render buffers. */
const int index = INTEGRATOR_STATE(state, path, render_pixel_index);
const int pass_stride = kernel_data.film.pass_stride;
- ccl_global float *buffer = render_buffer + index * pass_stride;
+ ccl_global float *buffer = render_buffer + (uint64_t)index * pass_stride;
ccl_global float *primitive = buffer + kernel_data.film.pass_bake_primitive;
ccl_global float *differential = buffer + kernel_data.film.pass_bake_differential;
@@ -243,9 +243,12 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
/* Setup next kernel to execute. */
const bool use_caustics = kernel_data.integrator.use_caustics &&
(object_flag & SD_OBJECT_CAUSTICS);
- const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE) || use_caustics;
+ const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE);
- if (use_raytrace_kernel) {
+ if (use_caustics) {
+ INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index);
+ }
+ else if (use_raytrace_kernel) {
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index);
}
else {
diff --git a/intern/cycles/kernel/integrator/intersect_closest.h b/intern/cycles/kernel/integrator/intersect_closest.h
index b8ce625c11b..2dfac44b414 100644
--- a/intern/cycles/kernel/integrator/intersect_closest.h
+++ b/intern/cycles/kernel/integrator/intersect_closest.h
@@ -125,9 +125,12 @@ ccl_device_forceinline void integrator_split_shadow_catcher(
const int flags = kernel_tex_fetch(__shaders, shader).flags;
const bool use_caustics = kernel_data.integrator.use_caustics &&
(object_flags & SD_OBJECT_CAUSTICS);
- const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics;
+ const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
- if (use_raytrace_kernel) {
+ if (use_caustics) {
+ INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
+ }
+ else if (use_raytrace_kernel) {
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
}
else {
@@ -150,9 +153,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche
const int object_flags = intersection_get_object_flags(kg, &isect);
const bool use_caustics = kernel_data.integrator.use_caustics &&
(object_flags & SD_OBJECT_CAUSTICS);
- const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics;
+ const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
- if (use_raytrace_kernel) {
+ if (use_caustics) {
+ INTEGRATOR_PATH_NEXT_SORTED(
+ current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
+ }
+ else if (use_raytrace_kernel) {
INTEGRATOR_PATH_NEXT_SORTED(
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
}
@@ -222,8 +229,12 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
const int object_flags = intersection_get_object_flags(kg, isect);
const bool use_caustics = kernel_data.integrator.use_caustics &&
(object_flags & SD_OBJECT_CAUSTICS);
- const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics;
- if (use_raytrace_kernel) {
+ const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
+ if (use_caustics) {
+ INTEGRATOR_PATH_NEXT_SORTED(
+ current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
+ }
+ else if (use_raytrace_kernel) {
INTEGRATOR_PATH_NEXT_SORTED(
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
}
@@ -272,9 +283,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
const int object_flags = intersection_get_object_flags(kg, isect);
const bool use_caustics = kernel_data.integrator.use_caustics &&
(object_flags & SD_OBJECT_CAUSTICS);
- const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics;
+ const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
- if (use_raytrace_kernel) {
+ if (use_caustics) {
+ INTEGRATOR_PATH_NEXT_SORTED(
+ current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
+ }
+ else if (use_raytrace_kernel) {
INTEGRATOR_PATH_NEXT_SORTED(
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
}
diff --git a/intern/cycles/kernel/integrator/megakernel.h b/intern/cycles/kernel/integrator/megakernel.h
index a0c15794470..17ae13ad23f 100644
--- a/intern/cycles/kernel/integrator/megakernel.h
+++ b/intern/cycles/kernel/integrator/megakernel.h
@@ -77,6 +77,9 @@ ccl_device void integrator_megakernel(KernelGlobals kg,
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
integrator_shade_surface_raytrace(kg, state, render_buffer);
break;
+ case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
+ integrator_shade_surface_mnee(kg, state, render_buffer);
+ break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT:
integrator_shade_light(kg, state, render_buffer);
break;
diff --git a/intern/cycles/kernel/integrator/mnee.h b/intern/cycles/kernel/integrator/mnee.h
index 2f7b711e28c..ad83f82d091 100644
--- a/intern/cycles/kernel/integrator/mnee.h
+++ b/intern/cycles/kernel/integrator/mnee.h
@@ -807,6 +807,15 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
* and keep pdf in vertex area measure */
mnee_update_light_sample(kg, vertices[vertex_count - 1].p, ls);
+ /* Save the path state bounce info in case a light path node is used in the refractive interface or
+ * light shader graph. */
+ const int transmission_bounce = INTEGRATOR_STATE(state, path, transmission_bounce);
+ const int diffuse_bounce = INTEGRATOR_STATE(state, path, diffuse_bounce);
+ const int bounce = INTEGRATOR_STATE(state, path, bounce);
+
+ /* Set diffuse bounce info. */
+ INTEGRATOR_STATE_WRITE(state, path, diffuse_bounce) = diffuse_bounce + 1;
+
/* Evaluate light sample
* in case the light has a node-based shader:
* 1. sd_mnee will be used to store light data, which is why we need to do
@@ -814,6 +823,12 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
* interface data at the end of the call for the shadow ray setup to work.
* 2. ls needs to contain the last interface data for the light shader to
* evaluate properly */
+
+ /* Set bounce info in case a light path node is used in the light shader graph. */
+ INTEGRATOR_STATE_WRITE(state, path, transmission_bounce) = transmission_bounce + vertex_count -
+ 1;
+ INTEGRATOR_STATE_WRITE(state, path, bounce) = bounce + vertex_count;
+
float3 light_eval = light_sample_shader_eval(kg, state, sd_mnee, ls, sd->time);
bsdf_eval_mul3(throughput, light_eval / ls->pdf);
@@ -885,6 +900,11 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
false,
LAMP_NONE);
+ /* Set bounce info in case a light path node is used in the refractive interface
+ * shader graph. */
+ INTEGRATOR_STATE_WRITE(state, path, transmission_bounce) = transmission_bounce + vi;
+ INTEGRATOR_STATE_WRITE(state, path, bounce) = bounce + 1 + vi;
+
/* Evaluate shader nodes at solution vi. */
shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>(
kg, state, sd_mnee, NULL, PATH_RAY_DIFFUSE, true);
@@ -901,6 +921,11 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
bsdf_eval_mul3(throughput, bsdf_contribution);
}
+ /* Restore original state path bounce info. */
+ INTEGRATOR_STATE_WRITE(state, path, transmission_bounce) = transmission_bounce;
+ INTEGRATOR_STATE_WRITE(state, path, diffuse_bounce) = diffuse_bounce;
+ INTEGRATOR_STATE_WRITE(state, path, bounce) = bounce;
+
return true;
}
@@ -1029,18 +1054,17 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg,
return 0;
/* Check whether the transmission depth limit is reached before continuing. */
- if (INTEGRATOR_STATE(state, path, transmission_bounce) + vertex_count >=
+ if ((INTEGRATOR_STATE(state, path, transmission_bounce) + vertex_count - 1) >=
kernel_data.integrator.max_transmission_bounce)
return 0;
/* Check whether the diffuse depth limit is reached before continuing. */
- if (INTEGRATOR_STATE(state, path, diffuse_bounce) + 1 >=
+ if ((INTEGRATOR_STATE(state, path, diffuse_bounce) + 1) >=
kernel_data.integrator.max_diffuse_bounce)
return 0;
/* Check whether the overall depth limit is reached before continuing. */
- if (INTEGRATOR_STATE(state, path, bounce) + 1 + vertex_count >=
- kernel_data.integrator.max_bounce)
+ if ((INTEGRATOR_STATE(state, path, bounce) + vertex_count) >= kernel_data.integrator.max_bounce)
return 0;
/* Mark the manifold walk valid to turn off mollification regardless of how successful the walk
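The bookkeeping above follows a save/adjust/restore pattern: bounce counters are temporarily advanced so that Light Path nodes in the light or interface shader graphs see the depth they would have at that manifold vertex, then the original state is restored before normal path tracing resumes. Condensed into a self-contained sketch with hypothetical helper names:

    struct PathState { int bounce, diffuse_bounce, transmission_bounce; };

    void eval_with_mnee_depth(PathState &path, int vertex_count) {
      const PathState saved = path;                 /* save */
      path.diffuse_bounce += 1;                     /* the caustic receiver */
      path.transmission_bounce += vertex_count - 1; /* interfaces crossed */
      path.bounce += vertex_count;
      /* ... evaluate light / interface shader graphs here ... */
      path = saved;                                 /* restore */
    }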
diff --git a/intern/cycles/kernel/integrator/shade_background.h b/intern/cycles/kernel/integrator/shade_background.h
index 62b3ce1c15c..72ecf67e8a0 100644
--- a/intern/cycles/kernel/integrator/shade_background.h
+++ b/intern/cycles/kernel/integrator/shade_background.h
@@ -48,7 +48,7 @@ ccl_device float3 integrator_eval_background_shader(KernelGlobals kg,
PROFILING_SHADER(emission_sd->object, emission_sd->shader);
PROFILING_EVENT(PROFILING_SHADE_LIGHT_EVAL);
- shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT>(
+ shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_BACKGROUND>(
kg, state, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION);
L = shader_background_eval(emission_sd);
diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h
index 859c314b088..ce1398859b7 100644
--- a/intern/cycles/kernel/integrator/shade_surface.h
+++ b/intern/cycles/kernel/integrator/shade_surface.h
@@ -137,7 +137,7 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
# ifdef __MNEE__
int mnee_vertex_count = 0;
- IF_KERNEL_NODES_FEATURE(RAYTRACE)
+ IF_KERNEL_FEATURE(MNEE)
{
if (ls.lamp != LAMP_NONE) {
/* Is this a caustic light? */
@@ -253,13 +253,13 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
# ifdef __MNEE__
if (mnee_vertex_count > 0) {
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transmission_bounce) =
- INTEGRATOR_STATE(state, path, transmission_bounce) + mnee_vertex_count;
+ INTEGRATOR_STATE(state, path, transmission_bounce) + mnee_vertex_count - 1;
INTEGRATOR_STATE_WRITE(shadow_state,
shadow_path,
diffuse_bounce) = INTEGRATOR_STATE(state, path, diffuse_bounce) + 1;
INTEGRATOR_STATE_WRITE(shadow_state,
shadow_path,
- bounce) = INTEGRATOR_STATE(state, path, bounce) + 1 + mnee_vertex_count;
+ bounce) = INTEGRATOR_STATE(state, path, bounce) + mnee_vertex_count;
}
else
# endif
@@ -631,4 +631,12 @@ ccl_device_forceinline void integrator_shade_surface_raytrace(
kg, state, render_buffer);
}
+ccl_device_forceinline void integrator_shade_surface_mnee(
+ KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer)
+{
+ integrator_shade_surface<(KERNEL_FEATURE_NODE_MASK_SURFACE & ~KERNEL_FEATURE_NODE_RAYTRACE) |
+ KERNEL_FEATURE_MNEE,
+ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE>(kg, state, render_buffer);
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/subsurface.h b/intern/cycles/kernel/integrator/subsurface.h
index 2391cc2356d..b449f807290 100644
--- a/intern/cycles/kernel/integrator/subsurface.h
+++ b/intern/cycles/kernel/integrator/subsurface.h
@@ -174,9 +174,14 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat
const int object_flags = intersection_get_object_flags(kg, &ss_isect.hits[0]);
const bool use_caustics = kernel_data.integrator.use_caustics &&
(object_flags & SD_OBJECT_CAUSTICS);
- const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE) || use_caustics;
+ const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE);
- if (use_raytrace_kernel) {
+ if (use_caustics) {
+ INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
+ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE,
+ shader);
+ }
+ else if (use_raytrace_kernel) {
INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE,
shader);
diff --git a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
index dbbce655ef7..39fcee1ac0d 100644
--- a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
+++ b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
@@ -17,6 +17,7 @@
#include "kernel/types.h"
#include "kernel/closure/alloc.h"
#include "kernel/closure/bsdf_diffuse_ramp.h"
+#include "kernel/closure/bsdf_util.h"
// clang-format on
CCL_NAMESPACE_BEGIN
@@ -30,6 +31,8 @@ class DiffuseRampClosure : public CBSDFClosure {
void setup(ShaderData *sd, uint32_t /* path_flag */, float3 weight)
{
+ params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
+
DiffuseRampBsdf *bsdf = (DiffuseRampBsdf *)bsdf_alloc_osl(
sd, sizeof(DiffuseRampBsdf), weight, &params);
diff --git a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
index fd602c8847e..972ed7e4a6d 100644
--- a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
+++ b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
@@ -17,6 +17,7 @@
#include "kernel/types.h"
#include "kernel/closure/alloc.h"
#include "kernel/closure/bsdf_phong_ramp.h"
+#include "kernel/closure/bsdf_util.h"
// clang-format on
CCL_NAMESPACE_BEGIN
@@ -30,6 +31,8 @@ class PhongRampClosure : public CBSDFClosure {
void setup(ShaderData *sd, uint32_t /* path_flag */, float3 weight)
{
+ params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
+
PhongRampBsdf *bsdf = (PhongRampBsdf *)bsdf_alloc_osl(
sd, sizeof(PhongRampBsdf), weight, &params);
diff --git a/intern/cycles/kernel/osl/bssrdf.cpp b/intern/cycles/kernel/osl/bssrdf.cpp
index 29d2d93ce34..4b282fddad3 100644
--- a/intern/cycles/kernel/osl/bssrdf.cpp
+++ b/intern/cycles/kernel/osl/bssrdf.cpp
@@ -44,6 +44,8 @@ class CBSSRDFClosure : public CClosurePrimitive {
void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
{
+ params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
+
if (method == u_burley) {
alloc(sd, path_flag, weight, CLOSURE_BSSRDF_BURLEY_ID);
}
diff --git a/intern/cycles/kernel/osl/closures.cpp b/intern/cycles/kernel/osl/closures.cpp
index 06b91ab9bb7..7c6b48154e4 100644
--- a/intern/cycles/kernel/osl/closures.cpp
+++ b/intern/cycles/kernel/osl/closures.cpp
@@ -180,6 +180,8 @@ class PrincipledSheenClosure : public CBSDFClosure {
void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
{
if (!skip(sd, path_flag, LABEL_DIFFUSE)) {
+ params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
+
PrincipledSheenBsdf *bsdf = (PrincipledSheenBsdf *)bsdf_alloc_osl(
sd, sizeof(PrincipledSheenBsdf), weight, &params);
sd->flag |= (bsdf) ? bsdf_principled_sheen_setup(sd, bsdf) : 0;
@@ -223,6 +225,8 @@ class PrincipledHairClosure : public CBSDFClosure {
void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
{
if (!skip(sd, path_flag, LABEL_GLOSSY)) {
+ params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
+
PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)alloc(sd, path_flag, weight);
if (!bsdf) {
return;
@@ -282,6 +286,7 @@ class PrincipledClearcoatClosure : public CBSDFClosure {
void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
{
+ params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
if (!bsdf) {
return;
@@ -503,6 +508,8 @@ class MicrofacetClosure : public CBSDFClosure {
return;
}
+ params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
+
MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl(
sd, sizeof(MicrofacetBsdf), weight, &params);
@@ -601,6 +608,8 @@ class MicrofacetGGXFresnelClosure : public MicrofacetFresnelClosure {
public:
void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
{
+ params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
+
MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
if (!bsdf) {
return;
@@ -630,6 +639,8 @@ class MicrofacetGGXAnisoFresnelClosure : public MicrofacetFresnelClosure {
public:
void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
{
+ params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
+
MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
if (!bsdf) {
return;
@@ -695,6 +706,8 @@ class MicrofacetMultiGGXClosure : public MicrofacetMultiClosure {
public:
void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
{
+ params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
+
MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
if (!bsdf) {
return;
@@ -723,6 +736,8 @@ class MicrofacetMultiGGXAnisoClosure : public MicrofacetMultiClosure {
public:
void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
{
+ params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
+
MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
if (!bsdf) {
return;
@@ -755,6 +770,8 @@ class MicrofacetMultiGGXGlassClosure : public MicrofacetMultiClosure {
void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
{
+ params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
+
MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
if (!bsdf) {
return;
@@ -819,6 +836,8 @@ class MicrofacetMultiGGXFresnelClosure : public MicrofacetMultiFresnelClosure {
public:
void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
{
+ params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
+
MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
if (!bsdf) {
return;
@@ -849,6 +868,8 @@ class MicrofacetMultiGGXAnisoFresnelClosure : public MicrofacetMultiFresnelClosu
public:
void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
{
+ params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
+
MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
if (!bsdf) {
return;
@@ -883,6 +904,8 @@ class MicrofacetMultiGGXGlassFresnelClosure : public MicrofacetMultiFresnelClosu
void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
{
+ params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
+
MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
if (!bsdf) {
return;
diff --git a/intern/cycles/kernel/osl/closures.h b/intern/cycles/kernel/osl/closures.h
index 6225644a8e6..e10a3d88a04 100644
--- a/intern/cycles/kernel/osl/closures.h
+++ b/intern/cycles/kernel/osl/closures.h
@@ -114,6 +114,7 @@ class CBSDFClosure : public CClosurePrimitive {
void setup(ShaderData *sd, uint32_t path_flag, float3 weight) \
{ \
if (!skip(sd, path_flag, TYPE)) { \
+ params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N); \
structname *bsdf = (structname *)bsdf_alloc_osl(sd, sizeof(structname), weight, &params); \
sd->flag |= (bsdf) ? bsdf_##lower##_setup(bsdf) : 0; \
} \
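All of these closure hunks add the same guard that Cycles' SVM closures apply: clamping the shading normal so that reflections never dip below the geometric surface. In Cycles' own types, the invariant ensure_valid_reflection(Ng, I, N) maintains is roughly the following (illustrative, not the actual implementation; see bsdf_util.h):

    float3 R = 2.0f * dot(N, I) * N - I;  /* mirror reflection of I about N */
    /* ensure_valid_reflection() returns an N for which dot(Ng, R) > 0. */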
diff --git a/intern/cycles/kernel/svm/blackbody.h b/intern/cycles/kernel/svm/blackbody.h
index af59c2fe747..774fa16b384 100644
--- a/intern/cycles/kernel/svm/blackbody.h
+++ b/intern/cycles/kernel/svm/blackbody.h
@@ -24,6 +24,7 @@ ccl_device_noinline void svm_node_blackbody(KernelGlobals kg,
float temperature = stack_load_float(stack, temperature_offset);
float3 color_rgb = rec709_to_rgb(kg, svm_math_blackbody_color_rec709(temperature));
+ color_rgb = max(color_rgb, zero_float3());
stack_store_float3(stack, col_offset, color_rgb);
}
diff --git a/intern/cycles/kernel/svm/displace.h b/intern/cycles/kernel/svm/displace.h
index 56fb5c3f9b0..128023263fd 100644
--- a/intern/cycles/kernel/svm/displace.h
+++ b/intern/cycles/kernel/svm/displace.h
@@ -188,6 +188,7 @@ ccl_device_noinline int svm_node_vector_displacement(
else
{
stack_store_float3(stack, displacement_offset, zero_float3());
+ (void)data_node;
}
return offset;
diff --git a/intern/cycles/kernel/svm/math_util.h b/intern/cycles/kernel/svm/math_util.h
index 9f2d9561e26..89bd4a501a7 100644
--- a/intern/cycles/kernel/svm/math_util.h
+++ b/intern/cycles/kernel/svm/math_util.h
@@ -192,28 +192,26 @@ ccl_device float svm_math(NodeMathType type, float a, float b, float c)
ccl_device float3 svm_math_blackbody_color_rec709(float t)
{
/* Calculate color in range 800..12000 using an approximation
- * a/x+bx+c for R and G and ((at + b)t + c)t + d) for B
- * Max absolute error for RGB is (0.00095, 0.00077, 0.00057),
- * which is enough to get the same 8 bit/channel color.
- */
+ * a/x+bx+c for R and G and ((at + b)t + c)t + d for B.
+ *
+ * The result of this can be negative to support gamut wider than
+ * rec.709; it just needs to be clamped. */
if (t >= 12000.0f) {
- return make_float3(0.826270103f, 0.994478524f, 1.56626022f);
+ return make_float3(0.8262954810464208f, 0.9945080501520986f, 1.566307710274283f);
}
- else if (t < 965.0f) {
- /* For 800 <= t < 965 color does not change in OSL implementation, so keep color the same */
- return make_float3(4.70366907f, 0.0f, 0.0f);
+ else if (t < 800.0f) {
+ /* Arbitrary lower limit where light is very dim, matching OSL. */
+ return make_float3(5.413294490189271f, -0.20319390035873933f, -0.0822535242887164f);
}
- /* Manually align for readability. */
- /* clang-format off */
- int i = (t >= 6365.0f) ? 5 :
- (t >= 3315.0f) ? 4 :
- (t >= 1902.0f) ? 3 :
- (t >= 1449.0f) ? 2 :
- (t >= 1167.0f) ? 1 :
+ int i = (t >= 6365.0f) ? 6 :
+ (t >= 3315.0f) ? 5 :
+ (t >= 1902.0f) ? 4 :
+ (t >= 1449.0f) ? 3 :
+ (t >= 1167.0f) ? 2 :
+ (t >= 965.0f) ? 1 :
0;
- /* clang-format on */
ccl_constant float *r = blackbody_table_r[i];
ccl_constant float *g = blackbody_table_g[i];
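The retabulated fit (tables.h below adds a seventh row covering 800-965K) may return negative components for colors outside Rec.709, which is why svm_node_blackbody above now clamps the result. A standalone illustration of that consumer-side clamp:

    #include <algorithm>
    #include <cstdio>

    int main() {
      /* Fit output near the t < 800K limit, as in the patch: */
      float rgb[3] = {5.4133f, -0.2032f, -0.0823f};
      for (float &c : rgb)
        c = std::max(c, 0.0f); /* matches max(color_rgb, zero_float3()) */
      printf("%g %g %g\n", rgb[0], rgb[1], rgb[2]);
      return 0;
    }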
diff --git a/intern/cycles/kernel/tables.h b/intern/cycles/kernel/tables.h
index f826cc5c5ef..c1fdbba3fa7 100644
--- a/intern/cycles/kernel/tables.h
+++ b/intern/cycles/kernel/tables.h
@@ -4,30 +4,33 @@
/* clang-format off */
ccl_inline_constant float blackbody_table_r[][3] = {
- {2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f},
- {3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f},
- {4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f},
- {4.66849800e+03f, 2.85655028e-05f, 1.29075375e-01f},
- {4.60124770e+03f, 2.89727618e-05f, 1.48001316e-01f},
- {3.78765709e+03f, 9.36026367e-06f, 3.98995841e-01f}
+ {1.61919106e+03f, -2.05010916e-03f, 5.02995757e+00f},
+ {2.48845471e+03f, -1.11330907e-03f, 3.22621544e+00f},
+ {3.34143193e+03f, -4.86551192e-04f, 1.76486769e+00f},
+ {4.09461742e+03f, -1.27446582e-04f, 7.25731635e-01f},
+ {4.67028036e+03f, 2.91258199e-05f, 1.26703442e-01f},
+ {4.59509185e+03f, 2.87495649e-05f, 1.50345020e-01f},
+ {3.78717450e+03f, 9.35907826e-06f, 3.99075871e-01f}
};
ccl_inline_constant float blackbody_table_g[][3] = {
- {-7.50343014e+02f, 3.15679613e-04f, 4.73464526e-01f},
- {-1.00402363e+03f, 1.29189794e-04f, 9.08181524e-01f},
- {-1.22075471e+03f, 2.56245413e-05f, 1.20753416e+00f},
- {-1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f},
- {-1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f},
- {-5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f}
+ {-4.88999748e+02f, 6.04330754e-04f, -7.55807526e-02f},
+ {-7.55994277e+02f, 3.16730098e-04f, 4.78306139e-01f},
+ {-1.02363977e+03f, 1.20223470e-04f, 9.36662319e-01f},
+ {-1.26571316e+03f, 4.87340896e-06f, 1.27054498e+00f},
+ {-1.42529332e+03f, -4.01150431e-05f, 1.43972784e+00f},
+ {-1.17554822e+03f, -2.16378048e-05f, 1.30408023e+00f},
+ {-5.00799571e+02f, -4.59832026e-06f, 1.09098763e+00f}
};
ccl_inline_constant float blackbody_table_b[][4] = {
- {0.0f, 0.0f, 0.0f, 0.0f}, /* zeros should be optimized by compiler */
- {0.0f, 0.0f, 0.0f, 0.0f},
- {0.0f, 0.0f, 0.0f, 0.0f},
- {-2.02524603e-11f, 1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f},
- {-2.22463426e-13f, -1.55078698e-08f, 3.81675160e-04f, -7.30646033e-01f},
- {6.72595954e-13f, -2.73059993e-08f, 4.24068546e-04f, -7.52204323e-01f}
+ {5.96945309e-11f, -4.85742887e-08f, -9.70622247e-05f, -4.07936148e-03f},
+ {2.40430366e-11f, 5.55021075e-08f, -1.98503712e-04f, 2.89312858e-02f},
+ {-1.40949732e-11f, 1.89878968e-07f, -3.56632824e-04f, 9.10767778e-02f},
+ {-3.61460868e-11f, 2.84822009e-07f, -4.93211319e-04f, 1.56723440e-01f},
+ {-1.97075738e-11f, 1.75359352e-07f, -2.50542825e-04f, -2.22783266e-02f},
+ {-1.61997957e-13f, -1.64216008e-08f, 3.86216271e-04f, -7.38077418e-01f},
+ {6.72650283e-13f, -2.73078809e-08f, 4.24098264e-04f, -7.52335691e-01f}
};
ccl_inline_constant float cie_colour_match[][3] = {
diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h
index 297e88c718c..a76effdd952 100644
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@@ -1572,6 +1572,7 @@ typedef enum DeviceKernel : int {
DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT,
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE,
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE,
+ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE,
DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME,
DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW,
DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL,
@@ -1689,6 +1690,9 @@ enum KernelFeatureFlag : uint32_t {
KERNEL_FEATURE_AO_PASS = (1U << 25U),
KERNEL_FEATURE_AO_ADDITIVE = (1U << 26U),
KERNEL_FEATURE_AO = (KERNEL_FEATURE_AO_PASS | KERNEL_FEATURE_AO_ADDITIVE),
+
+ /* MNEE. */
+ KERNEL_FEATURE_MNEE = (1U << 27U),
};
/* Shader node feature mask, to specialize shader evaluation for kernels. */
@@ -1696,6 +1700,8 @@ enum KernelFeatureFlag : uint32_t {
#define KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT \
(KERNEL_FEATURE_NODE_EMISSION | KERNEL_FEATURE_NODE_VORONOI_EXTRA | \
KERNEL_FEATURE_NODE_LIGHT_PATH)
+#define KERNEL_FEATURE_NODE_MASK_SURFACE_BACKGROUND \
+ (KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT | KERNEL_FEATURE_NODE_AOV)
#define KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW \
(KERNEL_FEATURE_NODE_BSDF | KERNEL_FEATURE_NODE_EMISSION | KERNEL_FEATURE_NODE_VOLUME | \
KERNEL_FEATURE_NODE_BUMP | KERNEL_FEATURE_NODE_BUMP_STATE | \
@@ -1714,9 +1720,12 @@ enum KernelFeatureFlag : uint32_t {
* are different depending on the main, shadow or null path. For GPU we don't have
* C++17 everywhere so can't use it. */
#ifdef __KERNEL_CPU__
+# define IF_KERNEL_FEATURE(feature) \
+ if constexpr ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
# define IF_KERNEL_NODES_FEATURE(feature) \
if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
#else
+# define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
# define IF_KERNEL_NODES_FEATURE(feature) \
if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
#endif
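IF_KERNEL_FEATURE mirrors the existing IF_KERNEL_NODES_FEATURE: on CPU it expands to `if constexpr`, so branches for features the kernel was not specialized with are compiled out entirely. Usage, as in shade_surface.h above:

    IF_KERNEL_FEATURE(MNEE)
    {
      /* Caustics (MNEE) sampling; dropped at compile time on CPU when the
       * kernel template was instantiated without KERNEL_FEATURE_MNEE. */
    }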
diff --git a/intern/cycles/scene/attribute.cpp b/intern/cycles/scene/attribute.cpp
index df01189a54b..d6b4c0240f6 100644
--- a/intern/cycles/scene/attribute.cpp
+++ b/intern/cycles/scene/attribute.cpp
@@ -661,6 +661,26 @@ Attribute *AttributeSet::find(AttributeStandard std) const
return NULL;
}
+Attribute *AttributeSet::find_matching(const Attribute &other)
+{
+ for (Attribute &attr : attributes) {
+ if (attr.name != other.name) {
+ continue;
+ }
+ if (attr.std != other.std) {
+ continue;
+ }
+ if (attr.type != other.type) {
+ continue;
+ }
+ if (attr.element != other.element) {
+ continue;
+ }
+ return &attr;
+ }
+ return nullptr;
+}
+
void AttributeSet::remove(AttributeStandard std)
{
Attribute *attr = find(std);
@@ -729,32 +749,24 @@ void AttributeSet::clear(bool preserve_voxel_data)
void AttributeSet::update(AttributeSet &&new_attributes)
{
- /* add or update old_attributes based on the new_attributes */
- foreach (Attribute &attr, new_attributes.attributes) {
- Attribute *nattr = add(attr.name, attr.type, attr.element);
- nattr->std = attr.std;
- nattr->set_data_from(std::move(attr));
- }
-
- /* remove any attributes not on new_attributes */
+ /* Remove any attributes not on new_attributes. */
list<Attribute>::iterator it;
for (it = attributes.begin(); it != attributes.end();) {
- if (it->std != ATTR_STD_NONE) {
- if (new_attributes.find(it->std) == nullptr) {
- remove(it++);
- continue;
- }
- }
- else if (it->name != "") {
- if (new_attributes.find(it->name) == nullptr) {
- remove(it++);
- continue;
- }
+ const Attribute &old_attr = *it;
+ if (new_attributes.find_matching(old_attr) == nullptr) {
+ remove(it++);
+ continue;
}
-
it++;
}
+ /* Add or update old_attributes based on the new_attributes. */
+ foreach (Attribute &attr, new_attributes.attributes) {
+ Attribute *nattr = add(attr.name, attr.type, attr.element);
+ nattr->std = attr.std;
+ nattr->set_data_from(std::move(attr));
+ }
+
/* If all attributes were replaced, transform is no longer applied. */
geometry->transform_applied = false;
}
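Reversing the order (remove stale attributes first, then move data in from new_attributes) prevents the removal pass from matching an entry that was just added, and find_matching() is stricter than the old name/std lookups: an attribute whose type or element changed is treated as new rather than reused. A toy version of the matching rule:

    #include <string>

    struct Attr { std::string name; int std_tag, type, element; };

    static bool matches(const Attr &a, const Attr &b) {
      return a.name == b.name && a.std_tag == b.std_tag &&
             a.type == b.type && a.element == b.element;
    }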
diff --git a/intern/cycles/scene/attribute.h b/intern/cycles/scene/attribute.h
index fd13b8ff6de..7f8cbf32049 100644
--- a/intern/cycles/scene/attribute.h
+++ b/intern/cycles/scene/attribute.h
@@ -194,6 +194,7 @@ class AttributeSet {
void remove(AttributeStandard std);
Attribute *find(AttributeRequest &req);
+ Attribute *find_matching(const Attribute &other);
void remove(Attribute *attribute);
diff --git a/intern/cycles/scene/image.cpp b/intern/cycles/scene/image.cpp
index c61ad1f1d71..1b44162351a 100644
--- a/intern/cycles/scene/image.cpp
+++ b/intern/cycles/scene/image.cpp
@@ -64,6 +64,10 @@ const char *name_from_type(ImageDataType type)
return "nanovdb_float";
case IMAGE_DATA_TYPE_NANOVDB_FLOAT3:
return "nanovdb_float3";
+ case IMAGE_DATA_TYPE_NANOVDB_FPN:
+ return "nanovdb_fpn";
+ case IMAGE_DATA_TYPE_NANOVDB_FP16:
+ return "nanovdb_fp16";
case IMAGE_DATA_NUM_TYPES:
assert(!"System enumerator type, should never be used");
return "";
@@ -268,17 +272,12 @@ void ImageMetaData::detect_colorspace()
compress_as_srgb = true;
}
else {
- /* Always compress non-raw 8bit images as scene linear + sRGB, as a
- * heuristic to keep memory usage the same without too much data loss
- * due to quantization in common cases. */
- compress_as_srgb = (type == IMAGE_DATA_TYPE_BYTE || type == IMAGE_DATA_TYPE_BYTE4);
-
/* If colorspace conversion needed, use half instead of short so we can
* represent HDR values that might result from conversion. */
- if (type == IMAGE_DATA_TYPE_USHORT) {
+ if (type == IMAGE_DATA_TYPE_BYTE || type == IMAGE_DATA_TYPE_USHORT) {
type = IMAGE_DATA_TYPE_HALF;
}
- else if (type == IMAGE_DATA_TYPE_USHORT4) {
+ else if (type == IMAGE_DATA_TYPE_BYTE4 || type == IMAGE_DATA_TYPE_USHORT4) {
type = IMAGE_DATA_TYPE_HALF4;
}
}
@@ -378,7 +377,9 @@ void ImageManager::load_image_metadata(Image *img)
metadata.detect_colorspace();
assert(features.has_nanovdb || (metadata.type != IMAGE_DATA_TYPE_NANOVDB_FLOAT ||
- metadata.type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3));
+ metadata.type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3 ||
+ metadata.type != IMAGE_DATA_TYPE_NANOVDB_FPN ||
+ metadata.type != IMAGE_DATA_TYPE_NANOVDB_FP16));
img->need_metadata = false;
}
@@ -796,7 +797,8 @@ void ImageManager::device_load_image(Device *device, Scene *scene, int slot, Pro
}
}
#ifdef WITH_NANOVDB
- else if (type == IMAGE_DATA_TYPE_NANOVDB_FLOAT || type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
+ else if (type == IMAGE_DATA_TYPE_NANOVDB_FLOAT || type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3 ||
+ type == IMAGE_DATA_TYPE_NANOVDB_FPN || type == IMAGE_DATA_TYPE_NANOVDB_FP16) {
thread_scoped_lock device_lock(device_mutex);
void *pixels = img->mem->alloc(img->metadata.byte_size, 0);
diff --git a/intern/cycles/scene/image_oiio.cpp b/intern/cycles/scene/image_oiio.cpp
index 3f825afbe90..09676455308 100644
--- a/intern/cycles/scene/image_oiio.cpp
+++ b/intern/cycles/scene/image_oiio.cpp
@@ -94,10 +94,11 @@ bool OIIOImageLoader::load_metadata(const ImageDeviceFeatures & /*features*/,
template<TypeDesc::BASETYPE FileFormat, typename StorageType>
static void oiio_load_pixels(const ImageMetaData &metadata,
const unique_ptr<ImageInput> &in,
+ const bool associate_alpha,
StorageType *pixels)
{
- const int width = metadata.width;
- const int height = metadata.height;
+ const size_t width = metadata.width;
+ const size_t height = metadata.height;
const int depth = metadata.depth;
const int components = metadata.channels;
@@ -105,12 +106,12 @@ static void oiio_load_pixels(const ImageMetaData &metadata,
StorageType *readpixels = pixels;
vector<StorageType> tmppixels;
if (components > 4) {
- tmppixels.resize(((size_t)width) * height * components);
+ tmppixels.resize(width * height * components);
readpixels = &tmppixels[0];
}
if (depth <= 1) {
- size_t scanlinesize = ((size_t)width) * components * sizeof(StorageType);
+ size_t scanlinesize = width * components * sizeof(StorageType);
in->read_image(FileFormat,
(uchar *)readpixels + (height - 1) * scanlinesize,
AutoStride,
@@ -122,7 +123,7 @@ static void oiio_load_pixels(const ImageMetaData &metadata,
}
if (components > 4) {
- size_t dimensions = ((size_t)width) * height;
+ size_t dimensions = width * height;
for (size_t i = dimensions - 1, pixel = 0; pixel < dimensions; pixel++, i--) {
pixels[i * 4 + 3] = tmppixels[i * components + 3];
pixels[i * 4 + 2] = tmppixels[i * components + 2];
@@ -137,7 +138,7 @@ static void oiio_load_pixels(const ImageMetaData &metadata,
if (cmyk) {
const StorageType one = util_image_cast_from_float<StorageType>(1.0f);
- const size_t num_pixels = ((size_t)width) * height * depth;
+ const size_t num_pixels = width * height * depth;
for (size_t i = num_pixels - 1, pixel = 0; pixel < num_pixels; pixel++, i--) {
float c = util_image_cast_to_float(pixels[i * 4 + 0]);
float m = util_image_cast_to_float(pixels[i * 4 + 1]);
@@ -149,6 +150,16 @@ static void oiio_load_pixels(const ImageMetaData &metadata,
pixels[i * 4 + 3] = one;
}
}
+
+ if (components == 4 && associate_alpha) {
+ size_t dimensions = width * height;
+ for (size_t i = dimensions - 1, pixel = 0; pixel < dimensions; pixel++, i--) {
+ const StorageType alpha = pixels[i * 4 + 3];
+ pixels[i * 4 + 0] = util_image_multiply_native(pixels[i * 4 + 0], alpha);
+ pixels[i * 4 + 1] = util_image_multiply_native(pixels[i * 4 + 1], alpha);
+ pixels[i * 4 + 2] = util_image_multiply_native(pixels[i * 4 + 2], alpha);
+ }
+ }
}
bool OIIOImageLoader::load_pixels(const ImageMetaData &metadata,
@@ -172,33 +183,41 @@ bool OIIOImageLoader::load_pixels(const ImageMetaData &metadata,
ImageSpec spec = ImageSpec();
ImageSpec config = ImageSpec();
- if (!associate_alpha) {
- config.attribute("oiio:UnassociatedAlpha", 1);
- }
+ /* Load without automatic OIIO alpha conversion; we do it ourselves. OIIO
+ * will associate alpha in the 8-bit buffer for PNGs, which leads to too
+ * much precision loss when we load it as half float to do a colorspace
+ * transform. */
+ config.attribute("oiio:UnassociatedAlpha", 1);
if (!in->open(filepath.string(), spec, config)) {
return false;
}
+ const bool do_associate_alpha = associate_alpha &&
+ spec.get_int_attribute("oiio:UnassociatedAlpha", 0);
+
switch (metadata.type) {
case IMAGE_DATA_TYPE_BYTE:
case IMAGE_DATA_TYPE_BYTE4:
- oiio_load_pixels<TypeDesc::UINT8, uchar>(metadata, in, (uchar *)pixels);
+ oiio_load_pixels<TypeDesc::UINT8, uchar>(metadata, in, do_associate_alpha, (uchar *)pixels);
break;
case IMAGE_DATA_TYPE_USHORT:
case IMAGE_DATA_TYPE_USHORT4:
- oiio_load_pixels<TypeDesc::USHORT, uint16_t>(metadata, in, (uint16_t *)pixels);
+ oiio_load_pixels<TypeDesc::USHORT, uint16_t>(
+ metadata, in, do_associate_alpha, (uint16_t *)pixels);
break;
case IMAGE_DATA_TYPE_HALF:
case IMAGE_DATA_TYPE_HALF4:
- oiio_load_pixels<TypeDesc::HALF, half>(metadata, in, (half *)pixels);
+ oiio_load_pixels<TypeDesc::HALF, half>(metadata, in, do_associate_alpha, (half *)pixels);
break;
case IMAGE_DATA_TYPE_FLOAT:
case IMAGE_DATA_TYPE_FLOAT4:
- oiio_load_pixels<TypeDesc::FLOAT, float>(metadata, in, (float *)pixels);
+ oiio_load_pixels<TypeDesc::FLOAT, float>(metadata, in, do_associate_alpha, (float *)pixels);
break;
case IMAGE_DATA_TYPE_NANOVDB_FLOAT:
case IMAGE_DATA_TYPE_NANOVDB_FLOAT3:
+ case IMAGE_DATA_TYPE_NANOVDB_FPN:
+ case IMAGE_DATA_TYPE_NANOVDB_FP16:
case IMAGE_DATA_NUM_TYPES:
break;
}
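With OIIO now always asked for unassociated alpha, premultiplication happens in the working storage type after decode, avoiding the precision loss of alpha being associated in an 8-bit buffer before a half-float colorspace conversion. A standalone, simplified float version of the loop added above (the kernel templates this over the storage type and uses util_image_multiply_native so integer formats are handled natively):

    #include <cstddef>
    #include <vector>

    void associate_alpha(std::vector<float> &pixels) { /* RGBA interleaved */
      for (size_t i = 0; i + 3 < pixels.size(); i += 4) {
        const float alpha = pixels[i + 3];
        pixels[i + 0] *= alpha;
        pixels[i + 1] *= alpha;
        pixels[i + 2] *= alpha;
      }
    }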
diff --git a/intern/cycles/scene/image_vdb.cpp b/intern/cycles/scene/image_vdb.cpp
index b6f0911fa2c..2209be60a97 100644
--- a/intern/cycles/scene/image_vdb.cpp
+++ b/intern/cycles/scene/image_vdb.cpp
@@ -44,14 +44,30 @@ struct ToDenseOp {
# ifdef WITH_NANOVDB
struct ToNanoOp {
nanovdb::GridHandle<> nanogrid;
+ int precision;
template<typename GridType, typename FloatGridType, typename FloatDataType, int channels>
bool operator()(const openvdb::GridBase::ConstPtr &grid)
{
if constexpr (!std::is_same_v<GridType, openvdb::MaskGrid>) {
try {
- nanogrid = nanovdb::openToNanoVDB(
- FloatGridType(*openvdb::gridConstPtrCast<GridType>(grid)));
+ FloatGridType floatgrid(*openvdb::gridConstPtrCast<GridType>(grid));
+ if constexpr (std::is_same_v<FloatGridType, openvdb::FloatGrid>) {
+ if (precision == 0) {
+ nanogrid = nanovdb::openToNanoVDB<nanovdb::HostBuffer,
+ typename FloatGridType::TreeType,
+ nanovdb::FpN>(floatgrid);
+ return true;
+ }
+ else if (precision == 16) {
+ nanogrid = nanovdb::openToNanoVDB<nanovdb::HostBuffer,
+ typename FloatGridType::TreeType,
+ nanovdb::Fp16>(floatgrid);
+ return true;
+ }
+ }
+
+ nanogrid = nanovdb::openToNanoVDB(floatgrid);
}
catch (const std::exception &e) {
VLOG(1) << "Error converting OpenVDB to NanoVDB grid: " << e.what();
@@ -98,10 +114,13 @@ bool VDBImageLoader::load_metadata(const ImageDeviceFeatures &features, ImageMet
# ifdef WITH_NANOVDB
if (features.has_nanovdb) {
/* NanoVDB expects no inactive leaf nodes. */
- /*openvdb::FloatGrid &pruned_grid = *openvdb::gridPtrCast<openvdb::FloatGrid>(grid);
+# if 0
+ openvdb::FloatGrid &pruned_grid = *openvdb::gridPtrCast<openvdb::FloatGrid>(grid);
openvdb::tools::pruneInactive(pruned_grid.tree());
- nanogrid = nanovdb::openToNanoVDB(pruned_grid);*/
+ nanogrid = nanovdb::openToNanoVDB(pruned_grid);
+# endif
ToNanoOp op;
+ op.precision = precision;
if (!openvdb::grid_type_operation(grid, op)) {
return false;
}
@@ -124,7 +143,15 @@ bool VDBImageLoader::load_metadata(const ImageDeviceFeatures &features, ImageMet
if (nanogrid) {
metadata.byte_size = nanogrid.size();
if (metadata.channels == 1) {
- metadata.type = IMAGE_DATA_TYPE_NANOVDB_FLOAT;
+ if (precision == 0) {
+ metadata.type = IMAGE_DATA_TYPE_NANOVDB_FPN;
+ }
+ else if (precision == 16) {
+ metadata.type = IMAGE_DATA_TYPE_NANOVDB_FP16;
+ }
+ else {
+ metadata.type = IMAGE_DATA_TYPE_NANOVDB_FLOAT;
+ }
}
else {
metadata.type = IMAGE_DATA_TYPE_NANOVDB_FLOAT3;
diff --git a/intern/cycles/scene/image_vdb.h b/intern/cycles/scene/image_vdb.h
index a5fd51915ef..ea5f6b0b3d9 100644
--- a/intern/cycles/scene/image_vdb.h
+++ b/intern/cycles/scene/image_vdb.h
@@ -51,6 +51,7 @@ class VDBImageLoader : public ImageLoader {
#endif
#ifdef WITH_NANOVDB
nanovdb::GridHandle<> nanogrid;
+ int precision = 0;
#endif
};
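
The precision selection in ToNanoOp can be read as a small standalone helper; to_nano below is an illustrative sketch assuming the NanoVDB and OpenVDB headers this file already uses, not an actual Cycles entry point:

#include <nanovdb/util/OpenToNanoVDB.h>
#include <openvdb/openvdb.h>

/* Convert an OpenVDB float grid, quantizing leaf values according to the
 * requested precision: 0 selects variable bit-rate (FpN), 16 selects fixed
 * 16-bit (Fp16), anything else keeps full 32-bit floats. */
nanovdb::GridHandle<> to_nano(const openvdb::FloatGrid &grid, const int precision)
{
  using TreeT = openvdb::FloatGrid::TreeType;
  if (precision == 0) {
    return nanovdb::openToNanoVDB<nanovdb::HostBuffer, TreeT, nanovdb::FpN>(grid);
  }
  if (precision == 16) {
    return nanovdb::openToNanoVDB<nanovdb::HostBuffer, TreeT, nanovdb::Fp16>(grid);
  }
  return nanovdb::openToNanoVDB(grid);
}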
diff --git a/intern/cycles/scene/object.cpp b/intern/cycles/scene/object.cpp
index 8015be6393b..ddd89a16640 100644
--- a/intern/cycles/scene/object.cpp
+++ b/intern/cycles/scene/object.cpp
@@ -327,9 +327,11 @@ float Object::compute_volume_step_size() const
/* Auto detect step size. */
float3 size = one_float3();
#ifdef WITH_NANOVDB
- /* Dimensions were not applied to image transform with NanOVDB (see image_vdb.cpp) */
+ /* Dimensions were not applied to image transform with NanoVDB (see image_vdb.cpp) */
if (metadata.type != IMAGE_DATA_TYPE_NANOVDB_FLOAT &&
- metadata.type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3)
+ metadata.type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3 &&
+ metadata.type != IMAGE_DATA_TYPE_NANOVDB_FPN &&
+ metadata.type != IMAGE_DATA_TYPE_NANOVDB_FP16)
#endif
size /= make_float3(metadata.width, metadata.height, metadata.depth);
diff --git a/intern/cycles/scene/scene.cpp b/intern/cycles/scene/scene.cpp
index b35242139ea..8b5604eba72 100644
--- a/intern/cycles/scene/scene.cpp
+++ b/intern/cycles/scene/scene.cpp
@@ -550,7 +550,7 @@ void Scene::update_kernel_features()
dscene.data.integrator.use_caustics = false;
if (has_caustics_caster && has_caustics_receiver && has_caustics_light) {
dscene.data.integrator.use_caustics = true;
- kernel_features |= KERNEL_FEATURE_NODE_RAYTRACE;
+ kernel_features |= KERNEL_FEATURE_MNEE;
}
if (bake_manager->get_baking()) {
@@ -597,6 +597,7 @@ static void log_kernel_features(const uint features)
<< "\n";
VLOG(2) << "Use Shader Raytrace " << string_from_bool(features & KERNEL_FEATURE_NODE_RAYTRACE)
<< "\n";
+ VLOG(2) << "Use MNEE" << string_from_bool(features & KERNEL_FEATURE_MNEE) << "\n";
VLOG(2) << "Use Transparent " << string_from_bool(features & KERNEL_FEATURE_TRANSPARENT) << "\n";
VLOG(2) << "Use Denoising " << string_from_bool(features & KERNEL_FEATURE_DENOISING) << "\n";
VLOG(2) << "Use Path Tracing " << string_from_bool(features & KERNEL_FEATURE_PATH_TRACING)
diff --git a/intern/cycles/scene/shader_graph.cpp b/intern/cycles/scene/shader_graph.cpp
index d44e12f5fab..f25d0b7c7b9 100644
--- a/intern/cycles/scene/shader_graph.cpp
+++ b/intern/cycles/scene/shader_graph.cpp
@@ -888,7 +888,7 @@ void ShaderGraph::default_inputs(bool do_osl)
void ShaderGraph::refine_bump_nodes()
{
- /* we transverse the node graph looking for bump nodes, when we find them,
+ /* We traverse the node graph looking for bump nodes. When we find them,
 * like in bump_from_displacement(), we copy the sub-graph defined from the "bump"
 * input to the inputs "center", "dx" and "dy". What is in the "bump" input is moved
 * to the "center" input. */
@@ -898,18 +898,18 @@ void ShaderGraph::refine_bump_nodes()
ShaderInput *bump_input = node->input("Height");
ShaderNodeSet nodes_bump;
- /* make 2 extra copies of the subgraph defined in Bump input */
+ /* Make 2 extra copies of the subgraph defined in Bump input. */
ShaderNodeMap nodes_dx;
ShaderNodeMap nodes_dy;
- /* find dependencies for the given input */
+ /* Find dependencies for the given input. */
find_dependencies(nodes_bump, bump_input);
copy_nodes(nodes_bump, nodes_dx);
copy_nodes(nodes_bump, nodes_dy);
- /* mark nodes to indicate they are use for bump computation, so
- that any texture coordinates are shifted by dx/dy when sampling */
+ /* Mark nodes to indicate they are used for bump computation, so
+ * that any texture coordinates are shifted by dx/dy when sampling. */
foreach (ShaderNode *node, nodes_bump)
node->bump = SHADER_BUMP_CENTER;
foreach (NodePair &pair, nodes_dx)
@@ -924,7 +924,7 @@ void ShaderGraph::refine_bump_nodes()
connect(out_dx, node->input("SampleX"));
connect(out_dy, node->input("SampleY"));
- /* add generated nodes */
+ /* Add generated nodes. */
foreach (NodePair &pair, nodes_dx)
add(pair.second);
foreach (NodePair &pair, nodes_dy)
diff --git a/intern/cycles/scene/shader_nodes.cpp b/intern/cycles/scene/shader_nodes.cpp
index 03c152928d5..3b58556f601 100644
--- a/intern/cycles/scene/shader_nodes.cpp
+++ b/intern/cycles/scene/shader_nodes.cpp
@@ -5882,7 +5882,7 @@ void BlackbodyNode::constant_fold(const ConstantFolder &folder)
if (folder.all_inputs_constant()) {
const float3 rgb_rec709 = svm_math_blackbody_color_rec709(temperature);
const float3 rgb = folder.scene->shader_manager->rec709_to_scene_linear(rgb_rec709);
- folder.make_constant(rgb);
+ folder.make_constant(max(rgb, zero_float3()));
}
}
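
The added max() guards against out-of-gamut results: converting a rec709 color into a wider scene-linear space can leave a channel slightly negative, and the clamp is applied per component. A sketch with a hypothetical converted value:

#include <algorithm>

struct float3_demo { float x, y, z; };

/* Per-component max against zero, as the fold above does with zero_float3(). */
static inline float3_demo max_zero(const float3_demo v)
{
  return {std::max(v.x, 0.0f), std::max(v.y, 0.0f), std::max(v.z, 0.0f)};
}

int main()
{
  /* Hypothetical scene-linear blackbody color with a negative blue channel. */
  const float3_demo rgb = {3.96553f, 0.227897f, -0.0001f};
  const float3_demo clamped = max_zero(rgb); /* blue becomes exactly 0 */
  (void)clamped;
  return 0;
}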
diff --git a/intern/cycles/session/denoising.cpp b/intern/cycles/session/denoising.cpp
index ff9ca1b4345..a9377d412e8 100644
--- a/intern/cycles/session/denoising.cpp
+++ b/intern/cycles/session/denoising.cpp
@@ -90,7 +90,7 @@ static vector<ChannelMapping> output_channels()
return map;
}
-/* Renderlayer Handling */
+/* Render-layer Handling. */
bool DenoiseImageLayer::detect_denoising_channels()
{
diff --git a/intern/cycles/test/render_graph_finalize_test.cpp b/intern/cycles/test/render_graph_finalize_test.cpp
index 143628f1e30..dac36ab0135 100644
--- a/intern/cycles/test/render_graph_finalize_test.cpp
+++ b/intern/cycles/test/render_graph_finalize_test.cpp
@@ -946,7 +946,7 @@ TEST_F(RenderGraph, constant_fold_bright_contrast)
TEST_F(RenderGraph, constant_fold_blackbody)
{
EXPECT_ANY_MESSAGE(log);
- CORRECT_INFO_MESSAGE(log, "Folding Blackbody::Color to constant (3.94163, 0.226523, 0).");
+ CORRECT_INFO_MESSAGE(log, "Folding Blackbody::Color to constant (3.96553, 0.227897, 0).");
builder
.add_node(ShaderNodeBuilder<BlackbodyNode>(graph, "Blackbody").set("Temperature", 1200.0f))
diff --git a/intern/cycles/util/half.h b/intern/cycles/util/half.h
index af33264e044..c668638eb02 100644
--- a/intern/cycles/util/half.h
+++ b/intern/cycles/util/half.h
@@ -74,9 +74,9 @@ struct half4 {
ccl_device_inline half float_to_half_image(float f)
{
#if defined(__KERNEL_METAL__) || defined(__KERNEL_ONEAPI__)
- return half(f);
+ return half(min(f, 65504.0f));
#elif defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
- return __float2half(f);
+ return __float2half(min(f, 65504.0f));
#else
const uint u = __float_as_uint(f);
/* Sign bit, shifted to its position. */
@@ -139,9 +139,9 @@ ccl_device_inline float4 half4_to_float4_image(const half4 h)
ccl_device_inline half float_to_half_display(const float f)
{
#if defined(__KERNEL_METAL__) || defined(__KERNEL_ONEAPI__)
- return half(f);
+ return half(min(f, 65504.0f));
#elif defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
- return __float2half(f);
+ return __float2half(min(f, 65504.0f));
#else
const int x = __float_as_int((f > 0.0f) ? ((f < 65504.0f) ? f : 65504.0f) : 0.0f);
const int absolute = x & 0x7FFFFFFF;
diff --git a/intern/cycles/util/image.h b/intern/cycles/util/image.h
index 9348125d072..17446a83e59 100644
--- a/intern/cycles/util/image.h
+++ b/intern/cycles/util/image.h
@@ -78,6 +78,26 @@ template<> inline half util_image_cast_from_float(float value)
return float_to_half_image(value);
}
+/* Multiply image pixels in native data format. */
+template<typename T> inline T util_image_multiply_native(T a, T b);
+
+template<> inline float util_image_multiply_native(float a, float b)
+{
+ return a * b;
+}
+template<> inline uchar util_image_multiply_native(uchar a, uchar b)
+{
+ return ((uint32_t)a * (uint32_t)b) / 255;
+}
+template<> inline uint16_t util_image_multiply_native(uint16_t a, uint16_t b)
+{
+ return ((uint32_t)a * (uint32_t)b) / 65535;
+}
+template<> inline half util_image_multiply_native(half a, half b)
+{
+ return float_to_half_image(half_to_float_image(a) * half_to_float_image(b));
+}
+
CCL_NAMESPACE_END
#endif /* __UTIL_IMAGE_H__ */
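
The integer specializations truncate toward zero, and multiplying by the maximum of the range (an alpha of 1.0) is exact. A small self-check mirroring the uint16_t specialization (mul_u16 is an illustrative copy, not the header's template):

#include <cassert>
#include <cstdint>

static inline uint16_t mul_u16(const uint16_t a, const uint16_t b)
{
  return (uint16_t)(((uint32_t)a * (uint32_t)b) / 65535);
}

int main()
{
  assert(mul_u16(40000, 65535) == 40000); /* full alpha is the identity */
  assert(mul_u16(32768, 32768) == 16384); /* 1073741824 / 65535 truncates */
  return 0;
}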
diff --git a/intern/cycles/util/texture.h b/intern/cycles/util/texture.h
index e8bb058a3c9..90e842933c2 100644
--- a/intern/cycles/util/texture.h
+++ b/intern/cycles/util/texture.h
@@ -37,6 +37,8 @@ typedef enum ImageDataType {
IMAGE_DATA_TYPE_USHORT = 7,
IMAGE_DATA_TYPE_NANOVDB_FLOAT = 8,
IMAGE_DATA_TYPE_NANOVDB_FLOAT3 = 9,
+ IMAGE_DATA_TYPE_NANOVDB_FPN = 10,
+ IMAGE_DATA_TYPE_NANOVDB_FP16 = 11,
IMAGE_DATA_NUM_TYPES
} ImageDataType;
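
With two more NanoVDB entries, checks like the one in object.cpp now test four types; a hypothetical helper collecting them (illustrative, not part of the actual header):

inline bool image_data_is_nanovdb(const ImageDataType type)
{
  return type == IMAGE_DATA_TYPE_NANOVDB_FLOAT || type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3 ||
         type == IMAGE_DATA_TYPE_NANOVDB_FPN || type == IMAGE_DATA_TYPE_NANOVDB_FP16;
}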