Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles')
-rw-r--r--intern/cycles/blender/addon/properties.py99
-rw-r--r--intern/cycles/blender/addon/ui.py14
-rw-r--r--intern/cycles/blender/image.cpp15
-rw-r--r--intern/cycles/blender/image.h8
-rw-r--r--intern/cycles/blender/mesh.cpp2
-rw-r--r--intern/cycles/blender/shader.cpp32
-rw-r--r--intern/cycles/device/metal/device_impl.h6
-rw-r--r--intern/cycles/device/metal/device_impl.mm96
-rw-r--r--intern/cycles/device/metal/kernel.h108
-rw-r--r--intern/cycles/device/metal/kernel.mm856
-rw-r--r--intern/cycles/device/metal/queue.mm31
-rw-r--r--intern/cycles/device/optix/device_impl.cpp38
-rw-r--r--intern/cycles/kernel/CMakeLists.txt1
-rw-r--r--intern/cycles/kernel/device/metal/compat.h24
-rw-r--r--intern/cycles/kernel/osl/services.cpp38
-rw-r--r--intern/cycles/kernel/osl/services.h14
-rw-r--r--intern/cycles/kernel/osl/shaders/CMakeLists.txt2
-rw-r--r--intern/cycles/kernel/osl/shaders/node_color.h50
-rw-r--r--intern/cycles/kernel/osl/shaders/node_combine_color.osl16
-rw-r--r--intern/cycles/kernel/osl/shaders/node_separate_color.osl26
-rw-r--r--intern/cycles/kernel/svm/color_util.h26
-rw-r--r--intern/cycles/kernel/svm/sepcomb_color.h54
-rw-r--r--intern/cycles/kernel/svm/svm.h7
-rw-r--r--intern/cycles/kernel/svm/types.h8
-rw-r--r--intern/cycles/scene/image.cpp51
-rw-r--r--intern/cycles/scene/image.h9
-rw-r--r--intern/cycles/scene/osl.cpp7
-rw-r--r--intern/cycles/scene/osl.h2
-rw-r--r--intern/cycles/scene/shader_nodes.cpp139
-rw-r--r--intern/cycles/scene/shader_nodes.h20
-rw-r--r--intern/cycles/util/color.h50
31 files changed, 1226 insertions, 623 deletions
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 9b4799d252f..9acc9e99ad0 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -24,12 +24,19 @@ from . import camera
enum_devices = (
('CPU', "CPU", "Use CPU for rendering"),
- ('GPU', "GPU Compute", "Use GPU compute device for rendering, configured in the system tab in the user preferences"),
+ ('GPU', "GPU Compute",
+ "Use GPU compute device for rendering, configured in the system tab in the user preferences"),
)
enum_feature_set = (
- ('SUPPORTED', "Supported", "Only use finished and supported features"),
- ('EXPERIMENTAL', "Experimental", "Use experimental and incomplete features that might be broken or change in the future", 'ERROR', 1),
+ ('SUPPORTED',
+ "Supported",
+ "Only use finished and supported features"),
+ ('EXPERIMENTAL',
+ "Experimental",
+ "Use experimental and incomplete features that might be broken or change in the future",
+ 'ERROR',
+ 1),
)
enum_displacement_methods = (
@@ -81,9 +88,14 @@ enum_sampling_pattern = (
)
enum_volume_sampling = (
- ('DISTANCE', "Distance", "Use distance sampling, best for dense volumes with lights far away"),
- ('EQUIANGULAR', "Equiangular", "Use equiangular sampling, best for volumes with low density with light inside or near the volume"),
- ('MULTIPLE_IMPORTANCE', "Multiple Importance",
+ ('DISTANCE',
+ "Distance",
+ "Use distance sampling, best for dense volumes with lights far away"),
+ ('EQUIANGULAR',
+ "Equiangular",
+ "Use equiangular sampling, best for volumes with low density with light inside or near the volume"),
+ ('MULTIPLE_IMPORTANCE',
+ "Multiple Importance",
"Combine distance and equi-angular sampling for volumes where neither method is ideal"),
)
@@ -93,10 +105,15 @@ enum_volume_interpolation = (
)
enum_world_mis = (
- ('NONE', "None", "Don't sample the background, faster but might cause noise for non-solid backgrounds"),
- ('AUTOMATIC', "Auto", "Automatically try to determine the best setting"),
- ('MANUAL', "Manual", "Manually set the resolution of the sampling map, higher values are slower and require more memory but reduce noise")
-)
+ ('NONE',
+ "None",
+ "Don't sample the background, faster but might cause noise for non-solid backgrounds"),
+ ('AUTOMATIC',
+ "Auto",
+ "Automatically try to determine the best setting"),
+ ('MANUAL',
+ "Manual",
+ "Manually set the resolution of the sampling map, higher values are slower and require more memory but reduce noise"))
enum_device_type = (
('CPU', "CPU", "CPU", 0),
@@ -210,17 +227,33 @@ enum_denoising_input_passes = (
)
enum_denoising_prefilter = (
- ('NONE', "None", "No prefiltering, use when guiding passes are noise-free", 1),
- ('FAST', "Fast", "Denoise color and guiding passes together. Improves quality when guiding passes are noisy using least amount of extra processing time", 2),
- ('ACCURATE', "Accurate", "Prefilter noisy guiding passes before denoising color. Improves quality when guiding passes are noisy using extra processing time", 3),
+ ('NONE',
+ "None",
+ "No prefiltering, use when guiding passes are noise-free",
+ 1),
+ ('FAST',
+ "Fast",
+ "Denoise color and guiding passes together. Improves quality when guiding passes are noisy using least amount of extra processing time",
+ 2),
+ ('ACCURATE',
+ "Accurate",
+ "Prefilter noisy guiding passes before denoising color. Improves quality when guiding passes are noisy using extra processing time",
+ 3),
)
enum_direct_light_sampling_type = (
- ('MULTIPLE_IMPORTANCE_SAMPLING', "Multiple Importance Sampling",
- "Multiple importance sampling is used to combine direct light contributions from next-event estimation and forward path tracing", 0),
- ('FORWARD_PATH_TRACING', "Forward Path Tracing", "Direct light contributions are only sampled using forward path tracing", 1),
- ('NEXT_EVENT_ESTIMATION', "Next-Event Estimation",
- "Direct light contributions are only sampled using next-event estimation", 2),
+ ('MULTIPLE_IMPORTANCE_SAMPLING',
+ "Multiple Importance Sampling",
+ "Multiple importance sampling is used to combine direct light contributions from next-event estimation and forward path tracing",
+ 0),
+ ('FORWARD_PATH_TRACING',
+ "Forward Path Tracing",
+ "Direct light contributions are only sampled using forward path tracing",
+ 1),
+ ('NEXT_EVENT_ESTIMATION',
+ "Next-Event Estimation",
+ "Direct light contributions are only sampled using next-event estimation",
+ 2),
)
@@ -357,7 +390,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
scrambling_distance: FloatProperty(
name="Scrambling Distance",
default=1.0,
- min=0.0, soft_max=1.0,
+ min=0.0,
+ soft_max=1.0,
description="Reduce randomization between pixels to improve GPU rendering performance, at the cost of possible rendering artifacts if set too low",
)
preview_scrambling_distance: BoolProperty(
@@ -383,7 +417,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
name="Light Sampling Threshold",
description="Probabilistically terminate light samples when the light contribution is below this threshold (more noise but faster rendering). "
"Zero disables the test and never ignores lights",
- min=0.0, max=1.0,
+ min=0.0,
+ max=1.0,
default=0.01,
)
@@ -395,7 +430,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
adaptive_threshold: FloatProperty(
name="Adaptive Sampling Threshold",
description="Noise level step to stop sampling at, lower values reduce noise at the cost of render time. Zero for automatic setting based on number of AA samples",
- min=0.0, max=1.0,
+ min=0.0,
+ max=1.0,
soft_min=0.001,
default=0.01,
precision=4,
@@ -403,7 +439,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
adaptive_min_samples: IntProperty(
name="Adaptive Min Samples",
description="Minimum AA samples for adaptive sampling, to discover noisy features before stopping sampling. Zero for automatic setting based on noise threshold",
- min=0, max=4096,
+ min=0,
+ max=4096,
default=0,
)
@@ -415,7 +452,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
preview_adaptive_threshold: FloatProperty(
name="Adaptive Sampling Threshold",
description="Noise level step to stop sampling at, lower values reduce noise at the cost of render time. Zero for automatic setting based on number of AA samples, for viewport renders",
- min=0.0, max=1.0,
+ min=0.0,
+ max=1.0,
soft_min=0.001,
default=0.1,
precision=4,
@@ -423,7 +461,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
preview_adaptive_min_samples: IntProperty(
name="Adaptive Min Samples",
description="Minimum AA samples for adaptive sampling, to discover noisy features before stopping sampling. Zero for automatic setting based on noise threshold, for viewport renders",
- min=0, max=4096,
+ min=0,
+ max=4096,
default=0,
)
@@ -550,7 +589,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
max_subdivisions: IntProperty(
name="Max Subdivisions",
description="Stop subdividing when this level is reached even if the dice rate would produce finer tessellation",
- min=0, max=16,
+ min=0,
+ max=16,
default=12,
)
@@ -817,8 +857,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
debug_use_optix_debug: BoolProperty(
name="OptiX Module Debug",
description="Load OptiX module in debug mode: lower logging verbosity level, enable validations, and lower optimization level",
- default=False
- )
+ default=False)
@classmethod
def register(cls):
@@ -1185,7 +1224,8 @@ class CyclesObjectSettings(bpy.types.PropertyGroup):
motion_steps: IntProperty(
name="Motion Steps",
description="Control accuracy of motion blur, more steps gives more memory usage (actual number of steps is 2^(steps - 1))",
- min=1, max=7,
+ min=1,
+ max=7,
default=1,
)
@@ -1224,7 +1264,8 @@ class CyclesObjectSettings(bpy.types.PropertyGroup):
shadow_terminator_geometry_offset: FloatProperty(
name="Shadow Terminator Geometry Offset",
description="Offset rays from the surface to reduce shadow terminator artifact on low poly geometry. Only affects triangles at grazing angles to light",
- min=0.0, max=1.0,
+ min=0.0,
+ max=1.0,
default=0.1,
)
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 886f5345020..9d2dbdf6732 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -1082,8 +1082,18 @@ class CYCLES_OBJECT_PT_motion_blur(CyclesButtonsPanel, Panel):
def has_geometry_visibility(ob):
- return ob and ((ob.type in {'MESH', 'CURVE', 'SURFACE', 'FONT', 'META', 'LIGHT', 'VOLUME', 'POINTCLOUD', 'CURVES'}) or
- (ob.instance_type == 'COLLECTION' and ob.instance_collection))
+ return ob and (
+ (ob.type in {
+ 'MESH',
+ 'CURVE',
+ 'SURFACE',
+ 'FONT',
+ 'META',
+ 'LIGHT',
+ 'VOLUME',
+ 'POINTCLOUD',
+ 'CURVES',
+ }) or (ob.instance_type == 'COLLECTION' and ob.instance_collection))
class CYCLES_OBJECT_PT_shading(CyclesButtonsPanel, Panel):
diff --git a/intern/cycles/blender/image.cpp b/intern/cycles/blender/image.cpp
index ca4c8f5904a..e01b72c1653 100644
--- a/intern/cycles/blender/image.cpp
+++ b/intern/cycles/blender/image.cpp
@@ -13,9 +13,11 @@ CCL_NAMESPACE_BEGIN
BlenderImageLoader::BlenderImageLoader(BL::Image b_image,
const int frame,
+ const int tile_number,
const bool is_preview_render)
: b_image(b_image),
frame(frame),
+ tile_number(tile_number),
/* Don't free cache for preview render to avoid race condition from T93560, to be fixed
properly later as we are close to release. */
free_cache(!is_preview_render && !b_image.has_data())
@@ -66,12 +68,11 @@ bool BlenderImageLoader::load_pixels(const ImageMetaData &metadata,
{
const size_t num_pixels = ((size_t)metadata.width) * metadata.height;
const int channels = metadata.channels;
- const int tile = 0; /* TODO(lukas): Support tiles here? */
if (b_image.is_float()) {
/* image data */
float *image_pixels;
- image_pixels = image_get_float_pixels_for_frame(b_image, frame, tile);
+ image_pixels = image_get_float_pixels_for_frame(b_image, frame, tile_number);
if (image_pixels && num_pixels * channels == pixels_size) {
memcpy(pixels, image_pixels, pixels_size * sizeof(float));
@@ -99,7 +100,7 @@ bool BlenderImageLoader::load_pixels(const ImageMetaData &metadata,
}
}
else {
- unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame, tile);
+ unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame, tile_number);
if (image_pixels && num_pixels * channels == pixels_size) {
memcpy(pixels, image_pixels, pixels_size * sizeof(unsigned char));
@@ -153,7 +154,13 @@ string BlenderImageLoader::name() const
bool BlenderImageLoader::equals(const ImageLoader &other) const
{
const BlenderImageLoader &other_loader = (const BlenderImageLoader &)other;
- return b_image == other_loader.b_image && frame == other_loader.frame;
+ return b_image == other_loader.b_image && frame == other_loader.frame &&
+ tile_number == other_loader.tile_number;
+}
+
+int BlenderImageLoader::get_tile_number() const
+{
+ return tile_number;
}
/* Point Density */
diff --git a/intern/cycles/blender/image.h b/intern/cycles/blender/image.h
index ee576b31f7e..c2cc0f51b9b 100644
--- a/intern/cycles/blender/image.h
+++ b/intern/cycles/blender/image.h
@@ -12,7 +12,10 @@ CCL_NAMESPACE_BEGIN
class BlenderImageLoader : public ImageLoader {
public:
- BlenderImageLoader(BL::Image b_image, const int frame, const bool is_preview_render);
+ BlenderImageLoader(BL::Image b_image,
+ const int frame,
+ const int tile_number,
+ const bool is_preview_render);
bool load_metadata(const ImageDeviceFeatures &features, ImageMetaData &metadata) override;
bool load_pixels(const ImageMetaData &metadata,
@@ -22,8 +25,11 @@ class BlenderImageLoader : public ImageLoader {
string name() const override;
bool equals(const ImageLoader &other) const override;
+ int get_tile_number() const override;
+
BL::Image b_image;
int frame;
+ int tile_number;
bool free_cache;
};
diff --git a/intern/cycles/blender/mesh.cpp b/intern/cycles/blender/mesh.cpp
index de67e27923d..c76ce3801d4 100644
--- a/intern/cycles/blender/mesh.cpp
+++ b/intern/cycles/blender/mesh.cpp
@@ -316,7 +316,7 @@ static void fill_generic_attribute(BL::Mesh &b_mesh,
break;
}
case BL::Attribute::domain_EDGE: {
- /* Averge edge attributes at vertices. */
+ /* Average edge attributes at vertices. */
const size_t num_verts = b_mesh.vertices.length();
vector<int> count(num_verts, 0);
diff --git a/intern/cycles/blender/shader.cpp b/intern/cycles/blender/shader.cpp
index d3527567b96..81a64457c88 100644
--- a/intern/cycles/blender/shader.cpp
+++ b/intern/cycles/blender/shader.cpp
@@ -355,6 +355,18 @@ static ShaderNode *add_node(Scene *scene,
else if (b_node.is_a(&RNA_ShaderNodeCombineHSV)) {
node = graph->create_node<CombineHSVNode>();
}
+ else if (b_node.is_a(&RNA_ShaderNodeSeparateColor)) {
+ BL::ShaderNodeSeparateColor b_separate_node(b_node);
+ SeparateColorNode *separate_node = graph->create_node<SeparateColorNode>();
+ separate_node->set_color_type((NodeCombSepColorType)b_separate_node.mode());
+ node = separate_node;
+ }
+ else if (b_node.is_a(&RNA_ShaderNodeCombineColor)) {
+ BL::ShaderNodeCombineColor b_combine_node(b_node);
+ CombineColorNode *combine_node = graph->create_node<CombineColorNode>();
+ combine_node->set_color_type((NodeCombSepColorType)b_combine_node.mode());
+ node = combine_node;
+ }
else if (b_node.is_a(&RNA_ShaderNodeSeparateXYZ)) {
node = graph->create_node<SeparateXYZNode>();
}
@@ -764,9 +776,21 @@ static ShaderNode *add_node(Scene *scene,
*/
int scene_frame = b_scene.frame_current();
int image_frame = image_user_frame_number(b_image_user, b_image, scene_frame);
- image->handle = scene->image_manager->add_image(
- new BlenderImageLoader(b_image, image_frame, b_engine.is_preview()),
- image->image_params());
+ if (b_image.source() != BL::Image::source_TILED) {
+ image->handle = scene->image_manager->add_image(
+ new BlenderImageLoader(b_image, image_frame, 0, b_engine.is_preview()),
+ image->image_params());
+ }
+ else {
+ vector<ImageLoader *> loaders;
+ loaders.reserve(image->get_tiles().size());
+ for (int tile_number : image->get_tiles()) {
+ loaders.push_back(
+ new BlenderImageLoader(b_image, image_frame, tile_number, b_engine.is_preview()));
+ }
+
+ image->handle = scene->image_manager->add_image(loaders, image->image_params());
+ }
}
else {
ustring filename = ustring(
@@ -802,7 +826,7 @@ static ShaderNode *add_node(Scene *scene,
int scene_frame = b_scene.frame_current();
int image_frame = image_user_frame_number(b_image_user, b_image, scene_frame);
env->handle = scene->image_manager->add_image(
- new BlenderImageLoader(b_image, image_frame, b_engine.is_preview()),
+ new BlenderImageLoader(b_image, image_frame, 0, b_engine.is_preview()),
env->image_params());
}
else {
diff --git a/intern/cycles/device/metal/device_impl.h b/intern/cycles/device/metal/device_impl.h
index 27c58ce6d2f..7506b9b069f 100644
--- a/intern/cycles/device/metal/device_impl.h
+++ b/intern/cycles/device/metal/device_impl.h
@@ -28,7 +28,8 @@ class MetalDevice : public Device {
id<MTLCommandQueue> mtlGeneralCommandQueue = nil;
id<MTLArgumentEncoder> mtlAncillaryArgEncoder =
nil; /* encoder used for fetching device pointers from MTLBuffers */
- string source_used_for_compile[PSO_NUM];
+ string source[PSO_NUM];
+ string source_md5[PSO_NUM];
KernelParamsMetal launch_params = {0};
@@ -72,7 +73,6 @@ class MetalDevice : public Device {
id<MTLBuffer> texture_bindings_3d = nil;
std::vector<id<MTLTexture>> texture_slot_map;
- MetalDeviceKernels kernels;
bool use_metalrt = false;
bool use_function_specialisation = false;
@@ -110,6 +110,8 @@ class MetalDevice : public Device {
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
+ id<MTLLibrary> compile(string const &source);
+
/* ------------------------------------------------------------------ */
/* low-level memory management */
diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm
index c01f51fb506..e1438a9d6e2 100644
--- a/intern/cycles/device/metal/device_impl.mm
+++ b/intern/cycles/device/metal/device_impl.mm
@@ -275,96 +275,44 @@ bool MetalDevice::load_kernels(const uint _kernel_features)
* active, but may still need to be rendered without motion blur if that isn't active as well. */
motion_blur = kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
- NSError *error = NULL;
+ source[PSO_GENERIC] = get_source(kernel_features);
+ mtlLibrary[PSO_GENERIC] = compile(source[PSO_GENERIC]);
- for (int i = 0; i < PSO_NUM; i++) {
- if (mtlLibrary[i]) {
- [mtlLibrary[i] release];
- mtlLibrary[i] = nil;
- }
- }
+ MD5Hash md5;
+ md5.append(source[PSO_GENERIC]);
+ source_md5[PSO_GENERIC] = md5.get_hex();
+
+ metal_printf("Front-end compilation finished (generic)\n");
+
+ bool result = MetalDeviceKernels::load(this, false);
+
+ reserve_local_memory(kernel_features);
+
+ return result;
+}
+id<MTLLibrary> MetalDevice::compile(string const &source)
+{
MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
options.fastMathEnabled = YES;
if (@available(macOS 12.0, *)) {
options.languageVersion = MTLLanguageVersion2_4;
}
- else {
- return false;
- }
- string metalsrc;
-
- /* local helper: dump source to disk and return filepath */
- auto dump_source = [&](int kernel_type) -> string {
- string &source = source_used_for_compile[kernel_type];
- string metalsrc = path_cache_get(path_join("kernels",
- string_printf("%s.%s.metal",
- kernel_type_as_string(kernel_type),
- util_md5_string(source).c_str())));
- path_write_text(metalsrc, source);
- return metalsrc;
- };
-
- /* local helper: fetch the kernel source code, adjust it for specific PSO_.. kernel_type flavor,
- * then compile it into a MTLLibrary */
- auto fetch_and_compile_source = [&](int kernel_type) {
- /* Record the source used to compile this library, for hash building later. */
- string &source = source_used_for_compile[kernel_type];
-
- switch (kernel_type) {
- case PSO_GENERIC: {
- source = get_source(kernel_features);
- break;
- }
- case PSO_SPECIALISED: {
- /* PSO_SPECIALISED derives from PSO_GENERIC */
- string &generic_source = source_used_for_compile[PSO_GENERIC];
- if (generic_source.empty()) {
- generic_source = get_source(kernel_features);
- }
- source = "#define __KERNEL_METAL_USE_FUNCTION_SPECIALISATION__\n" + generic_source;
- break;
- }
- default:
- assert(0);
- }
-
- /* create MTLLibrary (front-end compilation) */
- mtlLibrary[kernel_type] = [mtlDevice newLibraryWithSource:@(source.c_str())
+ NSError *error = NULL;
+ id<MTLLibrary> mtlLibrary = [mtlDevice newLibraryWithSource:@(source.c_str())
options:options
error:&error];
- bool do_source_dump = (getenv("CYCLES_METAL_DUMP_SOURCE") != nullptr);
-
- if (!mtlLibrary[kernel_type] || do_source_dump) {
- string metalsrc = dump_source(kernel_type);
-
- if (!mtlLibrary[kernel_type]) {
- NSString *err = [error localizedDescription];
- set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
-
- return false;
- }
- }
- return true;
- };
-
- fetch_and_compile_source(PSO_GENERIC);
-
- if (use_function_specialisation) {
- fetch_and_compile_source(PSO_SPECIALISED);
+ if (!mtlLibrary) {
+ NSString *err = [error localizedDescription];
+ set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
}
- metal_printf("Front-end compilation finished\n");
-
- bool result = kernels.load(this, PSO_GENERIC);
-
[options release];
- reserve_local_memory(kernel_features);
- return result;
+ return mtlLibrary;
}
void MetalDevice::reserve_local_memory(const uint kernel_features)
diff --git a/intern/cycles/device/metal/kernel.h b/intern/cycles/device/metal/kernel.h
index b12491d820d..69b2a686ecc 100644
--- a/intern/cycles/device/metal/kernel.h
+++ b/intern/cycles/device/metal/kernel.h
@@ -54,103 +54,41 @@ enum {
const char *kernel_type_as_string(int kernel_type);
struct MetalKernelPipeline {
- void release()
- {
- if (pipeline) {
- [pipeline release];
- pipeline = nil;
- if (@available(macOS 11.0, *)) {
- for (int i = 0; i < METALRT_TABLE_NUM; i++) {
- if (intersection_func_table[i]) {
- [intersection_func_table[i] release];
- intersection_func_table[i] = nil;
- }
- }
- }
- }
- if (function) {
- [function release];
- function = nil;
- }
- if (@available(macOS 11.0, *)) {
- for (int i = 0; i < METALRT_TABLE_NUM; i++) {
- if (intersection_func_table[i]) {
- [intersection_func_table[i] release];
- }
- }
- }
- }
- bool loaded = false;
- id<MTLFunction> function = nil;
- id<MTLComputePipelineState> pipeline = nil;
-
- API_AVAILABLE(macos(11.0))
- id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil};
-};
-
-struct MetalKernelLoadDesc {
- int pso_index = 0;
- const char *function_name = nullptr;
- int kernel_index = 0;
- int threads_per_threadgroup = 0;
- MTLFunctionConstantValues *constant_values = nullptr;
- NSArray *linked_functions = nullptr;
-
- struct IntersectorFunctions {
- NSArray *defaults;
- NSArray *shadow;
- NSArray *local;
- NSArray *operator[](int index) const
- {
- if (index == METALRT_TABLE_DEFAULT)
- return defaults;
- if (index == METALRT_TABLE_SHADOW)
- return shadow;
- return local;
- }
- } intersector_functions = {nullptr};
-};
-
-/* Metal kernel and associate occupancy information. */
-class MetalDeviceKernel {
- public:
- ~MetalDeviceKernel();
+ void compile();
- bool load(MetalDevice *device, MetalKernelLoadDesc const &desc, class MD5Hash const &md5);
+ id<MTLLibrary> mtlLibrary = nil;
+ bool scene_specialized;
+ string source_md5;
- void mark_loaded(int pso_index)
- {
- pso[pso_index].loaded = true;
- }
+ bool use_metalrt;
+ bool metalrt_hair;
+ bool metalrt_hair_thick;
+ bool metalrt_pointcloud;
- int get_num_threads_per_block() const
- {
- return num_threads_per_block;
- }
- const MetalKernelPipeline &get_pso() const;
+ int threads_per_threadgroup;
- double load_duration = 0.0;
+ DeviceKernel device_kernel;
+ bool loaded = false;
+ id<MTLDevice> mtlDevice = nil;
+ id<MTLFunction> function = nil;
+ id<MTLComputePipelineState> pipeline = nil;
+ int num_threads_per_block = 0;
- private:
- MetalKernelPipeline pso[PSO_NUM];
+ string error_str;
- int num_threads_per_block = 0;
+ API_AVAILABLE(macos(11.0))
+ id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil};
+ id<MTLFunction> rt_intersection_function[METALRT_FUNC_NUM] = {nil};
};
/* Cache of Metal kernels for each DeviceKernel. */
-class MetalDeviceKernels {
- public:
- bool load(MetalDevice *device, int kernel_type);
- bool available(DeviceKernel kernel) const;
- const MetalDeviceKernel &get(DeviceKernel kernel) const;
+namespace MetalDeviceKernels {
- MetalDeviceKernel kernels_[DEVICE_KERNEL_NUM];
+bool load(MetalDevice *device, bool scene_specialized);
+const MetalKernelPipeline *get_best_pipeline(const MetalDevice *device, DeviceKernel kernel);
- id<MTLFunction> rt_intersection_funcs[PSO_NUM][METALRT_FUNC_NUM] = {{nil}};
-
- string loaded_md5[PSO_NUM];
-};
+} /* namespace MetalDeviceKernels */
CCL_NAMESPACE_END
diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index 9555ca03c8e..304efc813ec 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -9,6 +9,7 @@
# include "util/path.h"
# include "util/tbb.h"
# include "util/time.h"
+# include "util/unique_ptr.h"
CCL_NAMESPACE_BEGIN
@@ -28,82 +29,376 @@ const char *kernel_type_as_string(int kernel_type)
return "";
}
-MetalDeviceKernel::~MetalDeviceKernel()
+bool kernel_has_intersection(DeviceKernel device_kernel)
{
- for (int i = 0; i < PSO_NUM; i++) {
- pso[i].release();
+ return (device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
+ device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
+ device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
+ device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK ||
+ device_kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE);
+}
+
+struct ShaderCache {
+ ShaderCache(id<MTLDevice> _mtlDevice) : mtlDevice(_mtlDevice)
+ {
+ }
+ ~ShaderCache();
+
+ /* Get the fastest available pipeline for the specified kernel. */
+ MetalKernelPipeline *get_best_pipeline(DeviceKernel kernel, const MetalDevice *device);
+
+ /* Non-blocking request for a kernel, optionally specialized to the scene being rendered by
+ * device. */
+ void load_kernel(DeviceKernel kernel, MetalDevice *device, bool scene_specialized);
+
+ void wait_for_all();
+
+ private:
+ friend ShaderCache *get_shader_cache(id<MTLDevice> mtlDevice);
+
+ void compile_thread_func(int thread_index);
+
+ using PipelineCollection = std::vector<unique_ptr<MetalKernelPipeline>>;
+
+ struct PipelineRequest {
+ MetalKernelPipeline *pipeline = nullptr;
+ std::function<void(MetalKernelPipeline *)> completionHandler;
+ };
+
+ std::mutex cache_mutex;
+
+ PipelineCollection pipelines[DEVICE_KERNEL_NUM];
+ id<MTLDevice> mtlDevice;
+
+ bool running = false;
+ std::condition_variable cond_var;
+ std::deque<PipelineRequest> request_queue;
+ std::vector<std::thread> compile_threads;
+ std::atomic_int incomplete_requests = 0;
+};
+
+std::mutex g_shaderCacheMutex;
+std::map<id<MTLDevice>, unique_ptr<ShaderCache>> g_shaderCache;
+
+ShaderCache *get_shader_cache(id<MTLDevice> mtlDevice)
+{
+ thread_scoped_lock lock(g_shaderCacheMutex);
+ auto it = g_shaderCache.find(mtlDevice);
+ if (it != g_shaderCache.end()) {
+ return it->second.get();
+ }
+
+ g_shaderCache[mtlDevice] = make_unique<ShaderCache>(mtlDevice);
+ return g_shaderCache[mtlDevice].get();
+}
+
+ShaderCache::~ShaderCache()
+{
+ metal_printf("ShaderCache shutting down with incomplete_requests = %d\n",
+ int(incomplete_requests));
+
+ running = false;
+ cond_var.notify_all();
+ for (auto &thread : compile_threads) {
+ thread.join();
+ }
+}
+
+void ShaderCache::wait_for_all()
+{
+ while (incomplete_requests > 0) {
+ std::this_thread::sleep_for(std::chrono::milliseconds(100));
+ }
+}
+
+void ShaderCache::compile_thread_func(int thread_index)
+{
+ while (1) {
+
+ /* wait for / acquire next request */
+ PipelineRequest request;
+ {
+ thread_scoped_lock lock(cache_mutex);
+ cond_var.wait(lock, [&] { return !running || !request_queue.empty(); });
+ if (!running) {
+ break;
+ }
+
+ if (!request_queue.empty()) {
+ request = request_queue.front();
+ request_queue.pop_front();
+ }
+ }
+
+ /* service request */
+ if (request.pipeline) {
+ request.pipeline->compile();
+ incomplete_requests--;
+ }
}
}
-bool MetalDeviceKernel::load(MetalDevice *device,
- MetalKernelLoadDesc const &desc_in,
- MD5Hash const &md5)
+void ShaderCache::load_kernel(DeviceKernel device_kernel,
+ MetalDevice *device,
+ bool scene_specialized)
{
- __block MetalKernelLoadDesc const desc(desc_in);
- if (desc.kernel_index == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
+ {
+ /* create compiler threads on first run */
+ thread_scoped_lock lock(cache_mutex);
+ if (compile_threads.empty()) {
+ running = true;
+ for (int i = 0; i < max_mtlcompiler_threads; i++) {
+ compile_threads.push_back(std::thread([&] { compile_thread_func(i); }));
+ }
+ }
+ }
+
+ if (device_kernel == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
/* skip megakernel */
- return true;
+ return;
}
- bool use_binary_archive = true;
- if (device->device_vendor == METAL_GPU_APPLE) {
- /* Workaround for T94142: Cycles Metal crash with simultaneous viewport and final render */
- use_binary_archive = false;
+ if (scene_specialized) {
+ /* Only specialize kernels where it can make an impact. */
+ if (device_kernel < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
+ device_kernel > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
+ return;
+ }
}
- if (auto str = getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) {
- use_binary_archive = (atoi(str) == 0);
+ {
+ /* check whether the kernel has already been requested / cached */
+ thread_scoped_lock lock(cache_mutex);
+ for (auto &pipeline : pipelines[device_kernel]) {
+ if (scene_specialized) {
+ if (pipeline->source_md5 == device->source_md5[PSO_SPECIALISED]) {
+ /* we already requested a pipeline that is specialized for this kernel data */
+ metal_printf("Specialized kernel already requested (%s)\n",
+ device_kernel_as_string(device_kernel));
+ return;
+ }
+ }
+ else {
+ if (pipeline->source_md5 == device->source_md5[PSO_GENERIC]) {
+ /* we already requested a generic pipeline for this kernel */
+ metal_printf("Generic kernel already requested (%s)\n",
+ device_kernel_as_string(device_kernel));
+ return;
+ }
+ }
+ }
}
- id<MTLBinaryArchive> archive = nil;
- string metalbin_path;
- if (use_binary_archive) {
- NSProcessInfo *processInfo = [NSProcessInfo processInfo];
- string osVersion = [[processInfo operatingSystemVersionString] UTF8String];
- MD5Hash local_md5(md5);
- local_md5.append(osVersion);
- string metalbin_name = string(desc.function_name) + "." + local_md5.get_hex() +
- to_string(desc.pso_index) + ".bin";
- metalbin_path = path_cache_get(path_join("kernels", metalbin_name));
- path_create_directories(metalbin_path);
+ incomplete_requests++;
+
+ PipelineRequest request;
+ request.pipeline = new MetalKernelPipeline;
+ request.pipeline->scene_specialized = scene_specialized;
+ request.pipeline->mtlDevice = mtlDevice;
+ request.pipeline->source_md5 =
+ device->source_md5[scene_specialized ? PSO_SPECIALISED : PSO_GENERIC];
+ request.pipeline->mtlLibrary =
+ device->mtlLibrary[scene_specialized ? PSO_SPECIALISED : PSO_GENERIC];
+ request.pipeline->device_kernel = device_kernel;
+ request.pipeline->threads_per_threadgroup = device->max_threads_per_threadgroup;
+
+ /* metalrt options */
+ request.pipeline->use_metalrt = device->use_metalrt;
+ request.pipeline->metalrt_hair = device->use_metalrt &&
+ (device->kernel_features & KERNEL_FEATURE_HAIR);
+ request.pipeline->metalrt_hair_thick = device->use_metalrt &&
+ (device->kernel_features & KERNEL_FEATURE_HAIR_THICK);
+ request.pipeline->metalrt_pointcloud = device->use_metalrt &&
+ (device->kernel_features & KERNEL_FEATURE_POINTCLOUD);
+
+ {
+ thread_scoped_lock lock(cache_mutex);
+ pipelines[device_kernel].push_back(unique_ptr<MetalKernelPipeline>(request.pipeline));
+ request_queue.push_back(request);
+ }
+ cond_var.notify_one();
+}
- if (path_exists(metalbin_path) && use_binary_archive) {
- if (@available(macOS 11.0, *)) {
- MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
- archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())];
- archive = [device->mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
- [archiveDesc release];
+MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const MetalDevice *device)
+{
+ thread_scoped_lock lock(cache_mutex);
+ auto &collection = pipelines[kernel];
+ if (collection.empty()) {
+ return nullptr;
+ }
+
+ /* metalrt options */
+ bool use_metalrt = device->use_metalrt;
+ bool metalrt_hair = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR);
+ bool metalrt_hair_thick = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR_THICK);
+ bool metalrt_pointcloud = use_metalrt && (device->kernel_features & KERNEL_FEATURE_POINTCLOUD);
+
+ MetalKernelPipeline *best_pipeline = nullptr;
+ for (auto &pipeline : collection) {
+ if (!pipeline->loaded) {
+ /* still loading - ignore */
+ continue;
+ }
+
+ if (pipeline->use_metalrt != use_metalrt || pipeline->metalrt_hair != metalrt_hair ||
+ pipeline->metalrt_hair_thick != metalrt_hair_thick ||
+ pipeline->metalrt_pointcloud != metalrt_pointcloud) {
+ /* wrong combination of metalrt options */
+ continue;
+ }
+
+ if (pipeline->scene_specialized) {
+ if (pipeline->source_md5 == device->source_md5[PSO_SPECIALISED]) {
+ best_pipeline = pipeline.get();
}
}
+ else if (!best_pipeline) {
+ best_pipeline = pipeline.get();
+ }
+ }
+
+ return best_pipeline;
+}
+
+void MetalKernelPipeline::compile()
+{
+ int pso_type = scene_specialized ? PSO_SPECIALISED : PSO_GENERIC;
+
+ const std::string function_name = std::string("cycles_metal_") +
+ device_kernel_as_string(device_kernel);
+
+ int threads_per_threadgroup = this->threads_per_threadgroup;
+ if (device_kernel > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL &&
+ device_kernel < DEVICE_KERNEL_INTEGRATOR_RESET) {
+ /* Always use 512 for the sorting kernels */
+ threads_per_threadgroup = 512;
}
- NSString *entryPoint = [@(desc.function_name) copy];
+ NSString *entryPoint = [@(function_name.c_str()) copy];
NSError *error = NULL;
if (@available(macOS 11.0, *)) {
MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
func_desc.name = entryPoint;
- if (desc.constant_values) {
- func_desc.constantValues = desc.constant_values;
- }
- pso[desc.pso_index].function = [device->mtlLibrary[desc.pso_index]
- newFunctionWithDescriptor:func_desc
- error:&error];
+ function = [mtlLibrary newFunctionWithDescriptor:func_desc error:&error];
}
+
[entryPoint release];
- if (pso[desc.pso_index].function == nil) {
+ if (function == nil) {
NSString *err = [error localizedDescription];
string errors = [err UTF8String];
+ metal_printf("Error getting function \"%s\": %s", function_name.c_str(), errors.c_str());
+ return;
+ }
+
+ function.label = [entryPoint copy];
+
+ if (use_metalrt) {
+ if (@available(macOS 11.0, *)) {
+ /* create the id<MTLFunction> for each intersection function */
+ const char *function_names[] = {
+ "__anyhit__cycles_metalrt_visibility_test_tri",
+ "__anyhit__cycles_metalrt_visibility_test_box",
+ "__anyhit__cycles_metalrt_shadow_all_hit_tri",
+ "__anyhit__cycles_metalrt_shadow_all_hit_box",
+ "__anyhit__cycles_metalrt_local_hit_tri",
+ "__anyhit__cycles_metalrt_local_hit_box",
+ "__intersection__curve_ribbon",
+ "__intersection__curve_ribbon_shadow",
+ "__intersection__curve_all",
+ "__intersection__curve_all_shadow",
+ "__intersection__point",
+ "__intersection__point_shadow",
+ };
+ assert(sizeof(function_names) / sizeof(function_names[0]) == METALRT_FUNC_NUM);
+
+ MTLFunctionDescriptor *desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
+ for (int i = 0; i < METALRT_FUNC_NUM; i++) {
+ const char *function_name = function_names[i];
+ desc.name = [@(function_name) copy];
+
+ NSError *error = NULL;
+ rt_intersection_function[i] = [mtlLibrary newFunctionWithDescriptor:desc error:&error];
- device->set_error(
- string_printf("Error getting function \"%s\": %s", desc.function_name, errors.c_str()));
- return false;
+ if (rt_intersection_function[i] == nil) {
+ NSString *err = [error localizedDescription];
+ string errors = [err UTF8String];
+
+ error_str = string_printf(
+ "Error getting intersection function \"%s\": %s", function_name, errors.c_str());
+ break;
+ }
+
+ rt_intersection_function[i].label = [@(function_name) copy];
+ }
+ }
}
- pso[desc.pso_index].function.label = [@(desc.function_name) copy];
+ NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
+ NSArray *linked_functions = nil;
+
+ if (use_metalrt) {
+ id<MTLFunction> curve_intersect_default = nil;
+ id<MTLFunction> curve_intersect_shadow = nil;
+ id<MTLFunction> point_intersect_default = nil;
+ id<MTLFunction> point_intersect_shadow = nil;
+ if (metalrt_hair) {
+ /* Add curve intersection programs. */
+ if (metalrt_hair_thick) {
+ /* Slower programs for thick hair since that also slows down ribbons.
+ * Ideally this should not be needed. */
+ curve_intersect_default = rt_intersection_function[METALRT_FUNC_CURVE_ALL];
+ curve_intersect_shadow = rt_intersection_function[METALRT_FUNC_CURVE_ALL_SHADOW];
+ }
+ else {
+ curve_intersect_default = rt_intersection_function[METALRT_FUNC_CURVE_RIBBON];
+ curve_intersect_shadow = rt_intersection_function[METALRT_FUNC_CURVE_RIBBON_SHADOW];
+ }
+ }
+ if (metalrt_pointcloud) {
+ point_intersect_default = rt_intersection_function[METALRT_FUNC_POINT];
+ point_intersect_shadow = rt_intersection_function[METALRT_FUNC_POINT_SHADOW];
+ }
+ table_functions[METALRT_TABLE_DEFAULT] = [NSArray
+ arrayWithObjects:rt_intersection_function[METALRT_FUNC_DEFAULT_TRI],
+ curve_intersect_default ?
+ curve_intersect_default :
+ rt_intersection_function[METALRT_FUNC_DEFAULT_BOX],
+ point_intersect_default ?
+ point_intersect_default :
+ rt_intersection_function[METALRT_FUNC_DEFAULT_BOX],
+ nil];
+ table_functions[METALRT_TABLE_SHADOW] = [NSArray
+ arrayWithObjects:rt_intersection_function[METALRT_FUNC_SHADOW_TRI],
+ curve_intersect_shadow ?
+ curve_intersect_shadow :
+ rt_intersection_function[METALRT_FUNC_SHADOW_BOX],
+ point_intersect_shadow ?
+ point_intersect_shadow :
+ rt_intersection_function[METALRT_FUNC_SHADOW_BOX],
+ nil];
+ table_functions[METALRT_TABLE_LOCAL] = [NSArray
+ arrayWithObjects:rt_intersection_function[METALRT_FUNC_LOCAL_TRI],
+ rt_intersection_function[METALRT_FUNC_LOCAL_BOX],
+ rt_intersection_function[METALRT_FUNC_LOCAL_BOX],
+ nil];
+
+ NSMutableSet *unique_functions = [NSMutableSet
+ setWithArray:table_functions[METALRT_TABLE_DEFAULT]];
+ [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_SHADOW]];
+ [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL]];
+
+ if (kernel_has_intersection(device_kernel)) {
+ linked_functions = [[NSArray arrayWithArray:[unique_functions allObjects]]
+ sortedArrayUsingComparator:^NSComparisonResult(id<MTLFunction> f1, id<MTLFunction> f2) {
+ return [f1.label compare:f2.label];
+ }];
+ }
+ unique_functions = nil;
+ }
- __block MTLComputePipelineDescriptor *computePipelineStateDescriptor =
+ MTLComputePipelineDescriptor *computePipelineStateDescriptor =
[[MTLComputePipelineDescriptor alloc] init];
computePipelineStateDescriptor.buffers[0].mutability = MTLMutabilityImmutable;
@@ -111,52 +406,86 @@ bool MetalDeviceKernel::load(MetalDevice *device,
computePipelineStateDescriptor.buffers[2].mutability = MTLMutabilityImmutable;
if (@available(macos 10.14, *)) {
- computePipelineStateDescriptor.maxTotalThreadsPerThreadgroup = desc.threads_per_threadgroup;
+ computePipelineStateDescriptor.maxTotalThreadsPerThreadgroup = threads_per_threadgroup;
}
computePipelineStateDescriptor.threadGroupSizeIsMultipleOfThreadExecutionWidth = true;
- computePipelineStateDescriptor.computeFunction = pso[desc.pso_index].function;
+ computePipelineStateDescriptor.computeFunction = function;
+
if (@available(macOS 11.0, *)) {
/* Attach the additional functions to an MTLLinkedFunctions object */
- if (desc.linked_functions) {
+ if (linked_functions) {
computePipelineStateDescriptor.linkedFunctions = [[MTLLinkedFunctions alloc] init];
- computePipelineStateDescriptor.linkedFunctions.functions = desc.linked_functions;
+ computePipelineStateDescriptor.linkedFunctions.functions = linked_functions;
}
-
computePipelineStateDescriptor.maxCallStackDepth = 1;
+ if (use_metalrt) {
+ computePipelineStateDescriptor.maxCallStackDepth = 8;
+ }
}
- /* Create a new Compute pipeline state object */
MTLPipelineOption pipelineOptions = MTLPipelineOptionNone;
- bool creating_new_archive = false;
+ bool use_binary_archive = true;
+ if (auto str = getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) {
+ use_binary_archive = (atoi(str) == 0);
+ }
+
+ id<MTLBinaryArchive> archive = nil;
+ string metalbin_path;
+ string metalbin_name;
+ if (use_binary_archive) {
+ NSProcessInfo *processInfo = [NSProcessInfo processInfo];
+ string osVersion = [[processInfo operatingSystemVersionString] UTF8String];
+ MD5Hash local_md5;
+ local_md5.append(source_md5);
+ local_md5.append(osVersion);
+ local_md5.append((uint8_t *)&this->threads_per_threadgroup,
+ sizeof(this->threads_per_threadgroup));
+
+ string options;
+ if (use_metalrt && kernel_has_intersection(device_kernel)) {
+ /* incorporate any MetalRT specializations into the archive name */
+ options += string_printf(".hair_%d.hair_thick_%d.pointcloud_%d",
+ metalrt_hair ? 1 : 0,
+ metalrt_hair_thick ? 1 : 0,
+ metalrt_pointcloud ? 1 : 0);
+ }
+
+ /* Replace non-alphanumerical characters with underscores. */
+ string device_name = [mtlDevice.name UTF8String];
+ for (char &c : device_name) {
+ if ((c < '0' || c > '9') && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z')) {
+ c = '_';
+ }
+ }
+
+ metalbin_name = device_name;
+ metalbin_name = path_join(metalbin_name, device_kernel_as_string(device_kernel));
+ metalbin_name = path_join(metalbin_name, kernel_type_as_string(pso_type));
+ metalbin_name = path_join(metalbin_name, local_md5.get_hex() + options + ".bin");
+
+ metalbin_path = path_cache_get(path_join("kernels", metalbin_name));
+ path_create_directories(metalbin_path);
+
+ if (path_exists(metalbin_path) && use_binary_archive) {
+ if (@available(macOS 11.0, *)) {
+ MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
+ archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())];
+ archive = [mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
+ [archiveDesc release];
+ }
+ }
+ }
+
+ __block bool creating_new_archive = false;
if (@available(macOS 11.0, *)) {
if (use_binary_archive) {
if (!archive) {
MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
archiveDesc.url = nil;
- archive = [device->mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
+ archive = [mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
creating_new_archive = true;
-
- double starttime = time_dt();
-
- if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor
- error:&error]) {
- NSString *errStr = [error localizedDescription];
- metal_printf("Failed to add PSO to archive:\n%s\n",
- errStr ? [errStr UTF8String] : "nil");
- }
- else {
- double duration = time_dt() - starttime;
- metal_printf("%2d | %-55s | %7.2fs\n",
- desc.kernel_index,
- device_kernel_as_string((DeviceKernel)desc.kernel_index),
- duration);
-
- if (desc.pso_index == PSO_GENERIC) {
- this->load_duration = duration;
- }
- }
}
computePipelineStateDescriptor.binaryArchives = [NSArray arrayWithObjects:archive, nil];
pipelineOptions = MTLPipelineOptionFailOnBinaryArchiveMiss;
@@ -170,17 +499,14 @@ bool MetalDeviceKernel::load(MetalDevice *device,
MTLComputePipelineReflection *reflection,
NSError *error) {
bool recreate_archive = false;
- if (computePipelineState == nil && archive && !creating_new_archive) {
-
- assert(0);
-
+ if (computePipelineState == nil && archive) {
NSString *errStr = [error localizedDescription];
metal_printf(
"Failed to create compute pipeline state \"%s\" from archive - attempting recreation... "
"(error: %s)\n",
- device_kernel_as_string((DeviceKernel)desc.kernel_index),
+ device_kernel_as_string((DeviceKernel)device_kernel),
errStr ? [errStr UTF8String] : "nil");
- computePipelineState = [device->mtlDevice
+ computePipelineState = [mtlDevice
newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
options:MTLPipelineOptionNone
reflection:nullptr
@@ -192,32 +518,23 @@ bool MetalDeviceKernel::load(MetalDevice *device,
if (computePipelineState == nil) {
NSString *errStr = [error localizedDescription];
- device->set_error(string_printf("Failed to create compute pipeline state \"%s\", error: \n",
- device_kernel_as_string((DeviceKernel)desc.kernel_index)) +
- (errStr ? [errStr UTF8String] : "nil"));
- metal_printf("%2d | %-55s | %7.2fs | FAILED!\n",
- desc.kernel_index,
- device_kernel_as_string((DeviceKernel)desc.kernel_index),
+ error_str = string_printf("Failed to create compute pipeline state \"%s\", error: \n",
+ device_kernel_as_string((DeviceKernel)device_kernel));
+ error_str += (errStr ? [errStr UTF8String] : "nil");
+ metal_printf("%16s | %2d | %-55s | %7.2fs | FAILED!\n",
+ kernel_type_as_string(pso_type),
+ device_kernel,
+ device_kernel_as_string((DeviceKernel)device_kernel),
duration);
return;
}
- pso[desc.pso_index].pipeline = computePipelineState;
- num_threads_per_block = round_down(computePipelineState.maxTotalThreadsPerThreadgroup,
- computePipelineState.threadExecutionWidth);
+ int num_threads_per_block = round_down(computePipelineState.maxTotalThreadsPerThreadgroup,
+ computePipelineState.threadExecutionWidth);
num_threads_per_block = std::max(num_threads_per_block,
(int)computePipelineState.threadExecutionWidth);
-
- if (!use_binary_archive) {
- metal_printf("%2d | %-55s | %7.2fs\n",
- desc.kernel_index,
- device_kernel_as_string((DeviceKernel)desc.kernel_index),
- duration);
-
- if (desc.pso_index == PSO_GENERIC) {
- this->load_duration = duration;
- }
- }
+ this->pipeline = computePipelineState;
+ this->num_threads_per_block = num_threads_per_block;
if (@available(macOS 11.0, *)) {
if (creating_new_archive || recreate_archive) {
@@ -228,304 +545,85 @@ bool MetalDeviceKernel::load(MetalDevice *device,
}
}
}
+ };
- [computePipelineStateDescriptor release];
- computePipelineStateDescriptor = nil;
-
- if (device->use_metalrt && desc.linked_functions) {
- for (int table = 0; table < METALRT_TABLE_NUM; table++) {
- if (@available(macOS 11.0, *)) {
- MTLIntersectionFunctionTableDescriptor *ift_desc =
- [[MTLIntersectionFunctionTableDescriptor alloc] init];
- ift_desc.functionCount = desc.intersector_functions[table].count;
-
- pso[desc.pso_index].intersection_func_table[table] = [pso[desc.pso_index].pipeline
- newIntersectionFunctionTableWithDescriptor:ift_desc];
-
- /* Finally write the function handles into this pipeline's table */
- for (int i = 0; i < 2; i++) {
- id<MTLFunctionHandle> handle = [pso[desc.pso_index].pipeline
- functionHandleWithFunction:desc.intersector_functions[table][i]];
- [pso[desc.pso_index].intersection_func_table[table] setFunction:handle atIndex:i];
- }
+ /* Block on load to ensure we continue with a valid kernel function */
+ if (creating_new_archive) {
+ starttime = time_dt();
+ NSError *error;
+ if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor
+ error:&error]) {
+ NSString *errStr = [error localizedDescription];
+ metal_printf("Failed to add PSO to archive:\n%s\n", errStr ? [errStr UTF8String] : "nil");
+ }
+ }
+ id<MTLComputePipelineState> pipeline = [mtlDevice
+ newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
+ options:pipelineOptions
+ reflection:nullptr
+ error:&error];
+ completionHandler(pipeline, nullptr, error);
+
+ this->loaded = true;
+ [computePipelineStateDescriptor release];
+ computePipelineStateDescriptor = nil;
+
+ if (use_metalrt && linked_functions) {
+ for (int table = 0; table < METALRT_TABLE_NUM; table++) {
+ if (@available(macOS 11.0, *)) {
+ MTLIntersectionFunctionTableDescriptor *ift_desc =
+ [[MTLIntersectionFunctionTableDescriptor alloc] init];
+ ift_desc.functionCount = table_functions[table].count;
+ intersection_func_table[table] = [this->pipeline
+ newIntersectionFunctionTableWithDescriptor:ift_desc];
+
+ /* Finally write the function handles into this pipeline's table */
+ for (int i = 0; i < 2; i++) {
+ id<MTLFunctionHandle> handle = [pipeline
+ functionHandleWithFunction:table_functions[table][i]];
+ [intersection_func_table[table] setFunction:handle atIndex:i];
}
}
}
+ }
- mark_loaded(desc.pso_index);
- };
+ double duration = time_dt() - starttime;
- if (desc.pso_index == PSO_SPECIALISED) {
- /* Asynchronous load */
- dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
- NSError *error;
- id<MTLComputePipelineState> pipeline = [device->mtlDevice
- newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
- options:pipelineOptions
- reflection:nullptr
- error:&error];
- completionHandler(pipeline, nullptr, error);
- });
+ if (!use_binary_archive) {
+ metal_printf("%16s | %2d | %-55s | %7.2fs\n",
+ kernel_type_as_string(pso_type),
+ int(device_kernel),
+ device_kernel_as_string(device_kernel),
+ duration);
}
else {
- /* Block on load to ensure we continue with a valid kernel function */
- id<MTLComputePipelineState> pipeline = [device->mtlDevice
- newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
- options:pipelineOptions
- reflection:nullptr
- error:&error];
- completionHandler(pipeline, nullptr, error);
- }
-
- return true;
-}
-
-const MetalKernelPipeline &MetalDeviceKernel::get_pso() const
-{
- if (pso[PSO_SPECIALISED].loaded) {
- return pso[PSO_SPECIALISED];
+ metal_printf("%16s | %2d | %-55s | %7.2fs | %s: %s\n",
+ kernel_type_as_string(pso_type),
+ device_kernel,
+ device_kernel_as_string((DeviceKernel)device_kernel),
+ duration,
+ creating_new_archive ? " new" : "load",
+ metalbin_name.c_str());
}
-
- assert(pso[PSO_GENERIC].loaded);
- return pso[PSO_GENERIC];
}
-bool MetalDeviceKernels::load(MetalDevice *device, int kernel_type)
+bool MetalDeviceKernels::load(MetalDevice *device, bool scene_specialized)
{
- bool any_error = false;
-
- MD5Hash md5;
-
- /* Build the function constant table */
- MTLFunctionConstantValues *constant_values = nullptr;
- if (kernel_type == PSO_SPECIALISED) {
- constant_values = [MTLFunctionConstantValues new];
-
-# define KERNEL_FILM(_type, name) \
- [constant_values setConstantValue:&data.film.name \
- type:get_MTLDataType_##_type() \
- atIndex:KernelData_film_##name]; \
- md5.append((uint8_t *)&data.film.name, sizeof(data.film.name));
-
-# define KERNEL_BACKGROUND(_type, name) \
- [constant_values setConstantValue:&data.background.name \
- type:get_MTLDataType_##_type() \
- atIndex:KernelData_background_##name]; \
- md5.append((uint8_t *)&data.background.name, sizeof(data.background.name));
-
-# define KERNEL_INTEGRATOR(_type, name) \
- [constant_values setConstantValue:&data.integrator.name \
- type:get_MTLDataType_##_type() \
- atIndex:KernelData_integrator_##name]; \
- md5.append((uint8_t *)&data.integrator.name, sizeof(data.integrator.name));
-
-# define KERNEL_BVH(_type, name) \
- [constant_values setConstantValue:&data.bvh.name \
- type:get_MTLDataType_##_type() \
- atIndex:KernelData_bvh_##name]; \
- md5.append((uint8_t *)&data.bvh.name, sizeof(data.bvh.name));
-
- /* METAL_WIP: populate constant_values based on KernelData */
- assert(0);
- /*
- const KernelData &data = device->launch_params.data;
- # include "kernel/types/background.h"
- # include "kernel/types/bvh.h"
- # include "kernel/types/film.h"
- # include "kernel/types/integrator.h"
- */
- }
-
- if (device->use_metalrt) {
- if (@available(macOS 11.0, *)) {
- /* create the id<MTLFunction> for each intersection function */
- const char *function_names[] = {
- "__anyhit__cycles_metalrt_visibility_test_tri",
- "__anyhit__cycles_metalrt_visibility_test_box",
- "__anyhit__cycles_metalrt_shadow_all_hit_tri",
- "__anyhit__cycles_metalrt_shadow_all_hit_box",
- "__anyhit__cycles_metalrt_local_hit_tri",
- "__anyhit__cycles_metalrt_local_hit_box",
- "__intersection__curve_ribbon",
- "__intersection__curve_ribbon_shadow",
- "__intersection__curve_all",
- "__intersection__curve_all_shadow",
- "__intersection__point",
- "__intersection__point_shadow",
- };
- assert(sizeof(function_names) / sizeof(function_names[0]) == METALRT_FUNC_NUM);
-
- MTLFunctionDescriptor *desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
- if (kernel_type == PSO_SPECIALISED) {
- desc.constantValues = constant_values;
- }
- for (int i = 0; i < METALRT_FUNC_NUM; i++) {
- const char *function_name = function_names[i];
- desc.name = [@(function_name) copy];
-
- NSError *error = NULL;
- rt_intersection_funcs[kernel_type][i] = [device->mtlLibrary[kernel_type]
- newFunctionWithDescriptor:desc
- error:&error];
-
- if (rt_intersection_funcs[kernel_type][i] == nil) {
- NSString *err = [error localizedDescription];
- string errors = [err UTF8String];
-
- device->set_error(string_printf(
- "Error getting intersection function \"%s\": %s", function_name, errors.c_str()));
- any_error = true;
- break;
- }
-
- rt_intersection_funcs[kernel_type][i].label = [@(function_name) copy];
- }
- }
- }
- md5.append(device->source_used_for_compile[kernel_type]);
-
- string hash = md5.get_hex();
- if (loaded_md5[kernel_type] == hash) {
- return true;
- }
-
- if (!any_error) {
- NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
- NSArray *function_list = nil;
-
- if (device->use_metalrt) {
- id<MTLFunction> curve_intersect_default = nil;
- id<MTLFunction> curve_intersect_shadow = nil;
- id<MTLFunction> point_intersect_default = nil;
- id<MTLFunction> point_intersect_shadow = nil;
- if (device->kernel_features & KERNEL_FEATURE_HAIR) {
- /* Add curve intersection programs. */
- if (device->kernel_features & KERNEL_FEATURE_HAIR_THICK) {
- /* Slower programs for thick hair since that also slows down ribbons.
- * Ideally this should not be needed. */
- curve_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL];
- curve_intersect_shadow =
- rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL_SHADOW];
- }
- else {
- curve_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON];
- curve_intersect_shadow =
- rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON_SHADOW];
- }
- }
- if (device->kernel_features & KERNEL_FEATURE_POINTCLOUD) {
- point_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_POINT];
- point_intersect_shadow = rt_intersection_funcs[kernel_type][METALRT_FUNC_POINT_SHADOW];
- }
- table_functions[METALRT_TABLE_DEFAULT] = [NSArray
- arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_TRI],
- curve_intersect_default ?
- curve_intersect_default :
- rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_BOX],
- point_intersect_default ?
- point_intersect_default :
- rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_BOX],
- nil];
- table_functions[METALRT_TABLE_SHADOW] = [NSArray
- arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_TRI],
- curve_intersect_shadow ?
- curve_intersect_shadow :
- rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_BOX],
- point_intersect_shadow ?
- point_intersect_shadow :
- rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_BOX],
- nil];
- table_functions[METALRT_TABLE_LOCAL] = [NSArray
- arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_TRI],
- rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_BOX],
- rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_BOX],
- nil];
-
- NSMutableSet *unique_functions = [NSMutableSet
- setWithArray:table_functions[METALRT_TABLE_DEFAULT]];
- [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_SHADOW]];
- [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL]];
-
- function_list = [[NSArray arrayWithArray:[unique_functions allObjects]]
- sortedArrayUsingComparator:^NSComparisonResult(id<MTLFunction> f1, id<MTLFunction> f2) {
- return [f1.label compare:f2.label];
- }];
-
- unique_functions = nil;
- }
-
- metal_printf("Starting %s \"cycles_metal_...\" pipeline builds\n",
- kernel_type_as_string(kernel_type));
-
- tbb::task_arena local_arena(max_mtlcompiler_threads);
- local_arena.execute([&]() {
- parallel_for(int(0), int(DEVICE_KERNEL_NUM), [&](int i) {
- /* skip megakernel */
- if (i == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
- return;
- }
-
- /* Only specialize kernels where it can make an impact. */
- if (kernel_type == PSO_SPECIALISED) {
- if (i < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
- i > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
- return;
- }
- }
-
- MetalDeviceKernel &kernel = kernels_[i];
-
- const std::string function_name = std::string("cycles_metal_") +
- device_kernel_as_string((DeviceKernel)i);
- int threads_per_threadgroup = device->max_threads_per_threadgroup;
- if (i > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL && i < DEVICE_KERNEL_INTEGRATOR_RESET) {
- /* Always use 512 for the sorting kernels */
- threads_per_threadgroup = 512;
- }
-
- NSArray *kernel_function_list = nil;
-
- if (i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
- i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
- i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
- i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK ||
- i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
- kernel_function_list = function_list;
- }
-
- MetalKernelLoadDesc desc;
- desc.pso_index = kernel_type;
- desc.kernel_index = i;
- desc.linked_functions = kernel_function_list;
- desc.intersector_functions.defaults = table_functions[METALRT_TABLE_DEFAULT];
- desc.intersector_functions.shadow = table_functions[METALRT_TABLE_SHADOW];
- desc.intersector_functions.local = table_functions[METALRT_TABLE_LOCAL];
- desc.constant_values = constant_values;
- desc.threads_per_threadgroup = threads_per_threadgroup;
- desc.function_name = function_name.c_str();
-
- bool success = kernel.load(device, desc, md5);
-
- any_error |= !success;
- });
- });
+ auto shader_cache = get_shader_cache(device->mtlDevice);
+ for (int i = 0; i < DEVICE_KERNEL_NUM; i++) {
+ shader_cache->load_kernel((DeviceKernel)i, device, scene_specialized);
}
- bool loaded = !any_error;
- if (loaded) {
- loaded_md5[kernel_type] = hash;
+ if (!scene_specialized || getenv("CYCLES_METAL_PROFILING")) {
+ shader_cache->wait_for_all();
}
- return loaded;
-}
-
-const MetalDeviceKernel &MetalDeviceKernels::get(DeviceKernel kernel) const
-{
- return kernels_[(int)kernel];
+ return true;
}
-bool MetalDeviceKernels::available(DeviceKernel kernel) const
+const MetalKernelPipeline *MetalDeviceKernels::get_best_pipeline(const MetalDevice *device,
+ DeviceKernel kernel)
{
- return kernels_[(int)kernel].get_pso().function != nil;
+ return get_shader_cache(device->mtlDevice)->get_best_pipeline(kernel, device);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm
index 1686ab95ffa..ec10e091b25 100644
--- a/intern/cycles/device/metal/queue.mm
+++ b/intern/cycles/device/metal/queue.mm
@@ -108,9 +108,6 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
VLOG(3) << "Metal queue launch " << device_kernel_as_string(kernel) << ", work_size "
<< work_size;
- const MetalDeviceKernel &metal_kernel = metal_device->kernels.get(kernel);
- const MetalKernelPipeline &metal_kernel_pso = metal_kernel.get_pso();
-
id<MTLComputeCommandEncoder> mtlComputeCommandEncoder = get_compute_encoder(kernel);
/* Determine size requirement for argument buffer. */
@@ -212,6 +209,14 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
}
bytes_written = globals_offsets + sizeof(KernelParamsMetal);
+ const MetalKernelPipeline *metal_kernel_pso = MetalDeviceKernels::get_best_pipeline(metal_device,
+ kernel);
+ if (!metal_kernel_pso) {
+ metal_device->set_error(
+ string_printf("No MetalKernelPipeline for %s\n", device_kernel_as_string(kernel)));
+ return false;
+ }
+
/* Encode ancillaries */
[metal_device->mtlAncillaryArgEncoder setArgumentBuffer:arg_buffer offset:metal_offsets];
[metal_device->mtlAncillaryArgEncoder setBuffer:metal_device->texture_bindings_2d
@@ -228,14 +233,14 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
}
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
- if (metal_kernel_pso.intersection_func_table[table]) {
- [metal_kernel_pso.intersection_func_table[table] setBuffer:arg_buffer
- offset:globals_offsets
- atIndex:1];
+ if (metal_kernel_pso->intersection_func_table[table]) {
+ [metal_kernel_pso->intersection_func_table[table] setBuffer:arg_buffer
+ offset:globals_offsets
+ atIndex:1];
[metal_device->mtlAncillaryArgEncoder
- setIntersectionFunctionTable:metal_kernel_pso.intersection_func_table[table]
+ setIntersectionFunctionTable:metal_kernel_pso->intersection_func_table[table]
atIndex:3 + table];
- [mtlComputeCommandEncoder useResource:metal_kernel_pso.intersection_func_table[table]
+ [mtlComputeCommandEncoder useResource:metal_kernel_pso->intersection_func_table[table]
usage:MTLResourceUsageRead];
}
else {
@@ -281,10 +286,10 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
}
}
- [mtlComputeCommandEncoder setComputePipelineState:metal_kernel_pso.pipeline];
+ [mtlComputeCommandEncoder setComputePipelineState:metal_kernel_pso->pipeline];
/* Compute kernel launch parameters. */
- const int num_threads_per_block = metal_kernel.get_num_threads_per_block();
+ const int num_threads_per_block = metal_kernel_pso->num_threads_per_block;
int shared_mem_bytes = 0;
@@ -314,7 +319,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
threadsPerThreadgroup:size_threads_per_threadgroup];
[mtlCommandBuffer addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
- NSString *kernel_name = metal_kernel_pso.function.label;
+ NSString *kernel_name = metal_kernel_pso->function.label;
/* Enhanced command buffer errors are only available in 11.0+ */
if (@available(macos 11.0, *)) {
@@ -547,6 +552,8 @@ id<MTLComputeCommandEncoder> MetalDeviceQueue::get_compute_encoder(DeviceKernel
computeCommandEncoderWithDispatchType:concurrent ? MTLDispatchTypeConcurrent :
MTLDispatchTypeSerial];
+ [mtlComputeEncoder setLabel:@(device_kernel_as_string(kernel))];
+
/* declare usage of MTLBuffers etc */
prepare_resources(kernel);
}
diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp
index 8830d8c44ac..9fc265bc327 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -23,6 +23,7 @@
# include "util/md5.h"
# include "util/path.h"
# include "util/progress.h"
+# include "util/task.h"
# include "util/time.h"
# undef __KERNEL_CPU__
@@ -216,6 +217,25 @@ static OptixResult optixUtilDenoiserInvokeTiled(OptixDenoiser denoiser,
return OPTIX_SUCCESS;
}
+# if OPTIX_ABI_VERSION >= 55
+static void execute_optix_task(TaskPool &pool, OptixTask task, OptixResult &failure_reason)
+{
+ OptixTask additional_tasks[16];
+ unsigned int num_additional_tasks = 0;
+
+ const OptixResult result = optixTaskExecute(task, additional_tasks, 16, &num_additional_tasks);
+ if (result == OPTIX_SUCCESS) {
+ for (unsigned int i = 0; i < num_additional_tasks; ++i) {
+ pool.push(function_bind(
+ &execute_optix_task, std::ref(pool), additional_tasks[i], std::ref(failure_reason)));
+ }
+ }
+ else {
+ failure_reason = result;
+ }
+}
+# endif
+
} // namespace
OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
@@ -453,6 +473,23 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
return false;
}
+# if OPTIX_ABI_VERSION >= 55
+ OptixTask task = nullptr;
+ OptixResult result = optixModuleCreateFromPTXWithTasks(context,
+ &module_options,
+ &pipeline_options,
+ ptx_data.data(),
+ ptx_data.size(),
+ nullptr,
+ nullptr,
+ &optix_module,
+ &task);
+ if (result == OPTIX_SUCCESS) {
+ TaskPool pool;
+ execute_optix_task(pool, task, result);
+ pool.wait_work();
+ }
+# else
const OptixResult result = optixModuleCreateFromPTX(context,
&module_options,
&pipeline_options,
@@ -461,6 +498,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
nullptr,
0,
&optix_module);
+# endif
if (result != OPTIX_SUCCESS) {
set_error(string_printf("Failed to load OptiX kernel from '%s' (%s)",
ptx_filename.c_str(),
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index d97854a52d0..473bdb67920 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -145,6 +145,7 @@ set(SRC_KERNEL_SVM_HEADERS
svm/normal.h
svm/ramp.h
svm/ramp_util.h
+ svm/sepcomb_color.h
svm/sepcomb_hsv.h
svm/sepcomb_vector.h
svm/sky.h
diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h
index 4e309f16c08..0ed52074a90 100644
--- a/intern/cycles/kernel/device/metal/compat.h
+++ b/intern/cycles/kernel/device/metal/compat.h
@@ -29,10 +29,26 @@ using namespace metal::raytracing;
/* Qualifiers */
-#define ccl_device
-#define ccl_device_inline ccl_device
-#define ccl_device_forceinline ccl_device
-#define ccl_device_noinline ccl_device __attribute__((noinline))
+#if defined(__KERNEL_METAL_APPLE__)
+
+/* Inline everything for Apple GPUs.
+ * This gives ~1.1x speedup and 10% spill reduction for integator_shade_surface
+ * at the cost of longer compile times (~4.5 minutes on M1 Max). */
+
+# define ccl_device __attribute__((always_inline))
+# define ccl_device_inline __attribute__((always_inline))
+# define ccl_device_forceinline __attribute__((always_inline))
+# define ccl_device_noinline __attribute__((always_inline))
+
+#else
+
+# define ccl_device
+# define ccl_device_inline ccl_device
+# define ccl_device_forceinline ccl_device
+# define ccl_device_noinline ccl_device __attribute__((noinline))
+
+#endif
+
#define ccl_device_noinline_cpu ccl_device
#define ccl_device_inline_method ccl_device
#define ccl_global device
diff --git a/intern/cycles/kernel/osl/services.cpp b/intern/cycles/kernel/osl/services.cpp
index 832498f1f73..e2e10b5b83f 100644
--- a/intern/cycles/kernel/osl/services.cpp
+++ b/intern/cycles/kernel/osl/services.cpp
@@ -1304,8 +1304,38 @@ bool OSLRenderServices::texture(ustring filename,
break;
}
case OSLTextureHandle::SVM: {
- /* Packed texture. */
- float4 rgba = kernel_tex_image_interp(kernel_globals, handle->svm_slot, s, 1.0f - t);
+ int id = -1;
+ if (handle->svm_slots[0].w == -1) {
+ /* Packed single texture. */
+ id = handle->svm_slots[0].y;
+ }
+ else {
+ /* Packed tiled texture. */
+ int tx = (int)s;
+ int ty = (int)t;
+ int tile = 1001 + 10 * ty + tx;
+ for (int4 tile_node : handle->svm_slots) {
+ if (tile_node.x == tile) {
+ id = tile_node.y;
+ break;
+ }
+ if (tile_node.z == tile) {
+ id = tile_node.w;
+ break;
+ }
+ }
+ s -= tx;
+ t -= ty;
+ }
+
+ float4 rgba;
+ if (id == -1) {
+ rgba = make_float4(
+ TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A);
+ }
+ else {
+ rgba = kernel_tex_image_interp(kernel_globals, id, s, 1.0f - t);
+ }
result[0] = rgba[0];
if (nchannels > 1)
@@ -1319,7 +1349,7 @@ bool OSLRenderServices::texture(ustring filename,
}
case OSLTextureHandle::IES: {
/* IES light. */
- result[0] = kernel_ies_interp(kernel_globals, handle->svm_slot, s, t);
+ result[0] = kernel_ies_interp(kernel_globals, handle->svm_slots[0].y, s, t);
status = true;
break;
}
@@ -1413,7 +1443,7 @@ bool OSLRenderServices::texture3d(ustring filename,
/* Packed texture. */
ShaderData *sd = (ShaderData *)(sg->renderstate);
KernelGlobals kernel_globals = sd->osl_globals;
- int slot = handle->svm_slot;
+ int slot = handle->svm_slots[0].y;
float3 P_float3 = make_float3(P.x, P.y, P.z);
float4 rgba = kernel_tex_image_interp_3d(kernel_globals, slot, P_float3, INTERPOLATION_NONE);
diff --git a/intern/cycles/kernel/osl/services.h b/intern/cycles/kernel/osl/services.h
index 653fa017140..edffd912bad 100644
--- a/intern/cycles/kernel/osl/services.h
+++ b/intern/cycles/kernel/osl/services.h
@@ -39,18 +39,26 @@ struct KernelGlobalsCPU;
* with additional data.
*
* These are stored in a concurrent hash map, because OSL can compile multiple
- * shaders in parallel. */
+ * shaders in parallel.
+ *
+ * NOTE: The svm_slots array contains a compressed mapping of tile to svm_slot pairs
+ * stored as follows: x:tile_a, y:svm_slot_a, z:tile_b, w:svm_slot_b etc. */
struct OSLTextureHandle : public OIIO::RefCnt {
enum Type { OIIO, SVM, IES, BEVEL, AO };
+ OSLTextureHandle(Type type, const vector<int4> &svm_slots)
+ : type(type), svm_slots(svm_slots), oiio_handle(NULL), processor(NULL)
+ {
+ }
+
OSLTextureHandle(Type type = OIIO, int svm_slot = -1)
- : type(type), svm_slot(svm_slot), oiio_handle(NULL), processor(NULL)
+ : OSLTextureHandle(type, {make_int4(0, svm_slot, -1, -1)})
{
}
Type type;
- int svm_slot;
+ vector<int4> svm_slots;
OSL::TextureSystem::TextureHandle *oiio_handle;
ColorSpaceProcessor *processor;
};
diff --git a/intern/cycles/kernel/osl/shaders/CMakeLists.txt b/intern/cycles/kernel/osl/shaders/CMakeLists.txt
index 7ced21c5670..741bce7c399 100644
--- a/intern/cycles/kernel/osl/shaders/CMakeLists.txt
+++ b/intern/cycles/kernel/osl/shaders/CMakeLists.txt
@@ -16,6 +16,7 @@ set(SRC_OSL
node_camera.osl
node_checker_texture.osl
node_clamp.osl
+ node_combine_color.osl
node_combine_rgb.osl
node_combine_hsv.osl
node_combine_xyz.osl
@@ -68,6 +69,7 @@ set(SRC_OSL
node_refraction_bsdf.osl
node_rgb_curves.osl
node_rgb_ramp.osl
+ node_separate_color.osl
node_separate_rgb.osl
node_separate_hsv.osl
node_separate_xyz.osl
diff --git a/intern/cycles/kernel/osl/shaders/node_color.h b/intern/cycles/kernel/osl/shaders/node_color.h
index 388dd114e9a..06735f5b03d 100644
--- a/intern/cycles/kernel/osl/shaders/node_color.h
+++ b/intern/cycles/kernel/osl/shaders/node_color.h
@@ -148,3 +148,53 @@ color hsv_to_rgb(color hsv)
return rgb;
}
+
+color rgb_to_hsl(color rgb)
+{
+ float cmax, cmin, h, s, l;
+
+ cmax = max(rgb[0], max(rgb[1], rgb[2]));
+ cmin = min(rgb[0], min(rgb[1], rgb[2]));
+ l = min(1.0, (cmax + cmin) / 2.0);
+
+ if (cmax == cmin) {
+ h = s = 0.0; /* achromatic */
+ }
+ else {
+ float cdelta = cmax - cmin;
+ s = l > 0.5 ? cdelta / (2.0 - cmax - cmin) : cdelta / (cmax + cmin);
+ if (cmax == rgb[0]) {
+ h = (rgb[1] - rgb[2]) / cdelta + (rgb[1] < rgb[2] ? 6.0 : 0.0);
+ }
+ else if (cmax == rgb[1]) {
+ h = (rgb[2] - rgb[0]) / cdelta + 2.0;
+ }
+ else {
+ h = (rgb[0] - rgb[1]) / cdelta + 4.0;
+ }
+ }
+ h /= 6.0;
+
+ return color(h, s, l);
+}
+
+color hsl_to_rgb(color hsl)
+{
+ float nr, ng, nb, chroma, h, s, l;
+
+ h = hsl[0];
+ s = hsl[1];
+ l = hsl[2];
+
+ nr = abs(h * 6.0 - 3.0) - 1.0;
+ ng = 2.0 - abs(h * 6.0 - 2.0);
+ nb = 2.0 - abs(h * 6.0 - 4.0);
+
+ nr = clamp(nr, 0.0, 1.0);
+ nb = clamp(nb, 0.0, 1.0);
+ ng = clamp(ng, 0.0, 1.0);
+
+ chroma = (1.0 - abs(2.0 * l - 1.0)) * s;
+
+ return color((nr - 0.5) * chroma + l, (ng - 0.5) * chroma + l, (nb - 0.5) * chroma + l);
+}
diff --git a/intern/cycles/kernel/osl/shaders/node_combine_color.osl b/intern/cycles/kernel/osl/shaders/node_combine_color.osl
new file mode 100644
index 00000000000..681a592d2bb
--- /dev/null
+++ b/intern/cycles/kernel/osl/shaders/node_combine_color.osl
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#include "stdcycles.h"
+
+shader node_combine_color(string color_type = "rgb",
+ float Red = 0.0,
+ float Green = 0.0,
+ float Blue = 0.0,
+ output color Color = 0.8)
+{
+ if (color_type == "rgb" || color_type == "hsv" || color_type == "hsl")
+ Color = color(color_type, Red, Green, Blue);
+ else
+ warning("%s", "Unknown color space!");
+}
diff --git a/intern/cycles/kernel/osl/shaders/node_separate_color.osl b/intern/cycles/kernel/osl/shaders/node_separate_color.osl
new file mode 100644
index 00000000000..6f3e3149d8e
--- /dev/null
+++ b/intern/cycles/kernel/osl/shaders/node_separate_color.osl
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#include "node_color.h"
+#include "stdcycles.h"
+
+shader node_separate_color(string color_type = "rgb",
+ color Color = 0.8,
+ output float Red = 0.0,
+ output float Green = 0.0,
+ output float Blue = 0.0)
+{
+ color col;
+ if (color_type == "rgb")
+ col = Color;
+ else if (color_type == "hsv")
+ col = rgb_to_hsv(Color);
+ else if (color_type == "hsl")
+ col = rgb_to_hsl(Color);
+ else
+ warning("%s", "Unknown color space!");
+
+ Red = col[0];
+ Green = col[1];
+ Blue = col[2];
+}
diff --git a/intern/cycles/kernel/svm/color_util.h b/intern/cycles/kernel/svm/color_util.h
index b439721383c..fa22d4bc8c2 100644
--- a/intern/cycles/kernel/svm/color_util.h
+++ b/intern/cycles/kernel/svm/color_util.h
@@ -307,4 +307,30 @@ ccl_device_inline float3 svm_brightness_contrast(float3 color, float brightness,
return color;
}
+ccl_device float3 svm_combine_color(NodeCombSepColorType type, float3 color)
+{
+ switch (type) {
+ case NODE_COMBSEP_COLOR_HSV:
+ return hsv_to_rgb(color);
+ case NODE_COMBSEP_COLOR_HSL:
+ return hsl_to_rgb(color);
+ case NODE_COMBSEP_COLOR_RGB:
+ default:
+ return color;
+ }
+}
+
+ccl_device float3 svm_separate_color(NodeCombSepColorType type, float3 color)
+{
+ switch (type) {
+ case NODE_COMBSEP_COLOR_HSV:
+ return rgb_to_hsv(color);
+ case NODE_COMBSEP_COLOR_HSL:
+ return rgb_to_hsl(color);
+ case NODE_COMBSEP_COLOR_RGB:
+ default:
+ return color;
+ }
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/sepcomb_color.h b/intern/cycles/kernel/svm/sepcomb_color.h
new file mode 100644
index 00000000000..d186e7f163b
--- /dev/null
+++ b/intern/cycles/kernel/svm/sepcomb_color.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#pragma once
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device_noinline void svm_node_combine_color(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ ccl_private float *stack,
+ uint color_type,
+ uint inputs_stack_offsets,
+ uint result_stack_offset)
+{
+ uint red_stack_offset, green_stack_offset, blue_stack_offset;
+ svm_unpack_node_uchar3(
+ inputs_stack_offsets, &red_stack_offset, &green_stack_offset, &blue_stack_offset);
+
+ float r = stack_load_float(stack, red_stack_offset);
+ float g = stack_load_float(stack, green_stack_offset);
+ float b = stack_load_float(stack, blue_stack_offset);
+
+ /* Combine, and convert back to RGB */
+ float3 color = svm_combine_color((NodeCombSepColorType)color_type, make_float3(r, g, b));
+
+ if (stack_valid(result_stack_offset))
+ stack_store_float3(stack, result_stack_offset, color);
+}
+
+ccl_device_noinline void svm_node_separate_color(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ ccl_private float *stack,
+ uint color_type,
+ uint input_stack_offset,
+ uint results_stack_offsets)
+{
+ float3 color = stack_load_float3(stack, input_stack_offset);
+
+ /* Convert color space */
+ color = svm_separate_color((NodeCombSepColorType)color_type, color);
+
+ uint red_stack_offset, green_stack_offset, blue_stack_offset;
+ svm_unpack_node_uchar3(
+ results_stack_offsets, &red_stack_offset, &green_stack_offset, &blue_stack_offset);
+
+ if (stack_valid(red_stack_offset))
+ stack_store_float(stack, red_stack_offset, color.x);
+ if (stack_valid(green_stack_offset))
+ stack_store_float(stack, green_stack_offset, color.y);
+ if (stack_valid(blue_stack_offset))
+ stack_store_float(stack, blue_stack_offset, color.z);
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index 08352a6231f..5def943c87f 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -181,6 +181,7 @@ CCL_NAMESPACE_END
#include "kernel/svm/noisetex.h"
#include "kernel/svm/normal.h"
#include "kernel/svm/ramp.h"
+#include "kernel/svm/sepcomb_color.h"
#include "kernel/svm/sepcomb_hsv.h"
#include "kernel/svm/sepcomb_vector.h"
#include "kernel/svm/sky.h"
@@ -508,6 +509,12 @@ ccl_device void svm_eval_nodes(KernelGlobals kg,
case NODE_MIX:
offset = svm_node_mix(kg, sd, stack, node.y, node.z, node.w, offset);
break;
+ case NODE_SEPARATE_COLOR:
+ svm_node_separate_color(kg, sd, stack, node.y, node.z, node.w);
+ break;
+ case NODE_COMBINE_COLOR:
+ svm_node_combine_color(kg, sd, stack, node.y, node.z, node.w);
+ break;
case NODE_SEPARATE_VECTOR:
svm_node_separate_vector(sd, stack, node.y, node.z, node.w);
break;
diff --git a/intern/cycles/kernel/svm/types.h b/intern/cycles/kernel/svm/types.h
index bede58f7a54..82109ec4c4f 100644
--- a/intern/cycles/kernel/svm/types.h
+++ b/intern/cycles/kernel/svm/types.h
@@ -92,6 +92,8 @@ typedef enum ShaderNodeType {
NODE_NORMAL_MAP,
NODE_INVERT,
NODE_MIX,
+ NODE_SEPARATE_COLOR,
+ NODE_COMBINE_COLOR,
NODE_SEPARATE_VECTOR,
NODE_COMBINE_VECTOR,
NODE_SEPARATE_HSV,
@@ -487,6 +489,12 @@ typedef enum NodePrincipledHairParametrization {
NODE_PRINCIPLED_HAIR_NUM,
} NodePrincipledHairParametrization;
+typedef enum NodeCombSepColorType {
+ NODE_COMBSEP_COLOR_RGB,
+ NODE_COMBSEP_COLOR_HSV,
+ NODE_COMBSEP_COLOR_HSL,
+} NodeCombSepColorType;
+
/* Closure */
typedef enum ClosureType {
diff --git a/intern/cycles/scene/image.cpp b/intern/cycles/scene/image.cpp
index 21fde88915e..c61ad1f1d71 100644
--- a/intern/cycles/scene/image.cpp
+++ b/intern/cycles/scene/image.cpp
@@ -117,12 +117,12 @@ void ImageHandle::clear()
manager = NULL;
}
-bool ImageHandle::empty()
+bool ImageHandle::empty() const
{
return tile_slots.empty();
}
-int ImageHandle::num_tiles()
+int ImageHandle::num_tiles() const
{
return tile_slots.size();
}
@@ -154,6 +154,35 @@ int ImageHandle::svm_slot(const int tile_index) const
return tile_slots[tile_index];
}
+vector<int4> ImageHandle::get_svm_slots() const
+{
+ const size_t num_nodes = divide_up(tile_slots.size(), 2);
+
+ vector<int4> svm_slots;
+ svm_slots.reserve(num_nodes);
+ for (size_t i = 0; i < num_nodes; i++) {
+ int4 node;
+
+ int slot = tile_slots[2 * i];
+ node.x = manager->images[slot]->loader->get_tile_number();
+ node.y = slot;
+
+ if ((2 * i + 1) < tile_slots.size()) {
+ slot = tile_slots[2 * i + 1];
+ node.z = manager->images[slot]->loader->get_tile_number();
+ node.w = slot;
+ }
+ else {
+ node.z = -1;
+ node.w = -1;
+ }
+
+ svm_slots.push_back(node);
+ }
+
+ return svm_slots;
+}
+
device_texture *ImageHandle::image_memory(const int tile_index) const
{
if (tile_index >= tile_slots.size()) {
@@ -266,6 +295,11 @@ ustring ImageLoader::osl_filepath() const
return ustring();
}
+int ImageLoader::get_tile_number() const
+{
+ return 0;
+}
+
bool ImageLoader::equals(const ImageLoader *a, const ImageLoader *b)
{
if (a == NULL && b == NULL) {
@@ -397,6 +431,19 @@ ImageHandle ImageManager::add_image(ImageLoader *loader,
return handle;
}
+ImageHandle ImageManager::add_image(const vector<ImageLoader *> &loaders,
+ const ImageParams &params)
+{
+ ImageHandle handle;
+ for (ImageLoader *loader : loaders) {
+ const int slot = add_image_slot(loader, params, true);
+ handle.tile_slots.push_back(slot);
+ }
+
+ handle.manager = this;
+ return handle;
+}
+
int ImageManager::add_image_slot(ImageLoader *loader,
const ImageParams &params,
const bool builtin)
diff --git a/intern/cycles/scene/image.h b/intern/cycles/scene/image.h
index 4d0dee35eca..9edb6a7eaf5 100644
--- a/intern/cycles/scene/image.h
+++ b/intern/cycles/scene/image.h
@@ -112,6 +112,9 @@ class ImageLoader {
/* Optional for OSL texture cache. */
virtual ustring osl_filepath() const;
+ /* Optional for tiled textures loaded externally. */
+ virtual int get_tile_number() const;
+
/* Free any memory used for loading metadata and pixels. */
virtual void cleanup(){};
@@ -139,11 +142,12 @@ class ImageHandle {
void clear();
- bool empty();
- int num_tiles();
+ bool empty() const;
+ int num_tiles() const;
ImageMetaData metadata();
int svm_slot(const int tile_index = 0) const;
+ vector<int4> get_svm_slots() const;
device_texture *image_memory(const int tile_index = 0) const;
VDBImageLoader *vdb_loader(const int tile_index = 0) const;
@@ -169,6 +173,7 @@ class ImageManager {
const ImageParams &params,
const array<int> &tiles);
ImageHandle add_image(ImageLoader *loader, const ImageParams &params, const bool builtin = true);
+ ImageHandle add_image(const vector<ImageLoader *> &loaders, const ImageParams &params);
void device_update(Device *device, Scene *scene, Progress &progress);
void device_update_slot(Device *device, Scene *scene, int slot, Progress *progress);
diff --git a/intern/cycles/scene/osl.cpp b/intern/cycles/scene/osl.cpp
index ffa1a2f5623..6698e6e2cce 100644
--- a/intern/cycles/scene/osl.cpp
+++ b/intern/cycles/scene/osl.cpp
@@ -1211,14 +1211,15 @@ void OSLCompiler::parameter_texture(const char *name, ustring filename, ustring
parameter(name, filename);
}
-void OSLCompiler::parameter_texture(const char *name, int svm_slot)
+void OSLCompiler::parameter_texture(const char *name, const ImageHandle &handle)
{
/* Texture loaded through SVM image texture system. We generate a unique
* name, which ends up being used in OSLRenderServices::get_texture_handle
* to get handle again. Note that this name must be unique between multiple
* render sessions as the render services are shared. */
ustring filename(string_printf("@svm%d", texture_shared_unique_id++).c_str());
- services->textures.insert(filename, new OSLTextureHandle(OSLTextureHandle::SVM, svm_slot));
+ services->textures.insert(filename,
+ new OSLTextureHandle(OSLTextureHandle::SVM, handle.get_svm_slots()));
parameter(name, filename);
}
@@ -1290,7 +1291,7 @@ void OSLCompiler::parameter_texture(const char * /* name */,
{
}
-void OSLCompiler::parameter_texture(const char * /* name */, int /* svm_slot */)
+void OSLCompiler::parameter_texture(const char * /* name */, const ImageHandle & /*handle*/)
{
}
diff --git a/intern/cycles/scene/osl.h b/intern/cycles/scene/osl.h
index f0f97dbcaad..bf27069b1b1 100644
--- a/intern/cycles/scene/osl.h
+++ b/intern/cycles/scene/osl.h
@@ -147,7 +147,7 @@ class OSLCompiler {
void parameter_attribute(const char *name, ustring s);
void parameter_texture(const char *name, ustring filename, ustring colorspace);
- void parameter_texture(const char *name, int svm_slot);
+ void parameter_texture(const char *name, const ImageHandle &handle);
void parameter_texture_ies(const char *name, int svm_slot);
ShaderType output_type()
diff --git a/intern/cycles/scene/shader_nodes.cpp b/intern/cycles/scene/shader_nodes.cpp
index 95fccf725f3..03c152928d5 100644
--- a/intern/cycles/scene/shader_nodes.cpp
+++ b/intern/cycles/scene/shader_nodes.cpp
@@ -19,7 +19,6 @@
#include "util/color.h"
#include "util/foreach.h"
#include "util/log.h"
-#include "util/string.h"
#include "util/transform.h"
#include "kernel/tables.h"
@@ -450,22 +449,19 @@ void ImageTextureNode::compile(OSLCompiler &compiler)
const ustring known_colorspace = metadata.colorspace;
if (handle.svm_slot() == -1) {
- /* OIIO currently does not support <UVTILE> substitutions natively. Replace with a format they
- * understand. */
- std::string osl_filename = filename.string();
- string_replace(osl_filename, "<UVTILE>", "<U>_<V>");
compiler.parameter_texture(
- "filename", ustring(osl_filename), compress_as_srgb ? u_colorspace_raw : known_colorspace);
+ "filename", filename, compress_as_srgb ? u_colorspace_raw : known_colorspace);
}
else {
- compiler.parameter_texture("filename", handle.svm_slot());
+ compiler.parameter_texture("filename", handle);
}
const bool unassociate_alpha = !(ColorSpaceManager::colorspace_is_data(colorspace) ||
alpha_type == IMAGE_ALPHA_CHANNEL_PACKED ||
alpha_type == IMAGE_ALPHA_IGNORE);
const bool is_tiled = (filename.find("<UDIM>") != string::npos ||
- filename.find("<UVTILE>") != string::npos);
+ filename.find("<UVTILE>") != string::npos) ||
+ handle.num_tiles() > 1;
compiler.parameter(this, "projection");
compiler.parameter(this, "projection_blend");
@@ -610,7 +606,7 @@ void EnvironmentTextureNode::compile(OSLCompiler &compiler)
"filename", filename, compress_as_srgb ? u_colorspace_raw : known_colorspace);
}
else {
- compiler.parameter_texture("filename", handle.svm_slot());
+ compiler.parameter_texture("filename", handle);
}
compiler.parameter(this, "projection");
@@ -965,7 +961,7 @@ void SkyTextureNode::compile(OSLCompiler &compiler)
compiler.parameter_array("nishita_data", sunsky.nishita_data, 10);
/* nishita texture */
if (sky_type == NODE_SKY_NISHITA) {
- compiler.parameter_texture("filename", handle.svm_slot());
+ compiler.parameter_texture("filename", handle);
}
compiler.add(this, "node_sky_texture");
}
@@ -1860,7 +1856,7 @@ void PointDensityTextureNode::compile(OSLCompiler &compiler)
handle = image_manager->add_image(filename.string(), image_params());
}
- compiler.parameter_texture("filename", handle.svm_slot());
+ compiler.parameter_texture("filename", handle);
if (space == NODE_TEX_VOXEL_SPACE_WORLD) {
compiler.parameter("mapping", tfm);
compiler.parameter("use_mapping", 1);
@@ -5010,6 +5006,63 @@ void MixNode::constant_fold(const ConstantFolder &folder)
}
}
+/* Combine Color */
+
+NODE_DEFINE(CombineColorNode)
+{
+ NodeType *type = NodeType::add("combine_color", create, NodeType::SHADER);
+
+ static NodeEnum type_enum;
+ type_enum.insert("rgb", NODE_COMBSEP_COLOR_RGB);
+ type_enum.insert("hsv", NODE_COMBSEP_COLOR_HSV);
+ type_enum.insert("hsl", NODE_COMBSEP_COLOR_HSL);
+ SOCKET_ENUM(color_type, "Type", type_enum, NODE_COMBSEP_COLOR_RGB);
+
+ SOCKET_IN_FLOAT(r, "Red", 0.0f);
+ SOCKET_IN_FLOAT(g, "Green", 0.0f);
+ SOCKET_IN_FLOAT(b, "Blue", 0.0f);
+
+ SOCKET_OUT_COLOR(color, "Color");
+
+ return type;
+}
+
+CombineColorNode::CombineColorNode() : ShaderNode(get_node_type())
+{
+}
+
+void CombineColorNode::constant_fold(const ConstantFolder &folder)
+{
+ if (folder.all_inputs_constant()) {
+ folder.make_constant(svm_combine_color(color_type, make_float3(r, g, b)));
+ }
+}
+
+void CombineColorNode::compile(SVMCompiler &compiler)
+{
+ ShaderInput *red_in = input("Red");
+ ShaderInput *green_in = input("Green");
+ ShaderInput *blue_in = input("Blue");
+ ShaderOutput *color_out = output("Color");
+
+ int red_stack_offset = compiler.stack_assign(red_in);
+ int green_stack_offset = compiler.stack_assign(green_in);
+ int blue_stack_offset = compiler.stack_assign(blue_in);
+ int color_stack_offset = compiler.stack_assign(color_out);
+
+ compiler.add_node(
+ NODE_COMBINE_COLOR,
+ color_type,
+ compiler.encode_uchar4(red_stack_offset, green_stack_offset, blue_stack_offset),
+ color_stack_offset);
+}
+
+void CombineColorNode::compile(OSLCompiler &compiler)
+{
+ compiler.parameter(this, "color_type");
+ compiler.add(this, "node_combine_color");
+}
+
/* Combine RGB */
NODE_DEFINE(CombineRGBNode)
@@ -5250,6 +5303,70 @@ void BrightContrastNode::compile(OSLCompiler &compiler)
compiler.add(this, "node_brightness");
}
+/* Separate Color */
+
+NODE_DEFINE(SeparateColorNode)
+{
+ NodeType *type = NodeType::add("separate_color", create, NodeType::SHADER);
+
+ static NodeEnum type_enum;
+ type_enum.insert("rgb", NODE_COMBSEP_COLOR_RGB);
+ type_enum.insert("hsv", NODE_COMBSEP_COLOR_HSV);
+ type_enum.insert("hsl", NODE_COMBSEP_COLOR_HSL);
+ SOCKET_ENUM(color_type, "Type", type_enum, NODE_COMBSEP_COLOR_RGB);
+
+ SOCKET_IN_COLOR(color, "Color", zero_float3());
+
+ SOCKET_OUT_FLOAT(r, "Red");
+ SOCKET_OUT_FLOAT(g, "Green");
+ SOCKET_OUT_FLOAT(b, "Blue");
+
+ return type;
+}
+
+SeparateColorNode::SeparateColorNode() : ShaderNode(get_node_type())
+{
+}
+
+void SeparateColorNode::constant_fold(const ConstantFolder &folder)
+{
+ if (folder.all_inputs_constant()) {
+ float3 col = svm_separate_color(color_type, color);
+
+ for (int channel = 0; channel < 3; channel++) {
+ if (outputs[channel] == folder.output) {
+ folder.make_constant(col[channel]);
+ return;
+ }
+ }
+ }
+}
+
+void SeparateColorNode::compile(SVMCompiler &compiler)
+{
+ ShaderInput *color_in = input("Color");
+ ShaderOutput *red_out = output("Red");
+ ShaderOutput *green_out = output("Green");
+ ShaderOutput *blue_out = output("Blue");
+
+ int color_stack_offset = compiler.stack_assign(color_in);
+ int red_stack_offset = compiler.stack_assign(red_out);
+ int green_stack_offset = compiler.stack_assign(green_out);
+ int blue_stack_offset = compiler.stack_assign(blue_out);
+
+ compiler.add_node(
+ NODE_SEPARATE_COLOR,
+ color_type,
+ color_stack_offset,
+ compiler.encode_uchar4(red_stack_offset, green_stack_offset, blue_stack_offset));
+}
+
+void SeparateColorNode::compile(OSLCompiler &compiler)
+{
+ compiler.parameter(this, "color_type");
+ compiler.add(this, "node_separate_color");
+}
+
/* Separate RGB */
NODE_DEFINE(SeparateRGBNode)
diff --git a/intern/cycles/scene/shader_nodes.h b/intern/cycles/scene/shader_nodes.h
index 9aef5d3151f..ac40a397c1e 100644
--- a/intern/cycles/scene/shader_nodes.h
+++ b/intern/cycles/scene/shader_nodes.h
@@ -1101,6 +1101,17 @@ class MixNode : public ShaderNode {
NODE_SOCKET_API(float, fac)
};
+class CombineColorNode : public ShaderNode {
+ public:
+ SHADER_NODE_CLASS(CombineColorNode)
+ void constant_fold(const ConstantFolder &folder);
+
+ NODE_SOCKET_API(NodeCombSepColorType, color_type)
+ NODE_SOCKET_API(float, r)
+ NODE_SOCKET_API(float, g)
+ NODE_SOCKET_API(float, b)
+};
+
class CombineRGBNode : public ShaderNode {
public:
SHADER_NODE_CLASS(CombineRGBNode)
@@ -1150,6 +1161,15 @@ class BrightContrastNode : public ShaderNode {
NODE_SOCKET_API(float, contrast)
};
+class SeparateColorNode : public ShaderNode {
+ public:
+ SHADER_NODE_CLASS(SeparateColorNode)
+ void constant_fold(const ConstantFolder &folder);
+
+ NODE_SOCKET_API(NodeCombSepColorType, color_type)
+ NODE_SOCKET_API(float3, color)
+};
+
class SeparateRGBNode : public ShaderNode {
public:
SHADER_NODE_CLASS(SeparateRGBNode)
diff --git a/intern/cycles/util/color.h b/intern/cycles/util/color.h
index cccccde3ba6..795c3754976 100644
--- a/intern/cycles/util/color.h
+++ b/intern/cycles/util/color.h
@@ -152,6 +152,56 @@ ccl_device float3 hsv_to_rgb(float3 hsv)
return rgb;
}
+ccl_device float3 rgb_to_hsl(float3 rgb)
+{
+ float cmax, cmin, h, s, l;
+
+ cmax = fmaxf(rgb.x, fmaxf(rgb.y, rgb.z));
+ cmin = min(rgb.x, min(rgb.y, rgb.z));
+ l = min(1.0f, (cmax + cmin) / 2.0f);
+
+ if (cmax == cmin) {
+ h = s = 0.0f; /* achromatic */
+ }
+ else {
+ float cdelta = cmax - cmin;
+ s = l > 0.5f ? cdelta / (2.0f - cmax - cmin) : cdelta / (cmax + cmin);
+ if (cmax == rgb.x) {
+ h = (rgb.y - rgb.z) / cdelta + (rgb.y < rgb.z ? 6.0f : 0.0f);
+ }
+ else if (cmax == rgb.y) {
+ h = (rgb.z - rgb.x) / cdelta + 2.0f;
+ }
+ else {
+ h = (rgb.x - rgb.y) / cdelta + 4.0f;
+ }
+ }
+ h /= 6.0f;
+
+ return make_float3(h, s, l);
+}
+
+ccl_device float3 hsl_to_rgb(float3 hsl)
+{
+ float nr, ng, nb, chroma, h, s, l;
+
+ h = hsl.x;
+ s = hsl.y;
+ l = hsl.z;
+
+ nr = fabsf(h * 6.0f - 3.0f) - 1.0f;
+ ng = 2.0f - fabsf(h * 6.0f - 2.0f);
+ nb = 2.0f - fabsf(h * 6.0f - 4.0f);
+
+ nr = clamp(nr, 0.0f, 1.0f);
+ nb = clamp(nb, 0.0f, 1.0f);
+ ng = clamp(ng, 0.0f, 1.0f);
+
+ chroma = (1.0f - fabsf(2.0f * l - 1.0f)) * s;
+
+ return make_float3((nr - 0.5f) * chroma + l, (ng - 0.5f) * chroma + l, (nb - 0.5f) * chroma + l);
+}
+
ccl_device float3 xyY_to_xyz(float x, float y, float Y)
{
float X, Z;