diff options
Diffstat (limited to 'intern/cycles')
32 files changed, 1228 insertions, 625 deletions
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 9b4799d252f..9acc9e99ad0 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -24,12 +24,19 @@ from . import camera enum_devices = ( ('CPU', "CPU", "Use CPU for rendering"), - ('GPU', "GPU Compute", "Use GPU compute device for rendering, configured in the system tab in the user preferences"), + ('GPU', "GPU Compute", + "Use GPU compute device for rendering, configured in the system tab in the user preferences"), ) enum_feature_set = ( - ('SUPPORTED', "Supported", "Only use finished and supported features"), - ('EXPERIMENTAL', "Experimental", "Use experimental and incomplete features that might be broken or change in the future", 'ERROR', 1), + ('SUPPORTED', + "Supported", + "Only use finished and supported features"), + ('EXPERIMENTAL', + "Experimental", + "Use experimental and incomplete features that might be broken or change in the future", + 'ERROR', + 1), ) enum_displacement_methods = ( @@ -81,9 +88,14 @@ enum_sampling_pattern = ( ) enum_volume_sampling = ( - ('DISTANCE', "Distance", "Use distance sampling, best for dense volumes with lights far away"), - ('EQUIANGULAR', "Equiangular", "Use equiangular sampling, best for volumes with low density with light inside or near the volume"), - ('MULTIPLE_IMPORTANCE', "Multiple Importance", + ('DISTANCE', + "Distance", + "Use distance sampling, best for dense volumes with lights far away"), + ('EQUIANGULAR', + "Equiangular", + "Use equiangular sampling, best for volumes with low density with light inside or near the volume"), + ('MULTIPLE_IMPORTANCE', + "Multiple Importance", "Combine distance and equi-angular sampling for volumes where neither method is ideal"), ) @@ -93,10 +105,15 @@ enum_volume_interpolation = ( ) enum_world_mis = ( - ('NONE', "None", "Don't sample the background, faster but might cause noise for non-solid backgrounds"), - ('AUTOMATIC', "Auto", "Automatically try to determine the best setting"), - ('MANUAL', "Manual", "Manually set the resolution of the sampling map, higher values are slower and require more memory but reduce noise") -) + ('NONE', + "None", + "Don't sample the background, faster but might cause noise for non-solid backgrounds"), + ('AUTOMATIC', + "Auto", + "Automatically try to determine the best setting"), + ('MANUAL', + "Manual", + "Manually set the resolution of the sampling map, higher values are slower and require more memory but reduce noise")) enum_device_type = ( ('CPU', "CPU", "CPU", 0), @@ -210,17 +227,33 @@ enum_denoising_input_passes = ( ) enum_denoising_prefilter = ( - ('NONE', "None", "No prefiltering, use when guiding passes are noise-free", 1), - ('FAST', "Fast", "Denoise color and guiding passes together. Improves quality when guiding passes are noisy using least amount of extra processing time", 2), - ('ACCURATE', "Accurate", "Prefilter noisy guiding passes before denoising color. Improves quality when guiding passes are noisy using extra processing time", 3), + ('NONE', + "None", + "No prefiltering, use when guiding passes are noise-free", + 1), + ('FAST', + "Fast", + "Denoise color and guiding passes together. Improves quality when guiding passes are noisy using least amount of extra processing time", + 2), + ('ACCURATE', + "Accurate", + "Prefilter noisy guiding passes before denoising color. Improves quality when guiding passes are noisy using extra processing time", + 3), ) enum_direct_light_sampling_type = ( - ('MULTIPLE_IMPORTANCE_SAMPLING', "Multiple Importance Sampling", - "Multiple importance sampling is used to combine direct light contributions from next-event estimation and forward path tracing", 0), - ('FORWARD_PATH_TRACING', "Forward Path Tracing", "Direct light contributions are only sampled using forward path tracing", 1), - ('NEXT_EVENT_ESTIMATION', "Next-Event Estimation", - "Direct light contributions are only sampled using next-event estimation", 2), + ('MULTIPLE_IMPORTANCE_SAMPLING', + "Multiple Importance Sampling", + "Multiple importance sampling is used to combine direct light contributions from next-event estimation and forward path tracing", + 0), + ('FORWARD_PATH_TRACING', + "Forward Path Tracing", + "Direct light contributions are only sampled using forward path tracing", + 1), + ('NEXT_EVENT_ESTIMATION', + "Next-Event Estimation", + "Direct light contributions are only sampled using next-event estimation", + 2), ) @@ -357,7 +390,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): scrambling_distance: FloatProperty( name="Scrambling Distance", default=1.0, - min=0.0, soft_max=1.0, + min=0.0, + soft_max=1.0, description="Reduce randomization between pixels to improve GPU rendering performance, at the cost of possible rendering artifacts if set too low", ) preview_scrambling_distance: BoolProperty( @@ -383,7 +417,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): name="Light Sampling Threshold", description="Probabilistically terminate light samples when the light contribution is below this threshold (more noise but faster rendering). " "Zero disables the test and never ignores lights", - min=0.0, max=1.0, + min=0.0, + max=1.0, default=0.01, ) @@ -395,7 +430,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): adaptive_threshold: FloatProperty( name="Adaptive Sampling Threshold", description="Noise level step to stop sampling at, lower values reduce noise at the cost of render time. Zero for automatic setting based on number of AA samples", - min=0.0, max=1.0, + min=0.0, + max=1.0, soft_min=0.001, default=0.01, precision=4, @@ -403,7 +439,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): adaptive_min_samples: IntProperty( name="Adaptive Min Samples", description="Minimum AA samples for adaptive sampling, to discover noisy features before stopping sampling. Zero for automatic setting based on noise threshold", - min=0, max=4096, + min=0, + max=4096, default=0, ) @@ -415,7 +452,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): preview_adaptive_threshold: FloatProperty( name="Adaptive Sampling Threshold", description="Noise level step to stop sampling at, lower values reduce noise at the cost of render time. Zero for automatic setting based on number of AA samples, for viewport renders", - min=0.0, max=1.0, + min=0.0, + max=1.0, soft_min=0.001, default=0.1, precision=4, @@ -423,7 +461,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): preview_adaptive_min_samples: IntProperty( name="Adaptive Min Samples", description="Minimum AA samples for adaptive sampling, to discover noisy features before stopping sampling. Zero for automatic setting based on noise threshold, for viewport renders", - min=0, max=4096, + min=0, + max=4096, default=0, ) @@ -550,7 +589,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): max_subdivisions: IntProperty( name="Max Subdivisions", description="Stop subdividing when this level is reached even if the dice rate would produce finer tessellation", - min=0, max=16, + min=0, + max=16, default=12, ) @@ -817,8 +857,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): debug_use_optix_debug: BoolProperty( name="OptiX Module Debug", description="Load OptiX module in debug mode: lower logging verbosity level, enable validations, and lower optimization level", - default=False - ) + default=False) @classmethod def register(cls): @@ -1185,7 +1224,8 @@ class CyclesObjectSettings(bpy.types.PropertyGroup): motion_steps: IntProperty( name="Motion Steps", description="Control accuracy of motion blur, more steps gives more memory usage (actual number of steps is 2^(steps - 1))", - min=1, max=7, + min=1, + max=7, default=1, ) @@ -1224,7 +1264,8 @@ class CyclesObjectSettings(bpy.types.PropertyGroup): shadow_terminator_geometry_offset: FloatProperty( name="Shadow Terminator Geometry Offset", description="Offset rays from the surface to reduce shadow terminator artifact on low poly geometry. Only affects triangles at grazing angles to light", - min=0.0, max=1.0, + min=0.0, + max=1.0, default=0.1, ) diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 886f5345020..9d2dbdf6732 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -1082,8 +1082,18 @@ class CYCLES_OBJECT_PT_motion_blur(CyclesButtonsPanel, Panel): def has_geometry_visibility(ob): - return ob and ((ob.type in {'MESH', 'CURVE', 'SURFACE', 'FONT', 'META', 'LIGHT', 'VOLUME', 'POINTCLOUD', 'CURVES'}) or - (ob.instance_type == 'COLLECTION' and ob.instance_collection)) + return ob and ( + (ob.type in { + 'MESH', + 'CURVE', + 'SURFACE', + 'FONT', + 'META', + 'LIGHT', + 'VOLUME', + 'POINTCLOUD', + 'CURVES', + }) or (ob.instance_type == 'COLLECTION' and ob.instance_collection)) class CYCLES_OBJECT_PT_shading(CyclesButtonsPanel, Panel): diff --git a/intern/cycles/blender/image.cpp b/intern/cycles/blender/image.cpp index ca4c8f5904a..e01b72c1653 100644 --- a/intern/cycles/blender/image.cpp +++ b/intern/cycles/blender/image.cpp @@ -13,9 +13,11 @@ CCL_NAMESPACE_BEGIN BlenderImageLoader::BlenderImageLoader(BL::Image b_image, const int frame, + const int tile_number, const bool is_preview_render) : b_image(b_image), frame(frame), + tile_number(tile_number), /* Don't free cache for preview render to avoid race condition from T93560, to be fixed properly later as we are close to release. */ free_cache(!is_preview_render && !b_image.has_data()) @@ -66,12 +68,11 @@ bool BlenderImageLoader::load_pixels(const ImageMetaData &metadata, { const size_t num_pixels = ((size_t)metadata.width) * metadata.height; const int channels = metadata.channels; - const int tile = 0; /* TODO(lukas): Support tiles here? */ if (b_image.is_float()) { /* image data */ float *image_pixels; - image_pixels = image_get_float_pixels_for_frame(b_image, frame, tile); + image_pixels = image_get_float_pixels_for_frame(b_image, frame, tile_number); if (image_pixels && num_pixels * channels == pixels_size) { memcpy(pixels, image_pixels, pixels_size * sizeof(float)); @@ -99,7 +100,7 @@ bool BlenderImageLoader::load_pixels(const ImageMetaData &metadata, } } else { - unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame, tile); + unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame, tile_number); if (image_pixels && num_pixels * channels == pixels_size) { memcpy(pixels, image_pixels, pixels_size * sizeof(unsigned char)); @@ -153,7 +154,13 @@ string BlenderImageLoader::name() const bool BlenderImageLoader::equals(const ImageLoader &other) const { const BlenderImageLoader &other_loader = (const BlenderImageLoader &)other; - return b_image == other_loader.b_image && frame == other_loader.frame; + return b_image == other_loader.b_image && frame == other_loader.frame && + tile_number == other_loader.tile_number; +} + +int BlenderImageLoader::get_tile_number() const +{ + return tile_number; } /* Point Density */ diff --git a/intern/cycles/blender/image.h b/intern/cycles/blender/image.h index ee576b31f7e..c2cc0f51b9b 100644 --- a/intern/cycles/blender/image.h +++ b/intern/cycles/blender/image.h @@ -12,7 +12,10 @@ CCL_NAMESPACE_BEGIN class BlenderImageLoader : public ImageLoader { public: - BlenderImageLoader(BL::Image b_image, const int frame, const bool is_preview_render); + BlenderImageLoader(BL::Image b_image, + const int frame, + const int tile_number, + const bool is_preview_render); bool load_metadata(const ImageDeviceFeatures &features, ImageMetaData &metadata) override; bool load_pixels(const ImageMetaData &metadata, @@ -22,8 +25,11 @@ class BlenderImageLoader : public ImageLoader { string name() const override; bool equals(const ImageLoader &other) const override; + int get_tile_number() const override; + BL::Image b_image; int frame; + int tile_number; bool free_cache; }; diff --git a/intern/cycles/blender/mesh.cpp b/intern/cycles/blender/mesh.cpp index de67e27923d..c76ce3801d4 100644 --- a/intern/cycles/blender/mesh.cpp +++ b/intern/cycles/blender/mesh.cpp @@ -316,7 +316,7 @@ static void fill_generic_attribute(BL::Mesh &b_mesh, break; } case BL::Attribute::domain_EDGE: { - /* Averge edge attributes at vertices. */ + /* Average edge attributes at vertices. */ const size_t num_verts = b_mesh.vertices.length(); vector<int> count(num_verts, 0); diff --git a/intern/cycles/blender/shader.cpp b/intern/cycles/blender/shader.cpp index d3527567b96..81a64457c88 100644 --- a/intern/cycles/blender/shader.cpp +++ b/intern/cycles/blender/shader.cpp @@ -355,6 +355,18 @@ static ShaderNode *add_node(Scene *scene, else if (b_node.is_a(&RNA_ShaderNodeCombineHSV)) { node = graph->create_node<CombineHSVNode>(); } + else if (b_node.is_a(&RNA_ShaderNodeSeparateColor)) { + BL::ShaderNodeSeparateColor b_separate_node(b_node); + SeparateColorNode *separate_node = graph->create_node<SeparateColorNode>(); + separate_node->set_color_type((NodeCombSepColorType)b_separate_node.mode()); + node = separate_node; + } + else if (b_node.is_a(&RNA_ShaderNodeCombineColor)) { + BL::ShaderNodeCombineColor b_combine_node(b_node); + CombineColorNode *combine_node = graph->create_node<CombineColorNode>(); + combine_node->set_color_type((NodeCombSepColorType)b_combine_node.mode()); + node = combine_node; + } else if (b_node.is_a(&RNA_ShaderNodeSeparateXYZ)) { node = graph->create_node<SeparateXYZNode>(); } @@ -764,9 +776,21 @@ static ShaderNode *add_node(Scene *scene, */ int scene_frame = b_scene.frame_current(); int image_frame = image_user_frame_number(b_image_user, b_image, scene_frame); - image->handle = scene->image_manager->add_image( - new BlenderImageLoader(b_image, image_frame, b_engine.is_preview()), - image->image_params()); + if (b_image.source() != BL::Image::source_TILED) { + image->handle = scene->image_manager->add_image( + new BlenderImageLoader(b_image, image_frame, 0, b_engine.is_preview()), + image->image_params()); + } + else { + vector<ImageLoader *> loaders; + loaders.reserve(image->get_tiles().size()); + for (int tile_number : image->get_tiles()) { + loaders.push_back( + new BlenderImageLoader(b_image, image_frame, tile_number, b_engine.is_preview())); + } + + image->handle = scene->image_manager->add_image(loaders, image->image_params()); + } } else { ustring filename = ustring( @@ -802,7 +826,7 @@ static ShaderNode *add_node(Scene *scene, int scene_frame = b_scene.frame_current(); int image_frame = image_user_frame_number(b_image_user, b_image, scene_frame); env->handle = scene->image_manager->add_image( - new BlenderImageLoader(b_image, image_frame, b_engine.is_preview()), + new BlenderImageLoader(b_image, image_frame, 0, b_engine.is_preview()), env->image_params()); } else { diff --git a/intern/cycles/cmake/external_libs.cmake b/intern/cycles/cmake/external_libs.cmake index f4186374d10..d2f30fe764b 100644 --- a/intern/cycles/cmake/external_libs.cmake +++ b/intern/cycles/cmake/external_libs.cmake @@ -145,8 +145,8 @@ if(CYCLES_STANDALONE_REPOSITORY) -DOIIO_STATIC_DEFINE ) - set(OPENIMAGEIO_INCLUDE_DIR ${OPENIMAGEIO_ROOT_DIR}/include) - set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO_INCLUDE_DIR} ${OPENIMAGEIO_INCLUDE_DIR}/OpenImageIO) + set(OPENIMAGEIO_INCLUDE_DIR ${OPENIMAGEIO_ROOT_DIR}/include) + set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO_INCLUDE_DIR} ${OPENIMAGEIO_INCLUDE_DIR}/OpenImageIO) # Special exceptions for libraries which needs explicit debug version set(OPENIMAGEIO_LIBRARIES optimized ${OPENIMAGEIO_ROOT_DIR}/lib/OpenImageIO.lib diff --git a/intern/cycles/device/metal/device_impl.h b/intern/cycles/device/metal/device_impl.h index 27c58ce6d2f..7506b9b069f 100644 --- a/intern/cycles/device/metal/device_impl.h +++ b/intern/cycles/device/metal/device_impl.h @@ -28,7 +28,8 @@ class MetalDevice : public Device { id<MTLCommandQueue> mtlGeneralCommandQueue = nil; id<MTLArgumentEncoder> mtlAncillaryArgEncoder = nil; /* encoder used for fetching device pointers from MTLBuffers */ - string source_used_for_compile[PSO_NUM]; + string source[PSO_NUM]; + string source_md5[PSO_NUM]; KernelParamsMetal launch_params = {0}; @@ -72,7 +73,6 @@ class MetalDevice : public Device { id<MTLBuffer> texture_bindings_3d = nil; std::vector<id<MTLTexture>> texture_slot_map; - MetalDeviceKernels kernels; bool use_metalrt = false; bool use_function_specialisation = false; @@ -110,6 +110,8 @@ class MetalDevice : public Device { virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override; + id<MTLLibrary> compile(string const &source); + /* ------------------------------------------------------------------ */ /* low-level memory management */ diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm index c01f51fb506..e1438a9d6e2 100644 --- a/intern/cycles/device/metal/device_impl.mm +++ b/intern/cycles/device/metal/device_impl.mm @@ -275,96 +275,44 @@ bool MetalDevice::load_kernels(const uint _kernel_features) * active, but may still need to be rendered without motion blur if that isn't active as well. */ motion_blur = kernel_features & KERNEL_FEATURE_OBJECT_MOTION; - NSError *error = NULL; + source[PSO_GENERIC] = get_source(kernel_features); + mtlLibrary[PSO_GENERIC] = compile(source[PSO_GENERIC]); - for (int i = 0; i < PSO_NUM; i++) { - if (mtlLibrary[i]) { - [mtlLibrary[i] release]; - mtlLibrary[i] = nil; - } - } + MD5Hash md5; + md5.append(source[PSO_GENERIC]); + source_md5[PSO_GENERIC] = md5.get_hex(); + + metal_printf("Front-end compilation finished (generic)\n"); + + bool result = MetalDeviceKernels::load(this, false); + + reserve_local_memory(kernel_features); + + return result; +} +id<MTLLibrary> MetalDevice::compile(string const &source) +{ MTLCompileOptions *options = [[MTLCompileOptions alloc] init]; options.fastMathEnabled = YES; if (@available(macOS 12.0, *)) { options.languageVersion = MTLLanguageVersion2_4; } - else { - return false; - } - string metalsrc; - - /* local helper: dump source to disk and return filepath */ - auto dump_source = [&](int kernel_type) -> string { - string &source = source_used_for_compile[kernel_type]; - string metalsrc = path_cache_get(path_join("kernels", - string_printf("%s.%s.metal", - kernel_type_as_string(kernel_type), - util_md5_string(source).c_str()))); - path_write_text(metalsrc, source); - return metalsrc; - }; - - /* local helper: fetch the kernel source code, adjust it for specific PSO_.. kernel_type flavor, - * then compile it into a MTLLibrary */ - auto fetch_and_compile_source = [&](int kernel_type) { - /* Record the source used to compile this library, for hash building later. */ - string &source = source_used_for_compile[kernel_type]; - - switch (kernel_type) { - case PSO_GENERIC: { - source = get_source(kernel_features); - break; - } - case PSO_SPECIALISED: { - /* PSO_SPECIALISED derives from PSO_GENERIC */ - string &generic_source = source_used_for_compile[PSO_GENERIC]; - if (generic_source.empty()) { - generic_source = get_source(kernel_features); - } - source = "#define __KERNEL_METAL_USE_FUNCTION_SPECIALISATION__\n" + generic_source; - break; - } - default: - assert(0); - } - - /* create MTLLibrary (front-end compilation) */ - mtlLibrary[kernel_type] = [mtlDevice newLibraryWithSource:@(source.c_str()) + NSError *error = NULL; + id<MTLLibrary> mtlLibrary = [mtlDevice newLibraryWithSource:@(source.c_str()) options:options error:&error]; - bool do_source_dump = (getenv("CYCLES_METAL_DUMP_SOURCE") != nullptr); - - if (!mtlLibrary[kernel_type] || do_source_dump) { - string metalsrc = dump_source(kernel_type); - - if (!mtlLibrary[kernel_type]) { - NSString *err = [error localizedDescription]; - set_error(string_printf("Failed to compile library:\n%s", [err UTF8String])); - - return false; - } - } - return true; - }; - - fetch_and_compile_source(PSO_GENERIC); - - if (use_function_specialisation) { - fetch_and_compile_source(PSO_SPECIALISED); + if (!mtlLibrary) { + NSString *err = [error localizedDescription]; + set_error(string_printf("Failed to compile library:\n%s", [err UTF8String])); } - metal_printf("Front-end compilation finished\n"); - - bool result = kernels.load(this, PSO_GENERIC); - [options release]; - reserve_local_memory(kernel_features); - return result; + return mtlLibrary; } void MetalDevice::reserve_local_memory(const uint kernel_features) diff --git a/intern/cycles/device/metal/kernel.h b/intern/cycles/device/metal/kernel.h index b12491d820d..69b2a686ecc 100644 --- a/intern/cycles/device/metal/kernel.h +++ b/intern/cycles/device/metal/kernel.h @@ -54,103 +54,41 @@ enum { const char *kernel_type_as_string(int kernel_type); struct MetalKernelPipeline { - void release() - { - if (pipeline) { - [pipeline release]; - pipeline = nil; - if (@available(macOS 11.0, *)) { - for (int i = 0; i < METALRT_TABLE_NUM; i++) { - if (intersection_func_table[i]) { - [intersection_func_table[i] release]; - intersection_func_table[i] = nil; - } - } - } - } - if (function) { - [function release]; - function = nil; - } - if (@available(macOS 11.0, *)) { - for (int i = 0; i < METALRT_TABLE_NUM; i++) { - if (intersection_func_table[i]) { - [intersection_func_table[i] release]; - } - } - } - } - bool loaded = false; - id<MTLFunction> function = nil; - id<MTLComputePipelineState> pipeline = nil; - - API_AVAILABLE(macos(11.0)) - id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil}; -}; - -struct MetalKernelLoadDesc { - int pso_index = 0; - const char *function_name = nullptr; - int kernel_index = 0; - int threads_per_threadgroup = 0; - MTLFunctionConstantValues *constant_values = nullptr; - NSArray *linked_functions = nullptr; - - struct IntersectorFunctions { - NSArray *defaults; - NSArray *shadow; - NSArray *local; - NSArray *operator[](int index) const - { - if (index == METALRT_TABLE_DEFAULT) - return defaults; - if (index == METALRT_TABLE_SHADOW) - return shadow; - return local; - } - } intersector_functions = {nullptr}; -}; - -/* Metal kernel and associate occupancy information. */ -class MetalDeviceKernel { - public: - ~MetalDeviceKernel(); + void compile(); - bool load(MetalDevice *device, MetalKernelLoadDesc const &desc, class MD5Hash const &md5); + id<MTLLibrary> mtlLibrary = nil; + bool scene_specialized; + string source_md5; - void mark_loaded(int pso_index) - { - pso[pso_index].loaded = true; - } + bool use_metalrt; + bool metalrt_hair; + bool metalrt_hair_thick; + bool metalrt_pointcloud; - int get_num_threads_per_block() const - { - return num_threads_per_block; - } - const MetalKernelPipeline &get_pso() const; + int threads_per_threadgroup; - double load_duration = 0.0; + DeviceKernel device_kernel; + bool loaded = false; + id<MTLDevice> mtlDevice = nil; + id<MTLFunction> function = nil; + id<MTLComputePipelineState> pipeline = nil; + int num_threads_per_block = 0; - private: - MetalKernelPipeline pso[PSO_NUM]; + string error_str; - int num_threads_per_block = 0; + API_AVAILABLE(macos(11.0)) + id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil}; + id<MTLFunction> rt_intersection_function[METALRT_FUNC_NUM] = {nil}; }; /* Cache of Metal kernels for each DeviceKernel. */ -class MetalDeviceKernels { - public: - bool load(MetalDevice *device, int kernel_type); - bool available(DeviceKernel kernel) const; - const MetalDeviceKernel &get(DeviceKernel kernel) const; +namespace MetalDeviceKernels { - MetalDeviceKernel kernels_[DEVICE_KERNEL_NUM]; +bool load(MetalDevice *device, bool scene_specialized); +const MetalKernelPipeline *get_best_pipeline(const MetalDevice *device, DeviceKernel kernel); - id<MTLFunction> rt_intersection_funcs[PSO_NUM][METALRT_FUNC_NUM] = {{nil}}; - - string loaded_md5[PSO_NUM]; -}; +} /* namespace MetalDeviceKernels */ CCL_NAMESPACE_END diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm index 9555ca03c8e..304efc813ec 100644 --- a/intern/cycles/device/metal/kernel.mm +++ b/intern/cycles/device/metal/kernel.mm @@ -9,6 +9,7 @@ # include "util/path.h" # include "util/tbb.h" # include "util/time.h" +# include "util/unique_ptr.h" CCL_NAMESPACE_BEGIN @@ -28,82 +29,376 @@ const char *kernel_type_as_string(int kernel_type) return ""; } -MetalDeviceKernel::~MetalDeviceKernel() +bool kernel_has_intersection(DeviceKernel device_kernel) { - for (int i = 0; i < PSO_NUM; i++) { - pso[i].release(); + return (device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || + device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW || + device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE || + device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK || + device_kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE); +} + +struct ShaderCache { + ShaderCache(id<MTLDevice> _mtlDevice) : mtlDevice(_mtlDevice) + { + } + ~ShaderCache(); + + /* Get the fastest available pipeline for the specified kernel. */ + MetalKernelPipeline *get_best_pipeline(DeviceKernel kernel, const MetalDevice *device); + + /* Non-blocking request for a kernel, optionally specialized to the scene being rendered by + * device. */ + void load_kernel(DeviceKernel kernel, MetalDevice *device, bool scene_specialized); + + void wait_for_all(); + + private: + friend ShaderCache *get_shader_cache(id<MTLDevice> mtlDevice); + + void compile_thread_func(int thread_index); + + using PipelineCollection = std::vector<unique_ptr<MetalKernelPipeline>>; + + struct PipelineRequest { + MetalKernelPipeline *pipeline = nullptr; + std::function<void(MetalKernelPipeline *)> completionHandler; + }; + + std::mutex cache_mutex; + + PipelineCollection pipelines[DEVICE_KERNEL_NUM]; + id<MTLDevice> mtlDevice; + + bool running = false; + std::condition_variable cond_var; + std::deque<PipelineRequest> request_queue; + std::vector<std::thread> compile_threads; + std::atomic_int incomplete_requests = 0; +}; + +std::mutex g_shaderCacheMutex; +std::map<id<MTLDevice>, unique_ptr<ShaderCache>> g_shaderCache; + +ShaderCache *get_shader_cache(id<MTLDevice> mtlDevice) +{ + thread_scoped_lock lock(g_shaderCacheMutex); + auto it = g_shaderCache.find(mtlDevice); + if (it != g_shaderCache.end()) { + return it->second.get(); + } + + g_shaderCache[mtlDevice] = make_unique<ShaderCache>(mtlDevice); + return g_shaderCache[mtlDevice].get(); +} + +ShaderCache::~ShaderCache() +{ + metal_printf("ShaderCache shutting down with incomplete_requests = %d\n", + int(incomplete_requests)); + + running = false; + cond_var.notify_all(); + for (auto &thread : compile_threads) { + thread.join(); + } +} + +void ShaderCache::wait_for_all() +{ + while (incomplete_requests > 0) { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } +} + +void ShaderCache::compile_thread_func(int thread_index) +{ + while (1) { + + /* wait for / acquire next request */ + PipelineRequest request; + { + thread_scoped_lock lock(cache_mutex); + cond_var.wait(lock, [&] { return !running || !request_queue.empty(); }); + if (!running) { + break; + } + + if (!request_queue.empty()) { + request = request_queue.front(); + request_queue.pop_front(); + } + } + + /* service request */ + if (request.pipeline) { + request.pipeline->compile(); + incomplete_requests--; + } } } -bool MetalDeviceKernel::load(MetalDevice *device, - MetalKernelLoadDesc const &desc_in, - MD5Hash const &md5) +void ShaderCache::load_kernel(DeviceKernel device_kernel, + MetalDevice *device, + bool scene_specialized) { - __block MetalKernelLoadDesc const desc(desc_in); - if (desc.kernel_index == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) { + { + /* create compiler threads on first run */ + thread_scoped_lock lock(cache_mutex); + if (compile_threads.empty()) { + running = true; + for (int i = 0; i < max_mtlcompiler_threads; i++) { + compile_threads.push_back(std::thread([&] { compile_thread_func(i); })); + } + } + } + + if (device_kernel == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) { /* skip megakernel */ - return true; + return; } - bool use_binary_archive = true; - if (device->device_vendor == METAL_GPU_APPLE) { - /* Workaround for T94142: Cycles Metal crash with simultaneous viewport and final render */ - use_binary_archive = false; + if (scene_specialized) { + /* Only specialize kernels where it can make an impact. */ + if (device_kernel < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || + device_kernel > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) { + return; + } } - if (auto str = getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) { - use_binary_archive = (atoi(str) == 0); + { + /* check whether the kernel has already been requested / cached */ + thread_scoped_lock lock(cache_mutex); + for (auto &pipeline : pipelines[device_kernel]) { + if (scene_specialized) { + if (pipeline->source_md5 == device->source_md5[PSO_SPECIALISED]) { + /* we already requested a pipeline that is specialized for this kernel data */ + metal_printf("Specialized kernel already requested (%s)\n", + device_kernel_as_string(device_kernel)); + return; + } + } + else { + if (pipeline->source_md5 == device->source_md5[PSO_GENERIC]) { + /* we already requested a generic pipeline for this kernel */ + metal_printf("Generic kernel already requested (%s)\n", + device_kernel_as_string(device_kernel)); + return; + } + } + } } - id<MTLBinaryArchive> archive = nil; - string metalbin_path; - if (use_binary_archive) { - NSProcessInfo *processInfo = [NSProcessInfo processInfo]; - string osVersion = [[processInfo operatingSystemVersionString] UTF8String]; - MD5Hash local_md5(md5); - local_md5.append(osVersion); - string metalbin_name = string(desc.function_name) + "." + local_md5.get_hex() + - to_string(desc.pso_index) + ".bin"; - metalbin_path = path_cache_get(path_join("kernels", metalbin_name)); - path_create_directories(metalbin_path); + incomplete_requests++; + + PipelineRequest request; + request.pipeline = new MetalKernelPipeline; + request.pipeline->scene_specialized = scene_specialized; + request.pipeline->mtlDevice = mtlDevice; + request.pipeline->source_md5 = + device->source_md5[scene_specialized ? PSO_SPECIALISED : PSO_GENERIC]; + request.pipeline->mtlLibrary = + device->mtlLibrary[scene_specialized ? PSO_SPECIALISED : PSO_GENERIC]; + request.pipeline->device_kernel = device_kernel; + request.pipeline->threads_per_threadgroup = device->max_threads_per_threadgroup; + + /* metalrt options */ + request.pipeline->use_metalrt = device->use_metalrt; + request.pipeline->metalrt_hair = device->use_metalrt && + (device->kernel_features & KERNEL_FEATURE_HAIR); + request.pipeline->metalrt_hair_thick = device->use_metalrt && + (device->kernel_features & KERNEL_FEATURE_HAIR_THICK); + request.pipeline->metalrt_pointcloud = device->use_metalrt && + (device->kernel_features & KERNEL_FEATURE_POINTCLOUD); + + { + thread_scoped_lock lock(cache_mutex); + pipelines[device_kernel].push_back(unique_ptr<MetalKernelPipeline>(request.pipeline)); + request_queue.push_back(request); + } + cond_var.notify_one(); +} - if (path_exists(metalbin_path) && use_binary_archive) { - if (@available(macOS 11.0, *)) { - MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init]; - archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())]; - archive = [device->mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil]; - [archiveDesc release]; +MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const MetalDevice *device) +{ + thread_scoped_lock lock(cache_mutex); + auto &collection = pipelines[kernel]; + if (collection.empty()) { + return nullptr; + } + + /* metalrt options */ + bool use_metalrt = device->use_metalrt; + bool metalrt_hair = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR); + bool metalrt_hair_thick = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR_THICK); + bool metalrt_pointcloud = use_metalrt && (device->kernel_features & KERNEL_FEATURE_POINTCLOUD); + + MetalKernelPipeline *best_pipeline = nullptr; + for (auto &pipeline : collection) { + if (!pipeline->loaded) { + /* still loading - ignore */ + continue; + } + + if (pipeline->use_metalrt != use_metalrt || pipeline->metalrt_hair != metalrt_hair || + pipeline->metalrt_hair_thick != metalrt_hair_thick || + pipeline->metalrt_pointcloud != metalrt_pointcloud) { + /* wrong combination of metalrt options */ + continue; + } + + if (pipeline->scene_specialized) { + if (pipeline->source_md5 == device->source_md5[PSO_SPECIALISED]) { + best_pipeline = pipeline.get(); } } + else if (!best_pipeline) { + best_pipeline = pipeline.get(); + } + } + + return best_pipeline; +} + +void MetalKernelPipeline::compile() +{ + int pso_type = scene_specialized ? PSO_SPECIALISED : PSO_GENERIC; + + const std::string function_name = std::string("cycles_metal_") + + device_kernel_as_string(device_kernel); + + int threads_per_threadgroup = this->threads_per_threadgroup; + if (device_kernel > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL && + device_kernel < DEVICE_KERNEL_INTEGRATOR_RESET) { + /* Always use 512 for the sorting kernels */ + threads_per_threadgroup = 512; } - NSString *entryPoint = [@(desc.function_name) copy]; + NSString *entryPoint = [@(function_name.c_str()) copy]; NSError *error = NULL; if (@available(macOS 11.0, *)) { MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor]; func_desc.name = entryPoint; - if (desc.constant_values) { - func_desc.constantValues = desc.constant_values; - } - pso[desc.pso_index].function = [device->mtlLibrary[desc.pso_index] - newFunctionWithDescriptor:func_desc - error:&error]; + function = [mtlLibrary newFunctionWithDescriptor:func_desc error:&error]; } + [entryPoint release]; - if (pso[desc.pso_index].function == nil) { + if (function == nil) { NSString *err = [error localizedDescription]; string errors = [err UTF8String]; + metal_printf("Error getting function \"%s\": %s", function_name.c_str(), errors.c_str()); + return; + } + + function.label = [entryPoint copy]; + + if (use_metalrt) { + if (@available(macOS 11.0, *)) { + /* create the id<MTLFunction> for each intersection function */ + const char *function_names[] = { + "__anyhit__cycles_metalrt_visibility_test_tri", + "__anyhit__cycles_metalrt_visibility_test_box", + "__anyhit__cycles_metalrt_shadow_all_hit_tri", + "__anyhit__cycles_metalrt_shadow_all_hit_box", + "__anyhit__cycles_metalrt_local_hit_tri", + "__anyhit__cycles_metalrt_local_hit_box", + "__intersection__curve_ribbon", + "__intersection__curve_ribbon_shadow", + "__intersection__curve_all", + "__intersection__curve_all_shadow", + "__intersection__point", + "__intersection__point_shadow", + }; + assert(sizeof(function_names) / sizeof(function_names[0]) == METALRT_FUNC_NUM); + + MTLFunctionDescriptor *desc = [MTLIntersectionFunctionDescriptor functionDescriptor]; + for (int i = 0; i < METALRT_FUNC_NUM; i++) { + const char *function_name = function_names[i]; + desc.name = [@(function_name) copy]; + + NSError *error = NULL; + rt_intersection_function[i] = [mtlLibrary newFunctionWithDescriptor:desc error:&error]; - device->set_error( - string_printf("Error getting function \"%s\": %s", desc.function_name, errors.c_str())); - return false; + if (rt_intersection_function[i] == nil) { + NSString *err = [error localizedDescription]; + string errors = [err UTF8String]; + + error_str = string_printf( + "Error getting intersection function \"%s\": %s", function_name, errors.c_str()); + break; + } + + rt_intersection_function[i].label = [@(function_name) copy]; + } + } } - pso[desc.pso_index].function.label = [@(desc.function_name) copy]; + NSArray *table_functions[METALRT_TABLE_NUM] = {nil}; + NSArray *linked_functions = nil; + + if (use_metalrt) { + id<MTLFunction> curve_intersect_default = nil; + id<MTLFunction> curve_intersect_shadow = nil; + id<MTLFunction> point_intersect_default = nil; + id<MTLFunction> point_intersect_shadow = nil; + if (metalrt_hair) { + /* Add curve intersection programs. */ + if (metalrt_hair_thick) { + /* Slower programs for thick hair since that also slows down ribbons. + * Ideally this should not be needed. */ + curve_intersect_default = rt_intersection_function[METALRT_FUNC_CURVE_ALL]; + curve_intersect_shadow = rt_intersection_function[METALRT_FUNC_CURVE_ALL_SHADOW]; + } + else { + curve_intersect_default = rt_intersection_function[METALRT_FUNC_CURVE_RIBBON]; + curve_intersect_shadow = rt_intersection_function[METALRT_FUNC_CURVE_RIBBON_SHADOW]; + } + } + if (metalrt_pointcloud) { + point_intersect_default = rt_intersection_function[METALRT_FUNC_POINT]; + point_intersect_shadow = rt_intersection_function[METALRT_FUNC_POINT_SHADOW]; + } + table_functions[METALRT_TABLE_DEFAULT] = [NSArray + arrayWithObjects:rt_intersection_function[METALRT_FUNC_DEFAULT_TRI], + curve_intersect_default ? + curve_intersect_default : + rt_intersection_function[METALRT_FUNC_DEFAULT_BOX], + point_intersect_default ? + point_intersect_default : + rt_intersection_function[METALRT_FUNC_DEFAULT_BOX], + nil]; + table_functions[METALRT_TABLE_SHADOW] = [NSArray + arrayWithObjects:rt_intersection_function[METALRT_FUNC_SHADOW_TRI], + curve_intersect_shadow ? + curve_intersect_shadow : + rt_intersection_function[METALRT_FUNC_SHADOW_BOX], + point_intersect_shadow ? + point_intersect_shadow : + rt_intersection_function[METALRT_FUNC_SHADOW_BOX], + nil]; + table_functions[METALRT_TABLE_LOCAL] = [NSArray + arrayWithObjects:rt_intersection_function[METALRT_FUNC_LOCAL_TRI], + rt_intersection_function[METALRT_FUNC_LOCAL_BOX], + rt_intersection_function[METALRT_FUNC_LOCAL_BOX], + nil]; + + NSMutableSet *unique_functions = [NSMutableSet + setWithArray:table_functions[METALRT_TABLE_DEFAULT]]; + [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_SHADOW]]; + [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL]]; + + if (kernel_has_intersection(device_kernel)) { + linked_functions = [[NSArray arrayWithArray:[unique_functions allObjects]] + sortedArrayUsingComparator:^NSComparisonResult(id<MTLFunction> f1, id<MTLFunction> f2) { + return [f1.label compare:f2.label]; + }]; + } + unique_functions = nil; + } - __block MTLComputePipelineDescriptor *computePipelineStateDescriptor = + MTLComputePipelineDescriptor *computePipelineStateDescriptor = [[MTLComputePipelineDescriptor alloc] init]; computePipelineStateDescriptor.buffers[0].mutability = MTLMutabilityImmutable; @@ -111,52 +406,86 @@ bool MetalDeviceKernel::load(MetalDevice *device, computePipelineStateDescriptor.buffers[2].mutability = MTLMutabilityImmutable; if (@available(macos 10.14, *)) { - computePipelineStateDescriptor.maxTotalThreadsPerThreadgroup = desc.threads_per_threadgroup; + computePipelineStateDescriptor.maxTotalThreadsPerThreadgroup = threads_per_threadgroup; } computePipelineStateDescriptor.threadGroupSizeIsMultipleOfThreadExecutionWidth = true; - computePipelineStateDescriptor.computeFunction = pso[desc.pso_index].function; + computePipelineStateDescriptor.computeFunction = function; + if (@available(macOS 11.0, *)) { /* Attach the additional functions to an MTLLinkedFunctions object */ - if (desc.linked_functions) { + if (linked_functions) { computePipelineStateDescriptor.linkedFunctions = [[MTLLinkedFunctions alloc] init]; - computePipelineStateDescriptor.linkedFunctions.functions = desc.linked_functions; + computePipelineStateDescriptor.linkedFunctions.functions = linked_functions; } - computePipelineStateDescriptor.maxCallStackDepth = 1; + if (use_metalrt) { + computePipelineStateDescriptor.maxCallStackDepth = 8; + } } - /* Create a new Compute pipeline state object */ MTLPipelineOption pipelineOptions = MTLPipelineOptionNone; - bool creating_new_archive = false; + bool use_binary_archive = true; + if (auto str = getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) { + use_binary_archive = (atoi(str) == 0); + } + + id<MTLBinaryArchive> archive = nil; + string metalbin_path; + string metalbin_name; + if (use_binary_archive) { + NSProcessInfo *processInfo = [NSProcessInfo processInfo]; + string osVersion = [[processInfo operatingSystemVersionString] UTF8String]; + MD5Hash local_md5; + local_md5.append(source_md5); + local_md5.append(osVersion); + local_md5.append((uint8_t *)&this->threads_per_threadgroup, + sizeof(this->threads_per_threadgroup)); + + string options; + if (use_metalrt && kernel_has_intersection(device_kernel)) { + /* incorporate any MetalRT specializations into the archive name */ + options += string_printf(".hair_%d.hair_thick_%d.pointcloud_%d", + metalrt_hair ? 1 : 0, + metalrt_hair_thick ? 1 : 0, + metalrt_pointcloud ? 1 : 0); + } + + /* Replace non-alphanumerical characters with underscores. */ + string device_name = [mtlDevice.name UTF8String]; + for (char &c : device_name) { + if ((c < '0' || c > '9') && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z')) { + c = '_'; + } + } + + metalbin_name = device_name; + metalbin_name = path_join(metalbin_name, device_kernel_as_string(device_kernel)); + metalbin_name = path_join(metalbin_name, kernel_type_as_string(pso_type)); + metalbin_name = path_join(metalbin_name, local_md5.get_hex() + options + ".bin"); + + metalbin_path = path_cache_get(path_join("kernels", metalbin_name)); + path_create_directories(metalbin_path); + + if (path_exists(metalbin_path) && use_binary_archive) { + if (@available(macOS 11.0, *)) { + MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init]; + archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())]; + archive = [mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil]; + [archiveDesc release]; + } + } + } + + __block bool creating_new_archive = false; if (@available(macOS 11.0, *)) { if (use_binary_archive) { if (!archive) { MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init]; archiveDesc.url = nil; - archive = [device->mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil]; + archive = [mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil]; creating_new_archive = true; - - double starttime = time_dt(); - - if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor - error:&error]) { - NSString *errStr = [error localizedDescription]; - metal_printf("Failed to add PSO to archive:\n%s\n", - errStr ? [errStr UTF8String] : "nil"); - } - else { - double duration = time_dt() - starttime; - metal_printf("%2d | %-55s | %7.2fs\n", - desc.kernel_index, - device_kernel_as_string((DeviceKernel)desc.kernel_index), - duration); - - if (desc.pso_index == PSO_GENERIC) { - this->load_duration = duration; - } - } } computePipelineStateDescriptor.binaryArchives = [NSArray arrayWithObjects:archive, nil]; pipelineOptions = MTLPipelineOptionFailOnBinaryArchiveMiss; @@ -170,17 +499,14 @@ bool MetalDeviceKernel::load(MetalDevice *device, MTLComputePipelineReflection *reflection, NSError *error) { bool recreate_archive = false; - if (computePipelineState == nil && archive && !creating_new_archive) { - - assert(0); - + if (computePipelineState == nil && archive) { NSString *errStr = [error localizedDescription]; metal_printf( "Failed to create compute pipeline state \"%s\" from archive - attempting recreation... " "(error: %s)\n", - device_kernel_as_string((DeviceKernel)desc.kernel_index), + device_kernel_as_string((DeviceKernel)device_kernel), errStr ? [errStr UTF8String] : "nil"); - computePipelineState = [device->mtlDevice + computePipelineState = [mtlDevice newComputePipelineStateWithDescriptor:computePipelineStateDescriptor options:MTLPipelineOptionNone reflection:nullptr @@ -192,32 +518,23 @@ bool MetalDeviceKernel::load(MetalDevice *device, if (computePipelineState == nil) { NSString *errStr = [error localizedDescription]; - device->set_error(string_printf("Failed to create compute pipeline state \"%s\", error: \n", - device_kernel_as_string((DeviceKernel)desc.kernel_index)) + - (errStr ? [errStr UTF8String] : "nil")); - metal_printf("%2d | %-55s | %7.2fs | FAILED!\n", - desc.kernel_index, - device_kernel_as_string((DeviceKernel)desc.kernel_index), + error_str = string_printf("Failed to create compute pipeline state \"%s\", error: \n", + device_kernel_as_string((DeviceKernel)device_kernel)); + error_str += (errStr ? [errStr UTF8String] : "nil"); + metal_printf("%16s | %2d | %-55s | %7.2fs | FAILED!\n", + kernel_type_as_string(pso_type), + device_kernel, + device_kernel_as_string((DeviceKernel)device_kernel), duration); return; } - pso[desc.pso_index].pipeline = computePipelineState; - num_threads_per_block = round_down(computePipelineState.maxTotalThreadsPerThreadgroup, - computePipelineState.threadExecutionWidth); + int num_threads_per_block = round_down(computePipelineState.maxTotalThreadsPerThreadgroup, + computePipelineState.threadExecutionWidth); num_threads_per_block = std::max(num_threads_per_block, (int)computePipelineState.threadExecutionWidth); - - if (!use_binary_archive) { - metal_printf("%2d | %-55s | %7.2fs\n", - desc.kernel_index, - device_kernel_as_string((DeviceKernel)desc.kernel_index), - duration); - - if (desc.pso_index == PSO_GENERIC) { - this->load_duration = duration; - } - } + this->pipeline = computePipelineState; + this->num_threads_per_block = num_threads_per_block; if (@available(macOS 11.0, *)) { if (creating_new_archive || recreate_archive) { @@ -228,304 +545,85 @@ bool MetalDeviceKernel::load(MetalDevice *device, } } } + }; - [computePipelineStateDescriptor release]; - computePipelineStateDescriptor = nil; - - if (device->use_metalrt && desc.linked_functions) { - for (int table = 0; table < METALRT_TABLE_NUM; table++) { - if (@available(macOS 11.0, *)) { - MTLIntersectionFunctionTableDescriptor *ift_desc = - [[MTLIntersectionFunctionTableDescriptor alloc] init]; - ift_desc.functionCount = desc.intersector_functions[table].count; - - pso[desc.pso_index].intersection_func_table[table] = [pso[desc.pso_index].pipeline - newIntersectionFunctionTableWithDescriptor:ift_desc]; - - /* Finally write the function handles into this pipeline's table */ - for (int i = 0; i < 2; i++) { - id<MTLFunctionHandle> handle = [pso[desc.pso_index].pipeline - functionHandleWithFunction:desc.intersector_functions[table][i]]; - [pso[desc.pso_index].intersection_func_table[table] setFunction:handle atIndex:i]; - } + /* Block on load to ensure we continue with a valid kernel function */ + if (creating_new_archive) { + starttime = time_dt(); + NSError *error; + if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor + error:&error]) { + NSString *errStr = [error localizedDescription]; + metal_printf("Failed to add PSO to archive:\n%s\n", errStr ? [errStr UTF8String] : "nil"); + } + } + id<MTLComputePipelineState> pipeline = [mtlDevice + newComputePipelineStateWithDescriptor:computePipelineStateDescriptor + options:pipelineOptions + reflection:nullptr + error:&error]; + completionHandler(pipeline, nullptr, error); + + this->loaded = true; + [computePipelineStateDescriptor release]; + computePipelineStateDescriptor = nil; + + if (use_metalrt && linked_functions) { + for (int table = 0; table < METALRT_TABLE_NUM; table++) { + if (@available(macOS 11.0, *)) { + MTLIntersectionFunctionTableDescriptor *ift_desc = + [[MTLIntersectionFunctionTableDescriptor alloc] init]; + ift_desc.functionCount = table_functions[table].count; + intersection_func_table[table] = [this->pipeline + newIntersectionFunctionTableWithDescriptor:ift_desc]; + + /* Finally write the function handles into this pipeline's table */ + for (int i = 0; i < 2; i++) { + id<MTLFunctionHandle> handle = [pipeline + functionHandleWithFunction:table_functions[table][i]]; + [intersection_func_table[table] setFunction:handle atIndex:i]; } } } + } - mark_loaded(desc.pso_index); - }; + double duration = time_dt() - starttime; - if (desc.pso_index == PSO_SPECIALISED) { - /* Asynchronous load */ - dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{ - NSError *error; - id<MTLComputePipelineState> pipeline = [device->mtlDevice - newComputePipelineStateWithDescriptor:computePipelineStateDescriptor - options:pipelineOptions - reflection:nullptr - error:&error]; - completionHandler(pipeline, nullptr, error); - }); + if (!use_binary_archive) { + metal_printf("%16s | %2d | %-55s | %7.2fs\n", + kernel_type_as_string(pso_type), + int(device_kernel), + device_kernel_as_string(device_kernel), + duration); } else { - /* Block on load to ensure we continue with a valid kernel function */ - id<MTLComputePipelineState> pipeline = [device->mtlDevice - newComputePipelineStateWithDescriptor:computePipelineStateDescriptor - options:pipelineOptions - reflection:nullptr - error:&error]; - completionHandler(pipeline, nullptr, error); - } - - return true; -} - -const MetalKernelPipeline &MetalDeviceKernel::get_pso() const -{ - if (pso[PSO_SPECIALISED].loaded) { - return pso[PSO_SPECIALISED]; + metal_printf("%16s | %2d | %-55s | %7.2fs | %s: %s\n", + kernel_type_as_string(pso_type), + device_kernel, + device_kernel_as_string((DeviceKernel)device_kernel), + duration, + creating_new_archive ? " new" : "load", + metalbin_name.c_str()); } - - assert(pso[PSO_GENERIC].loaded); - return pso[PSO_GENERIC]; } -bool MetalDeviceKernels::load(MetalDevice *device, int kernel_type) +bool MetalDeviceKernels::load(MetalDevice *device, bool scene_specialized) { - bool any_error = false; - - MD5Hash md5; - - /* Build the function constant table */ - MTLFunctionConstantValues *constant_values = nullptr; - if (kernel_type == PSO_SPECIALISED) { - constant_values = [MTLFunctionConstantValues new]; - -# define KERNEL_FILM(_type, name) \ - [constant_values setConstantValue:&data.film.name \ - type:get_MTLDataType_##_type() \ - atIndex:KernelData_film_##name]; \ - md5.append((uint8_t *)&data.film.name, sizeof(data.film.name)); - -# define KERNEL_BACKGROUND(_type, name) \ - [constant_values setConstantValue:&data.background.name \ - type:get_MTLDataType_##_type() \ - atIndex:KernelData_background_##name]; \ - md5.append((uint8_t *)&data.background.name, sizeof(data.background.name)); - -# define KERNEL_INTEGRATOR(_type, name) \ - [constant_values setConstantValue:&data.integrator.name \ - type:get_MTLDataType_##_type() \ - atIndex:KernelData_integrator_##name]; \ - md5.append((uint8_t *)&data.integrator.name, sizeof(data.integrator.name)); - -# define KERNEL_BVH(_type, name) \ - [constant_values setConstantValue:&data.bvh.name \ - type:get_MTLDataType_##_type() \ - atIndex:KernelData_bvh_##name]; \ - md5.append((uint8_t *)&data.bvh.name, sizeof(data.bvh.name)); - - /* METAL_WIP: populate constant_values based on KernelData */ - assert(0); - /* - const KernelData &data = device->launch_params.data; - # include "kernel/types/background.h" - # include "kernel/types/bvh.h" - # include "kernel/types/film.h" - # include "kernel/types/integrator.h" - */ - } - - if (device->use_metalrt) { - if (@available(macOS 11.0, *)) { - /* create the id<MTLFunction> for each intersection function */ - const char *function_names[] = { - "__anyhit__cycles_metalrt_visibility_test_tri", - "__anyhit__cycles_metalrt_visibility_test_box", - "__anyhit__cycles_metalrt_shadow_all_hit_tri", - "__anyhit__cycles_metalrt_shadow_all_hit_box", - "__anyhit__cycles_metalrt_local_hit_tri", - "__anyhit__cycles_metalrt_local_hit_box", - "__intersection__curve_ribbon", - "__intersection__curve_ribbon_shadow", - "__intersection__curve_all", - "__intersection__curve_all_shadow", - "__intersection__point", - "__intersection__point_shadow", - }; - assert(sizeof(function_names) / sizeof(function_names[0]) == METALRT_FUNC_NUM); - - MTLFunctionDescriptor *desc = [MTLIntersectionFunctionDescriptor functionDescriptor]; - if (kernel_type == PSO_SPECIALISED) { - desc.constantValues = constant_values; - } - for (int i = 0; i < METALRT_FUNC_NUM; i++) { - const char *function_name = function_names[i]; - desc.name = [@(function_name) copy]; - - NSError *error = NULL; - rt_intersection_funcs[kernel_type][i] = [device->mtlLibrary[kernel_type] - newFunctionWithDescriptor:desc - error:&error]; - - if (rt_intersection_funcs[kernel_type][i] == nil) { - NSString *err = [error localizedDescription]; - string errors = [err UTF8String]; - - device->set_error(string_printf( - "Error getting intersection function \"%s\": %s", function_name, errors.c_str())); - any_error = true; - break; - } - - rt_intersection_funcs[kernel_type][i].label = [@(function_name) copy]; - } - } - } - md5.append(device->source_used_for_compile[kernel_type]); - - string hash = md5.get_hex(); - if (loaded_md5[kernel_type] == hash) { - return true; - } - - if (!any_error) { - NSArray *table_functions[METALRT_TABLE_NUM] = {nil}; - NSArray *function_list = nil; - - if (device->use_metalrt) { - id<MTLFunction> curve_intersect_default = nil; - id<MTLFunction> curve_intersect_shadow = nil; - id<MTLFunction> point_intersect_default = nil; - id<MTLFunction> point_intersect_shadow = nil; - if (device->kernel_features & KERNEL_FEATURE_HAIR) { - /* Add curve intersection programs. */ - if (device->kernel_features & KERNEL_FEATURE_HAIR_THICK) { - /* Slower programs for thick hair since that also slows down ribbons. - * Ideally this should not be needed. */ - curve_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL]; - curve_intersect_shadow = - rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL_SHADOW]; - } - else { - curve_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON]; - curve_intersect_shadow = - rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON_SHADOW]; - } - } - if (device->kernel_features & KERNEL_FEATURE_POINTCLOUD) { - point_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_POINT]; - point_intersect_shadow = rt_intersection_funcs[kernel_type][METALRT_FUNC_POINT_SHADOW]; - } - table_functions[METALRT_TABLE_DEFAULT] = [NSArray - arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_TRI], - curve_intersect_default ? - curve_intersect_default : - rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_BOX], - point_intersect_default ? - point_intersect_default : - rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_BOX], - nil]; - table_functions[METALRT_TABLE_SHADOW] = [NSArray - arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_TRI], - curve_intersect_shadow ? - curve_intersect_shadow : - rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_BOX], - point_intersect_shadow ? - point_intersect_shadow : - rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_BOX], - nil]; - table_functions[METALRT_TABLE_LOCAL] = [NSArray - arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_TRI], - rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_BOX], - rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_BOX], - nil]; - - NSMutableSet *unique_functions = [NSMutableSet - setWithArray:table_functions[METALRT_TABLE_DEFAULT]]; - [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_SHADOW]]; - [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL]]; - - function_list = [[NSArray arrayWithArray:[unique_functions allObjects]] - sortedArrayUsingComparator:^NSComparisonResult(id<MTLFunction> f1, id<MTLFunction> f2) { - return [f1.label compare:f2.label]; - }]; - - unique_functions = nil; - } - - metal_printf("Starting %s \"cycles_metal_...\" pipeline builds\n", - kernel_type_as_string(kernel_type)); - - tbb::task_arena local_arena(max_mtlcompiler_threads); - local_arena.execute([&]() { - parallel_for(int(0), int(DEVICE_KERNEL_NUM), [&](int i) { - /* skip megakernel */ - if (i == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) { - return; - } - - /* Only specialize kernels where it can make an impact. */ - if (kernel_type == PSO_SPECIALISED) { - if (i < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || - i > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) { - return; - } - } - - MetalDeviceKernel &kernel = kernels_[i]; - - const std::string function_name = std::string("cycles_metal_") + - device_kernel_as_string((DeviceKernel)i); - int threads_per_threadgroup = device->max_threads_per_threadgroup; - if (i > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL && i < DEVICE_KERNEL_INTEGRATOR_RESET) { - /* Always use 512 for the sorting kernels */ - threads_per_threadgroup = 512; - } - - NSArray *kernel_function_list = nil; - - if (i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || - i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW || - i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE || - i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK || - i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) { - kernel_function_list = function_list; - } - - MetalKernelLoadDesc desc; - desc.pso_index = kernel_type; - desc.kernel_index = i; - desc.linked_functions = kernel_function_list; - desc.intersector_functions.defaults = table_functions[METALRT_TABLE_DEFAULT]; - desc.intersector_functions.shadow = table_functions[METALRT_TABLE_SHADOW]; - desc.intersector_functions.local = table_functions[METALRT_TABLE_LOCAL]; - desc.constant_values = constant_values; - desc.threads_per_threadgroup = threads_per_threadgroup; - desc.function_name = function_name.c_str(); - - bool success = kernel.load(device, desc, md5); - - any_error |= !success; - }); - }); + auto shader_cache = get_shader_cache(device->mtlDevice); + for (int i = 0; i < DEVICE_KERNEL_NUM; i++) { + shader_cache->load_kernel((DeviceKernel)i, device, scene_specialized); } - bool loaded = !any_error; - if (loaded) { - loaded_md5[kernel_type] = hash; + if (!scene_specialized || getenv("CYCLES_METAL_PROFILING")) { + shader_cache->wait_for_all(); } - return loaded; -} - -const MetalDeviceKernel &MetalDeviceKernels::get(DeviceKernel kernel) const -{ - return kernels_[(int)kernel]; + return true; } -bool MetalDeviceKernels::available(DeviceKernel kernel) const +const MetalKernelPipeline *MetalDeviceKernels::get_best_pipeline(const MetalDevice *device, + DeviceKernel kernel) { - return kernels_[(int)kernel].get_pso().function != nil; + return get_shader_cache(device->mtlDevice)->get_best_pipeline(kernel, device); } CCL_NAMESPACE_END diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm index 1686ab95ffa..ec10e091b25 100644 --- a/intern/cycles/device/metal/queue.mm +++ b/intern/cycles/device/metal/queue.mm @@ -108,9 +108,6 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, VLOG(3) << "Metal queue launch " << device_kernel_as_string(kernel) << ", work_size " << work_size; - const MetalDeviceKernel &metal_kernel = metal_device->kernels.get(kernel); - const MetalKernelPipeline &metal_kernel_pso = metal_kernel.get_pso(); - id<MTLComputeCommandEncoder> mtlComputeCommandEncoder = get_compute_encoder(kernel); /* Determine size requirement for argument buffer. */ @@ -212,6 +209,14 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, } bytes_written = globals_offsets + sizeof(KernelParamsMetal); + const MetalKernelPipeline *metal_kernel_pso = MetalDeviceKernels::get_best_pipeline(metal_device, + kernel); + if (!metal_kernel_pso) { + metal_device->set_error( + string_printf("No MetalKernelPipeline for %s\n", device_kernel_as_string(kernel))); + return false; + } + /* Encode ancillaries */ [metal_device->mtlAncillaryArgEncoder setArgumentBuffer:arg_buffer offset:metal_offsets]; [metal_device->mtlAncillaryArgEncoder setBuffer:metal_device->texture_bindings_2d @@ -228,14 +233,14 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, } for (int table = 0; table < METALRT_TABLE_NUM; table++) { - if (metal_kernel_pso.intersection_func_table[table]) { - [metal_kernel_pso.intersection_func_table[table] setBuffer:arg_buffer - offset:globals_offsets - atIndex:1]; + if (metal_kernel_pso->intersection_func_table[table]) { + [metal_kernel_pso->intersection_func_table[table] setBuffer:arg_buffer + offset:globals_offsets + atIndex:1]; [metal_device->mtlAncillaryArgEncoder - setIntersectionFunctionTable:metal_kernel_pso.intersection_func_table[table] + setIntersectionFunctionTable:metal_kernel_pso->intersection_func_table[table] atIndex:3 + table]; - [mtlComputeCommandEncoder useResource:metal_kernel_pso.intersection_func_table[table] + [mtlComputeCommandEncoder useResource:metal_kernel_pso->intersection_func_table[table] usage:MTLResourceUsageRead]; } else { @@ -281,10 +286,10 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, } } - [mtlComputeCommandEncoder setComputePipelineState:metal_kernel_pso.pipeline]; + [mtlComputeCommandEncoder setComputePipelineState:metal_kernel_pso->pipeline]; /* Compute kernel launch parameters. */ - const int num_threads_per_block = metal_kernel.get_num_threads_per_block(); + const int num_threads_per_block = metal_kernel_pso->num_threads_per_block; int shared_mem_bytes = 0; @@ -314,7 +319,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, threadsPerThreadgroup:size_threads_per_threadgroup]; [mtlCommandBuffer addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) { - NSString *kernel_name = metal_kernel_pso.function.label; + NSString *kernel_name = metal_kernel_pso->function.label; /* Enhanced command buffer errors are only available in 11.0+ */ if (@available(macos 11.0, *)) { @@ -547,6 +552,8 @@ id<MTLComputeCommandEncoder> MetalDeviceQueue::get_compute_encoder(DeviceKernel computeCommandEncoderWithDispatchType:concurrent ? MTLDispatchTypeConcurrent : MTLDispatchTypeSerial]; + [mtlComputeEncoder setLabel:@(device_kernel_as_string(kernel))]; + /* declare usage of MTLBuffers etc */ prepare_resources(kernel); } diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index 8830d8c44ac..9fc265bc327 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -23,6 +23,7 @@ # include "util/md5.h" # include "util/path.h" # include "util/progress.h" +# include "util/task.h" # include "util/time.h" # undef __KERNEL_CPU__ @@ -216,6 +217,25 @@ static OptixResult optixUtilDenoiserInvokeTiled(OptixDenoiser denoiser, return OPTIX_SUCCESS; } +# if OPTIX_ABI_VERSION >= 55 +static void execute_optix_task(TaskPool &pool, OptixTask task, OptixResult &failure_reason) +{ + OptixTask additional_tasks[16]; + unsigned int num_additional_tasks = 0; + + const OptixResult result = optixTaskExecute(task, additional_tasks, 16, &num_additional_tasks); + if (result == OPTIX_SUCCESS) { + for (unsigned int i = 0; i < num_additional_tasks; ++i) { + pool.push(function_bind( + &execute_optix_task, std::ref(pool), additional_tasks[i], std::ref(failure_reason))); + } + } + else { + failure_reason = result; + } +} +# endif + } // namespace OptiXDevice::Denoiser::Denoiser(OptiXDevice *device) @@ -453,6 +473,23 @@ bool OptiXDevice::load_kernels(const uint kernel_features) return false; } +# if OPTIX_ABI_VERSION >= 55 + OptixTask task = nullptr; + OptixResult result = optixModuleCreateFromPTXWithTasks(context, + &module_options, + &pipeline_options, + ptx_data.data(), + ptx_data.size(), + nullptr, + nullptr, + &optix_module, + &task); + if (result == OPTIX_SUCCESS) { + TaskPool pool; + execute_optix_task(pool, task, result); + pool.wait_work(); + } +# else const OptixResult result = optixModuleCreateFromPTX(context, &module_options, &pipeline_options, @@ -461,6 +498,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features) nullptr, 0, &optix_module); +# endif if (result != OPTIX_SUCCESS) { set_error(string_printf("Failed to load OptiX kernel from '%s' (%s)", ptx_filename.c_str(), diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index d97854a52d0..473bdb67920 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -145,6 +145,7 @@ set(SRC_KERNEL_SVM_HEADERS svm/normal.h svm/ramp.h svm/ramp_util.h + svm/sepcomb_color.h svm/sepcomb_hsv.h svm/sepcomb_vector.h svm/sky.h diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h index 4e309f16c08..0ed52074a90 100644 --- a/intern/cycles/kernel/device/metal/compat.h +++ b/intern/cycles/kernel/device/metal/compat.h @@ -29,10 +29,26 @@ using namespace metal::raytracing; /* Qualifiers */ -#define ccl_device -#define ccl_device_inline ccl_device -#define ccl_device_forceinline ccl_device -#define ccl_device_noinline ccl_device __attribute__((noinline)) +#if defined(__KERNEL_METAL_APPLE__) + +/* Inline everything for Apple GPUs. + * This gives ~1.1x speedup and 10% spill reduction for integator_shade_surface + * at the cost of longer compile times (~4.5 minutes on M1 Max). */ + +# define ccl_device __attribute__((always_inline)) +# define ccl_device_inline __attribute__((always_inline)) +# define ccl_device_forceinline __attribute__((always_inline)) +# define ccl_device_noinline __attribute__((always_inline)) + +#else + +# define ccl_device +# define ccl_device_inline ccl_device +# define ccl_device_forceinline ccl_device +# define ccl_device_noinline ccl_device __attribute__((noinline)) + +#endif + #define ccl_device_noinline_cpu ccl_device #define ccl_device_inline_method ccl_device #define ccl_global device diff --git a/intern/cycles/kernel/osl/services.cpp b/intern/cycles/kernel/osl/services.cpp index 832498f1f73..e2e10b5b83f 100644 --- a/intern/cycles/kernel/osl/services.cpp +++ b/intern/cycles/kernel/osl/services.cpp @@ -1304,8 +1304,38 @@ bool OSLRenderServices::texture(ustring filename, break; } case OSLTextureHandle::SVM: { - /* Packed texture. */ - float4 rgba = kernel_tex_image_interp(kernel_globals, handle->svm_slot, s, 1.0f - t); + int id = -1; + if (handle->svm_slots[0].w == -1) { + /* Packed single texture. */ + id = handle->svm_slots[0].y; + } + else { + /* Packed tiled texture. */ + int tx = (int)s; + int ty = (int)t; + int tile = 1001 + 10 * ty + tx; + for (int4 tile_node : handle->svm_slots) { + if (tile_node.x == tile) { + id = tile_node.y; + break; + } + if (tile_node.z == tile) { + id = tile_node.w; + break; + } + } + s -= tx; + t -= ty; + } + + float4 rgba; + if (id == -1) { + rgba = make_float4( + TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A); + } + else { + rgba = kernel_tex_image_interp(kernel_globals, id, s, 1.0f - t); + } result[0] = rgba[0]; if (nchannels > 1) @@ -1319,7 +1349,7 @@ bool OSLRenderServices::texture(ustring filename, } case OSLTextureHandle::IES: { /* IES light. */ - result[0] = kernel_ies_interp(kernel_globals, handle->svm_slot, s, t); + result[0] = kernel_ies_interp(kernel_globals, handle->svm_slots[0].y, s, t); status = true; break; } @@ -1413,7 +1443,7 @@ bool OSLRenderServices::texture3d(ustring filename, /* Packed texture. */ ShaderData *sd = (ShaderData *)(sg->renderstate); KernelGlobals kernel_globals = sd->osl_globals; - int slot = handle->svm_slot; + int slot = handle->svm_slots[0].y; float3 P_float3 = make_float3(P.x, P.y, P.z); float4 rgba = kernel_tex_image_interp_3d(kernel_globals, slot, P_float3, INTERPOLATION_NONE); diff --git a/intern/cycles/kernel/osl/services.h b/intern/cycles/kernel/osl/services.h index 653fa017140..edffd912bad 100644 --- a/intern/cycles/kernel/osl/services.h +++ b/intern/cycles/kernel/osl/services.h @@ -39,18 +39,26 @@ struct KernelGlobalsCPU; * with additional data. * * These are stored in a concurrent hash map, because OSL can compile multiple - * shaders in parallel. */ + * shaders in parallel. + * + * NOTE: The svm_slots array contains a compressed mapping of tile to svm_slot pairs + * stored as follows: x:tile_a, y:svm_slot_a, z:tile_b, w:svm_slot_b etc. */ struct OSLTextureHandle : public OIIO::RefCnt { enum Type { OIIO, SVM, IES, BEVEL, AO }; + OSLTextureHandle(Type type, const vector<int4> &svm_slots) + : type(type), svm_slots(svm_slots), oiio_handle(NULL), processor(NULL) + { + } + OSLTextureHandle(Type type = OIIO, int svm_slot = -1) - : type(type), svm_slot(svm_slot), oiio_handle(NULL), processor(NULL) + : OSLTextureHandle(type, {make_int4(0, svm_slot, -1, -1)}) { } Type type; - int svm_slot; + vector<int4> svm_slots; OSL::TextureSystem::TextureHandle *oiio_handle; ColorSpaceProcessor *processor; }; diff --git a/intern/cycles/kernel/osl/shaders/CMakeLists.txt b/intern/cycles/kernel/osl/shaders/CMakeLists.txt index 7ced21c5670..741bce7c399 100644 --- a/intern/cycles/kernel/osl/shaders/CMakeLists.txt +++ b/intern/cycles/kernel/osl/shaders/CMakeLists.txt @@ -16,6 +16,7 @@ set(SRC_OSL node_camera.osl node_checker_texture.osl node_clamp.osl + node_combine_color.osl node_combine_rgb.osl node_combine_hsv.osl node_combine_xyz.osl @@ -68,6 +69,7 @@ set(SRC_OSL node_refraction_bsdf.osl node_rgb_curves.osl node_rgb_ramp.osl + node_separate_color.osl node_separate_rgb.osl node_separate_hsv.osl node_separate_xyz.osl diff --git a/intern/cycles/kernel/osl/shaders/node_color.h b/intern/cycles/kernel/osl/shaders/node_color.h index 388dd114e9a..06735f5b03d 100644 --- a/intern/cycles/kernel/osl/shaders/node_color.h +++ b/intern/cycles/kernel/osl/shaders/node_color.h @@ -148,3 +148,53 @@ color hsv_to_rgb(color hsv) return rgb; } + +color rgb_to_hsl(color rgb) +{ + float cmax, cmin, h, s, l; + + cmax = max(rgb[0], max(rgb[1], rgb[2])); + cmin = min(rgb[0], min(rgb[1], rgb[2])); + l = min(1.0, (cmax + cmin) / 2.0); + + if (cmax == cmin) { + h = s = 0.0; /* achromatic */ + } + else { + float cdelta = cmax - cmin; + s = l > 0.5 ? cdelta / (2.0 - cmax - cmin) : cdelta / (cmax + cmin); + if (cmax == rgb[0]) { + h = (rgb[1] - rgb[2]) / cdelta + (rgb[1] < rgb[2] ? 6.0 : 0.0); + } + else if (cmax == rgb[1]) { + h = (rgb[2] - rgb[0]) / cdelta + 2.0; + } + else { + h = (rgb[0] - rgb[1]) / cdelta + 4.0; + } + } + h /= 6.0; + + return color(h, s, l); +} + +color hsl_to_rgb(color hsl) +{ + float nr, ng, nb, chroma, h, s, l; + + h = hsl[0]; + s = hsl[1]; + l = hsl[2]; + + nr = abs(h * 6.0 - 3.0) - 1.0; + ng = 2.0 - abs(h * 6.0 - 2.0); + nb = 2.0 - abs(h * 6.0 - 4.0); + + nr = clamp(nr, 0.0, 1.0); + nb = clamp(nb, 0.0, 1.0); + ng = clamp(ng, 0.0, 1.0); + + chroma = (1.0 - abs(2.0 * l - 1.0)) * s; + + return color((nr - 0.5) * chroma + l, (ng - 0.5) * chroma + l, (nb - 0.5) * chroma + l); +} diff --git a/intern/cycles/kernel/osl/shaders/node_combine_color.osl b/intern/cycles/kernel/osl/shaders/node_combine_color.osl new file mode 100644 index 00000000000..681a592d2bb --- /dev/null +++ b/intern/cycles/kernel/osl/shaders/node_combine_color.osl @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#include "stdcycles.h" + +shader node_combine_color(string color_type = "rgb", + float Red = 0.0, + float Green = 0.0, + float Blue = 0.0, + output color Color = 0.8) +{ + if (color_type == "rgb" || color_type == "hsv" || color_type == "hsl") + Color = color(color_type, Red, Green, Blue); + else + warning("%s", "Unknown color space!"); +} diff --git a/intern/cycles/kernel/osl/shaders/node_separate_color.osl b/intern/cycles/kernel/osl/shaders/node_separate_color.osl new file mode 100644 index 00000000000..6f3e3149d8e --- /dev/null +++ b/intern/cycles/kernel/osl/shaders/node_separate_color.osl @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#include "node_color.h" +#include "stdcycles.h" + +shader node_separate_color(string color_type = "rgb", + color Color = 0.8, + output float Red = 0.0, + output float Green = 0.0, + output float Blue = 0.0) +{ + color col; + if (color_type == "rgb") + col = Color; + else if (color_type == "hsv") + col = rgb_to_hsv(Color); + else if (color_type == "hsl") + col = rgb_to_hsl(Color); + else + warning("%s", "Unknown color space!"); + + Red = col[0]; + Green = col[1]; + Blue = col[2]; +} diff --git a/intern/cycles/kernel/svm/color_util.h b/intern/cycles/kernel/svm/color_util.h index b439721383c..fa22d4bc8c2 100644 --- a/intern/cycles/kernel/svm/color_util.h +++ b/intern/cycles/kernel/svm/color_util.h @@ -307,4 +307,30 @@ ccl_device_inline float3 svm_brightness_contrast(float3 color, float brightness, return color; } +ccl_device float3 svm_combine_color(NodeCombSepColorType type, float3 color) +{ + switch (type) { + case NODE_COMBSEP_COLOR_HSV: + return hsv_to_rgb(color); + case NODE_COMBSEP_COLOR_HSL: + return hsl_to_rgb(color); + case NODE_COMBSEP_COLOR_RGB: + default: + return color; + } +} + +ccl_device float3 svm_separate_color(NodeCombSepColorType type, float3 color) +{ + switch (type) { + case NODE_COMBSEP_COLOR_HSV: + return rgb_to_hsv(color); + case NODE_COMBSEP_COLOR_HSL: + return rgb_to_hsl(color); + case NODE_COMBSEP_COLOR_RGB: + default: + return color; + } +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/sepcomb_color.h b/intern/cycles/kernel/svm/sepcomb_color.h new file mode 100644 index 00000000000..d186e7f163b --- /dev/null +++ b/intern/cycles/kernel/svm/sepcomb_color.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +ccl_device_noinline void svm_node_combine_color(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint color_type, + uint inputs_stack_offsets, + uint result_stack_offset) +{ + uint red_stack_offset, green_stack_offset, blue_stack_offset; + svm_unpack_node_uchar3( + inputs_stack_offsets, &red_stack_offset, &green_stack_offset, &blue_stack_offset); + + float r = stack_load_float(stack, red_stack_offset); + float g = stack_load_float(stack, green_stack_offset); + float b = stack_load_float(stack, blue_stack_offset); + + /* Combine, and convert back to RGB */ + float3 color = svm_combine_color((NodeCombSepColorType)color_type, make_float3(r, g, b)); + + if (stack_valid(result_stack_offset)) + stack_store_float3(stack, result_stack_offset, color); +} + +ccl_device_noinline void svm_node_separate_color(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint color_type, + uint input_stack_offset, + uint results_stack_offsets) +{ + float3 color = stack_load_float3(stack, input_stack_offset); + + /* Convert color space */ + color = svm_separate_color((NodeCombSepColorType)color_type, color); + + uint red_stack_offset, green_stack_offset, blue_stack_offset; + svm_unpack_node_uchar3( + results_stack_offsets, &red_stack_offset, &green_stack_offset, &blue_stack_offset); + + if (stack_valid(red_stack_offset)) + stack_store_float(stack, red_stack_offset, color.x); + if (stack_valid(green_stack_offset)) + stack_store_float(stack, green_stack_offset, color.y); + if (stack_valid(blue_stack_offset)) + stack_store_float(stack, blue_stack_offset, color.z); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index 08352a6231f..5def943c87f 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -181,6 +181,7 @@ CCL_NAMESPACE_END #include "kernel/svm/noisetex.h" #include "kernel/svm/normal.h" #include "kernel/svm/ramp.h" +#include "kernel/svm/sepcomb_color.h" #include "kernel/svm/sepcomb_hsv.h" #include "kernel/svm/sepcomb_vector.h" #include "kernel/svm/sky.h" @@ -508,6 +509,12 @@ ccl_device void svm_eval_nodes(KernelGlobals kg, case NODE_MIX: offset = svm_node_mix(kg, sd, stack, node.y, node.z, node.w, offset); break; + case NODE_SEPARATE_COLOR: + svm_node_separate_color(kg, sd, stack, node.y, node.z, node.w); + break; + case NODE_COMBINE_COLOR: + svm_node_combine_color(kg, sd, stack, node.y, node.z, node.w); + break; case NODE_SEPARATE_VECTOR: svm_node_separate_vector(sd, stack, node.y, node.z, node.w); break; diff --git a/intern/cycles/kernel/svm/types.h b/intern/cycles/kernel/svm/types.h index bede58f7a54..82109ec4c4f 100644 --- a/intern/cycles/kernel/svm/types.h +++ b/intern/cycles/kernel/svm/types.h @@ -92,6 +92,8 @@ typedef enum ShaderNodeType { NODE_NORMAL_MAP, NODE_INVERT, NODE_MIX, + NODE_SEPARATE_COLOR, + NODE_COMBINE_COLOR, NODE_SEPARATE_VECTOR, NODE_COMBINE_VECTOR, NODE_SEPARATE_HSV, @@ -487,6 +489,12 @@ typedef enum NodePrincipledHairParametrization { NODE_PRINCIPLED_HAIR_NUM, } NodePrincipledHairParametrization; +typedef enum NodeCombSepColorType { + NODE_COMBSEP_COLOR_RGB, + NODE_COMBSEP_COLOR_HSV, + NODE_COMBSEP_COLOR_HSL, +} NodeCombSepColorType; + /* Closure */ typedef enum ClosureType { diff --git a/intern/cycles/scene/image.cpp b/intern/cycles/scene/image.cpp index 21fde88915e..c61ad1f1d71 100644 --- a/intern/cycles/scene/image.cpp +++ b/intern/cycles/scene/image.cpp @@ -117,12 +117,12 @@ void ImageHandle::clear() manager = NULL; } -bool ImageHandle::empty() +bool ImageHandle::empty() const { return tile_slots.empty(); } -int ImageHandle::num_tiles() +int ImageHandle::num_tiles() const { return tile_slots.size(); } @@ -154,6 +154,35 @@ int ImageHandle::svm_slot(const int tile_index) const return tile_slots[tile_index]; } +vector<int4> ImageHandle::get_svm_slots() const +{ + const size_t num_nodes = divide_up(tile_slots.size(), 2); + + vector<int4> svm_slots; + svm_slots.reserve(num_nodes); + for (size_t i = 0; i < num_nodes; i++) { + int4 node; + + int slot = tile_slots[2 * i]; + node.x = manager->images[slot]->loader->get_tile_number(); + node.y = slot; + + if ((2 * i + 1) < tile_slots.size()) { + slot = tile_slots[2 * i + 1]; + node.z = manager->images[slot]->loader->get_tile_number(); + node.w = slot; + } + else { + node.z = -1; + node.w = -1; + } + + svm_slots.push_back(node); + } + + return svm_slots; +} + device_texture *ImageHandle::image_memory(const int tile_index) const { if (tile_index >= tile_slots.size()) { @@ -266,6 +295,11 @@ ustring ImageLoader::osl_filepath() const return ustring(); } +int ImageLoader::get_tile_number() const +{ + return 0; +} + bool ImageLoader::equals(const ImageLoader *a, const ImageLoader *b) { if (a == NULL && b == NULL) { @@ -397,6 +431,19 @@ ImageHandle ImageManager::add_image(ImageLoader *loader, return handle; } +ImageHandle ImageManager::add_image(const vector<ImageLoader *> &loaders, + const ImageParams ¶ms) +{ + ImageHandle handle; + for (ImageLoader *loader : loaders) { + const int slot = add_image_slot(loader, params, true); + handle.tile_slots.push_back(slot); + } + + handle.manager = this; + return handle; +} + int ImageManager::add_image_slot(ImageLoader *loader, const ImageParams ¶ms, const bool builtin) diff --git a/intern/cycles/scene/image.h b/intern/cycles/scene/image.h index 4d0dee35eca..9edb6a7eaf5 100644 --- a/intern/cycles/scene/image.h +++ b/intern/cycles/scene/image.h @@ -112,6 +112,9 @@ class ImageLoader { /* Optional for OSL texture cache. */ virtual ustring osl_filepath() const; + /* Optional for tiled textures loaded externally. */ + virtual int get_tile_number() const; + /* Free any memory used for loading metadata and pixels. */ virtual void cleanup(){}; @@ -139,11 +142,12 @@ class ImageHandle { void clear(); - bool empty(); - int num_tiles(); + bool empty() const; + int num_tiles() const; ImageMetaData metadata(); int svm_slot(const int tile_index = 0) const; + vector<int4> get_svm_slots() const; device_texture *image_memory(const int tile_index = 0) const; VDBImageLoader *vdb_loader(const int tile_index = 0) const; @@ -169,6 +173,7 @@ class ImageManager { const ImageParams ¶ms, const array<int> &tiles); ImageHandle add_image(ImageLoader *loader, const ImageParams ¶ms, const bool builtin = true); + ImageHandle add_image(const vector<ImageLoader *> &loaders, const ImageParams ¶ms); void device_update(Device *device, Scene *scene, Progress &progress); void device_update_slot(Device *device, Scene *scene, int slot, Progress *progress); diff --git a/intern/cycles/scene/osl.cpp b/intern/cycles/scene/osl.cpp index ffa1a2f5623..6698e6e2cce 100644 --- a/intern/cycles/scene/osl.cpp +++ b/intern/cycles/scene/osl.cpp @@ -1211,14 +1211,15 @@ void OSLCompiler::parameter_texture(const char *name, ustring filename, ustring parameter(name, filename); } -void OSLCompiler::parameter_texture(const char *name, int svm_slot) +void OSLCompiler::parameter_texture(const char *name, const ImageHandle &handle) { /* Texture loaded through SVM image texture system. We generate a unique * name, which ends up being used in OSLRenderServices::get_texture_handle * to get handle again. Note that this name must be unique between multiple * render sessions as the render services are shared. */ ustring filename(string_printf("@svm%d", texture_shared_unique_id++).c_str()); - services->textures.insert(filename, new OSLTextureHandle(OSLTextureHandle::SVM, svm_slot)); + services->textures.insert(filename, + new OSLTextureHandle(OSLTextureHandle::SVM, handle.get_svm_slots())); parameter(name, filename); } @@ -1290,7 +1291,7 @@ void OSLCompiler::parameter_texture(const char * /* name */, { } -void OSLCompiler::parameter_texture(const char * /* name */, int /* svm_slot */) +void OSLCompiler::parameter_texture(const char * /* name */, const ImageHandle & /*handle*/) { } diff --git a/intern/cycles/scene/osl.h b/intern/cycles/scene/osl.h index f0f97dbcaad..bf27069b1b1 100644 --- a/intern/cycles/scene/osl.h +++ b/intern/cycles/scene/osl.h @@ -147,7 +147,7 @@ class OSLCompiler { void parameter_attribute(const char *name, ustring s); void parameter_texture(const char *name, ustring filename, ustring colorspace); - void parameter_texture(const char *name, int svm_slot); + void parameter_texture(const char *name, const ImageHandle &handle); void parameter_texture_ies(const char *name, int svm_slot); ShaderType output_type() diff --git a/intern/cycles/scene/shader_nodes.cpp b/intern/cycles/scene/shader_nodes.cpp index 95fccf725f3..03c152928d5 100644 --- a/intern/cycles/scene/shader_nodes.cpp +++ b/intern/cycles/scene/shader_nodes.cpp @@ -19,7 +19,6 @@ #include "util/color.h" #include "util/foreach.h" #include "util/log.h" -#include "util/string.h" #include "util/transform.h" #include "kernel/tables.h" @@ -450,22 +449,19 @@ void ImageTextureNode::compile(OSLCompiler &compiler) const ustring known_colorspace = metadata.colorspace; if (handle.svm_slot() == -1) { - /* OIIO currently does not support <UVTILE> substitutions natively. Replace with a format they - * understand. */ - std::string osl_filename = filename.string(); - string_replace(osl_filename, "<UVTILE>", "<U>_<V>"); compiler.parameter_texture( - "filename", ustring(osl_filename), compress_as_srgb ? u_colorspace_raw : known_colorspace); + "filename", filename, compress_as_srgb ? u_colorspace_raw : known_colorspace); } else { - compiler.parameter_texture("filename", handle.svm_slot()); + compiler.parameter_texture("filename", handle); } const bool unassociate_alpha = !(ColorSpaceManager::colorspace_is_data(colorspace) || alpha_type == IMAGE_ALPHA_CHANNEL_PACKED || alpha_type == IMAGE_ALPHA_IGNORE); const bool is_tiled = (filename.find("<UDIM>") != string::npos || - filename.find("<UVTILE>") != string::npos); + filename.find("<UVTILE>") != string::npos) || + handle.num_tiles() > 1; compiler.parameter(this, "projection"); compiler.parameter(this, "projection_blend"); @@ -610,7 +606,7 @@ void EnvironmentTextureNode::compile(OSLCompiler &compiler) "filename", filename, compress_as_srgb ? u_colorspace_raw : known_colorspace); } else { - compiler.parameter_texture("filename", handle.svm_slot()); + compiler.parameter_texture("filename", handle); } compiler.parameter(this, "projection"); @@ -965,7 +961,7 @@ void SkyTextureNode::compile(OSLCompiler &compiler) compiler.parameter_array("nishita_data", sunsky.nishita_data, 10); /* nishita texture */ if (sky_type == NODE_SKY_NISHITA) { - compiler.parameter_texture("filename", handle.svm_slot()); + compiler.parameter_texture("filename", handle); } compiler.add(this, "node_sky_texture"); } @@ -1860,7 +1856,7 @@ void PointDensityTextureNode::compile(OSLCompiler &compiler) handle = image_manager->add_image(filename.string(), image_params()); } - compiler.parameter_texture("filename", handle.svm_slot()); + compiler.parameter_texture("filename", handle); if (space == NODE_TEX_VOXEL_SPACE_WORLD) { compiler.parameter("mapping", tfm); compiler.parameter("use_mapping", 1); @@ -5010,6 +5006,63 @@ void MixNode::constant_fold(const ConstantFolder &folder) } } +/* Combine Color */ + +NODE_DEFINE(CombineColorNode) +{ + NodeType *type = NodeType::add("combine_color", create, NodeType::SHADER); + + static NodeEnum type_enum; + type_enum.insert("rgb", NODE_COMBSEP_COLOR_RGB); + type_enum.insert("hsv", NODE_COMBSEP_COLOR_HSV); + type_enum.insert("hsl", NODE_COMBSEP_COLOR_HSL); + SOCKET_ENUM(color_type, "Type", type_enum, NODE_COMBSEP_COLOR_RGB); + + SOCKET_IN_FLOAT(r, "Red", 0.0f); + SOCKET_IN_FLOAT(g, "Green", 0.0f); + SOCKET_IN_FLOAT(b, "Blue", 0.0f); + + SOCKET_OUT_COLOR(color, "Color"); + + return type; +} + +CombineColorNode::CombineColorNode() : ShaderNode(get_node_type()) +{ +} + +void CombineColorNode::constant_fold(const ConstantFolder &folder) +{ + if (folder.all_inputs_constant()) { + folder.make_constant(svm_combine_color(color_type, make_float3(r, g, b))); + } +} + +void CombineColorNode::compile(SVMCompiler &compiler) +{ + ShaderInput *red_in = input("Red"); + ShaderInput *green_in = input("Green"); + ShaderInput *blue_in = input("Blue"); + ShaderOutput *color_out = output("Color"); + + int red_stack_offset = compiler.stack_assign(red_in); + int green_stack_offset = compiler.stack_assign(green_in); + int blue_stack_offset = compiler.stack_assign(blue_in); + int color_stack_offset = compiler.stack_assign(color_out); + + compiler.add_node( + NODE_COMBINE_COLOR, + color_type, + compiler.encode_uchar4(red_stack_offset, green_stack_offset, blue_stack_offset), + color_stack_offset); +} + +void CombineColorNode::compile(OSLCompiler &compiler) +{ + compiler.parameter(this, "color_type"); + compiler.add(this, "node_combine_color"); +} + /* Combine RGB */ NODE_DEFINE(CombineRGBNode) @@ -5250,6 +5303,70 @@ void BrightContrastNode::compile(OSLCompiler &compiler) compiler.add(this, "node_brightness"); } +/* Separate Color */ + +NODE_DEFINE(SeparateColorNode) +{ + NodeType *type = NodeType::add("separate_color", create, NodeType::SHADER); + + static NodeEnum type_enum; + type_enum.insert("rgb", NODE_COMBSEP_COLOR_RGB); + type_enum.insert("hsv", NODE_COMBSEP_COLOR_HSV); + type_enum.insert("hsl", NODE_COMBSEP_COLOR_HSL); + SOCKET_ENUM(color_type, "Type", type_enum, NODE_COMBSEP_COLOR_RGB); + + SOCKET_IN_COLOR(color, "Color", zero_float3()); + + SOCKET_OUT_FLOAT(r, "Red"); + SOCKET_OUT_FLOAT(g, "Green"); + SOCKET_OUT_FLOAT(b, "Blue"); + + return type; +} + +SeparateColorNode::SeparateColorNode() : ShaderNode(get_node_type()) +{ +} + +void SeparateColorNode::constant_fold(const ConstantFolder &folder) +{ + if (folder.all_inputs_constant()) { + float3 col = svm_separate_color(color_type, color); + + for (int channel = 0; channel < 3; channel++) { + if (outputs[channel] == folder.output) { + folder.make_constant(col[channel]); + return; + } + } + } +} + +void SeparateColorNode::compile(SVMCompiler &compiler) +{ + ShaderInput *color_in = input("Color"); + ShaderOutput *red_out = output("Red"); + ShaderOutput *green_out = output("Green"); + ShaderOutput *blue_out = output("Blue"); + + int color_stack_offset = compiler.stack_assign(color_in); + int red_stack_offset = compiler.stack_assign(red_out); + int green_stack_offset = compiler.stack_assign(green_out); + int blue_stack_offset = compiler.stack_assign(blue_out); + + compiler.add_node( + NODE_SEPARATE_COLOR, + color_type, + color_stack_offset, + compiler.encode_uchar4(red_stack_offset, green_stack_offset, blue_stack_offset)); +} + +void SeparateColorNode::compile(OSLCompiler &compiler) +{ + compiler.parameter(this, "color_type"); + compiler.add(this, "node_separate_color"); +} + /* Separate RGB */ NODE_DEFINE(SeparateRGBNode) diff --git a/intern/cycles/scene/shader_nodes.h b/intern/cycles/scene/shader_nodes.h index 9aef5d3151f..ac40a397c1e 100644 --- a/intern/cycles/scene/shader_nodes.h +++ b/intern/cycles/scene/shader_nodes.h @@ -1101,6 +1101,17 @@ class MixNode : public ShaderNode { NODE_SOCKET_API(float, fac) }; +class CombineColorNode : public ShaderNode { + public: + SHADER_NODE_CLASS(CombineColorNode) + void constant_fold(const ConstantFolder &folder); + + NODE_SOCKET_API(NodeCombSepColorType, color_type) + NODE_SOCKET_API(float, r) + NODE_SOCKET_API(float, g) + NODE_SOCKET_API(float, b) +}; + class CombineRGBNode : public ShaderNode { public: SHADER_NODE_CLASS(CombineRGBNode) @@ -1150,6 +1161,15 @@ class BrightContrastNode : public ShaderNode { NODE_SOCKET_API(float, contrast) }; +class SeparateColorNode : public ShaderNode { + public: + SHADER_NODE_CLASS(SeparateColorNode) + void constant_fold(const ConstantFolder &folder); + + NODE_SOCKET_API(NodeCombSepColorType, color_type) + NODE_SOCKET_API(float3, color) +}; + class SeparateRGBNode : public ShaderNode { public: SHADER_NODE_CLASS(SeparateRGBNode) diff --git a/intern/cycles/util/color.h b/intern/cycles/util/color.h index cccccde3ba6..795c3754976 100644 --- a/intern/cycles/util/color.h +++ b/intern/cycles/util/color.h @@ -152,6 +152,56 @@ ccl_device float3 hsv_to_rgb(float3 hsv) return rgb; } +ccl_device float3 rgb_to_hsl(float3 rgb) +{ + float cmax, cmin, h, s, l; + + cmax = fmaxf(rgb.x, fmaxf(rgb.y, rgb.z)); + cmin = min(rgb.x, min(rgb.y, rgb.z)); + l = min(1.0f, (cmax + cmin) / 2.0f); + + if (cmax == cmin) { + h = s = 0.0f; /* achromatic */ + } + else { + float cdelta = cmax - cmin; + s = l > 0.5f ? cdelta / (2.0f - cmax - cmin) : cdelta / (cmax + cmin); + if (cmax == rgb.x) { + h = (rgb.y - rgb.z) / cdelta + (rgb.y < rgb.z ? 6.0f : 0.0f); + } + else if (cmax == rgb.y) { + h = (rgb.z - rgb.x) / cdelta + 2.0f; + } + else { + h = (rgb.x - rgb.y) / cdelta + 4.0f; + } + } + h /= 6.0f; + + return make_float3(h, s, l); +} + +ccl_device float3 hsl_to_rgb(float3 hsl) +{ + float nr, ng, nb, chroma, h, s, l; + + h = hsl.x; + s = hsl.y; + l = hsl.z; + + nr = fabsf(h * 6.0f - 3.0f) - 1.0f; + ng = 2.0f - fabsf(h * 6.0f - 2.0f); + nb = 2.0f - fabsf(h * 6.0f - 4.0f); + + nr = clamp(nr, 0.0f, 1.0f); + nb = clamp(nb, 0.0f, 1.0f); + ng = clamp(ng, 0.0f, 1.0f); + + chroma = (1.0f - fabsf(2.0f * l - 1.0f)) * s; + + return make_float3((nr - 0.5f) * chroma + l, (ng - 0.5f) * chroma + l, (nb - 0.5f) * chroma + l); +} + ccl_device float3 xyY_to_xyz(float x, float y, float Y) { float X, Z; |