diff options
Diffstat (limited to 'intern')
53 files changed, 1921 insertions, 676 deletions
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 9b4799d252f..9acc9e99ad0 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -24,12 +24,19 @@ from . import camera enum_devices = ( ('CPU', "CPU", "Use CPU for rendering"), - ('GPU', "GPU Compute", "Use GPU compute device for rendering, configured in the system tab in the user preferences"), + ('GPU', "GPU Compute", + "Use GPU compute device for rendering, configured in the system tab in the user preferences"), ) enum_feature_set = ( - ('SUPPORTED', "Supported", "Only use finished and supported features"), - ('EXPERIMENTAL', "Experimental", "Use experimental and incomplete features that might be broken or change in the future", 'ERROR', 1), + ('SUPPORTED', + "Supported", + "Only use finished and supported features"), + ('EXPERIMENTAL', + "Experimental", + "Use experimental and incomplete features that might be broken or change in the future", + 'ERROR', + 1), ) enum_displacement_methods = ( @@ -81,9 +88,14 @@ enum_sampling_pattern = ( ) enum_volume_sampling = ( - ('DISTANCE', "Distance", "Use distance sampling, best for dense volumes with lights far away"), - ('EQUIANGULAR', "Equiangular", "Use equiangular sampling, best for volumes with low density with light inside or near the volume"), - ('MULTIPLE_IMPORTANCE', "Multiple Importance", + ('DISTANCE', + "Distance", + "Use distance sampling, best for dense volumes with lights far away"), + ('EQUIANGULAR', + "Equiangular", + "Use equiangular sampling, best for volumes with low density with light inside or near the volume"), + ('MULTIPLE_IMPORTANCE', + "Multiple Importance", "Combine distance and equi-angular sampling for volumes where neither method is ideal"), ) @@ -93,10 +105,15 @@ enum_volume_interpolation = ( ) enum_world_mis = ( - ('NONE', "None", "Don't sample the background, faster but might cause noise for non-solid backgrounds"), - ('AUTOMATIC', "Auto", "Automatically try to determine the best setting"), - ('MANUAL', "Manual", "Manually set the resolution of the sampling map, higher values are slower and require more memory but reduce noise") -) + ('NONE', + "None", + "Don't sample the background, faster but might cause noise for non-solid backgrounds"), + ('AUTOMATIC', + "Auto", + "Automatically try to determine the best setting"), + ('MANUAL', + "Manual", + "Manually set the resolution of the sampling map, higher values are slower and require more memory but reduce noise")) enum_device_type = ( ('CPU', "CPU", "CPU", 0), @@ -210,17 +227,33 @@ enum_denoising_input_passes = ( ) enum_denoising_prefilter = ( - ('NONE', "None", "No prefiltering, use when guiding passes are noise-free", 1), - ('FAST', "Fast", "Denoise color and guiding passes together. Improves quality when guiding passes are noisy using least amount of extra processing time", 2), - ('ACCURATE', "Accurate", "Prefilter noisy guiding passes before denoising color. Improves quality when guiding passes are noisy using extra processing time", 3), + ('NONE', + "None", + "No prefiltering, use when guiding passes are noise-free", + 1), + ('FAST', + "Fast", + "Denoise color and guiding passes together. Improves quality when guiding passes are noisy using least amount of extra processing time", + 2), + ('ACCURATE', + "Accurate", + "Prefilter noisy guiding passes before denoising color. Improves quality when guiding passes are noisy using extra processing time", + 3), ) enum_direct_light_sampling_type = ( - ('MULTIPLE_IMPORTANCE_SAMPLING', "Multiple Importance Sampling", - "Multiple importance sampling is used to combine direct light contributions from next-event estimation and forward path tracing", 0), - ('FORWARD_PATH_TRACING', "Forward Path Tracing", "Direct light contributions are only sampled using forward path tracing", 1), - ('NEXT_EVENT_ESTIMATION', "Next-Event Estimation", - "Direct light contributions are only sampled using next-event estimation", 2), + ('MULTIPLE_IMPORTANCE_SAMPLING', + "Multiple Importance Sampling", + "Multiple importance sampling is used to combine direct light contributions from next-event estimation and forward path tracing", + 0), + ('FORWARD_PATH_TRACING', + "Forward Path Tracing", + "Direct light contributions are only sampled using forward path tracing", + 1), + ('NEXT_EVENT_ESTIMATION', + "Next-Event Estimation", + "Direct light contributions are only sampled using next-event estimation", + 2), ) @@ -357,7 +390,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): scrambling_distance: FloatProperty( name="Scrambling Distance", default=1.0, - min=0.0, soft_max=1.0, + min=0.0, + soft_max=1.0, description="Reduce randomization between pixels to improve GPU rendering performance, at the cost of possible rendering artifacts if set too low", ) preview_scrambling_distance: BoolProperty( @@ -383,7 +417,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): name="Light Sampling Threshold", description="Probabilistically terminate light samples when the light contribution is below this threshold (more noise but faster rendering). " "Zero disables the test and never ignores lights", - min=0.0, max=1.0, + min=0.0, + max=1.0, default=0.01, ) @@ -395,7 +430,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): adaptive_threshold: FloatProperty( name="Adaptive Sampling Threshold", description="Noise level step to stop sampling at, lower values reduce noise at the cost of render time. Zero for automatic setting based on number of AA samples", - min=0.0, max=1.0, + min=0.0, + max=1.0, soft_min=0.001, default=0.01, precision=4, @@ -403,7 +439,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): adaptive_min_samples: IntProperty( name="Adaptive Min Samples", description="Minimum AA samples for adaptive sampling, to discover noisy features before stopping sampling. Zero for automatic setting based on noise threshold", - min=0, max=4096, + min=0, + max=4096, default=0, ) @@ -415,7 +452,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): preview_adaptive_threshold: FloatProperty( name="Adaptive Sampling Threshold", description="Noise level step to stop sampling at, lower values reduce noise at the cost of render time. Zero for automatic setting based on number of AA samples, for viewport renders", - min=0.0, max=1.0, + min=0.0, + max=1.0, soft_min=0.001, default=0.1, precision=4, @@ -423,7 +461,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): preview_adaptive_min_samples: IntProperty( name="Adaptive Min Samples", description="Minimum AA samples for adaptive sampling, to discover noisy features before stopping sampling. Zero for automatic setting based on noise threshold, for viewport renders", - min=0, max=4096, + min=0, + max=4096, default=0, ) @@ -550,7 +589,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): max_subdivisions: IntProperty( name="Max Subdivisions", description="Stop subdividing when this level is reached even if the dice rate would produce finer tessellation", - min=0, max=16, + min=0, + max=16, default=12, ) @@ -817,8 +857,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): debug_use_optix_debug: BoolProperty( name="OptiX Module Debug", description="Load OptiX module in debug mode: lower logging verbosity level, enable validations, and lower optimization level", - default=False - ) + default=False) @classmethod def register(cls): @@ -1185,7 +1224,8 @@ class CyclesObjectSettings(bpy.types.PropertyGroup): motion_steps: IntProperty( name="Motion Steps", description="Control accuracy of motion blur, more steps gives more memory usage (actual number of steps is 2^(steps - 1))", - min=1, max=7, + min=1, + max=7, default=1, ) @@ -1224,7 +1264,8 @@ class CyclesObjectSettings(bpy.types.PropertyGroup): shadow_terminator_geometry_offset: FloatProperty( name="Shadow Terminator Geometry Offset", description="Offset rays from the surface to reduce shadow terminator artifact on low poly geometry. Only affects triangles at grazing angles to light", - min=0.0, max=1.0, + min=0.0, + max=1.0, default=0.1, ) diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 886f5345020..9d2dbdf6732 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -1082,8 +1082,18 @@ class CYCLES_OBJECT_PT_motion_blur(CyclesButtonsPanel, Panel): def has_geometry_visibility(ob): - return ob and ((ob.type in {'MESH', 'CURVE', 'SURFACE', 'FONT', 'META', 'LIGHT', 'VOLUME', 'POINTCLOUD', 'CURVES'}) or - (ob.instance_type == 'COLLECTION' and ob.instance_collection)) + return ob and ( + (ob.type in { + 'MESH', + 'CURVE', + 'SURFACE', + 'FONT', + 'META', + 'LIGHT', + 'VOLUME', + 'POINTCLOUD', + 'CURVES', + }) or (ob.instance_type == 'COLLECTION' and ob.instance_collection)) class CYCLES_OBJECT_PT_shading(CyclesButtonsPanel, Panel): diff --git a/intern/cycles/blender/image.cpp b/intern/cycles/blender/image.cpp index ca4c8f5904a..e01b72c1653 100644 --- a/intern/cycles/blender/image.cpp +++ b/intern/cycles/blender/image.cpp @@ -13,9 +13,11 @@ CCL_NAMESPACE_BEGIN BlenderImageLoader::BlenderImageLoader(BL::Image b_image, const int frame, + const int tile_number, const bool is_preview_render) : b_image(b_image), frame(frame), + tile_number(tile_number), /* Don't free cache for preview render to avoid race condition from T93560, to be fixed properly later as we are close to release. */ free_cache(!is_preview_render && !b_image.has_data()) @@ -66,12 +68,11 @@ bool BlenderImageLoader::load_pixels(const ImageMetaData &metadata, { const size_t num_pixels = ((size_t)metadata.width) * metadata.height; const int channels = metadata.channels; - const int tile = 0; /* TODO(lukas): Support tiles here? */ if (b_image.is_float()) { /* image data */ float *image_pixels; - image_pixels = image_get_float_pixels_for_frame(b_image, frame, tile); + image_pixels = image_get_float_pixels_for_frame(b_image, frame, tile_number); if (image_pixels && num_pixels * channels == pixels_size) { memcpy(pixels, image_pixels, pixels_size * sizeof(float)); @@ -99,7 +100,7 @@ bool BlenderImageLoader::load_pixels(const ImageMetaData &metadata, } } else { - unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame, tile); + unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame, tile_number); if (image_pixels && num_pixels * channels == pixels_size) { memcpy(pixels, image_pixels, pixels_size * sizeof(unsigned char)); @@ -153,7 +154,13 @@ string BlenderImageLoader::name() const bool BlenderImageLoader::equals(const ImageLoader &other) const { const BlenderImageLoader &other_loader = (const BlenderImageLoader &)other; - return b_image == other_loader.b_image && frame == other_loader.frame; + return b_image == other_loader.b_image && frame == other_loader.frame && + tile_number == other_loader.tile_number; +} + +int BlenderImageLoader::get_tile_number() const +{ + return tile_number; } /* Point Density */ diff --git a/intern/cycles/blender/image.h b/intern/cycles/blender/image.h index ee576b31f7e..c2cc0f51b9b 100644 --- a/intern/cycles/blender/image.h +++ b/intern/cycles/blender/image.h @@ -12,7 +12,10 @@ CCL_NAMESPACE_BEGIN class BlenderImageLoader : public ImageLoader { public: - BlenderImageLoader(BL::Image b_image, const int frame, const bool is_preview_render); + BlenderImageLoader(BL::Image b_image, + const int frame, + const int tile_number, + const bool is_preview_render); bool load_metadata(const ImageDeviceFeatures &features, ImageMetaData &metadata) override; bool load_pixels(const ImageMetaData &metadata, @@ -22,8 +25,11 @@ class BlenderImageLoader : public ImageLoader { string name() const override; bool equals(const ImageLoader &other) const override; + int get_tile_number() const override; + BL::Image b_image; int frame; + int tile_number; bool free_cache; }; diff --git a/intern/cycles/blender/mesh.cpp b/intern/cycles/blender/mesh.cpp index de67e27923d..c76ce3801d4 100644 --- a/intern/cycles/blender/mesh.cpp +++ b/intern/cycles/blender/mesh.cpp @@ -316,7 +316,7 @@ static void fill_generic_attribute(BL::Mesh &b_mesh, break; } case BL::Attribute::domain_EDGE: { - /* Averge edge attributes at vertices. */ + /* Average edge attributes at vertices. */ const size_t num_verts = b_mesh.vertices.length(); vector<int> count(num_verts, 0); diff --git a/intern/cycles/blender/shader.cpp b/intern/cycles/blender/shader.cpp index d3527567b96..81a64457c88 100644 --- a/intern/cycles/blender/shader.cpp +++ b/intern/cycles/blender/shader.cpp @@ -355,6 +355,18 @@ static ShaderNode *add_node(Scene *scene, else if (b_node.is_a(&RNA_ShaderNodeCombineHSV)) { node = graph->create_node<CombineHSVNode>(); } + else if (b_node.is_a(&RNA_ShaderNodeSeparateColor)) { + BL::ShaderNodeSeparateColor b_separate_node(b_node); + SeparateColorNode *separate_node = graph->create_node<SeparateColorNode>(); + separate_node->set_color_type((NodeCombSepColorType)b_separate_node.mode()); + node = separate_node; + } + else if (b_node.is_a(&RNA_ShaderNodeCombineColor)) { + BL::ShaderNodeCombineColor b_combine_node(b_node); + CombineColorNode *combine_node = graph->create_node<CombineColorNode>(); + combine_node->set_color_type((NodeCombSepColorType)b_combine_node.mode()); + node = combine_node; + } else if (b_node.is_a(&RNA_ShaderNodeSeparateXYZ)) { node = graph->create_node<SeparateXYZNode>(); } @@ -764,9 +776,21 @@ static ShaderNode *add_node(Scene *scene, */ int scene_frame = b_scene.frame_current(); int image_frame = image_user_frame_number(b_image_user, b_image, scene_frame); - image->handle = scene->image_manager->add_image( - new BlenderImageLoader(b_image, image_frame, b_engine.is_preview()), - image->image_params()); + if (b_image.source() != BL::Image::source_TILED) { + image->handle = scene->image_manager->add_image( + new BlenderImageLoader(b_image, image_frame, 0, b_engine.is_preview()), + image->image_params()); + } + else { + vector<ImageLoader *> loaders; + loaders.reserve(image->get_tiles().size()); + for (int tile_number : image->get_tiles()) { + loaders.push_back( + new BlenderImageLoader(b_image, image_frame, tile_number, b_engine.is_preview())); + } + + image->handle = scene->image_manager->add_image(loaders, image->image_params()); + } } else { ustring filename = ustring( @@ -802,7 +826,7 @@ static ShaderNode *add_node(Scene *scene, int scene_frame = b_scene.frame_current(); int image_frame = image_user_frame_number(b_image_user, b_image, scene_frame); env->handle = scene->image_manager->add_image( - new BlenderImageLoader(b_image, image_frame, b_engine.is_preview()), + new BlenderImageLoader(b_image, image_frame, 0, b_engine.is_preview()), env->image_params()); } else { diff --git a/intern/cycles/cmake/external_libs.cmake b/intern/cycles/cmake/external_libs.cmake index f4186374d10..d2f30fe764b 100644 --- a/intern/cycles/cmake/external_libs.cmake +++ b/intern/cycles/cmake/external_libs.cmake @@ -145,8 +145,8 @@ if(CYCLES_STANDALONE_REPOSITORY) -DOIIO_STATIC_DEFINE ) - set(OPENIMAGEIO_INCLUDE_DIR ${OPENIMAGEIO_ROOT_DIR}/include) - set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO_INCLUDE_DIR} ${OPENIMAGEIO_INCLUDE_DIR}/OpenImageIO) + set(OPENIMAGEIO_INCLUDE_DIR ${OPENIMAGEIO_ROOT_DIR}/include) + set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO_INCLUDE_DIR} ${OPENIMAGEIO_INCLUDE_DIR}/OpenImageIO) # Special exceptions for libraries which needs explicit debug version set(OPENIMAGEIO_LIBRARIES optimized ${OPENIMAGEIO_ROOT_DIR}/lib/OpenImageIO.lib diff --git a/intern/cycles/device/metal/device_impl.h b/intern/cycles/device/metal/device_impl.h index 27c58ce6d2f..7506b9b069f 100644 --- a/intern/cycles/device/metal/device_impl.h +++ b/intern/cycles/device/metal/device_impl.h @@ -28,7 +28,8 @@ class MetalDevice : public Device { id<MTLCommandQueue> mtlGeneralCommandQueue = nil; id<MTLArgumentEncoder> mtlAncillaryArgEncoder = nil; /* encoder used for fetching device pointers from MTLBuffers */ - string source_used_for_compile[PSO_NUM]; + string source[PSO_NUM]; + string source_md5[PSO_NUM]; KernelParamsMetal launch_params = {0}; @@ -72,7 +73,6 @@ class MetalDevice : public Device { id<MTLBuffer> texture_bindings_3d = nil; std::vector<id<MTLTexture>> texture_slot_map; - MetalDeviceKernels kernels; bool use_metalrt = false; bool use_function_specialisation = false; @@ -110,6 +110,8 @@ class MetalDevice : public Device { virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override; + id<MTLLibrary> compile(string const &source); + /* ------------------------------------------------------------------ */ /* low-level memory management */ diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm index c01f51fb506..e1438a9d6e2 100644 --- a/intern/cycles/device/metal/device_impl.mm +++ b/intern/cycles/device/metal/device_impl.mm @@ -275,96 +275,44 @@ bool MetalDevice::load_kernels(const uint _kernel_features) * active, but may still need to be rendered without motion blur if that isn't active as well. */ motion_blur = kernel_features & KERNEL_FEATURE_OBJECT_MOTION; - NSError *error = NULL; + source[PSO_GENERIC] = get_source(kernel_features); + mtlLibrary[PSO_GENERIC] = compile(source[PSO_GENERIC]); - for (int i = 0; i < PSO_NUM; i++) { - if (mtlLibrary[i]) { - [mtlLibrary[i] release]; - mtlLibrary[i] = nil; - } - } + MD5Hash md5; + md5.append(source[PSO_GENERIC]); + source_md5[PSO_GENERIC] = md5.get_hex(); + + metal_printf("Front-end compilation finished (generic)\n"); + + bool result = MetalDeviceKernels::load(this, false); + + reserve_local_memory(kernel_features); + + return result; +} +id<MTLLibrary> MetalDevice::compile(string const &source) +{ MTLCompileOptions *options = [[MTLCompileOptions alloc] init]; options.fastMathEnabled = YES; if (@available(macOS 12.0, *)) { options.languageVersion = MTLLanguageVersion2_4; } - else { - return false; - } - string metalsrc; - - /* local helper: dump source to disk and return filepath */ - auto dump_source = [&](int kernel_type) -> string { - string &source = source_used_for_compile[kernel_type]; - string metalsrc = path_cache_get(path_join("kernels", - string_printf("%s.%s.metal", - kernel_type_as_string(kernel_type), - util_md5_string(source).c_str()))); - path_write_text(metalsrc, source); - return metalsrc; - }; - - /* local helper: fetch the kernel source code, adjust it for specific PSO_.. kernel_type flavor, - * then compile it into a MTLLibrary */ - auto fetch_and_compile_source = [&](int kernel_type) { - /* Record the source used to compile this library, for hash building later. */ - string &source = source_used_for_compile[kernel_type]; - - switch (kernel_type) { - case PSO_GENERIC: { - source = get_source(kernel_features); - break; - } - case PSO_SPECIALISED: { - /* PSO_SPECIALISED derives from PSO_GENERIC */ - string &generic_source = source_used_for_compile[PSO_GENERIC]; - if (generic_source.empty()) { - generic_source = get_source(kernel_features); - } - source = "#define __KERNEL_METAL_USE_FUNCTION_SPECIALISATION__\n" + generic_source; - break; - } - default: - assert(0); - } - - /* create MTLLibrary (front-end compilation) */ - mtlLibrary[kernel_type] = [mtlDevice newLibraryWithSource:@(source.c_str()) + NSError *error = NULL; + id<MTLLibrary> mtlLibrary = [mtlDevice newLibraryWithSource:@(source.c_str()) options:options error:&error]; - bool do_source_dump = (getenv("CYCLES_METAL_DUMP_SOURCE") != nullptr); - - if (!mtlLibrary[kernel_type] || do_source_dump) { - string metalsrc = dump_source(kernel_type); - - if (!mtlLibrary[kernel_type]) { - NSString *err = [error localizedDescription]; - set_error(string_printf("Failed to compile library:\n%s", [err UTF8String])); - - return false; - } - } - return true; - }; - - fetch_and_compile_source(PSO_GENERIC); - - if (use_function_specialisation) { - fetch_and_compile_source(PSO_SPECIALISED); + if (!mtlLibrary) { + NSString *err = [error localizedDescription]; + set_error(string_printf("Failed to compile library:\n%s", [err UTF8String])); } - metal_printf("Front-end compilation finished\n"); - - bool result = kernels.load(this, PSO_GENERIC); - [options release]; - reserve_local_memory(kernel_features); - return result; + return mtlLibrary; } void MetalDevice::reserve_local_memory(const uint kernel_features) diff --git a/intern/cycles/device/metal/kernel.h b/intern/cycles/device/metal/kernel.h index b12491d820d..69b2a686ecc 100644 --- a/intern/cycles/device/metal/kernel.h +++ b/intern/cycles/device/metal/kernel.h @@ -54,103 +54,41 @@ enum { const char *kernel_type_as_string(int kernel_type); struct MetalKernelPipeline { - void release() - { - if (pipeline) { - [pipeline release]; - pipeline = nil; - if (@available(macOS 11.0, *)) { - for (int i = 0; i < METALRT_TABLE_NUM; i++) { - if (intersection_func_table[i]) { - [intersection_func_table[i] release]; - intersection_func_table[i] = nil; - } - } - } - } - if (function) { - [function release]; - function = nil; - } - if (@available(macOS 11.0, *)) { - for (int i = 0; i < METALRT_TABLE_NUM; i++) { - if (intersection_func_table[i]) { - [intersection_func_table[i] release]; - } - } - } - } - bool loaded = false; - id<MTLFunction> function = nil; - id<MTLComputePipelineState> pipeline = nil; - - API_AVAILABLE(macos(11.0)) - id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil}; -}; - -struct MetalKernelLoadDesc { - int pso_index = 0; - const char *function_name = nullptr; - int kernel_index = 0; - int threads_per_threadgroup = 0; - MTLFunctionConstantValues *constant_values = nullptr; - NSArray *linked_functions = nullptr; - - struct IntersectorFunctions { - NSArray *defaults; - NSArray *shadow; - NSArray *local; - NSArray *operator[](int index) const - { - if (index == METALRT_TABLE_DEFAULT) - return defaults; - if (index == METALRT_TABLE_SHADOW) - return shadow; - return local; - } - } intersector_functions = {nullptr}; -}; - -/* Metal kernel and associate occupancy information. */ -class MetalDeviceKernel { - public: - ~MetalDeviceKernel(); + void compile(); - bool load(MetalDevice *device, MetalKernelLoadDesc const &desc, class MD5Hash const &md5); + id<MTLLibrary> mtlLibrary = nil; + bool scene_specialized; + string source_md5; - void mark_loaded(int pso_index) - { - pso[pso_index].loaded = true; - } + bool use_metalrt; + bool metalrt_hair; + bool metalrt_hair_thick; + bool metalrt_pointcloud; - int get_num_threads_per_block() const - { - return num_threads_per_block; - } - const MetalKernelPipeline &get_pso() const; + int threads_per_threadgroup; - double load_duration = 0.0; + DeviceKernel device_kernel; + bool loaded = false; + id<MTLDevice> mtlDevice = nil; + id<MTLFunction> function = nil; + id<MTLComputePipelineState> pipeline = nil; + int num_threads_per_block = 0; - private: - MetalKernelPipeline pso[PSO_NUM]; + string error_str; - int num_threads_per_block = 0; + API_AVAILABLE(macos(11.0)) + id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil}; + id<MTLFunction> rt_intersection_function[METALRT_FUNC_NUM] = {nil}; }; /* Cache of Metal kernels for each DeviceKernel. */ -class MetalDeviceKernels { - public: - bool load(MetalDevice *device, int kernel_type); - bool available(DeviceKernel kernel) const; - const MetalDeviceKernel &get(DeviceKernel kernel) const; +namespace MetalDeviceKernels { - MetalDeviceKernel kernels_[DEVICE_KERNEL_NUM]; +bool load(MetalDevice *device, bool scene_specialized); +const MetalKernelPipeline *get_best_pipeline(const MetalDevice *device, DeviceKernel kernel); - id<MTLFunction> rt_intersection_funcs[PSO_NUM][METALRT_FUNC_NUM] = {{nil}}; - - string loaded_md5[PSO_NUM]; -}; +} /* namespace MetalDeviceKernels */ CCL_NAMESPACE_END diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm index 9555ca03c8e..304efc813ec 100644 --- a/intern/cycles/device/metal/kernel.mm +++ b/intern/cycles/device/metal/kernel.mm @@ -9,6 +9,7 @@ # include "util/path.h" # include "util/tbb.h" # include "util/time.h" +# include "util/unique_ptr.h" CCL_NAMESPACE_BEGIN @@ -28,82 +29,376 @@ const char *kernel_type_as_string(int kernel_type) return ""; } -MetalDeviceKernel::~MetalDeviceKernel() +bool kernel_has_intersection(DeviceKernel device_kernel) { - for (int i = 0; i < PSO_NUM; i++) { - pso[i].release(); + return (device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || + device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW || + device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE || + device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK || + device_kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE); +} + +struct ShaderCache { + ShaderCache(id<MTLDevice> _mtlDevice) : mtlDevice(_mtlDevice) + { + } + ~ShaderCache(); + + /* Get the fastest available pipeline for the specified kernel. */ + MetalKernelPipeline *get_best_pipeline(DeviceKernel kernel, const MetalDevice *device); + + /* Non-blocking request for a kernel, optionally specialized to the scene being rendered by + * device. */ + void load_kernel(DeviceKernel kernel, MetalDevice *device, bool scene_specialized); + + void wait_for_all(); + + private: + friend ShaderCache *get_shader_cache(id<MTLDevice> mtlDevice); + + void compile_thread_func(int thread_index); + + using PipelineCollection = std::vector<unique_ptr<MetalKernelPipeline>>; + + struct PipelineRequest { + MetalKernelPipeline *pipeline = nullptr; + std::function<void(MetalKernelPipeline *)> completionHandler; + }; + + std::mutex cache_mutex; + + PipelineCollection pipelines[DEVICE_KERNEL_NUM]; + id<MTLDevice> mtlDevice; + + bool running = false; + std::condition_variable cond_var; + std::deque<PipelineRequest> request_queue; + std::vector<std::thread> compile_threads; + std::atomic_int incomplete_requests = 0; +}; + +std::mutex g_shaderCacheMutex; +std::map<id<MTLDevice>, unique_ptr<ShaderCache>> g_shaderCache; + +ShaderCache *get_shader_cache(id<MTLDevice> mtlDevice) +{ + thread_scoped_lock lock(g_shaderCacheMutex); + auto it = g_shaderCache.find(mtlDevice); + if (it != g_shaderCache.end()) { + return it->second.get(); + } + + g_shaderCache[mtlDevice] = make_unique<ShaderCache>(mtlDevice); + return g_shaderCache[mtlDevice].get(); +} + +ShaderCache::~ShaderCache() +{ + metal_printf("ShaderCache shutting down with incomplete_requests = %d\n", + int(incomplete_requests)); + + running = false; + cond_var.notify_all(); + for (auto &thread : compile_threads) { + thread.join(); + } +} + +void ShaderCache::wait_for_all() +{ + while (incomplete_requests > 0) { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } +} + +void ShaderCache::compile_thread_func(int thread_index) +{ + while (1) { + + /* wait for / acquire next request */ + PipelineRequest request; + { + thread_scoped_lock lock(cache_mutex); + cond_var.wait(lock, [&] { return !running || !request_queue.empty(); }); + if (!running) { + break; + } + + if (!request_queue.empty()) { + request = request_queue.front(); + request_queue.pop_front(); + } + } + + /* service request */ + if (request.pipeline) { + request.pipeline->compile(); + incomplete_requests--; + } } } -bool MetalDeviceKernel::load(MetalDevice *device, - MetalKernelLoadDesc const &desc_in, - MD5Hash const &md5) +void ShaderCache::load_kernel(DeviceKernel device_kernel, + MetalDevice *device, + bool scene_specialized) { - __block MetalKernelLoadDesc const desc(desc_in); - if (desc.kernel_index == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) { + { + /* create compiler threads on first run */ + thread_scoped_lock lock(cache_mutex); + if (compile_threads.empty()) { + running = true; + for (int i = 0; i < max_mtlcompiler_threads; i++) { + compile_threads.push_back(std::thread([&] { compile_thread_func(i); })); + } + } + } + + if (device_kernel == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) { /* skip megakernel */ - return true; + return; } - bool use_binary_archive = true; - if (device->device_vendor == METAL_GPU_APPLE) { - /* Workaround for T94142: Cycles Metal crash with simultaneous viewport and final render */ - use_binary_archive = false; + if (scene_specialized) { + /* Only specialize kernels where it can make an impact. */ + if (device_kernel < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || + device_kernel > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) { + return; + } } - if (auto str = getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) { - use_binary_archive = (atoi(str) == 0); + { + /* check whether the kernel has already been requested / cached */ + thread_scoped_lock lock(cache_mutex); + for (auto &pipeline : pipelines[device_kernel]) { + if (scene_specialized) { + if (pipeline->source_md5 == device->source_md5[PSO_SPECIALISED]) { + /* we already requested a pipeline that is specialized for this kernel data */ + metal_printf("Specialized kernel already requested (%s)\n", + device_kernel_as_string(device_kernel)); + return; + } + } + else { + if (pipeline->source_md5 == device->source_md5[PSO_GENERIC]) { + /* we already requested a generic pipeline for this kernel */ + metal_printf("Generic kernel already requested (%s)\n", + device_kernel_as_string(device_kernel)); + return; + } + } + } } - id<MTLBinaryArchive> archive = nil; - string metalbin_path; - if (use_binary_archive) { - NSProcessInfo *processInfo = [NSProcessInfo processInfo]; - string osVersion = [[processInfo operatingSystemVersionString] UTF8String]; - MD5Hash local_md5(md5); - local_md5.append(osVersion); - string metalbin_name = string(desc.function_name) + "." + local_md5.get_hex() + - to_string(desc.pso_index) + ".bin"; - metalbin_path = path_cache_get(path_join("kernels", metalbin_name)); - path_create_directories(metalbin_path); + incomplete_requests++; + + PipelineRequest request; + request.pipeline = new MetalKernelPipeline; + request.pipeline->scene_specialized = scene_specialized; + request.pipeline->mtlDevice = mtlDevice; + request.pipeline->source_md5 = + device->source_md5[scene_specialized ? PSO_SPECIALISED : PSO_GENERIC]; + request.pipeline->mtlLibrary = + device->mtlLibrary[scene_specialized ? PSO_SPECIALISED : PSO_GENERIC]; + request.pipeline->device_kernel = device_kernel; + request.pipeline->threads_per_threadgroup = device->max_threads_per_threadgroup; + + /* metalrt options */ + request.pipeline->use_metalrt = device->use_metalrt; + request.pipeline->metalrt_hair = device->use_metalrt && + (device->kernel_features & KERNEL_FEATURE_HAIR); + request.pipeline->metalrt_hair_thick = device->use_metalrt && + (device->kernel_features & KERNEL_FEATURE_HAIR_THICK); + request.pipeline->metalrt_pointcloud = device->use_metalrt && + (device->kernel_features & KERNEL_FEATURE_POINTCLOUD); + + { + thread_scoped_lock lock(cache_mutex); + pipelines[device_kernel].push_back(unique_ptr<MetalKernelPipeline>(request.pipeline)); + request_queue.push_back(request); + } + cond_var.notify_one(); +} - if (path_exists(metalbin_path) && use_binary_archive) { - if (@available(macOS 11.0, *)) { - MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init]; - archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())]; - archive = [device->mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil]; - [archiveDesc release]; +MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const MetalDevice *device) +{ + thread_scoped_lock lock(cache_mutex); + auto &collection = pipelines[kernel]; + if (collection.empty()) { + return nullptr; + } + + /* metalrt options */ + bool use_metalrt = device->use_metalrt; + bool metalrt_hair = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR); + bool metalrt_hair_thick = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR_THICK); + bool metalrt_pointcloud = use_metalrt && (device->kernel_features & KERNEL_FEATURE_POINTCLOUD); + + MetalKernelPipeline *best_pipeline = nullptr; + for (auto &pipeline : collection) { + if (!pipeline->loaded) { + /* still loading - ignore */ + continue; + } + + if (pipeline->use_metalrt != use_metalrt || pipeline->metalrt_hair != metalrt_hair || + pipeline->metalrt_hair_thick != metalrt_hair_thick || + pipeline->metalrt_pointcloud != metalrt_pointcloud) { + /* wrong combination of metalrt options */ + continue; + } + + if (pipeline->scene_specialized) { + if (pipeline->source_md5 == device->source_md5[PSO_SPECIALISED]) { + best_pipeline = pipeline.get(); } } + else if (!best_pipeline) { + best_pipeline = pipeline.get(); + } + } + + return best_pipeline; +} + +void MetalKernelPipeline::compile() +{ + int pso_type = scene_specialized ? PSO_SPECIALISED : PSO_GENERIC; + + const std::string function_name = std::string("cycles_metal_") + + device_kernel_as_string(device_kernel); + + int threads_per_threadgroup = this->threads_per_threadgroup; + if (device_kernel > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL && + device_kernel < DEVICE_KERNEL_INTEGRATOR_RESET) { + /* Always use 512 for the sorting kernels */ + threads_per_threadgroup = 512; } - NSString *entryPoint = [@(desc.function_name) copy]; + NSString *entryPoint = [@(function_name.c_str()) copy]; NSError *error = NULL; if (@available(macOS 11.0, *)) { MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor]; func_desc.name = entryPoint; - if (desc.constant_values) { - func_desc.constantValues = desc.constant_values; - } - pso[desc.pso_index].function = [device->mtlLibrary[desc.pso_index] - newFunctionWithDescriptor:func_desc - error:&error]; + function = [mtlLibrary newFunctionWithDescriptor:func_desc error:&error]; } + [entryPoint release]; - if (pso[desc.pso_index].function == nil) { + if (function == nil) { NSString *err = [error localizedDescription]; string errors = [err UTF8String]; + metal_printf("Error getting function \"%s\": %s", function_name.c_str(), errors.c_str()); + return; + } + + function.label = [entryPoint copy]; + + if (use_metalrt) { + if (@available(macOS 11.0, *)) { + /* create the id<MTLFunction> for each intersection function */ + const char *function_names[] = { + "__anyhit__cycles_metalrt_visibility_test_tri", + "__anyhit__cycles_metalrt_visibility_test_box", + "__anyhit__cycles_metalrt_shadow_all_hit_tri", + "__anyhit__cycles_metalrt_shadow_all_hit_box", + "__anyhit__cycles_metalrt_local_hit_tri", + "__anyhit__cycles_metalrt_local_hit_box", + "__intersection__curve_ribbon", + "__intersection__curve_ribbon_shadow", + "__intersection__curve_all", + "__intersection__curve_all_shadow", + "__intersection__point", + "__intersection__point_shadow", + }; + assert(sizeof(function_names) / sizeof(function_names[0]) == METALRT_FUNC_NUM); + + MTLFunctionDescriptor *desc = [MTLIntersectionFunctionDescriptor functionDescriptor]; + for (int i = 0; i < METALRT_FUNC_NUM; i++) { + const char *function_name = function_names[i]; + desc.name = [@(function_name) copy]; + + NSError *error = NULL; + rt_intersection_function[i] = [mtlLibrary newFunctionWithDescriptor:desc error:&error]; - device->set_error( - string_printf("Error getting function \"%s\": %s", desc.function_name, errors.c_str())); - return false; + if (rt_intersection_function[i] == nil) { + NSString *err = [error localizedDescription]; + string errors = [err UTF8String]; + + error_str = string_printf( + "Error getting intersection function \"%s\": %s", function_name, errors.c_str()); + break; + } + + rt_intersection_function[i].label = [@(function_name) copy]; + } + } } - pso[desc.pso_index].function.label = [@(desc.function_name) copy]; + NSArray *table_functions[METALRT_TABLE_NUM] = {nil}; + NSArray *linked_functions = nil; + + if (use_metalrt) { + id<MTLFunction> curve_intersect_default = nil; + id<MTLFunction> curve_intersect_shadow = nil; + id<MTLFunction> point_intersect_default = nil; + id<MTLFunction> point_intersect_shadow = nil; + if (metalrt_hair) { + /* Add curve intersection programs. */ + if (metalrt_hair_thick) { + /* Slower programs for thick hair since that also slows down ribbons. + * Ideally this should not be needed. */ + curve_intersect_default = rt_intersection_function[METALRT_FUNC_CURVE_ALL]; + curve_intersect_shadow = rt_intersection_function[METALRT_FUNC_CURVE_ALL_SHADOW]; + } + else { + curve_intersect_default = rt_intersection_function[METALRT_FUNC_CURVE_RIBBON]; + curve_intersect_shadow = rt_intersection_function[METALRT_FUNC_CURVE_RIBBON_SHADOW]; + } + } + if (metalrt_pointcloud) { + point_intersect_default = rt_intersection_function[METALRT_FUNC_POINT]; + point_intersect_shadow = rt_intersection_function[METALRT_FUNC_POINT_SHADOW]; + } + table_functions[METALRT_TABLE_DEFAULT] = [NSArray + arrayWithObjects:rt_intersection_function[METALRT_FUNC_DEFAULT_TRI], + curve_intersect_default ? + curve_intersect_default : + rt_intersection_function[METALRT_FUNC_DEFAULT_BOX], + point_intersect_default ? + point_intersect_default : + rt_intersection_function[METALRT_FUNC_DEFAULT_BOX], + nil]; + table_functions[METALRT_TABLE_SHADOW] = [NSArray + arrayWithObjects:rt_intersection_function[METALRT_FUNC_SHADOW_TRI], + curve_intersect_shadow ? + curve_intersect_shadow : + rt_intersection_function[METALRT_FUNC_SHADOW_BOX], + point_intersect_shadow ? + point_intersect_shadow : + rt_intersection_function[METALRT_FUNC_SHADOW_BOX], + nil]; + table_functions[METALRT_TABLE_LOCAL] = [NSArray + arrayWithObjects:rt_intersection_function[METALRT_FUNC_LOCAL_TRI], + rt_intersection_function[METALRT_FUNC_LOCAL_BOX], + rt_intersection_function[METALRT_FUNC_LOCAL_BOX], + nil]; + + NSMutableSet *unique_functions = [NSMutableSet + setWithArray:table_functions[METALRT_TABLE_DEFAULT]]; + [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_SHADOW]]; + [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL]]; + + if (kernel_has_intersection(device_kernel)) { + linked_functions = [[NSArray arrayWithArray:[unique_functions allObjects]] + sortedArrayUsingComparator:^NSComparisonResult(id<MTLFunction> f1, id<MTLFunction> f2) { + return [f1.label compare:f2.label]; + }]; + } + unique_functions = nil; + } - __block MTLComputePipelineDescriptor *computePipelineStateDescriptor = + MTLComputePipelineDescriptor *computePipelineStateDescriptor = [[MTLComputePipelineDescriptor alloc] init]; computePipelineStateDescriptor.buffers[0].mutability = MTLMutabilityImmutable; @@ -111,52 +406,86 @@ bool MetalDeviceKernel::load(MetalDevice *device, computePipelineStateDescriptor.buffers[2].mutability = MTLMutabilityImmutable; if (@available(macos 10.14, *)) { - computePipelineStateDescriptor.maxTotalThreadsPerThreadgroup = desc.threads_per_threadgroup; + computePipelineStateDescriptor.maxTotalThreadsPerThreadgroup = threads_per_threadgroup; } computePipelineStateDescriptor.threadGroupSizeIsMultipleOfThreadExecutionWidth = true; - computePipelineStateDescriptor.computeFunction = pso[desc.pso_index].function; + computePipelineStateDescriptor.computeFunction = function; + if (@available(macOS 11.0, *)) { /* Attach the additional functions to an MTLLinkedFunctions object */ - if (desc.linked_functions) { + if (linked_functions) { computePipelineStateDescriptor.linkedFunctions = [[MTLLinkedFunctions alloc] init]; - computePipelineStateDescriptor.linkedFunctions.functions = desc.linked_functions; + computePipelineStateDescriptor.linkedFunctions.functions = linked_functions; } - computePipelineStateDescriptor.maxCallStackDepth = 1; + if (use_metalrt) { + computePipelineStateDescriptor.maxCallStackDepth = 8; + } } - /* Create a new Compute pipeline state object */ MTLPipelineOption pipelineOptions = MTLPipelineOptionNone; - bool creating_new_archive = false; + bool use_binary_archive = true; + if (auto str = getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) { + use_binary_archive = (atoi(str) == 0); + } + + id<MTLBinaryArchive> archive = nil; + string metalbin_path; + string metalbin_name; + if (use_binary_archive) { + NSProcessInfo *processInfo = [NSProcessInfo processInfo]; + string osVersion = [[processInfo operatingSystemVersionString] UTF8String]; + MD5Hash local_md5; + local_md5.append(source_md5); + local_md5.append(osVersion); + local_md5.append((uint8_t *)&this->threads_per_threadgroup, + sizeof(this->threads_per_threadgroup)); + + string options; + if (use_metalrt && kernel_has_intersection(device_kernel)) { + /* incorporate any MetalRT specializations into the archive name */ + options += string_printf(".hair_%d.hair_thick_%d.pointcloud_%d", + metalrt_hair ? 1 : 0, + metalrt_hair_thick ? 1 : 0, + metalrt_pointcloud ? 1 : 0); + } + + /* Replace non-alphanumerical characters with underscores. */ + string device_name = [mtlDevice.name UTF8String]; + for (char &c : device_name) { + if ((c < '0' || c > '9') && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z')) { + c = '_'; + } + } + + metalbin_name = device_name; + metalbin_name = path_join(metalbin_name, device_kernel_as_string(device_kernel)); + metalbin_name = path_join(metalbin_name, kernel_type_as_string(pso_type)); + metalbin_name = path_join(metalbin_name, local_md5.get_hex() + options + ".bin"); + + metalbin_path = path_cache_get(path_join("kernels", metalbin_name)); + path_create_directories(metalbin_path); + + if (path_exists(metalbin_path) && use_binary_archive) { + if (@available(macOS 11.0, *)) { + MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init]; + archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())]; + archive = [mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil]; + [archiveDesc release]; + } + } + } + + __block bool creating_new_archive = false; if (@available(macOS 11.0, *)) { if (use_binary_archive) { if (!archive) { MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init]; archiveDesc.url = nil; - archive = [device->mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil]; + archive = [mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil]; creating_new_archive = true; - - double starttime = time_dt(); - - if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor - error:&error]) { - NSString *errStr = [error localizedDescription]; - metal_printf("Failed to add PSO to archive:\n%s\n", - errStr ? [errStr UTF8String] : "nil"); - } - else { - double duration = time_dt() - starttime; - metal_printf("%2d | %-55s | %7.2fs\n", - desc.kernel_index, - device_kernel_as_string((DeviceKernel)desc.kernel_index), - duration); - - if (desc.pso_index == PSO_GENERIC) { - this->load_duration = duration; - } - } } computePipelineStateDescriptor.binaryArchives = [NSArray arrayWithObjects:archive, nil]; pipelineOptions = MTLPipelineOptionFailOnBinaryArchiveMiss; @@ -170,17 +499,14 @@ bool MetalDeviceKernel::load(MetalDevice *device, MTLComputePipelineReflection *reflection, NSError *error) { bool recreate_archive = false; - if (computePipelineState == nil && archive && !creating_new_archive) { - - assert(0); - + if (computePipelineState == nil && archive) { NSString *errStr = [error localizedDescription]; metal_printf( "Failed to create compute pipeline state \"%s\" from archive - attempting recreation... " "(error: %s)\n", - device_kernel_as_string((DeviceKernel)desc.kernel_index), + device_kernel_as_string((DeviceKernel)device_kernel), errStr ? [errStr UTF8String] : "nil"); - computePipelineState = [device->mtlDevice + computePipelineState = [mtlDevice newComputePipelineStateWithDescriptor:computePipelineStateDescriptor options:MTLPipelineOptionNone reflection:nullptr @@ -192,32 +518,23 @@ bool MetalDeviceKernel::load(MetalDevice *device, if (computePipelineState == nil) { NSString *errStr = [error localizedDescription]; - device->set_error(string_printf("Failed to create compute pipeline state \"%s\", error: \n", - device_kernel_as_string((DeviceKernel)desc.kernel_index)) + - (errStr ? [errStr UTF8String] : "nil")); - metal_printf("%2d | %-55s | %7.2fs | FAILED!\n", - desc.kernel_index, - device_kernel_as_string((DeviceKernel)desc.kernel_index), + error_str = string_printf("Failed to create compute pipeline state \"%s\", error: \n", + device_kernel_as_string((DeviceKernel)device_kernel)); + error_str += (errStr ? [errStr UTF8String] : "nil"); + metal_printf("%16s | %2d | %-55s | %7.2fs | FAILED!\n", + kernel_type_as_string(pso_type), + device_kernel, + device_kernel_as_string((DeviceKernel)device_kernel), duration); return; } - pso[desc.pso_index].pipeline = computePipelineState; - num_threads_per_block = round_down(computePipelineState.maxTotalThreadsPerThreadgroup, - computePipelineState.threadExecutionWidth); + int num_threads_per_block = round_down(computePipelineState.maxTotalThreadsPerThreadgroup, + computePipelineState.threadExecutionWidth); num_threads_per_block = std::max(num_threads_per_block, (int)computePipelineState.threadExecutionWidth); - - if (!use_binary_archive) { - metal_printf("%2d | %-55s | %7.2fs\n", - desc.kernel_index, - device_kernel_as_string((DeviceKernel)desc.kernel_index), - duration); - - if (desc.pso_index == PSO_GENERIC) { - this->load_duration = duration; - } - } + this->pipeline = computePipelineState; + this->num_threads_per_block = num_threads_per_block; if (@available(macOS 11.0, *)) { if (creating_new_archive || recreate_archive) { @@ -228,304 +545,85 @@ bool MetalDeviceKernel::load(MetalDevice *device, } } } + }; - [computePipelineStateDescriptor release]; - computePipelineStateDescriptor = nil; - - if (device->use_metalrt && desc.linked_functions) { - for (int table = 0; table < METALRT_TABLE_NUM; table++) { - if (@available(macOS 11.0, *)) { - MTLIntersectionFunctionTableDescriptor *ift_desc = - [[MTLIntersectionFunctionTableDescriptor alloc] init]; - ift_desc.functionCount = desc.intersector_functions[table].count; - - pso[desc.pso_index].intersection_func_table[table] = [pso[desc.pso_index].pipeline - newIntersectionFunctionTableWithDescriptor:ift_desc]; - - /* Finally write the function handles into this pipeline's table */ - for (int i = 0; i < 2; i++) { - id<MTLFunctionHandle> handle = [pso[desc.pso_index].pipeline - functionHandleWithFunction:desc.intersector_functions[table][i]]; - [pso[desc.pso_index].intersection_func_table[table] setFunction:handle atIndex:i]; - } + /* Block on load to ensure we continue with a valid kernel function */ + if (creating_new_archive) { + starttime = time_dt(); + NSError *error; + if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor + error:&error]) { + NSString *errStr = [error localizedDescription]; + metal_printf("Failed to add PSO to archive:\n%s\n", errStr ? [errStr UTF8String] : "nil"); + } + } + id<MTLComputePipelineState> pipeline = [mtlDevice + newComputePipelineStateWithDescriptor:computePipelineStateDescriptor + options:pipelineOptions + reflection:nullptr + error:&error]; + completionHandler(pipeline, nullptr, error); + + this->loaded = true; + [computePipelineStateDescriptor release]; + computePipelineStateDescriptor = nil; + + if (use_metalrt && linked_functions) { + for (int table = 0; table < METALRT_TABLE_NUM; table++) { + if (@available(macOS 11.0, *)) { + MTLIntersectionFunctionTableDescriptor *ift_desc = + [[MTLIntersectionFunctionTableDescriptor alloc] init]; + ift_desc.functionCount = table_functions[table].count; + intersection_func_table[table] = [this->pipeline + newIntersectionFunctionTableWithDescriptor:ift_desc]; + + /* Finally write the function handles into this pipeline's table */ + for (int i = 0; i < 2; i++) { + id<MTLFunctionHandle> handle = [pipeline + functionHandleWithFunction:table_functions[table][i]]; + [intersection_func_table[table] setFunction:handle atIndex:i]; } } } + } - mark_loaded(desc.pso_index); - }; + double duration = time_dt() - starttime; - if (desc.pso_index == PSO_SPECIALISED) { - /* Asynchronous load */ - dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{ - NSError *error; - id<MTLComputePipelineState> pipeline = [device->mtlDevice - newComputePipelineStateWithDescriptor:computePipelineStateDescriptor - options:pipelineOptions - reflection:nullptr - error:&error]; - completionHandler(pipeline, nullptr, error); - }); + if (!use_binary_archive) { + metal_printf("%16s | %2d | %-55s | %7.2fs\n", + kernel_type_as_string(pso_type), + int(device_kernel), + device_kernel_as_string(device_kernel), + duration); } else { - /* Block on load to ensure we continue with a valid kernel function */ - id<MTLComputePipelineState> pipeline = [device->mtlDevice - newComputePipelineStateWithDescriptor:computePipelineStateDescriptor - options:pipelineOptions - reflection:nullptr - error:&error]; - completionHandler(pipeline, nullptr, error); - } - - return true; -} - -const MetalKernelPipeline &MetalDeviceKernel::get_pso() const -{ - if (pso[PSO_SPECIALISED].loaded) { - return pso[PSO_SPECIALISED]; + metal_printf("%16s | %2d | %-55s | %7.2fs | %s: %s\n", + kernel_type_as_string(pso_type), + device_kernel, + device_kernel_as_string((DeviceKernel)device_kernel), + duration, + creating_new_archive ? " new" : "load", + metalbin_name.c_str()); } - - assert(pso[PSO_GENERIC].loaded); - return pso[PSO_GENERIC]; } -bool MetalDeviceKernels::load(MetalDevice *device, int kernel_type) +bool MetalDeviceKernels::load(MetalDevice *device, bool scene_specialized) { - bool any_error = false; - - MD5Hash md5; - - /* Build the function constant table */ - MTLFunctionConstantValues *constant_values = nullptr; - if (kernel_type == PSO_SPECIALISED) { - constant_values = [MTLFunctionConstantValues new]; - -# define KERNEL_FILM(_type, name) \ - [constant_values setConstantValue:&data.film.name \ - type:get_MTLDataType_##_type() \ - atIndex:KernelData_film_##name]; \ - md5.append((uint8_t *)&data.film.name, sizeof(data.film.name)); - -# define KERNEL_BACKGROUND(_type, name) \ - [constant_values setConstantValue:&data.background.name \ - type:get_MTLDataType_##_type() \ - atIndex:KernelData_background_##name]; \ - md5.append((uint8_t *)&data.background.name, sizeof(data.background.name)); - -# define KERNEL_INTEGRATOR(_type, name) \ - [constant_values setConstantValue:&data.integrator.name \ - type:get_MTLDataType_##_type() \ - atIndex:KernelData_integrator_##name]; \ - md5.append((uint8_t *)&data.integrator.name, sizeof(data.integrator.name)); - -# define KERNEL_BVH(_type, name) \ - [constant_values setConstantValue:&data.bvh.name \ - type:get_MTLDataType_##_type() \ - atIndex:KernelData_bvh_##name]; \ - md5.append((uint8_t *)&data.bvh.name, sizeof(data.bvh.name)); - - /* METAL_WIP: populate constant_values based on KernelData */ - assert(0); - /* - const KernelData &data = device->launch_params.data; - # include "kernel/types/background.h" - # include "kernel/types/bvh.h" - # include "kernel/types/film.h" - # include "kernel/types/integrator.h" - */ - } - - if (device->use_metalrt) { - if (@available(macOS 11.0, *)) { - /* create the id<MTLFunction> for each intersection function */ - const char *function_names[] = { - "__anyhit__cycles_metalrt_visibility_test_tri", - "__anyhit__cycles_metalrt_visibility_test_box", - "__anyhit__cycles_metalrt_shadow_all_hit_tri", - "__anyhit__cycles_metalrt_shadow_all_hit_box", - "__anyhit__cycles_metalrt_local_hit_tri", - "__anyhit__cycles_metalrt_local_hit_box", - "__intersection__curve_ribbon", - "__intersection__curve_ribbon_shadow", - "__intersection__curve_all", - "__intersection__curve_all_shadow", - "__intersection__point", - "__intersection__point_shadow", - }; - assert(sizeof(function_names) / sizeof(function_names[0]) == METALRT_FUNC_NUM); - - MTLFunctionDescriptor *desc = [MTLIntersectionFunctionDescriptor functionDescriptor]; - if (kernel_type == PSO_SPECIALISED) { - desc.constantValues = constant_values; - } - for (int i = 0; i < METALRT_FUNC_NUM; i++) { - const char *function_name = function_names[i]; - desc.name = [@(function_name) copy]; - - NSError *error = NULL; - rt_intersection_funcs[kernel_type][i] = [device->mtlLibrary[kernel_type] - newFunctionWithDescriptor:desc - error:&error]; - - if (rt_intersection_funcs[kernel_type][i] == nil) { - NSString *err = [error localizedDescription]; - string errors = [err UTF8String]; - - device->set_error(string_printf( - "Error getting intersection function \"%s\": %s", function_name, errors.c_str())); - any_error = true; - break; - } - - rt_intersection_funcs[kernel_type][i].label = [@(function_name) copy]; - } - } - } - md5.append(device->source_used_for_compile[kernel_type]); - - string hash = md5.get_hex(); - if (loaded_md5[kernel_type] == hash) { - return true; - } - - if (!any_error) { - NSArray *table_functions[METALRT_TABLE_NUM] = {nil}; - NSArray *function_list = nil; - - if (device->use_metalrt) { - id<MTLFunction> curve_intersect_default = nil; - id<MTLFunction> curve_intersect_shadow = nil; - id<MTLFunction> point_intersect_default = nil; - id<MTLFunction> point_intersect_shadow = nil; - if (device->kernel_features & KERNEL_FEATURE_HAIR) { - /* Add curve intersection programs. */ - if (device->kernel_features & KERNEL_FEATURE_HAIR_THICK) { - /* Slower programs for thick hair since that also slows down ribbons. - * Ideally this should not be needed. */ - curve_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL]; - curve_intersect_shadow = - rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL_SHADOW]; - } - else { - curve_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON]; - curve_intersect_shadow = - rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON_SHADOW]; - } - } - if (device->kernel_features & KERNEL_FEATURE_POINTCLOUD) { - point_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_POINT]; - point_intersect_shadow = rt_intersection_funcs[kernel_type][METALRT_FUNC_POINT_SHADOW]; - } - table_functions[METALRT_TABLE_DEFAULT] = [NSArray - arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_TRI], - curve_intersect_default ? - curve_intersect_default : - rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_BOX], - point_intersect_default ? - point_intersect_default : - rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_BOX], - nil]; - table_functions[METALRT_TABLE_SHADOW] = [NSArray - arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_TRI], - curve_intersect_shadow ? - curve_intersect_shadow : - rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_BOX], - point_intersect_shadow ? - point_intersect_shadow : - rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_BOX], - nil]; - table_functions[METALRT_TABLE_LOCAL] = [NSArray - arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_TRI], - rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_BOX], - rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_BOX], - nil]; - - NSMutableSet *unique_functions = [NSMutableSet - setWithArray:table_functions[METALRT_TABLE_DEFAULT]]; - [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_SHADOW]]; - [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL]]; - - function_list = [[NSArray arrayWithArray:[unique_functions allObjects]] - sortedArrayUsingComparator:^NSComparisonResult(id<MTLFunction> f1, id<MTLFunction> f2) { - return [f1.label compare:f2.label]; - }]; - - unique_functions = nil; - } - - metal_printf("Starting %s \"cycles_metal_...\" pipeline builds\n", - kernel_type_as_string(kernel_type)); - - tbb::task_arena local_arena(max_mtlcompiler_threads); - local_arena.execute([&]() { - parallel_for(int(0), int(DEVICE_KERNEL_NUM), [&](int i) { - /* skip megakernel */ - if (i == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) { - return; - } - - /* Only specialize kernels where it can make an impact. */ - if (kernel_type == PSO_SPECIALISED) { - if (i < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || - i > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) { - return; - } - } - - MetalDeviceKernel &kernel = kernels_[i]; - - const std::string function_name = std::string("cycles_metal_") + - device_kernel_as_string((DeviceKernel)i); - int threads_per_threadgroup = device->max_threads_per_threadgroup; - if (i > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL && i < DEVICE_KERNEL_INTEGRATOR_RESET) { - /* Always use 512 for the sorting kernels */ - threads_per_threadgroup = 512; - } - - NSArray *kernel_function_list = nil; - - if (i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || - i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW || - i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE || - i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK || - i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) { - kernel_function_list = function_list; - } - - MetalKernelLoadDesc desc; - desc.pso_index = kernel_type; - desc.kernel_index = i; - desc.linked_functions = kernel_function_list; - desc.intersector_functions.defaults = table_functions[METALRT_TABLE_DEFAULT]; - desc.intersector_functions.shadow = table_functions[METALRT_TABLE_SHADOW]; - desc.intersector_functions.local = table_functions[METALRT_TABLE_LOCAL]; - desc.constant_values = constant_values; - desc.threads_per_threadgroup = threads_per_threadgroup; - desc.function_name = function_name.c_str(); - - bool success = kernel.load(device, desc, md5); - - any_error |= !success; - }); - }); + auto shader_cache = get_shader_cache(device->mtlDevice); + for (int i = 0; i < DEVICE_KERNEL_NUM; i++) { + shader_cache->load_kernel((DeviceKernel)i, device, scene_specialized); } - bool loaded = !any_error; - if (loaded) { - loaded_md5[kernel_type] = hash; + if (!scene_specialized || getenv("CYCLES_METAL_PROFILING")) { + shader_cache->wait_for_all(); } - return loaded; -} - -const MetalDeviceKernel &MetalDeviceKernels::get(DeviceKernel kernel) const -{ - return kernels_[(int)kernel]; + return true; } -bool MetalDeviceKernels::available(DeviceKernel kernel) const +const MetalKernelPipeline *MetalDeviceKernels::get_best_pipeline(const MetalDevice *device, + DeviceKernel kernel) { - return kernels_[(int)kernel].get_pso().function != nil; + return get_shader_cache(device->mtlDevice)->get_best_pipeline(kernel, device); } CCL_NAMESPACE_END diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm index 1686ab95ffa..ec10e091b25 100644 --- a/intern/cycles/device/metal/queue.mm +++ b/intern/cycles/device/metal/queue.mm @@ -108,9 +108,6 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, VLOG(3) << "Metal queue launch " << device_kernel_as_string(kernel) << ", work_size " << work_size; - const MetalDeviceKernel &metal_kernel = metal_device->kernels.get(kernel); - const MetalKernelPipeline &metal_kernel_pso = metal_kernel.get_pso(); - id<MTLComputeCommandEncoder> mtlComputeCommandEncoder = get_compute_encoder(kernel); /* Determine size requirement for argument buffer. */ @@ -212,6 +209,14 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, } bytes_written = globals_offsets + sizeof(KernelParamsMetal); + const MetalKernelPipeline *metal_kernel_pso = MetalDeviceKernels::get_best_pipeline(metal_device, + kernel); + if (!metal_kernel_pso) { + metal_device->set_error( + string_printf("No MetalKernelPipeline for %s\n", device_kernel_as_string(kernel))); + return false; + } + /* Encode ancillaries */ [metal_device->mtlAncillaryArgEncoder setArgumentBuffer:arg_buffer offset:metal_offsets]; [metal_device->mtlAncillaryArgEncoder setBuffer:metal_device->texture_bindings_2d @@ -228,14 +233,14 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, } for (int table = 0; table < METALRT_TABLE_NUM; table++) { - if (metal_kernel_pso.intersection_func_table[table]) { - [metal_kernel_pso.intersection_func_table[table] setBuffer:arg_buffer - offset:globals_offsets - atIndex:1]; + if (metal_kernel_pso->intersection_func_table[table]) { + [metal_kernel_pso->intersection_func_table[table] setBuffer:arg_buffer + offset:globals_offsets + atIndex:1]; [metal_device->mtlAncillaryArgEncoder - setIntersectionFunctionTable:metal_kernel_pso.intersection_func_table[table] + setIntersectionFunctionTable:metal_kernel_pso->intersection_func_table[table] atIndex:3 + table]; - [mtlComputeCommandEncoder useResource:metal_kernel_pso.intersection_func_table[table] + [mtlComputeCommandEncoder useResource:metal_kernel_pso->intersection_func_table[table] usage:MTLResourceUsageRead]; } else { @@ -281,10 +286,10 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, } } - [mtlComputeCommandEncoder setComputePipelineState:metal_kernel_pso.pipeline]; + [mtlComputeCommandEncoder setComputePipelineState:metal_kernel_pso->pipeline]; /* Compute kernel launch parameters. */ - const int num_threads_per_block = metal_kernel.get_num_threads_per_block(); + const int num_threads_per_block = metal_kernel_pso->num_threads_per_block; int shared_mem_bytes = 0; @@ -314,7 +319,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, threadsPerThreadgroup:size_threads_per_threadgroup]; [mtlCommandBuffer addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) { - NSString *kernel_name = metal_kernel_pso.function.label; + NSString *kernel_name = metal_kernel_pso->function.label; /* Enhanced command buffer errors are only available in 11.0+ */ if (@available(macos 11.0, *)) { @@ -547,6 +552,8 @@ id<MTLComputeCommandEncoder> MetalDeviceQueue::get_compute_encoder(DeviceKernel computeCommandEncoderWithDispatchType:concurrent ? MTLDispatchTypeConcurrent : MTLDispatchTypeSerial]; + [mtlComputeEncoder setLabel:@(device_kernel_as_string(kernel))]; + /* declare usage of MTLBuffers etc */ prepare_resources(kernel); } diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index 8830d8c44ac..9fc265bc327 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -23,6 +23,7 @@ # include "util/md5.h" # include "util/path.h" # include "util/progress.h" +# include "util/task.h" # include "util/time.h" # undef __KERNEL_CPU__ @@ -216,6 +217,25 @@ static OptixResult optixUtilDenoiserInvokeTiled(OptixDenoiser denoiser, return OPTIX_SUCCESS; } +# if OPTIX_ABI_VERSION >= 55 +static void execute_optix_task(TaskPool &pool, OptixTask task, OptixResult &failure_reason) +{ + OptixTask additional_tasks[16]; + unsigned int num_additional_tasks = 0; + + const OptixResult result = optixTaskExecute(task, additional_tasks, 16, &num_additional_tasks); + if (result == OPTIX_SUCCESS) { + for (unsigned int i = 0; i < num_additional_tasks; ++i) { + pool.push(function_bind( + &execute_optix_task, std::ref(pool), additional_tasks[i], std::ref(failure_reason))); + } + } + else { + failure_reason = result; + } +} +# endif + } // namespace OptiXDevice::Denoiser::Denoiser(OptiXDevice *device) @@ -453,6 +473,23 @@ bool OptiXDevice::load_kernels(const uint kernel_features) return false; } +# if OPTIX_ABI_VERSION >= 55 + OptixTask task = nullptr; + OptixResult result = optixModuleCreateFromPTXWithTasks(context, + &module_options, + &pipeline_options, + ptx_data.data(), + ptx_data.size(), + nullptr, + nullptr, + &optix_module, + &task); + if (result == OPTIX_SUCCESS) { + TaskPool pool; + execute_optix_task(pool, task, result); + pool.wait_work(); + } +# else const OptixResult result = optixModuleCreateFromPTX(context, &module_options, &pipeline_options, @@ -461,6 +498,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features) nullptr, 0, &optix_module); +# endif if (result != OPTIX_SUCCESS) { set_error(string_printf("Failed to load OptiX kernel from '%s' (%s)", ptx_filename.c_str(), diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index d97854a52d0..473bdb67920 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -145,6 +145,7 @@ set(SRC_KERNEL_SVM_HEADERS svm/normal.h svm/ramp.h svm/ramp_util.h + svm/sepcomb_color.h svm/sepcomb_hsv.h svm/sepcomb_vector.h svm/sky.h diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h index 4e309f16c08..0ed52074a90 100644 --- a/intern/cycles/kernel/device/metal/compat.h +++ b/intern/cycles/kernel/device/metal/compat.h @@ -29,10 +29,26 @@ using namespace metal::raytracing; /* Qualifiers */ -#define ccl_device -#define ccl_device_inline ccl_device -#define ccl_device_forceinline ccl_device -#define ccl_device_noinline ccl_device __attribute__((noinline)) +#if defined(__KERNEL_METAL_APPLE__) + +/* Inline everything for Apple GPUs. + * This gives ~1.1x speedup and 10% spill reduction for integator_shade_surface + * at the cost of longer compile times (~4.5 minutes on M1 Max). */ + +# define ccl_device __attribute__((always_inline)) +# define ccl_device_inline __attribute__((always_inline)) +# define ccl_device_forceinline __attribute__((always_inline)) +# define ccl_device_noinline __attribute__((always_inline)) + +#else + +# define ccl_device +# define ccl_device_inline ccl_device +# define ccl_device_forceinline ccl_device +# define ccl_device_noinline ccl_device __attribute__((noinline)) + +#endif + #define ccl_device_noinline_cpu ccl_device #define ccl_device_inline_method ccl_device #define ccl_global device diff --git a/intern/cycles/kernel/osl/services.cpp b/intern/cycles/kernel/osl/services.cpp index 832498f1f73..e2e10b5b83f 100644 --- a/intern/cycles/kernel/osl/services.cpp +++ b/intern/cycles/kernel/osl/services.cpp @@ -1304,8 +1304,38 @@ bool OSLRenderServices::texture(ustring filename, break; } case OSLTextureHandle::SVM: { - /* Packed texture. */ - float4 rgba = kernel_tex_image_interp(kernel_globals, handle->svm_slot, s, 1.0f - t); + int id = -1; + if (handle->svm_slots[0].w == -1) { + /* Packed single texture. */ + id = handle->svm_slots[0].y; + } + else { + /* Packed tiled texture. */ + int tx = (int)s; + int ty = (int)t; + int tile = 1001 + 10 * ty + tx; + for (int4 tile_node : handle->svm_slots) { + if (tile_node.x == tile) { + id = tile_node.y; + break; + } + if (tile_node.z == tile) { + id = tile_node.w; + break; + } + } + s -= tx; + t -= ty; + } + + float4 rgba; + if (id == -1) { + rgba = make_float4( + TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A); + } + else { + rgba = kernel_tex_image_interp(kernel_globals, id, s, 1.0f - t); + } result[0] = rgba[0]; if (nchannels > 1) @@ -1319,7 +1349,7 @@ bool OSLRenderServices::texture(ustring filename, } case OSLTextureHandle::IES: { /* IES light. */ - result[0] = kernel_ies_interp(kernel_globals, handle->svm_slot, s, t); + result[0] = kernel_ies_interp(kernel_globals, handle->svm_slots[0].y, s, t); status = true; break; } @@ -1413,7 +1443,7 @@ bool OSLRenderServices::texture3d(ustring filename, /* Packed texture. */ ShaderData *sd = (ShaderData *)(sg->renderstate); KernelGlobals kernel_globals = sd->osl_globals; - int slot = handle->svm_slot; + int slot = handle->svm_slots[0].y; float3 P_float3 = make_float3(P.x, P.y, P.z); float4 rgba = kernel_tex_image_interp_3d(kernel_globals, slot, P_float3, INTERPOLATION_NONE); diff --git a/intern/cycles/kernel/osl/services.h b/intern/cycles/kernel/osl/services.h index 653fa017140..edffd912bad 100644 --- a/intern/cycles/kernel/osl/services.h +++ b/intern/cycles/kernel/osl/services.h @@ -39,18 +39,26 @@ struct KernelGlobalsCPU; * with additional data. * * These are stored in a concurrent hash map, because OSL can compile multiple - * shaders in parallel. */ + * shaders in parallel. + * + * NOTE: The svm_slots array contains a compressed mapping of tile to svm_slot pairs + * stored as follows: x:tile_a, y:svm_slot_a, z:tile_b, w:svm_slot_b etc. */ struct OSLTextureHandle : public OIIO::RefCnt { enum Type { OIIO, SVM, IES, BEVEL, AO }; + OSLTextureHandle(Type type, const vector<int4> &svm_slots) + : type(type), svm_slots(svm_slots), oiio_handle(NULL), processor(NULL) + { + } + OSLTextureHandle(Type type = OIIO, int svm_slot = -1) - : type(type), svm_slot(svm_slot), oiio_handle(NULL), processor(NULL) + : OSLTextureHandle(type, {make_int4(0, svm_slot, -1, -1)}) { } Type type; - int svm_slot; + vector<int4> svm_slots; OSL::TextureSystem::TextureHandle *oiio_handle; ColorSpaceProcessor *processor; }; diff --git a/intern/cycles/kernel/osl/shaders/CMakeLists.txt b/intern/cycles/kernel/osl/shaders/CMakeLists.txt index 7ced21c5670..741bce7c399 100644 --- a/intern/cycles/kernel/osl/shaders/CMakeLists.txt +++ b/intern/cycles/kernel/osl/shaders/CMakeLists.txt @@ -16,6 +16,7 @@ set(SRC_OSL node_camera.osl node_checker_texture.osl node_clamp.osl + node_combine_color.osl node_combine_rgb.osl node_combine_hsv.osl node_combine_xyz.osl @@ -68,6 +69,7 @@ set(SRC_OSL node_refraction_bsdf.osl node_rgb_curves.osl node_rgb_ramp.osl + node_separate_color.osl node_separate_rgb.osl node_separate_hsv.osl node_separate_xyz.osl diff --git a/intern/cycles/kernel/osl/shaders/node_color.h b/intern/cycles/kernel/osl/shaders/node_color.h index 388dd114e9a..06735f5b03d 100644 --- a/intern/cycles/kernel/osl/shaders/node_color.h +++ b/intern/cycles/kernel/osl/shaders/node_color.h @@ -148,3 +148,53 @@ color hsv_to_rgb(color hsv) return rgb; } + +color rgb_to_hsl(color rgb) +{ + float cmax, cmin, h, s, l; + + cmax = max(rgb[0], max(rgb[1], rgb[2])); + cmin = min(rgb[0], min(rgb[1], rgb[2])); + l = min(1.0, (cmax + cmin) / 2.0); + + if (cmax == cmin) { + h = s = 0.0; /* achromatic */ + } + else { + float cdelta = cmax - cmin; + s = l > 0.5 ? cdelta / (2.0 - cmax - cmin) : cdelta / (cmax + cmin); + if (cmax == rgb[0]) { + h = (rgb[1] - rgb[2]) / cdelta + (rgb[1] < rgb[2] ? 6.0 : 0.0); + } + else if (cmax == rgb[1]) { + h = (rgb[2] - rgb[0]) / cdelta + 2.0; + } + else { + h = (rgb[0] - rgb[1]) / cdelta + 4.0; + } + } + h /= 6.0; + + return color(h, s, l); +} + +color hsl_to_rgb(color hsl) +{ + float nr, ng, nb, chroma, h, s, l; + + h = hsl[0]; + s = hsl[1]; + l = hsl[2]; + + nr = abs(h * 6.0 - 3.0) - 1.0; + ng = 2.0 - abs(h * 6.0 - 2.0); + nb = 2.0 - abs(h * 6.0 - 4.0); + + nr = clamp(nr, 0.0, 1.0); + nb = clamp(nb, 0.0, 1.0); + ng = clamp(ng, 0.0, 1.0); + + chroma = (1.0 - abs(2.0 * l - 1.0)) * s; + + return color((nr - 0.5) * chroma + l, (ng - 0.5) * chroma + l, (nb - 0.5) * chroma + l); +} diff --git a/intern/cycles/kernel/osl/shaders/node_combine_color.osl b/intern/cycles/kernel/osl/shaders/node_combine_color.osl new file mode 100644 index 00000000000..681a592d2bb --- /dev/null +++ b/intern/cycles/kernel/osl/shaders/node_combine_color.osl @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#include "stdcycles.h" + +shader node_combine_color(string color_type = "rgb", + float Red = 0.0, + float Green = 0.0, + float Blue = 0.0, + output color Color = 0.8) +{ + if (color_type == "rgb" || color_type == "hsv" || color_type == "hsl") + Color = color(color_type, Red, Green, Blue); + else + warning("%s", "Unknown color space!"); +} diff --git a/intern/cycles/kernel/osl/shaders/node_separate_color.osl b/intern/cycles/kernel/osl/shaders/node_separate_color.osl new file mode 100644 index 00000000000..6f3e3149d8e --- /dev/null +++ b/intern/cycles/kernel/osl/shaders/node_separate_color.osl @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#include "node_color.h" +#include "stdcycles.h" + +shader node_separate_color(string color_type = "rgb", + color Color = 0.8, + output float Red = 0.0, + output float Green = 0.0, + output float Blue = 0.0) +{ + color col; + if (color_type == "rgb") + col = Color; + else if (color_type == "hsv") + col = rgb_to_hsv(Color); + else if (color_type == "hsl") + col = rgb_to_hsl(Color); + else + warning("%s", "Unknown color space!"); + + Red = col[0]; + Green = col[1]; + Blue = col[2]; +} diff --git a/intern/cycles/kernel/svm/color_util.h b/intern/cycles/kernel/svm/color_util.h index b439721383c..fa22d4bc8c2 100644 --- a/intern/cycles/kernel/svm/color_util.h +++ b/intern/cycles/kernel/svm/color_util.h @@ -307,4 +307,30 @@ ccl_device_inline float3 svm_brightness_contrast(float3 color, float brightness, return color; } +ccl_device float3 svm_combine_color(NodeCombSepColorType type, float3 color) +{ + switch (type) { + case NODE_COMBSEP_COLOR_HSV: + return hsv_to_rgb(color); + case NODE_COMBSEP_COLOR_HSL: + return hsl_to_rgb(color); + case NODE_COMBSEP_COLOR_RGB: + default: + return color; + } +} + +ccl_device float3 svm_separate_color(NodeCombSepColorType type, float3 color) +{ + switch (type) { + case NODE_COMBSEP_COLOR_HSV: + return rgb_to_hsv(color); + case NODE_COMBSEP_COLOR_HSL: + return rgb_to_hsl(color); + case NODE_COMBSEP_COLOR_RGB: + default: + return color; + } +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/sepcomb_color.h b/intern/cycles/kernel/svm/sepcomb_color.h new file mode 100644 index 00000000000..d186e7f163b --- /dev/null +++ b/intern/cycles/kernel/svm/sepcomb_color.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +ccl_device_noinline void svm_node_combine_color(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint color_type, + uint inputs_stack_offsets, + uint result_stack_offset) +{ + uint red_stack_offset, green_stack_offset, blue_stack_offset; + svm_unpack_node_uchar3( + inputs_stack_offsets, &red_stack_offset, &green_stack_offset, &blue_stack_offset); + + float r = stack_load_float(stack, red_stack_offset); + float g = stack_load_float(stack, green_stack_offset); + float b = stack_load_float(stack, blue_stack_offset); + + /* Combine, and convert back to RGB */ + float3 color = svm_combine_color((NodeCombSepColorType)color_type, make_float3(r, g, b)); + + if (stack_valid(result_stack_offset)) + stack_store_float3(stack, result_stack_offset, color); +} + +ccl_device_noinline void svm_node_separate_color(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint color_type, + uint input_stack_offset, + uint results_stack_offsets) +{ + float3 color = stack_load_float3(stack, input_stack_offset); + + /* Convert color space */ + color = svm_separate_color((NodeCombSepColorType)color_type, color); + + uint red_stack_offset, green_stack_offset, blue_stack_offset; + svm_unpack_node_uchar3( + results_stack_offsets, &red_stack_offset, &green_stack_offset, &blue_stack_offset); + + if (stack_valid(red_stack_offset)) + stack_store_float(stack, red_stack_offset, color.x); + if (stack_valid(green_stack_offset)) + stack_store_float(stack, green_stack_offset, color.y); + if (stack_valid(blue_stack_offset)) + stack_store_float(stack, blue_stack_offset, color.z); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index 08352a6231f..5def943c87f 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -181,6 +181,7 @@ CCL_NAMESPACE_END #include "kernel/svm/noisetex.h" #include "kernel/svm/normal.h" #include "kernel/svm/ramp.h" +#include "kernel/svm/sepcomb_color.h" #include "kernel/svm/sepcomb_hsv.h" #include "kernel/svm/sepcomb_vector.h" #include "kernel/svm/sky.h" @@ -508,6 +509,12 @@ ccl_device void svm_eval_nodes(KernelGlobals kg, case NODE_MIX: offset = svm_node_mix(kg, sd, stack, node.y, node.z, node.w, offset); break; + case NODE_SEPARATE_COLOR: + svm_node_separate_color(kg, sd, stack, node.y, node.z, node.w); + break; + case NODE_COMBINE_COLOR: + svm_node_combine_color(kg, sd, stack, node.y, node.z, node.w); + break; case NODE_SEPARATE_VECTOR: svm_node_separate_vector(sd, stack, node.y, node.z, node.w); break; diff --git a/intern/cycles/kernel/svm/types.h b/intern/cycles/kernel/svm/types.h index bede58f7a54..82109ec4c4f 100644 --- a/intern/cycles/kernel/svm/types.h +++ b/intern/cycles/kernel/svm/types.h @@ -92,6 +92,8 @@ typedef enum ShaderNodeType { NODE_NORMAL_MAP, NODE_INVERT, NODE_MIX, + NODE_SEPARATE_COLOR, + NODE_COMBINE_COLOR, NODE_SEPARATE_VECTOR, NODE_COMBINE_VECTOR, NODE_SEPARATE_HSV, @@ -487,6 +489,12 @@ typedef enum NodePrincipledHairParametrization { NODE_PRINCIPLED_HAIR_NUM, } NodePrincipledHairParametrization; +typedef enum NodeCombSepColorType { + NODE_COMBSEP_COLOR_RGB, + NODE_COMBSEP_COLOR_HSV, + NODE_COMBSEP_COLOR_HSL, +} NodeCombSepColorType; + /* Closure */ typedef enum ClosureType { diff --git a/intern/cycles/scene/image.cpp b/intern/cycles/scene/image.cpp index 21fde88915e..c61ad1f1d71 100644 --- a/intern/cycles/scene/image.cpp +++ b/intern/cycles/scene/image.cpp @@ -117,12 +117,12 @@ void ImageHandle::clear() manager = NULL; } -bool ImageHandle::empty() +bool ImageHandle::empty() const { return tile_slots.empty(); } -int ImageHandle::num_tiles() +int ImageHandle::num_tiles() const { return tile_slots.size(); } @@ -154,6 +154,35 @@ int ImageHandle::svm_slot(const int tile_index) const return tile_slots[tile_index]; } +vector<int4> ImageHandle::get_svm_slots() const +{ + const size_t num_nodes = divide_up(tile_slots.size(), 2); + + vector<int4> svm_slots; + svm_slots.reserve(num_nodes); + for (size_t i = 0; i < num_nodes; i++) { + int4 node; + + int slot = tile_slots[2 * i]; + node.x = manager->images[slot]->loader->get_tile_number(); + node.y = slot; + + if ((2 * i + 1) < tile_slots.size()) { + slot = tile_slots[2 * i + 1]; + node.z = manager->images[slot]->loader->get_tile_number(); + node.w = slot; + } + else { + node.z = -1; + node.w = -1; + } + + svm_slots.push_back(node); + } + + return svm_slots; +} + device_texture *ImageHandle::image_memory(const int tile_index) const { if (tile_index >= tile_slots.size()) { @@ -266,6 +295,11 @@ ustring ImageLoader::osl_filepath() const return ustring(); } +int ImageLoader::get_tile_number() const +{ + return 0; +} + bool ImageLoader::equals(const ImageLoader *a, const ImageLoader *b) { if (a == NULL && b == NULL) { @@ -397,6 +431,19 @@ ImageHandle ImageManager::add_image(ImageLoader *loader, return handle; } +ImageHandle ImageManager::add_image(const vector<ImageLoader *> &loaders, + const ImageParams ¶ms) +{ + ImageHandle handle; + for (ImageLoader *loader : loaders) { + const int slot = add_image_slot(loader, params, true); + handle.tile_slots.push_back(slot); + } + + handle.manager = this; + return handle; +} + int ImageManager::add_image_slot(ImageLoader *loader, const ImageParams ¶ms, const bool builtin) diff --git a/intern/cycles/scene/image.h b/intern/cycles/scene/image.h index 4d0dee35eca..9edb6a7eaf5 100644 --- a/intern/cycles/scene/image.h +++ b/intern/cycles/scene/image.h @@ -112,6 +112,9 @@ class ImageLoader { /* Optional for OSL texture cache. */ virtual ustring osl_filepath() const; + /* Optional for tiled textures loaded externally. */ + virtual int get_tile_number() const; + /* Free any memory used for loading metadata and pixels. */ virtual void cleanup(){}; @@ -139,11 +142,12 @@ class ImageHandle { void clear(); - bool empty(); - int num_tiles(); + bool empty() const; + int num_tiles() const; ImageMetaData metadata(); int svm_slot(const int tile_index = 0) const; + vector<int4> get_svm_slots() const; device_texture *image_memory(const int tile_index = 0) const; VDBImageLoader *vdb_loader(const int tile_index = 0) const; @@ -169,6 +173,7 @@ class ImageManager { const ImageParams ¶ms, const array<int> &tiles); ImageHandle add_image(ImageLoader *loader, const ImageParams ¶ms, const bool builtin = true); + ImageHandle add_image(const vector<ImageLoader *> &loaders, const ImageParams ¶ms); void device_update(Device *device, Scene *scene, Progress &progress); void device_update_slot(Device *device, Scene *scene, int slot, Progress *progress); diff --git a/intern/cycles/scene/osl.cpp b/intern/cycles/scene/osl.cpp index ffa1a2f5623..6698e6e2cce 100644 --- a/intern/cycles/scene/osl.cpp +++ b/intern/cycles/scene/osl.cpp @@ -1211,14 +1211,15 @@ void OSLCompiler::parameter_texture(const char *name, ustring filename, ustring parameter(name, filename); } -void OSLCompiler::parameter_texture(const char *name, int svm_slot) +void OSLCompiler::parameter_texture(const char *name, const ImageHandle &handle) { /* Texture loaded through SVM image texture system. We generate a unique * name, which ends up being used in OSLRenderServices::get_texture_handle * to get handle again. Note that this name must be unique between multiple * render sessions as the render services are shared. */ ustring filename(string_printf("@svm%d", texture_shared_unique_id++).c_str()); - services->textures.insert(filename, new OSLTextureHandle(OSLTextureHandle::SVM, svm_slot)); + services->textures.insert(filename, + new OSLTextureHandle(OSLTextureHandle::SVM, handle.get_svm_slots())); parameter(name, filename); } @@ -1290,7 +1291,7 @@ void OSLCompiler::parameter_texture(const char * /* name */, { } -void OSLCompiler::parameter_texture(const char * /* name */, int /* svm_slot */) +void OSLCompiler::parameter_texture(const char * /* name */, const ImageHandle & /*handle*/) { } diff --git a/intern/cycles/scene/osl.h b/intern/cycles/scene/osl.h index f0f97dbcaad..bf27069b1b1 100644 --- a/intern/cycles/scene/osl.h +++ b/intern/cycles/scene/osl.h @@ -147,7 +147,7 @@ class OSLCompiler { void parameter_attribute(const char *name, ustring s); void parameter_texture(const char *name, ustring filename, ustring colorspace); - void parameter_texture(const char *name, int svm_slot); + void parameter_texture(const char *name, const ImageHandle &handle); void parameter_texture_ies(const char *name, int svm_slot); ShaderType output_type() diff --git a/intern/cycles/scene/shader_nodes.cpp b/intern/cycles/scene/shader_nodes.cpp index 95fccf725f3..03c152928d5 100644 --- a/intern/cycles/scene/shader_nodes.cpp +++ b/intern/cycles/scene/shader_nodes.cpp @@ -19,7 +19,6 @@ #include "util/color.h" #include "util/foreach.h" #include "util/log.h" -#include "util/string.h" #include "util/transform.h" #include "kernel/tables.h" @@ -450,22 +449,19 @@ void ImageTextureNode::compile(OSLCompiler &compiler) const ustring known_colorspace = metadata.colorspace; if (handle.svm_slot() == -1) { - /* OIIO currently does not support <UVTILE> substitutions natively. Replace with a format they - * understand. */ - std::string osl_filename = filename.string(); - string_replace(osl_filename, "<UVTILE>", "<U>_<V>"); compiler.parameter_texture( - "filename", ustring(osl_filename), compress_as_srgb ? u_colorspace_raw : known_colorspace); + "filename", filename, compress_as_srgb ? u_colorspace_raw : known_colorspace); } else { - compiler.parameter_texture("filename", handle.svm_slot()); + compiler.parameter_texture("filename", handle); } const bool unassociate_alpha = !(ColorSpaceManager::colorspace_is_data(colorspace) || alpha_type == IMAGE_ALPHA_CHANNEL_PACKED || alpha_type == IMAGE_ALPHA_IGNORE); const bool is_tiled = (filename.find("<UDIM>") != string::npos || - filename.find("<UVTILE>") != string::npos); + filename.find("<UVTILE>") != string::npos) || + handle.num_tiles() > 1; compiler.parameter(this, "projection"); compiler.parameter(this, "projection_blend"); @@ -610,7 +606,7 @@ void EnvironmentTextureNode::compile(OSLCompiler &compiler) "filename", filename, compress_as_srgb ? u_colorspace_raw : known_colorspace); } else { - compiler.parameter_texture("filename", handle.svm_slot()); + compiler.parameter_texture("filename", handle); } compiler.parameter(this, "projection"); @@ -965,7 +961,7 @@ void SkyTextureNode::compile(OSLCompiler &compiler) compiler.parameter_array("nishita_data", sunsky.nishita_data, 10); /* nishita texture */ if (sky_type == NODE_SKY_NISHITA) { - compiler.parameter_texture("filename", handle.svm_slot()); + compiler.parameter_texture("filename", handle); } compiler.add(this, "node_sky_texture"); } @@ -1860,7 +1856,7 @@ void PointDensityTextureNode::compile(OSLCompiler &compiler) handle = image_manager->add_image(filename.string(), image_params()); } - compiler.parameter_texture("filename", handle.svm_slot()); + compiler.parameter_texture("filename", handle); if (space == NODE_TEX_VOXEL_SPACE_WORLD) { compiler.parameter("mapping", tfm); compiler.parameter("use_mapping", 1); @@ -5010,6 +5006,63 @@ void MixNode::constant_fold(const ConstantFolder &folder) } } +/* Combine Color */ + +NODE_DEFINE(CombineColorNode) +{ + NodeType *type = NodeType::add("combine_color", create, NodeType::SHADER); + + static NodeEnum type_enum; + type_enum.insert("rgb", NODE_COMBSEP_COLOR_RGB); + type_enum.insert("hsv", NODE_COMBSEP_COLOR_HSV); + type_enum.insert("hsl", NODE_COMBSEP_COLOR_HSL); + SOCKET_ENUM(color_type, "Type", type_enum, NODE_COMBSEP_COLOR_RGB); + + SOCKET_IN_FLOAT(r, "Red", 0.0f); + SOCKET_IN_FLOAT(g, "Green", 0.0f); + SOCKET_IN_FLOAT(b, "Blue", 0.0f); + + SOCKET_OUT_COLOR(color, "Color"); + + return type; +} + +CombineColorNode::CombineColorNode() : ShaderNode(get_node_type()) +{ +} + +void CombineColorNode::constant_fold(const ConstantFolder &folder) +{ + if (folder.all_inputs_constant()) { + folder.make_constant(svm_combine_color(color_type, make_float3(r, g, b))); + } +} + +void CombineColorNode::compile(SVMCompiler &compiler) +{ + ShaderInput *red_in = input("Red"); + ShaderInput *green_in = input("Green"); + ShaderInput *blue_in = input("Blue"); + ShaderOutput *color_out = output("Color"); + + int red_stack_offset = compiler.stack_assign(red_in); + int green_stack_offset = compiler.stack_assign(green_in); + int blue_stack_offset = compiler.stack_assign(blue_in); + int color_stack_offset = compiler.stack_assign(color_out); + + compiler.add_node( + NODE_COMBINE_COLOR, + color_type, + compiler.encode_uchar4(red_stack_offset, green_stack_offset, blue_stack_offset), + color_stack_offset); +} + +void CombineColorNode::compile(OSLCompiler &compiler) +{ + compiler.parameter(this, "color_type"); + compiler.add(this, "node_combine_color"); +} + /* Combine RGB */ NODE_DEFINE(CombineRGBNode) @@ -5250,6 +5303,70 @@ void BrightContrastNode::compile(OSLCompiler &compiler) compiler.add(this, "node_brightness"); } +/* Separate Color */ + +NODE_DEFINE(SeparateColorNode) +{ + NodeType *type = NodeType::add("separate_color", create, NodeType::SHADER); + + static NodeEnum type_enum; + type_enum.insert("rgb", NODE_COMBSEP_COLOR_RGB); + type_enum.insert("hsv", NODE_COMBSEP_COLOR_HSV); + type_enum.insert("hsl", NODE_COMBSEP_COLOR_HSL); + SOCKET_ENUM(color_type, "Type", type_enum, NODE_COMBSEP_COLOR_RGB); + + SOCKET_IN_COLOR(color, "Color", zero_float3()); + + SOCKET_OUT_FLOAT(r, "Red"); + SOCKET_OUT_FLOAT(g, "Green"); + SOCKET_OUT_FLOAT(b, "Blue"); + + return type; +} + +SeparateColorNode::SeparateColorNode() : ShaderNode(get_node_type()) +{ +} + +void SeparateColorNode::constant_fold(const ConstantFolder &folder) +{ + if (folder.all_inputs_constant()) { + float3 col = svm_separate_color(color_type, color); + + for (int channel = 0; channel < 3; channel++) { + if (outputs[channel] == folder.output) { + folder.make_constant(col[channel]); + return; + } + } + } +} + +void SeparateColorNode::compile(SVMCompiler &compiler) +{ + ShaderInput *color_in = input("Color"); + ShaderOutput *red_out = output("Red"); + ShaderOutput *green_out = output("Green"); + ShaderOutput *blue_out = output("Blue"); + + int color_stack_offset = compiler.stack_assign(color_in); + int red_stack_offset = compiler.stack_assign(red_out); + int green_stack_offset = compiler.stack_assign(green_out); + int blue_stack_offset = compiler.stack_assign(blue_out); + + compiler.add_node( + NODE_SEPARATE_COLOR, + color_type, + color_stack_offset, + compiler.encode_uchar4(red_stack_offset, green_stack_offset, blue_stack_offset)); +} + +void SeparateColorNode::compile(OSLCompiler &compiler) +{ + compiler.parameter(this, "color_type"); + compiler.add(this, "node_separate_color"); +} + /* Separate RGB */ NODE_DEFINE(SeparateRGBNode) diff --git a/intern/cycles/scene/shader_nodes.h b/intern/cycles/scene/shader_nodes.h index 9aef5d3151f..ac40a397c1e 100644 --- a/intern/cycles/scene/shader_nodes.h +++ b/intern/cycles/scene/shader_nodes.h @@ -1101,6 +1101,17 @@ class MixNode : public ShaderNode { NODE_SOCKET_API(float, fac) }; +class CombineColorNode : public ShaderNode { + public: + SHADER_NODE_CLASS(CombineColorNode) + void constant_fold(const ConstantFolder &folder); + + NODE_SOCKET_API(NodeCombSepColorType, color_type) + NODE_SOCKET_API(float, r) + NODE_SOCKET_API(float, g) + NODE_SOCKET_API(float, b) +}; + class CombineRGBNode : public ShaderNode { public: SHADER_NODE_CLASS(CombineRGBNode) @@ -1150,6 +1161,15 @@ class BrightContrastNode : public ShaderNode { NODE_SOCKET_API(float, contrast) }; +class SeparateColorNode : public ShaderNode { + public: + SHADER_NODE_CLASS(SeparateColorNode) + void constant_fold(const ConstantFolder &folder); + + NODE_SOCKET_API(NodeCombSepColorType, color_type) + NODE_SOCKET_API(float3, color) +}; + class SeparateRGBNode : public ShaderNode { public: SHADER_NODE_CLASS(SeparateRGBNode) diff --git a/intern/cycles/util/color.h b/intern/cycles/util/color.h index cccccde3ba6..795c3754976 100644 --- a/intern/cycles/util/color.h +++ b/intern/cycles/util/color.h @@ -152,6 +152,56 @@ ccl_device float3 hsv_to_rgb(float3 hsv) return rgb; } +ccl_device float3 rgb_to_hsl(float3 rgb) +{ + float cmax, cmin, h, s, l; + + cmax = fmaxf(rgb.x, fmaxf(rgb.y, rgb.z)); + cmin = min(rgb.x, min(rgb.y, rgb.z)); + l = min(1.0f, (cmax + cmin) / 2.0f); + + if (cmax == cmin) { + h = s = 0.0f; /* achromatic */ + } + else { + float cdelta = cmax - cmin; + s = l > 0.5f ? cdelta / (2.0f - cmax - cmin) : cdelta / (cmax + cmin); + if (cmax == rgb.x) { + h = (rgb.y - rgb.z) / cdelta + (rgb.y < rgb.z ? 6.0f : 0.0f); + } + else if (cmax == rgb.y) { + h = (rgb.z - rgb.x) / cdelta + 2.0f; + } + else { + h = (rgb.x - rgb.y) / cdelta + 4.0f; + } + } + h /= 6.0f; + + return make_float3(h, s, l); +} + +ccl_device float3 hsl_to_rgb(float3 hsl) +{ + float nr, ng, nb, chroma, h, s, l; + + h = hsl.x; + s = hsl.y; + l = hsl.z; + + nr = fabsf(h * 6.0f - 3.0f) - 1.0f; + ng = 2.0f - fabsf(h * 6.0f - 2.0f); + nb = 2.0f - fabsf(h * 6.0f - 4.0f); + + nr = clamp(nr, 0.0f, 1.0f); + nb = clamp(nb, 0.0f, 1.0f); + ng = clamp(ng, 0.0f, 1.0f); + + chroma = (1.0f - fabsf(2.0f * l - 1.0f)) * s; + + return make_float3((nr - 0.5f) * chroma + l, (ng - 0.5f) * chroma + l, (nb - 0.5f) * chroma + l); +} + ccl_device float3 xyY_to_xyz(float x, float y, float Y) { float X, Z; diff --git a/intern/ghost/CMakeLists.txt b/intern/ghost/CMakeLists.txt index 9421edecf12..dceb9ced803 100644 --- a/intern/ghost/CMakeLists.txt +++ b/intern/ghost/CMakeLists.txt @@ -376,6 +376,7 @@ elseif(WIN32) intern/GHOST_DisplayManagerWin32.cpp intern/GHOST_DropTargetWin32.cpp intern/GHOST_SystemWin32.cpp + intern/GHOST_TrackpadWin32.cpp intern/GHOST_WindowWin32.cpp intern/GHOST_Wintab.cpp @@ -384,6 +385,7 @@ elseif(WIN32) intern/GHOST_DropTargetWin32.h intern/GHOST_SystemWin32.h intern/GHOST_TaskbarWin32.h + intern/GHOST_TrackpadWin32.h intern/GHOST_WindowWin32.h intern/GHOST_Wintab.h ) diff --git a/intern/ghost/intern/GHOST_SystemCocoa.mm b/intern/ghost/intern/GHOST_SystemCocoa.mm index b6836614962..8677c0b9552 100644 --- a/intern/ghost/intern/GHOST_SystemCocoa.mm +++ b/intern/ghost/intern/GHOST_SystemCocoa.mm @@ -967,7 +967,7 @@ bool GHOST_SystemCocoa::processEvents(bool waitForEvent) return anyProcessed; } -// Note: called from NSApplication delegate +/* NOTE: called from #NSApplication delegate. */ GHOST_TSuccess GHOST_SystemCocoa::handleApplicationBecomeActiveEvent() { for (GHOST_IWindow *iwindow : m_windowManager->getWindows()) { @@ -1046,7 +1046,7 @@ void GHOST_SystemCocoa::notifyExternalEventProcessed() m_outsideLoopEventProcessed = true; } -// Note: called from NSWindow delegate +/* NOTE: called from #NSWindow delegate. */ GHOST_TSuccess GHOST_SystemCocoa::handleWindowEvent(GHOST_TEventType eventType, GHOST_WindowCocoa *window) { @@ -1108,7 +1108,7 @@ GHOST_TSuccess GHOST_SystemCocoa::handleWindowEvent(GHOST_TEventType eventType, return GHOST_kSuccess; } -// Note: called from NSWindow subclass +/* NOTE: called from #NSWindow subclass. */ GHOST_TSuccess GHOST_SystemCocoa::handleDraggingEvent(GHOST_TEventType eventType, GHOST_TDragnDropTypes draggedObjectType, GHOST_WindowCocoa *window, diff --git a/intern/ghost/intern/GHOST_SystemWayland.cpp b/intern/ghost/intern/GHOST_SystemWayland.cpp index 6f694bfd9a6..dae3d578fa0 100644 --- a/intern/ghost/intern/GHOST_SystemWayland.cpp +++ b/intern/ghost/intern/GHOST_SystemWayland.cpp @@ -761,7 +761,7 @@ static void data_device_selection(void *data, input->data_offer_copy_paste = data_offer; std::string mime_receive; - for (const std::string &type : {mime_text_utf8, mime_text_plain}) { + for (const std::string type : {mime_text_utf8, mime_text_plain}) { if (data_offer->types.count(type)) { mime_receive = type; break; diff --git a/intern/ghost/intern/GHOST_SystemWin32.cpp b/intern/ghost/intern/GHOST_SystemWin32.cpp index 83869188b65..8e07bf4ea3d 100644 --- a/intern/ghost/intern/GHOST_SystemWin32.cpp +++ b/intern/ghost/intern/GHOST_SystemWin32.cpp @@ -8,12 +8,14 @@ #include "GHOST_SystemWin32.h" #include "GHOST_ContextD3D.h" #include "GHOST_EventDragnDrop.h" +#include "GHOST_EventTrackpad.h" #ifndef _WIN32_IE # define _WIN32_IE 0x0501 /* shipped before XP, so doesn't impose additional requirements */ #endif #include <commctrl.h> +#include <dwmapi.h> #include <psapi.h> #include <shellapi.h> #include <shellscalingapi.h> @@ -414,6 +416,8 @@ bool GHOST_SystemWin32::processEvents(bool waitForEvent) hasEventHandled = true; } + driveTrackpad(); + // Process all the events waiting for us while (::PeekMessageW(&msg, NULL, 0, 0, PM_REMOVE) != 0) { // TranslateMessage doesn't alter the message, and doesn't change our raw keyboard data. @@ -423,6 +427,8 @@ bool GHOST_SystemWin32::processEvents(bool waitForEvent) hasEventHandled = true; } + processTrackpad(); + /* PeekMessage above is allowed to dispatch messages to the wndproc without us * noticing, so we need to check the event manager here to see if there are * events waiting in the queue. @@ -1416,6 +1422,52 @@ bool GHOST_SystemWin32::processNDOF(RAWINPUT const &raw) } #endif // WITH_INPUT_NDOF +void GHOST_SystemWin32::driveTrackpad() +{ + GHOST_WindowWin32 *active_window = static_cast<GHOST_WindowWin32 *>( + getWindowManager()->getActiveWindow()); + if (active_window) { + active_window->updateDirectManipulation(); + } +} + +void GHOST_SystemWin32::processTrackpad() +{ + GHOST_WindowWin32 *active_window = static_cast<GHOST_WindowWin32 *>( + getWindowManager()->getActiveWindow()); + + if (!active_window) { + return; + } + + GHOST_TTrackpadInfo trackpad_info = active_window->getTrackpadInfo(); + GHOST_SystemWin32 *system = (GHOST_SystemWin32 *)getSystem(); + + int32_t cursor_x, cursor_y; + system->getCursorPosition(cursor_x, cursor_y); + + if (trackpad_info.x != 0 || trackpad_info.y != 0) { + system->pushEvent(new GHOST_EventTrackpad(system->getMilliSeconds(), + active_window, + GHOST_kTrackpadEventScroll, + cursor_x, + cursor_y, + trackpad_info.x, + trackpad_info.y, + trackpad_info.isScrollDirectionInverted)); + } + if (trackpad_info.scale != 0) { + system->pushEvent(new GHOST_EventTrackpad(system->getMilliSeconds(), + active_window, + GHOST_kTrackpadEventMagnify, + cursor_x, + cursor_y, + trackpad_info.scale, + 0, + false)); + } +} + LRESULT WINAPI GHOST_SystemWin32::s_wndProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam) { GHOST_Event *event = NULL; @@ -1968,6 +2020,8 @@ LRESULT WINAPI GHOST_SystemWin32::s_wndProc(HWND hwnd, UINT msg, WPARAM wParam, suggestedWindowRect->right - suggestedWindowRect->left, suggestedWindowRect->bottom - suggestedWindowRect->top, SWP_NOZORDER | SWP_NOACTIVATE); + + window->updateDPI(); } break; case WM_DISPLAYCHANGE: { @@ -1985,6 +2039,12 @@ LRESULT WINAPI GHOST_SystemWin32::s_wndProc(HWND hwnd, UINT msg, WPARAM wParam, ::SetFocus(hwnd); } break; + case WM_SETTINGCHANGE: + /* Microsoft: "Note that some applications send this message with lParam set to NULL" */ + if ((lParam != NULL) && (wcscmp(LPCWSTR(lParam), L"ImmersiveColorSet") == 0)) { + window->ThemeRefresh(); + } + break; //////////////////////////////////////////////////////////////////////// // Window events, ignored //////////////////////////////////////////////////////////////////////// @@ -2056,6 +2116,12 @@ LRESULT WINAPI GHOST_SystemWin32::s_wndProc(HWND hwnd, UINT msg, WPARAM wParam, * In GHOST, we let DefWindowProc call the timer callback. */ break; + case DM_POINTERHITTEST: + /* The DM_POINTERHITTEST message is sent to a window, when pointer input is first + * detected, in order to determine the most probable input target for Direct + * Manipulation. */ + window->onPointerHitTest(wParam); + break; } } else { diff --git a/intern/ghost/intern/GHOST_SystemWin32.h b/intern/ghost/intern/GHOST_SystemWin32.h index 9f8d52f9ca3..689b78b0317 100644 --- a/intern/ghost/intern/GHOST_SystemWin32.h +++ b/intern/ghost/intern/GHOST_SystemWin32.h @@ -407,6 +407,16 @@ class GHOST_SystemWin32 : public GHOST_System { #endif /** + * Drives Direct Manipulation update. + */ + void driveTrackpad(); + + /** + * Creates trackpad events for the active window. + */ + void processTrackpad(); + + /** * Returns the local state of the modifier keys (from the message queue). * \param keys: The state of the keys. */ diff --git a/intern/ghost/intern/GHOST_SystemX11.cpp b/intern/ghost/intern/GHOST_SystemX11.cpp index ebee5f58fff..bbce6fdfdb5 100644 --- a/intern/ghost/intern/GHOST_SystemX11.cpp +++ b/intern/ghost/intern/GHOST_SystemX11.cpp @@ -63,7 +63,7 @@ #include <stdio.h> /* for fprintf only */ #include <vector> -/* for debugging - so we can breakpoint X11 errors */ +/* For debugging, so we can break-point X11 errors. */ // #define USE_X11_ERROR_HANDLERS #ifdef WITH_X11_XINPUT @@ -2492,7 +2492,7 @@ GHOST_TSuccess GHOST_SystemX11::pushDragDropEvent(GHOST_TEventType eventType, } #endif /** - * These callbacks can be used for debugging, so we can breakpoint on an X11 error. + * These callbacks can be used for debugging, so we can break-point on an X11 error. * * Dummy function to get around IO Handler exiting if device invalid * Basically it will not crash blender now if you have a X device that diff --git a/intern/ghost/intern/GHOST_TrackpadWin32.cpp b/intern/ghost/intern/GHOST_TrackpadWin32.cpp new file mode 100644 index 00000000000..d5317f0f780 --- /dev/null +++ b/intern/ghost/intern/GHOST_TrackpadWin32.cpp @@ -0,0 +1,343 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup GHOST + */ + +#include <cmath> + +#include "GHOST_Debug.h" +#include "GHOST_TrackpadWin32.h" + +GHOST_DirectManipulationHelper::GHOST_DirectManipulationHelper( + HWND hWnd, + Microsoft::WRL::ComPtr<IDirectManipulationManager> directManipulationManager, + Microsoft::WRL::ComPtr<IDirectManipulationUpdateManager> directManipulationUpdateManager, + Microsoft::WRL::ComPtr<IDirectManipulationViewport> directManipulationViewport, + Microsoft::WRL::ComPtr<GHOST_DirectManipulationViewportEventHandler> + directManipulationEventHandler, + DWORD directManipulationViewportHandlerCookie, + bool isScrollDirectionInverted) + : m_hWnd(hWnd), + m_scrollDirectionRegKey(NULL), + m_scrollDirectionChangeEvent(NULL), + m_directManipulationManager(directManipulationManager), + m_directManipulationUpdateManager(directManipulationUpdateManager), + m_directManipulationViewport(directManipulationViewport), + m_directManipulationEventHandler(directManipulationEventHandler), + m_directManipulationViewportHandlerCookie(directManipulationViewportHandlerCookie), + m_isScrollDirectionInverted(isScrollDirectionInverted) +{ +} + +GHOST_DirectManipulationHelper *GHOST_DirectManipulationHelper::create(HWND hWnd, uint16_t dpi) +{ +#define DM_CHECK_RESULT_AND_EXIT_EARLY(hr, failMessage) \ + { \ + if (!SUCCEEDED(hr)) { \ + GHOST_PRINT(failMessage); \ + return nullptr; \ + } \ + } + + Microsoft::WRL::ComPtr<IDirectManipulationManager> directManipulationManager; + HRESULT hr = ::CoCreateInstance(CLSID_DirectManipulationManager, + nullptr, + CLSCTX_INPROC_SERVER, + IID_PPV_ARGS(&directManipulationManager)); + DM_CHECK_RESULT_AND_EXIT_EARLY(hr, "DirectManipulationManager create failed\n"); + + /* Since we want to use fake viewport, we need to send fake updates to UpdateManager. */ + Microsoft::WRL::ComPtr<IDirectManipulationUpdateManager> directManipulationUpdateManager; + hr = directManipulationManager->GetUpdateManager(IID_PPV_ARGS(&directManipulationUpdateManager)); + DM_CHECK_RESULT_AND_EXIT_EARLY(hr, "Get UpdateManager failed\n"); + + Microsoft::WRL::ComPtr<IDirectManipulationViewport> directManipulationViewport; + hr = directManipulationManager->CreateViewport( + nullptr, hWnd, IID_PPV_ARGS(&directManipulationViewport)); + DM_CHECK_RESULT_AND_EXIT_EARLY(hr, "Viewport create failed\n"); + + DIRECTMANIPULATION_CONFIGURATION configuration = + DIRECTMANIPULATION_CONFIGURATION_INTERACTION | + DIRECTMANIPULATION_CONFIGURATION_TRANSLATION_X | + DIRECTMANIPULATION_CONFIGURATION_TRANSLATION_Y | + DIRECTMANIPULATION_CONFIGURATION_TRANSLATION_INERTIA | + DIRECTMANIPULATION_CONFIGURATION_SCALING; + + hr = directManipulationViewport->ActivateConfiguration(configuration); + DM_CHECK_RESULT_AND_EXIT_EARLY(hr, "Viewport set ActivateConfiguration failed\n"); + + /* Since we are using fake viewport and only want to use Direct Manipulation for touchpad, we + * need to use MANUALUPDATE option. */ + hr = directManipulationViewport->SetViewportOptions( + DIRECTMANIPULATION_VIEWPORT_OPTIONS_MANUALUPDATE); + DM_CHECK_RESULT_AND_EXIT_EARLY(hr, "Viewport set ViewportOptions failed\n"); + + /* We receive Direct Manipulation transform updates in IDirectManipulationViewportEventHandler + * callbacks. */ + Microsoft::WRL::ComPtr<GHOST_DirectManipulationViewportEventHandler> + directManipulationEventHandler = + Microsoft::WRL::Make<GHOST_DirectManipulationViewportEventHandler>(dpi); + DWORD directManipulationViewportHandlerCookie; + directManipulationViewport->AddEventHandler( + hWnd, directManipulationEventHandler.Get(), &directManipulationViewportHandlerCookie); + DM_CHECK_RESULT_AND_EXIT_EARLY(hr, "Viewport add EventHandler failed\n"); + + /* Set default rect for viewport before activating. */ + RECT rect = {0, 0, 10000, 10000}; + hr = directManipulationViewport->SetViewportRect(&rect); + DM_CHECK_RESULT_AND_EXIT_EARLY(hr, "Viewport set rect failed\n"); + + hr = directManipulationManager->Activate(hWnd); + DM_CHECK_RESULT_AND_EXIT_EARLY(hr, "DirectManipulationManager activate failed\n"); + + hr = directManipulationViewport->Enable(); + DM_CHECK_RESULT_AND_EXIT_EARLY(hr, "Viewport enable failed\n"); + + directManipulationEventHandler->resetViewport(directManipulationViewport.Get()); + + bool isScrollDirectionInverted = getScrollDirectionFromReg(); + + auto instance = new GHOST_DirectManipulationHelper(hWnd, + directManipulationManager, + directManipulationUpdateManager, + directManipulationViewport, + directManipulationEventHandler, + directManipulationViewportHandlerCookie, + isScrollDirectionInverted); + + instance->registerScrollDirectionChangeListener(); + + return instance; + +#undef DM_CHECK_RESULT_AND_EXIT_EARLY +} + +bool GHOST_DirectManipulationHelper::getScrollDirectionFromReg() +{ + DWORD scrollDirectionRegValue, pcbData; + HRESULT hr = HRESULT_FROM_WIN32( + RegGetValueW(HKEY_CURRENT_USER, + L"SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\PrecisionTouchPad\\", + L"ScrollDirection", + RRF_RT_REG_DWORD, + NULL, + &scrollDirectionRegValue, + &pcbData)); + if (!SUCCEEDED(hr)) { + GHOST_PRINT("Failed to get scroll direction from registry\n"); + return false; + } + + return scrollDirectionRegValue == 0; +} + +void GHOST_DirectManipulationHelper::registerScrollDirectionChangeListener() +{ + + if (!m_scrollDirectionRegKey) { + HRESULT hr = HRESULT_FROM_WIN32( + RegOpenKeyExW(HKEY_CURRENT_USER, + L"SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\PrecisionTouchPad\\", + 0, + KEY_NOTIFY, + &m_scrollDirectionRegKey)); + if (!SUCCEEDED(hr)) { + GHOST_PRINT("Failed to open scroll direction registry key\n"); + return; + } + } + + if (!m_scrollDirectionChangeEvent) { + m_scrollDirectionChangeEvent = CreateEventW(NULL, true, false, NULL); + } + else { + ResetEvent(m_scrollDirectionChangeEvent); + } + HRESULT hr = HRESULT_FROM_WIN32(RegNotifyChangeKeyValue(m_scrollDirectionRegKey, + true, + REG_NOTIFY_CHANGE_LAST_SET, + m_scrollDirectionChangeEvent, + true)); + if (!SUCCEEDED(hr)) { + GHOST_PRINT("Failed to register scroll direction change listener\n"); + return; + } +} + +void GHOST_DirectManipulationHelper::onPointerHitTest(UINT32 pointerId) +{ + [[maybe_unused]] HRESULT hr = m_directManipulationViewport->SetContact(pointerId); + GHOST_ASSERT(SUCCEEDED(hr), "Viewport set contact failed\n"); + + if (WaitForSingleObject(m_scrollDirectionChangeEvent, 0) == WAIT_OBJECT_0) { + m_isScrollDirectionInverted = getScrollDirectionFromReg(); + registerScrollDirectionChangeListener(); + } +} + +void GHOST_DirectManipulationHelper::update() +{ + if (m_directManipulationEventHandler->dm_status == DIRECTMANIPULATION_RUNNING || + m_directManipulationEventHandler->dm_status == DIRECTMANIPULATION_INERTIA) { + [[maybe_unused]] HRESULT hr = m_directManipulationUpdateManager->Update(nullptr); + GHOST_ASSERT(SUCCEEDED(hr), "DirectManipulationUpdateManager update failed\n"); + } +} + +void GHOST_DirectManipulationHelper::setDPI(uint16_t dpi) +{ + m_directManipulationEventHandler->dpi = dpi; +} + +GHOST_TTrackpadInfo GHOST_DirectManipulationHelper::getTrackpadInfo() +{ + GHOST_TTrackpadInfo result = m_directManipulationEventHandler->accumulated_values; + result.isScrollDirectionInverted = m_isScrollDirectionInverted; + + m_directManipulationEventHandler->accumulated_values = {0, 0, 0}; + return result; +} + +GHOST_DirectManipulationHelper::~GHOST_DirectManipulationHelper() +{ + HRESULT hr; + hr = m_directManipulationViewport->Stop(); + GHOST_ASSERT(SUCCEEDED(hr), "Viewport stop failed\n"); + + hr = m_directManipulationViewport->RemoveEventHandler(m_directManipulationViewportHandlerCookie); + GHOST_ASSERT(SUCCEEDED(hr), "Viewport remove event handler failed\n"); + + hr = m_directManipulationViewport->Abandon(); + GHOST_ASSERT(SUCCEEDED(hr), "Viewport abandon failed\n"); + + hr = m_directManipulationManager->Deactivate(m_hWnd); + GHOST_ASSERT(SUCCEEDED(hr), "DirectManipulationManager deactivate failed\n"); + + if (m_scrollDirectionChangeEvent) { + CloseHandle(m_scrollDirectionChangeEvent); + m_scrollDirectionChangeEvent = NULL; + } + if (m_scrollDirectionRegKey) { + RegCloseKey(m_scrollDirectionRegKey); + m_scrollDirectionRegKey = NULL; + } +} + +GHOST_DirectManipulationViewportEventHandler::GHOST_DirectManipulationViewportEventHandler( + uint16_t dpi) + : accumulated_values({0, 0, 0}), dpi(dpi), dm_status(DIRECTMANIPULATION_BUILDING) +{ +} + +void GHOST_DirectManipulationViewportEventHandler::resetViewport( + IDirectManipulationViewport *viewport) +{ + if (gesture_state != GESTURE_NONE) { + [[maybe_unused]] HRESULT hr = viewport->ZoomToRect(0.0f, 0.0f, 10000.0f, 10000.0f, FALSE); + GHOST_ASSERT(SUCCEEDED(hr), "Viewport reset failed\n"); + } + + gesture_state = GESTURE_NONE; + + last_scale = PINCH_SCALE_FACTOR; + last_x = 0.0f; + last_y = 0.0f; +} + +HRESULT GHOST_DirectManipulationViewportEventHandler::OnViewportStatusChanged( + IDirectManipulationViewport *viewport, + DIRECTMANIPULATION_STATUS current, + DIRECTMANIPULATION_STATUS previous) +{ + dm_status = current; + + if (current == previous) { + return S_OK; + } + + if (previous == DIRECTMANIPULATION_ENABLED || current == DIRECTMANIPULATION_READY || + (previous == DIRECTMANIPULATION_INERTIA && current != DIRECTMANIPULATION_INERTIA)) { + resetViewport(viewport); + } + + return S_OK; +} + +HRESULT GHOST_DirectManipulationViewportEventHandler::OnViewportUpdated( + IDirectManipulationViewport *viewport) +{ + /* Nothing to do here. */ + return S_OK; +} + +HRESULT GHOST_DirectManipulationViewportEventHandler::OnContentUpdated( + IDirectManipulationViewport *viewport, IDirectManipulationContent *content) +{ + float transform[6]; + HRESULT hr = content->GetContentTransform(transform, ARRAYSIZE(transform)); + GHOST_ASSERT(SUCCEEDED(hr), "DirectManipulationContent get transform failed\n"); + + const float device_scale_factor = dpi / 96.0f; + + const float scale = transform[0] * PINCH_SCALE_FACTOR; + const float x = transform[4] / device_scale_factor; + const float y = transform[5] / device_scale_factor; + + const float EPS = 3e-5; + + /* Ignore repeating or incorrect input. */ + if ((fabs(scale - last_scale) <= EPS && fabs(x - last_x) <= EPS && fabs(y - last_y) <= EPS) || + scale == 0.0f) { + GHOST_PRINT("Ignoring touchpad input\n"); + return hr; + } + + /* Assume that every gesture is a pan in the beginning. + * If it's a pinch, the gesture will be changed below. */ + if (gesture_state == GESTURE_NONE) { + gesture_state = GESTURE_PAN; + } + + /* DM doesn't always immediately recognize pinch gestures, + * so allow transition from pan to pinch. */ + if (gesture_state == GESTURE_PAN) { + if (fabs(scale - PINCH_SCALE_FACTOR) > EPS) { + gesture_state = GESTURE_PINCH; + } + } + + /* This state machine is used here because: + * 1. Pinch and pan gestures must be differentiated and cannot be processed at the same time + * because XY transform values become nonsensical during pinch gesture. + * 2. GHOST requires delta values for events while DM provides transformation matrix of the + * current gesture. + * 3. GHOST events accept integer values while DM values are non-integer. + * Truncated fractional parts are accumulated and accounted for in following updates. + */ + switch (gesture_state) { + case GESTURE_PINCH: { + int32_t dscale = roundf(scale - last_scale); + + last_scale += dscale; + + accumulated_values.scale += dscale; + break; + } + case GESTURE_PAN: { + int32_t dx = roundf(x - last_x); + int32_t dy = roundf(y - last_y); + + last_x += dx; + last_y += dy; + + accumulated_values.x += dx; + accumulated_values.y += dy; + break; + } + case GESTURE_NONE: + break; + } + + return hr; +} diff --git a/intern/ghost/intern/GHOST_TrackpadWin32.h b/intern/ghost/intern/GHOST_TrackpadWin32.h new file mode 100644 index 00000000000..2e28f756965 --- /dev/null +++ b/intern/ghost/intern/GHOST_TrackpadWin32.h @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup GHOST + * Declaration of GHOST DirectManipulation classes. + */ + +#pragma once + +#ifndef WIN32 +# error WIN32 only! +#endif // WIN32 + +#include "GHOST_Types.h" + +#include <directmanipulation.h> +#include <wrl.h> + +#define PINCH_SCALE_FACTOR 125.0f + +typedef struct { + int32_t x, y, scale; + bool isScrollDirectionInverted; +} GHOST_TTrackpadInfo; + +class GHOST_DirectManipulationHelper; + +class GHOST_DirectManipulationViewportEventHandler + : public Microsoft::WRL::RuntimeClass< + Microsoft::WRL::RuntimeClassFlags<Microsoft::WRL::RuntimeClassType::ClassicCom>, + Microsoft::WRL::Implements< + Microsoft::WRL::RuntimeClassFlags<Microsoft::WRL::RuntimeClassType::ClassicCom>, + Microsoft::WRL::FtmBase, + IDirectManipulationViewportEventHandler>> { + public: + GHOST_DirectManipulationViewportEventHandler(uint16_t dpi); + + /* + * Resets viewport and tracked touchpad state. + */ + void resetViewport(IDirectManipulationViewport *viewport); + + /* DirectManipulation callbacks. */ + HRESULT STDMETHODCALLTYPE OnViewportStatusChanged(IDirectManipulationViewport *viewport, + DIRECTMANIPULATION_STATUS current, + DIRECTMANIPULATION_STATUS previous) override; + + HRESULT STDMETHODCALLTYPE OnViewportUpdated(IDirectManipulationViewport *viewport) override; + + HRESULT STDMETHODCALLTYPE OnContentUpdated(IDirectManipulationViewport *viewport, + IDirectManipulationContent *content) override; + + private: + enum { GESTURE_NONE, GESTURE_PAN, GESTURE_PINCH } gesture_state; + + int32_t last_x, last_y, last_scale; + GHOST_TTrackpadInfo accumulated_values; + uint16_t dpi; + DIRECTMANIPULATION_STATUS dm_status; + + friend class GHOST_DirectManipulationHelper; +}; + +class GHOST_DirectManipulationHelper { + public: + /* + * Creates a GHOST_DirectManipulationHelper for the provided window. + * \param hWnd: The window receiving DirectManipulation events. + * \param dpi: The current DPI. + * \return Pointer to the new GHOST_DirectManipulationHelper if created, nullptr if there was an + * error. + */ + static GHOST_DirectManipulationHelper *create(HWND hWnd, uint16_t dpi); + + ~GHOST_DirectManipulationHelper(); + + /* + * Drives the DirectManipulation context. + * DirectManipulation's intended use is to tie user input into DirectComposition's compositor + * scaling and translating. We are not using DirectComposition and therefore must drive + * DirectManipulation manually. + */ + void update(); + + /* + * Sets pointer in contact with the DirectManipulation context. + * \param pointerId: ID of the pointer in contact. + */ + void onPointerHitTest(UINT32 pointerId); + + /* + * Updates DPI information for touchpad scaling. + * \param dpi: The new DPI. + */ + void setDPI(uint16_t dpi); + + /* + * Retrieves trackpad input. + * \return The accumulated trackpad translation and scale since last call. + */ + GHOST_TTrackpadInfo getTrackpadInfo(); + + private: + GHOST_DirectManipulationHelper( + HWND hWnd, + Microsoft::WRL::ComPtr<IDirectManipulationManager> directManipulationManager, + Microsoft::WRL::ComPtr<IDirectManipulationUpdateManager> directManipulationUpdateManager, + Microsoft::WRL::ComPtr<IDirectManipulationViewport> directManipulationViewport, + Microsoft::WRL::ComPtr<GHOST_DirectManipulationViewportEventHandler> + directManipulationEventHandler, + DWORD directManipulationViewportHandlerCookie, + bool isScrollDirectionInverted); + + /* + * Retrieves the scroll direction from the registry. + * \return True if scroll direction is inverted. + */ + static bool getScrollDirectionFromReg(); + + /* + * Registers listener for registry scroll direction entry changes. + */ + void registerScrollDirectionChangeListener(); + + HWND m_hWnd; + + HKEY m_scrollDirectionRegKey; + HANDLE m_scrollDirectionChangeEvent; + + Microsoft::WRL::ComPtr<IDirectManipulationManager> m_directManipulationManager; + Microsoft::WRL::ComPtr<IDirectManipulationUpdateManager> m_directManipulationUpdateManager; + Microsoft::WRL::ComPtr<IDirectManipulationViewport> m_directManipulationViewport; + Microsoft::WRL::ComPtr<GHOST_DirectManipulationViewportEventHandler> + m_directManipulationEventHandler; + DWORD m_directManipulationViewportHandlerCookie; + + bool m_isScrollDirectionInverted; +}; diff --git a/intern/ghost/intern/GHOST_WindowCocoa.mm b/intern/ghost/intern/GHOST_WindowCocoa.mm index 4a1b3c2fe16..e7f5fdaa011 100644 --- a/intern/ghost/intern/GHOST_WindowCocoa.mm +++ b/intern/ghost/intern/GHOST_WindowCocoa.mm @@ -426,8 +426,8 @@ GHOST_WindowCocoa::~GHOST_WindowCocoa() [m_window close]; } - // Check for other blender opened windows and make the frontmost key - // Note: for some reason the closed window is still in the list + /* Check for other blender opened windows and make the front-most key + * NOTE: for some reason the closed window is still in the list. */ NSArray *windowsList = [NSApp orderedWindows]; for (int a = 0; a < [windowsList count]; a++) { if (m_window != (CocoaWindow *)[windowsList objectAtIndex:a]) { diff --git a/intern/ghost/intern/GHOST_WindowWin32.cpp b/intern/ghost/intern/GHOST_WindowWin32.cpp index 2ce224b666b..2e17454d24f 100644 --- a/intern/ghost/intern/GHOST_WindowWin32.cpp +++ b/intern/ghost/intern/GHOST_WindowWin32.cpp @@ -16,9 +16,7 @@ #include "GHOST_ContextWGL.h" -#ifdef WIN32_COMPOSITING -# include <Dwmapi.h> -#endif +#include <Dwmapi.h> #include <assert.h> #include <math.h> @@ -70,6 +68,7 @@ GHOST_WindowWin32::GHOST_WindowWin32(GHOST_SystemWin32 *system, m_normal_state(GHOST_kWindowStateNormal), m_user32(::LoadLibrary("user32.dll")), m_parentWindowHwnd(parentwindow ? parentwindow->m_hWnd : HWND_DESKTOP), + m_directManipulationHelper(NULL), m_debug_context(is_debug) { DWORD style = parentwindow ? @@ -172,6 +171,8 @@ GHOST_WindowWin32::GHOST_WindowWin32(GHOST_SystemWin32 *system, break; } + ThemeRefresh(); + ::ShowWindow(m_hWnd, nCmdShow); #ifdef WIN32_COMPOSITING @@ -204,6 +205,42 @@ GHOST_WindowWin32::GHOST_WindowWin32(GHOST_SystemWin32 *system, /* Allow the showing of a progress bar on the taskbar. */ CoCreateInstance( CLSID_TaskbarList, NULL, CLSCTX_INPROC_SERVER, IID_ITaskbarList3, (LPVOID *)&m_Bar); + + /* Initialize Direct Manipulation. */ + m_directManipulationHelper = GHOST_DirectManipulationHelper::create(m_hWnd, getDPIHint()); +} + +void GHOST_WindowWin32::updateDirectManipulation() +{ + if (!m_directManipulationHelper) { + return; + } + + m_directManipulationHelper->update(); +} + +void GHOST_WindowWin32::onPointerHitTest(WPARAM wParam) +{ + /* Only DM_POINTERHITTEST can be the first message of input sequence of touchpad input. */ + + if (!m_directManipulationHelper) { + return; + } + + UINT32 pointerId = GET_POINTERID_WPARAM(wParam); + POINTER_INPUT_TYPE pointerType; + if (GetPointerType(pointerId, &pointerType) && pointerType == PT_TOUCHPAD) { + m_directManipulationHelper->onPointerHitTest(pointerId); + } +} + +GHOST_TTrackpadInfo GHOST_WindowWin32::getTrackpadInfo() +{ + if (!m_directManipulationHelper) { + return {0, 0, 0}; + } + + return m_directManipulationHelper->getTrackpadInfo(); } GHOST_WindowWin32::~GHOST_WindowWin32() @@ -253,6 +290,9 @@ GHOST_WindowWin32::~GHOST_WindowWin32() ::DestroyWindow(m_hWnd); m_hWnd = 0; } + + delete m_directManipulationHelper; + m_directManipulationHelper = NULL; } void GHOST_WindowWin32::adjustWindowRectForClosestMonitor(LPRECT win_rect, @@ -282,7 +322,7 @@ void GHOST_WindowWin32::adjustWindowRectForClosestMonitor(LPRECT win_rect, } /* Adjust to allow for caption, borders, shadows, scaling, etc. Resulting values can be - * correctly outside of monitor bounds. Note: You cannot specify WS_OVERLAPPED when calling. */ + * correctly outside of monitor bounds. NOTE: You cannot specify #WS_OVERLAPPED when calling. */ if (fpAdjustWindowRectExForDpi) { UINT dpiX, dpiY; GetDpiForMonitor(hmonitor, MDT_EFFECTIVE_DPI, &dpiX, &dpiY); @@ -1016,6 +1056,32 @@ GHOST_TabletData GHOST_WindowWin32::getTabletData() } } +void GHOST_WindowWin32::ThemeRefresh() +{ + DWORD lightMode; + DWORD pcbData = sizeof(lightMode); + if (RegGetValueW(HKEY_CURRENT_USER, + L"Software\\Microsoft\\Windows\\CurrentVersion\\Themes\\Personalize\\", + L"AppsUseLightTheme", + RRF_RT_REG_DWORD, + NULL, + &lightMode, + &pcbData) == ERROR_SUCCESS) { + BOOL DarkMode = !lightMode; + + /* 20 == DWMWA_USE_IMMERSIVE_DARK_MODE in Windows 11 SDK. This value was undocumented for + * Windows 10 versions 2004 and later, supported for Windows 11 Build 22000 and later. */ + DwmSetWindowAttribute(this->m_hWnd, 20, &DarkMode, sizeof(DarkMode)); + } +} + +void GHOST_WindowWin32::updateDPI() +{ + if (m_directManipulationHelper) { + m_directManipulationHelper->setDPI(getDPIHint()); + } +} + uint16_t GHOST_WindowWin32::getDPIHint() { if (m_user32) { diff --git a/intern/ghost/intern/GHOST_WindowWin32.h b/intern/ghost/intern/GHOST_WindowWin32.h index d5f47871aff..c958a89ac48 100644 --- a/intern/ghost/intern/GHOST_WindowWin32.h +++ b/intern/ghost/intern/GHOST_WindowWin32.h @@ -13,6 +13,7 @@ #endif // WIN32 #include "GHOST_TaskbarWin32.h" +#include "GHOST_TrackpadWin32.h" #include "GHOST_Window.h" #include "GHOST_Wintab.h" #ifdef WITH_INPUT_IME @@ -286,6 +287,8 @@ class GHOST_WindowWin32 : public GHOST_Window { return GHOST_kFailure; } + void updateDPI(); + uint16_t getDPIHint() override; /** True if the mouse is either over or captured by the window. */ @@ -294,6 +297,9 @@ class GHOST_WindowWin32 : public GHOST_Window { /** True if the window currently resizing. */ bool m_inLiveResize; + /** Called when OS colors change and when the window is created. */ + void ThemeRefresh(); + #ifdef WITH_INPUT_IME GHOST_ImeWin32 *getImeInput() { @@ -305,6 +311,19 @@ class GHOST_WindowWin32 : public GHOST_Window { void endIME(); #endif /* WITH_INPUT_IME */ + /* + * Drive DirectManipulation context. + */ + void updateDirectManipulation(); + + /* + * Handle DM_POINTERHITTEST events. + * \param wParam: wParam from the event. + */ + void onPointerHitTest(WPARAM wParam); + + GHOST_TTrackpadInfo getTrackpadInfo(); + private: /** * \param type: The type of rendering context create. @@ -388,6 +407,8 @@ class GHOST_WindowWin32 : public GHOST_Window { HWND m_parentWindowHwnd; + GHOST_DirectManipulationHelper *m_directManipulationHelper; + #ifdef WITH_INPUT_IME /** Handle input method editors event */ GHOST_ImeWin32 m_imeInput; diff --git a/intern/guardedalloc/intern/mallocn_guarded_impl.c b/intern/guardedalloc/intern/mallocn_guarded_impl.c index acad413b4c1..8bf1680e6f8 100644 --- a/intern/guardedalloc/intern/mallocn_guarded_impl.c +++ b/intern/guardedalloc/intern/mallocn_guarded_impl.c @@ -54,7 +54,7 @@ # define DEBUG_MEMCOUNTER_ERROR_VAL 0 static int _mallocn_count = 0; -/* breakpoint here */ +/* Break-point here. */ static void memcount_raise(const char *name) { fprintf(stderr, "%s: memcount-leak, %d\n", name, _mallocn_count); diff --git a/intern/iksolver/intern/IK_QTask.cpp b/intern/iksolver/intern/IK_QTask.cpp index caf9585a94e..80eda01b17f 100644 --- a/intern/iksolver/intern/IK_QTask.cpp +++ b/intern/iksolver/intern/IK_QTask.cpp @@ -116,7 +116,7 @@ void IK_QOrientationTask::ComputeJacobian(IK_QJacobian &jacobian) } // IK_QCenterOfMassTask -// Note: implementation not finished! +// NOTE: implementation not finished! IK_QCenterOfMassTask::IK_QCenterOfMassTask(bool primary, const IK_QSegment *segment, diff --git a/intern/libmv/CMakeLists.txt b/intern/libmv/CMakeLists.txt index f9fef9f7a29..e0ed68eb20e 100644 --- a/intern/libmv/CMakeLists.txt +++ b/intern/libmv/CMakeLists.txt @@ -26,7 +26,6 @@ if(WITH_LIBMV) endif() add_definitions(${GFLAGS_DEFINES}) add_definitions(${GLOG_DEFINES}) - add_definitions(${CERES_DEFINES}) add_definitions(-DLIBMV_GFLAGS_NAMESPACE=${GFLAGS_NAMESPACE}) list(APPEND INC diff --git a/intern/libmv/bundle.sh b/intern/libmv/bundle.sh index 6808e244c05..82293068745 100755 --- a/intern/libmv/bundle.sh +++ b/intern/libmv/bundle.sh @@ -124,7 +124,6 @@ if(WITH_LIBMV) endif() add_definitions(\${GFLAGS_DEFINES}) add_definitions(\${GLOG_DEFINES}) - add_definitions(\${CERES_DEFINES}) add_definitions(-DLIBMV_GFLAGS_NAMESPACE=\${GFLAGS_NAMESPACE}) list(APPEND INC diff --git a/intern/libmv/libmv/multiview/euclidean_resection.h b/intern/libmv/libmv/multiview/euclidean_resection.h index 3c4c3979ff6..cdb9b5af52e 100644 --- a/intern/libmv/libmv/multiview/euclidean_resection.h +++ b/intern/libmv/libmv/multiview/euclidean_resection.h @@ -118,7 +118,7 @@ void EuclideanResectionAnsarDaniilidis(const Mat2X& x_camera, * This is the algorithm described in: * "{EP$n$P: An Accurate $O(n)$ Solution to the P$n$P Problem", by V. Lepetit * and F. Moreno-Noguer and P. Fua, IJCV 2009. vol. 81, no. 2 - * \note: the non-linear optimization is not implemented here. + * \note the non-linear optimization is not implemented here. */ bool EuclideanResectionEPnP(const Mat2X& x_camera, const Mat3X& X_world, diff --git a/intern/libmv/libmv/simple_pipeline/bundle.cc b/intern/libmv/libmv/simple_pipeline/bundle.cc index e86c3bca57f..355c167d000 100644 --- a/intern/libmv/libmv/simple_pipeline/bundle.cc +++ b/intern/libmv/libmv/simple_pipeline/bundle.cc @@ -685,7 +685,7 @@ void EuclideanBundleCommonIntrinsics(const Tracks& tracks, PackCamerasRotationAndTranslation(*reconstruction); // Parameterization used to restrict camera motion for modal solvers. - ceres::SubsetParameterization* constant_translation_parameterization = NULL; + ceres::SubsetManifold* constant_translation_manifold = NULL; if (bundle_constraints & BUNDLE_NO_TRANSLATION) { std::vector<int> constant_translation; @@ -694,8 +694,8 @@ void EuclideanBundleCommonIntrinsics(const Tracks& tracks, constant_translation.push_back(4); constant_translation.push_back(5); - constant_translation_parameterization = - new ceres::SubsetParameterization(6, constant_translation); + constant_translation_manifold = + new ceres::SubsetManifold(6, constant_translation); } // Add residual blocks to the problem. @@ -735,8 +735,7 @@ void EuclideanBundleCommonIntrinsics(const Tracks& tracks, } if (bundle_constraints & BUNDLE_NO_TRANSLATION) { - problem.SetParameterization(current_camera_R_t, - constant_translation_parameterization); + problem.SetManifold(current_camera_R_t, constant_translation_manifold); } zero_weight_tracks_flags[marker.track] = false; @@ -787,11 +786,11 @@ void EuclideanBundleCommonIntrinsics(const Tracks& tracks, #undef MAYBE_SET_CONSTANT if (!constant_intrinsics.empty()) { - ceres::SubsetParameterization* subset_parameterization = - new ceres::SubsetParameterization(PackedIntrinsics::NUM_PARAMETERS, - constant_intrinsics); + ceres::SubsetManifold* subset_parameterization = + new ceres::SubsetManifold(PackedIntrinsics::NUM_PARAMETERS, + constant_intrinsics); - problem.SetParameterization(intrinsics_block, subset_parameterization); + problem.SetManifold(intrinsics_block, subset_parameterization); } } diff --git a/intern/libmv/libmv/simple_pipeline/modal_solver.cc b/intern/libmv/libmv/simple_pipeline/modal_solver.cc index 845b299e31e..206d264f1f8 100644 --- a/intern/libmv/libmv/simple_pipeline/modal_solver.cc +++ b/intern/libmv/libmv/simple_pipeline/modal_solver.cc @@ -180,7 +180,7 @@ void ModalSolver(const Tracks& tracks, // NOTE: Parameterization is lazily initialized when it is really needed, // and is re-used by all parameters block. - ceres::LocalParameterization* quaternion_parameterization = NULL; + ceres::Manifold* quaternion_manifold = NULL; int num_residuals = 0; for (int i = 0; i < all_markers.size(); ++i) { @@ -197,12 +197,11 @@ void ModalSolver(const Tracks& tracks, &quaternion(0)); num_residuals++; - if (quaternion_parameterization == NULL) { - quaternion_parameterization = new ceres::QuaternionParameterization(); + if (quaternion_manifold == NULL) { + quaternion_manifold = new ceres::QuaternionManifold(); } - problem.SetParameterization(&quaternion(0), - quaternion_parameterization); + problem.SetManifold(&quaternion(0), quaternion_manifold); } } diff --git a/intern/opensubdiv/internal/evaluator/eval_output.h b/intern/opensubdiv/internal/evaluator/eval_output.h index cff7c8d18c9..bc5494bfe41 100644 --- a/intern/opensubdiv/internal/evaluator/eval_output.h +++ b/intern/opensubdiv/internal/evaluator/eval_output.h @@ -372,15 +372,15 @@ class VolatileEvalOutput : public EvalOutputAPI::EvalOutput { } // Create evaluators for every face varying channel. - face_varying_evaluators.reserve(all_face_varying_stencils.size()); + face_varying_evaluators_.reserve(all_face_varying_stencils.size()); int face_varying_channel = 0; for (const StencilTable *face_varying_stencils : all_face_varying_stencils) { - face_varying_evaluators.push_back(new FaceVaryingEval(face_varying_channel, - face_varying_stencils, - face_varying_width, - patch_table_, - evaluator_cache_, - device_context_)); + face_varying_evaluators_.push_back(new FaceVaryingEval(face_varying_channel, + face_varying_stencils, + face_varying_width, + patch_table_, + evaluator_cache_, + device_context_)); ++face_varying_channel; } } @@ -393,7 +393,7 @@ class VolatileEvalOutput : public EvalOutputAPI::EvalOutput { delete patch_table_; delete vertex_stencils_; delete varying_stencils_; - for (FaceVaryingEval *face_varying_evaluator : face_varying_evaluators) { + for (FaceVaryingEval *face_varying_evaluator : face_varying_evaluators_) { delete face_varying_evaluator; } } @@ -421,8 +421,8 @@ class VolatileEvalOutput : public EvalOutputAPI::EvalOutput { int num_vertices) override { assert(face_varying_channel >= 0); - assert(face_varying_channel < face_varying_evaluators.size()); - face_varying_evaluators[face_varying_channel]->updateData(src, start_vertex, num_vertices); + assert(face_varying_channel < face_varying_evaluators_.size()); + face_varying_evaluators_[face_varying_channel]->updateData(src, start_vertex, num_vertices); } bool hasVaryingData() const @@ -434,7 +434,7 @@ class VolatileEvalOutput : public EvalOutputAPI::EvalOutput { bool hasFaceVaryingData() const { - return face_varying_evaluators.size() != 0; + return face_varying_evaluators_.size() != 0; } void refine() override @@ -483,7 +483,7 @@ class VolatileEvalOutput : public EvalOutputAPI::EvalOutput { } // Evaluate face-varying data. if (hasFaceVaryingData()) { - for (FaceVaryingEval *face_varying_evaluator : face_varying_evaluators) { + for (FaceVaryingEval *face_varying_evaluator : face_varying_evaluators_) { face_varying_evaluator->refine(); } } @@ -589,8 +589,8 @@ class VolatileEvalOutput : public EvalOutputAPI::EvalOutput { float face_varying[2]) override { assert(face_varying_channel >= 0); - assert(face_varying_channel < face_varying_evaluators.size()); - face_varying_evaluators[face_varying_channel]->evalPatches( + assert(face_varying_channel < face_varying_evaluators_.size()); + face_varying_evaluators_[face_varying_channel]->evalPatches( patch_coord, num_patch_coords, face_varying); } @@ -606,17 +606,17 @@ class VolatileEvalOutput : public EvalOutputAPI::EvalOutput { SRC_VERTEX_BUFFER *getFVarSrcBuffer(const int face_varying_channel) const { - return face_varying_evaluators[face_varying_channel]->getSrcBuffer(); + return face_varying_evaluators_[face_varying_channel]->getSrcBuffer(); } int getFVarSrcBufferOffset(const int face_varying_channel) const { - return face_varying_evaluators[face_varying_channel]->getFVarSrcBufferOffset(); + return face_varying_evaluators_[face_varying_channel]->getFVarSrcBufferOffset(); } PATCH_TABLE *getFVarPatchTable(const int face_varying_channel) const { - return face_varying_evaluators[face_varying_channel]->getPatchTable(); + return face_varying_evaluators_[face_varying_channel]->getPatchTable(); } private: @@ -634,7 +634,7 @@ class VolatileEvalOutput : public EvalOutputAPI::EvalOutput { const STENCIL_TABLE *varying_stencils_; int face_varying_width_; - vector<FaceVaryingEval *> face_varying_evaluators; + vector<FaceVaryingEval *> face_varying_evaluators_; EvaluatorCache *evaluator_cache_; DEVICE_CONTEXT *device_context_; diff --git a/intern/opensubdiv/internal/evaluator/eval_output_cpu.h b/intern/opensubdiv/internal/evaluator/eval_output_cpu.h index 2b3c738d6ab..35fd03f6158 100644 --- a/intern/opensubdiv/internal/evaluator/eval_output_cpu.h +++ b/intern/opensubdiv/internal/evaluator/eval_output_cpu.h @@ -32,7 +32,7 @@ using OpenSubdiv::Osd::CpuVertexBuffer; namespace blender { namespace opensubdiv { -// Note: Define as a class instead of typedef to make it possible +// NOTE: Define as a class instead of typedef to make it possible // to have anonymous class in opensubdiv_evaluator_internal.h class CpuEvalOutput : public VolatileEvalOutput<CpuVertexBuffer, CpuVertexBuffer, diff --git a/intern/rigidbody/RBI_api.h b/intern/rigidbody/RBI_api.h index 791a4a6ac05..13b1c096a80 100644 --- a/intern/rigidbody/RBI_api.h +++ b/intern/rigidbody/RBI_api.h @@ -14,7 +14,7 @@ extern "C" { #endif /* API Notes: - * Currently, this API is optimised for Bullet RigidBodies, and doesn't + * Currently, this API is optimized for Bullet RigidBodies, and doesn't * take into account other Physics Engines. Some tweaking may be necessary * to allow other systems to be used, in particular there may be references * to datatypes that aren't used here... |