Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/blender/addon/properties.py99
-rw-r--r--intern/cycles/blender/addon/ui.py14
-rw-r--r--intern/cycles/blender/image.cpp15
-rw-r--r--intern/cycles/blender/image.h8
-rw-r--r--intern/cycles/blender/mesh.cpp2
-rw-r--r--intern/cycles/blender/shader.cpp32
-rw-r--r--intern/cycles/device/metal/device_impl.h6
-rw-r--r--intern/cycles/device/metal/device_impl.mm96
-rw-r--r--intern/cycles/device/metal/kernel.h108
-rw-r--r--intern/cycles/device/metal/kernel.mm856
-rw-r--r--intern/cycles/device/metal/queue.mm31
-rw-r--r--intern/cycles/device/optix/device_impl.cpp38
-rw-r--r--intern/cycles/kernel/CMakeLists.txt1
-rw-r--r--intern/cycles/kernel/device/metal/compat.h24
-rw-r--r--intern/cycles/kernel/osl/services.cpp38
-rw-r--r--intern/cycles/kernel/osl/services.h14
-rw-r--r--intern/cycles/kernel/osl/shaders/CMakeLists.txt2
-rw-r--r--intern/cycles/kernel/osl/shaders/node_color.h50
-rw-r--r--intern/cycles/kernel/osl/shaders/node_combine_color.osl16
-rw-r--r--intern/cycles/kernel/osl/shaders/node_separate_color.osl26
-rw-r--r--intern/cycles/kernel/svm/color_util.h26
-rw-r--r--intern/cycles/kernel/svm/sepcomb_color.h54
-rw-r--r--intern/cycles/kernel/svm/svm.h7
-rw-r--r--intern/cycles/kernel/svm/types.h8
-rw-r--r--intern/cycles/scene/image.cpp51
-rw-r--r--intern/cycles/scene/image.h9
-rw-r--r--intern/cycles/scene/osl.cpp7
-rw-r--r--intern/cycles/scene/osl.h2
-rw-r--r--intern/cycles/scene/shader_nodes.cpp139
-rw-r--r--intern/cycles/scene/shader_nodes.h20
-rw-r--r--intern/cycles/util/color.h50
-rw-r--r--intern/ghost/CMakeLists.txt2
-rw-r--r--intern/ghost/intern/GHOST_SystemCocoa.mm6
-rw-r--r--intern/ghost/intern/GHOST_SystemWayland.cpp2
-rw-r--r--intern/ghost/intern/GHOST_SystemWin32.cpp66
-rw-r--r--intern/ghost/intern/GHOST_SystemWin32.h10
-rw-r--r--intern/ghost/intern/GHOST_SystemX11.cpp4
-rw-r--r--intern/ghost/intern/GHOST_TrackpadWin32.cpp343
-rw-r--r--intern/ghost/intern/GHOST_TrackpadWin32.h138
-rw-r--r--intern/ghost/intern/GHOST_WindowCocoa.mm4
-rw-r--r--intern/ghost/intern/GHOST_WindowWin32.cpp74
-rw-r--r--intern/ghost/intern/GHOST_WindowWin32.h21
-rw-r--r--intern/guardedalloc/intern/mallocn_guarded_impl.c2
-rw-r--r--intern/iksolver/intern/IK_QTask.cpp2
-rw-r--r--intern/libmv/CMakeLists.txt1
-rwxr-xr-xintern/libmv/bundle.sh1
-rw-r--r--intern/libmv/libmv/multiview/euclidean_resection.h2
-rw-r--r--intern/libmv/libmv/simple_pipeline/bundle.cc17
-rw-r--r--intern/libmv/libmv/simple_pipeline/modal_solver.cc9
-rw-r--r--intern/opensubdiv/internal/evaluator/eval_output.h36
-rw-r--r--intern/opensubdiv/internal/evaluator/eval_output_cpu.h2
-rw-r--r--intern/rigidbody/RBI_api.h2
52 files changed, 1919 insertions, 674 deletions
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 9b4799d252f..9acc9e99ad0 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -24,12 +24,19 @@ from . import camera
enum_devices = (
('CPU', "CPU", "Use CPU for rendering"),
- ('GPU', "GPU Compute", "Use GPU compute device for rendering, configured in the system tab in the user preferences"),
+ ('GPU', "GPU Compute",
+ "Use GPU compute device for rendering, configured in the system tab in the user preferences"),
)
enum_feature_set = (
- ('SUPPORTED', "Supported", "Only use finished and supported features"),
- ('EXPERIMENTAL', "Experimental", "Use experimental and incomplete features that might be broken or change in the future", 'ERROR', 1),
+ ('SUPPORTED',
+ "Supported",
+ "Only use finished and supported features"),
+ ('EXPERIMENTAL',
+ "Experimental",
+ "Use experimental and incomplete features that might be broken or change in the future",
+ 'ERROR',
+ 1),
)
enum_displacement_methods = (
@@ -81,9 +88,14 @@ enum_sampling_pattern = (
)
enum_volume_sampling = (
- ('DISTANCE', "Distance", "Use distance sampling, best for dense volumes with lights far away"),
- ('EQUIANGULAR', "Equiangular", "Use equiangular sampling, best for volumes with low density with light inside or near the volume"),
- ('MULTIPLE_IMPORTANCE', "Multiple Importance",
+ ('DISTANCE',
+ "Distance",
+ "Use distance sampling, best for dense volumes with lights far away"),
+ ('EQUIANGULAR',
+ "Equiangular",
+ "Use equiangular sampling, best for volumes with low density with light inside or near the volume"),
+ ('MULTIPLE_IMPORTANCE',
+ "Multiple Importance",
"Combine distance and equi-angular sampling for volumes where neither method is ideal"),
)
@@ -93,10 +105,15 @@ enum_volume_interpolation = (
)
enum_world_mis = (
- ('NONE', "None", "Don't sample the background, faster but might cause noise for non-solid backgrounds"),
- ('AUTOMATIC', "Auto", "Automatically try to determine the best setting"),
- ('MANUAL', "Manual", "Manually set the resolution of the sampling map, higher values are slower and require more memory but reduce noise")
-)
+ ('NONE',
+ "None",
+ "Don't sample the background, faster but might cause noise for non-solid backgrounds"),
+ ('AUTOMATIC',
+ "Auto",
+ "Automatically try to determine the best setting"),
+ ('MANUAL',
+ "Manual",
+ "Manually set the resolution of the sampling map, higher values are slower and require more memory but reduce noise"))
enum_device_type = (
('CPU', "CPU", "CPU", 0),
@@ -210,17 +227,33 @@ enum_denoising_input_passes = (
)
enum_denoising_prefilter = (
- ('NONE', "None", "No prefiltering, use when guiding passes are noise-free", 1),
- ('FAST', "Fast", "Denoise color and guiding passes together. Improves quality when guiding passes are noisy using least amount of extra processing time", 2),
- ('ACCURATE', "Accurate", "Prefilter noisy guiding passes before denoising color. Improves quality when guiding passes are noisy using extra processing time", 3),
+ ('NONE',
+ "None",
+ "No prefiltering, use when guiding passes are noise-free",
+ 1),
+ ('FAST',
+ "Fast",
+ "Denoise color and guiding passes together. Improves quality when guiding passes are noisy using least amount of extra processing time",
+ 2),
+ ('ACCURATE',
+ "Accurate",
+ "Prefilter noisy guiding passes before denoising color. Improves quality when guiding passes are noisy using extra processing time",
+ 3),
)
enum_direct_light_sampling_type = (
- ('MULTIPLE_IMPORTANCE_SAMPLING', "Multiple Importance Sampling",
- "Multiple importance sampling is used to combine direct light contributions from next-event estimation and forward path tracing", 0),
- ('FORWARD_PATH_TRACING', "Forward Path Tracing", "Direct light contributions are only sampled using forward path tracing", 1),
- ('NEXT_EVENT_ESTIMATION', "Next-Event Estimation",
- "Direct light contributions are only sampled using next-event estimation", 2),
+ ('MULTIPLE_IMPORTANCE_SAMPLING',
+ "Multiple Importance Sampling",
+ "Multiple importance sampling is used to combine direct light contributions from next-event estimation and forward path tracing",
+ 0),
+ ('FORWARD_PATH_TRACING',
+ "Forward Path Tracing",
+ "Direct light contributions are only sampled using forward path tracing",
+ 1),
+ ('NEXT_EVENT_ESTIMATION',
+ "Next-Event Estimation",
+ "Direct light contributions are only sampled using next-event estimation",
+ 2),
)
@@ -357,7 +390,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
scrambling_distance: FloatProperty(
name="Scrambling Distance",
default=1.0,
- min=0.0, soft_max=1.0,
+ min=0.0,
+ soft_max=1.0,
description="Reduce randomization between pixels to improve GPU rendering performance, at the cost of possible rendering artifacts if set too low",
)
preview_scrambling_distance: BoolProperty(
@@ -383,7 +417,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
name="Light Sampling Threshold",
description="Probabilistically terminate light samples when the light contribution is below this threshold (more noise but faster rendering). "
"Zero disables the test and never ignores lights",
- min=0.0, max=1.0,
+ min=0.0,
+ max=1.0,
default=0.01,
)
@@ -395,7 +430,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
adaptive_threshold: FloatProperty(
name="Adaptive Sampling Threshold",
description="Noise level step to stop sampling at, lower values reduce noise at the cost of render time. Zero for automatic setting based on number of AA samples",
- min=0.0, max=1.0,
+ min=0.0,
+ max=1.0,
soft_min=0.001,
default=0.01,
precision=4,
@@ -403,7 +439,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
adaptive_min_samples: IntProperty(
name="Adaptive Min Samples",
description="Minimum AA samples for adaptive sampling, to discover noisy features before stopping sampling. Zero for automatic setting based on noise threshold",
- min=0, max=4096,
+ min=0,
+ max=4096,
default=0,
)
@@ -415,7 +452,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
preview_adaptive_threshold: FloatProperty(
name="Adaptive Sampling Threshold",
description="Noise level step to stop sampling at, lower values reduce noise at the cost of render time. Zero for automatic setting based on number of AA samples, for viewport renders",
- min=0.0, max=1.0,
+ min=0.0,
+ max=1.0,
soft_min=0.001,
default=0.1,
precision=4,
@@ -423,7 +461,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
preview_adaptive_min_samples: IntProperty(
name="Adaptive Min Samples",
description="Minimum AA samples for adaptive sampling, to discover noisy features before stopping sampling. Zero for automatic setting based on noise threshold, for viewport renders",
- min=0, max=4096,
+ min=0,
+ max=4096,
default=0,
)
@@ -550,7 +589,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
max_subdivisions: IntProperty(
name="Max Subdivisions",
description="Stop subdividing when this level is reached even if the dice rate would produce finer tessellation",
- min=0, max=16,
+ min=0,
+ max=16,
default=12,
)
@@ -817,8 +857,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
debug_use_optix_debug: BoolProperty(
name="OptiX Module Debug",
description="Load OptiX module in debug mode: lower logging verbosity level, enable validations, and lower optimization level",
- default=False
- )
+ default=False)
@classmethod
def register(cls):
@@ -1185,7 +1224,8 @@ class CyclesObjectSettings(bpy.types.PropertyGroup):
motion_steps: IntProperty(
name="Motion Steps",
description="Control accuracy of motion blur, more steps gives more memory usage (actual number of steps is 2^(steps - 1))",
- min=1, max=7,
+ min=1,
+ max=7,
default=1,
)
@@ -1224,7 +1264,8 @@ class CyclesObjectSettings(bpy.types.PropertyGroup):
shadow_terminator_geometry_offset: FloatProperty(
name="Shadow Terminator Geometry Offset",
description="Offset rays from the surface to reduce shadow terminator artifact on low poly geometry. Only affects triangles at grazing angles to light",
- min=0.0, max=1.0,
+ min=0.0,
+ max=1.0,
default=0.1,
)
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 886f5345020..9d2dbdf6732 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -1082,8 +1082,18 @@ class CYCLES_OBJECT_PT_motion_blur(CyclesButtonsPanel, Panel):
def has_geometry_visibility(ob):
- return ob and ((ob.type in {'MESH', 'CURVE', 'SURFACE', 'FONT', 'META', 'LIGHT', 'VOLUME', 'POINTCLOUD', 'CURVES'}) or
- (ob.instance_type == 'COLLECTION' and ob.instance_collection))
+ return ob and (
+ (ob.type in {
+ 'MESH',
+ 'CURVE',
+ 'SURFACE',
+ 'FONT',
+ 'META',
+ 'LIGHT',
+ 'VOLUME',
+ 'POINTCLOUD',
+ 'CURVES',
+ }) or (ob.instance_type == 'COLLECTION' and ob.instance_collection))
class CYCLES_OBJECT_PT_shading(CyclesButtonsPanel, Panel):
diff --git a/intern/cycles/blender/image.cpp b/intern/cycles/blender/image.cpp
index ca4c8f5904a..e01b72c1653 100644
--- a/intern/cycles/blender/image.cpp
+++ b/intern/cycles/blender/image.cpp
@@ -13,9 +13,11 @@ CCL_NAMESPACE_BEGIN
BlenderImageLoader::BlenderImageLoader(BL::Image b_image,
const int frame,
+ const int tile_number,
const bool is_preview_render)
: b_image(b_image),
frame(frame),
+ tile_number(tile_number),
/* Don't free cache for preview render to avoid race condition from T93560, to be fixed
properly later as we are close to release. */
free_cache(!is_preview_render && !b_image.has_data())
@@ -66,12 +68,11 @@ bool BlenderImageLoader::load_pixels(const ImageMetaData &metadata,
{
const size_t num_pixels = ((size_t)metadata.width) * metadata.height;
const int channels = metadata.channels;
- const int tile = 0; /* TODO(lukas): Support tiles here? */
if (b_image.is_float()) {
/* image data */
float *image_pixels;
- image_pixels = image_get_float_pixels_for_frame(b_image, frame, tile);
+ image_pixels = image_get_float_pixels_for_frame(b_image, frame, tile_number);
if (image_pixels && num_pixels * channels == pixels_size) {
memcpy(pixels, image_pixels, pixels_size * sizeof(float));
@@ -99,7 +100,7 @@ bool BlenderImageLoader::load_pixels(const ImageMetaData &metadata,
}
}
else {
- unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame, tile);
+ unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame, tile_number);
if (image_pixels && num_pixels * channels == pixels_size) {
memcpy(pixels, image_pixels, pixels_size * sizeof(unsigned char));
@@ -153,7 +154,13 @@ string BlenderImageLoader::name() const
bool BlenderImageLoader::equals(const ImageLoader &other) const
{
const BlenderImageLoader &other_loader = (const BlenderImageLoader &)other;
- return b_image == other_loader.b_image && frame == other_loader.frame;
+ return b_image == other_loader.b_image && frame == other_loader.frame &&
+ tile_number == other_loader.tile_number;
+}
+
+int BlenderImageLoader::get_tile_number() const
+{
+ return tile_number;
}
/* Point Density */
diff --git a/intern/cycles/blender/image.h b/intern/cycles/blender/image.h
index ee576b31f7e..c2cc0f51b9b 100644
--- a/intern/cycles/blender/image.h
+++ b/intern/cycles/blender/image.h
@@ -12,7 +12,10 @@ CCL_NAMESPACE_BEGIN
class BlenderImageLoader : public ImageLoader {
public:
- BlenderImageLoader(BL::Image b_image, const int frame, const bool is_preview_render);
+ BlenderImageLoader(BL::Image b_image,
+ const int frame,
+ const int tile_number,
+ const bool is_preview_render);
bool load_metadata(const ImageDeviceFeatures &features, ImageMetaData &metadata) override;
bool load_pixels(const ImageMetaData &metadata,
@@ -22,8 +25,11 @@ class BlenderImageLoader : public ImageLoader {
string name() const override;
bool equals(const ImageLoader &other) const override;
+ int get_tile_number() const override;
+
BL::Image b_image;
int frame;
+ int tile_number;
bool free_cache;
};
diff --git a/intern/cycles/blender/mesh.cpp b/intern/cycles/blender/mesh.cpp
index de67e27923d..c76ce3801d4 100644
--- a/intern/cycles/blender/mesh.cpp
+++ b/intern/cycles/blender/mesh.cpp
@@ -316,7 +316,7 @@ static void fill_generic_attribute(BL::Mesh &b_mesh,
break;
}
case BL::Attribute::domain_EDGE: {
- /* Averge edge attributes at vertices. */
+ /* Average edge attributes at vertices. */
const size_t num_verts = b_mesh.vertices.length();
vector<int> count(num_verts, 0);
diff --git a/intern/cycles/blender/shader.cpp b/intern/cycles/blender/shader.cpp
index d3527567b96..81a64457c88 100644
--- a/intern/cycles/blender/shader.cpp
+++ b/intern/cycles/blender/shader.cpp
@@ -355,6 +355,18 @@ static ShaderNode *add_node(Scene *scene,
else if (b_node.is_a(&RNA_ShaderNodeCombineHSV)) {
node = graph->create_node<CombineHSVNode>();
}
+ else if (b_node.is_a(&RNA_ShaderNodeSeparateColor)) {
+ BL::ShaderNodeSeparateColor b_separate_node(b_node);
+ SeparateColorNode *separate_node = graph->create_node<SeparateColorNode>();
+ separate_node->set_color_type((NodeCombSepColorType)b_separate_node.mode());
+ node = separate_node;
+ }
+ else if (b_node.is_a(&RNA_ShaderNodeCombineColor)) {
+ BL::ShaderNodeCombineColor b_combine_node(b_node);
+ CombineColorNode *combine_node = graph->create_node<CombineColorNode>();
+ combine_node->set_color_type((NodeCombSepColorType)b_combine_node.mode());
+ node = combine_node;
+ }
else if (b_node.is_a(&RNA_ShaderNodeSeparateXYZ)) {
node = graph->create_node<SeparateXYZNode>();
}
@@ -764,9 +776,21 @@ static ShaderNode *add_node(Scene *scene,
*/
int scene_frame = b_scene.frame_current();
int image_frame = image_user_frame_number(b_image_user, b_image, scene_frame);
- image->handle = scene->image_manager->add_image(
- new BlenderImageLoader(b_image, image_frame, b_engine.is_preview()),
- image->image_params());
+ if (b_image.source() != BL::Image::source_TILED) {
+ image->handle = scene->image_manager->add_image(
+ new BlenderImageLoader(b_image, image_frame, 0, b_engine.is_preview()),
+ image->image_params());
+ }
+ else {
+ vector<ImageLoader *> loaders;
+ loaders.reserve(image->get_tiles().size());
+ for (int tile_number : image->get_tiles()) {
+ loaders.push_back(
+ new BlenderImageLoader(b_image, image_frame, tile_number, b_engine.is_preview()));
+ }
+
+ image->handle = scene->image_manager->add_image(loaders, image->image_params());
+ }
}
else {
ustring filename = ustring(
@@ -802,7 +826,7 @@ static ShaderNode *add_node(Scene *scene,
int scene_frame = b_scene.frame_current();
int image_frame = image_user_frame_number(b_image_user, b_image, scene_frame);
env->handle = scene->image_manager->add_image(
- new BlenderImageLoader(b_image, image_frame, b_engine.is_preview()),
+ new BlenderImageLoader(b_image, image_frame, 0, b_engine.is_preview()),
env->image_params());
}
else {
diff --git a/intern/cycles/device/metal/device_impl.h b/intern/cycles/device/metal/device_impl.h
index 27c58ce6d2f..7506b9b069f 100644
--- a/intern/cycles/device/metal/device_impl.h
+++ b/intern/cycles/device/metal/device_impl.h
@@ -28,7 +28,8 @@ class MetalDevice : public Device {
id<MTLCommandQueue> mtlGeneralCommandQueue = nil;
id<MTLArgumentEncoder> mtlAncillaryArgEncoder =
nil; /* encoder used for fetching device pointers from MTLBuffers */
- string source_used_for_compile[PSO_NUM];
+ string source[PSO_NUM];
+ string source_md5[PSO_NUM];
KernelParamsMetal launch_params = {0};
@@ -72,7 +73,6 @@ class MetalDevice : public Device {
id<MTLBuffer> texture_bindings_3d = nil;
std::vector<id<MTLTexture>> texture_slot_map;
- MetalDeviceKernels kernels;
bool use_metalrt = false;
bool use_function_specialisation = false;
@@ -110,6 +110,8 @@ class MetalDevice : public Device {
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
+ id<MTLLibrary> compile(string const &source);
+
/* ------------------------------------------------------------------ */
/* low-level memory management */
diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm
index c01f51fb506..e1438a9d6e2 100644
--- a/intern/cycles/device/metal/device_impl.mm
+++ b/intern/cycles/device/metal/device_impl.mm
@@ -275,96 +275,44 @@ bool MetalDevice::load_kernels(const uint _kernel_features)
* active, but may still need to be rendered without motion blur if that isn't active as well. */
motion_blur = kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
- NSError *error = NULL;
+ source[PSO_GENERIC] = get_source(kernel_features);
+ mtlLibrary[PSO_GENERIC] = compile(source[PSO_GENERIC]);
- for (int i = 0; i < PSO_NUM; i++) {
- if (mtlLibrary[i]) {
- [mtlLibrary[i] release];
- mtlLibrary[i] = nil;
- }
- }
+ MD5Hash md5;
+ md5.append(source[PSO_GENERIC]);
+ source_md5[PSO_GENERIC] = md5.get_hex();
+
+ metal_printf("Front-end compilation finished (generic)\n");
+
+ bool result = MetalDeviceKernels::load(this, false);
+
+ reserve_local_memory(kernel_features);
+
+ return result;
+}
+id<MTLLibrary> MetalDevice::compile(string const &source)
+{
MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
options.fastMathEnabled = YES;
if (@available(macOS 12.0, *)) {
options.languageVersion = MTLLanguageVersion2_4;
}
- else {
- return false;
- }
- string metalsrc;
-
- /* local helper: dump source to disk and return filepath */
- auto dump_source = [&](int kernel_type) -> string {
- string &source = source_used_for_compile[kernel_type];
- string metalsrc = path_cache_get(path_join("kernels",
- string_printf("%s.%s.metal",
- kernel_type_as_string(kernel_type),
- util_md5_string(source).c_str())));
- path_write_text(metalsrc, source);
- return metalsrc;
- };
-
- /* local helper: fetch the kernel source code, adjust it for specific PSO_.. kernel_type flavor,
- * then compile it into a MTLLibrary */
- auto fetch_and_compile_source = [&](int kernel_type) {
- /* Record the source used to compile this library, for hash building later. */
- string &source = source_used_for_compile[kernel_type];
-
- switch (kernel_type) {
- case PSO_GENERIC: {
- source = get_source(kernel_features);
- break;
- }
- case PSO_SPECIALISED: {
- /* PSO_SPECIALISED derives from PSO_GENERIC */
- string &generic_source = source_used_for_compile[PSO_GENERIC];
- if (generic_source.empty()) {
- generic_source = get_source(kernel_features);
- }
- source = "#define __KERNEL_METAL_USE_FUNCTION_SPECIALISATION__\n" + generic_source;
- break;
- }
- default:
- assert(0);
- }
-
- /* create MTLLibrary (front-end compilation) */
- mtlLibrary[kernel_type] = [mtlDevice newLibraryWithSource:@(source.c_str())
+ NSError *error = NULL;
+ id<MTLLibrary> mtlLibrary = [mtlDevice newLibraryWithSource:@(source.c_str())
options:options
error:&error];
- bool do_source_dump = (getenv("CYCLES_METAL_DUMP_SOURCE") != nullptr);
-
- if (!mtlLibrary[kernel_type] || do_source_dump) {
- string metalsrc = dump_source(kernel_type);
-
- if (!mtlLibrary[kernel_type]) {
- NSString *err = [error localizedDescription];
- set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
-
- return false;
- }
- }
- return true;
- };
-
- fetch_and_compile_source(PSO_GENERIC);
-
- if (use_function_specialisation) {
- fetch_and_compile_source(PSO_SPECIALISED);
+ if (!mtlLibrary) {
+ NSString *err = [error localizedDescription];
+ set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
}
- metal_printf("Front-end compilation finished\n");
-
- bool result = kernels.load(this, PSO_GENERIC);
-
[options release];
- reserve_local_memory(kernel_features);
- return result;
+ return mtlLibrary;
}
void MetalDevice::reserve_local_memory(const uint kernel_features)
diff --git a/intern/cycles/device/metal/kernel.h b/intern/cycles/device/metal/kernel.h
index b12491d820d..69b2a686ecc 100644
--- a/intern/cycles/device/metal/kernel.h
+++ b/intern/cycles/device/metal/kernel.h
@@ -54,103 +54,41 @@ enum {
const char *kernel_type_as_string(int kernel_type);
struct MetalKernelPipeline {
- void release()
- {
- if (pipeline) {
- [pipeline release];
- pipeline = nil;
- if (@available(macOS 11.0, *)) {
- for (int i = 0; i < METALRT_TABLE_NUM; i++) {
- if (intersection_func_table[i]) {
- [intersection_func_table[i] release];
- intersection_func_table[i] = nil;
- }
- }
- }
- }
- if (function) {
- [function release];
- function = nil;
- }
- if (@available(macOS 11.0, *)) {
- for (int i = 0; i < METALRT_TABLE_NUM; i++) {
- if (intersection_func_table[i]) {
- [intersection_func_table[i] release];
- }
- }
- }
- }
- bool loaded = false;
- id<MTLFunction> function = nil;
- id<MTLComputePipelineState> pipeline = nil;
-
- API_AVAILABLE(macos(11.0))
- id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil};
-};
-
-struct MetalKernelLoadDesc {
- int pso_index = 0;
- const char *function_name = nullptr;
- int kernel_index = 0;
- int threads_per_threadgroup = 0;
- MTLFunctionConstantValues *constant_values = nullptr;
- NSArray *linked_functions = nullptr;
-
- struct IntersectorFunctions {
- NSArray *defaults;
- NSArray *shadow;
- NSArray *local;
- NSArray *operator[](int index) const
- {
- if (index == METALRT_TABLE_DEFAULT)
- return defaults;
- if (index == METALRT_TABLE_SHADOW)
- return shadow;
- return local;
- }
- } intersector_functions = {nullptr};
-};
-
-/* Metal kernel and associate occupancy information. */
-class MetalDeviceKernel {
- public:
- ~MetalDeviceKernel();
+ void compile();
- bool load(MetalDevice *device, MetalKernelLoadDesc const &desc, class MD5Hash const &md5);
+ id<MTLLibrary> mtlLibrary = nil;
+ bool scene_specialized;
+ string source_md5;
- void mark_loaded(int pso_index)
- {
- pso[pso_index].loaded = true;
- }
+ bool use_metalrt;
+ bool metalrt_hair;
+ bool metalrt_hair_thick;
+ bool metalrt_pointcloud;
- int get_num_threads_per_block() const
- {
- return num_threads_per_block;
- }
- const MetalKernelPipeline &get_pso() const;
+ int threads_per_threadgroup;
- double load_duration = 0.0;
+ DeviceKernel device_kernel;
+ bool loaded = false;
+ id<MTLDevice> mtlDevice = nil;
+ id<MTLFunction> function = nil;
+ id<MTLComputePipelineState> pipeline = nil;
+ int num_threads_per_block = 0;
- private:
- MetalKernelPipeline pso[PSO_NUM];
+ string error_str;
- int num_threads_per_block = 0;
+ API_AVAILABLE(macos(11.0))
+ id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil};
+ id<MTLFunction> rt_intersection_function[METALRT_FUNC_NUM] = {nil};
};
/* Cache of Metal kernels for each DeviceKernel. */
-class MetalDeviceKernels {
- public:
- bool load(MetalDevice *device, int kernel_type);
- bool available(DeviceKernel kernel) const;
- const MetalDeviceKernel &get(DeviceKernel kernel) const;
+namespace MetalDeviceKernels {
- MetalDeviceKernel kernels_[DEVICE_KERNEL_NUM];
+bool load(MetalDevice *device, bool scene_specialized);
+const MetalKernelPipeline *get_best_pipeline(const MetalDevice *device, DeviceKernel kernel);
- id<MTLFunction> rt_intersection_funcs[PSO_NUM][METALRT_FUNC_NUM] = {{nil}};
-
- string loaded_md5[PSO_NUM];
-};
+} /* namespace MetalDeviceKernels */
CCL_NAMESPACE_END
diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index 9555ca03c8e..304efc813ec 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -9,6 +9,7 @@
# include "util/path.h"
# include "util/tbb.h"
# include "util/time.h"
+# include "util/unique_ptr.h"
CCL_NAMESPACE_BEGIN
@@ -28,82 +29,376 @@ const char *kernel_type_as_string(int kernel_type)
return "";
}
-MetalDeviceKernel::~MetalDeviceKernel()
+bool kernel_has_intersection(DeviceKernel device_kernel)
{
- for (int i = 0; i < PSO_NUM; i++) {
- pso[i].release();
+ return (device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
+ device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
+ device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
+ device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK ||
+ device_kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE);
+}
+
+struct ShaderCache {
+ ShaderCache(id<MTLDevice> _mtlDevice) : mtlDevice(_mtlDevice)
+ {
+ }
+ ~ShaderCache();
+
+ /* Get the fastest available pipeline for the specified kernel. */
+ MetalKernelPipeline *get_best_pipeline(DeviceKernel kernel, const MetalDevice *device);
+
+ /* Non-blocking request for a kernel, optionally specialized to the scene being rendered by
+ * device. */
+ void load_kernel(DeviceKernel kernel, MetalDevice *device, bool scene_specialized);
+
+ void wait_for_all();
+
+ private:
+ friend ShaderCache *get_shader_cache(id<MTLDevice> mtlDevice);
+
+ void compile_thread_func(int thread_index);
+
+ using PipelineCollection = std::vector<unique_ptr<MetalKernelPipeline>>;
+
+ struct PipelineRequest {
+ MetalKernelPipeline *pipeline = nullptr;
+ std::function<void(MetalKernelPipeline *)> completionHandler;
+ };
+
+ std::mutex cache_mutex;
+
+ PipelineCollection pipelines[DEVICE_KERNEL_NUM];
+ id<MTLDevice> mtlDevice;
+
+ bool running = false;
+ std::condition_variable cond_var;
+ std::deque<PipelineRequest> request_queue;
+ std::vector<std::thread> compile_threads;
+ std::atomic_int incomplete_requests = 0;
+};
+
+std::mutex g_shaderCacheMutex;
+std::map<id<MTLDevice>, unique_ptr<ShaderCache>> g_shaderCache;
+
+ShaderCache *get_shader_cache(id<MTLDevice> mtlDevice)
+{
+ thread_scoped_lock lock(g_shaderCacheMutex);
+ auto it = g_shaderCache.find(mtlDevice);
+ if (it != g_shaderCache.end()) {
+ return it->second.get();
+ }
+
+ g_shaderCache[mtlDevice] = make_unique<ShaderCache>(mtlDevice);
+ return g_shaderCache[mtlDevice].get();
+}
+
+ShaderCache::~ShaderCache()
+{
+ metal_printf("ShaderCache shutting down with incomplete_requests = %d\n",
+ int(incomplete_requests));
+
+ running = false;
+ cond_var.notify_all();
+ for (auto &thread : compile_threads) {
+ thread.join();
+ }
+}
+
+void ShaderCache::wait_for_all()
+{
+ while (incomplete_requests > 0) {
+ std::this_thread::sleep_for(std::chrono::milliseconds(100));
+ }
+}
+
+void ShaderCache::compile_thread_func(int thread_index)
+{
+ while (1) {
+
+ /* wait for / acquire next request */
+ PipelineRequest request;
+ {
+ thread_scoped_lock lock(cache_mutex);
+ cond_var.wait(lock, [&] { return !running || !request_queue.empty(); });
+ if (!running) {
+ break;
+ }
+
+ if (!request_queue.empty()) {
+ request = request_queue.front();
+ request_queue.pop_front();
+ }
+ }
+
+ /* service request */
+ if (request.pipeline) {
+ request.pipeline->compile();
+ incomplete_requests--;
+ }
}
}
-bool MetalDeviceKernel::load(MetalDevice *device,
- MetalKernelLoadDesc const &desc_in,
- MD5Hash const &md5)
+void ShaderCache::load_kernel(DeviceKernel device_kernel,
+ MetalDevice *device,
+ bool scene_specialized)
{
- __block MetalKernelLoadDesc const desc(desc_in);
- if (desc.kernel_index == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
+ {
+ /* create compiler threads on first run */
+ thread_scoped_lock lock(cache_mutex);
+ if (compile_threads.empty()) {
+ running = true;
+ for (int i = 0; i < max_mtlcompiler_threads; i++) {
+ compile_threads.push_back(std::thread([&] { compile_thread_func(i); }));
+ }
+ }
+ }
+
+ if (device_kernel == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
/* skip megakernel */
- return true;
+ return;
}
- bool use_binary_archive = true;
- if (device->device_vendor == METAL_GPU_APPLE) {
- /* Workaround for T94142: Cycles Metal crash with simultaneous viewport and final render */
- use_binary_archive = false;
+ if (scene_specialized) {
+ /* Only specialize kernels where it can make an impact. */
+ if (device_kernel < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
+ device_kernel > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
+ return;
+ }
}
- if (auto str = getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) {
- use_binary_archive = (atoi(str) == 0);
+ {
+ /* check whether the kernel has already been requested / cached */
+ thread_scoped_lock lock(cache_mutex);
+ for (auto &pipeline : pipelines[device_kernel]) {
+ if (scene_specialized) {
+ if (pipeline->source_md5 == device->source_md5[PSO_SPECIALISED]) {
+ /* we already requested a pipeline that is specialized for this kernel data */
+ metal_printf("Specialized kernel already requested (%s)\n",
+ device_kernel_as_string(device_kernel));
+ return;
+ }
+ }
+ else {
+ if (pipeline->source_md5 == device->source_md5[PSO_GENERIC]) {
+ /* we already requested a generic pipeline for this kernel */
+ metal_printf("Generic kernel already requested (%s)\n",
+ device_kernel_as_string(device_kernel));
+ return;
+ }
+ }
+ }
}
- id<MTLBinaryArchive> archive = nil;
- string metalbin_path;
- if (use_binary_archive) {
- NSProcessInfo *processInfo = [NSProcessInfo processInfo];
- string osVersion = [[processInfo operatingSystemVersionString] UTF8String];
- MD5Hash local_md5(md5);
- local_md5.append(osVersion);
- string metalbin_name = string(desc.function_name) + "." + local_md5.get_hex() +
- to_string(desc.pso_index) + ".bin";
- metalbin_path = path_cache_get(path_join("kernels", metalbin_name));
- path_create_directories(metalbin_path);
+ incomplete_requests++;
+
+ PipelineRequest request;
+ request.pipeline = new MetalKernelPipeline;
+ request.pipeline->scene_specialized = scene_specialized;
+ request.pipeline->mtlDevice = mtlDevice;
+ request.pipeline->source_md5 =
+ device->source_md5[scene_specialized ? PSO_SPECIALISED : PSO_GENERIC];
+ request.pipeline->mtlLibrary =
+ device->mtlLibrary[scene_specialized ? PSO_SPECIALISED : PSO_GENERIC];
+ request.pipeline->device_kernel = device_kernel;
+ request.pipeline->threads_per_threadgroup = device->max_threads_per_threadgroup;
+
+ /* metalrt options */
+ request.pipeline->use_metalrt = device->use_metalrt;
+ request.pipeline->metalrt_hair = device->use_metalrt &&
+ (device->kernel_features & KERNEL_FEATURE_HAIR);
+ request.pipeline->metalrt_hair_thick = device->use_metalrt &&
+ (device->kernel_features & KERNEL_FEATURE_HAIR_THICK);
+ request.pipeline->metalrt_pointcloud = device->use_metalrt &&
+ (device->kernel_features & KERNEL_FEATURE_POINTCLOUD);
+
+ {
+ thread_scoped_lock lock(cache_mutex);
+ pipelines[device_kernel].push_back(unique_ptr<MetalKernelPipeline>(request.pipeline));
+ request_queue.push_back(request);
+ }
+ cond_var.notify_one();
+}
- if (path_exists(metalbin_path) && use_binary_archive) {
- if (@available(macOS 11.0, *)) {
- MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
- archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())];
- archive = [device->mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
- [archiveDesc release];
+MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const MetalDevice *device)
+{
+ thread_scoped_lock lock(cache_mutex);
+ auto &collection = pipelines[kernel];
+ if (collection.empty()) {
+ return nullptr;
+ }
+
+ /* metalrt options */
+ bool use_metalrt = device->use_metalrt;
+ bool metalrt_hair = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR);
+ bool metalrt_hair_thick = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR_THICK);
+ bool metalrt_pointcloud = use_metalrt && (device->kernel_features & KERNEL_FEATURE_POINTCLOUD);
+
+ MetalKernelPipeline *best_pipeline = nullptr;
+ for (auto &pipeline : collection) {
+ if (!pipeline->loaded) {
+ /* still loading - ignore */
+ continue;
+ }
+
+ if (pipeline->use_metalrt != use_metalrt || pipeline->metalrt_hair != metalrt_hair ||
+ pipeline->metalrt_hair_thick != metalrt_hair_thick ||
+ pipeline->metalrt_pointcloud != metalrt_pointcloud) {
+ /* wrong combination of metalrt options */
+ continue;
+ }
+
+ if (pipeline->scene_specialized) {
+ if (pipeline->source_md5 == device->source_md5[PSO_SPECIALISED]) {
+ best_pipeline = pipeline.get();
}
}
+ else if (!best_pipeline) {
+ best_pipeline = pipeline.get();
+ }
+ }
+
+ return best_pipeline;
+}
+
+void MetalKernelPipeline::compile()
+{
+ int pso_type = scene_specialized ? PSO_SPECIALISED : PSO_GENERIC;
+
+ const std::string function_name = std::string("cycles_metal_") +
+ device_kernel_as_string(device_kernel);
+
+ int threads_per_threadgroup = this->threads_per_threadgroup;
+ if (device_kernel > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL &&
+ device_kernel < DEVICE_KERNEL_INTEGRATOR_RESET) {
+ /* Always use 512 for the sorting kernels */
+ threads_per_threadgroup = 512;
}
- NSString *entryPoint = [@(desc.function_name) copy];
+ NSString *entryPoint = [@(function_name.c_str()) copy];
NSError *error = NULL;
if (@available(macOS 11.0, *)) {
MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
func_desc.name = entryPoint;
- if (desc.constant_values) {
- func_desc.constantValues = desc.constant_values;
- }
- pso[desc.pso_index].function = [device->mtlLibrary[desc.pso_index]
- newFunctionWithDescriptor:func_desc
- error:&error];
+ function = [mtlLibrary newFunctionWithDescriptor:func_desc error:&error];
}
+
[entryPoint release];
- if (pso[desc.pso_index].function == nil) {
+ if (function == nil) {
NSString *err = [error localizedDescription];
string errors = [err UTF8String];
+ metal_printf("Error getting function \"%s\": %s", function_name.c_str(), errors.c_str());
+ return;
+ }
+
+ function.label = [entryPoint copy];
+
+ if (use_metalrt) {
+ if (@available(macOS 11.0, *)) {
+ /* create the id<MTLFunction> for each intersection function */
+ const char *function_names[] = {
+ "__anyhit__cycles_metalrt_visibility_test_tri",
+ "__anyhit__cycles_metalrt_visibility_test_box",
+ "__anyhit__cycles_metalrt_shadow_all_hit_tri",
+ "__anyhit__cycles_metalrt_shadow_all_hit_box",
+ "__anyhit__cycles_metalrt_local_hit_tri",
+ "__anyhit__cycles_metalrt_local_hit_box",
+ "__intersection__curve_ribbon",
+ "__intersection__curve_ribbon_shadow",
+ "__intersection__curve_all",
+ "__intersection__curve_all_shadow",
+ "__intersection__point",
+ "__intersection__point_shadow",
+ };
+ assert(sizeof(function_names) / sizeof(function_names[0]) == METALRT_FUNC_NUM);
+
+ MTLFunctionDescriptor *desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
+ for (int i = 0; i < METALRT_FUNC_NUM; i++) {
+ const char *function_name = function_names[i];
+ desc.name = [@(function_name) copy];
+
+ NSError *error = NULL;
+ rt_intersection_function[i] = [mtlLibrary newFunctionWithDescriptor:desc error:&error];
- device->set_error(
- string_printf("Error getting function \"%s\": %s", desc.function_name, errors.c_str()));
- return false;
+ if (rt_intersection_function[i] == nil) {
+ NSString *err = [error localizedDescription];
+ string errors = [err UTF8String];
+
+ error_str = string_printf(
+ "Error getting intersection function \"%s\": %s", function_name, errors.c_str());
+ break;
+ }
+
+ rt_intersection_function[i].label = [@(function_name) copy];
+ }
+ }
}
- pso[desc.pso_index].function.label = [@(desc.function_name) copy];
+ NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
+ NSArray *linked_functions = nil;
+
+ if (use_metalrt) {
+ id<MTLFunction> curve_intersect_default = nil;
+ id<MTLFunction> curve_intersect_shadow = nil;
+ id<MTLFunction> point_intersect_default = nil;
+ id<MTLFunction> point_intersect_shadow = nil;
+ if (metalrt_hair) {
+ /* Add curve intersection programs. */
+ if (metalrt_hair_thick) {
+ /* Slower programs for thick hair since that also slows down ribbons.
+ * Ideally this should not be needed. */
+ curve_intersect_default = rt_intersection_function[METALRT_FUNC_CURVE_ALL];
+ curve_intersect_shadow = rt_intersection_function[METALRT_FUNC_CURVE_ALL_SHADOW];
+ }
+ else {
+ curve_intersect_default = rt_intersection_function[METALRT_FUNC_CURVE_RIBBON];
+ curve_intersect_shadow = rt_intersection_function[METALRT_FUNC_CURVE_RIBBON_SHADOW];
+ }
+ }
+ if (metalrt_pointcloud) {
+ point_intersect_default = rt_intersection_function[METALRT_FUNC_POINT];
+ point_intersect_shadow = rt_intersection_function[METALRT_FUNC_POINT_SHADOW];
+ }
+ table_functions[METALRT_TABLE_DEFAULT] = [NSArray
+ arrayWithObjects:rt_intersection_function[METALRT_FUNC_DEFAULT_TRI],
+ curve_intersect_default ?
+ curve_intersect_default :
+ rt_intersection_function[METALRT_FUNC_DEFAULT_BOX],
+ point_intersect_default ?
+ point_intersect_default :
+ rt_intersection_function[METALRT_FUNC_DEFAULT_BOX],
+ nil];
+ table_functions[METALRT_TABLE_SHADOW] = [NSArray
+ arrayWithObjects:rt_intersection_function[METALRT_FUNC_SHADOW_TRI],
+ curve_intersect_shadow ?
+ curve_intersect_shadow :
+ rt_intersection_function[METALRT_FUNC_SHADOW_BOX],
+ point_intersect_shadow ?
+ point_intersect_shadow :
+ rt_intersection_function[METALRT_FUNC_SHADOW_BOX],
+ nil];
+ table_functions[METALRT_TABLE_LOCAL] = [NSArray
+ arrayWithObjects:rt_intersection_function[METALRT_FUNC_LOCAL_TRI],
+ rt_intersection_function[METALRT_FUNC_LOCAL_BOX],
+ rt_intersection_function[METALRT_FUNC_LOCAL_BOX],
+ nil];
+
+ NSMutableSet *unique_functions = [NSMutableSet
+ setWithArray:table_functions[METALRT_TABLE_DEFAULT]];
+ [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_SHADOW]];
+ [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL]];
+
+ if (kernel_has_intersection(device_kernel)) {
+ linked_functions = [[NSArray arrayWithArray:[unique_functions allObjects]]
+ sortedArrayUsingComparator:^NSComparisonResult(id<MTLFunction> f1, id<MTLFunction> f2) {
+ return [f1.label compare:f2.label];
+ }];
+ }
+ unique_functions = nil;
+ }
- __block MTLComputePipelineDescriptor *computePipelineStateDescriptor =
+ MTLComputePipelineDescriptor *computePipelineStateDescriptor =
[[MTLComputePipelineDescriptor alloc] init];
computePipelineStateDescriptor.buffers[0].mutability = MTLMutabilityImmutable;
@@ -111,52 +406,86 @@ bool MetalDeviceKernel::load(MetalDevice *device,
computePipelineStateDescriptor.buffers[2].mutability = MTLMutabilityImmutable;
if (@available(macos 10.14, *)) {
- computePipelineStateDescriptor.maxTotalThreadsPerThreadgroup = desc.threads_per_threadgroup;
+ computePipelineStateDescriptor.maxTotalThreadsPerThreadgroup = threads_per_threadgroup;
}
computePipelineStateDescriptor.threadGroupSizeIsMultipleOfThreadExecutionWidth = true;
- computePipelineStateDescriptor.computeFunction = pso[desc.pso_index].function;
+ computePipelineStateDescriptor.computeFunction = function;
+
if (@available(macOS 11.0, *)) {
/* Attach the additional functions to an MTLLinkedFunctions object */
- if (desc.linked_functions) {
+ if (linked_functions) {
computePipelineStateDescriptor.linkedFunctions = [[MTLLinkedFunctions alloc] init];
- computePipelineStateDescriptor.linkedFunctions.functions = desc.linked_functions;
+ computePipelineStateDescriptor.linkedFunctions.functions = linked_functions;
}
-
computePipelineStateDescriptor.maxCallStackDepth = 1;
+ if (use_metalrt) {
+ computePipelineStateDescriptor.maxCallStackDepth = 8;
+ }
}
- /* Create a new Compute pipeline state object */
MTLPipelineOption pipelineOptions = MTLPipelineOptionNone;
- bool creating_new_archive = false;
+ bool use_binary_archive = true;
+ if (auto str = getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) {
+ use_binary_archive = (atoi(str) == 0);
+ }
+
+ id<MTLBinaryArchive> archive = nil;
+ string metalbin_path;
+ string metalbin_name;
+ if (use_binary_archive) {
+ NSProcessInfo *processInfo = [NSProcessInfo processInfo];
+ string osVersion = [[processInfo operatingSystemVersionString] UTF8String];
+ MD5Hash local_md5;
+ local_md5.append(source_md5);
+ local_md5.append(osVersion);
+ local_md5.append((uint8_t *)&this->threads_per_threadgroup,
+ sizeof(this->threads_per_threadgroup));
+
+ string options;
+ if (use_metalrt && kernel_has_intersection(device_kernel)) {
+ /* incorporate any MetalRT specializations into the archive name */
+ options += string_printf(".hair_%d.hair_thick_%d.pointcloud_%d",
+ metalrt_hair ? 1 : 0,
+ metalrt_hair_thick ? 1 : 0,
+ metalrt_pointcloud ? 1 : 0);
+ }
+
+ /* Replace non-alphanumerical characters with underscores. */
+ string device_name = [mtlDevice.name UTF8String];
+ for (char &c : device_name) {
+ if ((c < '0' || c > '9') && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z')) {
+ c = '_';
+ }
+ }
+
+ metalbin_name = device_name;
+ metalbin_name = path_join(metalbin_name, device_kernel_as_string(device_kernel));
+ metalbin_name = path_join(metalbin_name, kernel_type_as_string(pso_type));
+ metalbin_name = path_join(metalbin_name, local_md5.get_hex() + options + ".bin");
+
+ metalbin_path = path_cache_get(path_join("kernels", metalbin_name));
+ path_create_directories(metalbin_path);
+
+ if (path_exists(metalbin_path) && use_binary_archive) {
+ if (@available(macOS 11.0, *)) {
+ MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
+ archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())];
+ archive = [mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
+ [archiveDesc release];
+ }
+ }
+ }
+
+ __block bool creating_new_archive = false;
if (@available(macOS 11.0, *)) {
if (use_binary_archive) {
if (!archive) {
MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
archiveDesc.url = nil;
- archive = [device->mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
+ archive = [mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
creating_new_archive = true;
-
- double starttime = time_dt();
-
- if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor
- error:&error]) {
- NSString *errStr = [error localizedDescription];
- metal_printf("Failed to add PSO to archive:\n%s\n",
- errStr ? [errStr UTF8String] : "nil");
- }
- else {
- double duration = time_dt() - starttime;
- metal_printf("%2d | %-55s | %7.2fs\n",
- desc.kernel_index,
- device_kernel_as_string((DeviceKernel)desc.kernel_index),
- duration);
-
- if (desc.pso_index == PSO_GENERIC) {
- this->load_duration = duration;
- }
- }
}
computePipelineStateDescriptor.binaryArchives = [NSArray arrayWithObjects:archive, nil];
pipelineOptions = MTLPipelineOptionFailOnBinaryArchiveMiss;
@@ -170,17 +499,14 @@ bool MetalDeviceKernel::load(MetalDevice *device,
MTLComputePipelineReflection *reflection,
NSError *error) {
bool recreate_archive = false;
- if (computePipelineState == nil && archive && !creating_new_archive) {
-
- assert(0);
-
+ if (computePipelineState == nil && archive) {
NSString *errStr = [error localizedDescription];
metal_printf(
"Failed to create compute pipeline state \"%s\" from archive - attempting recreation... "
"(error: %s)\n",
- device_kernel_as_string((DeviceKernel)desc.kernel_index),
+ device_kernel_as_string((DeviceKernel)device_kernel),
errStr ? [errStr UTF8String] : "nil");
- computePipelineState = [device->mtlDevice
+ computePipelineState = [mtlDevice
newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
options:MTLPipelineOptionNone
reflection:nullptr
@@ -192,32 +518,23 @@ bool MetalDeviceKernel::load(MetalDevice *device,
if (computePipelineState == nil) {
NSString *errStr = [error localizedDescription];
- device->set_error(string_printf("Failed to create compute pipeline state \"%s\", error: \n",
- device_kernel_as_string((DeviceKernel)desc.kernel_index)) +
- (errStr ? [errStr UTF8String] : "nil"));
- metal_printf("%2d | %-55s | %7.2fs | FAILED!\n",
- desc.kernel_index,
- device_kernel_as_string((DeviceKernel)desc.kernel_index),
+ error_str = string_printf("Failed to create compute pipeline state \"%s\", error: \n",
+ device_kernel_as_string((DeviceKernel)device_kernel));
+ error_str += (errStr ? [errStr UTF8String] : "nil");
+ metal_printf("%16s | %2d | %-55s | %7.2fs | FAILED!\n",
+ kernel_type_as_string(pso_type),
+ device_kernel,
+ device_kernel_as_string((DeviceKernel)device_kernel),
duration);
return;
}
- pso[desc.pso_index].pipeline = computePipelineState;
- num_threads_per_block = round_down(computePipelineState.maxTotalThreadsPerThreadgroup,
- computePipelineState.threadExecutionWidth);
+ int num_threads_per_block = round_down(computePipelineState.maxTotalThreadsPerThreadgroup,
+ computePipelineState.threadExecutionWidth);
num_threads_per_block = std::max(num_threads_per_block,
(int)computePipelineState.threadExecutionWidth);
-
- if (!use_binary_archive) {
- metal_printf("%2d | %-55s | %7.2fs\n",
- desc.kernel_index,
- device_kernel_as_string((DeviceKernel)desc.kernel_index),
- duration);
-
- if (desc.pso_index == PSO_GENERIC) {
- this->load_duration = duration;
- }
- }
+ this->pipeline = computePipelineState;
+ this->num_threads_per_block = num_threads_per_block;
if (@available(macOS 11.0, *)) {
if (creating_new_archive || recreate_archive) {
@@ -228,304 +545,85 @@ bool MetalDeviceKernel::load(MetalDevice *device,
}
}
}
+ };
- [computePipelineStateDescriptor release];
- computePipelineStateDescriptor = nil;
-
- if (device->use_metalrt && desc.linked_functions) {
- for (int table = 0; table < METALRT_TABLE_NUM; table++) {
- if (@available(macOS 11.0, *)) {
- MTLIntersectionFunctionTableDescriptor *ift_desc =
- [[MTLIntersectionFunctionTableDescriptor alloc] init];
- ift_desc.functionCount = desc.intersector_functions[table].count;
-
- pso[desc.pso_index].intersection_func_table[table] = [pso[desc.pso_index].pipeline
- newIntersectionFunctionTableWithDescriptor:ift_desc];
-
- /* Finally write the function handles into this pipeline's table */
- for (int i = 0; i < 2; i++) {
- id<MTLFunctionHandle> handle = [pso[desc.pso_index].pipeline
- functionHandleWithFunction:desc.intersector_functions[table][i]];
- [pso[desc.pso_index].intersection_func_table[table] setFunction:handle atIndex:i];
- }
+ /* Block on load to ensure we continue with a valid kernel function */
+ if (creating_new_archive) {
+ starttime = time_dt();
+ NSError *error;
+ if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor
+ error:&error]) {
+ NSString *errStr = [error localizedDescription];
+ metal_printf("Failed to add PSO to archive:\n%s\n", errStr ? [errStr UTF8String] : "nil");
+ }
+ }
+ id<MTLComputePipelineState> pipeline = [mtlDevice
+ newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
+ options:pipelineOptions
+ reflection:nullptr
+ error:&error];
+ completionHandler(pipeline, nullptr, error);
+
+ this->loaded = true;
+ [computePipelineStateDescriptor release];
+ computePipelineStateDescriptor = nil;
+
+ if (use_metalrt && linked_functions) {
+ for (int table = 0; table < METALRT_TABLE_NUM; table++) {
+ if (@available(macOS 11.0, *)) {
+ MTLIntersectionFunctionTableDescriptor *ift_desc =
+ [[MTLIntersectionFunctionTableDescriptor alloc] init];
+ ift_desc.functionCount = table_functions[table].count;
+ intersection_func_table[table] = [this->pipeline
+ newIntersectionFunctionTableWithDescriptor:ift_desc];
+
+ /* Finally write the function handles into this pipeline's table */
+ for (int i = 0; i < 2; i++) {
+ id<MTLFunctionHandle> handle = [pipeline
+ functionHandleWithFunction:table_functions[table][i]];
+ [intersection_func_table[table] setFunction:handle atIndex:i];
}
}
}
+ }
- mark_loaded(desc.pso_index);
- };
+ double duration = time_dt() - starttime;
- if (desc.pso_index == PSO_SPECIALISED) {
- /* Asynchronous load */
- dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
- NSError *error;
- id<MTLComputePipelineState> pipeline = [device->mtlDevice
- newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
- options:pipelineOptions
- reflection:nullptr
- error:&error];
- completionHandler(pipeline, nullptr, error);
- });
+ if (!use_binary_archive) {
+ metal_printf("%16s | %2d | %-55s | %7.2fs\n",
+ kernel_type_as_string(pso_type),
+ int(device_kernel),
+ device_kernel_as_string(device_kernel),
+ duration);
}
else {
- /* Block on load to ensure we continue with a valid kernel function */
- id<MTLComputePipelineState> pipeline = [device->mtlDevice
- newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
- options:pipelineOptions
- reflection:nullptr
- error:&error];
- completionHandler(pipeline, nullptr, error);
- }
-
- return true;
-}
-
-const MetalKernelPipeline &MetalDeviceKernel::get_pso() const
-{
- if (pso[PSO_SPECIALISED].loaded) {
- return pso[PSO_SPECIALISED];
+ metal_printf("%16s | %2d | %-55s | %7.2fs | %s: %s\n",
+ kernel_type_as_string(pso_type),
+ device_kernel,
+ device_kernel_as_string((DeviceKernel)device_kernel),
+ duration,
+ creating_new_archive ? " new" : "load",
+ metalbin_name.c_str());
}
-
- assert(pso[PSO_GENERIC].loaded);
- return pso[PSO_GENERIC];
}
-bool MetalDeviceKernels::load(MetalDevice *device, int kernel_type)
+bool MetalDeviceKernels::load(MetalDevice *device, bool scene_specialized)
{
- bool any_error = false;
-
- MD5Hash md5;
-
- /* Build the function constant table */
- MTLFunctionConstantValues *constant_values = nullptr;
- if (kernel_type == PSO_SPECIALISED) {
- constant_values = [MTLFunctionConstantValues new];
-
-# define KERNEL_FILM(_type, name) \
- [constant_values setConstantValue:&data.film.name \
- type:get_MTLDataType_##_type() \
- atIndex:KernelData_film_##name]; \
- md5.append((uint8_t *)&data.film.name, sizeof(data.film.name));
-
-# define KERNEL_BACKGROUND(_type, name) \
- [constant_values setConstantValue:&data.background.name \
- type:get_MTLDataType_##_type() \
- atIndex:KernelData_background_##name]; \
- md5.append((uint8_t *)&data.background.name, sizeof(data.background.name));
-
-# define KERNEL_INTEGRATOR(_type, name) \
- [constant_values setConstantValue:&data.integrator.name \
- type:get_MTLDataType_##_type() \
- atIndex:KernelData_integrator_##name]; \
- md5.append((uint8_t *)&data.integrator.name, sizeof(data.integrator.name));
-
-# define KERNEL_BVH(_type, name) \
- [constant_values setConstantValue:&data.bvh.name \
- type:get_MTLDataType_##_type() \
- atIndex:KernelData_bvh_##name]; \
- md5.append((uint8_t *)&data.bvh.name, sizeof(data.bvh.name));
-
- /* METAL_WIP: populate constant_values based on KernelData */
- assert(0);
- /*
- const KernelData &data = device->launch_params.data;
- # include "kernel/types/background.h"
- # include "kernel/types/bvh.h"
- # include "kernel/types/film.h"
- # include "kernel/types/integrator.h"
- */
- }
-
- if (device->use_metalrt) {
- if (@available(macOS 11.0, *)) {
- /* create the id<MTLFunction> for each intersection function */
- const char *function_names[] = {
- "__anyhit__cycles_metalrt_visibility_test_tri",
- "__anyhit__cycles_metalrt_visibility_test_box",
- "__anyhit__cycles_metalrt_shadow_all_hit_tri",
- "__anyhit__cycles_metalrt_shadow_all_hit_box",
- "__anyhit__cycles_metalrt_local_hit_tri",
- "__anyhit__cycles_metalrt_local_hit_box",
- "__intersection__curve_ribbon",
- "__intersection__curve_ribbon_shadow",
- "__intersection__curve_all",
- "__intersection__curve_all_shadow",
- "__intersection__point",
- "__intersection__point_shadow",
- };
- assert(sizeof(function_names) / sizeof(function_names[0]) == METALRT_FUNC_NUM);
-
- MTLFunctionDescriptor *desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
- if (kernel_type == PSO_SPECIALISED) {
- desc.constantValues = constant_values;
- }
- for (int i = 0; i < METALRT_FUNC_NUM; i++) {
- const char *function_name = function_names[i];
- desc.name = [@(function_name) copy];
-
- NSError *error = NULL;
- rt_intersection_funcs[kernel_type][i] = [device->mtlLibrary[kernel_type]
- newFunctionWithDescriptor:desc
- error:&error];
-
- if (rt_intersection_funcs[kernel_type][i] == nil) {
- NSString *err = [error localizedDescription];
- string errors = [err UTF8String];
-
- device->set_error(string_printf(
- "Error getting intersection function \"%s\": %s", function_name, errors.c_str()));
- any_error = true;
- break;
- }
-
- rt_intersection_funcs[kernel_type][i].label = [@(function_name) copy];
- }
- }
- }
- md5.append(device->source_used_for_compile[kernel_type]);
-
- string hash = md5.get_hex();
- if (loaded_md5[kernel_type] == hash) {
- return true;
- }
-
- if (!any_error) {
- NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
- NSArray *function_list = nil;
-
- if (device->use_metalrt) {
- id<MTLFunction> curve_intersect_default = nil;
- id<MTLFunction> curve_intersect_shadow = nil;
- id<MTLFunction> point_intersect_default = nil;
- id<MTLFunction> point_intersect_shadow = nil;
- if (device->kernel_features & KERNEL_FEATURE_HAIR) {
- /* Add curve intersection programs. */
- if (device->kernel_features & KERNEL_FEATURE_HAIR_THICK) {
- /* Slower programs for thick hair since that also slows down ribbons.
- * Ideally this should not be needed. */
- curve_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL];
- curve_intersect_shadow =
- rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL_SHADOW];
- }
- else {
- curve_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON];
- curve_intersect_shadow =
- rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON_SHADOW];
- }
- }
- if (device->kernel_features & KERNEL_FEATURE_POINTCLOUD) {
- point_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_POINT];
- point_intersect_shadow = rt_intersection_funcs[kernel_type][METALRT_FUNC_POINT_SHADOW];
- }
- table_functions[METALRT_TABLE_DEFAULT] = [NSArray
- arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_TRI],
- curve_intersect_default ?
- curve_intersect_default :
- rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_BOX],
- point_intersect_default ?
- point_intersect_default :
- rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_BOX],
- nil];
- table_functions[METALRT_TABLE_SHADOW] = [NSArray
- arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_TRI],
- curve_intersect_shadow ?
- curve_intersect_shadow :
- rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_BOX],
- point_intersect_shadow ?
- point_intersect_shadow :
- rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_BOX],
- nil];
- table_functions[METALRT_TABLE_LOCAL] = [NSArray
- arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_TRI],
- rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_BOX],
- rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_BOX],
- nil];
-
- NSMutableSet *unique_functions = [NSMutableSet
- setWithArray:table_functions[METALRT_TABLE_DEFAULT]];
- [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_SHADOW]];
- [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL]];
-
- function_list = [[NSArray arrayWithArray:[unique_functions allObjects]]
- sortedArrayUsingComparator:^NSComparisonResult(id<MTLFunction> f1, id<MTLFunction> f2) {
- return [f1.label compare:f2.label];
- }];
-
- unique_functions = nil;
- }
-
- metal_printf("Starting %s \"cycles_metal_...\" pipeline builds\n",
- kernel_type_as_string(kernel_type));
-
- tbb::task_arena local_arena(max_mtlcompiler_threads);
- local_arena.execute([&]() {
- parallel_for(int(0), int(DEVICE_KERNEL_NUM), [&](int i) {
- /* skip megakernel */
- if (i == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
- return;
- }
-
- /* Only specialize kernels where it can make an impact. */
- if (kernel_type == PSO_SPECIALISED) {
- if (i < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
- i > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
- return;
- }
- }
-
- MetalDeviceKernel &kernel = kernels_[i];
-
- const std::string function_name = std::string("cycles_metal_") +
- device_kernel_as_string((DeviceKernel)i);
- int threads_per_threadgroup = device->max_threads_per_threadgroup;
- if (i > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL && i < DEVICE_KERNEL_INTEGRATOR_RESET) {
- /* Always use 512 for the sorting kernels */
- threads_per_threadgroup = 512;
- }
-
- NSArray *kernel_function_list = nil;
-
- if (i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
- i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
- i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
- i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK ||
- i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
- kernel_function_list = function_list;
- }
-
- MetalKernelLoadDesc desc;
- desc.pso_index = kernel_type;
- desc.kernel_index = i;
- desc.linked_functions = kernel_function_list;
- desc.intersector_functions.defaults = table_functions[METALRT_TABLE_DEFAULT];
- desc.intersector_functions.shadow = table_functions[METALRT_TABLE_SHADOW];
- desc.intersector_functions.local = table_functions[METALRT_TABLE_LOCAL];
- desc.constant_values = constant_values;
- desc.threads_per_threadgroup = threads_per_threadgroup;
- desc.function_name = function_name.c_str();
-
- bool success = kernel.load(device, desc, md5);
-
- any_error |= !success;
- });
- });
+ auto shader_cache = get_shader_cache(device->mtlDevice);
+ for (int i = 0; i < DEVICE_KERNEL_NUM; i++) {
+ shader_cache->load_kernel((DeviceKernel)i, device, scene_specialized);
}
- bool loaded = !any_error;
- if (loaded) {
- loaded_md5[kernel_type] = hash;
+ if (!scene_specialized || getenv("CYCLES_METAL_PROFILING")) {
+ shader_cache->wait_for_all();
}
- return loaded;
-}
-
-const MetalDeviceKernel &MetalDeviceKernels::get(DeviceKernel kernel) const
-{
- return kernels_[(int)kernel];
+ return true;
}
-bool MetalDeviceKernels::available(DeviceKernel kernel) const
+const MetalKernelPipeline *MetalDeviceKernels::get_best_pipeline(const MetalDevice *device,
+ DeviceKernel kernel)
{
- return kernels_[(int)kernel].get_pso().function != nil;
+ return get_shader_cache(device->mtlDevice)->get_best_pipeline(kernel, device);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm
index 1686ab95ffa..ec10e091b25 100644
--- a/intern/cycles/device/metal/queue.mm
+++ b/intern/cycles/device/metal/queue.mm
@@ -108,9 +108,6 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
VLOG(3) << "Metal queue launch " << device_kernel_as_string(kernel) << ", work_size "
<< work_size;
- const MetalDeviceKernel &metal_kernel = metal_device->kernels.get(kernel);
- const MetalKernelPipeline &metal_kernel_pso = metal_kernel.get_pso();
-
id<MTLComputeCommandEncoder> mtlComputeCommandEncoder = get_compute_encoder(kernel);
/* Determine size requirement for argument buffer. */
@@ -212,6 +209,14 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
}
bytes_written = globals_offsets + sizeof(KernelParamsMetal);
+ const MetalKernelPipeline *metal_kernel_pso = MetalDeviceKernels::get_best_pipeline(metal_device,
+ kernel);
+ if (!metal_kernel_pso) {
+ metal_device->set_error(
+ string_printf("No MetalKernelPipeline for %s\n", device_kernel_as_string(kernel)));
+ return false;
+ }
+
/* Encode ancillaries */
[metal_device->mtlAncillaryArgEncoder setArgumentBuffer:arg_buffer offset:metal_offsets];
[metal_device->mtlAncillaryArgEncoder setBuffer:metal_device->texture_bindings_2d
@@ -228,14 +233,14 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
}
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
- if (metal_kernel_pso.intersection_func_table[table]) {
- [metal_kernel_pso.intersection_func_table[table] setBuffer:arg_buffer
- offset:globals_offsets
- atIndex:1];
+ if (metal_kernel_pso->intersection_func_table[table]) {
+ [metal_kernel_pso->intersection_func_table[table] setBuffer:arg_buffer
+ offset:globals_offsets
+ atIndex:1];
[metal_device->mtlAncillaryArgEncoder
- setIntersectionFunctionTable:metal_kernel_pso.intersection_func_table[table]
+ setIntersectionFunctionTable:metal_kernel_pso->intersection_func_table[table]
atIndex:3 + table];
- [mtlComputeCommandEncoder useResource:metal_kernel_pso.intersection_func_table[table]
+ [mtlComputeCommandEncoder useResource:metal_kernel_pso->intersection_func_table[table]
usage:MTLResourceUsageRead];
}
else {
@@ -281,10 +286,10 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
}
}
- [mtlComputeCommandEncoder setComputePipelineState:metal_kernel_pso.pipeline];
+ [mtlComputeCommandEncoder setComputePipelineState:metal_kernel_pso->pipeline];
/* Compute kernel launch parameters. */
- const int num_threads_per_block = metal_kernel.get_num_threads_per_block();
+ const int num_threads_per_block = metal_kernel_pso->num_threads_per_block;
int shared_mem_bytes = 0;
@@ -314,7 +319,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
threadsPerThreadgroup:size_threads_per_threadgroup];
[mtlCommandBuffer addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
- NSString *kernel_name = metal_kernel_pso.function.label;
+ NSString *kernel_name = metal_kernel_pso->function.label;
/* Enhanced command buffer errors are only available in 11.0+ */
if (@available(macos 11.0, *)) {
@@ -547,6 +552,8 @@ id<MTLComputeCommandEncoder> MetalDeviceQueue::get_compute_encoder(DeviceKernel
computeCommandEncoderWithDispatchType:concurrent ? MTLDispatchTypeConcurrent :
MTLDispatchTypeSerial];
+ [mtlComputeEncoder setLabel:@(device_kernel_as_string(kernel))];
+
/* declare usage of MTLBuffers etc */
prepare_resources(kernel);
}
diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp
index 8830d8c44ac..9fc265bc327 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -23,6 +23,7 @@
# include "util/md5.h"
# include "util/path.h"
# include "util/progress.h"
+# include "util/task.h"
# include "util/time.h"
# undef __KERNEL_CPU__
@@ -216,6 +217,25 @@ static OptixResult optixUtilDenoiserInvokeTiled(OptixDenoiser denoiser,
return OPTIX_SUCCESS;
}
+# if OPTIX_ABI_VERSION >= 55
+static void execute_optix_task(TaskPool &pool, OptixTask task, OptixResult &failure_reason)
+{
+ OptixTask additional_tasks[16];
+ unsigned int num_additional_tasks = 0;
+
+ const OptixResult result = optixTaskExecute(task, additional_tasks, 16, &num_additional_tasks);
+ if (result == OPTIX_SUCCESS) {
+ for (unsigned int i = 0; i < num_additional_tasks; ++i) {
+ pool.push(function_bind(
+ &execute_optix_task, std::ref(pool), additional_tasks[i], std::ref(failure_reason)));
+ }
+ }
+ else {
+ failure_reason = result;
+ }
+}
+# endif
+
} // namespace
OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
@@ -453,6 +473,23 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
return false;
}
+# if OPTIX_ABI_VERSION >= 55
+ OptixTask task = nullptr;
+ OptixResult result = optixModuleCreateFromPTXWithTasks(context,
+ &module_options,
+ &pipeline_options,
+ ptx_data.data(),
+ ptx_data.size(),
+ nullptr,
+ nullptr,
+ &optix_module,
+ &task);
+ if (result == OPTIX_SUCCESS) {
+ TaskPool pool;
+ execute_optix_task(pool, task, result);
+ pool.wait_work();
+ }
+# else
const OptixResult result = optixModuleCreateFromPTX(context,
&module_options,
&pipeline_options,
@@ -461,6 +498,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
nullptr,
0,
&optix_module);
+# endif
if (result != OPTIX_SUCCESS) {
set_error(string_printf("Failed to load OptiX kernel from '%s' (%s)",
ptx_filename.c_str(),
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index d97854a52d0..473bdb67920 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -145,6 +145,7 @@ set(SRC_KERNEL_SVM_HEADERS
svm/normal.h
svm/ramp.h
svm/ramp_util.h
+ svm/sepcomb_color.h
svm/sepcomb_hsv.h
svm/sepcomb_vector.h
svm/sky.h
diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h
index 4e309f16c08..0ed52074a90 100644
--- a/intern/cycles/kernel/device/metal/compat.h
+++ b/intern/cycles/kernel/device/metal/compat.h
@@ -29,10 +29,26 @@ using namespace metal::raytracing;
/* Qualifiers */
-#define ccl_device
-#define ccl_device_inline ccl_device
-#define ccl_device_forceinline ccl_device
-#define ccl_device_noinline ccl_device __attribute__((noinline))
+#if defined(__KERNEL_METAL_APPLE__)
+
+/* Inline everything for Apple GPUs.
+ * This gives ~1.1x speedup and 10% spill reduction for integator_shade_surface
+ * at the cost of longer compile times (~4.5 minutes on M1 Max). */
+
+# define ccl_device __attribute__((always_inline))
+# define ccl_device_inline __attribute__((always_inline))
+# define ccl_device_forceinline __attribute__((always_inline))
+# define ccl_device_noinline __attribute__((always_inline))
+
+#else
+
+# define ccl_device
+# define ccl_device_inline ccl_device
+# define ccl_device_forceinline ccl_device
+# define ccl_device_noinline ccl_device __attribute__((noinline))
+
+#endif
+
#define ccl_device_noinline_cpu ccl_device
#define ccl_device_inline_method ccl_device
#define ccl_global device
diff --git a/intern/cycles/kernel/osl/services.cpp b/intern/cycles/kernel/osl/services.cpp
index 832498f1f73..e2e10b5b83f 100644
--- a/intern/cycles/kernel/osl/services.cpp
+++ b/intern/cycles/kernel/osl/services.cpp
@@ -1304,8 +1304,38 @@ bool OSLRenderServices::texture(ustring filename,
break;
}
case OSLTextureHandle::SVM: {
- /* Packed texture. */
- float4 rgba = kernel_tex_image_interp(kernel_globals, handle->svm_slot, s, 1.0f - t);
+ int id = -1;
+ if (handle->svm_slots[0].w == -1) {
+ /* Packed single texture. */
+ id = handle->svm_slots[0].y;
+ }
+ else {
+ /* Packed tiled texture. */
+ int tx = (int)s;
+ int ty = (int)t;
+ int tile = 1001 + 10 * ty + tx;
+ for (int4 tile_node : handle->svm_slots) {
+ if (tile_node.x == tile) {
+ id = tile_node.y;
+ break;
+ }
+ if (tile_node.z == tile) {
+ id = tile_node.w;
+ break;
+ }
+ }
+ s -= tx;
+ t -= ty;
+ }
+
+ float4 rgba;
+ if (id == -1) {
+ rgba = make_float4(
+ TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A);
+ }
+ else {
+ rgba = kernel_tex_image_interp(kernel_globals, id, s, 1.0f - t);
+ }
result[0] = rgba[0];
if (nchannels > 1)
@@ -1319,7 +1349,7 @@ bool OSLRenderServices::texture(ustring filename,
}
case OSLTextureHandle::IES: {
/* IES light. */
- result[0] = kernel_ies_interp(kernel_globals, handle->svm_slot, s, t);
+ result[0] = kernel_ies_interp(kernel_globals, handle->svm_slots[0].y, s, t);
status = true;
break;
}
@@ -1413,7 +1443,7 @@ bool OSLRenderServices::texture3d(ustring filename,
/* Packed texture. */
ShaderData *sd = (ShaderData *)(sg->renderstate);
KernelGlobals kernel_globals = sd->osl_globals;
- int slot = handle->svm_slot;
+ int slot = handle->svm_slots[0].y;
float3 P_float3 = make_float3(P.x, P.y, P.z);
float4 rgba = kernel_tex_image_interp_3d(kernel_globals, slot, P_float3, INTERPOLATION_NONE);
diff --git a/intern/cycles/kernel/osl/services.h b/intern/cycles/kernel/osl/services.h
index 653fa017140..edffd912bad 100644
--- a/intern/cycles/kernel/osl/services.h
+++ b/intern/cycles/kernel/osl/services.h
@@ -39,18 +39,26 @@ struct KernelGlobalsCPU;
* with additional data.
*
* These are stored in a concurrent hash map, because OSL can compile multiple
- * shaders in parallel. */
+ * shaders in parallel.
+ *
+ * NOTE: The svm_slots array contains a compressed mapping of tile to svm_slot pairs
+ * stored as follows: x:tile_a, y:svm_slot_a, z:tile_b, w:svm_slot_b etc. */
struct OSLTextureHandle : public OIIO::RefCnt {
enum Type { OIIO, SVM, IES, BEVEL, AO };
+ OSLTextureHandle(Type type, const vector<int4> &svm_slots)
+ : type(type), svm_slots(svm_slots), oiio_handle(NULL), processor(NULL)
+ {
+ }
+
OSLTextureHandle(Type type = OIIO, int svm_slot = -1)
- : type(type), svm_slot(svm_slot), oiio_handle(NULL), processor(NULL)
+ : OSLTextureHandle(type, {make_int4(0, svm_slot, -1, -1)})
{
}
Type type;
- int svm_slot;
+ vector<int4> svm_slots;
OSL::TextureSystem::TextureHandle *oiio_handle;
ColorSpaceProcessor *processor;
};
diff --git a/intern/cycles/kernel/osl/shaders/CMakeLists.txt b/intern/cycles/kernel/osl/shaders/CMakeLists.txt
index 7ced21c5670..741bce7c399 100644
--- a/intern/cycles/kernel/osl/shaders/CMakeLists.txt
+++ b/intern/cycles/kernel/osl/shaders/CMakeLists.txt
@@ -16,6 +16,7 @@ set(SRC_OSL
node_camera.osl
node_checker_texture.osl
node_clamp.osl
+ node_combine_color.osl
node_combine_rgb.osl
node_combine_hsv.osl
node_combine_xyz.osl
@@ -68,6 +69,7 @@ set(SRC_OSL
node_refraction_bsdf.osl
node_rgb_curves.osl
node_rgb_ramp.osl
+ node_separate_color.osl
node_separate_rgb.osl
node_separate_hsv.osl
node_separate_xyz.osl
diff --git a/intern/cycles/kernel/osl/shaders/node_color.h b/intern/cycles/kernel/osl/shaders/node_color.h
index 388dd114e9a..06735f5b03d 100644
--- a/intern/cycles/kernel/osl/shaders/node_color.h
+++ b/intern/cycles/kernel/osl/shaders/node_color.h
@@ -148,3 +148,53 @@ color hsv_to_rgb(color hsv)
return rgb;
}
+
+color rgb_to_hsl(color rgb)
+{
+ float cmax, cmin, h, s, l;
+
+ cmax = max(rgb[0], max(rgb[1], rgb[2]));
+ cmin = min(rgb[0], min(rgb[1], rgb[2]));
+ l = min(1.0, (cmax + cmin) / 2.0);
+
+ if (cmax == cmin) {
+ h = s = 0.0; /* achromatic */
+ }
+ else {
+ float cdelta = cmax - cmin;
+ s = l > 0.5 ? cdelta / (2.0 - cmax - cmin) : cdelta / (cmax + cmin);
+ if (cmax == rgb[0]) {
+ h = (rgb[1] - rgb[2]) / cdelta + (rgb[1] < rgb[2] ? 6.0 : 0.0);
+ }
+ else if (cmax == rgb[1]) {
+ h = (rgb[2] - rgb[0]) / cdelta + 2.0;
+ }
+ else {
+ h = (rgb[0] - rgb[1]) / cdelta + 4.0;
+ }
+ }
+ h /= 6.0;
+
+ return color(h, s, l);
+}
+
+color hsl_to_rgb(color hsl)
+{
+ float nr, ng, nb, chroma, h, s, l;
+
+ h = hsl[0];
+ s = hsl[1];
+ l = hsl[2];
+
+ nr = abs(h * 6.0 - 3.0) - 1.0;
+ ng = 2.0 - abs(h * 6.0 - 2.0);
+ nb = 2.0 - abs(h * 6.0 - 4.0);
+
+ nr = clamp(nr, 0.0, 1.0);
+ nb = clamp(nb, 0.0, 1.0);
+ ng = clamp(ng, 0.0, 1.0);
+
+ chroma = (1.0 - abs(2.0 * l - 1.0)) * s;
+
+ return color((nr - 0.5) * chroma + l, (ng - 0.5) * chroma + l, (nb - 0.5) * chroma + l);
+}
diff --git a/intern/cycles/kernel/osl/shaders/node_combine_color.osl b/intern/cycles/kernel/osl/shaders/node_combine_color.osl
new file mode 100644
index 00000000000..681a592d2bb
--- /dev/null
+++ b/intern/cycles/kernel/osl/shaders/node_combine_color.osl
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#include "stdcycles.h"
+
+shader node_combine_color(string color_type = "rgb",
+ float Red = 0.0,
+ float Green = 0.0,
+ float Blue = 0.0,
+ output color Color = 0.8)
+{
+ if (color_type == "rgb" || color_type == "hsv" || color_type == "hsl")
+ Color = color(color_type, Red, Green, Blue);
+ else
+ warning("%s", "Unknown color space!");
+}
diff --git a/intern/cycles/kernel/osl/shaders/node_separate_color.osl b/intern/cycles/kernel/osl/shaders/node_separate_color.osl
new file mode 100644
index 00000000000..6f3e3149d8e
--- /dev/null
+++ b/intern/cycles/kernel/osl/shaders/node_separate_color.osl
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#include "node_color.h"
+#include "stdcycles.h"
+
+shader node_separate_color(string color_type = "rgb",
+ color Color = 0.8,
+ output float Red = 0.0,
+ output float Green = 0.0,
+ output float Blue = 0.0)
+{
+ color col;
+ if (color_type == "rgb")
+ col = Color;
+ else if (color_type == "hsv")
+ col = rgb_to_hsv(Color);
+ else if (color_type == "hsl")
+ col = rgb_to_hsl(Color);
+ else
+ warning("%s", "Unknown color space!");
+
+ Red = col[0];
+ Green = col[1];
+ Blue = col[2];
+}
diff --git a/intern/cycles/kernel/svm/color_util.h b/intern/cycles/kernel/svm/color_util.h
index b439721383c..fa22d4bc8c2 100644
--- a/intern/cycles/kernel/svm/color_util.h
+++ b/intern/cycles/kernel/svm/color_util.h
@@ -307,4 +307,30 @@ ccl_device_inline float3 svm_brightness_contrast(float3 color, float brightness,
return color;
}
+ccl_device float3 svm_combine_color(NodeCombSepColorType type, float3 color)
+{
+ switch (type) {
+ case NODE_COMBSEP_COLOR_HSV:
+ return hsv_to_rgb(color);
+ case NODE_COMBSEP_COLOR_HSL:
+ return hsl_to_rgb(color);
+ case NODE_COMBSEP_COLOR_RGB:
+ default:
+ return color;
+ }
+}
+
+ccl_device float3 svm_separate_color(NodeCombSepColorType type, float3 color)
+{
+ switch (type) {
+ case NODE_COMBSEP_COLOR_HSV:
+ return rgb_to_hsv(color);
+ case NODE_COMBSEP_COLOR_HSL:
+ return rgb_to_hsl(color);
+ case NODE_COMBSEP_COLOR_RGB:
+ default:
+ return color;
+ }
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/sepcomb_color.h b/intern/cycles/kernel/svm/sepcomb_color.h
new file mode 100644
index 00000000000..d186e7f163b
--- /dev/null
+++ b/intern/cycles/kernel/svm/sepcomb_color.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#pragma once
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device_noinline void svm_node_combine_color(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ ccl_private float *stack,
+ uint color_type,
+ uint inputs_stack_offsets,
+ uint result_stack_offset)
+{
+ uint red_stack_offset, green_stack_offset, blue_stack_offset;
+ svm_unpack_node_uchar3(
+ inputs_stack_offsets, &red_stack_offset, &green_stack_offset, &blue_stack_offset);
+
+ float r = stack_load_float(stack, red_stack_offset);
+ float g = stack_load_float(stack, green_stack_offset);
+ float b = stack_load_float(stack, blue_stack_offset);
+
+ /* Combine, and convert back to RGB */
+ float3 color = svm_combine_color((NodeCombSepColorType)color_type, make_float3(r, g, b));
+
+ if (stack_valid(result_stack_offset))
+ stack_store_float3(stack, result_stack_offset, color);
+}
+
+ccl_device_noinline void svm_node_separate_color(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ ccl_private float *stack,
+ uint color_type,
+ uint input_stack_offset,
+ uint results_stack_offsets)
+{
+ float3 color = stack_load_float3(stack, input_stack_offset);
+
+ /* Convert color space */
+ color = svm_separate_color((NodeCombSepColorType)color_type, color);
+
+ uint red_stack_offset, green_stack_offset, blue_stack_offset;
+ svm_unpack_node_uchar3(
+ results_stack_offsets, &red_stack_offset, &green_stack_offset, &blue_stack_offset);
+
+ if (stack_valid(red_stack_offset))
+ stack_store_float(stack, red_stack_offset, color.x);
+ if (stack_valid(green_stack_offset))
+ stack_store_float(stack, green_stack_offset, color.y);
+ if (stack_valid(blue_stack_offset))
+ stack_store_float(stack, blue_stack_offset, color.z);
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index 08352a6231f..5def943c87f 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -181,6 +181,7 @@ CCL_NAMESPACE_END
#include "kernel/svm/noisetex.h"
#include "kernel/svm/normal.h"
#include "kernel/svm/ramp.h"
+#include "kernel/svm/sepcomb_color.h"
#include "kernel/svm/sepcomb_hsv.h"
#include "kernel/svm/sepcomb_vector.h"
#include "kernel/svm/sky.h"
@@ -508,6 +509,12 @@ ccl_device void svm_eval_nodes(KernelGlobals kg,
case NODE_MIX:
offset = svm_node_mix(kg, sd, stack, node.y, node.z, node.w, offset);
break;
+ case NODE_SEPARATE_COLOR:
+ svm_node_separate_color(kg, sd, stack, node.y, node.z, node.w);
+ break;
+ case NODE_COMBINE_COLOR:
+ svm_node_combine_color(kg, sd, stack, node.y, node.z, node.w);
+ break;
case NODE_SEPARATE_VECTOR:
svm_node_separate_vector(sd, stack, node.y, node.z, node.w);
break;
diff --git a/intern/cycles/kernel/svm/types.h b/intern/cycles/kernel/svm/types.h
index bede58f7a54..82109ec4c4f 100644
--- a/intern/cycles/kernel/svm/types.h
+++ b/intern/cycles/kernel/svm/types.h
@@ -92,6 +92,8 @@ typedef enum ShaderNodeType {
NODE_NORMAL_MAP,
NODE_INVERT,
NODE_MIX,
+ NODE_SEPARATE_COLOR,
+ NODE_COMBINE_COLOR,
NODE_SEPARATE_VECTOR,
NODE_COMBINE_VECTOR,
NODE_SEPARATE_HSV,
@@ -487,6 +489,12 @@ typedef enum NodePrincipledHairParametrization {
NODE_PRINCIPLED_HAIR_NUM,
} NodePrincipledHairParametrization;
+typedef enum NodeCombSepColorType {
+ NODE_COMBSEP_COLOR_RGB,
+ NODE_COMBSEP_COLOR_HSV,
+ NODE_COMBSEP_COLOR_HSL,
+} NodeCombSepColorType;
+
/* Closure */
typedef enum ClosureType {
diff --git a/intern/cycles/scene/image.cpp b/intern/cycles/scene/image.cpp
index 21fde88915e..c61ad1f1d71 100644
--- a/intern/cycles/scene/image.cpp
+++ b/intern/cycles/scene/image.cpp
@@ -117,12 +117,12 @@ void ImageHandle::clear()
manager = NULL;
}
-bool ImageHandle::empty()
+bool ImageHandle::empty() const
{
return tile_slots.empty();
}
-int ImageHandle::num_tiles()
+int ImageHandle::num_tiles() const
{
return tile_slots.size();
}
@@ -154,6 +154,35 @@ int ImageHandle::svm_slot(const int tile_index) const
return tile_slots[tile_index];
}
+vector<int4> ImageHandle::get_svm_slots() const
+{
+ const size_t num_nodes = divide_up(tile_slots.size(), 2);
+
+ vector<int4> svm_slots;
+ svm_slots.reserve(num_nodes);
+ for (size_t i = 0; i < num_nodes; i++) {
+ int4 node;
+
+ int slot = tile_slots[2 * i];
+ node.x = manager->images[slot]->loader->get_tile_number();
+ node.y = slot;
+
+ if ((2 * i + 1) < tile_slots.size()) {
+ slot = tile_slots[2 * i + 1];
+ node.z = manager->images[slot]->loader->get_tile_number();
+ node.w = slot;
+ }
+ else {
+ node.z = -1;
+ node.w = -1;
+ }
+
+ svm_slots.push_back(node);
+ }
+
+ return svm_slots;
+}
+
device_texture *ImageHandle::image_memory(const int tile_index) const
{
if (tile_index >= tile_slots.size()) {
@@ -266,6 +295,11 @@ ustring ImageLoader::osl_filepath() const
return ustring();
}
+int ImageLoader::get_tile_number() const
+{
+ return 0;
+}
+
bool ImageLoader::equals(const ImageLoader *a, const ImageLoader *b)
{
if (a == NULL && b == NULL) {
@@ -397,6 +431,19 @@ ImageHandle ImageManager::add_image(ImageLoader *loader,
return handle;
}
+ImageHandle ImageManager::add_image(const vector<ImageLoader *> &loaders,
+ const ImageParams &params)
+{
+ ImageHandle handle;
+ for (ImageLoader *loader : loaders) {
+ const int slot = add_image_slot(loader, params, true);
+ handle.tile_slots.push_back(slot);
+ }
+
+ handle.manager = this;
+ return handle;
+}
+
int ImageManager::add_image_slot(ImageLoader *loader,
const ImageParams &params,
const bool builtin)
diff --git a/intern/cycles/scene/image.h b/intern/cycles/scene/image.h
index 4d0dee35eca..9edb6a7eaf5 100644
--- a/intern/cycles/scene/image.h
+++ b/intern/cycles/scene/image.h
@@ -112,6 +112,9 @@ class ImageLoader {
/* Optional for OSL texture cache. */
virtual ustring osl_filepath() const;
+ /* Optional for tiled textures loaded externally. */
+ virtual int get_tile_number() const;
+
/* Free any memory used for loading metadata and pixels. */
virtual void cleanup(){};
@@ -139,11 +142,12 @@ class ImageHandle {
void clear();
- bool empty();
- int num_tiles();
+ bool empty() const;
+ int num_tiles() const;
ImageMetaData metadata();
int svm_slot(const int tile_index = 0) const;
+ vector<int4> get_svm_slots() const;
device_texture *image_memory(const int tile_index = 0) const;
VDBImageLoader *vdb_loader(const int tile_index = 0) const;
@@ -169,6 +173,7 @@ class ImageManager {
const ImageParams &params,
const array<int> &tiles);
ImageHandle add_image(ImageLoader *loader, const ImageParams &params, const bool builtin = true);
+ ImageHandle add_image(const vector<ImageLoader *> &loaders, const ImageParams &params);
void device_update(Device *device, Scene *scene, Progress &progress);
void device_update_slot(Device *device, Scene *scene, int slot, Progress *progress);
diff --git a/intern/cycles/scene/osl.cpp b/intern/cycles/scene/osl.cpp
index ffa1a2f5623..6698e6e2cce 100644
--- a/intern/cycles/scene/osl.cpp
+++ b/intern/cycles/scene/osl.cpp
@@ -1211,14 +1211,15 @@ void OSLCompiler::parameter_texture(const char *name, ustring filename, ustring
parameter(name, filename);
}
-void OSLCompiler::parameter_texture(const char *name, int svm_slot)
+void OSLCompiler::parameter_texture(const char *name, const ImageHandle &handle)
{
/* Texture loaded through SVM image texture system. We generate a unique
* name, which ends up being used in OSLRenderServices::get_texture_handle
* to get handle again. Note that this name must be unique between multiple
* render sessions as the render services are shared. */
ustring filename(string_printf("@svm%d", texture_shared_unique_id++).c_str());
- services->textures.insert(filename, new OSLTextureHandle(OSLTextureHandle::SVM, svm_slot));
+ services->textures.insert(filename,
+ new OSLTextureHandle(OSLTextureHandle::SVM, handle.get_svm_slots()));
parameter(name, filename);
}
@@ -1290,7 +1291,7 @@ void OSLCompiler::parameter_texture(const char * /* name */,
{
}
-void OSLCompiler::parameter_texture(const char * /* name */, int /* svm_slot */)
+void OSLCompiler::parameter_texture(const char * /* name */, const ImageHandle & /*handle*/)
{
}
diff --git a/intern/cycles/scene/osl.h b/intern/cycles/scene/osl.h
index f0f97dbcaad..bf27069b1b1 100644
--- a/intern/cycles/scene/osl.h
+++ b/intern/cycles/scene/osl.h
@@ -147,7 +147,7 @@ class OSLCompiler {
void parameter_attribute(const char *name, ustring s);
void parameter_texture(const char *name, ustring filename, ustring colorspace);
- void parameter_texture(const char *name, int svm_slot);
+ void parameter_texture(const char *name, const ImageHandle &handle);
void parameter_texture_ies(const char *name, int svm_slot);
ShaderType output_type()
diff --git a/intern/cycles/scene/shader_nodes.cpp b/intern/cycles/scene/shader_nodes.cpp
index 95fccf725f3..03c152928d5 100644
--- a/intern/cycles/scene/shader_nodes.cpp
+++ b/intern/cycles/scene/shader_nodes.cpp
@@ -19,7 +19,6 @@
#include "util/color.h"
#include "util/foreach.h"
#include "util/log.h"
-#include "util/string.h"
#include "util/transform.h"
#include "kernel/tables.h"
@@ -450,22 +449,19 @@ void ImageTextureNode::compile(OSLCompiler &compiler)
const ustring known_colorspace = metadata.colorspace;
if (handle.svm_slot() == -1) {
- /* OIIO currently does not support <UVTILE> substitutions natively. Replace with a format they
- * understand. */
- std::string osl_filename = filename.string();
- string_replace(osl_filename, "<UVTILE>", "<U>_<V>");
compiler.parameter_texture(
- "filename", ustring(osl_filename), compress_as_srgb ? u_colorspace_raw : known_colorspace);
+ "filename", filename, compress_as_srgb ? u_colorspace_raw : known_colorspace);
}
else {
- compiler.parameter_texture("filename", handle.svm_slot());
+ compiler.parameter_texture("filename", handle);
}
const bool unassociate_alpha = !(ColorSpaceManager::colorspace_is_data(colorspace) ||
alpha_type == IMAGE_ALPHA_CHANNEL_PACKED ||
alpha_type == IMAGE_ALPHA_IGNORE);
const bool is_tiled = (filename.find("<UDIM>") != string::npos ||
- filename.find("<UVTILE>") != string::npos);
+ filename.find("<UVTILE>") != string::npos) ||
+ handle.num_tiles() > 1;
compiler.parameter(this, "projection");
compiler.parameter(this, "projection_blend");
@@ -610,7 +606,7 @@ void EnvironmentTextureNode::compile(OSLCompiler &compiler)
"filename", filename, compress_as_srgb ? u_colorspace_raw : known_colorspace);
}
else {
- compiler.parameter_texture("filename", handle.svm_slot());
+ compiler.parameter_texture("filename", handle);
}
compiler.parameter(this, "projection");
@@ -965,7 +961,7 @@ void SkyTextureNode::compile(OSLCompiler &compiler)
compiler.parameter_array("nishita_data", sunsky.nishita_data, 10);
/* nishita texture */
if (sky_type == NODE_SKY_NISHITA) {
- compiler.parameter_texture("filename", handle.svm_slot());
+ compiler.parameter_texture("filename", handle);
}
compiler.add(this, "node_sky_texture");
}
@@ -1860,7 +1856,7 @@ void PointDensityTextureNode::compile(OSLCompiler &compiler)
handle = image_manager->add_image(filename.string(), image_params());
}
- compiler.parameter_texture("filename", handle.svm_slot());
+ compiler.parameter_texture("filename", handle);
if (space == NODE_TEX_VOXEL_SPACE_WORLD) {
compiler.parameter("mapping", tfm);
compiler.parameter("use_mapping", 1);
@@ -5010,6 +5006,63 @@ void MixNode::constant_fold(const ConstantFolder &folder)
}
}
+/* Combine Color */
+
+NODE_DEFINE(CombineColorNode)
+{
+ NodeType *type = NodeType::add("combine_color", create, NodeType::SHADER);
+
+ static NodeEnum type_enum;
+ type_enum.insert("rgb", NODE_COMBSEP_COLOR_RGB);
+ type_enum.insert("hsv", NODE_COMBSEP_COLOR_HSV);
+ type_enum.insert("hsl", NODE_COMBSEP_COLOR_HSL);
+ SOCKET_ENUM(color_type, "Type", type_enum, NODE_COMBSEP_COLOR_RGB);
+
+ SOCKET_IN_FLOAT(r, "Red", 0.0f);
+ SOCKET_IN_FLOAT(g, "Green", 0.0f);
+ SOCKET_IN_FLOAT(b, "Blue", 0.0f);
+
+ SOCKET_OUT_COLOR(color, "Color");
+
+ return type;
+}
+
+CombineColorNode::CombineColorNode() : ShaderNode(get_node_type())
+{
+}
+
+void CombineColorNode::constant_fold(const ConstantFolder &folder)
+{
+ if (folder.all_inputs_constant()) {
+ folder.make_constant(svm_combine_color(color_type, make_float3(r, g, b)));
+ }
+}
+
+void CombineColorNode::compile(SVMCompiler &compiler)
+{
+ ShaderInput *red_in = input("Red");
+ ShaderInput *green_in = input("Green");
+ ShaderInput *blue_in = input("Blue");
+ ShaderOutput *color_out = output("Color");
+
+ int red_stack_offset = compiler.stack_assign(red_in);
+ int green_stack_offset = compiler.stack_assign(green_in);
+ int blue_stack_offset = compiler.stack_assign(blue_in);
+ int color_stack_offset = compiler.stack_assign(color_out);
+
+ compiler.add_node(
+ NODE_COMBINE_COLOR,
+ color_type,
+ compiler.encode_uchar4(red_stack_offset, green_stack_offset, blue_stack_offset),
+ color_stack_offset);
+}
+
+void CombineColorNode::compile(OSLCompiler &compiler)
+{
+ compiler.parameter(this, "color_type");
+ compiler.add(this, "node_combine_color");
+}
+
/* Combine RGB */
NODE_DEFINE(CombineRGBNode)
@@ -5250,6 +5303,70 @@ void BrightContrastNode::compile(OSLCompiler &compiler)
compiler.add(this, "node_brightness");
}
+/* Separate Color */
+
+NODE_DEFINE(SeparateColorNode)
+{
+ NodeType *type = NodeType::add("separate_color", create, NodeType::SHADER);
+
+ static NodeEnum type_enum;
+ type_enum.insert("rgb", NODE_COMBSEP_COLOR_RGB);
+ type_enum.insert("hsv", NODE_COMBSEP_COLOR_HSV);
+ type_enum.insert("hsl", NODE_COMBSEP_COLOR_HSL);
+ SOCKET_ENUM(color_type, "Type", type_enum, NODE_COMBSEP_COLOR_RGB);
+
+ SOCKET_IN_COLOR(color, "Color", zero_float3());
+
+ SOCKET_OUT_FLOAT(r, "Red");
+ SOCKET_OUT_FLOAT(g, "Green");
+ SOCKET_OUT_FLOAT(b, "Blue");
+
+ return type;
+}
+
+SeparateColorNode::SeparateColorNode() : ShaderNode(get_node_type())
+{
+}
+
+void SeparateColorNode::constant_fold(const ConstantFolder &folder)
+{
+ if (folder.all_inputs_constant()) {
+ float3 col = svm_separate_color(color_type, color);
+
+ for (int channel = 0; channel < 3; channel++) {
+ if (outputs[channel] == folder.output) {
+ folder.make_constant(col[channel]);
+ return;
+ }
+ }
+ }
+}
+
+void SeparateColorNode::compile(SVMCompiler &compiler)
+{
+ ShaderInput *color_in = input("Color");
+ ShaderOutput *red_out = output("Red");
+ ShaderOutput *green_out = output("Green");
+ ShaderOutput *blue_out = output("Blue");
+
+ int color_stack_offset = compiler.stack_assign(color_in);
+ int red_stack_offset = compiler.stack_assign(red_out);
+ int green_stack_offset = compiler.stack_assign(green_out);
+ int blue_stack_offset = compiler.stack_assign(blue_out);
+
+ compiler.add_node(
+ NODE_SEPARATE_COLOR,
+ color_type,
+ color_stack_offset,
+ compiler.encode_uchar4(red_stack_offset, green_stack_offset, blue_stack_offset));
+}
+
+void SeparateColorNode::compile(OSLCompiler &compiler)
+{
+ compiler.parameter(this, "color_type");
+ compiler.add(this, "node_separate_color");
+}
+
/* Separate RGB */
NODE_DEFINE(SeparateRGBNode)
diff --git a/intern/cycles/scene/shader_nodes.h b/intern/cycles/scene/shader_nodes.h
index 9aef5d3151f..ac40a397c1e 100644
--- a/intern/cycles/scene/shader_nodes.h
+++ b/intern/cycles/scene/shader_nodes.h
@@ -1101,6 +1101,17 @@ class MixNode : public ShaderNode {
NODE_SOCKET_API(float, fac)
};
+class CombineColorNode : public ShaderNode {
+ public:
+ SHADER_NODE_CLASS(CombineColorNode)
+ void constant_fold(const ConstantFolder &folder);
+
+ NODE_SOCKET_API(NodeCombSepColorType, color_type)
+ NODE_SOCKET_API(float, r)
+ NODE_SOCKET_API(float, g)
+ NODE_SOCKET_API(float, b)
+};
+
class CombineRGBNode : public ShaderNode {
public:
SHADER_NODE_CLASS(CombineRGBNode)
@@ -1150,6 +1161,15 @@ class BrightContrastNode : public ShaderNode {
NODE_SOCKET_API(float, contrast)
};
+class SeparateColorNode : public ShaderNode {
+ public:
+ SHADER_NODE_CLASS(SeparateColorNode)
+ void constant_fold(const ConstantFolder &folder);
+
+ NODE_SOCKET_API(NodeCombSepColorType, color_type)
+ NODE_SOCKET_API(float3, color)
+};
+
class SeparateRGBNode : public ShaderNode {
public:
SHADER_NODE_CLASS(SeparateRGBNode)
diff --git a/intern/cycles/util/color.h b/intern/cycles/util/color.h
index cccccde3ba6..795c3754976 100644
--- a/intern/cycles/util/color.h
+++ b/intern/cycles/util/color.h
@@ -152,6 +152,56 @@ ccl_device float3 hsv_to_rgb(float3 hsv)
return rgb;
}
+ccl_device float3 rgb_to_hsl(float3 rgb)
+{
+ float cmax, cmin, h, s, l;
+
+ cmax = fmaxf(rgb.x, fmaxf(rgb.y, rgb.z));
+ cmin = min(rgb.x, min(rgb.y, rgb.z));
+ l = min(1.0f, (cmax + cmin) / 2.0f);
+
+ if (cmax == cmin) {
+ h = s = 0.0f; /* achromatic */
+ }
+ else {
+ float cdelta = cmax - cmin;
+ s = l > 0.5f ? cdelta / (2.0f - cmax - cmin) : cdelta / (cmax + cmin);
+ if (cmax == rgb.x) {
+ h = (rgb.y - rgb.z) / cdelta + (rgb.y < rgb.z ? 6.0f : 0.0f);
+ }
+ else if (cmax == rgb.y) {
+ h = (rgb.z - rgb.x) / cdelta + 2.0f;
+ }
+ else {
+ h = (rgb.x - rgb.y) / cdelta + 4.0f;
+ }
+ }
+ h /= 6.0f;
+
+ return make_float3(h, s, l);
+}
+
+ccl_device float3 hsl_to_rgb(float3 hsl)
+{
+ float nr, ng, nb, chroma, h, s, l;
+
+ h = hsl.x;
+ s = hsl.y;
+ l = hsl.z;
+
+ nr = fabsf(h * 6.0f - 3.0f) - 1.0f;
+ ng = 2.0f - fabsf(h * 6.0f - 2.0f);
+ nb = 2.0f - fabsf(h * 6.0f - 4.0f);
+
+ nr = clamp(nr, 0.0f, 1.0f);
+ nb = clamp(nb, 0.0f, 1.0f);
+ ng = clamp(ng, 0.0f, 1.0f);
+
+ chroma = (1.0f - fabsf(2.0f * l - 1.0f)) * s;
+
+ return make_float3((nr - 0.5f) * chroma + l, (ng - 0.5f) * chroma + l, (nb - 0.5f) * chroma + l);
+}
+
ccl_device float3 xyY_to_xyz(float x, float y, float Y)
{
float X, Z;
diff --git a/intern/ghost/CMakeLists.txt b/intern/ghost/CMakeLists.txt
index 9421edecf12..dceb9ced803 100644
--- a/intern/ghost/CMakeLists.txt
+++ b/intern/ghost/CMakeLists.txt
@@ -376,6 +376,7 @@ elseif(WIN32)
intern/GHOST_DisplayManagerWin32.cpp
intern/GHOST_DropTargetWin32.cpp
intern/GHOST_SystemWin32.cpp
+ intern/GHOST_TrackpadWin32.cpp
intern/GHOST_WindowWin32.cpp
intern/GHOST_Wintab.cpp
@@ -384,6 +385,7 @@ elseif(WIN32)
intern/GHOST_DropTargetWin32.h
intern/GHOST_SystemWin32.h
intern/GHOST_TaskbarWin32.h
+ intern/GHOST_TrackpadWin32.h
intern/GHOST_WindowWin32.h
intern/GHOST_Wintab.h
)
diff --git a/intern/ghost/intern/GHOST_SystemCocoa.mm b/intern/ghost/intern/GHOST_SystemCocoa.mm
index b6836614962..8677c0b9552 100644
--- a/intern/ghost/intern/GHOST_SystemCocoa.mm
+++ b/intern/ghost/intern/GHOST_SystemCocoa.mm
@@ -967,7 +967,7 @@ bool GHOST_SystemCocoa::processEvents(bool waitForEvent)
return anyProcessed;
}
-// Note: called from NSApplication delegate
+/* NOTE: called from #NSApplication delegate. */
GHOST_TSuccess GHOST_SystemCocoa::handleApplicationBecomeActiveEvent()
{
for (GHOST_IWindow *iwindow : m_windowManager->getWindows()) {
@@ -1046,7 +1046,7 @@ void GHOST_SystemCocoa::notifyExternalEventProcessed()
m_outsideLoopEventProcessed = true;
}
-// Note: called from NSWindow delegate
+/* NOTE: called from #NSWindow delegate. */
GHOST_TSuccess GHOST_SystemCocoa::handleWindowEvent(GHOST_TEventType eventType,
GHOST_WindowCocoa *window)
{
@@ -1108,7 +1108,7 @@ GHOST_TSuccess GHOST_SystemCocoa::handleWindowEvent(GHOST_TEventType eventType,
return GHOST_kSuccess;
}
-// Note: called from NSWindow subclass
+/* NOTE: called from #NSWindow subclass. */
GHOST_TSuccess GHOST_SystemCocoa::handleDraggingEvent(GHOST_TEventType eventType,
GHOST_TDragnDropTypes draggedObjectType,
GHOST_WindowCocoa *window,
diff --git a/intern/ghost/intern/GHOST_SystemWayland.cpp b/intern/ghost/intern/GHOST_SystemWayland.cpp
index 6f694bfd9a6..dae3d578fa0 100644
--- a/intern/ghost/intern/GHOST_SystemWayland.cpp
+++ b/intern/ghost/intern/GHOST_SystemWayland.cpp
@@ -761,7 +761,7 @@ static void data_device_selection(void *data,
input->data_offer_copy_paste = data_offer;
std::string mime_receive;
- for (const std::string &type : {mime_text_utf8, mime_text_plain}) {
+ for (const std::string type : {mime_text_utf8, mime_text_plain}) {
if (data_offer->types.count(type)) {
mime_receive = type;
break;
diff --git a/intern/ghost/intern/GHOST_SystemWin32.cpp b/intern/ghost/intern/GHOST_SystemWin32.cpp
index 83869188b65..8e07bf4ea3d 100644
--- a/intern/ghost/intern/GHOST_SystemWin32.cpp
+++ b/intern/ghost/intern/GHOST_SystemWin32.cpp
@@ -8,12 +8,14 @@
#include "GHOST_SystemWin32.h"
#include "GHOST_ContextD3D.h"
#include "GHOST_EventDragnDrop.h"
+#include "GHOST_EventTrackpad.h"
#ifndef _WIN32_IE
# define _WIN32_IE 0x0501 /* shipped before XP, so doesn't impose additional requirements */
#endif
#include <commctrl.h>
+#include <dwmapi.h>
#include <psapi.h>
#include <shellapi.h>
#include <shellscalingapi.h>
@@ -414,6 +416,8 @@ bool GHOST_SystemWin32::processEvents(bool waitForEvent)
hasEventHandled = true;
}
+ driveTrackpad();
+
// Process all the events waiting for us
while (::PeekMessageW(&msg, NULL, 0, 0, PM_REMOVE) != 0) {
// TranslateMessage doesn't alter the message, and doesn't change our raw keyboard data.
@@ -423,6 +427,8 @@ bool GHOST_SystemWin32::processEvents(bool waitForEvent)
hasEventHandled = true;
}
+ processTrackpad();
+
/* PeekMessage above is allowed to dispatch messages to the wndproc without us
* noticing, so we need to check the event manager here to see if there are
* events waiting in the queue.
@@ -1416,6 +1422,52 @@ bool GHOST_SystemWin32::processNDOF(RAWINPUT const &raw)
}
#endif // WITH_INPUT_NDOF
+void GHOST_SystemWin32::driveTrackpad()
+{
+ GHOST_WindowWin32 *active_window = static_cast<GHOST_WindowWin32 *>(
+ getWindowManager()->getActiveWindow());
+ if (active_window) {
+ active_window->updateDirectManipulation();
+ }
+}
+
+void GHOST_SystemWin32::processTrackpad()
+{
+ GHOST_WindowWin32 *active_window = static_cast<GHOST_WindowWin32 *>(
+ getWindowManager()->getActiveWindow());
+
+ if (!active_window) {
+ return;
+ }
+
+ GHOST_TTrackpadInfo trackpad_info = active_window->getTrackpadInfo();
+ GHOST_SystemWin32 *system = (GHOST_SystemWin32 *)getSystem();
+
+ int32_t cursor_x, cursor_y;
+ system->getCursorPosition(cursor_x, cursor_y);
+
+ if (trackpad_info.x != 0 || trackpad_info.y != 0) {
+ system->pushEvent(new GHOST_EventTrackpad(system->getMilliSeconds(),
+ active_window,
+ GHOST_kTrackpadEventScroll,
+ cursor_x,
+ cursor_y,
+ trackpad_info.x,
+ trackpad_info.y,
+ trackpad_info.isScrollDirectionInverted));
+ }
+ if (trackpad_info.scale != 0) {
+ system->pushEvent(new GHOST_EventTrackpad(system->getMilliSeconds(),
+ active_window,
+ GHOST_kTrackpadEventMagnify,
+ cursor_x,
+ cursor_y,
+ trackpad_info.scale,
+ 0,
+ false));
+ }
+}
+
LRESULT WINAPI GHOST_SystemWin32::s_wndProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam)
{
GHOST_Event *event = NULL;
@@ -1968,6 +2020,8 @@ LRESULT WINAPI GHOST_SystemWin32::s_wndProc(HWND hwnd, UINT msg, WPARAM wParam,
suggestedWindowRect->right - suggestedWindowRect->left,
suggestedWindowRect->bottom - suggestedWindowRect->top,
SWP_NOZORDER | SWP_NOACTIVATE);
+
+ window->updateDPI();
}
break;
case WM_DISPLAYCHANGE: {
@@ -1985,6 +2039,12 @@ LRESULT WINAPI GHOST_SystemWin32::s_wndProc(HWND hwnd, UINT msg, WPARAM wParam,
::SetFocus(hwnd);
}
break;
+ case WM_SETTINGCHANGE:
+ /* Microsoft: "Note that some applications send this message with lParam set to NULL" */
+ if ((lParam != NULL) && (wcscmp(LPCWSTR(lParam), L"ImmersiveColorSet") == 0)) {
+ window->ThemeRefresh();
+ }
+ break;
////////////////////////////////////////////////////////////////////////
// Window events, ignored
////////////////////////////////////////////////////////////////////////
@@ -2056,6 +2116,12 @@ LRESULT WINAPI GHOST_SystemWin32::s_wndProc(HWND hwnd, UINT msg, WPARAM wParam,
* In GHOST, we let DefWindowProc call the timer callback.
*/
break;
+ case DM_POINTERHITTEST:
+ /* The DM_POINTERHITTEST message is sent to a window, when pointer input is first
+ * detected, in order to determine the most probable input target for Direct
+ * Manipulation. */
+ window->onPointerHitTest(wParam);
+ break;
}
}
else {
diff --git a/intern/ghost/intern/GHOST_SystemWin32.h b/intern/ghost/intern/GHOST_SystemWin32.h
index 9f8d52f9ca3..689b78b0317 100644
--- a/intern/ghost/intern/GHOST_SystemWin32.h
+++ b/intern/ghost/intern/GHOST_SystemWin32.h
@@ -407,6 +407,16 @@ class GHOST_SystemWin32 : public GHOST_System {
#endif
/**
+ * Drives Direct Manipulation update.
+ */
+ void driveTrackpad();
+
+ /**
+ * Creates trackpad events for the active window.
+ */
+ void processTrackpad();
+
+ /**
* Returns the local state of the modifier keys (from the message queue).
* \param keys: The state of the keys.
*/
diff --git a/intern/ghost/intern/GHOST_SystemX11.cpp b/intern/ghost/intern/GHOST_SystemX11.cpp
index ebee5f58fff..bbce6fdfdb5 100644
--- a/intern/ghost/intern/GHOST_SystemX11.cpp
+++ b/intern/ghost/intern/GHOST_SystemX11.cpp
@@ -63,7 +63,7 @@
#include <stdio.h> /* for fprintf only */
#include <vector>
-/* for debugging - so we can breakpoint X11 errors */
+/* For debugging, so we can break-point X11 errors. */
// #define USE_X11_ERROR_HANDLERS
#ifdef WITH_X11_XINPUT
@@ -2492,7 +2492,7 @@ GHOST_TSuccess GHOST_SystemX11::pushDragDropEvent(GHOST_TEventType eventType,
}
#endif
/**
- * These callbacks can be used for debugging, so we can breakpoint on an X11 error.
+ * These callbacks can be used for debugging, so we can break-point on an X11 error.
*
* Dummy function to get around IO Handler exiting if device invalid
* Basically it will not crash blender now if you have a X device that
diff --git a/intern/ghost/intern/GHOST_TrackpadWin32.cpp b/intern/ghost/intern/GHOST_TrackpadWin32.cpp
new file mode 100644
index 00000000000..d5317f0f780
--- /dev/null
+++ b/intern/ghost/intern/GHOST_TrackpadWin32.cpp
@@ -0,0 +1,343 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup GHOST
+ */
+
+#include <cmath>
+
+#include "GHOST_Debug.h"
+#include "GHOST_TrackpadWin32.h"
+
+GHOST_DirectManipulationHelper::GHOST_DirectManipulationHelper(
+ HWND hWnd,
+ Microsoft::WRL::ComPtr<IDirectManipulationManager> directManipulationManager,
+ Microsoft::WRL::ComPtr<IDirectManipulationUpdateManager> directManipulationUpdateManager,
+ Microsoft::WRL::ComPtr<IDirectManipulationViewport> directManipulationViewport,
+ Microsoft::WRL::ComPtr<GHOST_DirectManipulationViewportEventHandler>
+ directManipulationEventHandler,
+ DWORD directManipulationViewportHandlerCookie,
+ bool isScrollDirectionInverted)
+ : m_hWnd(hWnd),
+ m_scrollDirectionRegKey(NULL),
+ m_scrollDirectionChangeEvent(NULL),
+ m_directManipulationManager(directManipulationManager),
+ m_directManipulationUpdateManager(directManipulationUpdateManager),
+ m_directManipulationViewport(directManipulationViewport),
+ m_directManipulationEventHandler(directManipulationEventHandler),
+ m_directManipulationViewportHandlerCookie(directManipulationViewportHandlerCookie),
+ m_isScrollDirectionInverted(isScrollDirectionInverted)
+{
+}
+
+GHOST_DirectManipulationHelper *GHOST_DirectManipulationHelper::create(HWND hWnd, uint16_t dpi)
+{
+#define DM_CHECK_RESULT_AND_EXIT_EARLY(hr, failMessage) \
+ { \
+ if (!SUCCEEDED(hr)) { \
+ GHOST_PRINT(failMessage); \
+ return nullptr; \
+ } \
+ }
+
+ Microsoft::WRL::ComPtr<IDirectManipulationManager> directManipulationManager;
+ HRESULT hr = ::CoCreateInstance(CLSID_DirectManipulationManager,
+ nullptr,
+ CLSCTX_INPROC_SERVER,
+ IID_PPV_ARGS(&directManipulationManager));
+ DM_CHECK_RESULT_AND_EXIT_EARLY(hr, "DirectManipulationManager create failed\n");
+
+ /* Since we want to use fake viewport, we need to send fake updates to UpdateManager. */
+ Microsoft::WRL::ComPtr<IDirectManipulationUpdateManager> directManipulationUpdateManager;
+ hr = directManipulationManager->GetUpdateManager(IID_PPV_ARGS(&directManipulationUpdateManager));
+ DM_CHECK_RESULT_AND_EXIT_EARLY(hr, "Get UpdateManager failed\n");
+
+ Microsoft::WRL::ComPtr<IDirectManipulationViewport> directManipulationViewport;
+ hr = directManipulationManager->CreateViewport(
+ nullptr, hWnd, IID_PPV_ARGS(&directManipulationViewport));
+ DM_CHECK_RESULT_AND_EXIT_EARLY(hr, "Viewport create failed\n");
+
+ DIRECTMANIPULATION_CONFIGURATION configuration =
+ DIRECTMANIPULATION_CONFIGURATION_INTERACTION |
+ DIRECTMANIPULATION_CONFIGURATION_TRANSLATION_X |
+ DIRECTMANIPULATION_CONFIGURATION_TRANSLATION_Y |
+ DIRECTMANIPULATION_CONFIGURATION_TRANSLATION_INERTIA |
+ DIRECTMANIPULATION_CONFIGURATION_SCALING;
+
+ hr = directManipulationViewport->ActivateConfiguration(configuration);
+ DM_CHECK_RESULT_AND_EXIT_EARLY(hr, "Viewport set ActivateConfiguration failed\n");
+
+ /* Since we are using fake viewport and only want to use Direct Manipulation for touchpad, we
+ * need to use MANUALUPDATE option. */
+ hr = directManipulationViewport->SetViewportOptions(
+ DIRECTMANIPULATION_VIEWPORT_OPTIONS_MANUALUPDATE);
+ DM_CHECK_RESULT_AND_EXIT_EARLY(hr, "Viewport set ViewportOptions failed\n");
+
+ /* We receive Direct Manipulation transform updates in IDirectManipulationViewportEventHandler
+ * callbacks. */
+ Microsoft::WRL::ComPtr<GHOST_DirectManipulationViewportEventHandler>
+ directManipulationEventHandler =
+ Microsoft::WRL::Make<GHOST_DirectManipulationViewportEventHandler>(dpi);
+ DWORD directManipulationViewportHandlerCookie;
+ directManipulationViewport->AddEventHandler(
+ hWnd, directManipulationEventHandler.Get(), &directManipulationViewportHandlerCookie);
+ DM_CHECK_RESULT_AND_EXIT_EARLY(hr, "Viewport add EventHandler failed\n");
+
+ /* Set default rect for viewport before activating. */
+ RECT rect = {0, 0, 10000, 10000};
+ hr = directManipulationViewport->SetViewportRect(&rect);
+ DM_CHECK_RESULT_AND_EXIT_EARLY(hr, "Viewport set rect failed\n");
+
+ hr = directManipulationManager->Activate(hWnd);
+ DM_CHECK_RESULT_AND_EXIT_EARLY(hr, "DirectManipulationManager activate failed\n");
+
+ hr = directManipulationViewport->Enable();
+ DM_CHECK_RESULT_AND_EXIT_EARLY(hr, "Viewport enable failed\n");
+
+ directManipulationEventHandler->resetViewport(directManipulationViewport.Get());
+
+ bool isScrollDirectionInverted = getScrollDirectionFromReg();
+
+ auto instance = new GHOST_DirectManipulationHelper(hWnd,
+ directManipulationManager,
+ directManipulationUpdateManager,
+ directManipulationViewport,
+ directManipulationEventHandler,
+ directManipulationViewportHandlerCookie,
+ isScrollDirectionInverted);
+
+ instance->registerScrollDirectionChangeListener();
+
+ return instance;
+
+#undef DM_CHECK_RESULT_AND_EXIT_EARLY
+}
+
+bool GHOST_DirectManipulationHelper::getScrollDirectionFromReg()
+{
+ DWORD scrollDirectionRegValue, pcbData;
+ HRESULT hr = HRESULT_FROM_WIN32(
+ RegGetValueW(HKEY_CURRENT_USER,
+ L"SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\PrecisionTouchPad\\",
+ L"ScrollDirection",
+ RRF_RT_REG_DWORD,
+ NULL,
+ &scrollDirectionRegValue,
+ &pcbData));
+ if (!SUCCEEDED(hr)) {
+ GHOST_PRINT("Failed to get scroll direction from registry\n");
+ return false;
+ }
+
+ return scrollDirectionRegValue == 0;
+}
+
+void GHOST_DirectManipulationHelper::registerScrollDirectionChangeListener()
+{
+
+ if (!m_scrollDirectionRegKey) {
+ HRESULT hr = HRESULT_FROM_WIN32(
+ RegOpenKeyExW(HKEY_CURRENT_USER,
+ L"SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\PrecisionTouchPad\\",
+ 0,
+ KEY_NOTIFY,
+ &m_scrollDirectionRegKey));
+ if (!SUCCEEDED(hr)) {
+ GHOST_PRINT("Failed to open scroll direction registry key\n");
+ return;
+ }
+ }
+
+ if (!m_scrollDirectionChangeEvent) {
+ m_scrollDirectionChangeEvent = CreateEventW(NULL, true, false, NULL);
+ }
+ else {
+ ResetEvent(m_scrollDirectionChangeEvent);
+ }
+ HRESULT hr = HRESULT_FROM_WIN32(RegNotifyChangeKeyValue(m_scrollDirectionRegKey,
+ true,
+ REG_NOTIFY_CHANGE_LAST_SET,
+ m_scrollDirectionChangeEvent,
+ true));
+ if (!SUCCEEDED(hr)) {
+ GHOST_PRINT("Failed to register scroll direction change listener\n");
+ return;
+ }
+}
+
+void GHOST_DirectManipulationHelper::onPointerHitTest(UINT32 pointerId)
+{
+ [[maybe_unused]] HRESULT hr = m_directManipulationViewport->SetContact(pointerId);
+ GHOST_ASSERT(SUCCEEDED(hr), "Viewport set contact failed\n");
+
+ if (WaitForSingleObject(m_scrollDirectionChangeEvent, 0) == WAIT_OBJECT_0) {
+ m_isScrollDirectionInverted = getScrollDirectionFromReg();
+ registerScrollDirectionChangeListener();
+ }
+}
+
+void GHOST_DirectManipulationHelper::update()
+{
+ if (m_directManipulationEventHandler->dm_status == DIRECTMANIPULATION_RUNNING ||
+ m_directManipulationEventHandler->dm_status == DIRECTMANIPULATION_INERTIA) {
+ [[maybe_unused]] HRESULT hr = m_directManipulationUpdateManager->Update(nullptr);
+ GHOST_ASSERT(SUCCEEDED(hr), "DirectManipulationUpdateManager update failed\n");
+ }
+}
+
+void GHOST_DirectManipulationHelper::setDPI(uint16_t dpi)
+{
+ m_directManipulationEventHandler->dpi = dpi;
+}
+
+GHOST_TTrackpadInfo GHOST_DirectManipulationHelper::getTrackpadInfo()
+{
+ GHOST_TTrackpadInfo result = m_directManipulationEventHandler->accumulated_values;
+ result.isScrollDirectionInverted = m_isScrollDirectionInverted;
+
+ m_directManipulationEventHandler->accumulated_values = {0, 0, 0};
+ return result;
+}
+
+GHOST_DirectManipulationHelper::~GHOST_DirectManipulationHelper()
+{
+ HRESULT hr;
+ hr = m_directManipulationViewport->Stop();
+ GHOST_ASSERT(SUCCEEDED(hr), "Viewport stop failed\n");
+
+ hr = m_directManipulationViewport->RemoveEventHandler(m_directManipulationViewportHandlerCookie);
+ GHOST_ASSERT(SUCCEEDED(hr), "Viewport remove event handler failed\n");
+
+ hr = m_directManipulationViewport->Abandon();
+ GHOST_ASSERT(SUCCEEDED(hr), "Viewport abandon failed\n");
+
+ hr = m_directManipulationManager->Deactivate(m_hWnd);
+ GHOST_ASSERT(SUCCEEDED(hr), "DirectManipulationManager deactivate failed\n");
+
+ if (m_scrollDirectionChangeEvent) {
+ CloseHandle(m_scrollDirectionChangeEvent);
+ m_scrollDirectionChangeEvent = NULL;
+ }
+ if (m_scrollDirectionRegKey) {
+ RegCloseKey(m_scrollDirectionRegKey);
+ m_scrollDirectionRegKey = NULL;
+ }
+}
+
+GHOST_DirectManipulationViewportEventHandler::GHOST_DirectManipulationViewportEventHandler(
+ uint16_t dpi)
+ : accumulated_values({0, 0, 0}), dpi(dpi), dm_status(DIRECTMANIPULATION_BUILDING)
+{
+}
+
+void GHOST_DirectManipulationViewportEventHandler::resetViewport(
+ IDirectManipulationViewport *viewport)
+{
+ if (gesture_state != GESTURE_NONE) {
+ [[maybe_unused]] HRESULT hr = viewport->ZoomToRect(0.0f, 0.0f, 10000.0f, 10000.0f, FALSE);
+ GHOST_ASSERT(SUCCEEDED(hr), "Viewport reset failed\n");
+ }
+
+ gesture_state = GESTURE_NONE;
+
+ last_scale = PINCH_SCALE_FACTOR;
+ last_x = 0.0f;
+ last_y = 0.0f;
+}
+
+HRESULT GHOST_DirectManipulationViewportEventHandler::OnViewportStatusChanged(
+ IDirectManipulationViewport *viewport,
+ DIRECTMANIPULATION_STATUS current,
+ DIRECTMANIPULATION_STATUS previous)
+{
+ dm_status = current;
+
+ if (current == previous) {
+ return S_OK;
+ }
+
+ if (previous == DIRECTMANIPULATION_ENABLED || current == DIRECTMANIPULATION_READY ||
+ (previous == DIRECTMANIPULATION_INERTIA && current != DIRECTMANIPULATION_INERTIA)) {
+ resetViewport(viewport);
+ }
+
+ return S_OK;
+}
+
+HRESULT GHOST_DirectManipulationViewportEventHandler::OnViewportUpdated(
+ IDirectManipulationViewport *viewport)
+{
+ /* Nothing to do here. */
+ return S_OK;
+}
+
+HRESULT GHOST_DirectManipulationViewportEventHandler::OnContentUpdated(
+ IDirectManipulationViewport *viewport, IDirectManipulationContent *content)
+{
+ float transform[6];
+ HRESULT hr = content->GetContentTransform(transform, ARRAYSIZE(transform));
+ GHOST_ASSERT(SUCCEEDED(hr), "DirectManipulationContent get transform failed\n");
+
+ const float device_scale_factor = dpi / 96.0f;
+
+ const float scale = transform[0] * PINCH_SCALE_FACTOR;
+ const float x = transform[4] / device_scale_factor;
+ const float y = transform[5] / device_scale_factor;
+
+ const float EPS = 3e-5;
+
+ /* Ignore repeating or incorrect input. */
+ if ((fabs(scale - last_scale) <= EPS && fabs(x - last_x) <= EPS && fabs(y - last_y) <= EPS) ||
+ scale == 0.0f) {
+ GHOST_PRINT("Ignoring touchpad input\n");
+ return hr;
+ }
+
+ /* Assume that every gesture is a pan in the beginning.
+ * If it's a pinch, the gesture will be changed below. */
+ if (gesture_state == GESTURE_NONE) {
+ gesture_state = GESTURE_PAN;
+ }
+
+ /* DM doesn't always immediately recognize pinch gestures,
+ * so allow transition from pan to pinch. */
+ if (gesture_state == GESTURE_PAN) {
+ if (fabs(scale - PINCH_SCALE_FACTOR) > EPS) {
+ gesture_state = GESTURE_PINCH;
+ }
+ }
+
+ /* This state machine is used here because:
+ * 1. Pinch and pan gestures must be differentiated and cannot be processed at the same time
+ * because XY transform values become nonsensical during pinch gesture.
+ * 2. GHOST requires delta values for events while DM provides transformation matrix of the
+ * current gesture.
+ * 3. GHOST events accept integer values while DM values are non-integer.
+ * Truncated fractional parts are accumulated and accounted for in following updates.
+ */
+ switch (gesture_state) {
+ case GESTURE_PINCH: {
+ int32_t dscale = roundf(scale - last_scale);
+
+ last_scale += dscale;
+
+ accumulated_values.scale += dscale;
+ break;
+ }
+ case GESTURE_PAN: {
+ int32_t dx = roundf(x - last_x);
+ int32_t dy = roundf(y - last_y);
+
+ last_x += dx;
+ last_y += dy;
+
+ accumulated_values.x += dx;
+ accumulated_values.y += dy;
+ break;
+ }
+ case GESTURE_NONE:
+ break;
+ }
+
+ return hr;
+}
diff --git a/intern/ghost/intern/GHOST_TrackpadWin32.h b/intern/ghost/intern/GHOST_TrackpadWin32.h
new file mode 100644
index 00000000000..2e28f756965
--- /dev/null
+++ b/intern/ghost/intern/GHOST_TrackpadWin32.h
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup GHOST
+ * Declaration of GHOST DirectManipulation classes.
+ */
+
+#pragma once
+
+#ifndef WIN32
+# error WIN32 only!
+#endif // WIN32
+
+#include "GHOST_Types.h"
+
+#include <directmanipulation.h>
+#include <wrl.h>
+
+#define PINCH_SCALE_FACTOR 125.0f
+
+typedef struct {
+ int32_t x, y, scale;
+ bool isScrollDirectionInverted;
+} GHOST_TTrackpadInfo;
+
+class GHOST_DirectManipulationHelper;
+
+class GHOST_DirectManipulationViewportEventHandler
+ : public Microsoft::WRL::RuntimeClass<
+ Microsoft::WRL::RuntimeClassFlags<Microsoft::WRL::RuntimeClassType::ClassicCom>,
+ Microsoft::WRL::Implements<
+ Microsoft::WRL::RuntimeClassFlags<Microsoft::WRL::RuntimeClassType::ClassicCom>,
+ Microsoft::WRL::FtmBase,
+ IDirectManipulationViewportEventHandler>> {
+ public:
+ GHOST_DirectManipulationViewportEventHandler(uint16_t dpi);
+
+ /*
+ * Resets viewport and tracked touchpad state.
+ */
+ void resetViewport(IDirectManipulationViewport *viewport);
+
+ /* DirectManipulation callbacks. */
+ HRESULT STDMETHODCALLTYPE OnViewportStatusChanged(IDirectManipulationViewport *viewport,
+ DIRECTMANIPULATION_STATUS current,
+ DIRECTMANIPULATION_STATUS previous) override;
+
+ HRESULT STDMETHODCALLTYPE OnViewportUpdated(IDirectManipulationViewport *viewport) override;
+
+ HRESULT STDMETHODCALLTYPE OnContentUpdated(IDirectManipulationViewport *viewport,
+ IDirectManipulationContent *content) override;
+
+ private:
+ enum { GESTURE_NONE, GESTURE_PAN, GESTURE_PINCH } gesture_state;
+
+ int32_t last_x, last_y, last_scale;
+ GHOST_TTrackpadInfo accumulated_values;
+ uint16_t dpi;
+ DIRECTMANIPULATION_STATUS dm_status;
+
+ friend class GHOST_DirectManipulationHelper;
+};
+
+class GHOST_DirectManipulationHelper {
+ public:
+ /*
+ * Creates a GHOST_DirectManipulationHelper for the provided window.
+ * \param hWnd: The window receiving DirectManipulation events.
+ * \param dpi: The current DPI.
+ * \return Pointer to the new GHOST_DirectManipulationHelper if created, nullptr if there was an
+ * error.
+ */
+ static GHOST_DirectManipulationHelper *create(HWND hWnd, uint16_t dpi);
+
+ ~GHOST_DirectManipulationHelper();
+
+ /*
+ * Drives the DirectManipulation context.
+ * DirectManipulation's intended use is to tie user input into DirectComposition's compositor
+ * scaling and translating. We are not using DirectComposition and therefore must drive
+ * DirectManipulation manually.
+ */
+ void update();
+
+ /*
+ * Sets pointer in contact with the DirectManipulation context.
+ * \param pointerId: ID of the pointer in contact.
+ */
+ void onPointerHitTest(UINT32 pointerId);
+
+ /*
+ * Updates DPI information for touchpad scaling.
+ * \param dpi: The new DPI.
+ */
+ void setDPI(uint16_t dpi);
+
+ /*
+ * Retrieves trackpad input.
+ * \return The accumulated trackpad translation and scale since last call.
+ */
+ GHOST_TTrackpadInfo getTrackpadInfo();
+
+ private:
+ GHOST_DirectManipulationHelper(
+ HWND hWnd,
+ Microsoft::WRL::ComPtr<IDirectManipulationManager> directManipulationManager,
+ Microsoft::WRL::ComPtr<IDirectManipulationUpdateManager> directManipulationUpdateManager,
+ Microsoft::WRL::ComPtr<IDirectManipulationViewport> directManipulationViewport,
+ Microsoft::WRL::ComPtr<GHOST_DirectManipulationViewportEventHandler>
+ directManipulationEventHandler,
+ DWORD directManipulationViewportHandlerCookie,
+ bool isScrollDirectionInverted);
+
+ /*
+ * Retrieves the scroll direction from the registry.
+ * \return True if scroll direction is inverted.
+ */
+ static bool getScrollDirectionFromReg();
+
+ /*
+ * Registers listener for registry scroll direction entry changes.
+ */
+ void registerScrollDirectionChangeListener();
+
+ HWND m_hWnd;
+
+ HKEY m_scrollDirectionRegKey;
+ HANDLE m_scrollDirectionChangeEvent;
+
+ Microsoft::WRL::ComPtr<IDirectManipulationManager> m_directManipulationManager;
+ Microsoft::WRL::ComPtr<IDirectManipulationUpdateManager> m_directManipulationUpdateManager;
+ Microsoft::WRL::ComPtr<IDirectManipulationViewport> m_directManipulationViewport;
+ Microsoft::WRL::ComPtr<GHOST_DirectManipulationViewportEventHandler>
+ m_directManipulationEventHandler;
+ DWORD m_directManipulationViewportHandlerCookie;
+
+ bool m_isScrollDirectionInverted;
+};
diff --git a/intern/ghost/intern/GHOST_WindowCocoa.mm b/intern/ghost/intern/GHOST_WindowCocoa.mm
index 4a1b3c2fe16..e7f5fdaa011 100644
--- a/intern/ghost/intern/GHOST_WindowCocoa.mm
+++ b/intern/ghost/intern/GHOST_WindowCocoa.mm
@@ -426,8 +426,8 @@ GHOST_WindowCocoa::~GHOST_WindowCocoa()
[m_window close];
}
- // Check for other blender opened windows and make the frontmost key
- // Note: for some reason the closed window is still in the list
+ /* Check for other blender opened windows and make the front-most key
+ * NOTE: for some reason the closed window is still in the list. */
NSArray *windowsList = [NSApp orderedWindows];
for (int a = 0; a < [windowsList count]; a++) {
if (m_window != (CocoaWindow *)[windowsList objectAtIndex:a]) {
diff --git a/intern/ghost/intern/GHOST_WindowWin32.cpp b/intern/ghost/intern/GHOST_WindowWin32.cpp
index 2ce224b666b..2e17454d24f 100644
--- a/intern/ghost/intern/GHOST_WindowWin32.cpp
+++ b/intern/ghost/intern/GHOST_WindowWin32.cpp
@@ -16,9 +16,7 @@
#include "GHOST_ContextWGL.h"
-#ifdef WIN32_COMPOSITING
-# include <Dwmapi.h>
-#endif
+#include <Dwmapi.h>
#include <assert.h>
#include <math.h>
@@ -70,6 +68,7 @@ GHOST_WindowWin32::GHOST_WindowWin32(GHOST_SystemWin32 *system,
m_normal_state(GHOST_kWindowStateNormal),
m_user32(::LoadLibrary("user32.dll")),
m_parentWindowHwnd(parentwindow ? parentwindow->m_hWnd : HWND_DESKTOP),
+ m_directManipulationHelper(NULL),
m_debug_context(is_debug)
{
DWORD style = parentwindow ?
@@ -172,6 +171,8 @@ GHOST_WindowWin32::GHOST_WindowWin32(GHOST_SystemWin32 *system,
break;
}
+ ThemeRefresh();
+
::ShowWindow(m_hWnd, nCmdShow);
#ifdef WIN32_COMPOSITING
@@ -204,6 +205,42 @@ GHOST_WindowWin32::GHOST_WindowWin32(GHOST_SystemWin32 *system,
/* Allow the showing of a progress bar on the taskbar. */
CoCreateInstance(
CLSID_TaskbarList, NULL, CLSCTX_INPROC_SERVER, IID_ITaskbarList3, (LPVOID *)&m_Bar);
+
+ /* Initialize Direct Manipulation. */
+ m_directManipulationHelper = GHOST_DirectManipulationHelper::create(m_hWnd, getDPIHint());
+}
+
+void GHOST_WindowWin32::updateDirectManipulation()
+{
+ if (!m_directManipulationHelper) {
+ return;
+ }
+
+ m_directManipulationHelper->update();
+}
+
+void GHOST_WindowWin32::onPointerHitTest(WPARAM wParam)
+{
+ /* Only DM_POINTERHITTEST can be the first message of input sequence of touchpad input. */
+
+ if (!m_directManipulationHelper) {
+ return;
+ }
+
+ UINT32 pointerId = GET_POINTERID_WPARAM(wParam);
+ POINTER_INPUT_TYPE pointerType;
+ if (GetPointerType(pointerId, &pointerType) && pointerType == PT_TOUCHPAD) {
+ m_directManipulationHelper->onPointerHitTest(pointerId);
+ }
+}
+
+GHOST_TTrackpadInfo GHOST_WindowWin32::getTrackpadInfo()
+{
+ if (!m_directManipulationHelper) {
+ return {0, 0, 0};
+ }
+
+ return m_directManipulationHelper->getTrackpadInfo();
}
GHOST_WindowWin32::~GHOST_WindowWin32()
@@ -253,6 +290,9 @@ GHOST_WindowWin32::~GHOST_WindowWin32()
::DestroyWindow(m_hWnd);
m_hWnd = 0;
}
+
+ delete m_directManipulationHelper;
+ m_directManipulationHelper = NULL;
}
void GHOST_WindowWin32::adjustWindowRectForClosestMonitor(LPRECT win_rect,
@@ -282,7 +322,7 @@ void GHOST_WindowWin32::adjustWindowRectForClosestMonitor(LPRECT win_rect,
}
/* Adjust to allow for caption, borders, shadows, scaling, etc. Resulting values can be
- * correctly outside of monitor bounds. Note: You cannot specify WS_OVERLAPPED when calling. */
+ * correctly outside of monitor bounds. NOTE: You cannot specify #WS_OVERLAPPED when calling. */
if (fpAdjustWindowRectExForDpi) {
UINT dpiX, dpiY;
GetDpiForMonitor(hmonitor, MDT_EFFECTIVE_DPI, &dpiX, &dpiY);
@@ -1016,6 +1056,32 @@ GHOST_TabletData GHOST_WindowWin32::getTabletData()
}
}
+void GHOST_WindowWin32::ThemeRefresh()
+{
+ DWORD lightMode;
+ DWORD pcbData = sizeof(lightMode);
+ if (RegGetValueW(HKEY_CURRENT_USER,
+ L"Software\\Microsoft\\Windows\\CurrentVersion\\Themes\\Personalize\\",
+ L"AppsUseLightTheme",
+ RRF_RT_REG_DWORD,
+ NULL,
+ &lightMode,
+ &pcbData) == ERROR_SUCCESS) {
+ BOOL DarkMode = !lightMode;
+
+ /* 20 == DWMWA_USE_IMMERSIVE_DARK_MODE in Windows 11 SDK. This value was undocumented for
+ * Windows 10 versions 2004 and later, supported for Windows 11 Build 22000 and later. */
+ DwmSetWindowAttribute(this->m_hWnd, 20, &DarkMode, sizeof(DarkMode));
+ }
+}
+
+void GHOST_WindowWin32::updateDPI()
+{
+ if (m_directManipulationHelper) {
+ m_directManipulationHelper->setDPI(getDPIHint());
+ }
+}
+
uint16_t GHOST_WindowWin32::getDPIHint()
{
if (m_user32) {
diff --git a/intern/ghost/intern/GHOST_WindowWin32.h b/intern/ghost/intern/GHOST_WindowWin32.h
index d5f47871aff..c958a89ac48 100644
--- a/intern/ghost/intern/GHOST_WindowWin32.h
+++ b/intern/ghost/intern/GHOST_WindowWin32.h
@@ -13,6 +13,7 @@
#endif // WIN32
#include "GHOST_TaskbarWin32.h"
+#include "GHOST_TrackpadWin32.h"
#include "GHOST_Window.h"
#include "GHOST_Wintab.h"
#ifdef WITH_INPUT_IME
@@ -286,6 +287,8 @@ class GHOST_WindowWin32 : public GHOST_Window {
return GHOST_kFailure;
}
+ void updateDPI();
+
uint16_t getDPIHint() override;
/** True if the mouse is either over or captured by the window. */
@@ -294,6 +297,9 @@ class GHOST_WindowWin32 : public GHOST_Window {
/** True if the window currently resizing. */
bool m_inLiveResize;
+ /** Called when OS colors change and when the window is created. */
+ void ThemeRefresh();
+
#ifdef WITH_INPUT_IME
GHOST_ImeWin32 *getImeInput()
{
@@ -305,6 +311,19 @@ class GHOST_WindowWin32 : public GHOST_Window {
void endIME();
#endif /* WITH_INPUT_IME */
+ /*
+ * Drive DirectManipulation context.
+ */
+ void updateDirectManipulation();
+
+ /*
+ * Handle DM_POINTERHITTEST events.
+ * \param wParam: wParam from the event.
+ */
+ void onPointerHitTest(WPARAM wParam);
+
+ GHOST_TTrackpadInfo getTrackpadInfo();
+
private:
/**
* \param type: The type of rendering context create.
@@ -388,6 +407,8 @@ class GHOST_WindowWin32 : public GHOST_Window {
HWND m_parentWindowHwnd;
+ GHOST_DirectManipulationHelper *m_directManipulationHelper;
+
#ifdef WITH_INPUT_IME
/** Handle input method editors event */
GHOST_ImeWin32 m_imeInput;
diff --git a/intern/guardedalloc/intern/mallocn_guarded_impl.c b/intern/guardedalloc/intern/mallocn_guarded_impl.c
index acad413b4c1..8bf1680e6f8 100644
--- a/intern/guardedalloc/intern/mallocn_guarded_impl.c
+++ b/intern/guardedalloc/intern/mallocn_guarded_impl.c
@@ -54,7 +54,7 @@
# define DEBUG_MEMCOUNTER_ERROR_VAL 0
static int _mallocn_count = 0;
-/* breakpoint here */
+/* Break-point here. */
static void memcount_raise(const char *name)
{
fprintf(stderr, "%s: memcount-leak, %d\n", name, _mallocn_count);
diff --git a/intern/iksolver/intern/IK_QTask.cpp b/intern/iksolver/intern/IK_QTask.cpp
index caf9585a94e..80eda01b17f 100644
--- a/intern/iksolver/intern/IK_QTask.cpp
+++ b/intern/iksolver/intern/IK_QTask.cpp
@@ -116,7 +116,7 @@ void IK_QOrientationTask::ComputeJacobian(IK_QJacobian &jacobian)
}
// IK_QCenterOfMassTask
-// Note: implementation not finished!
+// NOTE: implementation not finished!
IK_QCenterOfMassTask::IK_QCenterOfMassTask(bool primary,
const IK_QSegment *segment,
diff --git a/intern/libmv/CMakeLists.txt b/intern/libmv/CMakeLists.txt
index f9fef9f7a29..e0ed68eb20e 100644
--- a/intern/libmv/CMakeLists.txt
+++ b/intern/libmv/CMakeLists.txt
@@ -26,7 +26,6 @@ if(WITH_LIBMV)
endif()
add_definitions(${GFLAGS_DEFINES})
add_definitions(${GLOG_DEFINES})
- add_definitions(${CERES_DEFINES})
add_definitions(-DLIBMV_GFLAGS_NAMESPACE=${GFLAGS_NAMESPACE})
list(APPEND INC
diff --git a/intern/libmv/bundle.sh b/intern/libmv/bundle.sh
index 6808e244c05..82293068745 100755
--- a/intern/libmv/bundle.sh
+++ b/intern/libmv/bundle.sh
@@ -124,7 +124,6 @@ if(WITH_LIBMV)
endif()
add_definitions(\${GFLAGS_DEFINES})
add_definitions(\${GLOG_DEFINES})
- add_definitions(\${CERES_DEFINES})
add_definitions(-DLIBMV_GFLAGS_NAMESPACE=\${GFLAGS_NAMESPACE})
list(APPEND INC
diff --git a/intern/libmv/libmv/multiview/euclidean_resection.h b/intern/libmv/libmv/multiview/euclidean_resection.h
index 3c4c3979ff6..cdb9b5af52e 100644
--- a/intern/libmv/libmv/multiview/euclidean_resection.h
+++ b/intern/libmv/libmv/multiview/euclidean_resection.h
@@ -118,7 +118,7 @@ void EuclideanResectionAnsarDaniilidis(const Mat2X& x_camera,
* This is the algorithm described in:
* "{EP$n$P: An Accurate $O(n)$ Solution to the P$n$P Problem", by V. Lepetit
* and F. Moreno-Noguer and P. Fua, IJCV 2009. vol. 81, no. 2
- * \note: the non-linear optimization is not implemented here.
+ * \note the non-linear optimization is not implemented here.
*/
bool EuclideanResectionEPnP(const Mat2X& x_camera,
const Mat3X& X_world,
diff --git a/intern/libmv/libmv/simple_pipeline/bundle.cc b/intern/libmv/libmv/simple_pipeline/bundle.cc
index e86c3bca57f..355c167d000 100644
--- a/intern/libmv/libmv/simple_pipeline/bundle.cc
+++ b/intern/libmv/libmv/simple_pipeline/bundle.cc
@@ -685,7 +685,7 @@ void EuclideanBundleCommonIntrinsics(const Tracks& tracks,
PackCamerasRotationAndTranslation(*reconstruction);
// Parameterization used to restrict camera motion for modal solvers.
- ceres::SubsetParameterization* constant_translation_parameterization = NULL;
+ ceres::SubsetManifold* constant_translation_manifold = NULL;
if (bundle_constraints & BUNDLE_NO_TRANSLATION) {
std::vector<int> constant_translation;
@@ -694,8 +694,8 @@ void EuclideanBundleCommonIntrinsics(const Tracks& tracks,
constant_translation.push_back(4);
constant_translation.push_back(5);
- constant_translation_parameterization =
- new ceres::SubsetParameterization(6, constant_translation);
+ constant_translation_manifold =
+ new ceres::SubsetManifold(6, constant_translation);
}
// Add residual blocks to the problem.
@@ -735,8 +735,7 @@ void EuclideanBundleCommonIntrinsics(const Tracks& tracks,
}
if (bundle_constraints & BUNDLE_NO_TRANSLATION) {
- problem.SetParameterization(current_camera_R_t,
- constant_translation_parameterization);
+ problem.SetManifold(current_camera_R_t, constant_translation_manifold);
}
zero_weight_tracks_flags[marker.track] = false;
@@ -787,11 +786,11 @@ void EuclideanBundleCommonIntrinsics(const Tracks& tracks,
#undef MAYBE_SET_CONSTANT
if (!constant_intrinsics.empty()) {
- ceres::SubsetParameterization* subset_parameterization =
- new ceres::SubsetParameterization(PackedIntrinsics::NUM_PARAMETERS,
- constant_intrinsics);
+ ceres::SubsetManifold* subset_parameterization =
+ new ceres::SubsetManifold(PackedIntrinsics::NUM_PARAMETERS,
+ constant_intrinsics);
- problem.SetParameterization(intrinsics_block, subset_parameterization);
+ problem.SetManifold(intrinsics_block, subset_parameterization);
}
}
diff --git a/intern/libmv/libmv/simple_pipeline/modal_solver.cc b/intern/libmv/libmv/simple_pipeline/modal_solver.cc
index 845b299e31e..206d264f1f8 100644
--- a/intern/libmv/libmv/simple_pipeline/modal_solver.cc
+++ b/intern/libmv/libmv/simple_pipeline/modal_solver.cc
@@ -180,7 +180,7 @@ void ModalSolver(const Tracks& tracks,
// NOTE: Parameterization is lazily initialized when it is really needed,
// and is re-used by all parameters block.
- ceres::LocalParameterization* quaternion_parameterization = NULL;
+ ceres::Manifold* quaternion_manifold = NULL;
int num_residuals = 0;
for (int i = 0; i < all_markers.size(); ++i) {
@@ -197,12 +197,11 @@ void ModalSolver(const Tracks& tracks,
&quaternion(0));
num_residuals++;
- if (quaternion_parameterization == NULL) {
- quaternion_parameterization = new ceres::QuaternionParameterization();
+ if (quaternion_manifold == NULL) {
+ quaternion_manifold = new ceres::QuaternionManifold();
}
- problem.SetParameterization(&quaternion(0),
- quaternion_parameterization);
+ problem.SetManifold(&quaternion(0), quaternion_manifold);
}
}
diff --git a/intern/opensubdiv/internal/evaluator/eval_output.h b/intern/opensubdiv/internal/evaluator/eval_output.h
index cff7c8d18c9..bc5494bfe41 100644
--- a/intern/opensubdiv/internal/evaluator/eval_output.h
+++ b/intern/opensubdiv/internal/evaluator/eval_output.h
@@ -372,15 +372,15 @@ class VolatileEvalOutput : public EvalOutputAPI::EvalOutput {
}
// Create evaluators for every face varying channel.
- face_varying_evaluators.reserve(all_face_varying_stencils.size());
+ face_varying_evaluators_.reserve(all_face_varying_stencils.size());
int face_varying_channel = 0;
for (const StencilTable *face_varying_stencils : all_face_varying_stencils) {
- face_varying_evaluators.push_back(new FaceVaryingEval(face_varying_channel,
- face_varying_stencils,
- face_varying_width,
- patch_table_,
- evaluator_cache_,
- device_context_));
+ face_varying_evaluators_.push_back(new FaceVaryingEval(face_varying_channel,
+ face_varying_stencils,
+ face_varying_width,
+ patch_table_,
+ evaluator_cache_,
+ device_context_));
++face_varying_channel;
}
}
@@ -393,7 +393,7 @@ class VolatileEvalOutput : public EvalOutputAPI::EvalOutput {
delete patch_table_;
delete vertex_stencils_;
delete varying_stencils_;
- for (FaceVaryingEval *face_varying_evaluator : face_varying_evaluators) {
+ for (FaceVaryingEval *face_varying_evaluator : face_varying_evaluators_) {
delete face_varying_evaluator;
}
}
@@ -421,8 +421,8 @@ class VolatileEvalOutput : public EvalOutputAPI::EvalOutput {
int num_vertices) override
{
assert(face_varying_channel >= 0);
- assert(face_varying_channel < face_varying_evaluators.size());
- face_varying_evaluators[face_varying_channel]->updateData(src, start_vertex, num_vertices);
+ assert(face_varying_channel < face_varying_evaluators_.size());
+ face_varying_evaluators_[face_varying_channel]->updateData(src, start_vertex, num_vertices);
}
bool hasVaryingData() const
@@ -434,7 +434,7 @@ class VolatileEvalOutput : public EvalOutputAPI::EvalOutput {
bool hasFaceVaryingData() const
{
- return face_varying_evaluators.size() != 0;
+ return face_varying_evaluators_.size() != 0;
}
void refine() override
@@ -483,7 +483,7 @@ class VolatileEvalOutput : public EvalOutputAPI::EvalOutput {
}
// Evaluate face-varying data.
if (hasFaceVaryingData()) {
- for (FaceVaryingEval *face_varying_evaluator : face_varying_evaluators) {
+ for (FaceVaryingEval *face_varying_evaluator : face_varying_evaluators_) {
face_varying_evaluator->refine();
}
}
@@ -589,8 +589,8 @@ class VolatileEvalOutput : public EvalOutputAPI::EvalOutput {
float face_varying[2]) override
{
assert(face_varying_channel >= 0);
- assert(face_varying_channel < face_varying_evaluators.size());
- face_varying_evaluators[face_varying_channel]->evalPatches(
+ assert(face_varying_channel < face_varying_evaluators_.size());
+ face_varying_evaluators_[face_varying_channel]->evalPatches(
patch_coord, num_patch_coords, face_varying);
}
@@ -606,17 +606,17 @@ class VolatileEvalOutput : public EvalOutputAPI::EvalOutput {
SRC_VERTEX_BUFFER *getFVarSrcBuffer(const int face_varying_channel) const
{
- return face_varying_evaluators[face_varying_channel]->getSrcBuffer();
+ return face_varying_evaluators_[face_varying_channel]->getSrcBuffer();
}
int getFVarSrcBufferOffset(const int face_varying_channel) const
{
- return face_varying_evaluators[face_varying_channel]->getFVarSrcBufferOffset();
+ return face_varying_evaluators_[face_varying_channel]->getFVarSrcBufferOffset();
}
PATCH_TABLE *getFVarPatchTable(const int face_varying_channel) const
{
- return face_varying_evaluators[face_varying_channel]->getPatchTable();
+ return face_varying_evaluators_[face_varying_channel]->getPatchTable();
}
private:
@@ -634,7 +634,7 @@ class VolatileEvalOutput : public EvalOutputAPI::EvalOutput {
const STENCIL_TABLE *varying_stencils_;
int face_varying_width_;
- vector<FaceVaryingEval *> face_varying_evaluators;
+ vector<FaceVaryingEval *> face_varying_evaluators_;
EvaluatorCache *evaluator_cache_;
DEVICE_CONTEXT *device_context_;
diff --git a/intern/opensubdiv/internal/evaluator/eval_output_cpu.h b/intern/opensubdiv/internal/evaluator/eval_output_cpu.h
index 2b3c738d6ab..35fd03f6158 100644
--- a/intern/opensubdiv/internal/evaluator/eval_output_cpu.h
+++ b/intern/opensubdiv/internal/evaluator/eval_output_cpu.h
@@ -32,7 +32,7 @@ using OpenSubdiv::Osd::CpuVertexBuffer;
namespace blender {
namespace opensubdiv {
-// Note: Define as a class instead of typedef to make it possible
+// NOTE: Define as a class instead of typedef to make it possible
// to have anonymous class in opensubdiv_evaluator_internal.h
class CpuEvalOutput : public VolatileEvalOutput<CpuVertexBuffer,
CpuVertexBuffer,
diff --git a/intern/rigidbody/RBI_api.h b/intern/rigidbody/RBI_api.h
index 791a4a6ac05..13b1c096a80 100644
--- a/intern/rigidbody/RBI_api.h
+++ b/intern/rigidbody/RBI_api.h
@@ -14,7 +14,7 @@ extern "C" {
#endif
/* API Notes:
- * Currently, this API is optimised for Bullet RigidBodies, and doesn't
+ * Currently, this API is optimized for Bullet RigidBodies, and doesn't
* take into account other Physics Engines. Some tweaking may be necessary
* to allow other systems to be used, in particular there may be references
* to datatypes that aren't used here...